From: William Roche <william.roche@oracle.com>
Repair poisoned memory location(s), calling ram_block_discard_range():
punching a hole in the backend file when necessary and regenerating
a usable memory.
If the kernel doesn't support the madvise calls used by this function
and we are dealing with anonymous memory, fall back to remapping the
location(s).
Signed-off-by: William Roche <william.roche@oracle.com>
---
system/physmem.c | 63 ++++++++++++++++++++++++++++++++----------------
1 file changed, 42 insertions(+), 21 deletions(-)
diff --git a/system/physmem.c b/system/physmem.c
index 2c90cc2d78..b228a692f8 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2180,13 +2180,37 @@ void qemu_ram_free(RAMBlock *block)
}
#ifndef _WIN32
+/* Try to simply remap the given location */
+static void qemu_ram_remap_mmap(RAMBlock *block, void* vaddr, size_t size,
+ ram_addr_t offset)
+{
+ int flags, prot;
+ void *area;
+
+ flags = MAP_FIXED;
+ flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
+ flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+ prot = PROT_READ;
+ prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
+ if (block->fd >= 0) {
+ area = mmap(vaddr, size, prot, flags, block->fd,
+ offset + block->fd_offset);
+ } else {
+ flags |= MAP_ANONYMOUS;
+ area = mmap(vaddr, size, prot, flags, -1, 0);
+ }
+ if (area != vaddr) {
+ error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+ size, block->offset + offset);
+ exit(1);
+ }
+}
+
void qemu_ram_remap(ram_addr_t addr)
{
RAMBlock *block;
ram_addr_t offset;
- int flags;
- void *area, *vaddr;
- int prot;
+ void *vaddr;
size_t page_size;
RAMBLOCK_FOREACH(block) {
@@ -2202,24 +2226,21 @@ void qemu_ram_remap(ram_addr_t addr)
} else if (xen_enabled()) {
abort();
} else {
- flags = MAP_FIXED;
- flags |= block->flags & RAM_SHARED ?
- MAP_SHARED : MAP_PRIVATE;
- flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
- prot = PROT_READ;
- prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
- if (block->fd >= 0) {
- area = mmap(vaddr, page_size, prot, flags, block->fd,
- offset + block->fd_offset);
- } else {
- flags |= MAP_ANONYMOUS;
- area = mmap(vaddr, page_size, prot, flags, -1, 0);
- }
- if (area != vaddr) {
- error_report("Could not remap addr: "
- RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
- page_size, addr);
- exit(1);
+ if (ram_block_discard_range(block, offset + block->fd_offset,
+ page_size) != 0) {
+ /*
+ * Fold back to using mmap() only for anonymous mapping,
+ * as if a backing file is associated we may not be able
+ * to recover the memory in all cases.
+ * So don't take the risk of using only mmap and fail now.
+ */
+ if (block->fd >= 0) {
+ error_report("Memory poison recovery failure addr: "
+ RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+ page_size, addr);
+ exit(1);
+ }
+ qemu_ram_remap_mmap(block, vaddr, page_size, offset);
}
memory_try_enable_merging(vaddr, page_size);
qemu_ram_setup_dump(vaddr, page_size);
--
2.43.5
On 14.12.24 14:45, “William Roche wrote:
> From: William Roche <william.roche@oracle.com>
>
> Repair poisoned memory location(s), calling ram_block_discard_range():
> punching a hole in the backend file when necessary and regenerating
> a usable memory.
> If the kernel doesn't support the madvise calls used by this function
> and we are dealing with anonymous memory, fall back to remapping the
> location(s).
>
> Signed-off-by: William Roche <william.roche@oracle.com>
> ---
> system/physmem.c | 63 ++++++++++++++++++++++++++++++++----------------
> 1 file changed, 42 insertions(+), 21 deletions(-)
>
> diff --git a/system/physmem.c b/system/physmem.c
> index 2c90cc2d78..b228a692f8 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -2180,13 +2180,37 @@ void qemu_ram_free(RAMBlock *block)
> }
>
> #ifndef _WIN32
> +/* Try to simply remap the given location */
> +static void qemu_ram_remap_mmap(RAMBlock *block, void* vaddr, size_t size,
> + ram_addr_t offset)
Can you make the parameters match the ones of ram_block_discard_range()
so the invocation gets easier to read? You can calculate vaddr easily
internally.
Something like
static void qemu_ram_remap_mmap(RAMBlock *rb, uint64_t start,
size_t length)
> +{
> + int flags, prot;
> + void *area;
> +
> + flags = MAP_FIXED;
> + flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
> + flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
> + prot = PROT_READ;
> + prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
> + if (block->fd >= 0) {
Heh, that case can no longer happen!
assert(block->fs < 0);
?
> + area = mmap(vaddr, size, prot, flags, block->fd,
> + offset + block->fd_offset);
> + } else {
> + flags |= MAP_ANONYMOUS;
> + area = mmap(vaddr, size, prot, flags, -1, 0);
> + }
> + if (area != vaddr) {
> + error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
> + size, block->offset + offset);
> + exit(1);
> + }
> +}
> +
> void qemu_ram_remap(ram_addr_t addr)
> {
> RAMBlock *block;
> ram_addr_t offset;
> - int flags;
> - void *area, *vaddr;
> - int prot;
> + void *vaddr;
> size_t page_size;
>
> RAMBLOCK_FOREACH(block) {
> @@ -2202,24 +2226,21 @@ void qemu_ram_remap(ram_addr_t addr)
> } else if (xen_enabled()) {
> abort();
> } else {
> - flags = MAP_FIXED;
> - flags |= block->flags & RAM_SHARED ?
> - MAP_SHARED : MAP_PRIVATE;
> - flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
> - prot = PROT_READ;
> - prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
> - if (block->fd >= 0) {
> - area = mmap(vaddr, page_size, prot, flags, block->fd,
> - offset + block->fd_offset);
> - } else {
> - flags |= MAP_ANONYMOUS;
> - area = mmap(vaddr, page_size, prot, flags, -1, 0);
> - }
> - if (area != vaddr) {
> - error_report("Could not remap addr: "
> - RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
> - page_size, addr);
> - exit(1);
> + if (ram_block_discard_range(block, offset + block->fd_offset,
> + page_size) != 0) {
> + /*
> + * Fold back to using mmap() only for anonymous mapping,
s/Fold/Fall/
> + * as if a backing file is associated we may not be able
> + * to recover the memory in all cases.
> + * So don't take the risk of using only mmap and fail now.
> + */
> + if (block->fd >= 0) {
> + error_report("Memory poison recovery failure addr: "
> + RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
> + page_size, addr);
> + exit(1);
> + }
> + qemu_ram_remap_mmap(block, vaddr, page_size, offset);
> }
> memory_try_enable_merging(vaddr, page_size);
> qemu_ram_setup_dump(vaddr, page_size);
These two can be moved into qemu_ram_remap_mmap(). They are not required
if we didn't actually mess with mmap().
--
Cheers,
David / dhildenb
On 1/8/25 22:44, David Hildenbrand wrote:
> On 14.12.24 14:45, “William Roche wrote:
>> +/* Try to simply remap the given location */
>> +static void qemu_ram_remap_mmap(RAMBlock *block, void* vaddr, size_t
>> size,
>> + ram_addr_t offset)
>
> Can you make the parameters match the ones of ram_block_discard_range()
> so the invocation gets easier to read? You can calculate vaddr easily
> internally.
>
> Something like
>
> static void qemu_ram_remap_mmap(RAMBlock *rb, uint64_t start,
> size_t length)
I used the same arguments as ram_block_discard_range() as you asked.
>
>> +{
>> + int flags, prot;
>> + void *area;
>> +
>> + flags = MAP_FIXED;
>> + flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
>> + flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
>> + prot = PROT_READ;
>> + prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
>> + if (block->fd >= 0) {
>
> Heh, that case can no longer happen!
I also removed the used case of remapping a file in the
qemu_ram_remap_mmap() function.
>
> assert(block->fs < 0);
And added the assert() you suggested.
>> + if (ram_block_discard_range(block, offset + block->fd_offset,
>> + page_size) != 0) {
Studying some more the arguments used by ram_block_discard_range() and
the need to fallocate/Punch the underlying file, I think that I should
simply provide the 'offset' here and that block->fd_offset is missing in
the ram_block_discard_range() function where we have to punch a hole in
the file. Don't you agree ?
If we can get the current set of fixes integrated, I'll submit another
fix proposal to take the fd_offset into account in a second time. (Not
enlarging the current set)
But here is what I'm thinking about. That we can discuss later if you want:
@@ -3730,11 +3724,12 @@ int ram_block_discard_range(RAMBlock *rb,
uint64_t start, size_t length)
}
ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE |
FALLOC_FL_KEEP_SIZE,
- start, length);
+ start + rb->fd_offset, length);
if (ret) {
ret = -errno;
error_report("%s: Failed to fallocate %s:%" PRIx64 "
+%zx (%d)",
- __func__, rb->idstr, start, length, ret);
+ __func__, rb->idstr, start + rb->fd_offset,
+ length, ret);
goto err;
}
Or I can integrate that as an addition patch if you prefer.
>> + /*
>> + * Fold back to using mmap() only for anonymous mapping,
>
> s/Fold/Fall/
typo fixed
>> }
>> memory_try_enable_merging(vaddr, page_size);
>> qemu_ram_setup_dump(vaddr, page_size);
>
> These two can be moved into qemu_ram_remap_mmap(). They are not required
> if we didn't actually mess with mmap().
These functions will be replaced by the ram_block_notify_remap() of
patch 7 which is called no matter the ram_block_discard_range()
succeeded or not.
So we should leave these 2 function calls here for now as they mimic an
aspect of what the notifier code will do.
> If we can get the current set of fixes integrated, I'll submit another
> fix proposal to take the fd_offset into account in a second time. (Not
> enlarging the current set)
>
> But here is what I'm thinking about. That we can discuss later if you want:
>
> @@ -3730,11 +3724,12 @@ int ram_block_discard_range(RAMBlock *rb,
> uint64_t start, size_t length)
> }
>
> ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE |
> FALLOC_FL_KEEP_SIZE,
> - start, length);
> + start + rb->fd_offset, length);
> if (ret) {
> ret = -errno;
> error_report("%s: Failed to fallocate %s:%" PRIx64 "
> +%zx (%d)",
> - __func__, rb->idstr, start, length, ret);
> + __func__, rb->idstr, start + rb->fd_offset,
> + length, ret);
> goto err;
> }
>
>
> Or I can integrate that as an addition patch if you prefer.
Very good point! We missed to take fd_offset into account here.
Can you send that out as a separate fix?
Fixed: 4b870dc4d0c0 ("hostmem-file: add offset option")
--
Cheers,
David / dhildenb
On 1/14/25 15:00, David Hildenbrand wrote:
>> If we can get the current set of fixes integrated, I'll submit another
>> fix proposal to take the fd_offset into account in a second time. (Not
>> enlarging the current set)
>>
>> But here is what I'm thinking about. That we can discuss later if you
>> want:
>>
>> @@ -3730,11 +3724,12 @@ int ram_block_discard_range(RAMBlock *rb,
>> uint64_t start, size_t length)
>> }
>>
>> ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE |
>> FALLOC_FL_KEEP_SIZE,
>> - start, length);
>> + start + rb->fd_offset, length);
>> if (ret) {
>> ret = -errno;
>> error_report("%s: Failed to fallocate %s:%" PRIx64 "
>> +%zx (%d)",
>> - __func__, rb->idstr, start, length, ret);
>> + __func__, rb->idstr, start + rb->fd_offset,
>> + length, ret);
>> goto err;
>> }
>>
>>
>> Or I can integrate that as an addition patch if you prefer.
>
> Very good point! We missed to take fd_offset into account here.
>
> Can you send that out as a separate fix?
>
> Fixed: 4b870dc4d0c0 ("hostmem-file: add offset option"
Thanks to Peter Xu and to you for your reviews of my proposal for this
separate fix that should be on track to be integrated soon.
© 2016 - 2026 Red Hat, Inc.