From: William Roche <william.roche@oracle.com>
Repair poisoned memory location(s), calling ram_block_discard_range():
punching a hole in the backend file when necessary and regenerating
a usable memory.
If the kernel doesn't support the madvise calls used by this function
and we are dealing with anonymous memory, fall back to remapping the
location(s).
Signed-off-by: William Roche <william.roche@oracle.com>
---
system/physmem.c | 63 ++++++++++++++++++++++++++++++++----------------
1 file changed, 42 insertions(+), 21 deletions(-)
diff --git a/system/physmem.c b/system/physmem.c
index 2c90cc2d78..b228a692f8 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -2180,13 +2180,37 @@ void qemu_ram_free(RAMBlock *block)
}
#ifndef _WIN32
+/* Try to simply remap the given location */
+static void qemu_ram_remap_mmap(RAMBlock *block, void* vaddr, size_t size,
+ ram_addr_t offset)
+{
+ int flags, prot;
+ void *area;
+
+ flags = MAP_FIXED;
+ flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE;
+ flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
+ prot = PROT_READ;
+ prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
+ if (block->fd >= 0) {
+ area = mmap(vaddr, size, prot, flags, block->fd,
+ offset + block->fd_offset);
+ } else {
+ flags |= MAP_ANONYMOUS;
+ area = mmap(vaddr, size, prot, flags, -1, 0);
+ }
+ if (area != vaddr) {
+ error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+ size, block->offset + offset);
+ exit(1);
+ }
+}
+
void qemu_ram_remap(ram_addr_t addr)
{
RAMBlock *block;
ram_addr_t offset;
- int flags;
- void *area, *vaddr;
- int prot;
+ void *vaddr;
size_t page_size;
RAMBLOCK_FOREACH(block) {
@@ -2202,24 +2226,21 @@ void qemu_ram_remap(ram_addr_t addr)
} else if (xen_enabled()) {
abort();
} else {
- flags = MAP_FIXED;
- flags |= block->flags & RAM_SHARED ?
- MAP_SHARED : MAP_PRIVATE;
- flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0;
- prot = PROT_READ;
- prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE;
- if (block->fd >= 0) {
- area = mmap(vaddr, page_size, prot, flags, block->fd,
- offset + block->fd_offset);
- } else {
- flags |= MAP_ANONYMOUS;
- area = mmap(vaddr, page_size, prot, flags, -1, 0);
- }
- if (area != vaddr) {
- error_report("Could not remap addr: "
- RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
- page_size, addr);
- exit(1);
+ if (ram_block_discard_range(block, offset + block->fd_offset,
+ page_size) != 0) {
+ /*
+ * Fold back to using mmap() only for anonymous mapping,
+ * as if a backing file is associated we may not be able
+ * to recover the memory in all cases.
+ * So don't take the risk of using only mmap and fail now.
+ */
+ if (block->fd >= 0) {
+ error_report("Memory poison recovery failure addr: "
+ RAM_ADDR_FMT "@" RAM_ADDR_FMT "",
+ page_size, addr);
+ exit(1);
+ }
+ qemu_ram_remap_mmap(block, vaddr, page_size, offset);
}
memory_try_enable_merging(vaddr, page_size);
qemu_ram_setup_dump(vaddr, page_size);
--
2.43.5
On 14.12.24 14:45, “William Roche wrote: > From: William Roche <william.roche@oracle.com> > > Repair poisoned memory location(s), calling ram_block_discard_range(): > punching a hole in the backend file when necessary and regenerating > a usable memory. > If the kernel doesn't support the madvise calls used by this function > and we are dealing with anonymous memory, fall back to remapping the > location(s). > > Signed-off-by: William Roche <william.roche@oracle.com> > --- > system/physmem.c | 63 ++++++++++++++++++++++++++++++++---------------- > 1 file changed, 42 insertions(+), 21 deletions(-) > > diff --git a/system/physmem.c b/system/physmem.c > index 2c90cc2d78..b228a692f8 100644 > --- a/system/physmem.c > +++ b/system/physmem.c > @@ -2180,13 +2180,37 @@ void qemu_ram_free(RAMBlock *block) > } > > #ifndef _WIN32 > +/* Try to simply remap the given location */ > +static void qemu_ram_remap_mmap(RAMBlock *block, void* vaddr, size_t size, > + ram_addr_t offset) Can you make the parameters match the ones of ram_block_discard_range() so the invocation gets easier to read? You can calculate vaddr easily internally. Something like static void qemu_ram_remap_mmap(RAMBlock *rb, uint64_t start, size_t length) > +{ > + int flags, prot; > + void *area; > + > + flags = MAP_FIXED; > + flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE; > + flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; > + prot = PROT_READ; > + prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; > + if (block->fd >= 0) { Heh, that case can no longer happen! assert(block->fs < 0); ? > + area = mmap(vaddr, size, prot, flags, block->fd, > + offset + block->fd_offset); > + } else { > + flags |= MAP_ANONYMOUS; > + area = mmap(vaddr, size, prot, flags, -1, 0); > + } > + if (area != vaddr) { > + error_report("Could not remap addr: " RAM_ADDR_FMT "@" RAM_ADDR_FMT "", > + size, block->offset + offset); > + exit(1); > + } > +} > + > void qemu_ram_remap(ram_addr_t addr) > { > RAMBlock *block; > ram_addr_t offset; > - int flags; > - void *area, *vaddr; > - int prot; > + void *vaddr; > size_t page_size; > > RAMBLOCK_FOREACH(block) { > @@ -2202,24 +2226,21 @@ void qemu_ram_remap(ram_addr_t addr) > } else if (xen_enabled()) { > abort(); > } else { > - flags = MAP_FIXED; > - flags |= block->flags & RAM_SHARED ? > - MAP_SHARED : MAP_PRIVATE; > - flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; > - prot = PROT_READ; > - prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; > - if (block->fd >= 0) { > - area = mmap(vaddr, page_size, prot, flags, block->fd, > - offset + block->fd_offset); > - } else { > - flags |= MAP_ANONYMOUS; > - area = mmap(vaddr, page_size, prot, flags, -1, 0); > - } > - if (area != vaddr) { > - error_report("Could not remap addr: " > - RAM_ADDR_FMT "@" RAM_ADDR_FMT "", > - page_size, addr); > - exit(1); > + if (ram_block_discard_range(block, offset + block->fd_offset, > + page_size) != 0) { > + /* > + * Fold back to using mmap() only for anonymous mapping, s/Fold/Fall/ > + * as if a backing file is associated we may not be able > + * to recover the memory in all cases. > + * So don't take the risk of using only mmap and fail now. > + */ > + if (block->fd >= 0) { > + error_report("Memory poison recovery failure addr: " > + RAM_ADDR_FMT "@" RAM_ADDR_FMT "", > + page_size, addr); > + exit(1); > + } > + qemu_ram_remap_mmap(block, vaddr, page_size, offset); > } > memory_try_enable_merging(vaddr, page_size); > qemu_ram_setup_dump(vaddr, page_size); These two can be moved into qemu_ram_remap_mmap(). They are not required if we didn't actually mess with mmap(). -- Cheers, David / dhildenb
On 1/8/25 22:44, David Hildenbrand wrote: > On 14.12.24 14:45, “William Roche wrote: >> +/* Try to simply remap the given location */ >> +static void qemu_ram_remap_mmap(RAMBlock *block, void* vaddr, size_t >> size, >> + ram_addr_t offset) > > Can you make the parameters match the ones of ram_block_discard_range() > so the invocation gets easier to read? You can calculate vaddr easily > internally. > > Something like > > static void qemu_ram_remap_mmap(RAMBlock *rb, uint64_t start, > size_t length) I used the same arguments as ram_block_discard_range() as you asked. > >> +{ >> + int flags, prot; >> + void *area; >> + >> + flags = MAP_FIXED; >> + flags |= block->flags & RAM_SHARED ? MAP_SHARED : MAP_PRIVATE; >> + flags |= block->flags & RAM_NORESERVE ? MAP_NORESERVE : 0; >> + prot = PROT_READ; >> + prot |= block->flags & RAM_READONLY ? 0 : PROT_WRITE; >> + if (block->fd >= 0) { > > Heh, that case can no longer happen! I also removed the used case of remapping a file in the qemu_ram_remap_mmap() function. > > assert(block->fs < 0); And added the assert() you suggested. >> + if (ram_block_discard_range(block, offset + block->fd_offset, >> + page_size) != 0) { Studying some more the arguments used by ram_block_discard_range() and the need to fallocate/Punch the underlying file, I think that I should simply provide the 'offset' here and that block->fd_offset is missing in the ram_block_discard_range() function where we have to punch a hole in the file. Don't you agree ? If we can get the current set of fixes integrated, I'll submit another fix proposal to take the fd_offset into account in a second time. (Not enlarging the current set) But here is what I'm thinking about. That we can discuss later if you want: @@ -3730,11 +3724,12 @@ int ram_block_discard_range(RAMBlock *rb, uint64_t start, size_t length) } ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, - start, length); + start + rb->fd_offset, length); if (ret) { ret = -errno; error_report("%s: Failed to fallocate %s:%" PRIx64 " +%zx (%d)", - __func__, rb->idstr, start, length, ret); + __func__, rb->idstr, start + rb->fd_offset, + length, ret); goto err; } Or I can integrate that as an addition patch if you prefer. >> + /* >> + * Fold back to using mmap() only for anonymous mapping, > > s/Fold/Fall/ typo fixed >> } >> memory_try_enable_merging(vaddr, page_size); >> qemu_ram_setup_dump(vaddr, page_size); > > These two can be moved into qemu_ram_remap_mmap(). They are not required > if we didn't actually mess with mmap(). These functions will be replaced by the ram_block_notify_remap() of patch 7 which is called no matter the ram_block_discard_range() succeeded or not. So we should leave these 2 function calls here for now as they mimic an aspect of what the notifier code will do.
> If we can get the current set of fixes integrated, I'll submit another > fix proposal to take the fd_offset into account in a second time. (Not > enlarging the current set) > > But here is what I'm thinking about. That we can discuss later if you want: > > @@ -3730,11 +3724,12 @@ int ram_block_discard_range(RAMBlock *rb, > uint64_t start, size_t length) > } > > ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | > FALLOC_FL_KEEP_SIZE, > - start, length); > + start + rb->fd_offset, length); > if (ret) { > ret = -errno; > error_report("%s: Failed to fallocate %s:%" PRIx64 " > +%zx (%d)", > - __func__, rb->idstr, start, length, ret); > + __func__, rb->idstr, start + rb->fd_offset, > + length, ret); > goto err; > } > > > Or I can integrate that as an addition patch if you prefer. Very good point! We missed to take fd_offset into account here. Can you send that out as a separate fix? Fixed: 4b870dc4d0c0 ("hostmem-file: add offset option") -- Cheers, David / dhildenb
On 1/14/25 15:00, David Hildenbrand wrote: >> If we can get the current set of fixes integrated, I'll submit another >> fix proposal to take the fd_offset into account in a second time. (Not >> enlarging the current set) >> >> But here is what I'm thinking about. That we can discuss later if you >> want: >> >> @@ -3730,11 +3724,12 @@ int ram_block_discard_range(RAMBlock *rb, >> uint64_t start, size_t length) >> } >> >> ret = fallocate(rb->fd, FALLOC_FL_PUNCH_HOLE | >> FALLOC_FL_KEEP_SIZE, >> - start, length); >> + start + rb->fd_offset, length); >> if (ret) { >> ret = -errno; >> error_report("%s: Failed to fallocate %s:%" PRIx64 " >> +%zx (%d)", >> - __func__, rb->idstr, start, length, ret); >> + __func__, rb->idstr, start + rb->fd_offset, >> + length, ret); >> goto err; >> } >> >> >> Or I can integrate that as an addition patch if you prefer. > > Very good point! We missed to take fd_offset into account here. > > Can you send that out as a separate fix? > > Fixed: 4b870dc4d0c0 ("hostmem-file: add offset option" Thanks to Peter Xu and to you for your reviews of my proposal for this separate fix that should be on track to be integrated soon.
© 2016 - 2025 Red Hat, Inc.