As reported by Peter, we might be leaking memory when removing the
highest RAMBlock (in the weird ram_addr_t space), and adding a new one.
We will fail to realize that we already allocated bitmaps for more
dirty memory blocks, and effectively discard the pointers to them.
Fix it by getting rid of last_ram_page() and simply storing the number
of dirty memory blocks that have been allocated. We'll store the number
of blocks along with the actual pointer to keep it simple.
Looks like this leak was introduced as we switched from using a single
bitmap_zero_extend() to allocating multiple bitmaps:
bitmap_zero_extend() relies on g_renew() which should have taken care of
this.
Resolves: https://lkml.kernel.org/r/CAFEAcA-k7a+VObGAfCFNygQNfCKL=AfX6A4kScq=VSSK0peqPg@mail.gmail.com
Reported-by: Peter Maydell <peter.maydell@linaro.org>
Fixes: 5b82b703b69a ("memory: RCU ram_list.dirty_memory[] for safe RAM hotplug")
Cc: qemu-stable@nongnu.org
Cc: Stefan Hajnoczi <stefanha@redhat.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: "Philippe Mathieu-Daudé" <philmd@linaro.org>
Signed-off-by: David Hildenbrand <david@redhat.com>
---
include/exec/ramlist.h | 1 +
system/physmem.c | 44 ++++++++++++++----------------------------
2 files changed, 16 insertions(+), 29 deletions(-)
diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
index 2ad2a81acc..f2a965f293 100644
--- a/include/exec/ramlist.h
+++ b/include/exec/ramlist.h
@@ -41,6 +41,7 @@ typedef struct RAMBlockNotifier RAMBlockNotifier;
 #define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)

 typedef struct {
     struct rcu_head rcu;
+    unsigned int num_blocks;
     unsigned long *blocks[];
 } DirtyMemoryBlocks;

diff --git a/system/physmem.c b/system/physmem.c
index 94600a33ec..fa48ff8333 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1534,18 +1534,6 @@ static ram_addr_t find_ram_offset(ram_addr_t size)
     return offset;
 }

-static unsigned long last_ram_page(void)
-{
-    RAMBlock *block;
-    ram_addr_t last = 0;
-
-    RCU_READ_LOCK_GUARD();
-    RAMBLOCK_FOREACH(block) {
-        last = MAX(last, block->offset + block->max_length);
-    }
-    return last >> TARGET_PAGE_BITS;
-}
-
 static void qemu_ram_setup_dump(void *addr, ram_addr_t size)
 {
     int ret;
@@ -1799,28 +1787,31 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
 }

 /* Called with ram_list.mutex held */
-static void dirty_memory_extend(ram_addr_t old_ram_size,
-                                ram_addr_t new_ram_size)
+static void dirty_memory_extend(ram_addr_t new_ram_size)
 {
-    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
-                                             DIRTY_MEMORY_BLOCK_SIZE);
     ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                              DIRTY_MEMORY_BLOCK_SIZE);
     int i;

-    /* Only need to extend if block count increased */
-    if (new_num_blocks <= old_num_blocks) {
-        return;
-    }
-
     for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
         DirtyMemoryBlocks *old_blocks;
         DirtyMemoryBlocks *new_blocks;
+        ram_addr_t old_num_blocks = 0;
         int j;

         old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
+        if (old_blocks) {
+            old_num_blocks = old_blocks->num_blocks;
+
+            /* Only need to extend if block count increased */
+            if (new_num_blocks <= old_num_blocks) {
+                return;
+            }
+        }
+
         new_blocks = g_malloc(sizeof(*new_blocks) +
                               sizeof(new_blocks->blocks[0]) * new_num_blocks);
+        new_blocks->num_blocks = new_num_blocks;

         if (old_num_blocks) {
             memcpy(new_blocks->blocks, old_blocks->blocks,
@@ -1846,11 +1837,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
     RAMBlock *block;
     RAMBlock *last_block = NULL;
     bool free_on_error = false;
-    ram_addr_t old_ram_size, new_ram_size;
+    ram_addr_t ram_size;
     Error *err = NULL;

-    old_ram_size = last_ram_page();
-
     qemu_mutex_lock_ramlist();
     new_block->offset = find_ram_offset(new_block->max_length);

@@ -1901,11 +1890,8 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
         }
     }

-    new_ram_size = MAX(old_ram_size,
-                       (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
-    if (new_ram_size > old_ram_size) {
-        dirty_memory_extend(old_ram_size, new_ram_size);
-    }
+    ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
+    dirty_memory_extend(ram_size);
     /* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
      * QLIST (which has an RCU-friendly variant) does not have insertion at
      * tail, so save the last element in last_block.
--
2.46.0
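
To make the failure mode concrete, here is a minimal standalone sketch of the
pre-fix logic (invented names and sizes, plain C, not QEMU code): the caller
derives old_num_blocks from last_ram_page(), so after the highest RAMBlock is
removed it undercounts what was actually allocated, and the re-extension
discards live bitmap pointers.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Toy model -- "dirty" stands in for one ram_list.dirty_memory[] entry. */
typedef struct {
    void *rcu_placeholder;    /* the real struct has an rcu_head here */
    unsigned long *blocks[];  /* one bitmap pointer per block */
} Blocks;

static Blocks *dirty;

/* Pre-fix dirty_memory_extend(): trusts old_num_blocks, which the caller
 * derived from last_ram_page(), i.e. from the current highest RAMBlock. */
static void extend_buggy(size_t old_num_blocks, size_t new_num_blocks)
{
    Blocks *new_blocks = malloc(sizeof(*new_blocks) +
                                sizeof(new_blocks->blocks[0]) * new_num_blocks);

    /* Copy only old_num_blocks pointers... */
    memcpy(new_blocks->blocks, dirty->blocks,
           old_num_blocks * sizeof(new_blocks->blocks[0]));

    /* ...and allocate "new" bitmaps for the rest -- even for slots whose
     * bitmaps already exist. Those old pointers are discarded: the leak. */
    for (size_t j = old_num_blocks; j < new_num_blocks; j++) {
        new_blocks->blocks[j] = calloc(1, 64);
    }

    free(dirty);              /* stand-in for the RCU reclaim */
    dirty = new_blocks;
}

int main(void)
{
    /* Bitmaps for 4 blocks already exist. */
    dirty = malloc(sizeof(*dirty) + 4 * sizeof(dirty->blocks[0]));
    for (size_t j = 0; j < 4; j++) {
        dirty->blocks[j] = calloc(1, 64);
    }

    /* Remove the highest RAMBlock: last_ram_page() now reports only 2
     * blocks' worth of pages. Add a RAMBlock needing 3 blocks: the old
     * code calls dirty_memory_extend(2, 3) and overwrites the pointer in
     * slot 2 (slot 3 is dropped with the old array as well). */
    unsigned long *was = dirty->blocks[2];
    extend_buggy(2, 3);
    printf("slot 2: %p -> %p (old bitmap leaked)\n",
           (void *)was, (void *)dirty->blocks[2]);
    return 0;
}

Compiled with -std=c99, this prints two different pointers for slot 2; a leak
checker would flag the old bitmaps in slots 2 and 3.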
On Tue, Aug 27, 2024 at 10:37:15AM +0200, David Hildenbrand wrote:
>  /* Called with ram_list.mutex held */
> -static void dirty_memory_extend(ram_addr_t old_ram_size,
> -                                ram_addr_t new_ram_size)
> +static void dirty_memory_extend(ram_addr_t new_ram_size)
>  {
> -    ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
> -                                             DIRTY_MEMORY_BLOCK_SIZE);
>      ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
>                                               DIRTY_MEMORY_BLOCK_SIZE);
>      int i;
>
> -    /* Only need to extend if block count increased */
> -    if (new_num_blocks <= old_num_blocks) {
> -        return;
> -    }

One nitpick here: IMHO we could move the n_blocks cache in ram_list
instead, then we keep the check here and avoid caching it three times with
the same value.

> -
>      for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
>          DirtyMemoryBlocks *old_blocks;
>          DirtyMemoryBlocks *new_blocks;
> +        ram_addr_t old_num_blocks = 0;
>          int j;
>
>          old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
> +        if (old_blocks) {
> +            old_num_blocks = old_blocks->num_blocks;
> +
> +            /* Only need to extend if block count increased */
> +            if (new_num_blocks <= old_num_blocks) {
> +                return;
> +            }
> +        }

--
Peter Xu
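
For reference, a rough sketch of what Peter suggests, assuming the QEMU tree
(the RAMList field name "num_dirty_blocks" is invented here): the counter
lives in ram_list, so the early return can stay ahead of the per-client loop
and DirtyMemoryBlocks keeps no counter of its own.

struct RAMList {
    /* ... existing fields (mutex, blocks list, version, ...) ... */
    DirtyMemoryBlocks *dirty_memory[DIRTY_MEMORY_NUM];
    unsigned int num_dirty_blocks;  /* blocks allocated per bitmap set */
};

/* Called with ram_list.mutex held */
static void dirty_memory_extend(ram_addr_t new_ram_size)
{
    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
                                             DIRTY_MEMORY_BLOCK_SIZE);
    int i, j;

    /* Only need to extend if block count increased */
    if (new_num_blocks <= ram_list.num_dirty_blocks) {
        return;
    }

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *old_blocks =
            qatomic_rcu_read(&ram_list.dirty_memory[i]);
        DirtyMemoryBlocks *new_blocks =
            g_malloc(sizeof(*new_blocks) +
                     sizeof(new_blocks->blocks[0]) * new_num_blocks);

        if (ram_list.num_dirty_blocks) {
            memcpy(new_blocks->blocks, old_blocks->blocks,
                   ram_list.num_dirty_blocks * sizeof(old_blocks->blocks[0]));
        }
        for (j = ram_list.num_dirty_blocks; j < new_num_blocks; j++) {
            new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
        }

        qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
        if (old_blocks) {
            g_free_rcu(old_blocks, rcu);
        }
    }
    ram_list.num_dirty_blocks = new_num_blocks;
}

This is roughly where the thread lands; see David's follow-up below on the
naming.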
On 27.08.24 19:57, Peter Xu wrote:
> On Tue, Aug 27, 2024 at 10:37:15AM +0200, David Hildenbrand wrote:
> [...]
>
> One nitpick here: IMHO we could move the n_blocks cache in ram_list
> instead, then we keep the check here and avoid caching it three times with
> the same value.

yes, as written in the patch description: "We'll store the number of blocks
along with the actual pointer to keep it simple."

It's cleaner to me to store it along the RCU-freed data structure that has
this size.

--
Cheers,

David / dhildenb
On Tue, Aug 27, 2024 at 08:00:07PM +0200, David Hildenbrand wrote:
> On 27.08.24 19:57, Peter Xu wrote:
> > One nitpick here: IMHO we could move the n_blocks cache in ram_list
> > instead, then we keep the check here and avoid caching it three times with
> > the same value.
>
> yes, as written in the patch description: "We'll store the number of blocks
> along with the actual pointer to keep it simple."
>
> It's cleaner to me to store it along the RCU-freed data structure that has
> this size.

Yep, I can get that.

I think one reason I had my current preference is to avoid things like:

  for (...) {
      if (...)
          return;
  }

I'd at least want to sanity check before "return" to make sure all three
bitmap chunks are having the same size. It gave me the feeling that we
could process "blocks[]" differently but we actually couldn't - In our case
it has the ram_list mutex when update, so it must be guaranteed. However
due to the same reason, I see it cleaner to just keep the counter there
too.

No strong feelings, though.

--
Peter Xu
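
A hedged sketch of the sanity check Peter describes (invented helper,
assuming the structures from the patch; not code that was posted): since
ram_list.mutex is held across updates, all DIRTY_MEMORY_NUM copies must
agree, and that can be asserted before taking the early return.

/* Sketch only: check that every per-client DirtyMemoryBlocks copy was
 * extended to the same size. Caller holds ram_list.mutex. */
static void assert_dirty_blocks_consistent(unsigned int expected)
{
    int i;

    for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
        DirtyMemoryBlocks *blocks =
            qatomic_rcu_read(&ram_list.dirty_memory[i]);

        assert(blocks && blocks->num_blocks == expected);
    }
}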
On 27.08.24 20:41, Peter Xu wrote:
> [...]
>
> I think one reason I had my current preference is to avoid things like:
>
>   for (...) {
>       if (...)
>           return;
>   }
>
> I'd at least want to sanity check before "return" to make sure all three
> bitmap chunks are having the same size. It gave me the feeling that we
> could process "blocks[]" differently but we actually couldn't - In our case
> it has the ram_list mutex when update, so it must be guaranteed. However
> due to the same reason, I see it cleaner to just keep the counter there
> too.

I'll move it to the higher level because I have more important stuff to
work on and want to get this off my plate.

"num_blocks" does not quite make sense in RAMList (where we have a
different "blocks" variable) so I'll call it "num_dirty_blocks" or sth
like that.

--
Cheers,

David / dhildenb
On Tue, 27 Aug 2024 at 04:38, David Hildenbrand <david@redhat.com> wrote:
>
> As reported by Peter, we might be leaking memory when removing the
> highest RAMBlock (in the weird ram_addr_t space), and adding a new one.
[...]
> diff --git a/include/exec/ramlist.h b/include/exec/ramlist.h
> index 2ad2a81acc..f2a965f293 100644
> --- a/include/exec/ramlist.h
> +++ b/include/exec/ramlist.h
> @@ -41,6 +41,7 @@ typedef struct RAMBlockNotifier RAMBlockNotifier;
>  #define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)
>  typedef struct {
>      struct rcu_head rcu;
> +    unsigned int num_blocks;

The maximum amount of memory supported by unsigned int is:
(2 ^ 32 - 1) * 4KB * DIRTY_MEMORY_BLOCK_SIZE
= ~32 exabytes

This should be fine. The maximum guest RAM sizes are in the TBs range
(source: https://access.redhat.com/articles/rhel-kvm-limits).

Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

>      unsigned long *blocks[];
>  } DirtyMemoryBlocks;
[...]
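
Stefan's bound, spelled out as throwaway arithmetic (4 KiB pages assumed, as
in the quoted math; this is not QEMU code):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    /* DIRTY_MEMORY_BLOCK_SIZE = 256 * 1024 * 8 bits, one bit per page */
    const uint64_t pages_per_block = 256ULL * 1024 * 8;  /* 2^21 */
    const uint64_t page_size = 4096;                     /* 2^12 */
    const uint64_t max_blocks = UINT32_MAX;              /* 2^32 - 1 */

    /* One block covers 2^21 pages * 2^12 bytes = 8 GiB. */
    const uint64_t bytes_per_block = pages_per_block * page_size;

    /* Total: (2^32 - 1) * 8 GiB ~= 2^65 bytes ~= 32 EiB. That overflows
     * uint64_t, so print it in GiB instead. */
    printf("max = %llu GiB (~32 exabytes)\n",
           (unsigned long long)(max_blocks * (bytes_per_block >> 30)));
    return 0;
}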
On 27.08.24 18:52, Stefan Hajnoczi wrote:
> [...]
>
> The maximum amount of memory supported by unsigned int is:
> (2 ^ 32 - 1) * 4KB * DIRTY_MEMORY_BLOCK_SIZE
> = ~32 exabytes

True, should we simply use ram_addr_t ?

> This should be fine. The maximum guest RAM sizes are in the TBs range
> (source: https://access.redhat.com/articles/rhel-kvm-limits).
>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>

Thanks

--
Cheers,

David / dhildenb
On Tue, 27 Aug 2024 at 13:24, David Hildenbrand <david@redhat.com> wrote:
> On 27.08.24 18:52, Stefan Hajnoczi wrote:
> > The maximum amount of memory supported by unsigned int is:
> > (2 ^ 32 - 1) * 4KB * DIRTY_MEMORY_BLOCK_SIZE
> > = ~32 exabytes
>
> True, should we simply use ram_addr_t ?

Sounds good to me. In practice scalability bottlenecks are likely with
those memory sizes and it will be necessary to change how guest memory
is organized anyway. But it doesn't hurt to make this counter
future-proof.

Stefan
On Tue, Aug 27, 2024 at 01:28:02PM -0400, Stefan Hajnoczi wrote:
> On Tue, 27 Aug 2024 at 13:24, David Hildenbrand <david@redhat.com> wrote:
> > True, should we simply use ram_addr_t ?
>
> Sounds good to me. In practice scalability bottlenecks are likely with
> those memory sizes and it will be necessary to change how guest memory
> is organized anyway. But it doesn't hurt to make this counter
> future-proof.

IMHO it'll be nice to only use ram_addr_t when a variable is describing the
ramblock address space (with an offset, or a length there). In this case
it is a pure counter for how many bitmap chunks we allocated, so maybe
"unsigned long" or "uint64_t" would suit more?

Though I'd think "unsigned int" is good enough per the calculation Stefan
provided.

Reviewed-by: Peter Xu <peterx@redhat.com>

Thanks,

--
Peter Xu
On 27.08.24 19:50, Peter Xu wrote:
> [...]
>
> IMHO it'll be nice to only use ram_addr_t when a variable is describing the
> ramblock address space (with an offset, or a length there). In this case
> it is a pure counter for how many bitmap chunks we allocated, so maybe
> "unsigned long" or "uint64_t" would suit more?
>
> Though I'd think "unsigned int" is good enough per the calculation Stefan
> provided.

Likely best, "ram_addr_t" requires including "exec/cpu-common.h". So let's
stick to "unsigned int" for now.

Likely best to also include for consistency:

diff --git a/system/physmem.c b/system/physmem.c
index fa48ff8333..e1391492fd 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -1789,14 +1789,14 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
 /* Called with ram_list.mutex held */
 static void dirty_memory_extend(ram_addr_t new_ram_size)
 {
-    ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
-                                             DIRTY_MEMORY_BLOCK_SIZE);
+    unsigned int new_num_blocks = DIV_ROUND_UP(new_ram_size,
+                                               DIRTY_MEMORY_BLOCK_SIZE);
     int i;

     for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
         DirtyMemoryBlocks *old_blocks;
         DirtyMemoryBlocks *new_blocks;
-        ram_addr_t old_num_blocks = 0;
+        unsigned int old_num_blocks = 0;
         int j;

         old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);

> Reviewed-by: Peter Xu <peterx@redhat.com>

Thanks!

--
Cheers,

David / dhildenb
On Tue, 27 Aug 2024 at 09:37, David Hildenbrand <david@redhat.com> wrote:
>
> As reported by Peter, we might be leaking memory when removing the
> highest RAMBlock (in the weird ram_addr_t space), and adding a new one.
>
> We will fail to realize that we already allocated bitmaps for more
> dirty memory blocks, and effectively discard the pointers to them.
[...]
> Signed-off-by: David Hildenbrand <david@redhat.com>

I checked, and with this fix (plus various others I've sent out last
week) we can run "make check" for the x86_64-softmmu target under
asan/lsan without getting any leak reports. So (at least to that extent)

Tested-by: Peter Maydell <peter.maydell@linaro.org>

-- PMM