When we remove a RAMBlock, we should also free the corresponding dirty
bitmap blocks of the ram_list, to avoid a memory leak. This patch reworks
dirty_memory_extend into dirty_memory_resize, supporting both growing and
shrinking the dirty memory blocks.
Reported-by: Euler Robot <euler.robot@huawei.com>
Signed-off-by: Keqian Zhu <zhukeqian1@huawei.com>
---
softmmu/physmem.c | 29 +++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
index 3027747c03..3e4f29f126 100644
--- a/softmmu/physmem.c
+++ b/softmmu/physmem.c
@@ -1816,17 +1816,19 @@ void qemu_ram_msync(RAMBlock *block, ram_addr_t start, ram_addr_t length)
}
/* Called with ram_list.mutex held */
-static void dirty_memory_extend(ram_addr_t old_ram_size,
+static void dirty_memory_resize(ram_addr_t old_ram_size,
ram_addr_t new_ram_size)
{
ram_addr_t old_num_blocks = DIV_ROUND_UP(old_ram_size,
DIRTY_MEMORY_BLOCK_SIZE);
ram_addr_t new_num_blocks = DIV_ROUND_UP(new_ram_size,
DIRTY_MEMORY_BLOCK_SIZE);
+ ram_addr_t cpy_num_blocks = MIN(old_num_blocks, new_num_blocks);
+ bool extend = new_num_blocks > old_num_blocks;
int i;
- /* Only need to extend if block count increased */
- if (new_num_blocks <= old_num_blocks) {
+ /* Only need to resize if block count changed */
+ if (new_num_blocks == old_num_blocks) {
return;
}
@@ -1839,15 +1841,26 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
new_blocks = g_malloc(sizeof(*new_blocks) +
sizeof(new_blocks->blocks[0]) * new_num_blocks);
- if (old_num_blocks) {
+ if (cpy_num_blocks) {
memcpy(new_blocks->blocks, old_blocks->blocks,
- old_num_blocks * sizeof(old_blocks->blocks[0]));
+ cpy_num_blocks * sizeof(old_blocks->blocks[0]));
}
- for (j = old_num_blocks; j < new_num_blocks; j++) {
- new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
+ if (extend) {
+ for (j = cpy_num_blocks; j < new_num_blocks; j++) {
+ new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
+ }
+ } else {
+ for (j = cpy_num_blocks; j < old_num_blocks; j++) {
+ /* We are safe to free it, for that it is out-of-use */
+ g_free(old_blocks->blocks[j]);
+ }
}
+ if (!new_num_blocks) {
+ g_free(new_blocks);
+ new_blocks = NULL;
+ }
qatomic_rcu_set(&ram_list.dirty_memory[i], new_blocks);
if (old_blocks) {
@@ -1894,7 +1907,7 @@ static void ram_block_add(RAMBlock *new_block, Error **errp, bool shared)
new_ram_size = MAX(old_ram_size,
(new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS);
if (new_ram_size > old_ram_size) {
- dirty_memory_extend(old_ram_size, new_ram_size);
+ dirty_memory_resize(old_ram_size, new_ram_size);
}
/* Keep the list sorted from biggest to smallest block. Unlike QTAILQ,
* QLIST (which has an RCU-friendly variant) does not have insertion at
--
2.23.0
On Mon, Nov 30, 2020 at 09:11:03PM +0800, Keqian Zhu wrote:
> @@ -1839,15 +1841,26 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
> new_blocks = g_malloc(sizeof(*new_blocks) +
> sizeof(new_blocks->blocks[0]) * new_num_blocks);
>
> - if (old_num_blocks) {
> + if (cpy_num_blocks) {
> memcpy(new_blocks->blocks, old_blocks->blocks,
> - old_num_blocks * sizeof(old_blocks->blocks[0]));
> + cpy_num_blocks * sizeof(old_blocks->blocks[0]));
> }
>
> - for (j = old_num_blocks; j < new_num_blocks; j++) {
> - new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> + if (extend) {
> + for (j = cpy_num_blocks; j < new_num_blocks; j++) {
> + new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> + }
> + } else {
> + for (j = cpy_num_blocks; j < old_num_blocks; j++) {
> + /* We are safe to free it, for that it is out-of-use */
> + g_free(old_blocks->blocks[j]);
This looks unsafe because this code uses Read Copy Update (RCU):
old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
Other threads may still be accessing old_blocks so we cannot modify it
immediately. Changes need to be deferred until the next RCU period.
g_free_rcu() needs to be used to do this.
On 2020/12/17 18:05, Stefan Hajnoczi wrote:
> On Mon, Nov 30, 2020 at 09:11:03PM +0800, Keqian Zhu wrote:
>> @@ -1839,15 +1841,26 @@ static void dirty_memory_extend(ram_addr_t old_ram_size,
>> new_blocks = g_malloc(sizeof(*new_blocks) +
>> sizeof(new_blocks->blocks[0]) * new_num_blocks);
>>
>> - if (old_num_blocks) {
>> + if (cpy_num_blocks) {
>> memcpy(new_blocks->blocks, old_blocks->blocks,
>> - old_num_blocks * sizeof(old_blocks->blocks[0]));
>> + cpy_num_blocks * sizeof(old_blocks->blocks[0]));
>> }
>>
>> - for (j = old_num_blocks; j < new_num_blocks; j++) {
>> - new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>> + if (extend) {
>> + for (j = cpy_num_blocks; j < new_num_blocks; j++) {
>> + new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>> + }
>> + } else {
>> + for (j = cpy_num_blocks; j < old_num_blocks; j++) {
>> + /* We are safe to free it, for that it is out-of-use */
>> + g_free(old_blocks->blocks[j]);
>
> This looks unsafe because this code uses Read Copy Update (RCU):
>
> old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
>
> Other threads may still be accessing old_blocks so we cannot modify it
> immediately. Changes need to be deferred until the next RCU period.
> g_free_rcu() needs to be used to do this.
>
Hi Stefan,
You are right. I was previously only considering the VM life cycle: we shrink
dirty_memory when removing an unused RAMBlock. However, we cannot rely on that
assumption alone.
I also notice the comment "Organization into blocks allows dirty memory to grow
(but not shrink) under RCU". Why "but not shrink"? Any thoughts?
[...]
* Organization into blocks allows dirty memory to grow (but not shrink) under
* RCU. When adding new RAMBlocks requires the dirty memory to grow, a new
* DirtyMemoryBlocks array is allocated with pointers to existing blocks kept
* the same. Other threads can safely access existing blocks while dirty
* memory is being grown. When no threads are using the old DirtyMemoryBlocks
* anymore it is freed by RCU (but the underlying blocks stay because they are
* pointed to from the new DirtyMemoryBlocks).
*/
#define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)
typedef struct {
struct rcu_head rcu;
unsigned long *blocks[];
} DirtyMemoryBlocks;
[...]
Thanks,
Keqian
[...]
>>> - for (j = old_num_blocks; j < new_num_blocks; j++) {
>>> - new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>>> + if (extend) {
>>> + for (j = cpy_num_blocks; j < new_num_blocks; j++) {
>>> + new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
>>> + }
>>> + } else {
>>> + for (j = cpy_num_blocks; j < old_num_blocks; j++) {
>>> + /* We are safe to free it, for that it is out-of-use */
>>> + g_free(old_blocks->blocks[j]);
>>
>> This looks unsafe because this code uses Read Copy Update (RCU):
>>
>> old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
>>
>> Other threads may still be accessing old_blocks so we cannot modify it
>> immediately. Changes need to be deferred until the next RCU period.
>> g_free_rcu() needs to be used to do this.
>>
> Hi Stefan,
>
> You are right. I was thinking about the VM life cycle before. We shrink the dirty_memory
> when we are removing unused ramblock. However we can not rely on this.
>
> I also notice that "Organization into blocks allows dirty memory to grow (but not shrink)
> under RCU". Why "but not shrink"? Any thoughts?
Hi,
After my analysis, it appears unsafe both to grow and to shrink under RCU.
ram_list.blocks and ram_list.dirty_memory[X] are closely related and are
both protected by RCU. For lockless RCU readers, we cannot guarantee that they
always see a consistent view of the two structures.
For grow, a reader may see a not-yet-grown @dirty_memory together with an already-grown @blocks, causing an out-of-bounds access.
For shrink, a reader may see an already-shrunk @dirty_memory together with a not-yet-shrunk @blocks, likewise causing an out-of-bounds access.
I think this is a design limitation: a single RCU pointer update can only protect one structure atomically, not two.
Thanks,
Keqian.
>
> [...]
> * Organization into blocks allows dirty memory to grow (but not shrink) under
> * RCU. When adding new RAMBlocks requires the dirty memory to grow, a new
> * DirtyMemoryBlocks array is allocated with pointers to existing blocks kept
> * the same. Other threads can safely access existing blocks while dirty
> * memory is being grown. When no threads are using the old DirtyMemoryBlocks
> * anymore it is freed by RCU (but the underlying blocks stay because they are
> * pointed to from the new DirtyMemoryBlocks).
> */
> #define DIRTY_MEMORY_BLOCK_SIZE ((ram_addr_t)256 * 1024 * 8)
> typedef struct {
> struct rcu_head rcu;
> unsigned long *blocks[];
> } DirtyMemoryBlocks;
> [...]
>
> Thanks,
> Keqian
>
>
> .
>
On Sat, Dec 26, 2020 at 03:11:53PM +0800, Keqian Zhu wrote:
>
> [...]
>
> >>> - for (j = old_num_blocks; j < new_num_blocks; j++) {
> >>> - new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> >>> + if (extend) {
> >>> + for (j = cpy_num_blocks; j < new_num_blocks; j++) {
> >>> + new_blocks->blocks[j] = bitmap_new(DIRTY_MEMORY_BLOCK_SIZE);
> >>> + }
> >>> + } else {
> >>> + for (j = cpy_num_blocks; j < old_num_blocks; j++) {
> >>> + /* We are safe to free it, for that it is out-of-use */
> >>> + g_free(old_blocks->blocks[j]);
> >>
> >> This looks unsafe because this code uses Read Copy Update (RCU):
> >>
> >> old_blocks = qatomic_rcu_read(&ram_list.dirty_memory[i]);
> >>
> >> Other threads may still be accessing old_blocks so we cannot modify it
> >> immediately. Changes need to be deferred until the next RCU period.
> >> g_free_rcu() needs to be used to do this.
> >>
> > Hi Stefan,
> >
> > You are right. I was thinking about the VM life cycle before. We shrink the dirty_memory
> > when we are removing unused ramblock. However we can not rely on this.
> >
> > I also notice that "Organization into blocks allows dirty memory to grow (but not shrink)
> > under RCU". Why "but not shrink"? Any thoughts?
> Hi,
>
> After my analysis, it's both unsafe to grow or shrink under RCU.
>
> ram_list.blocks and ram_list.dirty_memory[X] are closely related and
> both protected by RCU. For the lockless RCU readers, we can't promise they
> always see consistent version of the two structures.
>
> For grow, a reader may see un-growed @dirty_memory and growed @blocks, causing out-of-bound access.
Growth is safe because other threads only access pre-existing ranges
(below the old maximum size). They will only start accessing the newly
added ranges after resize.
Did you find a code path where this constraint is violated?
Stefan
© 2016 - 2026 Red Hat, Inc.