system/physmem.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-)
The race condition was not reported in any real bug, but I found it while
reviewing some relevant chnages from Akihiko [1]. It's not clear if
there's any way to reproduce it, so far it's only theoretical. Hence
there's also no Fixes tag attached. There's also no need to copy stable
until we have a solid reproducer.
Currently, mru_block might still points to ramblocks that are removed if
below race condition happens:
Reader A Writer
-------- ------
rcu_read_lock()
walk list, find block X
QLIST_REMOVE_RCU(X)
qatomic_set(&mru_block, NULL)
call_rcu(X, reclaim_ramblock)
qatomic_set(&mru_block, X) <----------- overwrites NULL
rcu_read_unlock()
grace period ends, X freed
Reader C
--------
rcu_read_lock()
qatomic_rcu_read(&mru_block) -> X
Read X's block->offset ... <----------- UAF
To fix it, we can introduce a nested RCU free for reset of the mru_block
field, and only free the ramblock until the 2nd RCU call.
Since QEMU always: (1) dequeue the RCU node before invoking the
function, (2) reset all fields in rcu_head in the call_rcu1() call, nested
RCU will work all fine like what's used in this patch.
[1] https://lore.kernel.org/r/5fd9e540-cf13-45f5-ba9a-c7faf364035b@rsg.ci.i.u-tokyo.ac.jp
Reviewed-by: Akihiko Odaki <odaki@rsg.ci.i.u-tokyo.ac.jp>
Signed-off-by: Peter Xu <peterx@redhat.com>
---
Based-on: <5fd9e540-cf13-45f5-ba9a-c7faf364035b@rsg.ci.i.u-tokyo.ac.jp>
v2:
- Refine comments, remove the 1st reset of mru_block [Akihiki]
---
system/physmem.c | 46 ++++++++++++++++++++++++++++++----------------
1 file changed, 30 insertions(+), 16 deletions(-)
diff --git a/system/physmem.c b/system/physmem.c
index 9e1ac13e82..c9e5696d6a 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -836,21 +836,14 @@ static RAMBlock *qemu_get_ram_block(ram_addr_t addr)
abort();
found:
- /* It is safe to write mru_block outside the BQL. This
- * is what happens:
- *
- * qatomic_set(&mru_block, xxx)
- * rcu_read_unlock()
- * xxx removed from list
- * rcu_read_lock()
- * read mru_block
- * qatomic_set(&mru_block, NULL);
- * call_rcu(reclaim_ramblock, xxx);
- * rcu_read_unlock()
+ /*
+ * It is safe to write mru_block outside the BQL, the writer (e.g. when
+ * QEMU frees a ramblock) is designed to be thread-safe with readers
+ * updating this field concurrently. See reclaim_ramblock_prepare().
*
- * qatomic_rcu_set is not needed here. The block was already published
- * when it was placed into the list. Here we're just making an extra
- * copy of the pointer.
+ * qatomic_rcu_set() is not needed here, because the block was already
+ * published when it was placed into the list. Here we're just making
+ * an extra copy of the pointer.
*/
qatomic_set(&ram_list.mru_block, block);
return block;
@@ -2590,6 +2583,23 @@ static void reclaim_ramblock(RAMBlock *block)
g_free(block);
}
+static void reclaim_ramblock_prepare(RAMBlock *block)
+{
+ /*
+ * After one round of grace period, no more reader can see this
+ * ramblock via ram_list. Reset this field making sure it will never
+ * point to the ramblock being freed.
+ */
+ qatomic_set(&ram_list.mru_block, NULL);
+ /*
+ * Wait for a second round of grace period to make sure whoever
+ * accessed the ramblock previously via mru_block has finished using
+ * it. Note: this is an intended nested use of rcu_head. If needed,
+ * we can provide two rcu_heads for ramblock.
+ */
+ call_rcu(block, reclaim_ramblock, rcu);
+}
+
void qemu_ram_free(RAMBlock *block)
{
g_autofree char *name = NULL;
@@ -2607,10 +2617,14 @@ void qemu_ram_free(RAMBlock *block)
name = cpr_name(block->mr);
cpr_delete_fd(name, 0);
QLIST_REMOVE_RCU(block, next);
- qatomic_set(&ram_list.mru_block, NULL);
/* Write list before version */
qatomic_store_release(&ram_list.version, ram_list.version + 1);
- call_rcu(block, reclaim_ramblock, rcu);
+ /*
+ * Wait for a grace period to make sure no reader can see this ramblock
+ * via ram_list anymore. Note that readers can still see and access
+ * the ramblock via mru_block, so we can't free it yet.
+ */
+ call_rcu(block, reclaim_ramblock_prepare, rcu);
qemu_mutex_unlock_ramlist();
}
--
2.53.0
© 2016 - 2026 Red Hat, Inc.