call_drain_rcu() has limitations that make it unsuitable for use in
qmp_device_add(). Introduce a new coroutine version of drain_call_rcu()
with the same functionality but that does not drop the BQL. The next
patch will use it to fix qmp_device_add().
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
MAINTAINERS | 2 ++
docs/devel/rcu.txt | 21 +++++++++++++++++
include/qemu/rcu.h | 1 +
util/rcu-internal.h | 8 +++++++
util/rcu-co.c | 55 +++++++++++++++++++++++++++++++++++++++++++++
util/rcu.c | 3 ++-
util/meson.build | 2 +-
7 files changed, 90 insertions(+), 2 deletions(-)
create mode 100644 util/rcu-internal.h
create mode 100644 util/rcu-co.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 3b29568ed4..7f98253bda 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2908,6 +2908,8 @@ F: include/qemu/rcu*.h
F: tests/unit/rcutorture.c
F: tests/unit/test-rcu-*.c
F: util/rcu.c
+F: util/rcu-co.c
+F: util/rcu-internal.h
Human Monitor (HMP)
M: Dr. David Alan Gilbert <dave@treblig.org>
diff --git a/docs/devel/rcu.txt b/docs/devel/rcu.txt
index 2e6cc607a1..344764527f 100644
--- a/docs/devel/rcu.txt
+++ b/docs/devel/rcu.txt
@@ -130,6 +130,27 @@ The core RCU API is small:
g_free_rcu(&foo, rcu);
+ void coroutine_fn drain_call_rcu_co(void);
+
+ drain_call_rcu_co() yields until the reclamation phase is finished.
+ Reclaimer functions previously submitted with call_rcu1() in this
+ thread will have finished by the time drain_call_rcu_co() returns.
+
+ void drain_call_rcu(void);
+
+ drain_call_rcu() releases the Big QEMU Lock (BQL), if held, waits until
+ the reclamation phase is finished, and then re-acquires the BQL, if
+ previously held. Reclaimer functions previously submitted with
+ call_rcu1() in this thread will have finished by the time
+ drain_call_rcu() returns.
+
+ drain_call_rcu() has the following limitations:
+ 1. It deadlocks when called within an RCU read-side critical section.
+ 2. All functions on the call stack must be designed to handle dropping
+ the BQL.
+
+ Prefer drain_call_rcu_co() over drain_call_rcu().
+
typeof(*p) qatomic_rcu_read(p);
qatomic_rcu_read() is similar to qatomic_load_acquire(), but it makes
diff --git a/include/qemu/rcu.h b/include/qemu/rcu.h
index fea058aa9f..53055df1dc 100644
--- a/include/qemu/rcu.h
+++ b/include/qemu/rcu.h
@@ -141,6 +141,7 @@ struct rcu_head {
};
void call_rcu1(struct rcu_head *head, RCUCBFunc *func);
+void coroutine_fn drain_call_rcu_co(void);
void drain_call_rcu(void);
/* The operands of the minus operator must have the same type,
diff --git a/util/rcu-internal.h b/util/rcu-internal.h
new file mode 100644
index 0000000000..7d85366d54
--- /dev/null
+++ b/util/rcu-internal.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+
+#ifndef RCU_INTERNAL_H
+#define RCU_INTERNAL_H
+
+extern int in_drain_call_rcu;
+
+#endif /* RCU_INTERNAL_H */
diff --git a/util/rcu-co.c b/util/rcu-co.c
new file mode 100644
index 0000000000..920fcacb7a
--- /dev/null
+++ b/util/rcu-co.c
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
+/*
+ * RCU APIs for coroutines
+ *
+ * The RCU coroutine APIs are kept separate from the main RCU code to avoid
+ * depending on AioContext APIs in rcu.c. This is necessary because at least
+ * tests/unit/ptimer-test.c has replacement functions for AioContext APIs that
+ * conflict with the real functions.
+ *
+ * It's also nice to logically separate the core RCU code from the coroutine
+ * APIs :).
+ */
+#include "qemu/osdep.h"
+#include "block/aio.h"
+#include "qemu/atomic.h"
+#include "qemu/coroutine.h"
+#include "qemu/rcu.h"
+#include "rcu-internal.h"
+
+typedef struct {
+ struct rcu_head rcu;
+ Coroutine *co;
+} RcuDrainCo;
+
+static void drain_call_rcu_co_bh(void *opaque)
+{
+ RcuDrainCo *data = opaque;
+
+ /* Re-enter drain_call_rcu_co() where it yielded */
+ aio_co_wake(data->co);
+}
+
+static void drain_call_rcu_co_cb(struct rcu_head *node)
+{
+ RcuDrainCo *data = container_of(node, RcuDrainCo, rcu);
+ AioContext *ctx = qemu_coroutine_get_aio_context(data->co);
+
+ /*
+ * drain_call_rcu_co() might still be running in its thread, so schedule a
+ * BH in its thread. The BH only runs after the coroutine has yielded.
+ */
+ aio_bh_schedule_oneshot(ctx, drain_call_rcu_co_bh, data);
+}
+
+void coroutine_fn drain_call_rcu_co(void)
+{
+ RcuDrainCo data = {
+ .co = qemu_coroutine_self(),
+ };
+
+ qatomic_inc(&in_drain_call_rcu);
+ call_rcu1(&data.rcu, drain_call_rcu_co_cb);
+ qemu_coroutine_yield(); /* wait for drain_rcu_co_bh() */
+ qatomic_dec(&in_drain_call_rcu);
+}
diff --git a/util/rcu.c b/util/rcu.c
index e587bcc483..2519bd7d5c 100644
--- a/util/rcu.c
+++ b/util/rcu.c
@@ -32,6 +32,7 @@
#include "qemu/thread.h"
#include "qemu/main-loop.h"
#include "qemu/lockable.h"
+#include "rcu-internal.h"
#if defined(CONFIG_MALLOC_TRIM)
#include <malloc.h>
#endif
@@ -46,7 +47,7 @@
unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
QemuEvent rcu_gp_event;
-static int in_drain_call_rcu;
+int in_drain_call_rcu;
static QemuMutex rcu_registry_lock;
static QemuMutex rcu_sync_lock;
diff --git a/util/meson.build b/util/meson.build
index a375160286..849d56f756 100644
--- a/util/meson.build
+++ b/util/meson.build
@@ -43,7 +43,7 @@ util_ss.add(files('keyval.c'))
util_ss.add(files('crc32c.c'))
util_ss.add(files('uuid.c'))
util_ss.add(files('getauxval.c'))
-util_ss.add(files('rcu.c'))
+util_ss.add(files('rcu.c', 'rcu-co.c'))
if have_membarrier
util_ss.add(files('sys_membarrier.c'))
endif
--
2.41.0
Am 06.09.2023 um 21:01 hat Stefan Hajnoczi geschrieben: > call_drain_rcu() has limitations that make it unsuitable for use in > qmp_device_add(). This sounds a bit vague with only alluding to some unnamed limitations. I assume that you mean the two points you add to rcu.txt. If so, maybe it would be better to add a reference to that in the commit message. > Introduce a new coroutine version of drain_call_rcu() > with the same functionality but that does not drop the BQL. The next > patch will use it to fix qmp_device_add(). > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> I don't understand the reasoning here. How does yielding from the coroutine not effectively release the BQL, too? It's just that you won't have explicit code here, but the mainloop will do it for you while waiting for new events. Is this about not dropping the BQL specifically in nested event loops, but letting the coroutine wait until we return to the real main loop where dropping the BQL is hopefully not a problem? call_rcu_thread() still waits for the BQL to be dropped somewhere, so from the perspective of the coroutine, it will definitely be dropped during the yield. So if this was your intention, the change probably makes sense, but the description could be clearer. Took me a bit to understand what this is really doing. Kevin
On Tue, 12 Sept 2023 at 12:37, Kevin Wolf <kwolf@redhat.com> wrote: > > Am 06.09.2023 um 21:01 hat Stefan Hajnoczi geschrieben: > > call_drain_rcu() has limitations that make it unsuitable for use in > > qmp_device_add(). > > This sounds a bit vague with only alluding to some unnamed limitations. > I assume that you mean the two points you add to rcu.txt. If so, maybe > it would be better to add a reference to that in the commit message. Yes, exactly. I will add a reference to the commit message. > > > Introduce a new coroutine version of drain_call_rcu() > > with the same functionality but that does not drop the BQL. The next > > patch will use it to fix qmp_device_add(). > > > > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> > > I don't understand the reasoning here. How does yielding from the > coroutine not effectively release the BQL, too? It's just that you won't > have explicit code here, but the mainloop will do it for you while > waiting for new events. > > Is this about not dropping the BQL specifically in nested event loops, > but letting the coroutine wait until we return to the real main loop > where dropping the BQL is hopefully not a problem? Yes. Stefan
© 2016 - 2024 Red Hat, Inc.