[PATCH v2 1/4] scsi: only access SCSIDevice->requests from one thread

Stefan Hajnoczi posted 4 patches 11 months, 4 weeks ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Fam Zheng <fam@euphon.net>, "Michael S. Tsirkin" <mst@redhat.com>, Peter Xu <peterx@redhat.com>, David Hildenbrand <david@redhat.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>
[PATCH v2 1/4] scsi: only access SCSIDevice->requests from one thread
Posted by Stefan Hajnoczi 11 months, 4 weeks ago
Stop depending on the AioContext lock and instead access
SCSIDevice->requests from only one thread at a time:
- When the VM is running only the BlockBackend's AioContext may access
  the requests list.
- When the VM is stopped only the main loop may access the requests
  list.

These constraints protect the requests list without the need for locking
in the I/O code path.

Note that multiple IOThreads are not supported yet because the code
assumes all SCSIRequests are executed from a single AioContext. Leave
that as future work.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
---
 include/hw/scsi/scsi.h |   7 +-
 hw/scsi/scsi-bus.c     | 180 ++++++++++++++++++++++++++++-------------
 2 files changed, 130 insertions(+), 57 deletions(-)

diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index 3692ca82f3..10c4e8288d 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -69,14 +69,19 @@ struct SCSIDevice
 {
     DeviceState qdev;
     VMChangeStateEntry *vmsentry;
-    QEMUBH *bh;
     uint32_t id;
     BlockConf conf;
     SCSISense unit_attention;
     bool sense_is_ua;
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;
+
+    /*
+     * The requests list is only accessed from the AioContext that executes
+     * requests or from the main loop when IOThread processing is stopped.
+     */
     QTAILQ_HEAD(, SCSIRequest) requests;
+
     uint32_t channel;
     uint32_t lun;
     int blocksize;
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index fc4b77fdb0..f3ec11f892 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -85,6 +85,88 @@ SCSIDevice *scsi_device_get(SCSIBus *bus, int channel, int id, int lun)
     return d;
 }
 
+/*
+ * Invoke @fn() for each enqueued request in device @s. Must be called from the
+ * main loop thread while the guest is stopped. This is only suitable for
+ * vmstate ->put(), use scsi_device_for_each_req_async() for other cases.
+ */
+static void scsi_device_for_each_req_sync(SCSIDevice *s,
+                                          void (*fn)(SCSIRequest *, void *),
+                                          void *opaque)
+{
+    SCSIRequest *req;
+    SCSIRequest *next_req;
+
+    assert(!runstate_is_running());
+    assert(qemu_in_main_thread());
+
+    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
+        fn(req, opaque);
+    }
+}
+
+typedef struct {
+    SCSIDevice *s;
+    void (*fn)(SCSIRequest *, void *);
+    void *fn_opaque;
+} SCSIDeviceForEachReqAsyncData;
+
+static void scsi_device_for_each_req_async_bh(void *opaque)
+{
+    g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
+    SCSIDevice *s = data->s;
+    AioContext *ctx;
+    SCSIRequest *req;
+    SCSIRequest *next;
+
+    /*
+     * If the AioContext changed before this BH was called then reschedule into
+     * the new AioContext before accessing ->requests. This can happen when
+     * scsi_device_for_each_req_async() is called and then the AioContext is
+     * changed before BHs are run.
+     */
+    ctx = blk_get_aio_context(s->conf.blk);
+    if (ctx != qemu_get_current_aio_context()) {
+        aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);
+        return;
+    }
+
+    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
+        data->fn(req, data->fn_opaque);
+    }
+
+    /* Drop the reference taken by scsi_device_for_each_req_async() */
+    object_unref(OBJECT(s));
+}
+
+/*
+ * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
+ * runs in the AioContext that is executing the request.
+ */
+static void scsi_device_for_each_req_async(SCSIDevice *s,
+                                           void (*fn)(SCSIRequest *, void *),
+                                           void *opaque)
+{
+    assert(qemu_in_main_thread());
+
+    SCSIDeviceForEachReqAsyncData *data =
+        g_new(SCSIDeviceForEachReqAsyncData, 1);
+
+    data->s = s;
+    data->fn = fn;
+    data->fn_opaque = opaque;
+
+    /*
+     * Hold a reference to the SCSIDevice until
+     * scsi_device_for_each_req_async_bh() finishes.
+     */
+    object_ref(OBJECT(s));
+
+    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
+                            scsi_device_for_each_req_async_bh,
+                            data);
+}
+
 static void scsi_device_realize(SCSIDevice *s, Error **errp)
 {
     SCSIDeviceClass *sc = SCSI_DEVICE_GET_CLASS(s);
@@ -144,20 +226,18 @@ void scsi_bus_init_named(SCSIBus *bus, size_t bus_size, DeviceState *host,
     qbus_set_bus_hotplug_handler(BUS(bus));
 }
 
-static void scsi_dma_restart_bh(void *opaque)
+void scsi_req_retry(SCSIRequest *req)
 {
-    SCSIDevice *s = opaque;
-    SCSIRequest *req, *next;
+    req->retry = true;
+}
 
-    qemu_bh_delete(s->bh);
-    s->bh = NULL;
-
-    aio_context_acquire(blk_get_aio_context(s->conf.blk));
-    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
-        scsi_req_ref(req);
-        if (req->retry) {
-            req->retry = false;
-            switch (req->cmd.mode) {
+/* Called in the AioContext that is executing the request */
+static void scsi_dma_restart_req(SCSIRequest *req, void *opaque)
+{
+    scsi_req_ref(req);
+    if (req->retry) {
+        req->retry = false;
+        switch (req->cmd.mode) {
             case SCSI_XFER_FROM_DEV:
             case SCSI_XFER_TO_DEV:
                 scsi_req_continue(req);
@@ -166,37 +246,22 @@ static void scsi_dma_restart_bh(void *opaque)
                 scsi_req_dequeue(req);
                 scsi_req_enqueue(req);
                 break;
-            }
         }
-        scsi_req_unref(req);
     }
-    aio_context_release(blk_get_aio_context(s->conf.blk));
-    /* Drop the reference that was acquired in scsi_dma_restart_cb */
-    object_unref(OBJECT(s));
-}
-
-void scsi_req_retry(SCSIRequest *req)
-{
-    /* No need to save a reference, because scsi_dma_restart_bh just
-     * looks at the request list.  */
-    req->retry = true;
+    scsi_req_unref(req);
 }
 
 static void scsi_dma_restart_cb(void *opaque, bool running, RunState state)
 {
     SCSIDevice *s = opaque;
 
+    assert(qemu_in_main_thread());
+
     if (!running) {
         return;
     }
-    if (!s->bh) {
-        AioContext *ctx = blk_get_aio_context(s->conf.blk);
-        /* The reference is dropped in scsi_dma_restart_bh.*/
-        object_ref(OBJECT(s));
-        s->bh = aio_bh_new_guarded(ctx, scsi_dma_restart_bh, s,
-                                   &DEVICE(s)->mem_reentrancy_guard);
-        qemu_bh_schedule(s->bh);
-    }
+
+    scsi_device_for_each_req_async(s, scsi_dma_restart_req, NULL);
 }
 
 static bool scsi_bus_is_address_free(SCSIBus *bus,
@@ -1657,15 +1722,16 @@ void scsi_device_set_ua(SCSIDevice *sdev, SCSISense sense)
     }
 }
 
+static void scsi_device_purge_one_req(SCSIRequest *req, void *opaque)
+{
+    scsi_req_cancel_async(req, NULL);
+}
+
 void scsi_device_purge_requests(SCSIDevice *sdev, SCSISense sense)
 {
-    SCSIRequest *req;
+    scsi_device_for_each_req_async(sdev, scsi_device_purge_one_req, NULL);
 
     aio_context_acquire(blk_get_aio_context(sdev->conf.blk));
-    while (!QTAILQ_EMPTY(&sdev->requests)) {
-        req = QTAILQ_FIRST(&sdev->requests);
-        scsi_req_cancel_async(req, NULL);
-    }
     blk_drain(sdev->conf.blk);
     aio_context_release(blk_get_aio_context(sdev->conf.blk));
     scsi_device_set_ua(sdev, sense);
@@ -1737,31 +1803,33 @@ static char *scsibus_get_fw_dev_path(DeviceState *dev)
 
 /* SCSI request list.  For simplicity, pv points to the whole device */
 
+static void put_scsi_req(SCSIRequest *req, void *opaque)
+{
+    QEMUFile *f = opaque;
+
+    assert(!req->io_canceled);
+    assert(req->status == -1 && req->host_status == -1);
+    assert(req->enqueued);
+
+    qemu_put_sbyte(f, req->retry ? 1 : 2);
+    qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
+    qemu_put_be32s(f, &req->tag);
+    qemu_put_be32s(f, &req->lun);
+    if (req->bus->info->save_request) {
+        req->bus->info->save_request(f, req);
+    }
+    if (req->ops->save_request) {
+        req->ops->save_request(f, req);
+    }
+}
+
 static int put_scsi_requests(QEMUFile *f, void *pv, size_t size,
                              const VMStateField *field, JSONWriter *vmdesc)
 {
     SCSIDevice *s = pv;
-    SCSIBus *bus = DO_UPCAST(SCSIBus, qbus, s->qdev.parent_bus);
-    SCSIRequest *req;
 
-    QTAILQ_FOREACH(req, &s->requests, next) {
-        assert(!req->io_canceled);
-        assert(req->status == -1 && req->host_status == -1);
-        assert(req->enqueued);
-
-        qemu_put_sbyte(f, req->retry ? 1 : 2);
-        qemu_put_buffer(f, req->cmd.buf, sizeof(req->cmd.buf));
-        qemu_put_be32s(f, &req->tag);
-        qemu_put_be32s(f, &req->lun);
-        if (bus->info->save_request) {
-            bus->info->save_request(f, req);
-        }
-        if (req->ops->save_request) {
-            req->ops->save_request(f, req);
-        }
-    }
+    scsi_device_for_each_req_sync(s, put_scsi_req, f);
     qemu_put_sbyte(f, 0);
-
     return 0;
 }
 
-- 
2.43.0
Re: [PATCH v2 1/4] scsi: only access SCSIDevice->requests from one thread
Posted by Kevin Wolf 11 months, 1 week ago
Am 04.12.2023 um 17:42 hat Stefan Hajnoczi geschrieben:
> Stop depending on the AioContext lock and instead access
> SCSIDevice->requests from only one thread at a time:
> - When the VM is running only the BlockBackend's AioContext may access
>   the requests list.
> - When the VM is stopped only the main loop may access the requests
>   list.
> 
> These constraints protect the requests list without the need for locking
> in the I/O code path.
> 
> Note that multiple IOThreads are not supported yet because the code
> assumes all SCSIRequests are executed from a single AioContext. Leave
> that as future work.
> 
> Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> Reviewed-by: Eric Blake <eblake@redhat.com>

This makes qemu-iotests 238 240 245 307 fail for me (tested with qcow2).

The crashes are segfaults and look like below. Maybe the device has gone
away before the BH was executed? Though in theory we still hold a
reference to the object.

Kevin


(gdb) bt
#0  scsi_device_for_each_req_async_bh (opaque=0x558b4a2f6e90) at ../hw/scsi/scsi-bus.c:128
#1  0x0000558b47e1c8e6 in aio_bh_poll (ctx=ctx@entry=0x558b4a518ef0) at ../util/async.c:216
#2  0x0000558b47e0764a in aio_poll (ctx=0x558b4a518ef0, blocking=blocking@entry=true) at ../util/aio-posix.c:722
#3  0x0000558b47cb1cd6 in iothread_run (opaque=opaque@entry=0x558b49822a60) at ../iothread.c:63
#4  0x0000558b47e0a6e8 in qemu_thread_start (args=0x558b4a58d5b0) at ../util/qemu-thread-posix.c:541
#5  0x00007f992f0ae947 in start_thread () at /lib64/libc.so.6
#6  0x00007f992f134860 in clone3 () at /lib64/libc.so.6
(gdb) l
123          * If the AioContext changed before this BH was called then reschedule into
124          * the new AioContext before accessing ->requests. This can happen when
125          * scsi_device_for_each_req_async() is called and then the AioContext is
126          * changed before BHs are run.
127          */
128         ctx = blk_get_aio_context(s->conf.blk);
129         if (ctx != qemu_get_current_aio_context()) {
130             aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);
131             return;
132         }
(gdb) p s
$1 = (SCSIDevice *) 0x558b4a2f6
(gdb) p *s
Cannot access memory at address 0x558b4a2f6
Re: [PATCH v2 1/4] scsi: only access SCSIDevice->requests from one thread
Posted by Stefan Hajnoczi 11 months, 1 week ago
On Tue, 19 Dec 2023 at 10:12, Kevin Wolf <kwolf@redhat.com> wrote:
>
> Am 04.12.2023 um 17:42 hat Stefan Hajnoczi geschrieben:
> > Stop depending on the AioContext lock and instead access
> > SCSIDevice->requests from only one thread at a time:
> > - When the VM is running only the BlockBackend's AioContext may access
> >   the requests list.
> > - When the VM is stopped only the main loop may access the requests
> >   list.
> >
> > These constraints protect the requests list without the need for locking
> > in the I/O code path.
> >
> > Note that multiple IOThreads are not supported yet because the code
> > assumes all SCSIRequests are executed from a single AioContext. Leave
> > that as future work.
> >
> > Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
> > Reviewed-by: Eric Blake <eblake@redhat.com>
>
> This makes qemu-iotests 238 240 245 307 fail for me (tested with qcow2).
>
> The crashes are segfaults and look like below. Maybe the device has gone
> away before the BH was executed? Though in theory we still hold a
> reference to the object.
>
> Kevin
>
>
> (gdb) bt
> #0  scsi_device_for_each_req_async_bh (opaque=0x558b4a2f6e90) at ../hw/scsi/scsi-bus.c:128
> #1  0x0000558b47e1c8e6 in aio_bh_poll (ctx=ctx@entry=0x558b4a518ef0) at ../util/async.c:216
> #2  0x0000558b47e0764a in aio_poll (ctx=0x558b4a518ef0, blocking=blocking@entry=true) at ../util/aio-posix.c:722
> #3  0x0000558b47cb1cd6 in iothread_run (opaque=opaque@entry=0x558b49822a60) at ../iothread.c:63
> #4  0x0000558b47e0a6e8 in qemu_thread_start (args=0x558b4a58d5b0) at ../util/qemu-thread-posix.c:541
> #5  0x00007f992f0ae947 in start_thread () at /lib64/libc.so.6
> #6  0x00007f992f134860 in clone3 () at /lib64/libc.so.6
> (gdb) l
> 123          * If the AioContext changed before this BH was called then reschedule into
> 124          * the new AioContext before accessing ->requests. This can happen when
> 125          * scsi_device_for_each_req_async() is called and then the AioContext is
> 126          * changed before BHs are run.
> 127          */
> 128         ctx = blk_get_aio_context(s->conf.blk);
> 129         if (ctx != qemu_get_current_aio_context()) {
> 130             aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);

I forgot that data is g_autofree. This crash can be fixed by:

aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh,
g_steal_pointer(&data));

> 131             return;
> 132         }
> (gdb) p s
> $1 = (SCSIDevice *) 0x558b4a2f6
> (gdb) p *s
> Cannot access memory at address 0x558b4a2f6
>
>