1
The following changes since commit 79fc2fb685f35a5e71e23629760ef4025d6aba31:
1
The following changes since commit eaefea537b476cb853e2edbdc68e969ec777e4bb:
2
2
3
Merge tag 'trivial-branch-for-7.2-pull-request' of https://gitlab.com/laurent_vivier/qemu into staging (2022-10-25 11:37:17 -0400)
3
Merge remote-tracking branch 'remotes/mjt/tags/trivial-patches-fetch' into staging (2017-12-18 14:17:42 +0000)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
7
git://github.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to baf422684d73c7bf38e2c18815e18d44fcf395b6:
9
for you to fetch changes up to 7a9dda0d7f9831c2432620dcfefdadbb7ae888dc:
10
10
11
virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization hint (2022-10-26 14:56:42 -0400)
11
qemu-iotests: add 203 savevm with IOThreads test (2017-12-19 10:25:09 +0000)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Pull request
15
15
16
v2:
17
* Fixed incorrect virtio_blk_data_plane_create() local_err refactoring in
18
"hw/block: Use errp directly rather than local_err" that broke virtio-blk
19
over virtio-mmio [Peter]
20
16
----------------------------------------------------------------
21
----------------------------------------------------------------
17
22
18
Stefan Hajnoczi (13):
23
Mao Zhongyi (4):
19
coroutine: add flag to re-queue at front of CoQueue
24
hw/block/nvme: Convert to realize
20
blkio: add libblkio block driver
25
hw/block: Fix the return type
21
numa: call ->ram_block_removed() in ram_block_notifer_remove()
26
hw/block: Use errp directly rather than local_err
22
block: pass size to bdrv_unregister_buf()
27
dev-storage: Fix the unusual function name
23
block: use BdrvRequestFlags type for supported flag fields
24
block: add BDRV_REQ_REGISTERED_BUF request flag
25
block: return errors from bdrv_register_buf()
26
numa: use QLIST_FOREACH_SAFE() for RAM block notifiers
27
block: add BlockRAMRegistrar
28
exec/cpu-common: add qemu_ram_get_fd()
29
stubs: add qemu_ram_block_from_host() and qemu_ram_get_fd()
30
blkio: implement BDRV_REQ_REGISTERED_BUF optimization
31
virtio-blk: use BDRV_REQ_REGISTERED_BUF optimization hint
32
28
33
MAINTAINERS | 7 +
29
Mark Kanda (2):
34
meson_options.txt | 2 +
30
virtio-blk: make queue size configurable
35
qapi/block-core.json | 77 +-
31
virtio-blk: reject configs with logical block size > physical block
36
meson.build | 9 +
32
size
37
include/block/block-common.h | 9 +
33
38
include/block/block-global-state.h | 10 +-
34
Paolo Bonzini (1):
39
include/block/block_int-common.h | 15 +-
35
block: avoid recursive AioContext acquire in bdrv_inactivate_all()
40
include/exec/cpu-common.h | 1 +
36
41
include/hw/virtio/virtio-blk.h | 2 +
37
Stefan Hajnoczi (16):
42
include/qemu/coroutine.h | 15 +-
38
coroutine: simplify co_aio_sleep_ns() prototype
43
include/sysemu/block-backend-global-state.h | 4 +-
39
qdev: drop unused #include "sysemu/iothread.h"
44
include/sysemu/block-ram-registrar.h | 37 +
40
blockdev: hold AioContext for bdrv_unref() in
45
block.c | 14 +
41
external_snapshot_clean()
46
block/blkio.c | 1008 +++++++++++++++++++
42
block: don't keep AioContext acquired after
47
block/blkverify.c | 4 +-
43
external_snapshot_prepare()
48
block/block-backend.c | 8 +-
44
block: don't keep AioContext acquired after drive_backup_prepare()
49
block/block-ram-registrar.c | 58 ++
45
block: don't keep AioContext acquired after blockdev_backup_prepare()
50
block/crypto.c | 4 +-
46
block: don't keep AioContext acquired after
51
block/file-posix.c | 1 -
47
internal_snapshot_prepare()
52
block/gluster.c | 1 -
48
block: drop unused BlockDirtyBitmapState->aio_context field
53
block/io.c | 101 +-
49
iothread: add iothread_by_id() API
54
block/mirror.c | 2 +
50
blockdev: add x-blockdev-set-iothread testing command
55
block/nbd.c | 1 -
51
qemu-iotests: add 202 external snapshots IOThread test
56
block/nvme.c | 20 +-
52
docs: mark nested AioContext locking as a legacy API
57
block/parallels.c | 1 -
53
blockdev: add x-blockdev-set-iothread force boolean
58
block/qcow.c | 2 -
54
iotests: add VM.add_object()
59
block/qed.c | 1 -
55
iothread: fix iothread_stop() race condition
60
block/raw-format.c | 2 +
56
qemu-iotests: add 203 savevm with IOThreads test
61
block/replication.c | 1 -
57
62
block/ssh.c | 1 -
58
docs/devel/multiple-iothreads.txt | 7 +-
63
block/vhdx.c | 1 -
59
qapi/block-core.json | 40 ++++++
64
hw/block/virtio-blk.c | 39 +-
60
hw/block/dataplane/virtio-blk.h | 2 +-
65
hw/core/numa.c | 26 +-
61
include/hw/block/block.h | 4 +-
66
qemu-img.c | 6 +-
62
include/hw/virtio/virtio-blk.h | 1 +
67
softmmu/physmem.c | 5 +
63
include/qemu/coroutine.h | 6 +-
68
stubs/physmem.c | 13 +
64
include/sysemu/iothread.h | 4 +-
69
tests/qtest/modules-test.c | 3 +
65
block.c | 14 ++-
70
util/qemu-coroutine-lock.c | 9 +-
66
block/null.c | 3 +-
71
util/vfio-helpers.c | 5 +-
67
block/sheepdog.c | 3 +-
72
block/meson.build | 2 +
68
blockdev.c | 259 +++++++++++++++++++++++++++-----------
73
scripts/meson-buildoptions.sh | 3 +
69
hw/block/block.c | 15 ++-
74
stubs/meson.build | 1 +
70
hw/block/dataplane/virtio-blk.c | 12 +-
75
42 files changed, 1435 insertions(+), 96 deletions(-)
71
hw/block/fdc.c | 17 +--
76
create mode 100644 include/sysemu/block-ram-registrar.h
72
hw/block/nvme.c | 23 ++--
77
create mode 100644 block/blkio.c
73
hw/block/virtio-blk.c | 30 +++--
78
create mode 100644 block/block-ram-registrar.c
74
hw/core/qdev-properties-system.c | 1 -
79
create mode 100644 stubs/physmem.c
75
hw/ide/qdev.c | 12 +-
76
hw/scsi/scsi-disk.c | 13 +-
77
hw/usb/dev-storage.c | 29 ++---
78
iothread.c | 27 +++-
79
util/qemu-coroutine-sleep.c | 4 +-
80
tests/qemu-iotests/202 | 95 ++++++++++++++
81
tests/qemu-iotests/202.out | 11 ++
82
tests/qemu-iotests/203 | 59 +++++++++
83
tests/qemu-iotests/203.out | 6 +
84
tests/qemu-iotests/group | 2 +
85
tests/qemu-iotests/iotests.py | 5 +
86
28 files changed, 531 insertions(+), 173 deletions(-)
87
create mode 100755 tests/qemu-iotests/202
88
create mode 100644 tests/qemu-iotests/202.out
89
create mode 100755 tests/qemu-iotests/203
90
create mode 100644 tests/qemu-iotests/203.out
80
91
81
--
92
--
82
2.37.3
93
2.14.3
94
95
1
When a coroutine wakes up it may determine that it must re-queue.
1
The AioContext pointer argument to co_aio_sleep_ns() is only used for
2
Normally coroutines are pushed onto the back of the CoQueue, but for
2
the sleep timer. It does not affect where the caller coroutine is
3
fairness it may be necessary to push one onto the front of the CoQueue.
3
resumed.
4
4
5
Add a flag to specify that the coroutine should be pushed onto the front
5
Due to changes to coroutine and AIO APIs it is now possible to drop the
6
of the CoQueue. A later patch will use this to ensure fairness in the
6
AioContext pointer argument. This is safe to do since no caller has
7
bounce buffer CoQueue used by the blkio BlockDriver.
7
specific requirements for which AioContext the timer must run in.
8
8
9
This patch drops the AioContext pointer argument and renames the
10
function to simplify the API.
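
As a rough sketch of how the new flag is meant to be used -- only the
qemu_co_queue_wait_flags()/CO_QUEUE_WAIT_FRONT/CoQueueWaitFlags API below
comes from this series; BouncePool and its fields are hypothetical:

  #include "qemu/coroutine.h"

  /* Illustrative waiter: the first wait queues at the back as usual; a
   * coroutine that is woken but finds its condition still unsatisfied
   * re-queues at the front so newer waiters cannot starve it. */
  typedef struct {
      CoMutex lock;
      CoQueue waiters;
      bool buffer_available;
  } BouncePool;

  static void coroutine_fn bounce_pool_acquire(BouncePool *pool)
  {
      CoQueueWaitFlags flags = 0;

      qemu_co_mutex_lock(&pool->lock);
      while (!pool->buffer_available) {
          qemu_co_queue_wait_flags(&pool->waiters, &pool->lock, flags);
          flags = CO_QUEUE_WAIT_FRONT; /* jump the queue on any retry */
      }
      pool->buffer_available = false;
      qemu_co_mutex_unlock(&pool->lock);
  }
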
11
12
Reported-by: Paolo Bonzini <pbonzini@redhat.com>
13
Reported-by: Eric Blake <eblake@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-id: 20221013185908.1297568-2-stefanha@redhat.com
15
Reviewed-by: Eric Blake <eblake@redhat.com>
16
Message-id: 20171109102652.6360-1-stefanha@redhat.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
18
---
13
include/qemu/coroutine.h | 15 +++++++++++++--
19
include/qemu/coroutine.h | 6 +-----
14
util/qemu-coroutine-lock.c | 9 +++++++--
20
block/null.c | 3 +--
15
2 files changed, 20 insertions(+), 4 deletions(-)
21
block/sheepdog.c | 3 +--
22
util/qemu-coroutine-sleep.c | 4 ++--
23
4 files changed, 5 insertions(+), 11 deletions(-)
16
24
17
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
25
diff --git a/include/qemu/coroutine.h b/include/qemu/coroutine.h
18
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
19
--- a/include/qemu/coroutine.h
27
--- a/include/qemu/coroutine.h
20
+++ b/include/qemu/coroutine.h
28
+++ b/include/qemu/coroutine.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct CoQueue {
29
@@ -XXX,XX +XXX,XX @@ void qemu_co_rwlock_unlock(CoRwlock *lock);
30
31
/**
32
* Yield the coroutine for a given duration
33
- *
34
- * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be
35
- * resumed when using aio_poll().
22
*/
36
*/
23
void qemu_co_queue_init(CoQueue *queue);
37
-void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
24
38
- int64_t ns);
25
+typedef enum {
39
+void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns);
26
+ /*
40
27
+ * Enqueue at front instead of back. Use this to re-queue a request when
28
+ * its wait condition is not satisfied after being woken up.
29
+ */
30
+ CO_QUEUE_WAIT_FRONT = 0x1,
31
+} CoQueueWaitFlags;
32
+
33
/**
41
/**
34
* Adds the current coroutine to the CoQueue and transfers control to the
42
* Yield until a file descriptor becomes readable
35
* caller of the coroutine. The mutex is unlocked during the wait and
43
diff --git a/block/null.c b/block/null.c
36
* locked again afterwards.
37
*/
38
#define qemu_co_queue_wait(queue, lock) \
39
- qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock))
40
-void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock);
41
+ qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), 0)
42
+#define qemu_co_queue_wait_flags(queue, lock, flags) \
43
+ qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), (flags))
44
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
45
+ CoQueueWaitFlags flags);
46
47
/**
48
* Removes the next coroutine from the CoQueue, and queue it to run after
49
diff --git a/util/qemu-coroutine-lock.c b/util/qemu-coroutine-lock.c
50
index XXXXXXX..XXXXXXX 100644
44
index XXXXXXX..XXXXXXX 100644
51
--- a/util/qemu-coroutine-lock.c
45
--- a/block/null.c
52
+++ b/util/qemu-coroutine-lock.c
46
+++ b/block/null.c
53
@@ -XXX,XX +XXX,XX @@ void qemu_co_queue_init(CoQueue *queue)
47
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int null_co_common(BlockDriverState *bs)
54
QSIMPLEQ_INIT(&queue->entries);
48
BDRVNullState *s = bs->opaque;
49
50
if (s->latency_ns) {
51
- co_aio_sleep_ns(bdrv_get_aio_context(bs), QEMU_CLOCK_REALTIME,
52
- s->latency_ns);
53
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, s->latency_ns);
54
}
55
return 0;
55
}
56
}
56
57
diff --git a/block/sheepdog.c b/block/sheepdog.c
57
-void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock)
58
index XXXXXXX..XXXXXXX 100644
58
+void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock,
59
--- a/block/sheepdog.c
59
+ CoQueueWaitFlags flags)
60
+++ b/block/sheepdog.c
61
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void reconnect_to_sdog(void *opaque)
62
if (s->fd < 0) {
63
DPRINTF("Wait for connection to be established\n");
64
error_report_err(local_err);
65
- co_aio_sleep_ns(bdrv_get_aio_context(s->bs), QEMU_CLOCK_REALTIME,
66
- 1000000000ULL);
67
+ qemu_co_sleep_ns(QEMU_CLOCK_REALTIME, 1000000000ULL);
68
}
69
};
70
71
diff --git a/util/qemu-coroutine-sleep.c b/util/qemu-coroutine-sleep.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/util/qemu-coroutine-sleep.c
74
+++ b/util/qemu-coroutine-sleep.c
75
@@ -XXX,XX +XXX,XX @@ static void co_sleep_cb(void *opaque)
76
aio_co_wake(sleep_cb->co);
77
}
78
79
-void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
80
- int64_t ns)
81
+void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns)
60
{
82
{
61
Coroutine *self = qemu_coroutine_self();
83
+ AioContext *ctx = qemu_get_current_aio_context();
62
- QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
84
CoSleepCB sleep_cb = {
63
+ if (flags & CO_QUEUE_WAIT_FRONT) {
85
.co = qemu_coroutine_self(),
64
+ QSIMPLEQ_INSERT_HEAD(&queue->entries, self, co_queue_next);
86
};
65
+ } else {
66
+ QSIMPLEQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
67
+ }
68
69
if (lock) {
70
qemu_lockable_unlock(lock);
71
--
87
--
72
2.37.3
88
2.14.3
89
90
New patch
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
2
3
Convert nvme_init() to realize and rename it to nvme_realize().
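
A minimal sketch of the realize() convention this moves to -- FooState,
FOO() and the foo_* names are placeholders, not the real nvme code (which
follows in the diff):

  /* realize() reports failure by filling *errp and returning, instead of
   * returning -1 the way a PCIDeviceClass .init callback did. */
  static void foo_realize(PCIDevice *pci_dev, Error **errp)
  {
      FooState *s = FOO(pci_dev);

      if (!s->conf.blk) {
          error_setg(errp, "drive property not set");
          return;
      }
  }

  static void foo_class_init(ObjectClass *oc, void *data)
  {
      PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);

      pc->realize = foo_realize;    /* was: pc->init = foo_init */
  }
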
4
5
Cc: John Snow <jsnow@redhat.com>
6
Cc: Keith Busch <keith.busch@intel.com>
7
Cc: Kevin Wolf <kwolf@redhat.com>
8
Cc: Max Reitz <mreitz@redhat.com>
9
Cc: Markus Armbruster <armbru@redhat.com>
10
11
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
12
Message-id: 2882e72d795e04cbe2120f569d551aef2467ac60.1511317952.git.maozy.fnst@cn.fujitsu.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
---
15
hw/block/nvme.c | 18 ++++++++++--------
16
1 file changed, 10 insertions(+), 8 deletions(-)
17
18
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/block/nvme.c
21
+++ b/hw/block/nvme.c
22
@@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps nvme_cmb_ops = {
23
},
24
};
25
26
-static int nvme_init(PCIDevice *pci_dev)
27
+static void nvme_realize(PCIDevice *pci_dev, Error **errp)
28
{
29
NvmeCtrl *n = NVME(pci_dev);
30
NvmeIdCtrl *id = &n->id_ctrl;
31
@@ -XXX,XX +XXX,XX @@ static int nvme_init(PCIDevice *pci_dev)
32
Error *local_err = NULL;
33
34
if (!n->conf.blk) {
35
- return -1;
36
+ error_setg(errp, "drive property not set");
37
+ return;
38
}
39
40
bs_size = blk_getlength(n->conf.blk);
41
if (bs_size < 0) {
42
- return -1;
43
+ error_setg(errp, "could not get backing file size");
44
+ return;
45
}
46
47
blkconf_serial(&n->conf, &n->serial);
48
if (!n->serial) {
49
- return -1;
50
+ error_setg(errp, "serial property not set");
51
+ return;
52
}
53
blkconf_blocksizes(&n->conf);
54
blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
55
false, &local_err);
56
if (local_err) {
57
- error_report_err(local_err);
58
- return -1;
59
+ error_propagate(errp, local_err);
60
+ return;
61
}
62
63
pci_conf = pci_dev->config;
64
@@ -XXX,XX +XXX,XX @@ static int nvme_init(PCIDevice *pci_dev)
65
cpu_to_le64(n->ns_size >>
66
id_ns->lbaf[NVME_ID_NS_FLBAS_INDEX(ns->id_ns.flbas)].ds);
67
}
68
- return 0;
69
}
70
71
static void nvme_exit(PCIDevice *pci_dev)
72
@@ -XXX,XX +XXX,XX @@ static void nvme_class_init(ObjectClass *oc, void *data)
73
DeviceClass *dc = DEVICE_CLASS(oc);
74
PCIDeviceClass *pc = PCI_DEVICE_CLASS(oc);
75
76
- pc->init = nvme_init;
77
+ pc->realize = nvme_realize;
78
pc->exit = nvme_exit;
79
pc->class_id = PCI_CLASS_STORAGE_EXPRESS;
80
pc->vendor_id = PCI_VENDOR_ID_INTEL;
81
--
82
2.14.3
83
84
1
Registering an I/O buffer is only a performance optimization hint but it
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
2
is still necessary to return errors when it fails.
3
2
4
Later patches will need to detect errors when registering buffers but an
3
When a function has no success value to convey, it is usually made to
5
immediate advantage is that error_report() calls are no longer needed in
4
return void. That turns out to be a poor fit here, because it forces an
6
block driver .bdrv_register_buf() functions.
5
extra local_err variable and an error_propagate() call at every caller.
6
The resulting code is cumbersome, so it is worth reporting success or
7
failure through the return value instead.
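
A hedged sketch of the resulting caller pattern -- the foo_* names are
hypothetical; only the bool-returning helper signatures come from these
patches, and the same convention applies to the bdrv_register_buf()
conversion:

  /* Success/failure arrives in the return value, so the caller needs no
   * local Error * plus error_propagate() dance. */
  static void foo_device_realize(DeviceState *dev, Error **errp)
  {
      FooDevice *s = FOO_DEVICE(dev);

      if (!blkconf_apply_backend_options(&s->conf,
                                         blk_is_read_only(s->conf.blk),
                                         true, errp)) {
          return;    /* the callee already set *errp */
      }
  }
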
7
8
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
So fix the return type of blkconf_apply_backend_options(),
9
Message-id: 20221013185908.1297568-8-stefanha@redhat.com
10
blkconf_geometry() and virtio_blk_data_plane_create() to avoid it.
11
12
Cc: John Snow <jsnow@redhat.com>
13
Cc: Kevin Wolf <kwolf@redhat.com>
14
Cc: Max Reitz <mreitz@redhat.com>
15
Cc: Stefan Hajnoczi <stefanha@redhat.com>
16
17
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
19
Message-id: ac0edc1fc70c4457e5cec94405eb7d1f89f9c2c1.1511317952.git.maozy.fnst@cn.fujitsu.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
20
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
21
---
12
include/block/block-global-state.h | 5 ++-
22
hw/block/dataplane/virtio-blk.h | 2 +-
13
include/block/block_int-common.h | 5 ++-
23
include/hw/block/block.h | 4 ++--
14
include/sysemu/block-backend-global-state.h | 2 +-
24
hw/block/block.c | 15 +++++++++------
15
block/block-backend.c | 4 +--
25
hw/block/dataplane/virtio-blk.c | 12 +++++++-----
16
block/io.c | 34 +++++++++++++++++++--
26
4 files changed, 19 insertions(+), 14 deletions(-)
17
block/nvme.c | 18 +++++------
18
qemu-img.c | 2 +-
19
7 files changed, 52 insertions(+), 18 deletions(-)
20
27
21
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
28
diff --git a/hw/block/dataplane/virtio-blk.h b/hw/block/dataplane/virtio-blk.h
22
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
23
--- a/include/block/block-global-state.h
30
--- a/hw/block/dataplane/virtio-blk.h
24
+++ b/include/block/block-global-state.h
31
+++ b/hw/block/dataplane/virtio-blk.h
25
@@ -XXX,XX +XXX,XX @@ void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
32
@@ -XXX,XX +XXX,XX @@
26
*
33
27
* Buffers must not overlap and they must be unregistered with the same <host,
34
typedef struct VirtIOBlockDataPlane VirtIOBlockDataPlane;
28
* size> values that they were registered with.
35
29
+ *
36
-void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
30
+ * Returns: true on success, false on failure
37
+bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
31
*/
38
VirtIOBlockDataPlane **dataplane,
32
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
39
Error **errp);
33
+bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
40
void virtio_blk_data_plane_destroy(VirtIOBlockDataPlane *s);
34
+ Error **errp);
41
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
35
void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size);
36
37
void bdrv_cancel_in_flight(BlockDriverState *bs);
38
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
39
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
40
--- a/include/block/block_int-common.h
43
--- a/include/hw/block/block.h
41
+++ b/include/block/block_int-common.h
44
+++ b/include/hw/block/block.h
42
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
45
@@ -XXX,XX +XXX,XX @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
43
* that it can do IOMMU mapping with VFIO etc., in order to get better
46
/* Configuration helpers */
44
* performance. In the case of VFIO drivers, this callback is used to do
47
45
* DMA mapping for hot buffers.
48
void blkconf_serial(BlockConf *conf, char **serial);
46
+ *
49
-void blkconf_geometry(BlockConf *conf, int *trans,
47
+ * Returns: true on success, false on failure
50
+bool blkconf_geometry(BlockConf *conf, int *trans,
48
*/
51
unsigned cyls_max, unsigned heads_max, unsigned secs_max,
49
- void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
52
Error **errp);
50
+ bool (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size,
53
void blkconf_blocksizes(BlockConf *conf);
51
+ Error **errp);
54
-void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
52
void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
55
+bool blkconf_apply_backend_options(BlockConf *conf, bool readonly,
53
56
bool resizable, Error **errp);
54
/*
57
55
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
58
/* Hard disk geometry */
59
diff --git a/hw/block/block.c b/hw/block/block.c
56
index XXXXXXX..XXXXXXX 100644
60
index XXXXXXX..XXXXXXX 100644
57
--- a/include/sysemu/block-backend-global-state.h
61
--- a/hw/block/block.c
58
+++ b/include/sysemu/block-backend-global-state.h
62
+++ b/hw/block/block.c
59
@@ -XXX,XX +XXX,XX @@ void blk_io_limits_enable(BlockBackend *blk, const char *group);
63
@@ -XXX,XX +XXX,XX @@ void blkconf_blocksizes(BlockConf *conf)
60
void blk_io_limits_update_group(BlockBackend *blk, const char *group);
61
void blk_set_force_allow_inactivate(BlockBackend *blk);
62
63
-void blk_register_buf(BlockBackend *blk, void *host, size_t size);
64
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp);
65
void blk_unregister_buf(BlockBackend *blk, void *host, size_t size);
66
67
const BdrvChild *blk_root(BlockBackend *blk);
68
diff --git a/block/block-backend.c b/block/block-backend.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/block/block-backend.c
71
+++ b/block/block-backend.c
72
@@ -XXX,XX +XXX,XX @@ static void blk_root_drained_end(BdrvChild *child, int *drained_end_counter)
73
}
64
}
74
}
65
}
75
66
76
-void blk_register_buf(BlockBackend *blk, void *host, size_t size)
67
-void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
77
+bool blk_register_buf(BlockBackend *blk, void *host, size_t size, Error **errp)
68
+bool blkconf_apply_backend_options(BlockConf *conf, bool readonly,
69
bool resizable, Error **errp)
78
{
70
{
79
GLOBAL_STATE_CODE();
71
BlockBackend *blk = conf->blk;
80
- bdrv_register_buf(blk_bs(blk), host, size);
72
@@ -XXX,XX +XXX,XX @@ void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
81
+ return bdrv_register_buf(blk_bs(blk), host, size, errp);
73
74
ret = blk_set_perm(blk, perm, shared_perm, errp);
75
if (ret < 0) {
76
- return;
77
+ return false;
78
}
79
80
switch (conf->wce) {
81
@@ -XXX,XX +XXX,XX @@ void blkconf_apply_backend_options(BlockConf *conf, bool readonly,
82
83
blk_set_enable_write_cache(blk, wce);
84
blk_set_on_error(blk, rerror, werror);
85
+
86
+ return true;
82
}
87
}
83
88
84
void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
89
-void blkconf_geometry(BlockConf *conf, int *ptrans,
85
diff --git a/block/io.c b/block/io.c
90
+bool blkconf_geometry(BlockConf *conf, int *ptrans,
86
index XXXXXXX..XXXXXXX 100644
91
unsigned cyls_max, unsigned heads_max, unsigned secs_max,
87
--- a/block/io.c
92
Error **errp)
88
+++ b/block/io.c
89
@@ -XXX,XX +XXX,XX @@ void bdrv_io_unplug(BlockDriverState *bs)
90
}
91
}
92
93
-void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
94
+/* Helper that undoes bdrv_register_buf() when it fails partway through */
95
+static void bdrv_register_buf_rollback(BlockDriverState *bs,
96
+ void *host,
97
+ size_t size,
98
+ BdrvChild *final_child)
99
+{
100
+ BdrvChild *child;
101
+
102
+ QLIST_FOREACH(child, &bs->children, next) {
103
+ if (child == final_child) {
104
+ break;
105
+ }
106
+
107
+ bdrv_unregister_buf(child->bs, host, size);
108
+ }
109
+
110
+ if (bs->drv && bs->drv->bdrv_unregister_buf) {
111
+ bs->drv->bdrv_unregister_buf(bs, host, size);
112
+ }
113
+}
114
+
115
+bool bdrv_register_buf(BlockDriverState *bs, void *host, size_t size,
116
+ Error **errp)
117
{
93
{
118
BdrvChild *child;
94
@@ -XXX,XX +XXX,XX @@ void blkconf_geometry(BlockConf *conf, int *ptrans,
119
95
if (conf->cyls || conf->heads || conf->secs) {
120
GLOBAL_STATE_CODE();
96
if (conf->cyls < 1 || conf->cyls > cyls_max) {
121
if (bs->drv && bs->drv->bdrv_register_buf) {
97
error_setg(errp, "cyls must be between 1 and %u", cyls_max);
122
- bs->drv->bdrv_register_buf(bs, host, size);
98
- return;
123
+ if (!bs->drv->bdrv_register_buf(bs, host, size, errp)) {
124
+ return false;
99
+ return false;
125
+ }
100
}
126
}
101
if (conf->heads < 1 || conf->heads > heads_max) {
127
QLIST_FOREACH(child, &bs->children, next) {
102
error_setg(errp, "heads must be between 1 and %u", heads_max);
128
- bdrv_register_buf(child->bs, host, size);
103
- return;
129
+ if (!bdrv_register_buf(child->bs, host, size, errp)) {
130
+ bdrv_register_buf_rollback(bs, host, size, child);
131
+ return false;
104
+ return false;
132
+ }
105
}
106
if (conf->secs < 1 || conf->secs > secs_max) {
107
error_setg(errp, "secs must be between 1 and %u", secs_max);
108
- return;
109
+ return false;
110
}
133
}
111
}
134
+ return true;
112
+ return true;
135
}
113
}
136
114
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
137
void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
138
diff --git a/block/nvme.c b/block/nvme.c
139
index XXXXXXX..XXXXXXX 100644
115
index XXXXXXX..XXXXXXX 100644
140
--- a/block/nvme.c
116
--- a/hw/block/dataplane/virtio-blk.c
141
+++ b/block/nvme.c
117
+++ b/hw/block/dataplane/virtio-blk.c
142
@@ -XXX,XX +XXX,XX @@ static void nvme_aio_unplug(BlockDriverState *bs)
118
@@ -XXX,XX +XXX,XX @@ static void notify_guest_bh(void *opaque)
119
}
120
121
/* Context: QEMU global mutex held */
122
-void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
123
+bool virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
124
VirtIOBlockDataPlane **dataplane,
125
Error **errp)
126
{
127
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
128
error_setg(errp,
129
"device is incompatible with iothread "
130
"(transport does not support notifiers)");
131
- return;
132
+ return false;
133
}
134
if (!virtio_device_ioeventfd_enabled(vdev)) {
135
error_setg(errp, "ioeventfd is required for iothread");
136
- return;
137
+ return false;
138
}
139
140
/* If dataplane is (re-)enabled while the guest is running there could
141
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
142
*/
143
if (blk_op_is_blocked(conf->conf.blk, BLOCK_OP_TYPE_DATAPLANE, errp)) {
144
error_prepend(errp, "cannot start virtio-blk dataplane: ");
145
- return;
146
+ return false;
147
}
143
}
148
}
149
/* Don't try if transport does not support notifiers. */
150
if (!virtio_device_ioeventfd_enabled(vdev)) {
151
- return;
152
+ return false;
153
}
154
155
s = g_new0(VirtIOBlockDataPlane, 1);
156
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_create(VirtIODevice *vdev, VirtIOBlkConf *conf,
157
s->batch_notify_vqs = bitmap_new(conf->num_queues);
158
159
*dataplane = s;
160
+
161
+ return true;
144
}
162
}
145
163
146
-static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
164
/* Context: QEMU global mutex held */
147
+static bool nvme_register_buf(BlockDriverState *bs, void *host, size_t size,
148
+ Error **errp)
149
{
150
int ret;
151
- Error *local_err = NULL;
152
BDRVNVMeState *s = bs->opaque;
153
154
- ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, &local_err);
155
- if (ret) {
156
- /* FIXME: we may run out of IOVA addresses after repeated
157
- * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
158
- * doesn't reclaim addresses for fixed mappings. */
159
- error_reportf_err(local_err, "nvme_register_buf failed: ");
160
- }
161
+ /*
162
+ * FIXME: we may run out of IOVA addresses after repeated
163
+ * bdrv_register_buf/bdrv_unregister_buf, because nvme_vfio_dma_unmap
164
+ * doesn't reclaim addresses for fixed mappings.
165
+ */
166
+ ret = qemu_vfio_dma_map(s->vfio, host, size, false, NULL, errp);
167
+ return ret == 0;
168
}
169
170
static void nvme_unregister_buf(BlockDriverState *bs, void *host, size_t size)
171
diff --git a/qemu-img.c b/qemu-img.c
172
index XXXXXXX..XXXXXXX 100644
173
--- a/qemu-img.c
174
+++ b/qemu-img.c
175
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
176
data.buf = blk_blockalign(blk, buf_size);
177
memset(data.buf, pattern, data.nrreq * data.bufsize);
178
179
- blk_register_buf(blk, data.buf, buf_size);
180
+ blk_register_buf(blk, data.buf, buf_size, &error_fatal);
181
182
data.qiov = g_new(QEMUIOVector, data.nrreq);
183
for (i = 0; i < data.nrreq; i++) {
184
--
165
--
185
2.37.3
166
2.14.3
167
168
New patch
1
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
2
3
[Drop virtio_blk_data_plane_create() change that misinterprets return
4
value when the virtio transport does not support dataplane.
5
--Stefan]
6
7
Cc: John Snow <jsnow@redhat.com>
8
Cc: Kevin Wolf <kwolf@redhat.com>
9
Cc: Max Reitz <mreitz@redhat.com>
10
Cc: Keith Busch <keith.busch@intel.com>
11
Cc: Stefan Hajnoczi <stefanha@redhat.com>
12
Cc: "Michael S. Tsirkin" <mst@redhat.com>
13
Cc: Paolo Bonzini <pbonzini@redhat.com>
14
Cc: Gerd Hoffmann <kraxel@redhat.com>
15
Cc: Markus Armbruster <armbru@redhat.com>
16
17
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
18
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
19
Message-id: e77848d3735ba590f23ffbf8094379c646c33d79.1511317952.git.maozy.fnst@cn.fujitsu.com
20
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
21
---
22
hw/block/fdc.c | 17 ++++++-----------
23
hw/block/nvme.c | 7 ++-----
24
hw/block/virtio-blk.c | 13 +++++--------
25
hw/ide/qdev.c | 12 ++++--------
26
hw/scsi/scsi-disk.c | 13 ++++---------
27
hw/usb/dev-storage.c | 9 +++------
28
6 files changed, 24 insertions(+), 47 deletions(-)
29
30
diff --git a/hw/block/fdc.c b/hw/block/fdc.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/hw/block/fdc.c
33
+++ b/hw/block/fdc.c
34
@@ -XXX,XX +XXX,XX @@ static void fd_revalidate(FDrive *drv)
35
static void fd_change_cb(void *opaque, bool load, Error **errp)
36
{
37
FDrive *drive = opaque;
38
- Error *local_err = NULL;
39
40
if (!load) {
41
blk_set_perm(drive->blk, 0, BLK_PERM_ALL, &error_abort);
42
} else {
43
- blkconf_apply_backend_options(drive->conf,
44
- blk_is_read_only(drive->blk), false,
45
- &local_err);
46
- if (local_err) {
47
- error_propagate(errp, local_err);
48
+ if (!blkconf_apply_backend_options(drive->conf,
49
+ blk_is_read_only(drive->blk), false,
50
+ errp)) {
51
return;
52
}
53
}
54
@@ -XXX,XX +XXX,XX @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp)
55
FloppyDrive *dev = FLOPPY_DRIVE(qdev);
56
FloppyBus *bus = FLOPPY_BUS(qdev->parent_bus);
57
FDrive *drive;
58
- Error *local_err = NULL;
59
int ret;
60
61
if (dev->unit == -1) {
62
@@ -XXX,XX +XXX,XX @@ static void floppy_drive_realize(DeviceState *qdev, Error **errp)
63
dev->conf.rerror = BLOCKDEV_ON_ERROR_AUTO;
64
dev->conf.werror = BLOCKDEV_ON_ERROR_AUTO;
65
66
- blkconf_apply_backend_options(&dev->conf, blk_is_read_only(dev->conf.blk),
67
- false, &local_err);
68
- if (local_err) {
69
- error_propagate(errp, local_err);
70
+ if (!blkconf_apply_backend_options(&dev->conf,
71
+ blk_is_read_only(dev->conf.blk),
72
+ false, errp)) {
73
return;
74
}
75
76
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/hw/block/nvme.c
79
+++ b/hw/block/nvme.c
80
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
81
int i;
82
int64_t bs_size;
83
uint8_t *pci_conf;
84
- Error *local_err = NULL;
85
86
if (!n->conf.blk) {
87
error_setg(errp, "drive property not set");
88
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
89
return;
90
}
91
blkconf_blocksizes(&n->conf);
92
- blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
93
- false, &local_err);
94
- if (local_err) {
95
- error_propagate(errp, local_err);
96
+ if (!blkconf_apply_backend_options(&n->conf, blk_is_read_only(n->conf.blk),
97
+ false, errp)) {
98
return;
99
}
100
101
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
102
index XXXXXXX..XXXXXXX 100644
103
--- a/hw/block/virtio-blk.c
104
+++ b/hw/block/virtio-blk.c
105
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
106
}
107
108
blkconf_serial(&conf->conf, &conf->serial);
109
- blkconf_apply_backend_options(&conf->conf,
110
- blk_is_read_only(conf->conf.blk), true,
111
- &err);
112
- if (err) {
113
- error_propagate(errp, err);
114
+ if (!blkconf_apply_backend_options(&conf->conf,
115
+ blk_is_read_only(conf->conf.blk), true,
116
+ errp)) {
117
return;
118
}
119
s->original_wce = blk_enable_write_cache(conf->conf.blk);
120
- blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, &err);
121
- if (err) {
122
- error_propagate(errp, err);
123
+ if (!blkconf_geometry(&conf->conf, NULL, 65535, 255, 255, errp)) {
124
return;
125
}
126
+
127
blkconf_blocksizes(&conf->conf);
128
129
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
130
diff --git a/hw/ide/qdev.c b/hw/ide/qdev.c
131
index XXXXXXX..XXXXXXX 100644
132
--- a/hw/ide/qdev.c
133
+++ b/hw/ide/qdev.c
134
@@ -XXX,XX +XXX,XX @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp)
135
{
136
IDEBus *bus = DO_UPCAST(IDEBus, qbus, dev->qdev.parent_bus);
137
IDEState *s = bus->ifs + dev->unit;
138
- Error *err = NULL;
139
int ret;
140
141
if (!dev->conf.blk) {
142
@@ -XXX,XX +XXX,XX @@ static void ide_dev_initfn(IDEDevice *dev, IDEDriveKind kind, Error **errp)
143
144
blkconf_serial(&dev->conf, &dev->serial);
145
if (kind != IDE_CD) {
146
- blkconf_geometry(&dev->conf, &dev->chs_trans, 65535, 16, 255, &err);
147
- if (err) {
148
- error_propagate(errp, err);
149
+ if (!blkconf_geometry(&dev->conf, &dev->chs_trans, 65535, 16, 255,
150
+ errp)) {
151
return;
152
}
153
}
154
- blkconf_apply_backend_options(&dev->conf, kind == IDE_CD, kind != IDE_CD,
155
- &err);
156
- if (err) {
157
- error_propagate(errp, err);
158
+ if (!blkconf_apply_backend_options(&dev->conf, kind == IDE_CD,
159
+ kind != IDE_CD, errp)) {
160
return;
161
}
162
163
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
164
index XXXXXXX..XXXXXXX 100644
165
--- a/hw/scsi/scsi-disk.c
166
+++ b/hw/scsi/scsi-disk.c
167
@@ -XXX,XX +XXX,XX @@ static void scsi_disk_unit_attention_reported(SCSIDevice *dev)
168
static void scsi_realize(SCSIDevice *dev, Error **errp)
169
{
170
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, dev);
171
- Error *err = NULL;
172
173
if (!s->qdev.conf.blk) {
174
error_setg(errp, "drive property not set");
175
@@ -XXX,XX +XXX,XX @@ static void scsi_realize(SCSIDevice *dev, Error **errp)
176
}
177
178
if (dev->type == TYPE_DISK) {
179
- blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, &err);
180
- if (err) {
181
- error_propagate(errp, err);
182
+ if (!blkconf_geometry(&dev->conf, NULL, 65535, 255, 255, errp)) {
183
return;
184
}
185
}
186
- blkconf_apply_backend_options(&dev->conf,
187
- blk_is_read_only(s->qdev.conf.blk),
188
- dev->type == TYPE_DISK, &err);
189
- if (err) {
190
- error_propagate(errp, err);
191
+ if (!blkconf_apply_backend_options(&dev->conf,
192
+ blk_is_read_only(s->qdev.conf.blk),
193
+ dev->type == TYPE_DISK, errp)) {
194
return;
195
}
196
197
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
198
index XXXXXXX..XXXXXXX 100644
199
--- a/hw/usb/dev-storage.c
200
+++ b/hw/usb/dev-storage.c
201
@@ -XXX,XX +XXX,XX @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
202
MSDState *s = USB_STORAGE_DEV(dev);
203
BlockBackend *blk = s->conf.blk;
204
SCSIDevice *scsi_dev;
205
- Error *err = NULL;
206
207
if (!blk) {
208
error_setg(errp, "drive property not set");
209
@@ -XXX,XX +XXX,XX @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
210
211
blkconf_serial(&s->conf, &dev->serial);
212
blkconf_blocksizes(&s->conf);
213
- blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true, &err);
214
- if (err) {
215
- error_propagate(errp, err);
216
+ if (!blkconf_apply_backend_options(&s->conf, blk_is_read_only(blk), true,
217
+ errp)) {
218
return;
219
}
220
221
@@ -XXX,XX +XXX,XX @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
222
&usb_msd_scsi_info_storage, NULL);
223
scsi_dev = scsi_bus_legacy_add_drive(&s->bus, blk, 0, !!s->removable,
224
s->conf.bootindex, dev->serial,
225
- &err);
226
+ errp);
227
blk_unref(blk);
228
if (!scsi_dev) {
229
- error_propagate(errp, err);
230
return;
231
}
232
usb_msd_handle_reset(dev);
233
--
234
2.14.3
235
236
New patch
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
2
3
The function names usb_msd_{realize,unrealize}_* and
4
usb_msd_class_initfn_* are unusual. Rename them to
5
usb_msd_*_{realize,unrealize} and usb_msd_class_*_initfn.
6
7
Cc: Gerd Hoffmann <kraxel@redhat.com>
8
9
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
10
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
11
Message-id: 11e6003433abce35f3f4970e1acc71ee92dbcf51.1511317952.git.maozy.fnst@cn.fujitsu.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
hw/usb/dev-storage.c | 20 ++++++++++----------
15
1 file changed, 10 insertions(+), 10 deletions(-)
16
17
diff --git a/hw/usb/dev-storage.c b/hw/usb/dev-storage.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/usb/dev-storage.c
20
+++ b/hw/usb/dev-storage.c
21
@@ -XXX,XX +XXX,XX @@ static void usb_msd_unrealize_storage(USBDevice *dev, Error **errp)
22
object_unref(OBJECT(&s->bus));
23
}
24
25
-static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
26
+static void usb_msd_storage_realize(USBDevice *dev, Error **errp)
27
{
28
MSDState *s = USB_STORAGE_DEV(dev);
29
BlockBackend *blk = s->conf.blk;
30
@@ -XXX,XX +XXX,XX @@ static void usb_msd_realize_storage(USBDevice *dev, Error **errp)
31
s->scsi_dev = scsi_dev;
32
}
33
34
-static void usb_msd_unrealize_bot(USBDevice *dev, Error **errp)
35
+static void usb_msd_bot_unrealize(USBDevice *dev, Error **errp)
36
{
37
MSDState *s = USB_STORAGE_DEV(dev);
38
39
object_unref(OBJECT(&s->bus));
40
}
41
42
-static void usb_msd_realize_bot(USBDevice *dev, Error **errp)
43
+static void usb_msd_bot_realize(USBDevice *dev, Error **errp)
44
{
45
MSDState *s = USB_STORAGE_DEV(dev);
46
DeviceState *d = DEVICE(dev);
47
@@ -XXX,XX +XXX,XX @@ static void usb_msd_class_initfn_common(ObjectClass *klass, void *data)
48
dc->vmsd = &vmstate_usb_msd;
49
}
50
51
-static void usb_msd_class_initfn_storage(ObjectClass *klass, void *data)
52
+static void usb_msd_class_storage_initfn(ObjectClass *klass, void *data)
53
{
54
DeviceClass *dc = DEVICE_CLASS(klass);
55
USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
56
57
- uc->realize = usb_msd_realize_storage;
58
+ uc->realize = usb_msd_storage_realize;
59
uc->unrealize = usb_msd_unrealize_storage;
60
dc->props = msd_properties;
61
}
62
@@ -XXX,XX +XXX,XX @@ static void usb_msd_instance_init(Object *obj)
63
object_property_set_int(obj, -1, "bootindex", NULL);
64
}
65
66
-static void usb_msd_class_initfn_bot(ObjectClass *klass, void *data)
67
+static void usb_msd_class_bot_initfn(ObjectClass *klass, void *data)
68
{
69
USBDeviceClass *uc = USB_DEVICE_CLASS(klass);
70
71
- uc->realize = usb_msd_realize_bot;
72
- uc->unrealize = usb_msd_unrealize_bot;
73
+ uc->realize = usb_msd_bot_realize;
74
+ uc->unrealize = usb_msd_bot_unrealize;
75
uc->attached_settable = true;
76
}
77
78
static const TypeInfo msd_info = {
79
.name = "usb-storage",
80
.parent = TYPE_USB_STORAGE,
81
- .class_init = usb_msd_class_initfn_storage,
82
+ .class_init = usb_msd_class_storage_initfn,
83
.instance_init = usb_msd_instance_init,
84
};
85
86
static const TypeInfo bot_info = {
87
.name = "usb-bot",
88
.parent = TYPE_USB_STORAGE,
89
- .class_init = usb_msd_class_initfn_bot,
90
+ .class_init = usb_msd_class_bot_initfn,
91
};
92
93
static void usb_msd_register_types(void)
94
--
95
2.14.3
96
97
New patch
1
Commit 1351d1ec89eabebc9fdff20451a62c413d7accc1 ("qdev: drop iothread
2
property type") forgot to remove this include.
1
3
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Message-id: 20171205133954.31006-1-stefanha@redhat.com
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
---
8
hw/core/qdev-properties-system.c | 1 -
9
1 file changed, 1 deletion(-)
10
11
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/hw/core/qdev-properties-system.c
14
+++ b/hw/core/qdev-properties-system.c
15
@@ -XXX,XX +XXX,XX @@
16
#include "qapi/visitor.h"
17
#include "chardev/char-fe.h"
18
#include "sysemu/tpm_backend.h"
19
-#include "sysemu/iothread.h"
20
21
static void get_pointer(Object *obj, Visitor *v, Property *prop,
22
char *(*print)(void *ptr),
23
--
24
2.14.3
25
26
New patch
1
bdrv_unref() requires the AioContext lock because bdrv_flush() uses
2
BDRV_POLL_WHILE(), which assumes the AioContext is currently held. If
3
BDRV_POLL_WHILE() runs without AioContext held the
4
pthread_mutex_unlock() call in aio_context_release() fails.
1
5
6
This patch moves bdrv_unref() into the AioContext locked region to solve
7
the following pthread_mutex_unlock() failure:
8
9
#0 0x00007f566181969b in raise () at /lib64/libc.so.6
10
#1 0x00007f566181b3b1 in abort () at /lib64/libc.so.6
11
#2 0x00005592cd590458 in error_exit (err=<optimized out>, msg=msg@entry=0x5592cdaf6d60 <__func__.23977> "qemu_mutex_unlock") at util/qemu-thread-posix.c:36
12
#3 0x00005592cd96e738 in qemu_mutex_unlock (mutex=mutex@entry=0x5592ce9505e0) at util/qemu-thread-posix.c:96
13
#4 0x00005592cd969b69 in aio_context_release (ctx=ctx@entry=0x5592ce950580) at util/async.c:507
14
#5 0x00005592cd8ead78 in bdrv_flush (bs=bs@entry=0x5592cfa87210) at block/io.c:2478
15
#6 0x00005592cd89df30 in bdrv_close (bs=0x5592cfa87210) at block.c:3207
16
#7 0x00005592cd89df30 in bdrv_delete (bs=0x5592cfa87210) at block.c:3395
17
#8 0x00005592cd89df30 in bdrv_unref (bs=0x5592cfa87210) at block.c:4418
18
#9 0x00005592cd6b7f86 in qmp_transaction (dev_list=<optimized out>, has_props=<optimized out>, props=<optimized out>, errp=errp@entry=0x7ffe4a1fc9d8) at blockdev.c:2308
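
In outline, the ordering the fix establishes looks like this (a sketch,
not the exact blockdev.c code):

  static void example_clean(BlockDriverState *old_bs, BlockDriverState *new_bs)
  {
      AioContext *ctx = bdrv_get_aio_context(old_bs);

      aio_context_acquire(ctx);
      bdrv_drained_end(old_bs);
      bdrv_unref(new_bs);        /* may flush and poll; needs ctx held */
      aio_context_release(ctx);  /* unlock only after the unref */
  }
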
19
20
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
21
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
22
Reviewed-by: Eric Blake <eblake@redhat.com>
23
Message-id: 20171206144550.22295-2-stefanha@redhat.com
24
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
25
---
26
blockdev.c | 2 +-
27
1 file changed, 1 insertion(+), 1 deletion(-)
28
29
diff --git a/blockdev.c b/blockdev.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/blockdev.c
32
+++ b/blockdev.c
33
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_clean(BlkActionState *common)
34
DO_UPCAST(ExternalSnapshotState, common, common);
35
if (state->aio_context) {
36
bdrv_drained_end(state->old_bs);
37
- aio_context_release(state->aio_context);
38
bdrv_unref(state->new_bs);
39
+ aio_context_release(state->aio_context);
40
}
41
}
42
43
--
44
2.14.3
45
46
New patch
1
1
It is not necessary to hold AioContext across transactions anymore since
2
bdrv_drained_begin/end() is used to keep the nodes quiesced. In fact,
3
using the AioContext lock for this purpose was always buggy.
4
5
This patch reduces the scope of AioContext locked regions. This is not
6
just a cleanup but also fixes hangs that occur in BDRV_POLL_WHILE()
7
because it is unware of recursive locking and does not release the
8
AioContext the necessary number of times to allow progress to be made.
9
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
12
Reviewed-by: Eric Blake <eblake@redhat.com>
13
Message-id: 20171206144550.22295-3-stefanha@redhat.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
16
blockdev.c | 71 ++++++++++++++++++++++++++++++++++++++++++--------------------
17
1 file changed, 48 insertions(+), 23 deletions(-)
18
19
diff --git a/blockdev.c b/blockdev.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/blockdev.c
22
+++ b/blockdev.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct ExternalSnapshotState {
24
BlkActionState common;
25
BlockDriverState *old_bs;
26
BlockDriverState *new_bs;
27
- AioContext *aio_context;
28
bool overlay_appended;
29
} ExternalSnapshotState;
30
31
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_prepare(BlkActionState *common,
32
ExternalSnapshotState *state =
33
DO_UPCAST(ExternalSnapshotState, common, common);
34
TransactionAction *action = common->action;
35
+ AioContext *aio_context;
36
37
/* 'blockdev-snapshot' and 'blockdev-snapshot-sync' have similar
38
* purpose but a different set of parameters */
39
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_prepare(BlkActionState *common,
40
return;
41
}
42
43
- /* Acquire AioContext now so any threads operating on old_bs stop */
44
- state->aio_context = bdrv_get_aio_context(state->old_bs);
45
- aio_context_acquire(state->aio_context);
46
+ aio_context = bdrv_get_aio_context(state->old_bs);
47
+ aio_context_acquire(aio_context);
48
+
49
+ /* Paired with .clean() */
50
bdrv_drained_begin(state->old_bs);
51
52
if (!bdrv_is_inserted(state->old_bs)) {
53
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, device);
54
- return;
55
+ goto out;
56
}
57
58
if (bdrv_op_is_blocked(state->old_bs,
59
BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, errp)) {
60
- return;
61
+ goto out;
62
}
63
64
if (!bdrv_is_read_only(state->old_bs)) {
65
if (bdrv_flush(state->old_bs)) {
66
error_setg(errp, QERR_IO_ERROR);
67
- return;
68
+ goto out;
69
}
70
}
71
72
if (!bdrv_is_first_non_filter(state->old_bs)) {
73
error_setg(errp, QERR_FEATURE_DISABLED, "snapshot");
74
- return;
75
+ goto out;
76
}
77
78
if (action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC) {
79
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_prepare(BlkActionState *common,
80
81
if (node_name && !snapshot_node_name) {
82
error_setg(errp, "New snapshot node name missing");
83
- return;
84
+ goto out;
85
}
86
87
if (snapshot_node_name &&
88
bdrv_lookup_bs(snapshot_node_name, snapshot_node_name, NULL)) {
89
error_setg(errp, "New snapshot node name already in use");
90
- return;
91
+ goto out;
92
}
93
94
flags = state->old_bs->open_flags;
95
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_prepare(BlkActionState *common,
96
int64_t size = bdrv_getlength(state->old_bs);
97
if (size < 0) {
98
error_setg_errno(errp, -size, "bdrv_getlength failed");
99
- return;
100
+ goto out;
101
}
102
bdrv_img_create(new_image_file, format,
103
state->old_bs->filename,
104
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_prepare(BlkActionState *common,
105
NULL, size, flags, false, &local_err);
106
if (local_err) {
107
error_propagate(errp, local_err);
108
- return;
109
+ goto out;
110
}
111
}
112
113
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_prepare(BlkActionState *common,
114
errp);
115
/* We will manually add the backing_hd field to the bs later */
116
if (!state->new_bs) {
117
- return;
118
+ goto out;
119
}
120
121
if (bdrv_has_blk(state->new_bs)) {
122
error_setg(errp, "The snapshot is already in use");
123
- return;
124
+ goto out;
125
}
126
127
if (bdrv_op_is_blocked(state->new_bs, BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
128
errp)) {
129
- return;
130
+ goto out;
131
}
132
133
if (state->new_bs->backing != NULL) {
134
error_setg(errp, "The snapshot already has a backing image");
135
- return;
136
+ goto out;
137
}
138
139
if (!state->new_bs->drv->supports_backing) {
140
error_setg(errp, "The snapshot does not support backing images");
141
- return;
142
+ goto out;
143
}
144
145
- bdrv_set_aio_context(state->new_bs, state->aio_context);
146
+ bdrv_set_aio_context(state->new_bs, aio_context);
147
148
/* This removes our old bs and adds the new bs. This is an operation that
149
* can fail, so we need to do it in .prepare; undoing it for abort is
150
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_prepare(BlkActionState *common,
151
bdrv_append(state->new_bs, state->old_bs, &local_err);
152
if (local_err) {
153
error_propagate(errp, local_err);
154
- return;
155
+ goto out;
156
}
157
state->overlay_appended = true;
158
+
159
+out:
160
+ aio_context_release(aio_context);
161
}
162
163
static void external_snapshot_commit(BlkActionState *common)
164
{
165
ExternalSnapshotState *state =
166
DO_UPCAST(ExternalSnapshotState, common, common);
167
+ AioContext *aio_context;
168
+
169
+ aio_context = bdrv_get_aio_context(state->old_bs);
170
+ aio_context_acquire(aio_context);
171
172
/* We don't need (or want) to use the transactional
173
* bdrv_reopen_multiple() across all the entries at once, because we
174
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_commit(BlkActionState *common)
175
bdrv_reopen(state->old_bs, state->old_bs->open_flags & ~BDRV_O_RDWR,
176
NULL);
177
}
178
+
179
+ aio_context_release(aio_context);
180
}
181
182
static void external_snapshot_abort(BlkActionState *common)
183
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_abort(BlkActionState *common)
184
DO_UPCAST(ExternalSnapshotState, common, common);
185
if (state->new_bs) {
186
if (state->overlay_appended) {
187
+ AioContext *aio_context;
188
+
189
+ aio_context = bdrv_get_aio_context(state->old_bs);
190
+ aio_context_acquire(aio_context);
191
+
192
bdrv_ref(state->old_bs); /* we can't let bdrv_set_backind_hd()
193
close state->old_bs; we need it */
194
bdrv_set_backing_hd(state->new_bs, NULL, &error_abort);
195
bdrv_replace_node(state->new_bs, state->old_bs, &error_abort);
196
bdrv_unref(state->old_bs); /* bdrv_replace_node() ref'ed old_bs */
197
+
198
+ aio_context_release(aio_context);
199
}
200
}
201
}
202
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_clean(BlkActionState *common)
203
{
204
ExternalSnapshotState *state =
205
DO_UPCAST(ExternalSnapshotState, common, common);
206
- if (state->aio_context) {
207
- bdrv_drained_end(state->old_bs);
208
- bdrv_unref(state->new_bs);
209
- aio_context_release(state->aio_context);
210
+ AioContext *aio_context;
211
+
212
+ if (!state->old_bs) {
213
+ return;
214
}
215
+
216
+ aio_context = bdrv_get_aio_context(state->old_bs);
217
+ aio_context_acquire(aio_context);
218
+
219
+ bdrv_drained_end(state->old_bs);
220
+ bdrv_unref(state->new_bs);
221
+
222
+ aio_context_release(aio_context);
223
}
224
225
typedef struct DriveBackupState {
226
--
227
2.14.3
228
229
New patch
1
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
3
Reviewed-by: Eric Blake <eblake@redhat.com>
4
Message-id: 20171206144550.22295-4-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
blockdev.c | 42 ++++++++++++++++++++++++++++++++++--------
8
1 file changed, 34 insertions(+), 8 deletions(-)
1
9
10
diff --git a/blockdev.c b/blockdev.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/blockdev.c
13
+++ b/blockdev.c
14
@@ -XXX,XX +XXX,XX @@ static void external_snapshot_clean(BlkActionState *common)
15
typedef struct DriveBackupState {
16
BlkActionState common;
17
BlockDriverState *bs;
18
- AioContext *aio_context;
19
BlockJob *job;
20
} DriveBackupState;
21
22
@@ -XXX,XX +XXX,XX @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
23
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
24
BlockDriverState *bs;
25
DriveBackup *backup;
26
+ AioContext *aio_context;
27
Error *local_err = NULL;
28
29
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
30
@@ -XXX,XX +XXX,XX @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
31
return;
32
}
33
34
- /* AioContext is released in .clean() */
35
- state->aio_context = bdrv_get_aio_context(bs);
36
- aio_context_acquire(state->aio_context);
37
+ aio_context = bdrv_get_aio_context(bs);
38
+ aio_context_acquire(aio_context);
39
+
40
+ /* Paired with .clean() */
41
bdrv_drained_begin(bs);
42
+
43
state->bs = bs;
44
45
state->job = do_drive_backup(backup, common->block_job_txn, &local_err);
46
if (local_err) {
47
error_propagate(errp, local_err);
48
- return;
49
+ goto out;
50
}
51
+
52
+out:
53
+ aio_context_release(aio_context);
54
}
55
56
static void drive_backup_commit(BlkActionState *common)
57
{
58
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
59
+ AioContext *aio_context;
60
+
61
+ aio_context = bdrv_get_aio_context(state->bs);
62
+ aio_context_acquire(aio_context);
63
+
64
assert(state->job);
65
block_job_start(state->job);
66
+
67
+ aio_context_release(aio_context);
68
}
69
70
static void drive_backup_abort(BlkActionState *common)
71
@@ -XXX,XX +XXX,XX @@ static void drive_backup_abort(BlkActionState *common)
72
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
73
74
if (state->job) {
75
+ AioContext *aio_context;
76
+
77
+ aio_context = bdrv_get_aio_context(state->bs);
78
+ aio_context_acquire(aio_context);
79
+
80
block_job_cancel_sync(state->job);
81
+
82
+ aio_context_release(aio_context);
83
}
84
}
85
86
static void drive_backup_clean(BlkActionState *common)
87
{
88
DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common);
89
+ AioContext *aio_context;
90
91
- if (state->aio_context) {
92
- bdrv_drained_end(state->bs);
93
- aio_context_release(state->aio_context);
94
+ if (!state->bs) {
95
+ return;
96
}
97
+
98
+ aio_context = bdrv_get_aio_context(state->bs);
99
+ aio_context_acquire(aio_context);
100
+
101
+ bdrv_drained_end(state->bs);
102
+
103
+ aio_context_release(aio_context);
104
}
105
106
typedef struct BlockdevBackupState {
107
--
108
2.14.3
109
110
1
The only implementor of bdrv_register_buf() is block/nvme.c, where the
2
size is not needed when unregistering a buffer. This is because
3
util/vfio-helpers.c can look up mappings by address.
4
5
Future block drivers that implement bdrv_register_buf() may not be able
6
to do their job given only the buffer address. Add a size argument to
7
bdrv_unregister_buf().
8
9
Also document the assumptions about
10
bdrv_register_buf()/bdrv_unregister_buf() calls. The same <host, size>
11
values that were given to bdrv_register_buf() must be given to
12
bdrv_unregister_buf().
13
14
gcc 11.2.1 emits a spurious warning that img_bench()'s buf_size local
15
variable might be uninitialized, so it's necessary to silence the
16
compiler.
17
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
1
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
2
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
20
Message-id: 20221013185908.1297568-5-stefanha@redhat.com
3
Reviewed-by: Eric Blake <eblake@redhat.com>
4
Message-id: 20171206144550.22295-5-stefanha@redhat.com
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
---
6
---
23
include/block/block-global-state.h | 5 ++++-
7
blockdev.c | 44 ++++++++++++++++++++++++++++++++++----------
24
include/block/block_int-common.h | 2 +-
8
1 file changed, 34 insertions(+), 10 deletions(-)
25
include/sysemu/block-backend-global-state.h | 2 +-
26
block/block-backend.c | 4 ++--
27
block/io.c | 6 +++---
28
block/nvme.c | 2 +-
29
qemu-img.c | 4 ++--
30
7 files changed, 14 insertions(+), 11 deletions(-)
31
9
32
diff --git a/include/block/block-global-state.h b/include/block/block-global-state.h
10
diff --git a/blockdev.c b/blockdev.c
33
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
34
--- a/include/block/block-global-state.h
12
--- a/blockdev.c
35
+++ b/include/block/block-global-state.h
13
+++ b/blockdev.c
36
@@ -XXX,XX +XXX,XX @@ void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
14
@@ -XXX,XX +XXX,XX @@ typedef struct BlockdevBackupState {
37
* Register/unregister a buffer for I/O. For example, VFIO drivers are
15
BlkActionState common;
38
* interested to know the memory areas that would later be used for I/O, so
16
BlockDriverState *bs;
39
* that they can prepare IOMMU mapping etc., to get better performance.
17
BlockJob *job;
40
+ *
18
- AioContext *aio_context;
41
+ * Buffers must not overlap and they must be unregistered with the same <host,
19
} BlockdevBackupState;
42
+ * size> values that they were registered with.
20
43
*/
21
static BlockJob *do_blockdev_backup(BlockdevBackup *backup, BlockJobTxn *txn,
44
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
22
@@ -XXX,XX +XXX,XX @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
45
-void bdrv_unregister_buf(BlockDriverState *bs, void *host);
23
BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
46
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size);
24
BlockdevBackup *backup;
47
25
BlockDriverState *bs, *target;
48
void bdrv_cancel_in_flight(BlockDriverState *bs);
26
+ AioContext *aio_context;
49
27
Error *local_err = NULL;
50
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
28
51
index XXXXXXX..XXXXXXX 100644
29
assert(common->action->type == TRANSACTION_ACTION_KIND_BLOCKDEV_BACKUP);
52
--- a/include/block/block_int-common.h
30
@@ -XXX,XX +XXX,XX @@ static void blockdev_backup_prepare(BlkActionState *common, Error **errp)
53
+++ b/include/block/block_int-common.h
31
return;
54
@@ -XXX,XX +XXX,XX @@ struct BlockDriver {
32
}
55
* DMA mapping for hot buffers.
33
56
*/
34
- /* AioContext is released in .clean() */
57
void (*bdrv_register_buf)(BlockDriverState *bs, void *host, size_t size);
35
- state->aio_context = bdrv_get_aio_context(bs);
58
- void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host);
36
- if (state->aio_context != bdrv_get_aio_context(target)) {
59
+ void (*bdrv_unregister_buf)(BlockDriverState *bs, void *host, size_t size);
37
- state->aio_context = NULL;
60
38
+ aio_context = bdrv_get_aio_context(bs);
61
/*
39
+ if (aio_context != bdrv_get_aio_context(target)) {
62
* This field is modified only under the BQL, and is part of
40
error_setg(errp, "Backup between two IO threads is not implemented");
63
diff --git a/include/sysemu/block-backend-global-state.h b/include/sysemu/block-backend-global-state.h
41
return;
64
index XXXXXXX..XXXXXXX 100644
42
}
65
--- a/include/sysemu/block-backend-global-state.h
43
- aio_context_acquire(state->aio_context);
66
+++ b/include/sysemu/block-backend-global-state.h
44
+ aio_context_acquire(aio_context);
67
@@ -XXX,XX +XXX,XX @@ void blk_io_limits_update_group(BlockBackend *blk, const char *group);
45
state->bs = bs;
68
void blk_set_force_allow_inactivate(BlockBackend *blk);
46
+
69
47
+ /* Paired with .clean() */
70
void blk_register_buf(BlockBackend *blk, void *host, size_t size);
48
bdrv_drained_begin(state->bs);
71
-void blk_unregister_buf(BlockBackend *blk, void *host);
49
72
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size);
50
state->job = do_blockdev_backup(backup, common->block_job_txn, &local_err);
73
51
if (local_err) {
74
const BdrvChild *blk_root(BlockBackend *blk);
52
error_propagate(errp, local_err);
75
53
- return;
76
diff --git a/block/block-backend.c b/block/block-backend.c
54
+ goto out;
77
index XXXXXXX..XXXXXXX 100644
55
}
78
--- a/block/block-backend.c
56
+
79
+++ b/block/block-backend.c
57
+out:
80
@@ -XXX,XX +XXX,XX @@ void blk_register_buf(BlockBackend *blk, void *host, size_t size)
58
+ aio_context_release(aio_context);
81
bdrv_register_buf(blk_bs(blk), host, size);
82
}
59
}
83
60
84
-void blk_unregister_buf(BlockBackend *blk, void *host)
61
static void blockdev_backup_commit(BlkActionState *common)
85
+void blk_unregister_buf(BlockBackend *blk, void *host, size_t size)
86
{
62
{
87
GLOBAL_STATE_CODE();
63
BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
88
- bdrv_unregister_buf(blk_bs(blk), host);
64
+ AioContext *aio_context;
89
+ bdrv_unregister_buf(blk_bs(blk), host, size);
65
+
66
+ aio_context = bdrv_get_aio_context(state->bs);
67
+ aio_context_acquire(aio_context);
68
+
69
assert(state->job);
70
block_job_start(state->job);
71
+
72
+ aio_context_release(aio_context);
90
}
73
}
91
74
92
int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
75
static void blockdev_backup_abort(BlkActionState *common)
93
diff --git a/block/io.c b/block/io.c
76
@@ -XXX,XX +XXX,XX @@ static void blockdev_backup_abort(BlkActionState *common)
94
index XXXXXXX..XXXXXXX 100644
77
BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
95
--- a/block/io.c
78
96
+++ b/block/io.c
79
if (state->job) {
97
@@ -XXX,XX +XXX,XX @@ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
80
+ AioContext *aio_context;
81
+
82
+ aio_context = bdrv_get_aio_context(state->bs);
83
+ aio_context_acquire(aio_context);
84
+
85
block_job_cancel_sync(state->job);
86
+
87
+ aio_context_release(aio_context);
98
}
88
}
99
}
89
}
100
90
101
-void bdrv_unregister_buf(BlockDriverState *bs, void *host)
91
static void blockdev_backup_clean(BlkActionState *common)
102
+void bdrv_unregister_buf(BlockDriverState *bs, void *host, size_t size)
103
{
92
{
104
BdrvChild *child;
93
BlockdevBackupState *state = DO_UPCAST(BlockdevBackupState, common, common);
105
94
+ AioContext *aio_context;
106
GLOBAL_STATE_CODE();
95
107
if (bs->drv && bs->drv->bdrv_unregister_buf) {
96
- if (state->aio_context) {
108
- bs->drv->bdrv_unregister_buf(bs, host);
97
- bdrv_drained_end(state->bs);
109
+ bs->drv->bdrv_unregister_buf(bs, host, size);
98
- aio_context_release(state->aio_context);
99
+ if (!state->bs) {
100
+ return;
110
}
101
}
111
QLIST_FOREACH(child, &bs->children, next) {
102
+
112
- bdrv_unregister_buf(child->bs, host);
103
+ aio_context = bdrv_get_aio_context(state->bs);
113
+ bdrv_unregister_buf(child->bs, host, size);
104
+ aio_context_acquire(aio_context);
114
}
105
+
106
+ bdrv_drained_end(state->bs);
107
+
108
+ aio_context_release(aio_context);
115
}
109
}
116
110
117
diff --git a/block/nvme.c b/block/nvme.c
111
typedef struct BlockDirtyBitmapState {
118
index XXXXXXX..XXXXXXX 100644
119
--- a/block/nvme.c
120
+++ b/block/nvme.c
121
@@ -XXX,XX +XXX,XX @@ static void nvme_register_buf(BlockDriverState *bs, void *host, size_t size)
122
}
123
}
124
125
-static void nvme_unregister_buf(BlockDriverState *bs, void *host)
126
+static void nvme_unregister_buf(BlockDriverState *bs, void *host, size_t size)
127
{
128
BDRVNVMeState *s = bs->opaque;
129
130
diff --git a/qemu-img.c b/qemu-img.c
131
index XXXXXXX..XXXXXXX 100644
132
--- a/qemu-img.c
133
+++ b/qemu-img.c
134
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
135
struct timeval t1, t2;
136
int i;
137
bool force_share = false;
138
- size_t buf_size;
139
+ size_t buf_size = 0;
140
141
for (;;) {
142
static const struct option long_options[] = {
143
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
144
145
out:
146
if (data.buf) {
147
- blk_unregister_buf(blk, data.buf);
148
+ blk_unregister_buf(blk, data.buf, buf_size);
149
}
150
qemu_vfree(data.buf);
151
blk_unref(blk);
152
--
112
--
153
2.37.3
113
2.14.3
114
115
New patch

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171206144550.22295-6-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 blockdev.c | 47 +++++++++++++++++++++++++++++++----------------
 1 file changed, 31 insertions(+), 16 deletions(-)
diff --git a/blockdev.c b/blockdev.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/blockdev.c
13
+++ b/blockdev.c
14
@@ -XXX,XX +XXX,XX @@ struct BlkActionState {
15
typedef struct InternalSnapshotState {
16
BlkActionState common;
17
BlockDriverState *bs;
18
- AioContext *aio_context;
19
QEMUSnapshotInfo sn;
20
bool created;
21
} InternalSnapshotState;
22
@@ -XXX,XX +XXX,XX @@ static void internal_snapshot_prepare(BlkActionState *common,
23
qemu_timeval tv;
24
BlockdevSnapshotInternal *internal;
25
InternalSnapshotState *state;
26
+ AioContext *aio_context;
27
int ret1;
28
29
g_assert(common->action->type ==
30
@@ -XXX,XX +XXX,XX @@ static void internal_snapshot_prepare(BlkActionState *common,
31
return;
32
}
33
34
- /* AioContext is released in .clean() */
35
- state->aio_context = bdrv_get_aio_context(bs);
36
- aio_context_acquire(state->aio_context);
37
+ aio_context = bdrv_get_aio_context(bs);
38
+ aio_context_acquire(aio_context);
39
40
state->bs = bs;
41
+
42
+ /* Paired with .clean() */
43
bdrv_drained_begin(bs);
44
45
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, errp)) {
46
- return;
47
+ goto out;
48
}
49
50
if (bdrv_is_read_only(bs)) {
51
error_setg(errp, "Device '%s' is read only", device);
52
- return;
53
+ goto out;
54
}
55
56
if (!bdrv_can_snapshot(bs)) {
57
error_setg(errp, "Block format '%s' used by device '%s' "
58
"does not support internal snapshots",
59
bs->drv->format_name, device);
60
- return;
61
+ goto out;
62
}
63
64
if (!strlen(name)) {
65
error_setg(errp, "Name is empty");
66
- return;
67
+ goto out;
68
}
69
70
/* check whether a snapshot with name exist */
71
@@ -XXX,XX +XXX,XX @@ static void internal_snapshot_prepare(BlkActionState *common,
72
&local_err);
73
if (local_err) {
74
error_propagate(errp, local_err);
75
- return;
76
+ goto out;
77
} else if (ret) {
78
error_setg(errp,
79
"Snapshot with name '%s' already exists on device '%s'",
80
name, device);
81
- return;
82
+ goto out;
83
}
84
85
/* 3. take the snapshot */
86
@@ -XXX,XX +XXX,XX @@ static void internal_snapshot_prepare(BlkActionState *common,
87
error_setg_errno(errp, -ret1,
88
"Failed to create snapshot '%s' on device '%s'",
89
name, device);
90
- return;
91
+ goto out;
92
}
93
94
/* 4. succeed, mark a snapshot is created */
95
state->created = true;
96
+
97
+out:
98
+ aio_context_release(aio_context);
99
}
100
101
static void internal_snapshot_abort(BlkActionState *common)
102
@@ -XXX,XX +XXX,XX @@ static void internal_snapshot_abort(BlkActionState *common)
103
DO_UPCAST(InternalSnapshotState, common, common);
104
BlockDriverState *bs = state->bs;
105
QEMUSnapshotInfo *sn = &state->sn;
106
+ AioContext *aio_context;
107
Error *local_error = NULL;
108
109
if (!state->created) {
110
return;
111
}
112
113
+ aio_context = bdrv_get_aio_context(state->bs);
114
+ aio_context_acquire(aio_context);
115
+
116
if (bdrv_snapshot_delete(bs, sn->id_str, sn->name, &local_error) < 0) {
117
error_reportf_err(local_error,
118
"Failed to delete snapshot with id '%s' and "
119
@@ -XXX,XX +XXX,XX @@ static void internal_snapshot_abort(BlkActionState *common)
120
sn->id_str, sn->name,
121
bdrv_get_device_name(bs));
122
}
123
+
124
+ aio_context_release(aio_context);
125
}
126
127
static void internal_snapshot_clean(BlkActionState *common)
128
{
129
InternalSnapshotState *state = DO_UPCAST(InternalSnapshotState,
130
common, common);
131
+ AioContext *aio_context;
132
133
- if (state->aio_context) {
134
- if (state->bs) {
135
- bdrv_drained_end(state->bs);
136
- }
137
- aio_context_release(state->aio_context);
138
+ if (!state->bs) {
139
+ return;
140
}
141
+
142
+ aio_context = bdrv_get_aio_context(state->bs);
143
+ aio_context_acquire(aio_context);
144
+
145
+ bdrv_drained_end(state->bs);
146
+
147
+ aio_context_release(aio_context);
148
}
149
150
/* external snapshot private data */
151
--
152
2.14.3
153
154
New patch

The dirty bitmap actions in qmp_transaction have not used AioContext
since the dirty bitmap locking discipline was introduced in commit
2119882c7eb7e2c612b24fc0c8d86f5887d6f1c3 ("block: introduce
dirty_bitmap_mutex"). Remove the unused field.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171206144550.22295-7-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 blockdev.c | 13 -------------
 1 file changed, 13 deletions(-)
diff --git a/blockdev.c b/blockdev.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/blockdev.c
18
+++ b/blockdev.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct BlockDirtyBitmapState {
20
BlkActionState common;
21
BdrvDirtyBitmap *bitmap;
22
BlockDriverState *bs;
23
- AioContext *aio_context;
24
HBitmap *backup;
25
bool prepared;
26
} BlockDirtyBitmapState;
27
@@ -XXX,XX +XXX,XX @@ static void block_dirty_bitmap_clear_prepare(BlkActionState *common,
28
}
29
30
bdrv_clear_dirty_bitmap(state->bitmap, &state->backup);
31
- /* AioContext is released in .clean() */
32
}
33
34
static void block_dirty_bitmap_clear_abort(BlkActionState *common)
35
@@ -XXX,XX +XXX,XX @@ static void block_dirty_bitmap_clear_commit(BlkActionState *common)
36
hbitmap_free(state->backup);
37
}
38
39
-static void block_dirty_bitmap_clear_clean(BlkActionState *common)
40
-{
41
- BlockDirtyBitmapState *state = DO_UPCAST(BlockDirtyBitmapState,
42
- common, common);
43
-
44
- if (state->aio_context) {
45
- aio_context_release(state->aio_context);
46
- }
47
-}
48
-
49
static void abort_prepare(BlkActionState *common, Error **errp)
50
{
51
error_setg(errp, "Transaction aborted using Abort action");
52
@@ -XXX,XX +XXX,XX @@ static const BlkActionOps actions[] = {
53
.prepare = block_dirty_bitmap_clear_prepare,
54
.commit = block_dirty_bitmap_clear_commit,
55
.abort = block_dirty_bitmap_clear_abort,
56
- .clean = block_dirty_bitmap_clear_clean,
57
}
58
};
59
60
--
61
2.14.3
62
63
New patch

Encapsulate IOThread QOM object lookup so that callers don't need to
know how and where IOThread objects live.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171206144550.22295-8-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/sysemu/iothread.h | 1 +
 iothread.c                | 7 +++++++
 2 files changed, 8 insertions(+)
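A sketch of how the new helper is used (the iothread id is illustrative; this
mirrors the x-blockdev-set-iothread handler added later in this series):

    /* Resolve a user-created IOThread by id and pick its AioContext, or fall
     * back to the main loop when no IOThread was requested. */
    IOThread *obj = iothread_by_id("iothread0");         /* illustrative id */
    AioContext *ctx;

    if (obj) {
        ctx = iothread_get_aio_context(obj);
    } else {
        ctx = qemu_get_aio_context();
    }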
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/sysemu/iothread.h
17
+++ b/include/sysemu/iothread.h
18
@@ -XXX,XX +XXX,XX @@ typedef struct {
19
OBJECT_CHECK(IOThread, obj, TYPE_IOTHREAD)
20
21
char *iothread_get_id(IOThread *iothread);
22
+IOThread *iothread_by_id(const char *id);
23
AioContext *iothread_get_aio_context(IOThread *iothread);
24
void iothread_stop_all(void);
25
GMainContext *iothread_get_g_main_context(IOThread *iothread);
26
diff --git a/iothread.c b/iothread.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/iothread.c
29
+++ b/iothread.c
30
@@ -XXX,XX +XXX,XX @@ void iothread_destroy(IOThread *iothread)
31
{
32
object_unparent(OBJECT(iothread));
33
}
34
+
35
+/* Lookup IOThread by its id. Only finds user-created objects, not internal
36
+ * iothread_create() objects. */
37
+IOThread *iothread_by_id(const char *id)
38
+{
39
+ return IOTHREAD(object_resolve_path_type(id, TYPE_IOTHREAD, NULL));
40
+}
41
--
42
2.14.3
43
44
1
Currently there is no easy way for iotests to ensure that a BDS is bound
to a particular IOThread. Normally the virtio-blk device calls
blk_set_aio_context() when dataplane is enabled during guest driver
initialization. This never happens in iotests since -machine
accel=qtest means there is no guest activity (including device driver
initialization).

This patch adds a QMP command to explicitly assign IOThreads in test
cases. See qapi/block-core.json for a description of the command.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171206144550.22295-9-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/block-core.json | 36 ++++++++++++++++++++++++++++++++++++
 blockdev.c           | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+)

libblkio (https://gitlab.com/libblkio/libblkio/) is a library for
high-performance disk I/O. It currently supports io_uring,
virtio-blk-vhost-user, and virtio-blk-vhost-vdpa with additional drivers
under development.

One of the reasons for developing libblkio is that other applications
besides QEMU can use it. This will be particularly useful for
virtio-blk-vhost-user which applications may wish to use for connecting
to qemu-storage-daemon.

libblkio also gives us an opportunity to develop in Rust behind a C API
that is easy to consume from QEMU.

This commit adds io_uring, nvme-io_uring, virtio-blk-vhost-user, and
virtio-blk-vhost-vdpa BlockDrivers to QEMU using libblkio. It will be
easy to add other libblkio drivers since they will share the majority of
code.

For now I/O buffers are copied through bounce buffers if the libblkio
driver requires it. Later commits add an optimization for
pre-registering guest RAM to avoid bounce buffers.

The syntax is:

--blockdev io_uring,node-name=drive0,filename=test.img,readonly=on|off,cache.direct=on|off

--blockdev nvme-io_uring,node-name=drive0,filename=/dev/ng0n1,readonly=on|off,cache.direct=on

--blockdev virtio-blk-vhost-vdpa,node-name=drive0,path=/dev/vdpa...,readonly=on|off,cache.direct=on

--blockdev virtio-blk-vhost-user,node-name=drive0,path=vhost-user-blk.sock,readonly=on|off,cache.direct=on

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20221013185908.1297568-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 MAINTAINERS                   |   6 +
 meson_options.txt             |   2 +
 qapi/block-core.json          |  77 +++-
 meson.build                   |   9 +
 block/blkio.c                 | 831 ++++++++++++++++++++++++++++++++++
 tests/qtest/modules-test.c    |   3 +
 block/meson.build             |   1 +
 scripts/meson-buildoptions.sh |   3 +
 8 files changed, 928 insertions(+), 4 deletions(-)
 create mode 100644 block/blkio.c
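For orientation, the libblkio calls used by the new BlockDriver follow this
general sequence (a condensed sketch of the code added below; error handling
is omitted, the path value is made up, and iov/iovcnt/user_data stand in for a
request's scatter-gather list and completion cookie):

    struct blkio *b;
    struct blkioq *q;
    struct blkio_completion completion;

    blkio_create("io_uring", &b);                /* choose a libblkio driver */
    blkio_set_str(b, "path", "test.img");        /* illustrative image path */
    blkio_connect(b);
    blkio_start(b);

    q = blkio_get_queue(b, 0);                   /* single queue for now */
    blkioq_readv(q, 0, iov, iovcnt, user_data, 0);
    blkioq_do_io(q, NULL, 0, 0, NULL);           /* submit queued requests */
    blkioq_do_io(q, &completion, 1, 1, NULL);    /* wait for one completion */

    blkio_destroy(&b);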
49
20
50
diff --git a/MAINTAINERS b/MAINTAINERS
51
index XXXXXXX..XXXXXXX 100644
52
--- a/MAINTAINERS
53
+++ b/MAINTAINERS
54
@@ -XXX,XX +XXX,XX @@ L: qemu-block@nongnu.org
55
S: Maintained
56
F: block/vdi.c
57
58
+blkio
59
+M: Stefan Hajnoczi <stefanha@redhat.com>
60
+L: qemu-block@nongnu.org
61
+S: Maintained
62
+F: block/blkio.c
63
+
64
iSCSI
65
M: Ronnie Sahlberg <ronniesahlberg@gmail.com>
66
M: Paolo Bonzini <pbonzini@redhat.com>
67
diff --git a/meson_options.txt b/meson_options.txt
68
index XXXXXXX..XXXXXXX 100644
69
--- a/meson_options.txt
70
+++ b/meson_options.txt
71
@@ -XXX,XX +XXX,XX @@ option('bzip2', type : 'feature', value : 'auto',
72
description: 'bzip2 support for DMG images')
73
option('cap_ng', type : 'feature', value : 'auto',
74
description: 'cap_ng support')
75
+option('blkio', type : 'feature', value : 'auto',
76
+ description: 'libblkio block device driver')
77
option('bpf', type : 'feature', value : 'auto',
78
description: 'eBPF support')
79
option('cocoa', type : 'feature', value : 'auto',
80
diff --git a/qapi/block-core.json b/qapi/block-core.json
21
diff --git a/qapi/block-core.json b/qapi/block-core.json
81
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
82
--- a/qapi/block-core.json
23
--- a/qapi/block-core.json
83
+++ b/qapi/block-core.json
24
+++ b/qapi/block-core.json
84
@@ -XXX,XX +XXX,XX @@
25
@@ -XXX,XX +XXX,XX @@
85
'file', 'snapshot-access', 'ftp', 'ftps', 'gluster',
26
'data' : { 'parent': 'str',
86
{'name': 'host_cdrom', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
27
'*child': 'str',
87
{'name': 'host_device', 'if': 'HAVE_HOST_BLOCK_DEVICE' },
28
'*node': 'str' } }
88
- 'http', 'https', 'iscsi',
89
- 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels',
90
- 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum', 'raw', 'rbd',
91
+ 'http', 'https',
92
+ { 'name': 'io_uring', 'if': 'CONFIG_BLKIO' },
93
+ 'iscsi',
94
+ 'luks', 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme',
95
+ { 'name': 'nvme-io_uring', 'if': 'CONFIG_BLKIO' },
96
+ 'parallels', 'preallocate', 'qcow', 'qcow2', 'qed', 'quorum',
97
+ 'raw', 'rbd',
98
{ 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
99
- 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat' ] }
100
+ 'ssh', 'throttle', 'vdi', 'vhdx',
101
+ { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
102
+ { 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
103
+ 'vmdk', 'vpc', 'vvfat' ] }
104
105
##
106
# @BlockdevOptionsFile:
107
@@ -XXX,XX +XXX,XX @@
108
'*debug': 'int',
109
'*logfile': 'str' } }
110
111
+##
112
+# @BlockdevOptionsIoUring:
113
+#
114
+# Driver specific block device options for the io_uring backend.
115
+#
116
+# @filename: path to the image file
117
+#
118
+# Since: 7.2
119
+##
120
+{ 'struct': 'BlockdevOptionsIoUring',
121
+ 'data': { 'filename': 'str' },
122
+ 'if': 'CONFIG_BLKIO' }
123
+
29
+
124
+##
30
+##
125
+# @BlockdevOptionsNvmeIoUring:
31
+# @x-blockdev-set-iothread:
126
+#
32
+#
127
+# Driver specific block device options for the nvme-io_uring backend.
33
+# Move @node and its children into the @iothread. If @iothread is null then
34
+# move @node and its children into the main loop.
128
+#
35
+#
129
+# @filename: path to the image file
36
+# The node must not be attached to a BlockBackend.
130
+#
37
+#
131
+# Since: 7.2
38
+# @node-name: the name of the block driver node
39
+#
40
+# @iothread: the name of the IOThread object or null for the main loop
41
+#
42
+# Note: this command is experimental and intended for test cases that need
43
+# control over IOThreads only.
44
+#
45
+# Since: 2.12
46
+#
47
+# Example:
48
+#
49
+# 1. Move a node into an IOThread
50
+# -> { "execute": "x-blockdev-set-iothread",
51
+# "arguments": { "node-name": "disk1",
52
+# "iothread": "iothread0" } }
53
+# <- { "return": {} }
54
+#
55
+# 2. Move a node into the main loop
56
+# -> { "execute": "x-blockdev-set-iothread",
57
+# "arguments": { "node-name": "disk1",
58
+# "iothread": null } }
59
+# <- { "return": {} }
60
+#
132
+##
61
+##
133
+{ 'struct': 'BlockdevOptionsNvmeIoUring',
62
+{ 'command': 'x-blockdev-set-iothread',
134
+ 'data': { 'filename': 'str' },
63
+ 'data' : { 'node-name': 'str',
135
+ 'if': 'CONFIG_BLKIO' }
64
+ 'iothread': 'StrOrNull' } }
65
diff --git a/blockdev.c b/blockdev.c
66
index XXXXXXX..XXXXXXX 100644
67
--- a/blockdev.c
68
+++ b/blockdev.c
69
@@ -XXX,XX +XXX,XX @@
70
#include "qapi/qmp/qerror.h"
71
#include "qapi/qobject-output-visitor.h"
72
#include "sysemu/sysemu.h"
73
+#include "sysemu/iothread.h"
74
#include "block/block_int.h"
75
#include "qmp-commands.h"
76
#include "block/trace.h"
77
@@ -XXX,XX +XXX,XX @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
78
return head;
79
}
80
81
+void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
82
+ Error **errp)
83
+{
84
+ AioContext *old_context;
85
+ AioContext *new_context;
86
+ BlockDriverState *bs;
136
+
87
+
137
+##
88
+ bs = bdrv_find_node(node_name);
138
+# @BlockdevOptionsVirtioBlkVhostUser:
89
+ if (!bs) {
139
+#
90
+ error_setg(errp, "Cannot find node %s", node_name);
140
+# Driver specific block device options for the virtio-blk-vhost-user backend.
141
+#
142
+# @path: path to the vhost-user UNIX domain socket.
143
+#
144
+# Since: 7.2
145
+##
146
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostUser',
147
+ 'data': { 'path': 'str' },
148
+ 'if': 'CONFIG_BLKIO' }
149
+
150
+##
151
+# @BlockdevOptionsVirtioBlkVhostVdpa:
152
+#
153
+# Driver specific block device options for the virtio-blk-vhost-vdpa backend.
154
+#
155
+# @path: path to the vhost-vdpa character device.
156
+#
157
+# Since: 7.2
158
+##
159
+{ 'struct': 'BlockdevOptionsVirtioBlkVhostVdpa',
160
+ 'data': { 'path': 'str' },
161
+ 'if': 'CONFIG_BLKIO' }
162
+
163
##
164
# @IscsiTransport:
165
#
166
@@ -XXX,XX +XXX,XX @@
167
'if': 'HAVE_HOST_BLOCK_DEVICE' },
168
'http': 'BlockdevOptionsCurlHttp',
169
'https': 'BlockdevOptionsCurlHttps',
170
+ 'io_uring': { 'type': 'BlockdevOptionsIoUring',
171
+ 'if': 'CONFIG_BLKIO' },
172
'iscsi': 'BlockdevOptionsIscsi',
173
'luks': 'BlockdevOptionsLUKS',
174
'nbd': 'BlockdevOptionsNbd',
175
@@ -XXX,XX +XXX,XX @@
176
'null-aio': 'BlockdevOptionsNull',
177
'null-co': 'BlockdevOptionsNull',
178
'nvme': 'BlockdevOptionsNVMe',
179
+ 'nvme-io_uring': { 'type': 'BlockdevOptionsNvmeIoUring',
180
+ 'if': 'CONFIG_BLKIO' },
181
'parallels': 'BlockdevOptionsGenericFormat',
182
'preallocate':'BlockdevOptionsPreallocate',
183
'qcow2': 'BlockdevOptionsQcow2',
184
@@ -XXX,XX +XXX,XX @@
185
'throttle': 'BlockdevOptionsThrottle',
186
'vdi': 'BlockdevOptionsGenericFormat',
187
'vhdx': 'BlockdevOptionsGenericFormat',
188
+ 'virtio-blk-vhost-user':
189
+ { 'type': 'BlockdevOptionsVirtioBlkVhostUser',
190
+ 'if': 'CONFIG_BLKIO' },
191
+ 'virtio-blk-vhost-vdpa':
192
+ { 'type': 'BlockdevOptionsVirtioBlkVhostVdpa',
193
+ 'if': 'CONFIG_BLKIO' },
194
'vmdk': 'BlockdevOptionsGenericCOWFormat',
195
'vpc': 'BlockdevOptionsGenericFormat',
196
'vvfat': 'BlockdevOptionsVVFAT'
197
diff --git a/meson.build b/meson.build
198
index XXXXXXX..XXXXXXX 100644
199
--- a/meson.build
200
+++ b/meson.build
201
@@ -XXX,XX +XXX,XX @@ if not get_option('virglrenderer').auto() or have_system or have_vhost_user_gpu
202
required: get_option('virglrenderer'),
203
kwargs: static_kwargs)
204
endif
205
+blkio = not_found
206
+if not get_option('blkio').auto() or have_block
207
+ blkio = dependency('blkio',
208
+ method: 'pkg-config',
209
+ required: get_option('blkio'),
210
+ kwargs: static_kwargs)
211
+endif
212
curl = not_found
213
if not get_option('curl').auto() or have_block
214
curl = dependency('libcurl', version: '>=7.29.0',
215
@@ -XXX,XX +XXX,XX @@ config_host_data.set('CONFIG_LIBUDEV', libudev.found())
216
config_host_data.set('CONFIG_LZO', lzo.found())
217
config_host_data.set('CONFIG_MPATH', mpathpersist.found())
218
config_host_data.set('CONFIG_MPATH_NEW_API', mpathpersist_new_api)
219
+config_host_data.set('CONFIG_BLKIO', blkio.found())
220
config_host_data.set('CONFIG_CURL', curl.found())
221
config_host_data.set('CONFIG_CURSES', curses.found())
222
config_host_data.set('CONFIG_GBM', gbm.found())
223
@@ -XXX,XX +XXX,XX @@ summary_info += {'PAM': pam}
224
summary_info += {'iconv support': iconv}
225
summary_info += {'curses support': curses}
226
summary_info += {'virgl support': virgl}
227
+summary_info += {'blkio support': blkio}
228
summary_info += {'curl support': curl}
229
summary_info += {'Multipath support': mpathpersist}
230
summary_info += {'PNG support': png}
231
diff --git a/block/blkio.c b/block/blkio.c
232
new file mode 100644
233
index XXXXXXX..XXXXXXX
234
--- /dev/null
235
+++ b/block/blkio.c
236
@@ -XXX,XX +XXX,XX @@
237
+/* SPDX-License-Identifier: LGPL-2.1-or-later */
238
+/*
239
+ * libblkio BlockDriver
240
+ *
241
+ * Copyright Red Hat, Inc.
242
+ *
243
+ * Author:
244
+ * Stefan Hajnoczi <stefanha@redhat.com>
245
+ */
246
+
247
+#include "qemu/osdep.h"
248
+#include <blkio.h>
249
+#include "block/block_int.h"
250
+#include "qapi/error.h"
251
+#include "qapi/qmp/qdict.h"
252
+#include "qemu/module.h"
253
+
254
+/*
255
+ * Keep the QEMU BlockDriver names identical to the libblkio driver names.
256
+ * Using macros instead of typing out the string literals avoids typos.
257
+ */
258
+#define DRIVER_IO_URING "io_uring"
259
+#define DRIVER_NVME_IO_URING "nvme-io_uring"
260
+#define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
261
+#define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
262
+
263
+/*
264
+ * Allocated bounce buffers are kept in a list sorted by buffer address.
265
+ */
266
+typedef struct BlkioBounceBuf {
267
+ QLIST_ENTRY(BlkioBounceBuf) next;
268
+
269
+ /* The bounce buffer */
270
+ struct iovec buf;
271
+} BlkioBounceBuf;
272
+
273
+typedef struct {
274
+ /*
275
+ * libblkio is not thread-safe so this lock protects ->blkio and
276
+ * ->blkioq.
277
+ */
278
+ QemuMutex blkio_lock;
279
+ struct blkio *blkio;
280
+ struct blkioq *blkioq; /* make this multi-queue in the future... */
281
+ int completion_fd;
282
+
283
+ /*
284
+ * Polling fetches the next completion into this field.
285
+ *
286
+ * No lock is necessary since only one thread calls aio_poll() and invokes
287
+ * fd and poll handlers.
288
+ */
289
+ struct blkio_completion poll_completion;
290
+
291
+ /*
292
+ * Protects ->bounce_pool, ->bounce_bufs, ->bounce_available.
293
+ *
294
+ * Lock ordering: ->bounce_lock before ->blkio_lock.
295
+ */
296
+ CoMutex bounce_lock;
297
+
298
+ /* Bounce buffer pool */
299
+ struct blkio_mem_region bounce_pool;
300
+
301
+ /* Sorted list of allocated bounce buffers */
302
+ QLIST_HEAD(, BlkioBounceBuf) bounce_bufs;
303
+
304
+ /* Queue for coroutines waiting for bounce buffer space */
305
+ CoQueue bounce_available;
306
+
307
+ /* The value of the "mem-region-alignment" property */
308
+ size_t mem_region_alignment;
309
+
310
+ /* Can we skip adding/deleting blkio_mem_regions? */
311
+ bool needs_mem_regions;
312
+} BDRVBlkioState;
313
+
314
+/* Called with s->bounce_lock held */
315
+static int blkio_resize_bounce_pool(BDRVBlkioState *s, int64_t bytes)
316
+{
317
+ /* There can be no allocated bounce buffers during resize */
318
+ assert(QLIST_EMPTY(&s->bounce_bufs));
319
+
320
+ /* Pad size to reduce frequency of resize calls */
321
+ bytes += 128 * 1024;
322
+
323
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
324
+ int ret;
325
+
326
+ if (s->bounce_pool.addr) {
327
+ blkio_unmap_mem_region(s->blkio, &s->bounce_pool);
328
+ blkio_free_mem_region(s->blkio, &s->bounce_pool);
329
+ memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
330
+ }
331
+
332
+ /* Automatically freed when s->blkio is destroyed */
333
+ ret = blkio_alloc_mem_region(s->blkio, &s->bounce_pool, bytes);
334
+ if (ret < 0) {
335
+ return ret;
336
+ }
337
+
338
+ ret = blkio_map_mem_region(s->blkio, &s->bounce_pool);
339
+ if (ret < 0) {
340
+ blkio_free_mem_region(s->blkio, &s->bounce_pool);
341
+ memset(&s->bounce_pool, 0, sizeof(s->bounce_pool));
342
+ return ret;
343
+ }
344
+ }
345
+
346
+ return 0;
347
+}
348
+
349
+/* Called with s->bounce_lock held */
350
+static bool
351
+blkio_do_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
352
+ int64_t bytes)
353
+{
354
+ void *addr = s->bounce_pool.addr;
355
+ BlkioBounceBuf *cur = NULL;
356
+ BlkioBounceBuf *prev = NULL;
357
+ ptrdiff_t space;
358
+
359
+ /*
360
+ * This is just a linear search over the holes between requests. An
361
+ * efficient allocator would be nice.
362
+ */
363
+ QLIST_FOREACH(cur, &s->bounce_bufs, next) {
364
+ space = cur->buf.iov_base - addr;
365
+ if (bytes <= space) {
366
+ QLIST_INSERT_BEFORE(cur, bounce, next);
367
+ bounce->buf.iov_base = addr;
368
+ bounce->buf.iov_len = bytes;
369
+ return true;
370
+ }
371
+
372
+ addr = cur->buf.iov_base + cur->buf.iov_len;
373
+ prev = cur;
374
+ }
375
+
376
+ /* Is there space after the last request? */
377
+ space = s->bounce_pool.addr + s->bounce_pool.len - addr;
378
+ if (bytes > space) {
379
+ return false;
380
+ }
381
+ if (prev) {
382
+ QLIST_INSERT_AFTER(prev, bounce, next);
383
+ } else {
384
+ QLIST_INSERT_HEAD(&s->bounce_bufs, bounce, next);
385
+ }
386
+ bounce->buf.iov_base = addr;
387
+ bounce->buf.iov_len = bytes;
388
+ return true;
389
+}
390
+
391
+static int coroutine_fn
392
+blkio_alloc_bounce_buffer(BDRVBlkioState *s, BlkioBounceBuf *bounce,
393
+ int64_t bytes)
394
+{
395
+ /*
396
+ * Ensure fairness: first time around we join the back of the queue,
397
+ * subsequently we join the front so we don't lose our place.
398
+ */
399
+ CoQueueWaitFlags wait_flags = 0;
400
+
401
+ QEMU_LOCK_GUARD(&s->bounce_lock);
402
+
403
+ /* Ensure fairness: don't even try if other requests are already waiting */
404
+ if (!qemu_co_queue_empty(&s->bounce_available)) {
405
+ qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
406
+ wait_flags);
407
+ wait_flags = CO_QUEUE_WAIT_FRONT;
408
+ }
409
+
410
+ while (true) {
411
+ if (blkio_do_alloc_bounce_buffer(s, bounce, bytes)) {
412
+ /* Kick the next queued request since there may be space */
413
+ qemu_co_queue_next(&s->bounce_available);
414
+ return 0;
415
+ }
416
+
417
+ /*
418
+ * If there are no in-flight requests then the pool was simply too
419
+ * small.
420
+ */
421
+ if (QLIST_EMPTY(&s->bounce_bufs)) {
422
+ bool ok;
423
+ int ret;
424
+
425
+ ret = blkio_resize_bounce_pool(s, bytes);
426
+ if (ret < 0) {
427
+ /* Kick the next queued request since that may fail too */
428
+ qemu_co_queue_next(&s->bounce_available);
429
+ return ret;
430
+ }
431
+
432
+ ok = blkio_do_alloc_bounce_buffer(s, bounce, bytes);
433
+ assert(ok); /* must have space this time */
434
+ return 0;
435
+ }
436
+
437
+ qemu_co_queue_wait_flags(&s->bounce_available, &s->bounce_lock,
438
+ wait_flags);
439
+ wait_flags = CO_QUEUE_WAIT_FRONT;
440
+ }
441
+}
442
+
443
+static void coroutine_fn blkio_free_bounce_buffer(BDRVBlkioState *s,
444
+ BlkioBounceBuf *bounce)
445
+{
446
+ QEMU_LOCK_GUARD(&s->bounce_lock);
447
+
448
+ QLIST_REMOVE(bounce, next);
449
+
450
+ /* Wake up waiting coroutines since space may now be available */
451
+ qemu_co_queue_next(&s->bounce_available);
452
+}
453
+
454
+/* For async to .bdrv_co_*() conversion */
455
+typedef struct {
456
+ Coroutine *coroutine;
457
+ int ret;
458
+} BlkioCoData;
459
+
460
+static void blkio_completion_fd_read(void *opaque)
461
+{
462
+ BlockDriverState *bs = opaque;
463
+ BDRVBlkioState *s = bs->opaque;
464
+ uint64_t val;
465
+ int ret;
466
+
467
+ /* Polling may have already fetched a completion */
468
+ if (s->poll_completion.user_data != NULL) {
469
+ BlkioCoData *cod = s->poll_completion.user_data;
470
+ cod->ret = s->poll_completion.ret;
471
+
472
+ /* Clear it in case aio_co_wake() enters a nested event loop */
473
+ s->poll_completion.user_data = NULL;
474
+
475
+ aio_co_wake(cod->coroutine);
476
+ }
477
+
478
+ /* Reset completion fd status */
479
+ ret = read(s->completion_fd, &val, sizeof(val));
480
+
481
+ /* Ignore errors, there's nothing we can do */
482
+ (void)ret;
483
+
484
+ /*
485
+ * Reading one completion at a time makes nested event loop re-entrancy
486
+ * simple. Change this loop to get multiple completions in one go if it
487
+ * becomes a performance bottleneck.
488
+ */
489
+ while (true) {
490
+ struct blkio_completion completion;
491
+
492
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
493
+ ret = blkioq_do_io(s->blkioq, &completion, 0, 1, NULL);
494
+ }
495
+ if (ret != 1) {
496
+ break;
497
+ }
498
+
499
+ BlkioCoData *cod = completion.user_data;
500
+ cod->ret = completion.ret;
501
+ aio_co_wake(cod->coroutine);
502
+ }
503
+}
504
+
505
+static bool blkio_completion_fd_poll(void *opaque)
506
+{
507
+ BlockDriverState *bs = opaque;
508
+ BDRVBlkioState *s = bs->opaque;
509
+ int ret;
510
+
511
+ /* Just in case we already fetched a completion */
512
+ if (s->poll_completion.user_data != NULL) {
513
+ return true;
514
+ }
515
+
516
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
517
+ ret = blkioq_do_io(s->blkioq, &s->poll_completion, 0, 1, NULL);
518
+ }
519
+ return ret == 1;
520
+}
521
+
522
+static void blkio_completion_fd_poll_ready(void *opaque)
523
+{
524
+ blkio_completion_fd_read(opaque);
525
+}
526
+
527
+static void blkio_attach_aio_context(BlockDriverState *bs,
528
+ AioContext *new_context)
529
+{
530
+ BDRVBlkioState *s = bs->opaque;
531
+
532
+ aio_set_fd_handler(new_context,
533
+ s->completion_fd,
534
+ false,
535
+ blkio_completion_fd_read,
536
+ NULL,
537
+ blkio_completion_fd_poll,
538
+ blkio_completion_fd_poll_ready,
539
+ bs);
540
+}
541
+
542
+static void blkio_detach_aio_context(BlockDriverState *bs)
543
+{
544
+ BDRVBlkioState *s = bs->opaque;
545
+
546
+ aio_set_fd_handler(bdrv_get_aio_context(bs),
547
+ s->completion_fd,
548
+ false, NULL, NULL, NULL, NULL, NULL);
549
+}
550
+
551
+/* Call with s->blkio_lock held to submit I/O after enqueuing a new request */
552
+static void blkio_submit_io(BlockDriverState *bs)
553
+{
554
+ if (qatomic_read(&bs->io_plugged) == 0) {
555
+ BDRVBlkioState *s = bs->opaque;
556
+
557
+ blkioq_do_io(s->blkioq, NULL, 0, 0, NULL);
558
+ }
559
+}
560
+
561
+static int coroutine_fn
562
+blkio_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes)
563
+{
564
+ BDRVBlkioState *s = bs->opaque;
565
+ BlkioCoData cod = {
566
+ .coroutine = qemu_coroutine_self(),
567
+ };
568
+
569
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
570
+ blkioq_discard(s->blkioq, offset, bytes, &cod, 0);
571
+ blkio_submit_io(bs);
572
+ }
573
+
574
+ qemu_coroutine_yield();
575
+ return cod.ret;
576
+}
577
+
578
+static int coroutine_fn
579
+blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
580
+ QEMUIOVector *qiov, BdrvRequestFlags flags)
581
+{
582
+ BlkioCoData cod = {
583
+ .coroutine = qemu_coroutine_self(),
584
+ };
585
+ BDRVBlkioState *s = bs->opaque;
586
+ bool use_bounce_buffer = s->needs_mem_regions;
587
+ BlkioBounceBuf bounce;
588
+ struct iovec *iov = qiov->iov;
589
+ int iovcnt = qiov->niov;
590
+
591
+ if (use_bounce_buffer) {
592
+ int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
593
+ if (ret < 0) {
594
+ return ret;
595
+ }
596
+
597
+ iov = &bounce.buf;
598
+ iovcnt = 1;
599
+ }
600
+
601
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
602
+ blkioq_readv(s->blkioq, offset, iov, iovcnt, &cod, 0);
603
+ blkio_submit_io(bs);
604
+ }
605
+
606
+ qemu_coroutine_yield();
607
+
608
+ if (use_bounce_buffer) {
609
+ if (cod.ret == 0) {
610
+ qemu_iovec_from_buf(qiov, 0,
611
+ bounce.buf.iov_base,
612
+ bounce.buf.iov_len);
613
+ }
614
+
615
+ blkio_free_bounce_buffer(s, &bounce);
616
+ }
617
+
618
+ return cod.ret;
619
+}
620
+
621
+static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
622
+ int64_t bytes, QEMUIOVector *qiov, BdrvRequestFlags flags)
623
+{
624
+ uint32_t blkio_flags = (flags & BDRV_REQ_FUA) ? BLKIO_REQ_FUA : 0;
625
+ BlkioCoData cod = {
626
+ .coroutine = qemu_coroutine_self(),
627
+ };
628
+ BDRVBlkioState *s = bs->opaque;
629
+ bool use_bounce_buffer = s->needs_mem_regions;
630
+ BlkioBounceBuf bounce;
631
+ struct iovec *iov = qiov->iov;
632
+ int iovcnt = qiov->niov;
633
+
634
+ if (use_bounce_buffer) {
635
+ int ret = blkio_alloc_bounce_buffer(s, &bounce, bytes);
636
+ if (ret < 0) {
637
+ return ret;
638
+ }
639
+
640
+ qemu_iovec_to_buf(qiov, 0, bounce.buf.iov_base, bytes);
641
+ iov = &bounce.buf;
642
+ iovcnt = 1;
643
+ }
644
+
645
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
646
+ blkioq_writev(s->blkioq, offset, iov, iovcnt, &cod, blkio_flags);
647
+ blkio_submit_io(bs);
648
+ }
649
+
650
+ qemu_coroutine_yield();
651
+
652
+ if (use_bounce_buffer) {
653
+ blkio_free_bounce_buffer(s, &bounce);
654
+ }
655
+
656
+ return cod.ret;
657
+}
658
+
659
+static int coroutine_fn blkio_co_flush(BlockDriverState *bs)
660
+{
661
+ BDRVBlkioState *s = bs->opaque;
662
+ BlkioCoData cod = {
663
+ .coroutine = qemu_coroutine_self(),
664
+ };
665
+
666
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
667
+ blkioq_flush(s->blkioq, &cod, 0);
668
+ blkio_submit_io(bs);
669
+ }
670
+
671
+ qemu_coroutine_yield();
672
+ return cod.ret;
673
+}
674
+
675
+static int coroutine_fn blkio_co_pwrite_zeroes(BlockDriverState *bs,
676
+ int64_t offset, int64_t bytes, BdrvRequestFlags flags)
677
+{
678
+ BDRVBlkioState *s = bs->opaque;
679
+ BlkioCoData cod = {
680
+ .coroutine = qemu_coroutine_self(),
681
+ };
682
+ uint32_t blkio_flags = 0;
683
+
684
+ if (flags & BDRV_REQ_FUA) {
685
+ blkio_flags |= BLKIO_REQ_FUA;
686
+ }
687
+ if (!(flags & BDRV_REQ_MAY_UNMAP)) {
688
+ blkio_flags |= BLKIO_REQ_NO_UNMAP;
689
+ }
690
+ if (flags & BDRV_REQ_NO_FALLBACK) {
691
+ blkio_flags |= BLKIO_REQ_NO_FALLBACK;
692
+ }
693
+
694
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
695
+ blkioq_write_zeroes(s->blkioq, offset, bytes, &cod, blkio_flags);
696
+ blkio_submit_io(bs);
697
+ }
698
+
699
+ qemu_coroutine_yield();
700
+ return cod.ret;
701
+}
702
+
703
+static void blkio_io_unplug(BlockDriverState *bs)
704
+{
705
+ BDRVBlkioState *s = bs->opaque;
706
+
707
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
708
+ blkio_submit_io(bs);
709
+ }
710
+}
711
+
712
+static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
713
+ Error **errp)
714
+{
715
+ const char *filename = qdict_get_str(options, "filename");
716
+ BDRVBlkioState *s = bs->opaque;
717
+ int ret;
718
+
719
+ ret = blkio_set_str(s->blkio, "path", filename);
720
+ qdict_del(options, "filename");
721
+ if (ret < 0) {
722
+ error_setg_errno(errp, -ret, "failed to set path: %s",
723
+ blkio_get_error_msg());
724
+ return ret;
725
+ }
726
+
727
+ if (flags & BDRV_O_NOCACHE) {
728
+ ret = blkio_set_bool(s->blkio, "direct", true);
729
+ if (ret < 0) {
730
+ error_setg_errno(errp, -ret, "failed to set direct: %s",
731
+ blkio_get_error_msg());
732
+ return ret;
733
+ }
734
+ }
735
+
736
+ return 0;
737
+}
738
+
739
+static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
740
+ Error **errp)
741
+{
742
+ const char *filename = qdict_get_str(options, "filename");
743
+ BDRVBlkioState *s = bs->opaque;
744
+ int ret;
745
+
746
+ ret = blkio_set_str(s->blkio, "path", filename);
747
+ qdict_del(options, "filename");
748
+ if (ret < 0) {
749
+ error_setg_errno(errp, -ret, "failed to set path: %s",
750
+ blkio_get_error_msg());
751
+ return ret;
752
+ }
753
+
754
+ if (!(flags & BDRV_O_NOCACHE)) {
755
+ error_setg(errp, "cache.direct=off is not supported");
756
+ return -EINVAL;
757
+ }
758
+
759
+ return 0;
760
+}
761
+
762
+static int blkio_virtio_blk_common_open(BlockDriverState *bs,
763
+ QDict *options, int flags, Error **errp)
764
+{
765
+ const char *path = qdict_get_try_str(options, "path");
766
+ BDRVBlkioState *s = bs->opaque;
767
+ int ret;
768
+
769
+ if (!path) {
770
+ error_setg(errp, "missing 'path' option");
771
+ return -EINVAL;
772
+ }
773
+
774
+ ret = blkio_set_str(s->blkio, "path", path);
775
+ qdict_del(options, "path");
776
+ if (ret < 0) {
777
+ error_setg_errno(errp, -ret, "failed to set path: %s",
778
+ blkio_get_error_msg());
779
+ return ret;
780
+ }
781
+
782
+ if (!(flags & BDRV_O_NOCACHE)) {
783
+ error_setg(errp, "cache.direct=off is not supported");
784
+ return -EINVAL;
785
+ }
786
+ return 0;
787
+}
788
+
789
+static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
790
+ Error **errp)
791
+{
792
+ const char *blkio_driver = bs->drv->protocol_name;
793
+ BDRVBlkioState *s = bs->opaque;
794
+ int ret;
795
+
796
+ ret = blkio_create(blkio_driver, &s->blkio);
797
+ if (ret < 0) {
798
+ error_setg_errno(errp, -ret, "blkio_create failed: %s",
799
+ blkio_get_error_msg());
800
+ return ret;
801
+ }
802
+
803
+ if (strcmp(blkio_driver, DRIVER_IO_URING) == 0) {
804
+ ret = blkio_io_uring_open(bs, options, flags, errp);
805
+ } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
806
+ ret = blkio_nvme_io_uring(bs, options, flags, errp);
807
+ } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
808
+ ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
809
+ } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
810
+ ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
811
+ } else {
812
+ g_assert_not_reached();
813
+ }
814
+ if (ret < 0) {
815
+ blkio_destroy(&s->blkio);
816
+ return ret;
817
+ }
818
+
819
+ if (!(flags & BDRV_O_RDWR)) {
820
+ ret = blkio_set_bool(s->blkio, "read-only", true);
821
+ if (ret < 0) {
822
+ error_setg_errno(errp, -ret, "failed to set read-only: %s",
823
+ blkio_get_error_msg());
824
+ blkio_destroy(&s->blkio);
825
+ return ret;
826
+ }
827
+ }
828
+
829
+ ret = blkio_connect(s->blkio);
830
+ if (ret < 0) {
831
+ error_setg_errno(errp, -ret, "blkio_connect failed: %s",
832
+ blkio_get_error_msg());
833
+ blkio_destroy(&s->blkio);
834
+ return ret;
835
+ }
836
+
837
+ ret = blkio_get_bool(s->blkio,
838
+ "needs-mem-regions",
839
+ &s->needs_mem_regions);
840
+ if (ret < 0) {
841
+ error_setg_errno(errp, -ret,
842
+ "failed to get needs-mem-regions: %s",
843
+ blkio_get_error_msg());
844
+ blkio_destroy(&s->blkio);
845
+ return ret;
846
+ }
847
+
848
+ ret = blkio_get_uint64(s->blkio,
849
+ "mem-region-alignment",
850
+ &s->mem_region_alignment);
851
+ if (ret < 0) {
852
+ error_setg_errno(errp, -ret,
853
+ "failed to get mem-region-alignment: %s",
854
+ blkio_get_error_msg());
855
+ blkio_destroy(&s->blkio);
856
+ return ret;
857
+ }
858
+
859
+ ret = blkio_start(s->blkio);
860
+ if (ret < 0) {
861
+ error_setg_errno(errp, -ret, "blkio_start failed: %s",
862
+ blkio_get_error_msg());
863
+ blkio_destroy(&s->blkio);
864
+ return ret;
865
+ }
866
+
867
+ bs->supported_write_flags = BDRV_REQ_FUA;
868
+ bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
869
+ BDRV_REQ_NO_FALLBACK;
870
+
871
+ qemu_mutex_init(&s->blkio_lock);
872
+ qemu_co_mutex_init(&s->bounce_lock);
873
+ qemu_co_queue_init(&s->bounce_available);
874
+ QLIST_INIT(&s->bounce_bufs);
875
+ s->blkioq = blkio_get_queue(s->blkio, 0);
876
+ s->completion_fd = blkioq_get_completion_fd(s->blkioq);
877
+
878
+ blkio_attach_aio_context(bs, bdrv_get_aio_context(bs));
879
+ return 0;
880
+}
881
+
882
+static void blkio_close(BlockDriverState *bs)
883
+{
884
+ BDRVBlkioState *s = bs->opaque;
885
+
886
+ /* There is no destroy() API for s->bounce_lock */
887
+
888
+ qemu_mutex_destroy(&s->blkio_lock);
889
+ blkio_detach_aio_context(bs);
890
+ blkio_destroy(&s->blkio);
891
+}
892
+
893
+static int64_t blkio_getlength(BlockDriverState *bs)
894
+{
895
+ BDRVBlkioState *s = bs->opaque;
896
+ uint64_t capacity;
897
+ int ret;
898
+
899
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
900
+ ret = blkio_get_uint64(s->blkio, "capacity", &capacity);
901
+ }
902
+ if (ret < 0) {
903
+ return -ret;
904
+ }
905
+
906
+ return capacity;
907
+}
908
+
909
+static int blkio_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
910
+{
911
+ return 0;
912
+}
913
+
914
+static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
915
+{
916
+ BDRVBlkioState *s = bs->opaque;
917
+ QEMU_LOCK_GUARD(&s->blkio_lock);
918
+ int value;
919
+ int ret;
920
+
921
+ ret = blkio_get_int(s->blkio, "request-alignment", &value);
922
+ if (ret < 0) {
923
+ error_setg_errno(errp, -ret, "failed to get \"request-alignment\": %s",
924
+ blkio_get_error_msg());
925
+ return;
926
+ }
927
+ bs->bl.request_alignment = value;
928
+ if (bs->bl.request_alignment < 1 ||
929
+ bs->bl.request_alignment >= INT_MAX ||
930
+ !is_power_of_2(bs->bl.request_alignment)) {
931
+ error_setg(errp, "invalid \"request-alignment\" value %" PRIu32 ", "
932
+ "must be a power of 2 less than INT_MAX",
933
+ bs->bl.request_alignment);
934
+ return;
91
+ return;
935
+ }
92
+ }
936
+
93
+
937
+ ret = blkio_get_int(s->blkio, "optimal-io-size", &value);
94
+ /* If we want to allow more extreme test scenarios this guard could be
938
+ if (ret < 0) {
95
+ * removed. For now it protects against accidents. */
939
+ error_setg_errno(errp, -ret, "failed to get \"optimal-io-size\": %s",
96
+ if (bdrv_has_blk(bs)) {
940
+ blkio_get_error_msg());
97
+ error_setg(errp, "Node %s is in use", node_name);
941
+ return;
942
+ }
943
+ bs->bl.opt_transfer = value;
944
+ if (bs->bl.opt_transfer > INT_MAX ||
945
+ (bs->bl.opt_transfer % bs->bl.request_alignment)) {
946
+ error_setg(errp, "invalid \"optimal-io-size\" value %" PRIu32 ", must "
947
+ "be a multiple of %" PRIu32, bs->bl.opt_transfer,
948
+ bs->bl.request_alignment);
949
+ return;
98
+ return;
950
+ }
99
+ }
951
+
100
+
952
+ ret = blkio_get_int(s->blkio, "max-transfer", &value);
101
+ if (iothread->type == QTYPE_QSTRING) {
953
+ if (ret < 0) {
102
+ IOThread *obj = iothread_by_id(iothread->u.s);
954
+ error_setg_errno(errp, -ret, "failed to get \"max-transfer\": %s",
103
+ if (!obj) {
955
+ blkio_get_error_msg());
104
+ error_setg(errp, "Cannot find iothread %s", iothread->u.s);
956
+ return;
105
+ return;
957
+ }
106
+ }
958
+ bs->bl.max_transfer = value;
107
+
959
+ if ((bs->bl.max_transfer % bs->bl.request_alignment) ||
108
+ new_context = iothread_get_aio_context(obj);
960
+ (bs->bl.opt_transfer && (bs->bl.max_transfer % bs->bl.opt_transfer))) {
109
+ } else {
961
+ error_setg(errp, "invalid \"max-transfer\" value %" PRIu32 ", must be "
110
+ new_context = qemu_get_aio_context();
962
+ "a multiple of %" PRIu32 " and %" PRIu32 " (if non-zero)",
963
+ bs->bl.max_transfer, bs->bl.request_alignment,
964
+ bs->bl.opt_transfer);
965
+ return;
966
+ }
111
+ }
967
+
112
+
968
+ ret = blkio_get_int(s->blkio, "buf-alignment", &value);
113
+ old_context = bdrv_get_aio_context(bs);
969
+ if (ret < 0) {
114
+ aio_context_acquire(old_context);
970
+ error_setg_errno(errp, -ret, "failed to get \"buf-alignment\": %s",
971
+ blkio_get_error_msg());
972
+ return;
973
+ }
974
+ if (value < 1) {
975
+ error_setg(errp, "invalid \"buf-alignment\" value %d, must be "
976
+ "positive", value);
977
+ return;
978
+ }
979
+ bs->bl.min_mem_alignment = value;
980
+
115
+
981
+ ret = blkio_get_int(s->blkio, "optimal-buf-alignment", &value);
116
+ bdrv_set_aio_context(bs, new_context);
982
+ if (ret < 0) {
983
+ error_setg_errno(errp, -ret,
984
+ "failed to get \"optimal-buf-alignment\": %s",
985
+ blkio_get_error_msg());
986
+ return;
987
+ }
988
+ if (value < 1) {
989
+ error_setg(errp, "invalid \"optimal-buf-alignment\" value %d, "
990
+ "must be positive", value);
991
+ return;
992
+ }
993
+ bs->bl.opt_mem_alignment = value;
994
+
117
+
995
+ ret = blkio_get_int(s->blkio, "max-segments", &value);
118
+ aio_context_release(old_context);
996
+ if (ret < 0) {
997
+ error_setg_errno(errp, -ret, "failed to get \"max-segments\": %s",
998
+ blkio_get_error_msg());
999
+ return;
1000
+ }
1001
+ if (value < 1) {
1002
+ error_setg(errp, "invalid \"max-segments\" value %d, must be positive",
1003
+ value);
1004
+ return;
1005
+ }
1006
+ bs->bl.max_iov = value;
1007
+}
119
+}
1008
+
120
+
1009
+/*
121
QemuOptsList qemu_common_drive_opts = {
1010
+ * TODO
122
.name = "drive",
1011
+ * Missing libblkio APIs:
123
.head = QTAILQ_HEAD_INITIALIZER(qemu_common_drive_opts.head),
1012
+ * - block_status
1013
+ * - co_invalidate_cache
1014
+ *
1015
+ * Out of scope?
1016
+ * - create
1017
+ * - truncate
1018
+ */
1019
+
1020
+#define BLKIO_DRIVER(name, ...) \
1021
+ { \
1022
+ .format_name = name, \
1023
+ .protocol_name = name, \
1024
+ .instance_size = sizeof(BDRVBlkioState), \
1025
+ .bdrv_file_open = blkio_file_open, \
1026
+ .bdrv_close = blkio_close, \
1027
+ .bdrv_getlength = blkio_getlength, \
1028
+ .bdrv_get_info = blkio_get_info, \
1029
+ .bdrv_attach_aio_context = blkio_attach_aio_context, \
1030
+ .bdrv_detach_aio_context = blkio_detach_aio_context, \
1031
+ .bdrv_co_pdiscard = blkio_co_pdiscard, \
1032
+ .bdrv_co_preadv = blkio_co_preadv, \
1033
+ .bdrv_co_pwritev = blkio_co_pwritev, \
1034
+ .bdrv_co_flush_to_disk = blkio_co_flush, \
1035
+ .bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
1036
+ .bdrv_io_unplug = blkio_io_unplug, \
1037
+ .bdrv_refresh_limits = blkio_refresh_limits, \
1038
+ __VA_ARGS__ \
1039
+ }
1040
+
1041
+static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
1042
+ DRIVER_IO_URING,
1043
+ .bdrv_needs_filename = true,
1044
+);
1045
+
1046
+static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
1047
+ DRIVER_NVME_IO_URING,
1048
+ .bdrv_needs_filename = true,
1049
+);
1050
+
1051
+static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
1052
+ DRIVER_VIRTIO_BLK_VHOST_USER
1053
+);
1054
+
1055
+static BlockDriver bdrv_virtio_blk_vhost_vdpa = BLKIO_DRIVER(
1056
+ DRIVER_VIRTIO_BLK_VHOST_VDPA
1057
+);
1058
+
1059
+static void bdrv_blkio_init(void)
1060
+{
1061
+ bdrv_register(&bdrv_io_uring);
1062
+ bdrv_register(&bdrv_nvme_io_uring);
1063
+ bdrv_register(&bdrv_virtio_blk_vhost_user);
1064
+ bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
1065
+}
1066
+
1067
+block_init(bdrv_blkio_init);
1068
diff --git a/tests/qtest/modules-test.c b/tests/qtest/modules-test.c
1069
index XXXXXXX..XXXXXXX 100644
1070
--- a/tests/qtest/modules-test.c
1071
+++ b/tests/qtest/modules-test.c
1072
@@ -XXX,XX +XXX,XX @@ static void test_modules_load(const void *data)
1073
int main(int argc, char *argv[])
1074
{
1075
const char *modules[] = {
1076
+#ifdef CONFIG_BLKIO
1077
+ "block-", "blkio",
1078
+#endif
1079
#ifdef CONFIG_CURL
1080
"block-", "curl",
1081
#endif
1082
diff --git a/block/meson.build b/block/meson.build
1083
index XXXXXXX..XXXXXXX 100644
1084
--- a/block/meson.build
1085
+++ b/block/meson.build
1086
@@ -XXX,XX +XXX,XX @@ block_modules = {}
1087
1088
modsrc = []
1089
foreach m : [
1090
+ [blkio, 'blkio', files('blkio.c')],
1091
[curl, 'curl', files('curl.c')],
1092
[glusterfs, 'gluster', files('gluster.c')],
1093
[libiscsi, 'iscsi', [files('iscsi.c'), libm]],
1094
diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh
1095
index XXXXXXX..XXXXXXX 100644
1096
--- a/scripts/meson-buildoptions.sh
1097
+++ b/scripts/meson-buildoptions.sh
1098
@@ -XXX,XX +XXX,XX @@ meson_options_help() {
1099
printf "%s\n" ' auth-pam PAM access control'
1100
printf "%s\n" ' avx2 AVX2 optimizations'
1101
printf "%s\n" ' avx512f AVX512F optimizations'
1102
+ printf "%s\n" ' blkio libblkio block device driver'
1103
printf "%s\n" ' bochs bochs image format support'
1104
printf "%s\n" ' bpf eBPF support'
1105
printf "%s\n" ' brlapi brlapi character device driver'
1106
@@ -XXX,XX +XXX,XX @@ _meson_option_parse() {
1107
--disable-gcov) printf "%s" -Db_coverage=false ;;
1108
--enable-lto) printf "%s" -Db_lto=true ;;
1109
--disable-lto) printf "%s" -Db_lto=false ;;
1110
+ --enable-blkio) printf "%s" -Dblkio=enabled ;;
1111
+ --disable-blkio) printf "%s" -Dblkio=disabled ;;
1112
--block-drv-ro-whitelist=*) quote_sh "-Dblock_drv_ro_whitelist=$2" ;;
1113
--block-drv-rw-whitelist=*) quote_sh "-Dblock_drv_rw_whitelist=$2" ;;
1114
--enable-block-drv-whitelist-in-tools) printf "%s" -Dblock_drv_whitelist_in_tools=true ;;
1115
--
124
--
1116
2.37.3
125
2.14.3
126
127
1
QMP 'transaction' blockdev-snapshot-sync with multiple disks in an
IOThread is an untested code path. Several bugs have been found in
connection with this command. This patch adds a test case to prevent
future regressions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171206144550.22295-10-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/qemu-iotests/202     | 95 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/202.out | 11 ++++++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 107 insertions(+)
 create mode 100755 tests/qemu-iotests/202
 create mode 100644 tests/qemu-iotests/202.out

Emulated devices and other BlockBackend users wishing to take advantage
of blk_register_buf() all have the same repetitive job: register
RAMBlocks with the BlockBackend using RAMBlockNotifier.

Add a BlockRAMRegistrar API to do this. A later commit will use this
from hw/block/virtio-blk.c.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20221013185908.1297568-10-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 MAINTAINERS                          |  1 +
 include/sysemu/block-ram-registrar.h | 37 ++++++++++++++++++
 block/block-ram-registrar.c          | 58 ++++++++++++++++++++++++++++
 block/meson.build                    |  1 +
 4 files changed, 97 insertions(+)
 create mode 100644 include/sysemu/block-ram-registrar.h
 create mode 100644 block/block-ram-registrar.c
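A sketch of how an emulated device is expected to use the new API (modelled on
the virtio-blk conversion later in this series; the write call and its
arguments are illustrative):

    /* Keep guest RAM registered with the BlockBackend and only request the
     * registered-buffer optimization when every RAMBlock was registered. */
    BlockRAMRegistrar blk_ram_registrar;

    blk_ram_registrar_init(&blk_ram_registrar, blk);     /* at realize time */

    BdrvRequestFlags flags = 0;
    if (blk_ram_registrar_ok(&blk_ram_registrar)) {
        flags |= BDRV_REQ_REGISTERED_BUF;
    }
    blk_aio_pwritev(blk, offset, qiov, flags, cb, opaque);

    blk_ram_registrar_destroy(&blk_ram_registrar);       /* at unrealize time */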
diff --git a/MAINTAINERS b/MAINTAINERS
19
diff --git a/tests/qemu-iotests/202 b/tests/qemu-iotests/202
22
index XXXXXXX..XXXXXXX 100644
20
new file mode 100755
23
--- a/MAINTAINERS
21
index XXXXXXX..XXXXXXX
24
+++ b/MAINTAINERS
22
--- /dev/null
25
@@ -XXX,XX +XXX,XX @@ F: block*
23
+++ b/tests/qemu-iotests/202
26
F: block/
24
@@ -XXX,XX +XXX,XX @@
27
F: hw/block/
25
+#!/usr/bin/env python
28
F: include/block/
26
+#
29
+F: include/sysemu/block-*.h
27
+# Copyright (C) 2017 Red Hat, Inc.
30
F: qemu-img*
28
+#
31
F: docs/tools/qemu-img.rst
29
+# This program is free software; you can redistribute it and/or modify
32
F: qemu-io*
30
+# it under the terms of the GNU General Public License as published by
33
diff --git a/include/sysemu/block-ram-registrar.h b/include/sysemu/block-ram-registrar.h
31
+# the Free Software Foundation; either version 2 of the License, or
32
+# (at your option) any later version.
33
+#
34
+# This program is distributed in the hope that it will be useful,
35
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
36
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
37
+# GNU General Public License for more details.
38
+#
39
+# You should have received a copy of the GNU General Public License
40
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
41
+#
42
+# Creator/Owner: Stefan Hajnoczi <stefanha@redhat.com>
43
+#
44
+# Check that QMP 'transaction' blockdev-snapshot-sync with multiple drives on a
45
+# single IOThread completes successfully. This particular command triggered a
46
+# hang due to recursive AioContext locking and BDRV_POLL_WHILE(). Protect
47
+# against regressions.
48
+
49
+import iotests
50
+
51
+iotests.verify_image_format(supported_fmts=['qcow2'])
52
+iotests.verify_platform(['linux'])
53
+
54
+with iotests.FilePath('disk0.img') as disk0_img_path, \
55
+ iotests.FilePath('disk1.img') as disk1_img_path, \
56
+ iotests.FilePath('disk0-snap.img') as disk0_snap_img_path, \
57
+ iotests.FilePath('disk1-snap.img') as disk1_snap_img_path, \
58
+ iotests.VM() as vm:
59
+
60
+ img_size = '10M'
61
+ iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk0_img_path, img_size)
62
+ iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk1_img_path, img_size)
63
+
64
+ iotests.log('Launching VM...')
65
+ vm.launch()
66
+
67
+ iotests.log('Adding IOThread...')
68
+ iotests.log(vm.qmp('object-add',
69
+ qom_type='iothread',
70
+ id='iothread0'))
71
+
72
+ iotests.log('Adding blockdevs...')
73
+ iotests.log(vm.qmp('blockdev-add',
74
+ driver=iotests.imgfmt,
75
+ node_name='disk0',
76
+ file={
77
+ 'driver': 'file',
78
+ 'filename': disk0_img_path,
79
+ }))
80
+ iotests.log(vm.qmp('blockdev-add',
81
+ driver=iotests.imgfmt,
82
+ node_name='disk1',
83
+ file={
84
+ 'driver': 'file',
85
+ 'filename': disk1_img_path,
86
+ }))
87
+
88
+ iotests.log('Setting iothread...')
89
+ iotests.log(vm.qmp('x-blockdev-set-iothread',
90
+ node_name='disk0',
91
+ iothread='iothread0'))
92
+ iotests.log(vm.qmp('x-blockdev-set-iothread',
93
+ node_name='disk1',
94
+ iothread='iothread0'))
95
+
96
+ iotests.log('Creating external snapshots...')
97
+ iotests.log(vm.qmp(
98
+ 'transaction',
99
+ actions=[
100
+ {
101
+ 'data': {
102
+ 'node-name': 'disk0',
103
+ 'snapshot-file': disk0_snap_img_path,
104
+ 'snapshot-node-name': 'disk0-snap',
105
+ 'mode': 'absolute-paths',
106
+ 'format': iotests.imgfmt,
107
+ },
108
+ 'type': 'blockdev-snapshot-sync'
109
+ }, {
110
+ 'data': {
111
+ 'node-name': 'disk1',
112
+ 'snapshot-file': disk1_snap_img_path,
113
+ 'snapshot-node-name': 'disk1-snap',
114
+ 'mode': 'absolute-paths',
115
+ 'format': iotests.imgfmt
116
+ },
117
+ 'type': 'blockdev-snapshot-sync'
118
+ }
119
+ ]))
120
diff --git a/tests/qemu-iotests/202.out b/tests/qemu-iotests/202.out
34
new file mode 100644
121
new file mode 100644
35
index XXXXXXX..XXXXXXX
122
index XXXXXXX..XXXXXXX
36
--- /dev/null
123
--- /dev/null
37
+++ b/include/sysemu/block-ram-registrar.h
124
+++ b/tests/qemu-iotests/202.out
38
@@ -XXX,XX +XXX,XX @@
125
@@ -XXX,XX +XXX,XX @@
39
+/*
126
+Launching VM...
40
+ * BlockBackend RAM Registrar
127
+Adding IOThread...
41
+ *
128
+{u'return': {}}
42
+ * SPDX-License-Identifier: GPL-2.0-or-later
129
+Adding blockdevs...
43
+ */
130
+{u'return': {}}
44
+
131
+{u'return': {}}
45
+#ifndef BLOCK_RAM_REGISTRAR_H
132
+Setting iothread...
46
+#define BLOCK_RAM_REGISTRAR_H
133
+{u'return': {}}
47
+
134
+{u'return': {}}
48
+#include "exec/ramlist.h"
135
+Creating external snapshots...
49
+
136
+{u'return': {}}
50
+/**
137
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
51
+ * struct BlockRAMRegistrar:
138
index XXXXXXX..XXXXXXX 100644
52
+ *
139
--- a/tests/qemu-iotests/group
53
+ * Keeps RAMBlock memory registered with a BlockBackend using
140
+++ b/tests/qemu-iotests/group
54
+ * blk_register_buf() including hotplugged memory.
55
+ *
56
+ * Emulated devices or other BlockBackend users initialize a BlockRAMRegistrar
57
+ * with blk_ram_registrar_init() before submitting I/O requests with the
58
+ * BDRV_REQ_REGISTERED_BUF flag set.
59
+ */
60
+typedef struct {
61
+ BlockBackend *blk;
62
+ RAMBlockNotifier notifier;
63
+ bool ok;
64
+} BlockRAMRegistrar;
65
+
66
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk);
67
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r);
68
+
69
+/* Have all RAMBlocks been registered successfully? */
70
+static inline bool blk_ram_registrar_ok(BlockRAMRegistrar *r)
71
+{
72
+ return r->ok;
73
+}
74
+
75
+#endif /* BLOCK_RAM_REGISTRAR_H */
76
diff --git a/block/block-ram-registrar.c b/block/block-ram-registrar.c
77
new file mode 100644
78
index XXXXXXX..XXXXXXX
79
--- /dev/null
80
+++ b/block/block-ram-registrar.c
81
@@ -XXX,XX +XXX,XX @@
141
@@ -XXX,XX +XXX,XX @@
82
+/*
142
197 rw auto quick
83
+ * BlockBackend RAM Registrar
143
198 rw auto
84
+ *
144
200 rw auto
85
+ * SPDX-License-Identifier: GPL-2.0-or-later
145
+202 rw auto quick
86
+ */
87
+
88
+#include "qemu/osdep.h"
89
+#include "sysemu/block-backend.h"
90
+#include "sysemu/block-ram-registrar.h"
91
+#include "qapi/error.h"
92
+
93
+static void ram_block_added(RAMBlockNotifier *n, void *host, size_t size,
94
+ size_t max_size)
95
+{
96
+ BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
97
+ Error *err = NULL;
98
+
99
+ if (!r->ok) {
100
+ return; /* don't try again if we've already failed */
101
+ }
102
+
103
+ if (!blk_register_buf(r->blk, host, max_size, &err)) {
104
+ error_report_err(err);
105
+ ram_block_notifier_remove(&r->notifier);
106
+ r->ok = false;
107
+ }
108
+}
109
+
110
+static void ram_block_removed(RAMBlockNotifier *n, void *host, size_t size,
111
+ size_t max_size)
112
+{
113
+ BlockRAMRegistrar *r = container_of(n, BlockRAMRegistrar, notifier);
114
+ blk_unregister_buf(r->blk, host, max_size);
115
+}
116
+
117
+void blk_ram_registrar_init(BlockRAMRegistrar *r, BlockBackend *blk)
118
+{
119
+ r->blk = blk;
120
+ r->notifier = (RAMBlockNotifier){
121
+ .ram_block_added = ram_block_added,
122
+ .ram_block_removed = ram_block_removed,
123
+
124
+ /*
125
+ * .ram_block_resized() is not necessary because we use the max_size
126
+ * value that does not change across resize.
127
+ */
128
+ };
129
+ r->ok = true;
130
+
131
+ ram_block_notifier_add(&r->notifier);
132
+}
133
+
134
+void blk_ram_registrar_destroy(BlockRAMRegistrar *r)
135
+{
136
+ if (r->ok) {
137
+ ram_block_notifier_remove(&r->notifier);
138
+ }
139
+}
140
diff --git a/block/meson.build b/block/meson.build
141
index XXXXXXX..XXXXXXX 100644
142
--- a/block/meson.build
143
+++ b/block/meson.build
144
@@ -XXX,XX +XXX,XX @@ block_ss.add(files(
145
), zstd, zlib, gnutls)
146
147
softmmu_ss.add(when: 'CONFIG_TCG', if_true: files('blkreplay.c'))
148
+softmmu_ss.add(files('block-ram-registrar.c'))
149
150
if get_option('qcow1').allowed()
151
block_ss.add(files('qcow.c'))
152
--
2.37.3

--
2.14.3
Register guest RAM using BlockRAMRegistrar and set the
BDRV_REQ_REGISTERED_BUF flag so block drivers can optimize memory
accesses in I/O requests.

This is for vdpa-blk, vhost-user-blk, and other I/O interfaces that rely
on DMA mapping/unmapping.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20221013185908.1297568-14-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/hw/virtio/virtio-blk.h | 2 ++
 hw/block/virtio-blk.c | 39 ++++++++++++++++++++++------------
 2 files changed, 27 insertions(+), 14 deletions(-)

From: Mark Kanda <mark.kanda@oracle.com>

Depending on the configuration, it can be beneficial to adjust the virtio-blk
queue size to something other than the current default of 128. Add a new
property to make the queue size configurable.

Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Ameya More <ameya.more@oracle.com>
Message-id: 52e6d742811f10dbd16e996e86cf375b9577c187.1513005190.git.mark.kanda@oracle.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/hw/virtio/virtio-blk.h | 1 +
 hw/block/virtio-blk.c | 10 +++++++++-
 2 files changed, 10 insertions(+), 1 deletion(-)
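To see the whole pattern in one place before reading the hunks: the sketch
below condenses how a device model is expected to combine BlockRAMRegistrar
with the request flag. It is distilled from the virtio-blk changes that
follow; MyDevice and the my_*() names are invented for illustration and
error handling is omitted.

    #include "qemu/osdep.h"
    #include "sysemu/block-backend.h"
    #include "sysemu/block-ram-registrar.h"

    typedef struct {
        BlockBackend *blk;
        BlockRAMRegistrar blk_ram_registrar;
    } MyDevice;

    static void my_request_complete(void *opaque, int ret)
    {
        /* completion handling elided */
    }

    static void my_device_realize(MyDevice *s)
    {
        /* Registers existing guest RAM and tracks hotplug via a notifier */
        blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
    }

    static void my_device_submit_read(MyDevice *s, int64_t offset,
                                      QEMUIOVector *qiov)
    {
        BdrvRequestFlags flags = 0;

        /* Only pass the hint if every RAMBlock was registered successfully */
        if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
            flags |= BDRV_REQ_REGISTERED_BUF;
        }

        blk_aio_preadv(s->blk, offset, qiov, flags, my_request_complete, s);
    }

On unrealize the device undoes this with blk_ram_registrar_destroy(), as the
virtio_blk_device_unrealize() hunk below does.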
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
18
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
18
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
19
--- a/include/hw/virtio/virtio-blk.h
20
--- a/include/hw/virtio/virtio-blk.h
20
+++ b/include/hw/virtio/virtio-blk.h
21
+++ b/include/hw/virtio/virtio-blk.h
21
@@ -XXX,XX +XXX,XX @@
22
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlkConf
22
#include "hw/block/block.h"
23
uint32_t config_wce;
23
#include "sysemu/iothread.h"
24
uint32_t request_merging;
24
#include "sysemu/block-backend.h"
25
uint16_t num_queues;
25
+#include "sysemu/block-ram-registrar.h"
26
+ uint16_t queue_size;
26
#include "qom/object.h"
27
28
#define TYPE_VIRTIO_BLK "virtio-blk-device"
29
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlock {
30
struct VirtIOBlockDataPlane *dataplane;
31
uint64_t host_features;
32
size_t config_size;
33
+ BlockRAMRegistrar blk_ram_registrar;
34
};
27
};
35
28
36
typedef struct VirtIOBlockReq {
29
struct VirtIOBlockDataPlane;
37
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
30
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
38
index XXXXXXX..XXXXXXX 100644
31
index XXXXXXX..XXXXXXX 100644
39
--- a/hw/block/virtio-blk.c
32
--- a/hw/block/virtio-blk.c
40
+++ b/hw/block/virtio-blk.c
33
+++ b/hw/block/virtio-blk.c
41
@@ -XXX,XX +XXX,XX @@
34
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
42
#include "hw/block/block.h"
35
error_setg(errp, "num-queues property must be larger than 0");
43
#include "hw/qdev-properties.h"
44
#include "sysemu/blockdev.h"
45
+#include "sysemu/block-ram-registrar.h"
46
#include "sysemu/sysemu.h"
47
#include "sysemu/runstate.h"
48
#include "hw/virtio/virtio-blk.h"
49
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
50
}
51
}
52
53
-static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
54
+static inline void submit_requests(VirtIOBlock *s, MultiReqBuffer *mrb,
55
int start, int num_reqs, int niov)
56
{
57
+ BlockBackend *blk = s->blk;
58
QEMUIOVector *qiov = &mrb->reqs[start]->qiov;
59
int64_t sector_num = mrb->reqs[start]->sector_num;
60
bool is_write = mrb->is_write;
61
+ BdrvRequestFlags flags = 0;
62
63
if (num_reqs > 1) {
64
int i;
65
@@ -XXX,XX +XXX,XX @@ static inline void submit_requests(BlockBackend *blk, MultiReqBuffer *mrb,
66
num_reqs - 1);
67
}
68
69
+ if (blk_ram_registrar_ok(&s->blk_ram_registrar)) {
70
+ flags |= BDRV_REQ_REGISTERED_BUF;
71
+ }
72
+
73
if (is_write) {
74
- blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
75
- virtio_blk_rw_complete, mrb->reqs[start]);
76
+ blk_aio_pwritev(blk, sector_num << BDRV_SECTOR_BITS, qiov,
77
+ flags, virtio_blk_rw_complete,
78
+ mrb->reqs[start]);
79
} else {
80
- blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov, 0,
81
- virtio_blk_rw_complete, mrb->reqs[start]);
82
+ blk_aio_preadv(blk, sector_num << BDRV_SECTOR_BITS, qiov,
83
+ flags, virtio_blk_rw_complete,
84
+ mrb->reqs[start]);
85
}
86
}
87
88
@@ -XXX,XX +XXX,XX @@ static int multireq_compare(const void *a, const void *b)
89
}
90
}
91
92
-static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
93
+static void virtio_blk_submit_multireq(VirtIOBlock *s, MultiReqBuffer *mrb)
94
{
95
int i = 0, start = 0, num_reqs = 0, niov = 0, nb_sectors = 0;
96
uint32_t max_transfer;
97
int64_t sector_num = 0;
98
99
if (mrb->num_reqs == 1) {
100
- submit_requests(blk, mrb, 0, 1, -1);
101
+ submit_requests(s, mrb, 0, 1, -1);
102
mrb->num_reqs = 0;
103
return;
36
return;
104
}
37
}
105
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
38
+ if (!is_power_of_2(conf->queue_size) ||
106
* 3. merge would exceed maximum transfer length of backend device
39
+ conf->queue_size > VIRTQUEUE_MAX_SIZE) {
107
*/
40
+ error_setg(errp, "invalid queue-size property (%" PRIu16 "), "
108
if (sector_num + nb_sectors != req->sector_num ||
41
+ "must be a power of 2 (max %d)",
109
- niov > blk_get_max_iov(blk) - req->qiov.niov ||
42
+ conf->queue_size, VIRTQUEUE_MAX_SIZE);
110
+ niov > blk_get_max_iov(s->blk) - req->qiov.niov ||
43
+ return;
111
req->qiov.size > max_transfer ||
44
+ }
112
nb_sectors > (max_transfer -
45
113
req->qiov.size) / BDRV_SECTOR_SIZE) {
46
blkconf_serial(&conf->conf, &conf->serial);
114
- submit_requests(blk, mrb, start, num_reqs, niov);
47
if (!blkconf_apply_backend_options(&conf->conf,
115
+ submit_requests(s, mrb, start, num_reqs, niov);
48
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
116
num_reqs = 0;
49
s->sector_mask = (s->conf.conf.logical_block_size / BDRV_SECTOR_SIZE) - 1;
117
}
50
118
}
51
for (i = 0; i < conf->num_queues; i++) {
119
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_submit_multireq(BlockBackend *blk, MultiReqBuffer *mrb)
52
- virtio_add_queue(vdev, 128, virtio_blk_handle_output);
120
num_reqs++;
53
+ virtio_add_queue(vdev, conf->queue_size, virtio_blk_handle_output);
121
}
54
}
122
55
virtio_blk_data_plane_create(vdev, conf, &s->dataplane, &err);
123
- submit_requests(blk, mrb, start, num_reqs, niov);
56
if (err != NULL) {
124
+ submit_requests(s, mrb, start, num_reqs, niov);
57
@@ -XXX,XX +XXX,XX @@ static Property virtio_blk_properties[] = {
125
mrb->num_reqs = 0;
58
DEFINE_PROP_BIT("request-merging", VirtIOBlock, conf.request_merging, 0,
126
}
59
true),
127
60
DEFINE_PROP_UINT16("num-queues", VirtIOBlock, conf.num_queues, 1),
128
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
61
+ DEFINE_PROP_UINT16("queue-size", VirtIOBlock, conf.queue_size, 128),
129
* Make sure all outstanding writes are posted to the backing device.
62
DEFINE_PROP_LINK("iothread", VirtIOBlock, conf.iothread, TYPE_IOTHREAD,
130
*/
63
IOThread *),
131
if (mrb->is_write && mrb->num_reqs > 0) {
64
DEFINE_PROP_END_OF_LIST(),
132
- virtio_blk_submit_multireq(s->blk, mrb);
133
+ virtio_blk_submit_multireq(s, mrb);
134
}
135
blk_aio_flush(s->blk, virtio_blk_flush_complete, req);
136
}
137
@@ -XXX,XX +XXX,XX @@ static int virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
138
if (mrb->num_reqs > 0 && (mrb->num_reqs == VIRTIO_BLK_MAX_MERGE_REQS ||
139
is_write != mrb->is_write ||
140
!s->conf.request_merging)) {
141
- virtio_blk_submit_multireq(s->blk, mrb);
142
+ virtio_blk_submit_multireq(s, mrb);
143
}
144
145
assert(mrb->num_reqs < VIRTIO_BLK_MAX_MERGE_REQS);
146
@@ -XXX,XX +XXX,XX @@ void virtio_blk_handle_vq(VirtIOBlock *s, VirtQueue *vq)
147
} while (!virtio_queue_empty(vq));
148
149
if (mrb.num_reqs) {
150
- virtio_blk_submit_multireq(s->blk, &mrb);
151
+ virtio_blk_submit_multireq(s, &mrb);
152
}
153
154
blk_io_unplug(s->blk);
155
@@ -XXX,XX +XXX,XX @@ void virtio_blk_process_queued_requests(VirtIOBlock *s, bool is_bh)
156
}
157
158
if (mrb.num_reqs) {
159
- virtio_blk_submit_multireq(s->blk, &mrb);
160
+ virtio_blk_submit_multireq(s, &mrb);
161
}
162
if (is_bh) {
163
blk_dec_in_flight(s->conf.conf.blk);
164
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
165
}
166
167
s->change = qemu_add_vm_change_state_handler(virtio_blk_dma_restart_cb, s);
168
+ blk_ram_registrar_init(&s->blk_ram_registrar, s->blk);
169
blk_set_dev_ops(s->blk, &virtio_block_ops, s);
170
171
blk_iostatus_enable(s->blk);
172
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_unrealize(DeviceState *dev)
173
virtio_del_queue(vdev, i);
174
}
175
qemu_coroutine_dec_pool_size(conf->num_queues * conf->queue_size / 2);
176
+ blk_ram_registrar_destroy(&s->blk_ram_registrar);
177
qemu_del_vm_change_state_handler(s->change);
178
blockdev_mark_auto_del(s->blk);
179
virtio_cleanup(vdev);
180
--
2.37.3

--
2.14.3
Avoid bounce buffers when QEMUIOVector elements are within previously
registered bdrv_register_buf() buffers.

The idea is that emulated storage controllers will register guest RAM
using bdrv_register_buf() and set the BDRV_REQ_REGISTERED_BUF flag on I/O
requests. Therefore no blkio_map_mem_region() calls are necessary in the
performance-critical I/O code path.

This optimization doesn't apply if the I/O buffer is internally
allocated by QEMU (e.g. qcow2 metadata). There we still take the slow
path because BDRV_REQ_REGISTERED_BUF is not set.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20221013185908.1297568-13-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/blkio.c | 183 +++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 180 insertions(+), 3 deletions(-)

From: Mark Kanda <mark.kanda@oracle.com>

virtio-blk logical block size should never be larger than physical block
size because it doesn't make sense to have such configurations. QEMU doesn't
have a way to effectively express this condition; the best it can do is
report the physical block exponent as 0 - indicating the logical block size
equals the physical block size.

This is identical to commit 3da023b5827543ee4c022986ea2ad9d1274410b2
but applied to virtio-blk (instead of virtio-scsi).

Signed-off-by: Mark Kanda <mark.kanda@oracle.com>
Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Ameya More <ameya.more@oracle.com>
Reviewed-by: Martin K. Petersen <martin.petersen@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 773169891f9f2deb4cb7c4ef2655580dbe24c1d1.1513005190.git.mark.kanda@oracle.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/virtio-blk.c | 7 +++++++
 1 file changed, 7 insertions(+)
diff --git a/block/blkio.c b/block/blkio.c
23
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
22
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
23
--- a/block/blkio.c
25
--- a/hw/block/virtio-blk.c
24
+++ b/block/blkio.c
26
+++ b/hw/block/virtio-blk.c
25
@@ -XXX,XX +XXX,XX @@
27
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_device_realize(DeviceState *dev, Error **errp)
26
#include "qemu/osdep.h"
28
27
#include <blkio.h>
29
blkconf_blocksizes(&conf->conf);
28
#include "block/block_int.h"
30
29
+#include "exec/memory.h"
31
+ if (conf->conf.logical_block_size >
30
+#include "exec/cpu-common.h" /* for qemu_ram_get_fd() */
32
+ conf->conf.physical_block_size) {
31
#include "qapi/error.h"
33
+ error_setg(errp,
32
+#include "qemu/error-report.h"
34
+ "logical_block_size > physical_block_size not supported");
33
#include "qapi/qmp/qdict.h"
34
#include "qemu/module.h"
35
+#include "exec/memory.h" /* for ram_block_discard_disable() */
36
37
/*
38
* Keep the QEMU BlockDriver names identical to the libblkio driver names.
39
@@ -XXX,XX +XXX,XX @@ typedef struct {
40
41
/* Can we skip adding/deleting blkio_mem_regions? */
42
bool needs_mem_regions;
43
+
44
+ /* Are file descriptors necessary for blkio_mem_regions? */
45
+ bool needs_mem_region_fd;
46
+
47
+ /* Are madvise(MADV_DONTNEED)-style operations unavailable? */
48
+ bool may_pin_mem_regions;
49
} BDRVBlkioState;
50
51
/* Called with s->bounce_lock held */
52
@@ -XXX,XX +XXX,XX @@ blkio_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
53
.coroutine = qemu_coroutine_self(),
54
};
55
BDRVBlkioState *s = bs->opaque;
56
- bool use_bounce_buffer = s->needs_mem_regions;
57
+ bool use_bounce_buffer =
58
+ s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
59
BlkioBounceBuf bounce;
60
struct iovec *iov = qiov->iov;
61
int iovcnt = qiov->niov;
62
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn blkio_co_pwritev(BlockDriverState *bs, int64_t offset,
63
.coroutine = qemu_coroutine_self(),
64
};
65
BDRVBlkioState *s = bs->opaque;
66
- bool use_bounce_buffer = s->needs_mem_regions;
67
+ bool use_bounce_buffer =
68
+ s->needs_mem_regions && !(flags & BDRV_REQ_REGISTERED_BUF);
69
BlkioBounceBuf bounce;
70
struct iovec *iov = qiov->iov;
71
int iovcnt = qiov->niov;
72
@@ -XXX,XX +XXX,XX @@ static void blkio_io_unplug(BlockDriverState *bs)
73
}
74
}
75
76
+typedef enum {
77
+ BMRR_OK,
78
+ BMRR_SKIP,
79
+ BMRR_FAIL,
80
+} BlkioMemRegionResult;
81
+
82
+/*
83
+ * Produce a struct blkio_mem_region for a given address and size.
84
+ *
85
+ * This function produces identical results when called multiple times with the
86
+ * same arguments. This property is necessary because blkio_unmap_mem_region()
87
+ * must receive the same struct blkio_mem_region field values that were passed
88
+ * to blkio_map_mem_region().
89
+ */
90
+static BlkioMemRegionResult
91
+blkio_mem_region_from_host(BlockDriverState *bs,
92
+ void *host, size_t size,
93
+ struct blkio_mem_region *region,
94
+ Error **errp)
95
+{
96
+ BDRVBlkioState *s = bs->opaque;
97
+ int fd = -1;
98
+ ram_addr_t fd_offset = 0;
99
+
100
+ if (((uintptr_t)host | size) % s->mem_region_alignment) {
101
+ error_setg(errp, "unaligned buf %p with size %zu", host, size);
102
+ return BMRR_FAIL;
103
+ }
104
+
105
+ /* Attempt to find the fd for the underlying memory */
106
+ if (s->needs_mem_region_fd) {
107
+ RAMBlock *ram_block;
108
+ RAMBlock *end_block;
109
+ ram_addr_t offset;
110
+
111
+ /*
112
+ * bdrv_register_buf() is called with the BQL held so mr lives at least
113
+ * until this function returns.
114
+ */
115
+ ram_block = qemu_ram_block_from_host(host, false, &fd_offset);
116
+ if (ram_block) {
117
+ fd = qemu_ram_get_fd(ram_block);
118
+ }
119
+ if (fd == -1) {
120
+ /*
121
+ * Ideally every RAMBlock would have an fd. pc-bios and other
122
+ * things don't. Luckily they are usually not I/O buffers and we
123
+ * can just ignore them.
124
+ */
125
+ return BMRR_SKIP;
126
+ }
127
+
128
+ /* Make sure the fd covers the entire range */
129
+ end_block = qemu_ram_block_from_host(host + size - 1, false, &offset);
130
+ if (ram_block != end_block) {
131
+ error_setg(errp, "registered buffer at %p with size %zu extends "
132
+ "beyond RAMBlock", host, size);
133
+ return BMRR_FAIL;
134
+ }
135
+ }
136
+
137
+ *region = (struct blkio_mem_region){
138
+ .addr = host,
139
+ .len = size,
140
+ .fd = fd,
141
+ .fd_offset = fd_offset,
142
+ };
143
+ return BMRR_OK;
144
+}
145
+
146
+static bool blkio_register_buf(BlockDriverState *bs, void *host, size_t size,
147
+ Error **errp)
148
+{
149
+ BDRVBlkioState *s = bs->opaque;
150
+ struct blkio_mem_region region;
151
+ BlkioMemRegionResult region_result;
152
+ int ret;
153
+
154
+ /*
155
+ * Mapping memory regions conflicts with RAM discard (virtio-mem) when
156
+ * there is pinning, so only do it when necessary.
157
+ */
158
+ if (!s->needs_mem_regions && s->may_pin_mem_regions) {
159
+ return true;
160
+ }
161
+
162
+ region_result = blkio_mem_region_from_host(bs, host, size, &region, errp);
163
+ if (region_result == BMRR_SKIP) {
164
+ return true;
165
+ } else if (region_result != BMRR_OK) {
166
+ return false;
167
+ }
168
+
169
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
170
+ ret = blkio_map_mem_region(s->blkio, &region);
171
+ }
172
+
173
+ if (ret < 0) {
174
+ error_setg(errp, "Failed to add blkio mem region %p with size %zu: %s",
175
+ host, size, blkio_get_error_msg());
176
+ return false;
177
+ }
178
+ return true;
179
+}
180
+
181
+static void blkio_unregister_buf(BlockDriverState *bs, void *host, size_t size)
182
+{
183
+ BDRVBlkioState *s = bs->opaque;
184
+ struct blkio_mem_region region;
185
+
186
+ /* See blkio_register_buf() */
187
+ if (!s->needs_mem_regions && s->may_pin_mem_regions) {
188
+ return;
35
+ return;
189
+ }
36
+ }
190
+
37
+
191
+ if (blkio_mem_region_from_host(bs, host, size, &region, NULL) != BMRR_OK) {
38
virtio_init(vdev, "virtio-blk", VIRTIO_ID_BLOCK,
192
+ return;
39
sizeof(struct virtio_blk_config));
193
+ }
194
+
195
+ WITH_QEMU_LOCK_GUARD(&s->blkio_lock) {
196
+ blkio_unmap_mem_region(s->blkio, &region);
197
+ }
198
+}
199
+
200
static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
201
Error **errp)
202
{
203
@@ -XXX,XX +XXX,XX @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
204
return ret;
205
}
206
207
+ ret = blkio_get_bool(s->blkio,
208
+ "needs-mem-region-fd",
209
+ &s->needs_mem_region_fd);
210
+ if (ret < 0) {
211
+ error_setg_errno(errp, -ret,
212
+ "failed to get needs-mem-region-fd: %s",
213
+ blkio_get_error_msg());
214
+ blkio_destroy(&s->blkio);
215
+ return ret;
216
+ }
217
+
218
ret = blkio_get_uint64(s->blkio,
219
"mem-region-alignment",
220
&s->mem_region_alignment);
221
@@ -XXX,XX +XXX,XX @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
222
return ret;
223
}
224
225
+ ret = blkio_get_bool(s->blkio,
226
+ "may-pin-mem-regions",
227
+ &s->may_pin_mem_regions);
228
+ if (ret < 0) {
229
+ /* Be conservative (assume pinning) if the property is not supported */
230
+ s->may_pin_mem_regions = s->needs_mem_regions;
231
+ }
232
+
233
+ /*
234
+ * Notify if libblkio drivers pin memory and prevent features like
235
+ * virtio-mem from working.
236
+ */
237
+ if (s->may_pin_mem_regions) {
238
+ ret = ram_block_discard_disable(true);
239
+ if (ret < 0) {
240
+ error_setg_errno(errp, -ret, "ram_block_discard_disable() failed");
241
+ blkio_destroy(&s->blkio);
242
+ return ret;
243
+ }
244
+ }
245
+
246
ret = blkio_start(s->blkio);
247
if (ret < 0) {
248
error_setg_errno(errp, -ret, "blkio_start failed: %s",
249
blkio_get_error_msg());
250
blkio_destroy(&s->blkio);
251
+ if (s->may_pin_mem_regions) {
252
+ ram_block_discard_disable(false);
253
+ }
254
return ret;
255
}
256
257
- bs->supported_write_flags = BDRV_REQ_FUA;
258
+ bs->supported_write_flags = BDRV_REQ_FUA | BDRV_REQ_REGISTERED_BUF;
259
bs->supported_zero_flags = BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP |
260
BDRV_REQ_NO_FALLBACK;
261
262
@@ -XXX,XX +XXX,XX @@ static void blkio_close(BlockDriverState *bs)
263
qemu_mutex_destroy(&s->blkio_lock);
264
blkio_detach_aio_context(bs);
265
blkio_destroy(&s->blkio);
266
+
267
+ if (s->may_pin_mem_regions) {
268
+ ram_block_discard_disable(false);
269
+ }
270
}
271
272
static int64_t blkio_getlength(BlockDriverState *bs)
273
@@ -XXX,XX +XXX,XX @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
274
.bdrv_co_pwrite_zeroes = blkio_co_pwrite_zeroes, \
275
.bdrv_io_unplug = blkio_io_unplug, \
276
.bdrv_refresh_limits = blkio_refresh_limits, \
277
+ .bdrv_register_buf = blkio_register_buf, \
278
+ .bdrv_unregister_buf = blkio_unregister_buf, \
279
__VA_ARGS__ \
280
}
281
40
282
--
2.37.3

--
2.14.3
Block drivers may optimize I/O requests accessing buffers previously
registered with bdrv_register_buf(). Checking whether all elements of a
request's QEMUIOVector are within previously registered buffers is
expensive, so we need a hint from the user to avoid costly checks.

Add a BDRV_REQ_REGISTERED_BUF request flag to indicate that all
QEMUIOVector elements in an I/O request are known to be within
previously registered buffers.

Always pass the flag through to driver read/write functions. There is
little harm in passing the flag to a driver that does not use it.
Passing the flag to drivers avoids changes across many block drivers.
Filter drivers would need to explicitly support the flag and pass
through to their children when the children support it. That's a lot of
code changes and it's hard to remember to do that everywhere, leading to
silent reduced performance when the flag is accidentally dropped.

The only problematic scenario with the approach in this patch is when a
driver passes the flag through to internal I/O requests that don't use
the same I/O buffer. In that case the hint may be set when it should
actually be clear. This is a rare case though so the risk is low.

Some drivers have assert(!flags), which no longer works when
BDRV_REQ_REGISTERED_BUF is passed in. These assertions aren't very
useful anyway since the functions are called almost exclusively by
bdrv_driver_preadv/pwritev() so if we get flags handling right there
then the assertion is not needed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20221013185908.1297568-7-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/block-common.h | 9 ++++++
 block.c | 14 +++++++++
 block/blkverify.c | 4 +--
 block/crypto.c | 4 +--
 block/file-posix.c | 1 -
 block/gluster.c | 1 -
 block/io.c | 61 ++++++++++++++++++++++--------------
 block/mirror.c | 2 ++
 block/nbd.c | 1 -
 block/parallels.c | 1 -
 block/qcow.c | 2 --
 block/qed.c | 1 -
 block/raw-format.c | 2 ++
 block/replication.c | 1 -
 block/ssh.c | 1 -
 block/vhdx.c | 1 -
 16 files changed, 69 insertions(+), 37 deletions(-)

From: Paolo Bonzini <pbonzini@redhat.com>

BDRV_POLL_WHILE() does not support recursive AioContext locking. It
only releases the AioContext lock once regardless of how many times the
caller has acquired it. This results in a hang since the IOThread does
not make progress while the AioContext is still locked.

The following steps trigger the hang:

  $ qemu-system-x86_64 -M accel=kvm -m 1G -cpu host \
       -object iothread,id=iothread0 \
       -device virtio-scsi-pci,iothread=iothread0 \
       -drive if=none,id=drive0,file=test.img,format=raw \
       -device scsi-hd,drive=drive0 \
       -drive if=none,id=drive1,file=test.img,format=raw \
       -device scsi-hd,drive=drive1
  $ qemu-system-x86_64 ...same options... \
       -incoming tcp::1234
  (qemu) migrate tcp:127.0.0.1:1234
  ...hang...

Tested-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171207201320.19284-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)
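One rule from the first commit message above is easy to miss: any layer that
swaps the caller's QEMUIOVector for a bounce buffer must drop the hint,
because it no longer describes the memory actually used. The sketch below is
a hypothetical driver write path showing that rule; need_bounce_buffer() is
an invented placeholder, and the real examples are the mirror and raw-format
hunks further down.

    static int coroutine_fn
    my_driver_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
                         QEMUIOVector *qiov, BdrvRequestFlags flags)
    {
        QEMUIOVector bounce_qiov;
        void *bounce_buf = NULL;
        int ret;

        if (need_bounce_buffer(bs, offset, bytes)) {
            bounce_buf = qemu_blockalign(bs, bytes);
            qemu_iovec_to_buf(qiov, 0, bounce_buf, bytes);
            qemu_iovec_init(&bounce_qiov, 1);
            qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
            qiov = &bounce_qiov;

            /* The bounce buffer was never registered, so clear the hint */
            flags &= ~BDRV_REQ_REGISTERED_BUF;
        }

        ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);

        if (bounce_buf) {
            qemu_iovec_destroy(&bounce_qiov);
            qemu_vfree(bounce_buf);
        }
        return ret;
    }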
diff --git a/include/block/block-common.h b/include/block/block-common.h
52
index XXXXXXX..XXXXXXX 100644
53
--- a/include/block/block-common.h
54
+++ b/include/block/block-common.h
55
@@ -XXX,XX +XXX,XX @@ typedef enum {
56
*/
57
BDRV_REQ_MAY_UNMAP = 0x4,
58
59
+ /*
60
+ * An optimization hint when all QEMUIOVector elements are within
61
+ * previously registered bdrv_register_buf() memory ranges.
62
+ *
63
+ * Code that replaces the user's QEMUIOVector elements with bounce buffers
64
+ * must take care to clear this flag.
65
+ */
66
+ BDRV_REQ_REGISTERED_BUF = 0x8,
67
+
68
BDRV_REQ_FUA = 0x10,
69
BDRV_REQ_WRITE_COMPRESSED = 0x20,
70
71
diff --git a/block.c b/block.c
31
diff --git a/block.c b/block.c
72
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
73
--- a/block.c
33
--- a/block.c
74
+++ b/block.c
34
+++ b/block.c
75
@@ -XXX,XX +XXX,XX @@ static int bdrv_open_driver(BlockDriverState *bs, BlockDriver *drv,
35
@@ -XXX,XX +XXX,XX @@ int bdrv_inactivate_all(void)
76
goto open_failed;
36
BdrvNextIterator it;
37
int ret = 0;
38
int pass;
39
+ GSList *aio_ctxs = NULL, *ctx;
40
41
for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
42
- aio_context_acquire(bdrv_get_aio_context(bs));
43
+ AioContext *aio_context = bdrv_get_aio_context(bs);
44
+
45
+ if (!g_slist_find(aio_ctxs, aio_context)) {
46
+ aio_ctxs = g_slist_prepend(aio_ctxs, aio_context);
47
+ aio_context_acquire(aio_context);
48
+ }
77
}
49
}
78
50
79
+ assert(!(bs->supported_read_flags & ~BDRV_REQ_MASK));
51
/* We do two passes of inactivation. The first pass calls to drivers'
80
+ assert(!(bs->supported_write_flags & ~BDRV_REQ_MASK));
52
@@ -XXX,XX +XXX,XX @@ int bdrv_inactivate_all(void)
81
+
53
}
82
+ /*
54
83
+ * Always allow the BDRV_REQ_REGISTERED_BUF optimization hint. This saves
55
out:
84
+ * drivers that pass read/write requests through to a child the trouble of
56
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
85
+ * declaring support explicitly.
57
- aio_context_release(bdrv_get_aio_context(bs));
86
+ *
58
+ for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
87
+ * Drivers must not propagate this flag accidentally when they initiate I/O
59
+ AioContext *aio_context = ctx->data;
88
+ * to a bounce buffer. That case should be rare though.
60
+ aio_context_release(aio_context);
89
+ */
61
}
90
+ bs->supported_read_flags |= BDRV_REQ_REGISTERED_BUF;
62
+ g_slist_free(aio_ctxs);
91
+ bs->supported_write_flags |= BDRV_REQ_REGISTERED_BUF;
63
92
+
64
return ret;
93
ret = refresh_total_sectors(bs, bs->total_sectors);
94
if (ret < 0) {
95
error_setg_errno(errp, -ret, "Could not refresh total sector count");
96
diff --git a/block/blkverify.c b/block/blkverify.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/block/blkverify.c
99
+++ b/block/blkverify.c
100
@@ -XXX,XX +XXX,XX @@ blkverify_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
101
qemu_iovec_init(&raw_qiov, qiov->niov);
102
qemu_iovec_clone(&raw_qiov, qiov, buf);
103
104
- ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov, flags,
105
- false);
106
+ ret = blkverify_co_prwv(bs, &r, offset, bytes, qiov, &raw_qiov,
107
+ flags & ~BDRV_REQ_REGISTERED_BUF, false);
108
109
cmp_offset = qemu_iovec_compare(qiov, &raw_qiov);
110
if (cmp_offset != -1) {
111
diff --git a/block/crypto.c b/block/crypto.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/block/crypto.c
114
+++ b/block/crypto.c
115
@@ -XXX,XX +XXX,XX @@ block_crypto_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
116
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
117
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
118
119
- assert(!flags);
120
assert(payload_offset < INT64_MAX);
121
assert(QEMU_IS_ALIGNED(offset, sector_size));
122
assert(QEMU_IS_ALIGNED(bytes, sector_size));
123
@@ -XXX,XX +XXX,XX @@ block_crypto_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
124
uint64_t sector_size = qcrypto_block_get_sector_size(crypto->block);
125
uint64_t payload_offset = qcrypto_block_get_payload_offset(crypto->block);
126
127
- assert(!(flags & ~BDRV_REQ_FUA));
128
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
129
+
130
assert(payload_offset < INT64_MAX);
131
assert(QEMU_IS_ALIGNED(offset, sector_size));
132
assert(QEMU_IS_ALIGNED(bytes, sector_size));
133
diff --git a/block/file-posix.c b/block/file-posix.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/block/file-posix.c
136
+++ b/block/file-posix.c
137
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
138
int64_t bytes, QEMUIOVector *qiov,
139
BdrvRequestFlags flags)
140
{
141
- assert(flags == 0);
142
return raw_co_prw(bs, offset, bytes, qiov, QEMU_AIO_WRITE);
143
}
65
}
144
145
diff --git a/block/gluster.c b/block/gluster.c
146
index XXXXXXX..XXXXXXX 100644
147
--- a/block/gluster.c
148
+++ b/block/gluster.c
149
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qemu_gluster_co_writev(BlockDriverState *bs,
150
QEMUIOVector *qiov,
151
int flags)
152
{
153
- assert(!flags);
154
return qemu_gluster_co_rw(bs, sector_num, nb_sectors, qiov, 1);
155
}
156
157
diff --git a/block/io.c b/block/io.c
158
index XXXXXXX..XXXXXXX 100644
159
--- a/block/io.c
160
+++ b/block/io.c
161
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
162
int ret;
163
164
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
165
- assert(!(flags & ~BDRV_REQ_MASK));
166
- assert(!(flags & BDRV_REQ_NO_FALLBACK));
167
+ assert(!(flags & ~bs->supported_read_flags));
168
169
if (!drv) {
170
return -ENOMEDIUM;
171
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
172
BdrvRequestFlags flags)
173
{
174
BlockDriver *drv = bs->drv;
175
+ bool emulate_fua = false;
176
int64_t sector_num;
177
unsigned int nb_sectors;
178
QEMUIOVector local_qiov;
179
int ret;
180
181
bdrv_check_qiov_request(offset, bytes, qiov, qiov_offset, &error_abort);
182
- assert(!(flags & ~BDRV_REQ_MASK));
183
- assert(!(flags & BDRV_REQ_NO_FALLBACK));
184
185
if (!drv) {
186
return -ENOMEDIUM;
187
}
188
189
+ if ((flags & BDRV_REQ_FUA) &&
190
+ (~bs->supported_write_flags & BDRV_REQ_FUA)) {
191
+ flags &= ~BDRV_REQ_FUA;
192
+ emulate_fua = true;
193
+ }
194
+
195
+ flags &= bs->supported_write_flags;
196
+
197
if (drv->bdrv_co_pwritev_part) {
198
ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset,
199
- flags & bs->supported_write_flags);
200
- flags &= ~bs->supported_write_flags;
201
+ flags);
202
goto emulate_flags;
203
}
204
205
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
206
}
207
208
if (drv->bdrv_co_pwritev) {
209
- ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov,
210
- flags & bs->supported_write_flags);
211
- flags &= ~bs->supported_write_flags;
212
+ ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, flags);
213
goto emulate_flags;
214
}
215
216
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
217
.coroutine = qemu_coroutine_self(),
218
};
219
220
- acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov,
221
- flags & bs->supported_write_flags,
222
+ acb = drv->bdrv_aio_pwritev(bs, offset, bytes, qiov, flags,
223
bdrv_co_io_em_complete, &co);
224
- flags &= ~bs->supported_write_flags;
225
if (acb == NULL) {
226
ret = -EIO;
227
} else {
228
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
229
assert(bytes <= BDRV_REQUEST_MAX_BYTES);
230
231
assert(drv->bdrv_co_writev);
232
- ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov,
233
- flags & bs->supported_write_flags);
234
- flags &= ~bs->supported_write_flags;
235
+ ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov, flags);
236
237
emulate_flags:
238
- if (ret == 0 && (flags & BDRV_REQ_FUA)) {
239
+ if (ret == 0 && emulate_fua) {
240
ret = bdrv_co_flush(bs);
241
}
242
243
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
244
max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX),
245
align);
246
247
- /* TODO: We would need a per-BDS .supported_read_flags and
248
+ /*
249
+ * TODO: We would need a per-BDS .supported_read_flags and
250
* potential fallback support, if we ever implement any read flags
251
* to pass through to drivers. For now, there aren't any
252
- * passthrough flags. */
253
- assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
254
+ * passthrough flags except the BDRV_REQ_REGISTERED_BUF optimization hint.
255
+ */
256
+ assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH |
257
+ BDRV_REQ_REGISTERED_BUF)));
258
259
/* Handle Copy on Read and associated serialisation */
260
if (flags & BDRV_REQ_COPY_ON_READ) {
261
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
262
goto out;
263
}
264
265
- assert(!(flags & ~bs->supported_read_flags));
266
+ assert(!(flags & ~(bs->supported_read_flags | BDRV_REQ_REGISTERED_BUF)));
267
268
max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align);
269
if (bytes <= max_bytes && bytes <= max_transfer) {
270
@@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad)
271
static int bdrv_pad_request(BlockDriverState *bs,
272
QEMUIOVector **qiov, size_t *qiov_offset,
273
int64_t *offset, int64_t *bytes,
274
- BdrvRequestPadding *pad, bool *padded)
275
+ BdrvRequestPadding *pad, bool *padded,
276
+ BdrvRequestFlags *flags)
277
{
278
int ret;
279
280
@@ -XXX,XX +XXX,XX @@ static int bdrv_pad_request(BlockDriverState *bs,
281
if (padded) {
282
*padded = true;
283
}
284
+ if (flags) {
285
+ /* Can't use optimization hint with bounce buffer */
286
+ *flags &= ~BDRV_REQ_REGISTERED_BUF;
287
+ }
288
289
return 0;
290
}
291
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
292
}
293
294
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
295
- NULL);
296
+ NULL, &flags);
297
if (ret < 0) {
298
goto fail;
299
}
300
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
301
return -ENOTSUP;
302
}
303
304
+ /* By definition there is no user buffer so this flag doesn't make sense */
305
+ if (flags & BDRV_REQ_REGISTERED_BUF) {
306
+ return -EINVAL;
307
+ }
308
+
309
/* Invalidate the cached block-status data range if this write overlaps */
310
bdrv_bsc_invalidate_range(bs, offset, bytes);
311
312
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
313
bool padding;
314
BdrvRequestPadding pad;
315
316
+ /* This flag doesn't make sense for padding or zero writes */
317
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
318
+
319
padding = bdrv_init_padding(bs, offset, bytes, &pad);
320
if (padding) {
321
assert(!(flags & BDRV_REQ_NO_WAIT));
322
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
323
* alignment only if there is no ZERO flag.
324
*/
325
ret = bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad,
326
- &padded);
327
+ &padded, &flags);
328
if (ret < 0) {
329
return ret;
330
}
331
diff --git a/block/mirror.c b/block/mirror.c
332
index XXXXXXX..XXXXXXX 100644
333
--- a/block/mirror.c
334
+++ b/block/mirror.c
335
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_mirror_top_pwritev(BlockDriverState *bs,
336
qemu_iovec_init(&bounce_qiov, 1);
337
qemu_iovec_add(&bounce_qiov, bounce_buf, bytes);
338
qiov = &bounce_qiov;
339
+
340
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
341
}
342
343
ret = bdrv_mirror_top_do_write(bs, MIRROR_METHOD_COPY, offset, bytes, qiov,
344
diff --git a/block/nbd.c b/block/nbd.c
345
index XXXXXXX..XXXXXXX 100644
346
--- a/block/nbd.c
347
+++ b/block/nbd.c
348
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nbd_client_co_preadv(BlockDriverState *bs, int64_t offse
349
};
350
351
assert(bytes <= NBD_MAX_BUFFER_SIZE);
352
- assert(!flags);
353
354
if (!bytes) {
355
return 0;
356
diff --git a/block/parallels.c b/block/parallels.c
357
index XXXXXXX..XXXXXXX 100644
358
--- a/block/parallels.c
359
+++ b/block/parallels.c
360
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs,
361
QEMUIOVector hd_qiov;
362
int ret = 0;
363
364
- assert(!flags);
365
qemu_iovec_init(&hd_qiov, qiov->niov);
366
367
while (nb_sectors > 0) {
368
diff --git a/block/qcow.c b/block/qcow.c
369
index XXXXXXX..XXXXXXX 100644
370
--- a/block/qcow.c
371
+++ b/block/qcow.c
372
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_preadv(BlockDriverState *bs, int64_t offset,
373
uint8_t *buf;
374
void *orig_buf;
375
376
- assert(!flags);
377
if (qiov->niov > 1) {
378
buf = orig_buf = qemu_try_blockalign(bs, qiov->size);
379
if (buf == NULL) {
380
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow_co_pwritev(BlockDriverState *bs, int64_t offset,
381
uint8_t *buf;
382
void *orig_buf;
383
384
- assert(!flags);
385
s->cluster_cache_offset = -1; /* disable compressed cache */
386
387
/* We must always copy the iov when encrypting, so we
388
diff --git a/block/qed.c b/block/qed.c
389
index XXXXXXX..XXXXXXX 100644
390
--- a/block/qed.c
391
+++ b/block/qed.c
392
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_qed_co_writev(BlockDriverState *bs,
393
int64_t sector_num, int nb_sectors,
394
QEMUIOVector *qiov, int flags)
395
{
396
- assert(!flags);
397
return qed_co_request(bs, sector_num, qiov, nb_sectors, QED_AIOCB_WRITE);
398
}
399
400
diff --git a/block/raw-format.c b/block/raw-format.c
401
index XXXXXXX..XXXXXXX 100644
402
--- a/block/raw-format.c
403
+++ b/block/raw-format.c
404
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
405
qemu_iovec_add(&local_qiov, buf, 512);
406
qemu_iovec_concat(&local_qiov, qiov, 512, qiov->size - 512);
407
qiov = &local_qiov;
408
+
409
+ flags &= ~BDRV_REQ_REGISTERED_BUF;
410
}
411
412
ret = raw_adjust_offset(bs, &offset, bytes, true);
413
diff --git a/block/replication.c b/block/replication.c
414
index XXXXXXX..XXXXXXX 100644
415
--- a/block/replication.c
416
+++ b/block/replication.c
417
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int replication_co_writev(BlockDriverState *bs,
418
int ret;
419
int64_t n;
420
421
- assert(!flags);
422
ret = replication_get_io_status(s);
423
if (ret < 0) {
424
goto out;
425
diff --git a/block/ssh.c b/block/ssh.c
426
index XXXXXXX..XXXXXXX 100644
427
--- a/block/ssh.c
428
+++ b/block/ssh.c
429
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_co_writev(BlockDriverState *bs,
430
BDRVSSHState *s = bs->opaque;
431
int ret;
432
433
- assert(!flags);
434
qemu_co_mutex_lock(&s->lock);
435
ret = ssh_write(s, bs, sector_num * BDRV_SECTOR_SIZE,
436
nb_sectors * BDRV_SECTOR_SIZE, qiov);
437
diff --git a/block/vhdx.c b/block/vhdx.c
438
index XXXXXXX..XXXXXXX 100644
439
--- a/block/vhdx.c
440
+++ b/block/vhdx.c
441
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int vhdx_co_writev(BlockDriverState *bs, int64_t sector_num,
442
uint64_t bat_prior_offset = 0;
443
bool bat_update = false;
444
445
- assert(!flags);
446
qemu_iovec_init(&hd_qiov, qiov->niov);
447
448
qemu_co_mutex_lock(&s->lock);
449
--
2.37.3

--
2.14.3
Make list traversal work when a callback removes a notifier
mid-traversal. This is a cleanup to prevent bugs in the future.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: David Hildenbrand <david@redhat.com>
Message-id: 20221013185908.1297568-9-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/core/numa.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

See the patch for why nested AioContext locking is no longer allowed.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171207201320.19284-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 docs/devel/multiple-iothreads.txt | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)
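The reason the _SAFE variant matters is that a notifier callback may remove
its own notifier while the list is being walked; the BlockRAMRegistrar
failure path elsewhere in this series does exactly that. A minimal sketch of
the pattern (the function name here is hypothetical, the macro and list are
the real ones used in the hunk below):

    static void notify_ram_block_added(void *host, size_t size, size_t max_size)
    {
        RAMBlockNotifier *notifier;
        RAMBlockNotifier *next;

        /*
         * QLIST_FOREACH reads notifier->next only after the body has run, so
         * a callback that calls ram_block_notifier_remove(notifier) would
         * break the traversal. QLIST_FOREACH_SAFE caches the next element
         * before invoking the callback.
         */
        QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
            if (notifier->ram_block_added) {
                notifier->ram_block_added(notifier, host, size, max_size);
            }
        }
    }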
diff --git a/hw/core/numa.c b/hw/core/numa.c
11
diff --git a/docs/devel/multiple-iothreads.txt b/docs/devel/multiple-iothreads.txt
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/core/numa.c
13
--- a/docs/devel/multiple-iothreads.txt
15
+++ b/hw/core/numa.c
14
+++ b/docs/devel/multiple-iothreads.txt
16
@@ -XXX,XX +XXX,XX @@ void ram_block_notifier_remove(RAMBlockNotifier *n)
15
@@ -XXX,XX +XXX,XX @@
17
void ram_block_notify_add(void *host, size_t size, size_t max_size)
16
-Copyright (c) 2014 Red Hat Inc.
18
{
17
+Copyright (c) 2014-2017 Red Hat Inc.
19
RAMBlockNotifier *notifier;
18
20
+ RAMBlockNotifier *next;
19
This work is licensed under the terms of the GNU GPL, version 2 or later. See
21
20
the COPYING file in the top-level directory.
22
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
21
@@ -XXX,XX +XXX,XX @@ aio_context_acquire()/aio_context_release() for mutual exclusion. Once the
23
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
22
context is acquired no other thread can access it or run event loop iterations
24
if (notifier->ram_block_added) {
23
in this AioContext.
25
notifier->ram_block_added(notifier, host, size, max_size);
24
26
}
25
-aio_context_acquire()/aio_context_release() calls may be nested. This
27
@@ -XXX,XX +XXX,XX @@ void ram_block_notify_add(void *host, size_t size, size_t max_size)
26
-means you can call them if you're not sure whether #2 applies.
28
void ram_block_notify_remove(void *host, size_t size, size_t max_size)
27
+Legacy code sometimes nests aio_context_acquire()/aio_context_release() calls.
29
{
28
+Do not use nesting anymore, it is incompatible with the BDRV_POLL_WHILE() macro
30
RAMBlockNotifier *notifier;
29
+used in the block layer and can lead to hangs.
31
+ RAMBlockNotifier *next;
30
32
31
There is currently no lock ordering rule if a thread needs to acquire multiple
33
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
32
AioContexts simultaneously. Therefore, it is only safe for code holding the
34
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
35
if (notifier->ram_block_removed) {
36
notifier->ram_block_removed(notifier, host, size, max_size);
37
}
38
@@ -XXX,XX +XXX,XX @@ void ram_block_notify_remove(void *host, size_t size, size_t max_size)
39
void ram_block_notify_resize(void *host, size_t old_size, size_t new_size)
40
{
41
RAMBlockNotifier *notifier;
42
+ RAMBlockNotifier *next;
43
44
- QLIST_FOREACH(notifier, &ram_list.ramblock_notifiers, next) {
45
+ QLIST_FOREACH_SAFE(notifier, &ram_list.ramblock_notifiers, next, next) {
46
if (notifier->ram_block_resized) {
47
notifier->ram_block_resized(notifier, host, old_size, new_size);
48
}
49
--
2.37.3

--
2.14.3
Add a function to get the file descriptor for a RAMBlock. Device
emulation code typically uses the MemoryRegion APIs but vhost-style code
may use RAMBlock directly for sharing guest memory with another process.

This new API will be used by the libblkio block driver so it can share
guest memory via .bdrv_register_buf().

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20221013185908.1297568-11-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/exec/cpu-common.h | 1 +
 softmmu/physmem.c | 5 +++++
 2 files changed, 6 insertions(+)

When a node is already associated with a BlockBackend the
x-blockdev-set-iothread command refuses to set the IOThread. This is to
prevent accidentally changing the IOThread when the nodes are in use.

When the nodes are created with -drive they automatically get a
BlockBackend. In that case we know nothing is using them yet and it's
safe to set the IOThread. Add a force boolean to override the check.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171207201320.19284-4-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/block-core.json | 6 +++++-
 blockdev.c | 11 ++++++-----
 2 files changed, 11 insertions(+), 6 deletions(-)
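As a usage sketch for the new helper described above: this is the lookup
sequence the libblkio driver's .bdrv_register_buf() implementation elsewhere
in this series performs to turn a guest buffer pointer into an fd it can
hand to a vhost-user-blk or vdpa-blk backend. host_buf_to_fd() is an
invented wrapper name.

    #include "exec/cpu-common.h"

    static int host_buf_to_fd(void *host, ram_addr_t *fd_offset)
    {
        RAMBlock *rb = qemu_ram_block_from_host(host, false, fd_offset);

        if (!rb) {
            return -1; /* not guest RAM at all */
        }

        /* Returns -1 for RAMBlocks without a backing fd (e.g. pc-bios) */
        return qemu_ram_get_fd(rb);
    }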
diff --git a/include/exec/cpu-common.h b/include/exec/cpu-common.h
18
diff --git a/qapi/block-core.json b/qapi/block-core.json
17
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
18
--- a/include/exec/cpu-common.h
20
--- a/qapi/block-core.json
19
+++ b/include/exec/cpu-common.h
21
+++ b/qapi/block-core.json
20
@@ -XXX,XX +XXX,XX @@ void qemu_ram_set_uf_zeroable(RAMBlock *rb);
22
@@ -XXX,XX +XXX,XX @@
21
bool qemu_ram_is_migratable(RAMBlock *rb);
23
#
22
void qemu_ram_set_migratable(RAMBlock *rb);
24
# @iothread: the name of the IOThread object or null for the main loop
23
void qemu_ram_unset_migratable(RAMBlock *rb);
25
#
24
+int qemu_ram_get_fd(RAMBlock *rb);
26
+# @force: true if the node and its children should be moved when a BlockBackend
25
27
+# is already attached
26
size_t qemu_ram_pagesize(RAMBlock *block);
28
+#
27
size_t qemu_ram_pagesize_largest(void);
29
# Note: this command is experimental and intended for test cases that need
28
diff --git a/softmmu/physmem.c b/softmmu/physmem.c
30
# control over IOThreads only.
31
#
32
@@ -XXX,XX +XXX,XX @@
33
##
34
{ 'command': 'x-blockdev-set-iothread',
35
'data' : { 'node-name': 'str',
36
- 'iothread': 'StrOrNull' } }
37
+ 'iothread': 'StrOrNull',
38
+ '*force': 'bool' } }
39
diff --git a/blockdev.c b/blockdev.c
29
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
30
--- a/softmmu/physmem.c
41
--- a/blockdev.c
31
+++ b/softmmu/physmem.c
42
+++ b/blockdev.c
32
@@ -XXX,XX +XXX,XX @@ void qemu_ram_unset_migratable(RAMBlock *rb)
43
@@ -XXX,XX +XXX,XX @@ BlockJobInfoList *qmp_query_block_jobs(Error **errp)
33
rb->flags &= ~RAM_MIGRATABLE;
34
}
44
}
35
45
36
+int qemu_ram_get_fd(RAMBlock *rb)
46
void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
37
+{
47
- Error **errp)
38
+ return rb->fd;
48
+ bool has_force, bool force, Error **errp)
39
+}
40
+
41
/* Called with iothread lock held. */
42
void qemu_ram_set_idstr(RAMBlock *new_block, const char *name, DeviceState *dev)
43
{
49
{
50
AioContext *old_context;
51
AioContext *new_context;
52
@@ -XXX,XX +XXX,XX @@ void qmp_x_blockdev_set_iothread(const char *node_name, StrOrNull *iothread,
53
return;
54
}
55
56
- /* If we want to allow more extreme test scenarios this guard could be
57
- * removed. For now it protects against accidents. */
58
- if (bdrv_has_blk(bs)) {
59
- error_setg(errp, "Node %s is in use", node_name);
60
+ /* Protects against accidents. */
61
+ if (!(has_force && force) && bdrv_has_blk(bs)) {
62
+ error_setg(errp, "Node %s is associated with a BlockBackend and could "
63
+ "be in use (use force=true to override this check)",
64
+ node_name);
65
return;
66
}
67
44
--
2.37.3

--
2.14.3
Use the enum type so GDB displays the enum members instead of printing a
numeric constant.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Message-id: 20221013185908.1297568-6-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/block_int-common.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

The VM.add_object() method can be used to add IOThreads or memory
backend objects.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171207201320.19284-5-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/qemu-iotests/iotests.py | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/include/block/block_int-common.h b/include/block/block_int-common.h
12
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/include/block/block_int-common.h
14
--- a/tests/qemu-iotests/iotests.py
15
+++ b/include/block/block_int-common.h
15
+++ b/tests/qemu-iotests/iotests.py
16
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
16
@@ -XXX,XX +XXX,XX @@ class VM(qtest.QEMUQtestMachine):
17
/*
17
socket_scm_helper=socket_scm_helper)
18
* Flags honored during pread
18
self._num_drives = 0
19
*/
19
20
- unsigned int supported_read_flags;
20
+ def add_object(self, opts):
21
+ BdrvRequestFlags supported_read_flags;
21
+ self._args.append('-object')
22
/*
22
+ self._args.append(opts)
23
* Flags honored during pwrite (so far: BDRV_REQ_FUA,
23
+ return self
24
* BDRV_REQ_WRITE_UNCHANGED).
24
+
25
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
25
def add_device(self, opts):
26
* flag), or they have to explicitly take the WRITE permission for
26
self._args.append('-device')
27
* their children.
27
self._args.append(opts)
28
*/
29
- unsigned int supported_write_flags;
30
+ BdrvRequestFlags supported_write_flags;
31
/*
32
* Flags honored during pwrite_zeroes (so far: BDRV_REQ_FUA,
33
* BDRV_REQ_MAY_UNMAP, BDRV_REQ_WRITE_UNCHANGED)
34
*/
35
- unsigned int supported_zero_flags;
36
+ BdrvRequestFlags supported_zero_flags;
37
/*
38
* Flags honoured during truncate (so far: BDRV_REQ_ZERO_WRITE).
39
*
40
@@ -XXX,XX +XXX,XX @@ struct BlockDriverState {
41
* that any added space reads as all zeros. If this can't be guaranteed,
42
* the operation must fail.
43
*/
44
- unsigned int supported_truncate_flags;
45
+ BdrvRequestFlags supported_truncate_flags;
46
47
/* the following member gives a name to every node on the bs graph. */
48
char node_name[32];
49
--
2.37.3

--
2.14.3
When a RAMBlockNotifier is added, ->ram_block_added() is called with all
existing RAMBlocks. There is no equivalent ->ram_block_removed() call
when a RAMBlockNotifier is removed.

The util/vfio-helpers.c code (the sole user of RAMBlockNotifier) is fine
with this asymmetry because it does not rely on RAMBlockNotifier for
cleanup. It walks its internal list of DMA mappings and unmaps them by
itself.

Future users of RAMBlockNotifier may not have an internal data structure
that records added RAMBlocks so they will need ->ram_block_removed()
callbacks.

This patch makes ram_block_notifier_remove() symmetric with respect to
callbacks. Now util/vfio-helpers.c needs to unmap remaining DMA mappings
after ram_block_notifier_remove() has been called. This is necessary
since users like block/nvme.c may create additional DMA mappings that do
not originate from the RAMBlockNotifier.

Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20221013185908.1297568-4-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/core/numa.c | 17 +++++++++++++++++
 util/vfio-helpers.c | 5 ++++-
 2 files changed, 21 insertions(+), 1 deletion(-)

There is a small chance that iothread_stop() hangs as follows:

  Thread 3 (Thread 0x7f63eba5f700 (LWP 16105)):
  #0 0x00007f64012c09b6 in ppoll () at /lib64/libc.so.6
  #1 0x000055959992eac9 in ppoll (__ss=0x0, __timeout=0x0, __nfds=<optimized out>, __fds=<optimized out>) at /usr/include/bits/poll2.h:77
  #2 0x000055959992eac9 in qemu_poll_ns (fds=<optimized out>, nfds=<optimized out>, timeout=<optimized out>) at util/qemu-timer.c:322
  #3 0x0000559599930711 in aio_poll (ctx=0x55959bdb83c0, blocking=blocking@entry=true) at util/aio-posix.c:629
  #4 0x00005595996806fe in iothread_run (opaque=0x55959bd78400) at iothread.c:59
  #5 0x00007f640159f609 in start_thread () at /lib64/libpthread.so.0
  #6 0x00007f64012cce6f in clone () at /lib64/libc.so.6

  Thread 1 (Thread 0x7f640b45b280 (LWP 16103)):
  #0 0x00007f64015a0b6d in pthread_join () at /lib64/libpthread.so.0
  #1 0x00005595999332ef in qemu_thread_join (thread=<optimized out>) at util/qemu-thread-posix.c:547
  #2 0x00005595996808ae in iothread_stop (iothread=<optimized out>) at iothread.c:91
  #3 0x000055959968094d in iothread_stop_iter (object=<optimized out>, opaque=<optimized out>) at iothread.c:102
  #4 0x0000559599857d97 in do_object_child_foreach (obj=obj@entry=0x55959bdb8100, fn=fn@entry=0x559599680930 <iothread_stop_iter>, opaque=opaque@entry=0x0, recurse=recurse@entry=false) at qom/object.c:852
  #5 0x0000559599859477 in object_child_foreach (obj=obj@entry=0x55959bdb8100, fn=fn@entry=0x559599680930 <iothread_stop_iter>, opaque=opaque@entry=0x0) at qom/object.c:867
  #6 0x0000559599680a6e in iothread_stop_all () at iothread.c:341
  #7 0x000055959955b1d5 in main (argc=<optimized out>, argv=<optimized out>, envp=<optimized out>) at vl.c:4913

The relevant code from iothread_run() is:

  while (!atomic_read(&iothread->stopping)) {
      aio_poll(iothread->ctx, true);

and iothread_stop():

  iothread->stopping = true;
  aio_notify(iothread->ctx);
  ...
  qemu_thread_join(&iothread->thread);

The following scenario can occur:

1. IOThread:
   while (!atomic_read(&iothread->stopping)) -> stopping=false

2. Main loop:
   iothread->stopping = true;
   aio_notify(iothread->ctx);

3. IOThread:
   aio_poll(iothread->ctx, true); -> hang

The bug is explained by the AioContext->notify_me doc comments:

  "If this field is 0, everything (file descriptors, bottom halves,
  timers) will be re-evaluated before the next blocking poll(), thus the
  event_notifier_set call can be skipped."

The problem is that "everything" does not include checking
iothread->stopping. This means iothread_run() will block in aio_poll()
if aio_notify() was called just before aio_poll().

This patch fixes the hang by replacing aio_notify() with
aio_bh_schedule_oneshot(). This makes aio_poll() or g_main_loop_run()
return.

Implementing this properly required a new bool running flag. The new
flag prevents races that are tricky if we try to use iothread->stopping.
Now iothread->stopping is purely for iothread_stop() and
iothread->running is purely for the iothread_run() thread.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171207201320.19284-6-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/sysemu/iothread.h | 3 ++-
 iothread.c | 20 +++++++++++++++-----
 2 files changed, 17 insertions(+), 6 deletions(-)
29
diff --git a/hw/core/numa.c b/hw/core/numa.c
74
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
30
index XXXXXXX..XXXXXXX 100644
75
index XXXXXXX..XXXXXXX 100644
31
--- a/hw/core/numa.c
76
--- a/include/sysemu/iothread.h
32
+++ b/hw/core/numa.c
77
+++ b/include/sysemu/iothread.h
33
@@ -XXX,XX +XXX,XX @@ static int ram_block_notify_add_single(RAMBlock *rb, void *opaque)
78
@@ -XXX,XX +XXX,XX @@ typedef struct {
34
return 0;
79
GOnce once;
80
QemuMutex init_done_lock;
81
QemuCond init_done_cond; /* is thread initialization done? */
82
- bool stopping;
83
+ bool stopping; /* has iothread_stop() been called? */
84
+ bool running; /* should iothread_run() continue? */
85
int thread_id;
86
87
/* AioContext poll parameters */
88
diff --git a/iothread.c b/iothread.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/iothread.c
91
+++ b/iothread.c
92
@@ -XXX,XX +XXX,XX @@ static void *iothread_run(void *opaque)
93
qemu_cond_signal(&iothread->init_done_cond);
94
qemu_mutex_unlock(&iothread->init_done_lock);
95
96
- while (!atomic_read(&iothread->stopping)) {
97
+ while (iothread->running) {
98
aio_poll(iothread->ctx, true);
99
100
if (atomic_read(&iothread->worker_context)) {
101
@@ -XXX,XX +XXX,XX @@ static void *iothread_run(void *opaque)
102
return NULL;
35
}
103
}
36
104
37
+static int ram_block_notify_remove_single(RAMBlock *rb, void *opaque)
105
+/* Runs in iothread_run() thread */
106
+static void iothread_stop_bh(void *opaque)
38
+{
107
+{
39
+ const ram_addr_t max_size = qemu_ram_get_max_length(rb);
108
+ IOThread *iothread = opaque;
40
+ const ram_addr_t size = qemu_ram_get_used_length(rb);
41
+ void *host = qemu_ram_get_host_addr(rb);
42
+ RAMBlockNotifier *notifier = opaque;
43
+
109
+
44
+ if (host) {
110
+ iothread->running = false; /* stop iothread_run() */
45
+ notifier->ram_block_removed(notifier, host, size, max_size);
111
+
112
+ if (iothread->main_loop) {
113
+ g_main_loop_quit(iothread->main_loop);
46
+ }
114
+ }
47
+ return 0;
48
+}
115
+}
49
+
116
+
50
void ram_block_notifier_add(RAMBlockNotifier *n)
117
void iothread_stop(IOThread *iothread)
51
{
118
{
52
QLIST_INSERT_HEAD(&ram_list.ramblock_notifiers, n, next);
119
if (!iothread->ctx || iothread->stopping) {
53
@@ -XXX,XX +XXX,XX @@ void ram_block_notifier_add(RAMBlockNotifier *n)
54
void ram_block_notifier_remove(RAMBlockNotifier *n)
55
{
56
QLIST_REMOVE(n, next);
57
+
58
+ if (n->ram_block_removed) {
59
+ qemu_ram_foreach_block(ram_block_notify_remove_single, n);
60
+ }
61
}
62
63
void ram_block_notify_add(void *host, size_t size, size_t max_size)
64
diff --git a/util/vfio-helpers.c b/util/vfio-helpers.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/util/vfio-helpers.c
67
+++ b/util/vfio-helpers.c
68
@@ -XXX,XX +XXX,XX @@ void qemu_vfio_close(QEMUVFIOState *s)
69
if (!s) {
70
return;
120
return;
71
}
121
}
72
+
122
iothread->stopping = true;
73
+ ram_block_notifier_remove(&s->ram_notifier);
123
- aio_notify(iothread->ctx);
74
+
124
- if (atomic_read(&iothread->main_loop)) {
75
for (i = 0; i < s->nr_mappings; ++i) {
125
- g_main_loop_quit(iothread->main_loop);
76
qemu_vfio_undo_mapping(s, &s->mappings[i], NULL);
126
- }
77
}
127
+ aio_bh_schedule_oneshot(iothread->ctx, iothread_stop_bh, iothread);
78
- ram_block_notifier_remove(&s->ram_notifier);
128
qemu_thread_join(&iothread->thread);
79
+
129
}
80
g_free(s->usable_iova_ranges);
130
81
s->nb_iova_ranges = 0;
131
@@ -XXX,XX +XXX,XX @@ static void iothread_complete(UserCreatable *obj, Error **errp)
82
qemu_vfio_reset(s);
132
char *name, *thread_name;
133
134
iothread->stopping = false;
135
+ iothread->running = true;
136
iothread->thread_id = -1;
137
iothread->ctx = aio_context_new(&local_error);
138
if (!iothread->ctx) {
83
--
139
--
84
2.37.3
140
2.14.3
141
142
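
Not part of the series: a minimal sketch of the kind of RAMBlockNotifier user that benefits from the new ->ram_block_removed() symmetry, assuming the callback signatures visible in the hw/core/numa.c hunk above. The notifier and function names are hypothetical.

    #include "qemu/osdep.h"
    #include "exec/ramlist.h"

    /* Hypothetical notifier that mirrors guest RAM registration into a
     * backend and keeps no list of its own, so it relies on the callbacks
     * being replayed on add and now also on remove.
     */
    static void example_ram_block_added(RAMBlockNotifier *n, void *host,
                                        size_t size, size_t max_size)
    {
        /* register [host, host + max_size) with the backend */
    }

    static void example_ram_block_removed(RAMBlockNotifier *n, void *host,
                                          size_t size, size_t max_size)
    {
        /* undo the registration; with this patch it also runs for blocks
         * still present when ram_block_notifier_remove() is called */
    }

    static RAMBlockNotifier example_notifier = {
        .ram_block_added = example_ram_block_added,
        .ram_block_removed = example_ram_block_removed,
    };

    /* ram_block_notifier_add(&example_notifier) replays existing RAMBlocks
     * through ->ram_block_added(); ram_block_notifier_remove() now replays
     * the remaining ones through ->ram_block_removed() as well.
     */
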
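
Likewise not part of the series: the iothread_stop() fix above works because the bottom half runs inside the IOThread's own AioContext, so a blocking aio_poll() is guaranteed to wake up and re-evaluate the running flag. A stripped-down sketch of that pattern, using a hypothetical Worker type rather than QEMU's IOThread:

    #include "qemu/osdep.h"
    #include "block/aio.h"

    typedef struct {
        AioContext *ctx;
        bool running;   /* only written from the worker's own context */
    } Worker;

    static void worker_stop_bh(void *opaque)
    {
        Worker *w = opaque;

        w->running = false;   /* next loop iteration exits */
    }

    static void *worker_run(void *opaque)
    {
        Worker *w = opaque;

        while (w->running) {
            aio_poll(w->ctx, true);   /* BHs scheduled into ctx run here */
        }
        return NULL;
    }

    static void worker_request_stop(Worker *w)
    {
        /* Unlike a bare flag plus aio_notify(), the one-shot BH both wakes
         * aio_poll() and performs the state change in the worker thread.
         */
        aio_bh_schedule_oneshot(w->ctx, worker_stop_bh, w);
    }
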
The blkio block driver will need to look up the file descriptor for a
given pointer. This is possible in softmmu builds where the RAMBlock API
is available for querying guest RAM.

Add stubs so tools like qemu-img that link the block layer still build
successfully. In this case there is no guest RAM but that is fine.
Bounce buffers and their file descriptors will be allocated with
libblkio's blkio_alloc_mem_region() so we won't rely on QEMU's
qemu_ram_get_fd() in that case.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20221013185908.1297568-12-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 stubs/physmem.c   | 13 +++++++++++++
 stubs/meson.build |  1 +
 2 files changed, 14 insertions(+)
 create mode 100644 stubs/physmem.c

diff --git a/stubs/physmem.c b/stubs/physmem.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/stubs/physmem.c
@@ -XXX,XX +XXX,XX @@
+#include "qemu/osdep.h"
+#include "exec/cpu-common.h"
+
+RAMBlock *qemu_ram_block_from_host(void *ptr, bool round_offset,
+                                   ram_addr_t *offset)
+{
+    return NULL;
+}
+
+int qemu_ram_get_fd(RAMBlock *rb)
+{
+    return -1;
+}
diff --git a/stubs/meson.build b/stubs/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -XXX,XX +XXX,XX @@ stub_ss.add(files('migr-blocker.c'))
 stub_ss.add(files('module-opts.c'))
 stub_ss.add(files('monitor.c'))
 stub_ss.add(files('monitor-core.c'))
+stub_ss.add(files('physmem.c'))
 stub_ss.add(files('qemu-timer-notify-cb.c'))
 stub_ss.add(files('qmp_memory_device.c'))
 stub_ss.add(files('qmp-command-available.c'))
--
2.37.3

This test case will prevent future regressions with savevm and
IOThreads.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20171207201320.19284-7-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 tests/qemu-iotests/203     | 59 ++++++++++++++++++++++++++++++++++++++++++++++
 tests/qemu-iotests/203.out |  6 +++++
 tests/qemu-iotests/group   |  1 +
 3 files changed, 66 insertions(+)
 create mode 100755 tests/qemu-iotests/203
 create mode 100644 tests/qemu-iotests/203.out

diff --git a/tests/qemu-iotests/203 b/tests/qemu-iotests/203
new file mode 100755
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/203
@@ -XXX,XX +XXX,XX @@
+#!/usr/bin/env python
+#
+# Copyright (C) 2017 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Stefan Hajnoczi <stefanha@redhat.com>
+#
+# Check that QMP 'migrate' with multiple drives on a single IOThread completes
+# successfully.  This particular command triggered a hang in the source QEMU
+# process due to recursive AioContext locking in bdrv_invalidate_all() and
+# BDRV_POLL_WHILE().
+
+import iotests
+
+iotests.verify_image_format(supported_fmts=['qcow2'])
+iotests.verify_platform(['linux'])
+
+with iotests.FilePath('disk0.img') as disk0_img_path, \
+     iotests.FilePath('disk1.img') as disk1_img_path, \
+     iotests.VM() as vm:
+
+    img_size = '10M'
+    iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk0_img_path, img_size)
+    iotests.qemu_img_pipe('create', '-f', iotests.imgfmt, disk1_img_path, img_size)
+
+    iotests.log('Launching VM...')
+    (vm.add_object('iothread,id=iothread0')
+       .add_drive(disk0_img_path, 'node-name=drive0-node', interface='none')
+       .add_drive(disk1_img_path, 'node-name=drive1-node', interface='none')
+       .launch())
+
+    iotests.log('Setting IOThreads...')
+    iotests.log(vm.qmp('x-blockdev-set-iothread',
+                       node_name='drive0-node', iothread='iothread0',
+                       force=True))
+    iotests.log(vm.qmp('x-blockdev-set-iothread',
+                       node_name='drive1-node', iothread='iothread0',
+                       force=True))
+
+    iotests.log('Starting migration...')
+    iotests.log(vm.qmp('migrate', uri='exec:cat >/dev/null'))
+    while True:
+        vm.get_qmp_event(wait=60.0)
+        result = vm.qmp('query-migrate')
+        status = result.get('return', {}).get('status', None)
+        if status == 'completed':
+            break
diff --git a/tests/qemu-iotests/203.out b/tests/qemu-iotests/203.out
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/203.out
@@ -XXX,XX +XXX,XX @@
+Launching VM...
+Setting IOThreads...
+{u'return': {}}
+{u'return': {}}
+Starting migration...
+{u'return': {}}
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
index XXXXXXX..XXXXXXX 100644
--- a/tests/qemu-iotests/group
+++ b/tests/qemu-iotests/group
@@ -XXX,XX +XXX,XX @@
 198 rw auto
 200 rw auto
 202 rw auto quick
+203 rw auto
--
2.14.3
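
Not part of the series: a sketch of the kind of lookup the physmem stubs above keep linkable in tools like qemu-img. The helper name is hypothetical; qemu_ram_block_from_host() and qemu_ram_get_fd() are the two calls being stubbed, which in softmmu builds resolve a guest RAM pointer to its backing file descriptor.

    #include "qemu/osdep.h"
    #include "exec/cpu-common.h"

    /* Hypothetical helper: map a buffer pointer to the fd and offset
     * backing it, or return -1 when the memory is not guest RAM (e.g. in
     * qemu-img, where the stubs above return NULL and -1).
     */
    static int example_buf_to_fd(void *buf, ram_addr_t *offset)
    {
        RAMBlock *rb = qemu_ram_block_from_host(buf, false, offset);

        if (!rb) {
            return -1;
        }
        return qemu_ram_get_fd(rb);
    }
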