The following changes since commit 825b96dbcee23d134b691fc75618b59c5f53da32:

  Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-03-11 09:32:07 +0800)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to a93c04f3cbe690877b3297a9df4767aa811fcd97:

  virtio-scsi: only expose cmd vqs via iothread-vq-mapping (2025-03-11 15:49:22 +0100)

----------------------------------------------------------------
Block layer patches

- virtio-scsi: add iothread-vq-mapping parameter
- Improve writethrough performance
- Fix missing zero init in bdrv_snapshot_goto()
- Code cleanup and iotests fixes

----------------------------------------------------------------
Kevin Wolf (8):
      block: Remove unused blk_op_is_blocked()
      block: Zero block driver state before reopening
      file-posix: Support FUA writes
      block/io: Ignore FUA with cache.no-flush=on
      aio: Create AioPolledEvent
      aio-posix: Factor out adjust_polling_time()
      aio-posix: Separate AioPolledEvent per AioHandler
      aio-posix: Adjust polling time also for new handlers

Stefan Hajnoczi (13):
      scsi-disk: drop unused SCSIDiskState->bh field
      dma: use current AioContext for dma_blk_io()
      scsi: track per-SCSIRequest AioContext
      scsi: introduce requests_lock
      virtio-scsi: introduce event and ctrl virtqueue locks
      virtio-scsi: protect events_dropped field
      virtio-scsi: perform TMFs in appropriate AioContexts
      virtio-blk: extract cleanup_iothread_vq_mapping() function
      virtio-blk: tidy up iothread_vq_mapping functions
      virtio: extract iothread-vq-mapping.h API
      virtio-scsi: add iothread-vq-mapping parameter
      virtio-scsi: handle ctrl virtqueue in main loop
      virtio-scsi: only expose cmd vqs via iothread-vq-mapping

Thomas Huth (1):
      iotests: Limit qsd-migrate to working formats

 include/block/aio.h                         |   5 +-
 include/block/raw-aio.h                     |   8 +-
 include/hw/scsi/scsi.h                      |   8 +-
 include/hw/virtio/iothread-vq-mapping.h     |  45 +++
 include/hw/virtio/virtio-scsi.h             |  15 +-
 include/system/block-backend-global-state.h |   1 -
 include/system/dma.h                        |   3 +-
 util/aio-posix.h                            |   1 +
 block/block-backend.c                       |  12 -
 block/file-posix.c                          |  26 +-
 block/io.c                                  |   4 +
 block/io_uring.c                            |  13 +-
 block/linux-aio.c                           |  24 +-
 block/snapshot.c                            |   1 +
 hw/block/virtio-blk.c                       | 132 +-------
 hw/ide/core.c                               |   3 +-
 hw/ide/macio.c                              |   3 +-
 hw/scsi/scsi-bus.c                          | 121 +++++--
 hw/scsi/scsi-disk.c                         |  24 +-
 hw/scsi/virtio-scsi-dataplane.c             | 103 ++++--
 hw/scsi/virtio-scsi.c                       | 502 ++++++++++++++++------------
 hw/virtio/iothread-vq-mapping.c             | 131 ++++++++
 system/dma-helpers.c                        |   8 +-
 util/aio-posix.c                            | 114 ++++---
 util/async.c                                |   1 -
 hw/virtio/meson.build                       |   1 +
 meson.build                                 |   4 +
 tests/qemu-iotests/tests/qsd-migrate        |   2 +-
 28 files changed, 803 insertions(+), 512 deletions(-)
 create mode 100644 include/hw/virtio/iothread-vq-mapping.h
 create mode 100644 hw/virtio/iothread-vq-mapping.c
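
For readers who want to try the headline feature, the new virtio-scsi
iothread-vq-mapping property uses the same syntax as the existing
virtio-blk property of the same name. The invocation below is an
illustrative sketch, not part of this series: the property is
list-valued and therefore needs the JSON form of -device, the queue
indexes refer to command virtqueues, and "vqs" can be left out to
spread the queues across the listed iothreads automatically.

    qemu-system-x86_64 ... \
        -object iothread,id=iot0 \
        -object iothread,id=iot1 \
        -device '{"driver": "virtio-scsi-pci", "id": "scsi0",
                  "iothread-vq-mapping": [
                      {"iothread": "iot0", "vqs": [0, 1]},
                      {"iothread": "iot1", "vqs": [2, 3]}]}'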
Commit fc4e394b28 removed the last caller of blk_op_is_blocked(). Remove
the now unused function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250206165331.379033-1-kwolf@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/system/block-backend-global-state.h |  1 -
 block/block-backend.c                       | 12 ------------
 2 files changed, 13 deletions(-)

diff --git a/include/system/block-backend-global-state.h b/include/system/block-backend-global-state.h
index XXXXXXX..XXXXXXX 100644
--- a/include/system/block-backend-global-state.h
+++ b/include/system/block-backend-global-state.h
@@ -XXX,XX +XXX,XX @@ bool blk_supports_write_perm(BlockBackend *blk);
 bool blk_is_sg(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 int blk_get_flags(BlockBackend *blk);
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
                         Error **errp);
 void blk_add_aio_context_notifier(BlockBackend *blk,
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ void *blk_blockalign(BlockBackend *blk, size_t size)
     return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
 }
 
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD_MAINLOOP();
-
-    if (!bs) {
-        return false;
-    }
-
-    return bdrv_op_is_blocked(bs, op, errp);
-}
 
 /**
  * Return BB's current AioContext. Note that this context may change
-- 
2.48.1
Block drivers assume in their .bdrv_open() implementation that their
state in bs->opaque has been zeroed; it is initially allocated with
g_malloc0() in bdrv_open_driver().

bdrv_snapshot_goto() needs to make sure that it is zeroed again before
calling drv->bdrv_open() to avoid that block drivers use stale values.

One symptom of this bug is VMDK running into a double free when the user
tries to apply an internal snapshot like 'qemu-img snapshot -a test
test.vmdk'. This should be a graceful error because VMDK doesn't support
internal snapshots.

==25507== Invalid free() / delete / delete[] / realloc()
==25507==    at 0x484B347: realloc (vg_replace_malloc.c:1801)
==25507==    by 0x54B592A: g_realloc (gmem.c:171)
==25507==    by 0x1B221D: vmdk_add_extent (../block/vmdk.c:570)
==25507==    by 0x1B1084: vmdk_open_sparse (../block/vmdk.c:1059)
==25507==    by 0x1AF3D8: vmdk_open (../block/vmdk.c:1371)
==25507==    by 0x1A2AE0: bdrv_snapshot_goto (../block/snapshot.c:299)
==25507==    by 0x205C77: img_snapshot (../qemu-img.c:3500)
==25507==    by 0x58FA087: (below main) (libc_start_call_main.h:58)
==25507==  Address 0x832f3e0 is 0 bytes inside a block of size 272 free'd
==25507==    at 0x4846B83: free (vg_replace_malloc.c:989)
==25507==    by 0x54AEAC4: g_free (gmem.c:208)
==25507==    by 0x1AF629: vmdk_close (../block/vmdk.c:2889)
==25507==    by 0x1A2A9C: bdrv_snapshot_goto (../block/snapshot.c:290)
==25507==    by 0x205C77: img_snapshot (../qemu-img.c:3500)
==25507==    by 0x58FA087: (below main) (libc_start_call_main.h:58)

This error was discovered by fuzzing qemu-img.

Cc: qemu-stable@nongnu.org
Closes: https://gitlab.com/qemu-project/qemu/-/issues/2853
Closes: https://gitlab.com/qemu-project/qemu/-/issues/2851
Reported-by: Denis Rastyogin <gerben@altlinux.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250310104858.28221-1-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/snapshot.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/snapshot.c b/block/snapshot.c
index XXXXXXX..XXXXXXX 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -XXX,XX +XXX,XX @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         bdrv_graph_wrunlock();
 
         ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
+        memset(bs->opaque, 0, drv->instance_size);
         open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
         qobject_unref(options);
         if (open_ret < 0) {
-- 
2.48.1
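
The valgrind log above is the classic stale-state pattern. As a minimal
standalone sketch (hypothetical names, not QEMU code), this is the shape
of the bug: an open function that assumes zeroed state, re-run on memory
that still holds a pointer freed by the preceding close.

    #include <stdlib.h>
    #include <string.h>

    struct demo_state {            /* stands in for bs->opaque */
        char *buf;
    };

    static void demo_open(struct demo_state *s)
    {
        /* like vmdk_add_extent(): grows an array it assumes is fresh */
        s->buf = realloc(s->buf, 16);
    }

    static void demo_close(struct demo_state *s)
    {
        free(s->buf);              /* leaves s->buf dangling */
    }

    int main(void)
    {
        struct demo_state s = {0};

        demo_open(&s);
        demo_close(&s);
        memset(&s, 0, sizeof(s));  /* the fix; without it, demo_open() */
        demo_open(&s);             /* hands a freed pointer to realloc() */
        demo_close(&s);
        return 0;
    }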
Until now, FUA was always emulated with a separate flush after the write
for file-posix. The overhead of processing a second request can reduce
performance significantly for a guest disk that has disabled the write
cache, especially if the host disk is already write through, too, and
the flush isn't actually doing anything.

Advertise support for REQ_FUA in write requests and implement it for
Linux AIO and io_uring using the RWF_DSYNC flag for write requests. The
thread pool still performs a separate fdatasync() call. This can be
improved later by using the pwritev2() syscall if available.

As an example, this is how fio numbers can be improved in some scenarios
with this patch (all using virtio-blk with cache=directsync on an nvme
block device for the VM, fio with ioengine=libaio,direct=1,sync=1):

                              |      old      | with FUA support
------------------------------+---------------+-------------------
bs=4k, iodepth=1, numjobs=1   |  45.6k iops   |    56.1k iops
bs=4k, iodepth=1, numjobs=16  | 183.3k iops   |   236.0k iops
bs=4k, iodepth=16, numjobs=1  | 258.4k iops   |   311.1k iops

However, not all scenarios are clear wins. On another slower disk I saw
little to no improvement. In fact, in two corner case scenarios, I even
observed a regression, which I however consider acceptable:

1. On slow host disks in a write through cache mode, when the guest is
   using virtio-blk in a separate iothread so that polling can be
   enabled, and each completion is quickly followed up with a new
   request (so that polling gets it), it can happen that enabling FUA
   makes things slower - the additional very fast no-op flush we used to
   have gave the adaptive polling algorithm a success so that it kept
   polling. Without it, we only have the slow write request, which
   disables polling. This is a problem in the polling algorithm that
   will be fixed later in this series.

2. With a high queue depth, it can be beneficial to have flush requests
   for another reason: The optimisation in bdrv_co_flush() that flushes
   only once per write generation acts as a synchronisation mechanism
   that lets all requests complete at the same time. This can result in
   better batching and if the disk is very fast (I only saw this with a
   null_blk backend), this can make up for the overhead of the flush and
   improve throughput. In theory, we could optionally introduce a
   similar artificial latency in the normal completion path to achieve
   the same kind of completion batching. This is not implemented in this
   series.

Compatibility is not a concern for io_uring, it has supported RWF_DSYNC
from the start. Linux AIO started supporting it in Linux 4.13 and libaio
0.3.111. The kernel is not a problem for any supported build platform,
so it's not necessary to add runtime checks. However, openSUSE is still
stuck with an older libaio version that would break the build. We must
detect this at build time to avoid build failures.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-2-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/raw-aio.h |  8 ++++++--
 block/file-posix.c      | 26 ++++++++++++++--------
 block/io_uring.c        | 13 ++++++++-----
 block/linux-aio.c       | 24 +++++++++++++++++++++---
 meson.build             |  4 ++++
 5 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -XXX,XX +XXX,XX @@
 #define QEMU_RAW_AIO_H
 
 #include "block/aio.h"
+#include "block/block-common.h"
 #include "qemu/iov.h"
 
 /* AIO request types */
@@ -XXX,XX +XXX,XX @@ void laio_cleanup(LinuxAioState *s);
 
 /* laio_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch);
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch);
 
 bool laio_has_fdsync(int);
+bool laio_has_fua(void);
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
 #endif
@@ -XXX,XX +XXX,XX @@ void luring_cleanup(LuringState *s);
 
 /* luring_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type);
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags);
 void luring_detach_aio_context(LuringState *s, AioContext *old_context);
 void luring_attach_aio_context(LuringState *s, AioContext *new_context);
 #endif
diff --git a/block/file-posix.c b/block/file-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -XXX,XX +XXX,XX @@ static int fd_open(BlockDriverState *bs)
 }
 
 static int64_t raw_getlength(BlockDriverState *bs);
+static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs);
 
 typedef struct RawPosixAIOData {
     BlockDriverState *bs;
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
 #endif
     s->needs_alignment = raw_needs_alignment(bs);
 
+    if (!s->use_linux_aio || laio_has_fua()) {
+        bs->supported_write_flags = BDRV_REQ_FUA;
+    }
+
     bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
     if (S_ISREG(st.st_mode)) {
         /* When extending regular files, we get zeros from the OS */
@@ -XXX,XX +XXX,XX @@ static inline bool raw_check_linux_aio(BDRVRawState *s)
 #endif
 
 static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
-                                   uint64_t bytes, QEMUIOVector *qiov, int type)
+                                   uint64_t bytes, QEMUIOVector *qiov, int type,
+                                   int flags)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 #ifdef CONFIG_LINUX_IO_URING
     } else if (raw_check_linux_io_uring(s)) {
         assert(qiov->size == bytes);
-        ret = luring_co_submit(bs, s->fd, offset, qiov, type);
+        ret = luring_co_submit(bs, s->fd, offset, qiov, type, flags);
         goto out;
 #endif
 #ifdef CONFIG_LINUX_AIO
     } else if (raw_check_linux_aio(s)) {
         assert(qiov->size == bytes);
-        ret = laio_co_submit(s->fd, offset, qiov, type,
+        ret = laio_co_submit(s->fd, offset, qiov, type, flags,
                              s->aio_max_batch);
         goto out;
 #endif
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 
     assert(qiov->size == bytes);
     ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
+    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+        /* TODO Use pwritev2() instead if it's available */
+        ret = raw_co_flush_to_disk(bs);
+    }
     goto out; /* Avoid the compiler err of unused label */
 
 out:
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags);
 }
 
 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
                                        int64_t bytes, QEMUIOVector *qiov,
                                        BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags);
 }
 
 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
 
 #ifdef CONFIG_LINUX_IO_URING
     if (raw_check_linux_io_uring(s)) {
-        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
+        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
     }
 #endif
 #ifdef CONFIG_LINUX_AIO
     if (s->has_laio_fdsync && raw_check_linux_aio(s)) {
-        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
+        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0, 0);
     }
 #endif
     return raw_thread_pool_submit(handle_aiocb_flush, &acb);
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     }
 
     trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
-    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND, 0);
 }
 #endif
diff --git a/block/io_uring.c b/block/io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -XXX,XX +XXX,XX @@ static void luring_deferred_fn(void *opaque)
  *
  */
 static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
-                            uint64_t offset, int type)
+                            uint64_t offset, int type, BdrvRequestFlags flags)
 {
     int ret;
     struct io_uring_sqe *sqes = &luringcb->sqeq;
+    int luring_flags;
 
     switch (type) {
     case QEMU_AIO_WRITE:
-        io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
-                             luringcb->qiov->niov, offset);
+        luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_uring_prep_writev2(sqes, fd, luringcb->qiov->iov,
+                              luringcb->qiov->niov, offset, luring_flags);
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
@@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
 }
 
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type)
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
     };
     trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
                            type);
-    ret = luring_do_submit(fd, &luringcb, s, offset, type);
+    ret = luring_do_submit(fd, &luringcb, s, offset, type, flags);
 
     if (ret < 0) {
         return ret;
diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@ static void laio_deferred_fn(void *opaque)
 }
 
 static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
-                          int type, uint64_t dev_max_batch)
+                          int type, BdrvRequestFlags flags,
+                          uint64_t dev_max_batch)
 {
     LinuxAioState *s = laiocb->ctx;
     struct iocb *iocbs = &laiocb->iocb;
     QEMUIOVector *qiov = laiocb->qiov;
+    int laio_flags;
 
     switch (type) {
     case QEMU_AIO_WRITE:
+#ifdef HAVE_IO_PREP_PWRITEV2
+        laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
+#else
+        assert(flags == 0);
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+#endif
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 }
 
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch)
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
         .qiov = qiov,
     };
 
-    ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
+    ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
     if (ret < 0) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ bool laio_has_fdsync(int fd)
     io_destroy(ctx);
     return (ret == -EINVAL) ? false : true;
 }
+
+bool laio_has_fua(void)
+{
+#ifdef HAVE_IO_PREP_PWRITEV2
+    return true;
+#else
+    return false;
+#endif
+}
diff --git a/meson.build b/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/meson.build
+++ b/meson.build
@@ -XXX,XX +XXX,XX @@ config_host_data.set('HAVE_OPTRESET',
                      cc.has_header_symbol('getopt.h', 'optreset'))
 config_host_data.set('HAVE_IPPROTO_MPTCP',
                      cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+if libaio.found()
+  config_host_data.set('HAVE_IO_PREP_PWRITEV2',
+                       cc.has_header_symbol('libaio.h', 'io_prep_pwritev2'))
+endif
 
 # has_member
 config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
-- 
2.48.1
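
Outside of QEMU, the kernel feature this patch relies on can be seen
with a minimal Linux-only sketch (assuming a glibc that exposes
pwritev2() and a kernel that accepts RWF_DSYNC; the file name is
arbitrary). One syscall replaces the write plus fdatasync() pair that
FUA emulation needs:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/uio.h>
    #include <unistd.h>

    int main(void)
    {
        char data[4096] = "payload";
        struct iovec iov = { .iov_base = data, .iov_len = sizeof(data) };
        int fd = open("test.img", O_WRONLY | O_CREAT, 0600);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        /* Durable on completion, like pwritev() followed by fdatasync() */
        if (pwritev2(fd, &iov, 1, 0, RWF_DSYNC) < 0) {
            perror("pwritev2");
        }
        close(fd);
        return 0;
    }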
For block drivers that don't advertise FUA support, we already call
bdrv_co_flush(), which considers BDRV_O_NO_FLUSH. However, drivers that
do support FUA still see the FUA flag with BDRV_O_NO_FLUSH and get the
associated performance penalty that cache.no-flush=on was supposed to
avoid.

Clear FUA for write requests if BDRV_O_NO_FLUSH is set.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-3-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/io.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/block/io.c b/block/io.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io.c
+++ b/block/io.c
@@ -XXX,XX +XXX,XX @@ bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
         return -ENOMEDIUM;
     }
 
+    if (bs->open_flags & BDRV_O_NO_FLUSH) {
+        flags &= ~BDRV_REQ_FUA;
+    }
+
     if ((flags & BDRV_REQ_FUA) &&
         (~bs->supported_write_flags & BDRV_REQ_FUA)) {
         flags &= ~BDRV_REQ_FUA;
-- 
2.48.1
As a preparation for having multiple adaptive polling states per
AioContext, move the 'ns' field into a separate struct.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-4-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/aio.h |  6 +++++-
 util/aio-posix.c    | 31 ++++++++++++++++---------------
 util/async.c        |  3 ++-
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct BHListSlice {
 
 typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;
 
+typedef struct AioPolledEvent {
+    int64_t ns;        /* current polling time in nanoseconds */
+} AioPolledEvent;
+
 struct AioContext {
     GSource source;
 
@@ -XXX,XX +XXX,XX @@ struct AioContext {
     int poll_disable_cnt;
 
     /* Polling mode parameters */
-    int64_t poll_ns;        /* current polling time in nanoseconds */
+    AioPolledEvent poll;
     int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
         return false;
     }
 
-    max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
+    max_ns = qemu_soonest_timeout(*timeout, ctx->poll.ns);
     if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
         /*
          * Enable poll mode. It pairs with the poll_set_started() in
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
 
-        if (block_ns <= ctx->poll_ns) {
+        if (block_ns <= ctx->poll.ns) {
             /* This is the sweet spot, no adjustment needed */
         } else if (block_ns > ctx->poll_max_ns) {
             /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll_ns;
+            int64_t old = ctx->poll.ns;
 
             if (ctx->poll_shrink) {
-                ctx->poll_ns /= ctx->poll_shrink;
+                ctx->poll.ns /= ctx->poll_shrink;
             } else {
-                ctx->poll_ns = 0;
+                ctx->poll.ns = 0;
             }
 
-            trace_poll_shrink(ctx, old, ctx->poll_ns);
-        } else if (ctx->poll_ns < ctx->poll_max_ns &&
+            trace_poll_shrink(ctx, old, ctx->poll.ns);
+        } else if (ctx->poll.ns < ctx->poll_max_ns &&
                    block_ns < ctx->poll_max_ns) {
             /* There is room to grow, poll longer */
-            int64_t old = ctx->poll_ns;
+            int64_t old = ctx->poll.ns;
             int64_t grow = ctx->poll_grow;
 
             if (grow == 0) {
                 grow = 2;
             }
 
-            if (ctx->poll_ns) {
-                ctx->poll_ns *= grow;
+            if (ctx->poll.ns) {
+                ctx->poll.ns *= grow;
             } else {
-                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
+                ctx->poll.ns = 4000; /* start polling at 4 microseconds */
             }
 
-            if (ctx->poll_ns > ctx->poll_max_ns) {
-                ctx->poll_ns = ctx->poll_max_ns;
+            if (ctx->poll.ns > ctx->poll_max_ns) {
+                ctx->poll.ns = ctx->poll_max_ns;
             }
 
-            trace_poll_grow(ctx, old, ctx->poll_ns);
+            trace_poll_grow(ctx, old, ctx->poll.ns);
         }
     }
 
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
     /* No thread synchronization here, it doesn't matter if an incorrect value
      * is used once.
      */
+    ctx->poll.ns = 0;
+
     ctx->poll_max_ns = max_ns;
-    ctx->poll_ns = 0;
     ctx->poll_grow = grow;
     ctx->poll_shrink = shrink;
 
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
     qemu_rec_mutex_init(&ctx->lock);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
-    ctx->poll_ns = 0;
+    ctx->poll.ns = 0;
+
     ctx->poll_max_ns = 0;
     ctx->poll_grow = 0;
     ctx->poll_shrink = 0;
-- 
2.48.1
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-5-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/aio-posix.c | 77 ++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
     return false;
 }
 
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns)
+{
+    if (block_ns <= poll->ns) {
+        /* This is the sweet spot, no adjustment needed */
+    } else if (block_ns > ctx->poll_max_ns) {
+        /* We'd have to poll for too long, poll less */
+        int64_t old = poll->ns;
+
+        if (ctx->poll_shrink) {
+            poll->ns /= ctx->poll_shrink;
+        } else {
+            poll->ns = 0;
+        }
+
+        trace_poll_shrink(ctx, old, poll->ns);
+    } else if (poll->ns < ctx->poll_max_ns &&
+               block_ns < ctx->poll_max_ns) {
+        /* There is room to grow, poll longer */
+        int64_t old = poll->ns;
+        int64_t grow = ctx->poll_grow;
+
+        if (grow == 0) {
+            grow = 2;
+        }
+
+        if (poll->ns) {
+            poll->ns *= grow;
+        } else {
+            poll->ns = 4000; /* start polling at 4 microseconds */
+        }
+
+        if (poll->ns > ctx->poll_max_ns) {
+            poll->ns = ctx->poll_max_ns;
+        }
+
+        trace_poll_grow(ctx, old, poll->ns);
+    }
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* Adjust polling time */
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        if (block_ns <= ctx->poll.ns) {
-            /* This is the sweet spot, no adjustment needed */
-        } else if (block_ns > ctx->poll_max_ns) {
-            /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll.ns;
-
-            if (ctx->poll_shrink) {
-                ctx->poll.ns /= ctx->poll_shrink;
-            } else {
-                ctx->poll.ns = 0;
-            }
-
-            trace_poll_shrink(ctx, old, ctx->poll.ns);
-        } else if (ctx->poll.ns < ctx->poll_max_ns &&
-                   block_ns < ctx->poll_max_ns) {
-            /* There is room to grow, poll longer */
-            int64_t old = ctx->poll.ns;
-            int64_t grow = ctx->poll_grow;
-
-            if (grow == 0) {
-                grow = 2;
-            }
-
-            if (ctx->poll.ns) {
-                ctx->poll.ns *= grow;
-            } else {
-                ctx->poll.ns = 4000; /* start polling at 4 microseconds */
-            }
-
-            if (ctx->poll.ns > ctx->poll_max_ns) {
-                ctx->poll.ns = ctx->poll_max_ns;
-            }
-
-            trace_poll_grow(ctx, old, ctx->poll.ns);
-        }
+        adjust_polling_time(ctx, &ctx->poll, block_ns);
     }
 
     progress |= aio_bh_poll(ctx);
-- 
2.48.1
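
To make the factored-out policy concrete, here is a toy simulation of
adjust_polling_time() with assumed parameters poll_max_ns=32000 and the
defaults poll_grow=0 (factor 2) and poll_shrink=0 (reset to 0). Four
fast completions grow the window 4000 -> 8000 -> 16000 -> 32000 ns, and
one slow completion drops it back to zero. A sketch, not QEMU code:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the grow/shrink rules with shrink=0 (reset) and grow=2 */
    static void adjust(int64_t *ns, int64_t block_ns, int64_t max_ns)
    {
        if (block_ns <= *ns) {
            /* sweet spot, keep the current window */
        } else if (block_ns > max_ns) {
            *ns = 0;                        /* too slow, stop polling */
        } else {
            *ns = *ns ? *ns * 2 : 4000;     /* grow from 4 microseconds */
            if (*ns > max_ns) {
                *ns = max_ns;
            }
        }
    }

    int main(void)
    {
        int64_t ns = 0;
        int64_t block_ns[] = { 20000, 20000, 20000, 20000, 500000 };

        for (int i = 0; i < 5; i++) {
            adjust(&ns, block_ns[i], 32000);
            printf("event %d: poll window %" PRId64 " ns\n", i, ns);
        }
        return 0;
    }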
bdrv_root_attach_child() requires callers to hold the AioContext lock
for child_bs. Take it in block_job_add_bdrv() before calling the
function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20230605085711.21261-10-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
blockjob.c | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)

Adaptive polling has a big problem: it doesn't consider that an event
loop can wait for many different events that may have very different
typical latencies.

For example, think of a guest that tends to send a new I/O request soon
after the previous I/O request completes, but the storage on the host is
rather slow. In this case, getting the new request from the guest
quickly means that polling is enabled, but the next thing is performing
the I/O request on the backend, which is slow and disables polling again
for the next guest request. This means that in such a scenario, polling
could help for every other event, but is only ever enabled when it can't
succeed.

In order to fix this, keep a separate AioPolledEvent for each
AioHandler. We will then know that the backend file descriptor always
has a high latency and isn't worth polling for, but we also know that
the guest is always fast and we should poll for it. This solves at least
half of the problem: we can now keep polling for those cases where it
makes sense and get the improved performance from it.

Since the event loop doesn't know which event will be next, we still do
some unnecessary polling while we're waiting for the slow disk. I made
some attempts to be more clever than just randomly growing and shrinking
the polling time, and even to let callers be explicit about when they
expect a new event, but so far this hasn't resulted in improved
performance, and has sometimes even caused regressions. For now, let's
just fix the part that is easy enough to fix; we can revisit the rest
later.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-6-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/aio.h | 1 -
util/aio-posix.h | 1 +
util/aio-posix.c | 26 ++++++++++++++++++++++----
util/async.c | 2 --
4 files changed, 23 insertions(+), 7 deletions(-)
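As a concrete sketch of what moves where (names simplified; only
AioPolledEvent and the MAX-over-handlers idea are taken from the patch
below, the rest is illustrative):

    #include <stdint.h>

    /* Each event source earns its own polling window. */
    typedef struct AioPolledEvent {
        int64_t ns;                     /* polling duration for this event */
    } AioPolledEvent;

    typedef struct AioHandler {
        int fd;
        AioPolledEvent poll;            /* was: a single copy per AioContext */
        struct AioHandler *next;
    } AioHandler;

    /* The event loop polls for as long as the most poll-worthy handler
     * suggests, so a fast guest queue keeps polling enabled even while
     * a slow backend fd never earns a polling window. */
    static int64_t max_polling_time(AioHandler *list)
    {
        int64_t max_ns = 0;
        for (AioHandler *node = list; node; node = node->next) {
            max_ns = node->poll.ns > max_ns ? node->poll.ns : max_ns;
        }
        return max_ns;
    }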
13
diff --git a/blockjob.c b/blockjob.c
40
diff --git a/include/block/aio.h b/include/block/aio.h
14
index XXXXXXX..XXXXXXX 100644
41
index XXXXXXX..XXXXXXX 100644
15
--- a/blockjob.c
42
--- a/include/block/aio.h
16
+++ b/blockjob.c
43
+++ b/include/block/aio.h
17
@@ -XXX,XX +XXX,XX @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
44
@@ -XXX,XX +XXX,XX @@ struct AioContext {
18
uint64_t perm, uint64_t shared_perm, Error **errp)
45
int poll_disable_cnt;
46
47
/* Polling mode parameters */
48
- AioPolledEvent poll;
49
int64_t poll_max_ns; /* maximum polling time in nanoseconds */
50
int64_t poll_grow; /* polling time growth factor */
51
int64_t poll_shrink; /* polling time shrink factor */
52
diff --git a/util/aio-posix.h b/util/aio-posix.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/util/aio-posix.h
55
+++ b/util/aio-posix.h
56
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
57
#endif
58
int64_t poll_idle_timeout; /* when to stop userspace polling */
59
bool poll_ready; /* has polling detected an event? */
60
+ AioPolledEvent poll;
61
};
62
63
/* Add a handler to a ready list */
64
diff --git a/util/aio-posix.c b/util/aio-posix.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/util/aio-posix.c
67
+++ b/util/aio-posix.c
68
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
69
static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
70
int64_t *timeout)
19
{
71
{
20
BdrvChild *c;
72
+ AioHandler *node;
21
+ AioContext *ctx = bdrv_get_aio_context(bs);
73
int64_t max_ns;
22
bool need_context_ops;
74
23
GLOBAL_STATE_CODE();
75
if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
24
76
return false;
25
bdrv_ref(bs);
26
27
- need_context_ops = bdrv_get_aio_context(bs) != job->job.aio_context;
28
+ need_context_ops = ctx != job->job.aio_context;
29
30
- if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
31
- aio_context_release(job->job.aio_context);
32
+ if (need_context_ops) {
33
+ if (job->job.aio_context != qemu_get_aio_context()) {
34
+ aio_context_release(job->job.aio_context);
35
+ }
36
+ aio_context_acquire(ctx);
37
}
77
}
38
c = bdrv_root_attach_child(bs, name, &child_job, 0, perm, shared_perm, job,
78
39
errp);
79
- max_ns = qemu_soonest_timeout(*timeout, ctx->poll.ns);
40
- if (need_context_ops && job->job.aio_context != qemu_get_aio_context()) {
80
+ max_ns = 0;
41
- aio_context_acquire(job->job.aio_context);
81
+ QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
42
+ if (need_context_ops) {
82
+ max_ns = MAX(max_ns, node->poll.ns);
43
+ aio_context_release(ctx);
83
+ }
44
+ if (job->job.aio_context != qemu_get_aio_context()) {
84
+ max_ns = qemu_soonest_timeout(*timeout, max_ns);
45
+ aio_context_acquire(job->job.aio_context);
85
+
86
if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
87
/*
88
* Enable poll mode. It pairs with the poll_set_started() in
89
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
90
91
/* Adjust polling time */
92
if (ctx->poll_max_ns) {
93
+ AioHandler *node;
94
int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
95
- adjust_polling_time(ctx, &ctx->poll, block_ns);
96
+
97
+ QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
98
+ if (QLIST_IS_INSERTED(node, node_ready)) {
99
+ adjust_polling_time(ctx, &node->poll, block_ns);
100
+ }
46
+ }
101
+ }
47
}
102
}
48
if (c == NULL) {
103
49
return -EPERM;
104
progress |= aio_bh_poll(ctx);
105
@@ -XXX,XX +XXX,XX @@ void aio_context_use_g_source(AioContext *ctx)
106
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
107
int64_t grow, int64_t shrink, Error **errp)
108
{
109
+ AioHandler *node;
110
+
111
+ qemu_lockcnt_inc(&ctx->list_lock);
112
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
113
+ node->poll.ns = 0;
114
+ }
115
+ qemu_lockcnt_dec(&ctx->list_lock);
116
+
117
/* No thread synchronization here, it doesn't matter if an incorrect value
118
* is used once.
119
*/
120
- ctx->poll.ns = 0;
121
-
122
ctx->poll_max_ns = max_ns;
123
ctx->poll_grow = grow;
124
ctx->poll_shrink = shrink;
125
diff --git a/util/async.c b/util/async.c
126
index XXXXXXX..XXXXXXX 100644
127
--- a/util/async.c
128
+++ b/util/async.c
129
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
130
qemu_rec_mutex_init(&ctx->lock);
131
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
132
133
- ctx->poll.ns = 0;
134
-
135
ctx->poll_max_ns = 0;
136
ctx->poll_grow = 0;
137
ctx->poll_shrink = 0;
50
--
138
--
51
2.41.0
139
2.48.1
From: Paolo Bonzini <pbonzini@redhat.com>

bdrv_co_debug_event was recently introduced, with bdrv_debug_event
becoming a wrapper for use in an unknown context. Because most of the
time bdrv_debug_event is used on a BdrvChild via the wrapper macro
BLKDBG_EVENT, introduce a similar macro BLKDBG_CO_EVENT that calls
bdrv_co_debug_event, and switch whenever possible.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20230601115145.196465-13-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
include/block/block-io.h | 7 +++++++
block/io.c | 4 ++--
block/qcow.c | 24 ++++++++++++------------
block/qcow2-cluster.c | 12 ++++++------
block/qcow2-refcount.c | 4 ++--
block/qcow2.c | 18 +++++++++---------
block/qed-table.c | 6 +++---
block/qed.c | 8 ++++----
block/raw-format.c | 4 ++--
block/vmdk.c | 24 ++++++++++++------------
10 files changed, 59 insertions(+), 52 deletions(-)

aio_dispatch_handler() adds handlers to ctx->poll_aio_handlers if
polling should be enabled. If we call adjust_polling_time() for all
polling handlers before this, new polling handlers are still left at
poll->ns = 0 and polling is only actually enabled after the next event.
Move the adjust_polling_time() call after aio_dispatch_handler().

This fixes test-nested-aio-poll, which expects that polling becomes
effective the first time around.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311141912.135657-1-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
util/aio-posix.c | 28 +++++++++++++++++-----------
1 file changed, 17 insertions(+), 11 deletions(-)
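A minimal sketch of the reordered loop (stand-in types and a toy growth
rule; only the dispatch-then-adjust ordering reflects the patch below):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct Handler {
        bool polling;                   /* on the polling list? */
        int64_t poll_ns;                /* current polling window */
        struct Handler *next_ready;
    } Handler;

    static void dispatch(Handler *h)
    {
        h->polling = true;              /* stub: dispatch may enable polling */
    }

    static void adjust_polling_time(Handler *h, int64_t block_ns)
    {
        /* toy rule standing in for the real grow/shrink logic */
        h->poll_ns = h->poll_ns ? 2 * h->poll_ns : block_ns;
    }

    /* Adjust only after dispatch(), which may have just put the handler
     * on the polling list; adjusting before would leave a brand-new
     * handler at poll_ns == 0 until the next event. */
    static void dispatch_ready_handlers(Handler *ready, int64_t block_ns)
    {
        for (Handler *h = ready; h; h = h->next_ready) {
            dispatch(h);
            if (h->polling) {
                adjust_polling_time(h, block_ns);
            }
        }
    }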
26
diff --git a/include/block/block-io.h b/include/block/block-io.h
17
diff --git a/util/aio-posix.c b/util/aio-posix.c
27
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
28
--- a/include/block/block-io.h
19
--- a/util/aio-posix.c
29
+++ b/include/block/block-io.h
20
+++ b/util/aio-posix.c
30
@@ -XXX,XX +XXX,XX @@ bdrv_co_debug_event(BlockDriverState *bs, BlkdebugEvent event);
21
@@ -XXX,XX +XXX,XX @@
31
void co_wrapper_mixed_bdrv_rdlock
22
/* Stop userspace polling on a handler if it isn't active for some time */
32
bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
23
#define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
33
24
34
+#define BLKDBG_CO_EVENT(child, evt) \
25
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
35
+ do { \
26
+ int64_t block_ns);
36
+ if (child) { \
37
+ bdrv_co_debug_event(child->bs, evt); \
38
+ } \
39
+ } while (0)
40
+
27
+
41
#define BLKDBG_EVENT(child, evt) \
28
bool aio_poll_disabled(AioContext *ctx)
42
do { \
29
{
43
if (child) { \
30
return qatomic_read(&ctx->poll_disable_cnt);
44
diff --git a/block/io.c b/block/io.c
31
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
45
index XXXXXXX..XXXXXXX 100644
32
* scanning all handlers with aio_dispatch_handlers().
46
--- a/block/io.c
33
*/
47
+++ b/block/io.c
34
static bool aio_dispatch_ready_handlers(AioContext *ctx,
48
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
35
- AioHandlerList *ready_list)
36
+ AioHandlerList *ready_list,
37
+ int64_t block_ns)
38
{
39
bool progress = false;
40
AioHandler *node;
41
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_ready_handlers(AioContext *ctx,
42
while ((node = QLIST_FIRST(ready_list))) {
43
QLIST_REMOVE(node, node_ready);
44
progress = aio_dispatch_handler(ctx, node) || progress;
45
+
46
+ /*
47
+ * Adjust polling time only after aio_dispatch_handler(), which can
48
+ * add the handler to ctx->poll_aio_handlers.
49
+ */
50
+ if (ctx->poll_max_ns && QLIST_IS_INSERTED(node, node_poll)) {
51
+ adjust_polling_time(ctx, &node->poll, block_ns);
52
+ }
49
}
53
}
50
54
51
/* Write back cached data to the OS even with cache=unsafe */
55
return progress;
52
- BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
56
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
53
+ BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
57
bool use_notify_me;
54
if (bs->drv->bdrv_co_flush_to_os) {
58
int64_t timeout;
55
ret = bs->drv->bdrv_co_flush_to_os(bs);
59
int64_t start = 0;
56
if (ret < 0) {
60
+ int64_t block_ns = 0;
57
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
61
58
goto flush_children;
62
/*
63
* There cannot be two concurrent aio_poll calls for the same AioContext (or
64
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
65
66
aio_notify_accept(ctx);
67
68
- /* Adjust polling time */
69
+ /* Calculate blocked time for adaptive polling */
70
if (ctx->poll_max_ns) {
71
- AioHandler *node;
72
- int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
73
-
74
- QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
75
- if (QLIST_IS_INSERTED(node, node_ready)) {
76
- adjust_polling_time(ctx, &node->poll, block_ns);
77
- }
78
- }
79
+ block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
59
}
80
}
60
81
61
- BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
82
progress |= aio_bh_poll(ctx);
62
+ BLKDBG_CO_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
83
- progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
63
if (!bs->drv) {
84
+ progress |= aio_dispatch_ready_handlers(ctx, &ready_list, block_ns);
64
/* bs->drv->bdrv_co_flush() might have ejected the BDS
85
65
* (even in case of apparent success) */
86
aio_free_deleted_handlers(ctx);
66
diff --git a/block/qcow.c b/block/qcow.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/block/qcow.c
69
+++ b/block/qcow.c
70
@@ -XXX,XX +XXX,XX @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
71
/* update the L1 entry */
72
s->l1_table[l1_index] = l2_offset;
73
tmp = cpu_to_be64(l2_offset);
74
- BLKDBG_EVENT(bs->file, BLKDBG_L1_UPDATE);
75
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_UPDATE);
76
ret = bdrv_co_pwrite_sync(bs->file,
77
s->l1_table_offset + l1_index * sizeof(tmp),
78
sizeof(tmp), &tmp, 0);
79
@@ -XXX,XX +XXX,XX @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
80
}
81
}
82
l2_table = s->l2_cache + (min_index << s->l2_bits);
83
- BLKDBG_EVENT(bs->file, BLKDBG_L2_LOAD);
84
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_LOAD);
85
if (new_l2_table) {
86
memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
87
ret = bdrv_co_pwrite_sync(bs->file, l2_offset,
88
@@ -XXX,XX +XXX,XX @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
89
((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1)) {
90
if (!allocate)
91
return 0;
92
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
93
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC);
94
assert(QEMU_IS_ALIGNED(n_start | n_end, BDRV_SECTOR_SIZE));
95
/* allocate a new cluster */
96
if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
97
@@ -XXX,XX +XXX,XX @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
98
}
99
cluster_offset = QEMU_ALIGN_UP(cluster_offset, s->cluster_size);
100
/* write the cluster content */
101
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
102
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
103
ret = bdrv_co_pwrite(bs->file, cluster_offset, s->cluster_size,
104
s->cluster_cache, 0);
105
if (ret < 0) {
106
@@ -XXX,XX +XXX,XX @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
107
NULL) < 0) {
108
return -EIO;
109
}
110
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
111
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
112
ret = bdrv_co_pwrite(bs->file, cluster_offset + i,
113
BDRV_SECTOR_SIZE,
114
s->cluster_data, 0);
115
@@ -XXX,XX +XXX,XX @@ get_cluster_offset(BlockDriverState *bs, uint64_t offset, int allocate,
116
tmp = cpu_to_be64(cluster_offset);
117
l2_table[l2_index] = tmp;
118
if (allocate == 2) {
119
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
120
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
121
} else {
122
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE);
123
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE);
124
}
125
ret = bdrv_co_pwrite_sync(bs->file, l2_offset + l2_index * sizeof(tmp),
126
sizeof(tmp), &tmp, 0);
127
@@ -XXX,XX +XXX,XX @@ decompress_cluster(BlockDriverState *bs, uint64_t cluster_offset)
128
if (s->cluster_cache_offset != coffset) {
129
csize = cluster_offset >> (63 - s->cluster_bits);
130
csize &= (s->cluster_size - 1);
131
- BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
132
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
133
ret = bdrv_co_pread(bs->file, coffset, csize, s->cluster_data, 0);
134
if (ret < 0)
135
return -1;
136
@@ -XXX,XX +XXX,XX @@ qcow_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
137
/* read from the base image */
138
qemu_co_mutex_unlock(&s->lock);
139
/* qcow2 emits this on bs->file instead of bs->backing */
140
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
141
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
142
ret = bdrv_co_pread(bs->backing, offset, n, buf, 0);
143
qemu_co_mutex_lock(&s->lock);
144
if (ret < 0) {
145
@@ -XXX,XX +XXX,XX @@ qcow_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
146
break;
147
}
148
qemu_co_mutex_unlock(&s->lock);
149
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
150
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
151
ret = bdrv_co_pread(bs->file, cluster_offset + offset_in_cluster,
152
n, buf, 0);
153
qemu_co_mutex_lock(&s->lock);
154
@@ -XXX,XX +XXX,XX @@ qcow_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
155
}
156
157
qemu_co_mutex_unlock(&s->lock);
158
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
159
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
160
ret = bdrv_co_pwrite(bs->file, cluster_offset + offset_in_cluster,
161
n, buf, 0);
162
qemu_co_mutex_lock(&s->lock);
163
@@ -XXX,XX +XXX,XX @@ qcow_co_pwritev_compressed(BlockDriverState *bs, int64_t offset, int64_t bytes,
164
}
165
cluster_offset &= s->cluster_offset_mask;
166
167
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
168
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_COMPRESSED);
169
ret = bdrv_co_pwrite(bs->file, cluster_offset, out_len, out_buf, 0);
170
if (ret < 0) {
171
goto fail;
172
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
173
index XXXXXXX..XXXXXXX 100644
174
--- a/block/qcow2-cluster.c
175
+++ b/block/qcow2-cluster.c
176
@@ -XXX,XX +XXX,XX @@ int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs,
177
fprintf(stderr, "shrink l1_table from %d to %d\n", s->l1_size, new_l1_size);
178
#endif
179
180
- BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
181
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_SHRINK_WRITE_TABLE);
182
ret = bdrv_co_pwrite_zeroes(bs->file,
183
s->l1_table_offset + new_l1_size * L1E_SIZE,
184
(s->l1_size - new_l1_size) * L1E_SIZE, 0);
185
@@ -XXX,XX +XXX,XX @@ int coroutine_fn qcow2_shrink_l1_table(BlockDriverState *bs,
186
goto fail;
187
}
188
189
- BLKDBG_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
190
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_L1_SHRINK_FREE_L2_CLUSTERS);
191
for (i = s->l1_size - 1; i > new_l1_size - 1; i--) {
192
if ((s->l1_table[i] & L1E_OFFSET_MASK) == 0) {
193
continue;
194
@@ -XXX,XX +XXX,XX @@ do_perform_cow_read(BlockDriverState *bs, uint64_t src_cluster_offset,
195
return 0;
196
}
197
198
- BLKDBG_EVENT(bs->file, BLKDBG_COW_READ);
199
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_COW_READ);
200
201
if (!bs->drv) {
202
return -ENOMEDIUM;
203
@@ -XXX,XX +XXX,XX @@ do_perform_cow_write(BlockDriverState *bs, uint64_t cluster_offset,
204
return ret;
205
}
206
207
- BLKDBG_EVENT(bs->file, BLKDBG_COW_WRITE);
208
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_COW_WRITE);
209
ret = bdrv_co_pwritev(s->data_file, cluster_offset + offset_in_cluster,
210
qiov->size, qiov, 0);
211
if (ret < 0) {
212
@@ -XXX,XX +XXX,XX @@ qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset,
213
214
/* compressed clusters never have the copied flag */
215
216
- BLKDBG_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
217
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_L2_UPDATE_COMPRESSED);
218
qcow2_cache_entry_mark_dirty(s->l2_table_cache, l2_slice);
219
set_l2_entry(s, l2_slice, l2_index, cluster_offset);
220
if (has_subclusters(s)) {
221
@@ -XXX,XX +XXX,XX @@ perform_cow(BlockDriverState *bs, QCowL2Meta *m)
222
/* NOTE: we have a write_aio blkdebug event here followed by
223
* a cow_write one in do_perform_cow_write(), but there's only
224
* one single I/O operation */
225
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
226
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
227
ret = do_perform_cow_write(bs, m->alloc_offset, start->offset, &qiov);
228
} else {
229
/* If there's no guest data then write both COW regions separately */
230
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
231
index XXXXXXX..XXXXXXX 100644
232
--- a/block/qcow2-refcount.c
233
+++ b/block/qcow2-refcount.c
234
@@ -XXX,XX +XXX,XX @@ int coroutine_fn qcow2_refcount_init(BlockDriverState *bs)
235
ret = -ENOMEM;
236
goto fail;
237
}
238
- BLKDBG_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
239
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_REFTABLE_LOAD);
240
ret = bdrv_co_pread(bs->file, s->refcount_table_offset,
241
refcount_table_size2, s->refcount_table, 0);
242
if (ret < 0) {
243
@@ -XXX,XX +XXX,XX @@ int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int si
244
size_t free_in_cluster;
245
int ret;
246
247
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
248
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_BYTES);
249
assert(size > 0 && size <= s->cluster_size);
250
assert(!s->free_byte_offset || offset_into_cluster(s, s->free_byte_offset));
251
252
diff --git a/block/qcow2.c b/block/qcow2.c
253
index XXXXXXX..XXXXXXX 100644
254
--- a/block/qcow2.c
255
+++ b/block/qcow2.c
256
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_encrypted(BlockDriverState *bs,
257
return -ENOMEM;
258
}
259
260
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
261
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
262
ret = bdrv_co_pread(s->data_file, host_offset, bytes, buf, 0);
263
if (ret < 0) {
264
goto fail;
265
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_task(BlockDriverState *bs, QCow2SubclusterType subc_type,
266
case QCOW2_SUBCLUSTER_UNALLOCATED_ALLOC:
267
assert(bs->backing); /* otherwise handled in qcow2_co_preadv_part */
268
269
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
270
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
271
return bdrv_co_preadv_part(bs->backing, offset, bytes,
272
qiov, qiov_offset, 0);
273
274
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_task(BlockDriverState *bs, QCow2SubclusterType subc_type,
275
offset, bytes, qiov, qiov_offset);
276
}
277
278
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
279
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
280
return bdrv_co_preadv_part(s->data_file, host_offset,
281
bytes, qiov, qiov_offset, 0);
282
283
@@ -XXX,XX +XXX,XX @@ handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta)
284
return ret;
285
}
286
287
- BLKDBG_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
288
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_CLUSTER_ALLOC_SPACE);
289
ret = bdrv_co_pwrite_zeroes(s->data_file, start_offset, nb_bytes,
290
BDRV_REQ_NO_FALLBACK);
291
if (ret < 0) {
292
@@ -XXX,XX +XXX,XX @@ int qcow2_co_pwritev_task(BlockDriverState *bs, uint64_t host_offset,
293
* guest data now.
294
*/
295
if (!merge_cow(offset, bytes, qiov, qiov_offset, l2meta)) {
296
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
297
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
298
trace_qcow2_writev_data(qemu_coroutine_self(), host_offset);
299
ret = bdrv_co_pwritev_part(s->data_file, host_offset,
300
bytes, qiov, qiov_offset, 0);
301
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed_task(BlockDriverState *bs,
302
goto fail;
303
}
304
305
- BLKDBG_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
306
+ BLKDBG_CO_EVENT(s->data_file, BLKDBG_WRITE_COMPRESSED);
307
ret = bdrv_co_pwrite(s->data_file, cluster_offset, out_len, out_buf, 0);
308
if (ret < 0) {
309
goto fail;
310
@@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs,
311
312
out_buf = qemu_blockalign(bs, s->cluster_size);
313
314
- BLKDBG_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
315
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_COMPRESSED);
316
ret = bdrv_co_pread(bs->file, coffset, csize, buf, 0);
317
if (ret < 0) {
318
goto fail;
319
@@ -XXX,XX +XXX,XX @@ qcow2_co_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
320
return offset;
321
}
322
323
- BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
324
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_VMSTATE_SAVE);
325
return bs->drv->bdrv_co_pwritev_part(bs, offset, qiov->size, qiov, 0, 0);
326
}
327
328
@@ -XXX,XX +XXX,XX @@ qcow2_co_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
329
return offset;
330
}
331
332
- BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
333
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_VMSTATE_LOAD);
334
return bs->drv->bdrv_co_preadv_part(bs, offset, qiov->size, qiov, 0, 0);
335
}
336
337
diff --git a/block/qed-table.c b/block/qed-table.c
338
index XXXXXXX..XXXXXXX 100644
339
--- a/block/qed-table.c
340
+++ b/block/qed-table.c
341
@@ -XXX,XX +XXX,XX @@ int coroutine_fn qed_read_l1_table_sync(BDRVQEDState *s)
342
int coroutine_fn qed_write_l1_table(BDRVQEDState *s, unsigned int index,
343
unsigned int n)
344
{
345
- BLKDBG_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
346
+ BLKDBG_CO_EVENT(s->bs->file, BLKDBG_L1_UPDATE);
347
return qed_write_table(s, s->header.l1_table_offset,
348
s->l1_table, index, n, false);
349
}
350
@@ -XXX,XX +XXX,XX @@ int coroutine_fn qed_read_l2_table(BDRVQEDState *s, QEDRequest *request,
351
request->l2_table = qed_alloc_l2_cache_entry(&s->l2_cache);
352
request->l2_table->table = qed_alloc_table(s);
353
354
- BLKDBG_EVENT(s->bs->file, BLKDBG_L2_LOAD);
355
+ BLKDBG_CO_EVENT(s->bs->file, BLKDBG_L2_LOAD);
356
ret = qed_read_table(s, offset, request->l2_table->table);
357
358
if (ret) {
359
@@ -XXX,XX +XXX,XX @@ int coroutine_fn qed_write_l2_table(BDRVQEDState *s, QEDRequest *request,
360
unsigned int index, unsigned int n,
361
bool flush)
362
{
363
- BLKDBG_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
364
+ BLKDBG_CO_EVENT(s->bs->file, BLKDBG_L2_UPDATE);
365
return qed_write_table(s, request->l2_table->offset,
366
request->l2_table->table, index, n, flush);
367
}
368
diff --git a/block/qed.c b/block/qed.c
369
index XXXXXXX..XXXXXXX 100644
370
--- a/block/qed.c
371
+++ b/block/qed.c
372
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn GRAPH_RDLOCK
373
qed_read_backing_file(BDRVQEDState *s, uint64_t pos, QEMUIOVector *qiov)
374
{
375
if (s->bs->backing) {
376
- BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
377
+ BLKDBG_CO_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO);
378
return bdrv_co_preadv(s->bs->backing, pos, qiov->size, qiov, 0);
379
}
380
qemu_iovec_memset(qiov, 0, 0, qiov->size);
381
@@ -XXX,XX +XXX,XX @@ qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, uint64_t len,
382
goto out;
383
}
384
385
- BLKDBG_EVENT(s->bs->file, BLKDBG_COW_WRITE);
386
+ BLKDBG_CO_EVENT(s->bs->file, BLKDBG_COW_WRITE);
387
ret = bdrv_co_pwritev(s->bs->file, offset, qiov.size, &qiov, 0);
388
if (ret < 0) {
389
goto out;
390
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn GRAPH_RDLOCK qed_aio_write_main(QEDAIOCB *acb)
391
392
trace_qed_aio_write_main(s, acb, 0, offset, acb->cur_qiov.size);
393
394
- BLKDBG_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
395
+ BLKDBG_CO_EVENT(s->bs->file, BLKDBG_WRITE_AIO);
396
return bdrv_co_pwritev(s->bs->file, offset, acb->cur_qiov.size,
397
&acb->cur_qiov, 0);
398
}
399
@@ -XXX,XX +XXX,XX @@ qed_aio_read_data(void *opaque, int ret, uint64_t offset, size_t len)
400
} else if (ret != QED_CLUSTER_FOUND) {
401
r = qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov);
402
} else {
403
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
404
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
405
r = bdrv_co_preadv(bs->file, offset, acb->cur_qiov.size,
406
&acb->cur_qiov, 0);
407
}
408
diff --git a/block/raw-format.c b/block/raw-format.c
409
index XXXXXXX..XXXXXXX 100644
410
--- a/block/raw-format.c
411
+++ b/block/raw-format.c
412
@@ -XXX,XX +XXX,XX @@ raw_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
413
return ret;
414
}
415
416
- BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO);
417
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_AIO);
418
return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
419
}
420
421
@@ -XXX,XX +XXX,XX @@ raw_co_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
422
goto fail;
423
}
424
425
- BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO);
426
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_WRITE_AIO);
427
ret = bdrv_co_pwritev(bs->file, offset, bytes, qiov, flags);
428
429
fail:
430
diff --git a/block/vmdk.c b/block/vmdk.c
431
index XXXXXXX..XXXXXXX 100644
432
--- a/block/vmdk.c
433
+++ b/block/vmdk.c
434
@@ -XXX,XX +XXX,XX @@ get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
435
if (skip_start_bytes > 0) {
436
if (copy_from_backing) {
437
/* qcow2 emits this on bs->file instead of bs->backing */
438
- BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
439
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_READ);
440
ret = bdrv_co_pread(bs->backing, offset, skip_start_bytes,
441
whole_grain, 0);
442
if (ret < 0) {
443
@@ -XXX,XX +XXX,XX @@ get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
444
goto exit;
445
}
446
}
447
- BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE);
448
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_WRITE);
449
ret = bdrv_co_pwrite(extent->file, cluster_offset, skip_start_bytes,
450
whole_grain, 0);
451
if (ret < 0) {
452
@@ -XXX,XX +XXX,XX @@ get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
453
if (skip_end_bytes < cluster_bytes) {
454
if (copy_from_backing) {
455
/* qcow2 emits this on bs->file instead of bs->backing */
456
- BLKDBG_EVENT(extent->file, BLKDBG_COW_READ);
457
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_READ);
458
ret = bdrv_co_pread(bs->backing, offset + skip_end_bytes,
459
cluster_bytes - skip_end_bytes,
460
whole_grain + skip_end_bytes, 0);
461
@@ -XXX,XX +XXX,XX @@ get_whole_cluster(BlockDriverState *bs, VmdkExtent *extent,
462
goto exit;
463
}
464
}
465
- BLKDBG_EVENT(extent->file, BLKDBG_COW_WRITE);
466
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_COW_WRITE);
467
ret = bdrv_co_pwrite(extent->file, cluster_offset + skip_end_bytes,
468
cluster_bytes - skip_end_bytes,
469
whole_grain + skip_end_bytes, 0);
470
@@ -XXX,XX +XXX,XX @@ vmdk_L2update(VmdkExtent *extent, VmdkMetaData *m_data, uint32_t offset)
471
{
472
offset = cpu_to_le32(offset);
473
/* update L2 table */
474
- BLKDBG_EVENT(extent->file, BLKDBG_L2_UPDATE);
475
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_L2_UPDATE);
476
if (bdrv_co_pwrite(extent->file,
477
((int64_t)m_data->l2_offset * 512)
478
+ (m_data->l2_index * sizeof(offset)),
479
@@ -XXX,XX +XXX,XX @@ get_cluster_offset(BlockDriverState *bs, VmdkExtent *extent,
480
}
481
}
482
l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
483
- BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
484
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_L2_LOAD);
485
if (bdrv_co_pread(extent->file,
486
(int64_t)l2_offset * 512,
487
l2_size_bytes,
488
@@ -XXX,XX +XXX,XX @@ vmdk_write_extent(VmdkExtent *extent, int64_t cluster_offset,
489
n_bytes = buf_len + sizeof(VmdkGrainMarker);
490
qemu_iovec_init_buf(&local_qiov, data, n_bytes);
491
492
- BLKDBG_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
493
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_WRITE_COMPRESSED);
494
} else {
495
qemu_iovec_init(&local_qiov, qiov->niov);
496
qemu_iovec_concat(&local_qiov, qiov, qiov_offset, n_bytes);
497
498
- BLKDBG_EVENT(extent->file, BLKDBG_WRITE_AIO);
499
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_WRITE_AIO);
500
}
501
502
write_offset = cluster_offset + offset_in_cluster;
503
@@ -XXX,XX +XXX,XX @@ vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
504
505
506
if (!extent->compressed) {
507
- BLKDBG_EVENT(extent->file, BLKDBG_READ_AIO);
508
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_READ_AIO);
509
ret = bdrv_co_preadv(extent->file,
510
cluster_offset + offset_in_cluster, bytes,
511
qiov, 0);
512
@@ -XXX,XX +XXX,XX @@ vmdk_read_extent(VmdkExtent *extent, int64_t cluster_offset,
513
buf_bytes = cluster_bytes * 2;
514
cluster_buf = g_malloc(buf_bytes);
515
uncomp_buf = g_malloc(cluster_bytes);
516
- BLKDBG_EVENT(extent->file, BLKDBG_READ_COMPRESSED);
517
+ BLKDBG_CO_EVENT(extent->file, BLKDBG_READ_COMPRESSED);
518
ret = bdrv_co_pread(extent->file, cluster_offset, buf_bytes, cluster_buf,
519
0);
520
if (ret < 0) {
521
@@ -XXX,XX +XXX,XX @@ vmdk_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
522
qemu_iovec_concat(&local_qiov, qiov, bytes_done, n_bytes);
523
524
/* qcow2 emits this on bs->file instead of bs->backing */
525
- BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
526
+ BLKDBG_CO_EVENT(bs->file, BLKDBG_READ_BACKING_AIO);
527
ret = bdrv_co_preadv(bs->backing, offset, n_bytes,
528
&local_qiov, 0);
529
if (ret < 0) {
530
@@ -XXX,XX +XXX,XX @@ vmdk_co_check(BlockDriverState *bs, BdrvCheckResult *result, BdrvCheckMode fix)
531
BDRVVmdkState *s = bs->opaque;
532
VmdkExtent *extent = NULL;
533
int64_t sector_num = 0;
534
- int64_t total_sectors = bdrv_nb_sectors(bs);
535
+ int64_t total_sectors = bdrv_co_nb_sectors(bs);
536
int ret;
537
uint64_t cluster_offset;
538
87
539
--
88
--
540
2.41.0
89
2.48.1
From: Paolo Bonzini <pbonzini@redhat.com>

Mark functions as coroutine_fn when they are only called by other
coroutine_fns and they can suspend. Change calls to co_wrappers to use
the non-wrapped functions, which in turn requires adding GRAPH_RDLOCK
annotations.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20230601115145.196465-11-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block/qcow2.h | 33 +++++------
block/qcow2-bitmap.c | 26 +++++----
block/qcow2-cluster.c | 12 ++--
block/qcow2-refcount.c | 130 +++++++++++++++++++++--------------------
block/qcow2.c | 2 +-
5 files changed, 105 insertions(+), 98 deletions(-)

From: Thomas Huth <thuth@redhat.com>

qsd-migrate currently only works for raw, qcow2 and qed. Other formats
are failing, e.g. because they don't support migration. Thus let's
limit this test to the three usable formats for now.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-ID: <20250224214058.205889-1-thuth@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
tests/qemu-iotests/tests/qsd-migrate | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
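The marking pattern itself, as a small sketch (an illustrative function
assuming the QEMU block headers; coroutine_fn, GRAPH_RDLOCK and
bdrv_co_pread() are the real identifiers used in the diff below):

    /* Callers must run in coroutine context and hold the graph read
     * lock; the body may then call the bdrv_co_*() variants directly. */
    static int coroutine_fn GRAPH_RDLOCK
    example_load_table(BlockDriverState *bs, int64_t offset, int64_t bytes,
                       void *buf)
    {
        return bdrv_co_pread(bs->file, offset, bytes, buf, 0); /* may yield */
    }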
19
diff --git a/block/qcow2.h b/block/qcow2.h
16
diff --git a/tests/qemu-iotests/tests/qsd-migrate b/tests/qemu-iotests/tests/qsd-migrate
20
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100755
21
--- a/block/qcow2.h
18
--- a/tests/qemu-iotests/tests/qsd-migrate
22
+++ b/block/qcow2.h
19
+++ b/tests/qemu-iotests/tests/qsd-migrate
23
@@ -XXX,XX +XXX,XX @@ int64_t qcow2_refcount_metadata_size(int64_t clusters, size_t cluster_size,
20
@@ -XXX,XX +XXX,XX @@ import iotests
24
21
25
int qcow2_mark_dirty(BlockDriverState *bs);
22
from iotests import filter_qemu_io, filter_qtest
26
int qcow2_mark_corrupt(BlockDriverState *bs);
23
27
-int qcow2_mark_consistent(BlockDriverState *bs);
24
-iotests.script_initialize(supported_fmts=['generic'],
28
int qcow2_update_header(BlockDriverState *bs);
25
+iotests.script_initialize(supported_fmts=['qcow2', 'qed', 'raw'],
29
26
supported_protocols=['file'],
30
void qcow2_signal_corruption(BlockDriverState *bs, bool fatal, int64_t offset,
27
supported_platforms=['linux'])
31
@@ -XXX,XX +XXX,XX @@ int64_t qcow2_refcount_area(BlockDriverState *bs, uint64_t offset,
32
int64_t qcow2_alloc_clusters(BlockDriverState *bs, uint64_t size);
33
int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offset,
34
int64_t nb_clusters);
35
-int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size);
36
+int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size);
37
void qcow2_free_clusters(BlockDriverState *bs,
38
int64_t offset, int64_t size,
39
enum qcow2_discard_type type);
40
@@ -XXX,XX +XXX,XX @@ int qcow2_update_snapshot_refcount(BlockDriverState *bs,
41
42
int qcow2_flush_caches(BlockDriverState *bs);
43
int qcow2_write_caches(BlockDriverState *bs);
44
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
45
- BdrvCheckMode fix);
46
+int coroutine_fn qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
47
+ BdrvCheckMode fix);
48
49
void qcow2_process_discards(BlockDriverState *bs, int ret);
50
51
@@ -XXX,XX +XXX,XX @@ int qcow2_check_metadata_overlap(BlockDriverState *bs, int ign, int64_t offset,
52
int64_t size);
53
int qcow2_pre_write_overlap_check(BlockDriverState *bs, int ign, int64_t offset,
54
int64_t size, bool data_file);
55
-int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
56
- void **refcount_table,
57
- int64_t *refcount_table_size,
58
- int64_t offset, int64_t size);
59
+int coroutine_fn qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
60
+ void **refcount_table,
61
+ int64_t *refcount_table_size,
62
+ int64_t offset, int64_t size);
63
64
int qcow2_change_refcount_order(BlockDriverState *bs, int refcount_order,
65
BlockDriverAmendStatusCB *status_cb,
66
@@ -XXX,XX +XXX,XX @@ int qcow2_get_host_offset(BlockDriverState *bs, uint64_t offset,
67
int coroutine_fn qcow2_alloc_host_offset(BlockDriverState *bs, uint64_t offset,
68
unsigned int *bytes,
69
uint64_t *host_offset, QCowL2Meta **m);
70
-int coroutine_fn qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
71
- uint64_t offset,
72
- int compressed_size,
73
- uint64_t *host_offset);
74
+int coroutine_fn GRAPH_RDLOCK
75
+qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset,
76
+ int compressed_size, uint64_t *host_offset);
77
void qcow2_parse_compressed_l2_entry(BlockDriverState *bs, uint64_t l2_entry,
78
uint64_t *coffset, int *csize);
79
80
@@ -XXX,XX +XXX,XX @@ void *qcow2_cache_is_table_offset(Qcow2Cache *c, uint64_t offset);
81
void qcow2_cache_discard(Qcow2Cache *c, void *table);
82
83
/* qcow2-bitmap.c functions */
84
-int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
85
- void **refcount_table,
86
- int64_t *refcount_table_size);
87
-bool coroutine_fn qcow2_load_dirty_bitmaps(BlockDriverState *bs,
88
- bool *header_updated, Error **errp);
89
+int coroutine_fn
90
+qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
91
+ void **refcount_table,
92
+ int64_t *refcount_table_size);
93
+bool coroutine_fn GRAPH_RDLOCK
94
+qcow2_load_dirty_bitmaps(BlockDriverState *bs, bool *header_updated, Error **errp);
95
bool qcow2_get_bitmap_info_list(BlockDriverState *bs,
96
Qcow2BitmapInfoList **info_list, Error **errp);
97
int qcow2_reopen_bitmaps_rw(BlockDriverState *bs, Error **errp);
98
diff --git a/block/qcow2-bitmap.c b/block/qcow2-bitmap.c
99
index XXXXXXX..XXXXXXX 100644
100
--- a/block/qcow2-bitmap.c
101
+++ b/block/qcow2-bitmap.c
102
@@ -XXX,XX +XXX,XX @@ static int free_bitmap_clusters(BlockDriverState *bs, Qcow2BitmapTable *tb)
103
/* load_bitmap_data
104
* @bitmap_table entries must satisfy specification constraints.
105
* @bitmap must be cleared */
106
-static int load_bitmap_data(BlockDriverState *bs,
107
- const uint64_t *bitmap_table,
108
- uint32_t bitmap_table_size,
109
- BdrvDirtyBitmap *bitmap)
110
+static int coroutine_fn GRAPH_RDLOCK
111
+load_bitmap_data(BlockDriverState *bs, const uint64_t *bitmap_table,
112
+ uint32_t bitmap_table_size, BdrvDirtyBitmap *bitmap)
113
{
114
int ret = 0;
115
BDRVQcow2State *s = bs->opaque;
116
@@ -XXX,XX +XXX,XX @@ static int load_bitmap_data(BlockDriverState *bs,
117
* already cleared */
118
}
119
} else {
120
- ret = bdrv_pread(bs->file, data_offset, s->cluster_size, buf, 0);
121
+ ret = bdrv_co_pread(bs->file, data_offset, s->cluster_size, buf, 0);
122
if (ret < 0) {
123
goto finish;
124
}
125
@@ -XXX,XX +XXX,XX @@ finish:
126
return ret;
127
}
128
129
-static BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs,
130
- Qcow2Bitmap *bm, Error **errp)
131
+static coroutine_fn GRAPH_RDLOCK
132
+BdrvDirtyBitmap *load_bitmap(BlockDriverState *bs,
133
+ Qcow2Bitmap *bm, Error **errp)
134
{
135
int ret;
136
uint64_t *bitmap_table = NULL;
137
@@ -XXX,XX +XXX,XX @@ fail:
138
return NULL;
139
}
140
141
-int qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
142
- void **refcount_table,
143
- int64_t *refcount_table_size)
144
+int coroutine_fn
145
+qcow2_check_bitmaps_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
146
+ void **refcount_table,
147
+ int64_t *refcount_table_size)
148
{
149
int ret;
150
BDRVQcow2State *s = bs->opaque;
151
@@ -XXX,XX +XXX,XX @@ static void set_readonly_helper(gpointer bitmap, gpointer value)
152
* If header_updated is not NULL then it is set appropriately regardless of
153
* the return value.
154
*/
155
-bool coroutine_fn qcow2_load_dirty_bitmaps(BlockDriverState *bs,
156
- bool *header_updated, Error **errp)
157
+bool coroutine_fn GRAPH_RDLOCK
158
+qcow2_load_dirty_bitmaps(BlockDriverState *bs,
159
+ bool *header_updated, Error **errp)
160
{
161
BDRVQcow2State *s = bs->opaque;
162
Qcow2BitmapList *bm_list;
163
diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c
164
index XXXXXXX..XXXXXXX 100644
165
--- a/block/qcow2-cluster.c
166
+++ b/block/qcow2-cluster.c
167
@@ -XXX,XX +XXX,XX @@ static int get_cluster_table(BlockDriverState *bs, uint64_t offset,
168
*
169
* Return 0 on success and -errno in error cases
170
*/
171
-int coroutine_fn qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs,
172
- uint64_t offset,
173
- int compressed_size,
174
- uint64_t *host_offset)
175
+int coroutine_fn GRAPH_RDLOCK
176
+qcow2_alloc_compressed_cluster_offset(BlockDriverState *bs, uint64_t offset,
177
+ int compressed_size, uint64_t *host_offset)
178
{
179
BDRVQcow2State *s = bs->opaque;
180
int l2_index, ret;
181
@@ -XXX,XX +XXX,XX @@ fail:
182
* all clusters in the same L2 slice) and returns the number of zeroed
183
* clusters.
184
*/
185
-static int zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
186
- uint64_t nb_clusters, int flags)
187
+static int coroutine_fn
188
+zero_in_l2_slice(BlockDriverState *bs, uint64_t offset,
189
+ uint64_t nb_clusters, int flags)
190
{
191
BDRVQcow2State *s = bs->opaque;
192
uint64_t *l2_slice;
193
diff --git a/block/qcow2-refcount.c b/block/qcow2-refcount.c
194
index XXXXXXX..XXXXXXX 100644
195
--- a/block/qcow2-refcount.c
196
+++ b/block/qcow2-refcount.c
197
@@ -XXX,XX +XXX,XX @@ int64_t coroutine_fn qcow2_alloc_clusters_at(BlockDriverState *bs, uint64_t offs
198
199
/* only used to allocate compressed sectors. We try to allocate
200
contiguous sectors. size must be <= cluster_size */
201
-int64_t coroutine_fn qcow2_alloc_bytes(BlockDriverState *bs, int size)
202
+int64_t coroutine_fn GRAPH_RDLOCK qcow2_alloc_bytes(BlockDriverState *bs, int size)
203
{
204
BDRVQcow2State *s = bs->opaque;
205
int64_t offset;
206
@@ -XXX,XX +XXX,XX @@ static int realloc_refcount_array(BDRVQcow2State *s, void **array,
207
*
208
* Modifies the number of errors in res.
209
*/
210
-int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
211
- void **refcount_table,
212
- int64_t *refcount_table_size,
213
- int64_t offset, int64_t size)
214
+int coroutine_fn GRAPH_RDLOCK
215
+qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
216
+ void **refcount_table,
217
+ int64_t *refcount_table_size,
218
+ int64_t offset, int64_t size)
219
{
220
BDRVQcow2State *s = bs->opaque;
221
uint64_t start, last, cluster_offset, k, refcount;
222
@@ -XXX,XX +XXX,XX @@ int qcow2_inc_refcounts_imrt(BlockDriverState *bs, BdrvCheckResult *res,
223
return 0;
224
}
225
226
- file_len = bdrv_getlength(bs->file->bs);
227
+ file_len = bdrv_co_getlength(bs->file->bs);
228
if (file_len < 0) {
229
return file_len;
230
}
231
@@ -XXX,XX +XXX,XX @@ enum {
232
*
233
* On failure in-memory @l2_table may be modified.
234
*/
235
-static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
236
- uint64_t l2_offset,
237
- uint64_t *l2_table, int l2_index, bool active,
238
- bool *metadata_overlap)
239
+static int coroutine_fn GRAPH_RDLOCK
240
+fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
241
+ uint64_t l2_offset, uint64_t *l2_table,
242
+ int l2_index, bool active,
243
+ bool *metadata_overlap)
244
{
245
BDRVQcow2State *s = bs->opaque;
246
int ret;
247
@@ -XXX,XX +XXX,XX @@ static int fix_l2_entry_by_zero(BlockDriverState *bs, BdrvCheckResult *res,
248
goto fail;
249
}
250
251
- ret = bdrv_pwrite_sync(bs->file, l2e_offset, l2_entry_size(s),
252
- &l2_table[idx], 0);
253
+ ret = bdrv_co_pwrite_sync(bs->file, l2e_offset, l2_entry_size(s),
254
+ &l2_table[idx], 0);
255
if (ret < 0) {
256
fprintf(stderr, "ERROR: Failed to overwrite L2 "
257
"table entry: %s\n", strerror(-ret));
258
@@ -XXX,XX +XXX,XX @@ fail:
259
* Returns the number of errors found by the checks or -errno if an internal
260
* error occurred.
261
*/
262
-static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
263
- void **refcount_table,
264
- int64_t *refcount_table_size, int64_t l2_offset,
265
- int flags, BdrvCheckMode fix, bool active)
266
+static int coroutine_fn GRAPH_RDLOCK
267
+check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
268
+ void **refcount_table,
269
+ int64_t *refcount_table_size, int64_t l2_offset,
270
+ int flags, BdrvCheckMode fix, bool active)
271
{
272
BDRVQcow2State *s = bs->opaque;
273
uint64_t l2_entry, l2_bitmap;
274
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
275
bool metadata_overlap;
276
277
/* Read L2 table from disk */
278
- ret = bdrv_pread(bs->file, l2_offset, l2_size_bytes, l2_table, 0);
279
+ ret = bdrv_co_pread(bs->file, l2_offset, l2_size_bytes, l2_table, 0);
280
if (ret < 0) {
281
fprintf(stderr, "ERROR: I/O error in check_refcounts_l2\n");
282
res->check_errors++;
283
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l2(BlockDriverState *bs, BdrvCheckResult *res,
284
* Returns the number of errors found by the checks or -errno if an internal
285
* error occurred.
286
*/
287
-static int check_refcounts_l1(BlockDriverState *bs,
288
- BdrvCheckResult *res,
289
- void **refcount_table,
290
- int64_t *refcount_table_size,
291
- int64_t l1_table_offset, int l1_size,
292
- int flags, BdrvCheckMode fix, bool active)
293
+static int coroutine_fn GRAPH_RDLOCK
294
+check_refcounts_l1(BlockDriverState *bs, BdrvCheckResult *res,
295
+ void **refcount_table, int64_t *refcount_table_size,
296
+ int64_t l1_table_offset, int l1_size,
297
+ int flags, BdrvCheckMode fix, bool active)
298
{
299
BDRVQcow2State *s = bs->opaque;
300
size_t l1_size_bytes = l1_size * L1E_SIZE;
301
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l1(BlockDriverState *bs,
302
}
303
304
/* Read L1 table entries from disk */
305
- ret = bdrv_pread(bs->file, l1_table_offset, l1_size_bytes, l1_table, 0);
306
+ ret = bdrv_co_pread(bs->file, l1_table_offset, l1_size_bytes, l1_table, 0);
307
if (ret < 0) {
308
fprintf(stderr, "ERROR: I/O error in check_refcounts_l1\n");
309
res->check_errors++;
310
@@ -XXX,XX +XXX,XX @@ static int check_refcounts_l1(BlockDriverState *bs,
311
* have been already detected and sufficiently signaled by the calling function
312
* (qcow2_check_refcounts) by the time this function is called).
313
*/
314
-static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
315
- BdrvCheckMode fix)
316
+static int coroutine_fn GRAPH_RDLOCK
317
+check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
318
{
319
BDRVQcow2State *s = bs->opaque;
320
uint64_t *l2_table = qemu_blockalign(bs, s->cluster_size);
321
@@ -XXX,XX +XXX,XX @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
322
}
323
}
324
325
- ret = bdrv_pread(bs->file, l2_offset, s->l2_size * l2_entry_size(s),
326
- l2_table, 0);
327
+ ret = bdrv_co_pread(bs->file, l2_offset, s->l2_size * l2_entry_size(s),
328
+ l2_table, 0);
329
if (ret < 0) {
330
fprintf(stderr, "ERROR: Could not read L2 table: %s\n",
331
strerror(-ret));
332
@@ -XXX,XX +XXX,XX @@ static int check_oflag_copied(BlockDriverState *bs, BdrvCheckResult *res,
333
goto fail;
334
}
335
336
- ret = bdrv_pwrite(bs->file, l2_offset, s->cluster_size, l2_table,
337
- 0);
338
+ ret = bdrv_co_pwrite(bs->file, l2_offset, s->cluster_size, l2_table, 0);
339
if (ret < 0) {
340
fprintf(stderr, "ERROR: Could not write L2 table: %s\n",
341
strerror(-ret));
342
@@ -XXX,XX +XXX,XX @@ fail:
343
* Checks consistency of refblocks and accounts for each refblock in
344
* *refcount_table.
345
*/
346
-static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
347
- BdrvCheckMode fix, bool *rebuild,
348
- void **refcount_table, int64_t *nb_clusters)
349
+static int coroutine_fn GRAPH_RDLOCK
350
+check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
351
+ BdrvCheckMode fix, bool *rebuild,
352
+ void **refcount_table, int64_t *nb_clusters)
353
{
354
BDRVQcow2State *s = bs->opaque;
355
int64_t i, size;
356
@@ -XXX,XX +XXX,XX @@ static int check_refblocks(BlockDriverState *bs, BdrvCheckResult *res,
357
goto resize_fail;
358
}
359
360
- ret = bdrv_truncate(bs->file, offset + s->cluster_size, false,
361
- PREALLOC_MODE_OFF, 0, &local_err);
362
+ ret = bdrv_co_truncate(bs->file, offset + s->cluster_size, false,
363
+ PREALLOC_MODE_OFF, 0, &local_err);
364
if (ret < 0) {
365
error_report_err(local_err);
366
goto resize_fail;
367
}
368
- size = bdrv_getlength(bs->file->bs);
369
+ size = bdrv_co_getlength(bs->file->bs);
370
if (size < 0) {
371
ret = size;
372
goto resize_fail;
373
@@ -XXX,XX +XXX,XX @@ resize_fail:
374
/*
375
* Calculates an in-memory refcount table.
376
*/
377
-static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
378
- BdrvCheckMode fix, bool *rebuild,
379
- void **refcount_table, int64_t *nb_clusters)
380
+static int coroutine_fn GRAPH_RDLOCK
381
+calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
382
+ BdrvCheckMode fix, bool *rebuild,
383
+ void **refcount_table, int64_t *nb_clusters)
384
{
385
BDRVQcow2State *s = bs->opaque;
386
int64_t i;
387
@@ -XXX,XX +XXX,XX @@ static int calculate_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
388
* Compares the actual reference count for each cluster in the image against the
389
* refcount as reported by the refcount structures on-disk.
390
*/
391
-static void compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
392
- BdrvCheckMode fix, bool *rebuild,
393
- int64_t *highest_cluster,
394
- void *refcount_table, int64_t nb_clusters)
395
+static void coroutine_fn
396
+compare_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
397
+ BdrvCheckMode fix, bool *rebuild,
398
+ int64_t *highest_cluster,
399
+ void *refcount_table, int64_t nb_clusters)
400
{
401
BDRVQcow2State *s = bs->opaque;
402
int64_t i;
403
@@ -XXX,XX +XXX,XX @@ static int64_t alloc_clusters_imrt(BlockDriverState *bs,
404
* Return whether the on-disk reftable array was resized (true/false),
405
* or -errno on error.
406
*/
407
-static int rebuild_refcounts_write_refblocks(
408
+static int coroutine_fn GRAPH_RDLOCK
409
+rebuild_refcounts_write_refblocks(
410
BlockDriverState *bs, void **refcount_table, int64_t *nb_clusters,
411
int64_t first_cluster, int64_t end_cluster,
412
uint64_t **on_disk_reftable_ptr, uint32_t *on_disk_reftable_entries_ptr,
413
@@ -XXX,XX +XXX,XX @@ static int rebuild_refcounts_write_refblocks(
414
on_disk_refblock = (void *)((char *) *refcount_table +
415
refblock_index * s->cluster_size);
416
417
- ret = bdrv_pwrite(bs->file, refblock_offset, s->cluster_size,
418
- on_disk_refblock, 0);
419
+ ret = bdrv_co_pwrite(bs->file, refblock_offset, s->cluster_size,
420
+ on_disk_refblock, 0);
421
if (ret < 0) {
422
error_setg_errno(errp, -ret, "ERROR writing refblock");
423
return ret;
424
@@ -XXX,XX +XXX,XX @@ static int rebuild_refcounts_write_refblocks(
425
* On success, the old refcount structure is leaked (it will be covered by the
426
* new refcount structure).
427
*/
428
-static int rebuild_refcount_structure(BlockDriverState *bs,
429
- BdrvCheckResult *res,
430
- void **refcount_table,
431
- int64_t *nb_clusters,
432
- Error **errp)
433
+static int coroutine_fn GRAPH_RDLOCK
434
+rebuild_refcount_structure(BlockDriverState *bs, BdrvCheckResult *res,
435
+ void **refcount_table, int64_t *nb_clusters,
436
+ Error **errp)
437
{
438
BDRVQcow2State *s = bs->opaque;
439
int64_t reftable_offset = -1;
440
@@ -XXX,XX +XXX,XX @@ static int rebuild_refcount_structure(BlockDriverState *bs,
441
}
442
443
assert(reftable_length < INT_MAX);
444
- ret = bdrv_pwrite(bs->file, reftable_offset, reftable_length,
445
- on_disk_reftable, 0);
446
+ ret = bdrv_co_pwrite(bs->file, reftable_offset, reftable_length,
447
+ on_disk_reftable, 0);
448
if (ret < 0) {
449
error_setg_errno(errp, -ret, "ERROR writing reftable");
450
goto fail;
451
@@ -XXX,XX +XXX,XX @@ static int rebuild_refcount_structure(BlockDriverState *bs,
452
reftable_offset_and_clusters.reftable_offset = cpu_to_be64(reftable_offset);
453
reftable_offset_and_clusters.reftable_clusters =
454
cpu_to_be32(reftable_clusters);
455
- ret = bdrv_pwrite_sync(bs->file,
456
- offsetof(QCowHeader, refcount_table_offset),
457
- sizeof(reftable_offset_and_clusters),
458
- &reftable_offset_and_clusters, 0);
459
+ ret = bdrv_co_pwrite_sync(bs->file,
460
+ offsetof(QCowHeader, refcount_table_offset),
461
+ sizeof(reftable_offset_and_clusters),
462
+ &reftable_offset_and_clusters, 0);
463
if (ret < 0) {
464
error_setg_errno(errp, -ret, "ERROR setting reftable");
465
goto fail;
466
@@ -XXX,XX +XXX,XX @@ fail:
467
* Returns 0 if no errors are found, the number of errors in case the image is
468
* detected as corrupted, and -errno when an internal error occurred.
469
*/
470
-int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
471
- BdrvCheckMode fix)
472
+int coroutine_fn GRAPH_RDLOCK
473
+qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix)
474
{
475
BDRVQcow2State *s = bs->opaque;
476
BdrvCheckResult pre_compare_res;
477
@@ -XXX,XX +XXX,XX @@ int qcow2_check_refcounts(BlockDriverState *bs, BdrvCheckResult *res,
478
bool rebuild = false;
479
int ret;
480
481
- size = bdrv_getlength(bs->file->bs);
482
+ size = bdrv_co_getlength(bs->file->bs);
483
if (size < 0) {
484
res->check_errors++;
485
return size;
486
@@ -XXX,XX +XXX,XX @@ done:
487
return ret;
488
}
489
490
-static int64_t get_refblock_offset(BlockDriverState *bs, uint64_t offset)
491
+static int64_t coroutine_fn get_refblock_offset(BlockDriverState *bs,
492
+ uint64_t offset)
493
{
494
BDRVQcow2State *s = bs->opaque;
495
uint32_t index = offset_to_reftable_index(s, offset);
496
@@ -XXX,XX +XXX,XX @@ int64_t coroutine_fn qcow2_get_last_cluster(BlockDriverState *bs, int64_t size)
497
return -EIO;
498
}
499
500
-int coroutine_fn qcow2_detect_metadata_preallocation(BlockDriverState *bs)
501
+int coroutine_fn GRAPH_RDLOCK
502
+qcow2_detect_metadata_preallocation(BlockDriverState *bs)
503
{
504
BDRVQcow2State *s = bs->opaque;
505
int64_t i, end_cluster, cluster_count = 0, threshold;
506
diff --git a/block/qcow2.c b/block/qcow2.c
507
index XXXXXXX..XXXXXXX 100644
508
--- a/block/qcow2.c
509
+++ b/block/qcow2.c
510
@@ -XXX,XX +XXX,XX @@ int qcow2_mark_corrupt(BlockDriverState *bs)
511
* Marks the image as consistent, i.e., unsets the corrupt bit, and flushes
512
* before if necessary.
513
*/
514
-int qcow2_mark_consistent(BlockDriverState *bs)
515
+static int coroutine_fn qcow2_mark_consistent(BlockDriverState *bs)
516
{
517
BDRVQcow2State *s = bs->opaque;
518
28
519
--
29
--
520
2.41.0
30
2.48.1
From: Paolo Bonzini <pbonzini@redhat.com>

Mark functions as coroutine_fn when they are only called by other
coroutine_fns and they can suspend. Because this function operates on
a BlockBackend, mark it GRAPH_UNLOCKED.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20230601115145.196465-6-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
block.c | 11 ++++++-----
1 file changed, 6 insertions(+), 5 deletions(-)

From: Stefan Hajnoczi <stefanha@redhat.com>

Commit 71544d30a6f8 ("scsi: push request restart to SCSIDevice") removed
the only user of SCSIDiskState->bh.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-2-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
hw/scsi/scsi-disk.c | 1 -
1 file changed, 1 deletion(-)
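Sketch of the BlockBackend case (illustrative name, assuming the QEMU
headers; the two blk_co_*() calls mirror the diff below):

    /* A BlockBackend-level helper takes no graph lock itself, so a
     * suspending function is marked coroutine_fn GRAPH_UNLOCKED. */
    static int64_t coroutine_fn GRAPH_UNLOCKED
    example_truncate_and_size(BlockBackend *blk, int64_t minimum_size,
                              Error **errp)
    {
        int ret = blk_co_truncate(blk, minimum_size, false,
                                  PREALLOC_MODE_OFF, 0, errp);
        if (ret < 0 && ret != -ENOTSUP) {
            return ret;
        }
        return blk_co_getlength(blk);   /* both calls may suspend */
    }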
15
diff --git a/block.c b/block.c
15
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
16
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block.c
17
--- a/hw/scsi/scsi-disk.c
18
+++ b/block.c
18
+++ b/hw/scsi/scsi-disk.c
19
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_create(BlockDriver *drv, const char *filename,
19
@@ -XXX,XX +XXX,XX @@ struct SCSIDiskState {
20
* On success, return @blk's actual length.
20
uint64_t max_unmap_size;
21
* Otherwise, return -errno.
21
uint64_t max_io_size;
22
*/
22
uint32_t quirks;
23
-static int64_t create_file_fallback_truncate(BlockBackend *blk,
23
- QEMUBH *bh;
24
- int64_t minimum_size, Error **errp)
24
char *version;
25
+static int64_t coroutine_fn GRAPH_UNLOCKED
25
char *serial;
26
+create_file_fallback_truncate(BlockBackend *blk, int64_t minimum_size,
26
char *vendor;
27
+ Error **errp)
28
{
29
Error *local_err = NULL;
30
int64_t size;
31
@@ -XXX,XX +XXX,XX @@ static int64_t create_file_fallback_truncate(BlockBackend *blk,
32
33
GLOBAL_STATE_CODE();
34
35
- ret = blk_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
36
- &local_err);
37
+ ret = blk_co_truncate(blk, minimum_size, false, PREALLOC_MODE_OFF, 0,
38
+ &local_err);
39
if (ret < 0 && ret != -ENOTSUP) {
40
error_propagate(errp, local_err);
41
return ret;
42
}
43
44
- size = blk_getlength(blk);
45
+ size = blk_co_getlength(blk);
46
if (size < 0) {
47
error_free(local_err);
48
error_setg_errno(errp, -size,
49
--
27
--
50
2.41.0
28
2.48.1
29
30
bdrv_set_backing_hd() requires the caller to hold the AioContext lock
for @backing_hd. Take it in bdrv_open_backing_file() before calling the
function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20230605085711.21261-9-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>

From: Stefan Hajnoczi <stefanha@redhat.com>

In the past a single AioContext was used for block I/O and it was
fetched using blk_get_aio_context(). Nowadays the block layer supports
running I/O from any AioContext and multiple AioContexts at the same
time. Remove the dma_blk_io() AioContext argument and use the current
AioContext instead.

This makes calling the function easier and enables multiple IOThreads to
use dma_blk_io() concurrently for the same block device.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-3-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
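For reference, the resulting call shape (prototype and the scsi-disk
read call site, both as they appear in the diff below):

    BlockAIOCB *dma_blk_io(QEMUSGList *sg, uint64_t offset, uint32_t align,
                           DMAIOFunc *io_func, void *io_func_opaque,
                           BlockCompletionFunc *cb, void *opaque,
                           DMADirection dir);

    /* The request now runs in the caller's current AioContext; no
     * explicit context argument is passed. */
    r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                              BDRV_SECTOR_SIZE, sdc->dma_readv, r,
                              scsi_dma_complete, r,
                              DMA_DIRECTION_FROM_DEVICE);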
9
---
16
---
10
block.c | 5 +++++
17
include/system/dma.h | 3 +--
11
1 file changed, 5 insertions(+)
18
hw/ide/core.c | 3 +--
19
hw/ide/macio.c | 3 +--
20
hw/scsi/scsi-disk.c | 6 ++----
21
system/dma-helpers.c | 8 ++++----
22
5 files changed, 9 insertions(+), 14 deletions(-)
12
23
13
diff --git a/block.c b/block.c
24
diff --git a/include/system/dma.h b/include/system/dma.h
14
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
15
--- a/block.c
26
--- a/include/system/dma.h
16
+++ b/block.c
27
+++ b/include/system/dma.h
17
@@ -XXX,XX +XXX,XX @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
28
@@ -XXX,XX +XXX,XX @@ typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov,
18
int ret = 0;
29
BlockCompletionFunc *cb, void *cb_opaque,
19
bool implicit_backing = false;
30
void *opaque);
20
BlockDriverState *backing_hd;
31
21
+ AioContext *backing_hd_ctx;
32
-BlockAIOCB *dma_blk_io(AioContext *ctx,
22
QDict *options;
33
- QEMUSGList *sg, uint64_t offset, uint32_t align,
23
QDict *tmp_parent_options = NULL;
34
+BlockAIOCB *dma_blk_io(QEMUSGList *sg, uint64_t offset, uint32_t align,
24
Error *local_err = NULL;
35
DMAIOFunc *io_func, void *io_func_opaque,
25
@@ -XXX,XX +XXX,XX @@ int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
36
BlockCompletionFunc *cb, void *opaque, DMADirection dir);
26
37
BlockAIOCB *dma_blk_read(BlockBackend *blk,
27
/* Hook up the backing file link; drop our reference, bs owns the
38
diff --git a/hw/ide/core.c b/hw/ide/core.c
28
* backing_hd reference now */
39
index XXXXXXX..XXXXXXX 100644
29
+ backing_hd_ctx = bdrv_get_aio_context(backing_hd);
40
--- a/hw/ide/core.c
30
+ aio_context_acquire(backing_hd_ctx);
41
+++ b/hw/ide/core.c
31
ret = bdrv_set_backing_hd(bs, backing_hd, errp);
42
@@ -XXX,XX +XXX,XX @@ static void ide_dma_cb(void *opaque, int ret)
32
bdrv_unref(backing_hd);
43
BDRV_SECTOR_SIZE, ide_dma_cb, s);
33
+ aio_context_release(backing_hd_ctx);
44
break;
34
+
45
case IDE_DMA_TRIM:
35
if (ret < 0) {
46
- s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk),
36
goto free_exit;
47
- &s->sg, offset, BDRV_SECTOR_SIZE,
37
}
48
+ s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, BDRV_SECTOR_SIZE,
49
ide_issue_trim, s, ide_dma_cb, s,
50
DMA_DIRECTION_TO_DEVICE);
51
break;
52
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
53
index XXXXXXX..XXXXXXX 100644
54
--- a/hw/ide/macio.c
55
+++ b/hw/ide/macio.c
56
@@ -XXX,XX +XXX,XX @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
57
pmac_ide_transfer_cb, io);
58
break;
59
case IDE_DMA_TRIM:
60
- s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk), &s->sg,
61
- offset, 0x1, ide_issue_trim, s,
62
+ s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, 0x1, ide_issue_trim, s,
63
pmac_ide_transfer_cb, io,
64
DMA_DIRECTION_TO_DEVICE);
65
break;
66
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/hw/scsi/scsi-disk.c
69
+++ b/hw/scsi/scsi-disk.c
70
@@ -XXX,XX +XXX,XX @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
71
if (r->req.sg) {
72
dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
73
r->req.residual -= r->req.sg->size;
74
- r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
75
- r->req.sg, r->sector << BDRV_SECTOR_BITS,
76
+ r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
77
BDRV_SECTOR_SIZE,
78
sdc->dma_readv, r, scsi_dma_complete, r,
79
DMA_DIRECTION_FROM_DEVICE);
80
@@ -XXX,XX +XXX,XX @@ static void scsi_write_data(SCSIRequest *req)
81
if (r->req.sg) {
82
dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
83
r->req.residual -= r->req.sg->size;
84
- r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
85
- r->req.sg, r->sector << BDRV_SECTOR_BITS,
86
+ r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
87
BDRV_SECTOR_SIZE,
88
sdc->dma_writev, r, scsi_dma_complete, r,
89
DMA_DIRECTION_TO_DEVICE);
90
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
91
index XXXXXXX..XXXXXXX 100644
92
--- a/system/dma-helpers.c
93
+++ b/system/dma-helpers.c
94
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo dma_aiocb_info = {
95
.cancel_async = dma_aio_cancel,
96
};
97
98
-BlockAIOCB *dma_blk_io(AioContext *ctx,
99
+BlockAIOCB *dma_blk_io(
100
QEMUSGList *sg, uint64_t offset, uint32_t align,
101
DMAIOFunc *io_func, void *io_func_opaque,
102
BlockCompletionFunc *cb,
103
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_io(AioContext *ctx,
104
105
dbs->acb = NULL;
106
dbs->sg = sg;
107
- dbs->ctx = ctx;
108
+ dbs->ctx = qemu_get_current_aio_context();
109
dbs->offset = offset;
110
dbs->align = align;
111
dbs->sg_cur_index = 0;
112
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
113
QEMUSGList *sg, uint64_t offset, uint32_t align,
114
void (*cb)(void *opaque, int ret), void *opaque)
115
{
116
- return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
117
+ return dma_blk_io(sg, offset, align,
118
dma_blk_read_io_func, blk, cb, opaque,
119
DMA_DIRECTION_FROM_DEVICE);
120
}
121
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_write(BlockBackend *blk,
122
QEMUSGList *sg, uint64_t offset, uint32_t align,
123
void (*cb)(void *opaque, int ret), void *opaque)
124
{
125
- return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
126
+ return dma_blk_io(sg, offset, align,
127
dma_blk_write_io_func, blk, cb, opaque,
128
DMA_DIRECTION_TO_DEVICE);
129
}
38
--
130
--
39
2.41.0
131
2.48.1
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
Mark functions as coroutine_fn when they are only called by other coroutine_fns
3
Until now, a SCSIDevice's I/O requests have run in a single AioContext.
4
and they can suspend. Change calls to co_wrappers to use the non-wrapped
4
In order to support multiple IOThreads it will be necessary to move to
5
functions, which in turn requires adding GRAPH_RDLOCK annotations.
5
the concept of a per-SCSIRequest AioContext.
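
In sketch form (hypothetical helper names; the real assignment and
assertions appear in the scsi-bus.c and scsi-disk.c hunks below), the
invariant is: stamp the request with the submitting AioContext, then
check it wherever the request is processed:

    static void my_req_begin(SCSIRequest *req)
    {
        /* Remember which AioContext this request runs in */
        req->ctx = qemu_get_current_aio_context();
    }

    static void my_req_complete(SCSIRequest *req)
    {
        /* The request must run in its AioContext, which is no longer
         * necessarily the BlockBackend's AioContext. */
        assert(req->ctx == qemu_get_current_aio_context());
    }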
6
6
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-ID: <20230601115145.196465-10-pbonzini@redhat.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
8
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
9
Message-ID: <20250311132616.1049687-4-stefanha@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
11
---
12
block/vhdx.h | 5 ++--
12
include/hw/scsi/scsi.h | 1 +
13
block/vhdx-log.c | 36 +++++++++++++-----------
13
hw/scsi/scsi-bus.c | 1 +
14
block/vhdx.c | 73 +++++++++++++++++++++++-------------------------
14
hw/scsi/scsi-disk.c | 17 ++++++-----------
15
3 files changed, 57 insertions(+), 57 deletions(-)
15
3 files changed, 8 insertions(+), 11 deletions(-)
16
16
17
diff --git a/block/vhdx.h b/block/vhdx.h
17
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/vhdx.h
19
--- a/include/hw/scsi/scsi.h
20
+++ b/block/vhdx.h
20
+++ b/include/hw/scsi/scsi.h
21
@@ -XXX,XX +XXX,XX @@ bool vhdx_checksum_is_valid(uint8_t *buf, size_t size, int crc_offset);
21
@@ -XXX,XX +XXX,XX @@ struct SCSIRequest {
22
int vhdx_parse_log(BlockDriverState *bs, BDRVVHDXState *s, bool *flushed,
22
SCSIBus *bus;
23
Error **errp);
23
SCSIDevice *dev;
24
24
const SCSIReqOps *ops;
25
-int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
25
+ AioContext *ctx;
26
- void *data, uint32_t length, uint64_t offset);
26
uint32_t refcount;
27
+int coroutine_fn GRAPH_RDLOCK
27
uint32_t tag;
28
+vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
28
uint32_t lun;
29
+ void *data, uint32_t length, uint64_t offset);
29
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
30
31
static inline void leguid_to_cpus(MSGUID *guid)
32
{
33
diff --git a/block/vhdx-log.c b/block/vhdx-log.c
34
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
35
--- a/block/vhdx-log.c
31
--- a/hw/scsi/scsi-bus.c
36
+++ b/block/vhdx-log.c
32
+++ b/hw/scsi/scsi-bus.c
37
@@ -XXX,XX +XXX,XX @@ exit:
33
@@ -XXX,XX +XXX,XX @@ invalid_opcode:
38
* It is assumed that 'buffer' is at least 4096*num_sectors large.
39
*
40
* 0 is returned on success, -errno otherwise */
41
-static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
42
- uint32_t *sectors_written, void *buffer,
43
- uint32_t num_sectors)
44
+static int coroutine_fn GRAPH_RDLOCK
45
+vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
46
+ uint32_t *sectors_written, void *buffer,
47
+ uint32_t num_sectors)
48
{
49
int ret = 0;
50
uint64_t offset;
51
@@ -XXX,XX +XXX,XX @@ static int vhdx_log_write_sectors(BlockDriverState *bs, VHDXLogEntries *log,
52
/* full */
53
break;
54
}
55
- ret = bdrv_pwrite(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer_tmp,
56
- 0);
57
+ ret = bdrv_co_pwrite(bs->file, offset, VHDX_LOG_SECTOR_SIZE, buffer_tmp, 0);
58
if (ret < 0) {
59
goto exit;
60
}
61
@@ -XXX,XX +XXX,XX @@ static void vhdx_log_raw_to_le_sector(VHDXLogDescriptor *desc,
62
}
63
64
65
-static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
66
- void *data, uint32_t length, uint64_t offset)
67
+static int coroutine_fn GRAPH_RDLOCK
68
+vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
69
+ void *data, uint32_t length, uint64_t offset)
70
{
71
int ret = 0;
72
void *buffer = NULL;
73
@@ -XXX,XX +XXX,XX @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
74
75
sectors += partial_sectors;
76
77
- file_length = bdrv_getlength(bs->file->bs);
78
+ file_length = bdrv_co_getlength(bs->file->bs);
79
if (file_length < 0) {
80
ret = file_length;
81
goto exit;
82
@@ -XXX,XX +XXX,XX @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
83
84
if (i == 0 && leading_length) {
85
/* partial sector at the front of the buffer */
86
- ret = bdrv_pread(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
87
- merged_sector, 0);
88
+ ret = bdrv_co_pread(bs->file, file_offset, VHDX_LOG_SECTOR_SIZE,
89
+ merged_sector, 0);
90
if (ret < 0) {
91
goto exit;
92
}
93
@@ -XXX,XX +XXX,XX @@ static int vhdx_log_write(BlockDriverState *bs, BDRVVHDXState *s,
94
sector_write = merged_sector;
95
} else if (i == sectors - 1 && trailing_length) {
96
/* partial sector at the end of the buffer */
97
- ret = bdrv_pread(bs->file, file_offset + trailing_length,
98
- VHDX_LOG_SECTOR_SIZE - trailing_length,
99
- merged_sector + trailing_length, 0);
100
+ ret = bdrv_co_pread(bs->file, file_offset + trailing_length,
101
+ VHDX_LOG_SECTOR_SIZE - trailing_length,
102
+ merged_sector + trailing_length, 0);
103
if (ret < 0) {
104
goto exit;
105
}
106
@@ -XXX,XX +XXX,XX @@ exit:
107
}
108
109
/* Perform a log write, and then immediately flush the entire log */
110
-int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
111
- void *data, uint32_t length, uint64_t offset)
112
+int coroutine_fn
113
+vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
114
+ void *data, uint32_t length, uint64_t offset)
115
{
116
int ret = 0;
117
VHDXLogSequence logs = { .valid = true,
118
@@ -XXX,XX +XXX,XX @@ int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
119
120
/* Make sure data written (new and/or changed blocks) is stable
121
* on disk, before creating log entry */
122
- ret = bdrv_flush(bs);
123
+ ret = bdrv_co_flush(bs);
124
if (ret < 0) {
125
goto exit;
126
}
127
@@ -XXX,XX +XXX,XX @@ int vhdx_log_write_and_flush(BlockDriverState *bs, BDRVVHDXState *s,
128
logs.log = s->log;
129
130
/* Make sure log is stable on disk */
131
- ret = bdrv_flush(bs);
132
+ ret = bdrv_co_flush(bs);
133
if (ret < 0) {
134
goto exit;
135
}
136
diff --git a/block/vhdx.c b/block/vhdx.c
137
index XXXXXXX..XXXXXXX 100644
138
--- a/block/vhdx.c
139
+++ b/block/vhdx.c
140
@@ -XXX,XX +XXX,XX @@ exit:
141
*
142
* Returns the file offset start of the new payload block
143
*/
144
-static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
145
- uint64_t *new_offset, bool *need_zero)
146
+static int coroutine_fn GRAPH_RDLOCK
147
+vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
148
+ uint64_t *new_offset, bool *need_zero)
149
{
150
int64_t current_len;
151
152
- current_len = bdrv_getlength(bs->file->bs);
153
+ current_len = bdrv_co_getlength(bs->file->bs);
154
if (current_len < 0) {
155
return current_len;
156
}
157
@@ -XXX,XX +XXX,XX @@ static int vhdx_allocate_block(BlockDriverState *bs, BDRVVHDXState *s,
158
if (*need_zero) {
159
int ret;
160
161
- ret = bdrv_truncate(bs->file, *new_offset + s->block_size, false,
162
- PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL);
163
+ ret = bdrv_co_truncate(bs->file, *new_offset + s->block_size, false,
164
+ PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL);
165
if (ret != -ENOTSUP) {
166
*need_zero = false;
167
return ret;
168
}
34
}
169
}
35
}
170
36
171
- return bdrv_truncate(bs->file, *new_offset + s->block_size, false,
37
+ req->ctx = qemu_get_current_aio_context();
172
- PREALLOC_MODE_OFF, 0, NULL);
38
req->cmd = cmd;
173
+ return bdrv_co_truncate(bs->file, *new_offset + s->block_size, false,
39
req->residual = req->cmd.xfer;
174
+ PREALLOC_MODE_OFF, 0, NULL);
40
175
}
41
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
176
42
index XXXXXXX..XXXXXXX 100644
177
/*
43
--- a/hw/scsi/scsi-disk.c
178
@@ -XXX,XX +XXX,XX @@ exit:
44
+++ b/hw/scsi/scsi-disk.c
179
* The first 64KB of the Metadata section is reserved for the metadata
45
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
180
* header and entries; beyond that, the metadata items themselves reside.
46
SCSIDiskReq *r = (SCSIDiskReq *)opaque;
181
*/
47
SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
182
-static int vhdx_create_new_metadata(BlockBackend *blk,
48
183
- uint64_t image_size,
49
- /* The request must only run in the BlockBackend's AioContext */
184
- uint32_t block_size,
50
- assert(blk_get_aio_context(s->qdev.conf.blk) ==
185
- uint32_t sector_size,
51
- qemu_get_current_aio_context());
186
- uint64_t metadata_offset,
52
+ /* The request must run in its AioContext */
187
- VHDXImageType type)
53
+ assert(r->req.ctx == qemu_get_current_aio_context());
188
+static int coroutine_fn
54
189
+vhdx_create_new_metadata(BlockBackend *blk, uint64_t image_size,
55
assert(r->req.aiocb != NULL);
190
+ uint32_t block_size, uint32_t sector_size,
56
r->req.aiocb = NULL;
191
+ uint64_t metadata_offset, VHDXImageType type)
57
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_complete(void *opaque, int ret)
58
59
static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
192
{
60
{
193
int ret = 0;
61
- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
194
uint32_t offset = 0;
62
uint32_t n;
195
@@ -XXX,XX +XXX,XX @@ static int vhdx_create_new_metadata(BlockBackend *blk,
63
196
VHDX_META_FLAGS_IS_VIRTUAL_DISK;
64
- /* The request must only run in the BlockBackend's AioContext */
197
vhdx_metadata_entry_le_export(&md_table_entry[4]);
65
- assert(blk_get_aio_context(s->qdev.conf.blk) ==
198
66
- qemu_get_current_aio_context());
199
- ret = blk_pwrite(blk, metadata_offset, VHDX_HEADER_BLOCK_SIZE, buffer, 0);
67
+ /* The request must run in its AioContext */
200
+ ret = blk_co_pwrite(blk, metadata_offset, VHDX_HEADER_BLOCK_SIZE, buffer, 0);
68
+ assert(r->req.ctx == qemu_get_current_aio_context());
201
if (ret < 0) {
69
202
goto exit;
70
assert(r->req.aiocb == NULL);
203
}
71
if (scsi_disk_req_check_error(r, ret, ret > 0)) {
204
72
@@ -XXX,XX +XXX,XX @@ static void scsi_read_data(SCSIRequest *req)
205
- ret = blk_pwrite(blk, metadata_offset + (64 * KiB),
73
206
- VHDX_METADATA_ENTRY_BUFFER_SIZE, entry_buffer, 0);
74
static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
207
+ ret = blk_co_pwrite(blk, metadata_offset + (64 * KiB),
208
+ VHDX_METADATA_ENTRY_BUFFER_SIZE, entry_buffer, 0);
209
if (ret < 0) {
210
goto exit;
211
}
212
@@ -XXX,XX +XXX,XX @@ exit:
213
* Fixed images: default state of the BAT is fully populated, with
214
* file offsets and state PAYLOAD_BLOCK_FULLY_PRESENT.
215
*/
216
-static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
217
- uint64_t image_size, VHDXImageType type,
218
- bool use_zero_blocks, uint64_t file_offset,
219
- uint32_t length, Error **errp)
220
+static int coroutine_fn
221
+vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
222
+ uint64_t image_size, VHDXImageType type,
223
+ bool use_zero_blocks, uint64_t file_offset,
224
+ uint32_t length, Error **errp)
225
{
75
{
226
int ret = 0;
76
- SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
227
uint64_t data_file_offset;
77
uint32_t n;
228
@@ -XXX,XX +XXX,XX @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
78
229
if (type == VHDX_TYPE_DYNAMIC) {
79
- /* The request must only run in the BlockBackend's AioContext */
230
/* All zeroes, so we can just extend the file - the end of the BAT
80
- assert(blk_get_aio_context(s->qdev.conf.blk) ==
231
* is the furthest thing we have written yet */
81
- qemu_get_current_aio_context());
232
- ret = blk_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF,
82
+ /* The request must run in its AioContext */
233
- 0, errp);
83
+ assert(r->req.ctx == qemu_get_current_aio_context());
234
+ ret = blk_co_truncate(blk, data_file_offset, false, PREALLOC_MODE_OFF,
84
235
+ 0, errp);
85
assert (r->req.aiocb == NULL);
236
if (ret < 0) {
86
if (scsi_disk_req_check_error(r, ret, ret > 0)) {
237
goto exit;
238
}
239
} else if (type == VHDX_TYPE_FIXED) {
240
- ret = blk_truncate(blk, data_file_offset + image_size, false,
241
- PREALLOC_MODE_OFF, 0, errp);
242
+ ret = blk_co_truncate(blk, data_file_offset + image_size, false,
243
+ PREALLOC_MODE_OFF, 0, errp);
244
if (ret < 0) {
245
goto exit;
246
}
247
@@ -XXX,XX +XXX,XX @@ static int vhdx_create_bat(BlockBackend *blk, BDRVVHDXState *s,
248
s->bat[sinfo.bat_idx] = cpu_to_le64(s->bat[sinfo.bat_idx]);
249
sector_num += s->sectors_per_block;
250
}
251
- ret = blk_pwrite(blk, file_offset, length, s->bat, 0);
252
+ ret = blk_co_pwrite(blk, file_offset, length, s->bat, 0);
253
if (ret < 0) {
254
error_setg_errno(errp, -ret, "Failed to write the BAT");
255
goto exit;
256
@@ -XXX,XX +XXX,XX @@ exit:
257
* to create the BAT itself, we will also cause the BAT to be
258
* created.
259
*/
260
-static int vhdx_create_new_region_table(BlockBackend *blk,
261
- uint64_t image_size,
262
- uint32_t block_size,
263
- uint32_t sector_size,
264
- uint32_t log_size,
265
- bool use_zero_blocks,
266
- VHDXImageType type,
267
- uint64_t *metadata_offset,
268
- Error **errp)
269
+static int coroutine_fn
270
+vhdx_create_new_region_table(BlockBackend *blk, uint64_t image_size,
271
+ uint32_t block_size, uint32_t sector_size,
272
+ uint32_t log_size, bool use_zero_blocks,
273
+ VHDXImageType type, uint64_t *metadata_offset,
274
+ Error **errp)
275
{
276
int ret = 0;
277
uint32_t offset = 0;
278
@@ -XXX,XX +XXX,XX @@ static int vhdx_create_new_region_table(BlockBackend *blk,
279
}
280
281
/* Now write out the region headers to disk */
282
- ret = blk_pwrite(blk, VHDX_REGION_TABLE_OFFSET, VHDX_HEADER_BLOCK_SIZE,
283
- buffer, 0);
284
+ ret = blk_co_pwrite(blk, VHDX_REGION_TABLE_OFFSET, VHDX_HEADER_BLOCK_SIZE,
285
+ buffer, 0);
286
if (ret < 0) {
287
error_setg_errno(errp, -ret, "Failed to write first region table");
288
goto exit;
289
}
290
291
- ret = blk_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, VHDX_HEADER_BLOCK_SIZE,
292
- buffer, 0);
293
+ ret = blk_co_pwrite(blk, VHDX_REGION_TABLE2_OFFSET, VHDX_HEADER_BLOCK_SIZE,
294
+ buffer, 0);
295
if (ret < 0) {
296
error_setg_errno(errp, -ret, "Failed to write second region table");
297
goto exit;
298
--
87
--
299
2.41.0
88
2.48.1
1
If the caller keeps the AioContext lock for a block node in an iothread,
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
polling in bdrv_graph_wrlock() deadlocks if the condition isn't
3
fulfilled immediately.
4
2
5
Now that all callers make sure to actually have the AioContext locked
3
SCSIDevice keeps track of in-flight requests for device reset and Task
6
when they call bdrv_replace_child_noperm() like they should, we can
4
Management Functions (TMFs). The request list requires protection so
7
change bdrv_graph_wrlock() to take a BlockDriverState whose AioContext
5
that multi-threaded SCSI emulation can be implemented in commits that
8
lock the caller holds (NULL if it doesn't) and unlock it temporarily
6
follow.
9
while polling.
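
For the scsi-bus requests_lock change above, the locking shape is roughly
the following (a minimal sketch using the names from its hunks, not the
complete logic):

    /* Enqueue and dequeue always happen under the new lock */
    WITH_QEMU_LOCK_GUARD(&dev->requests_lock) {
        QTAILQ_INSERT_TAIL(&dev->requests, req, next);
    }

    /* Iteration snapshots referenced requests so fn() runs outside the
     * lock, avoiding lock-ordering problems with whatever fn() does */
    g_autoptr(GList) reqs = NULL;
    WITH_QEMU_LOCK_GUARD(&dev->requests_lock) {
        QTAILQ_FOREACH(req, &dev->requests, next) {
            scsi_req_ref(req); /* dropped again after fn() */
            reqs = g_list_prepend(reqs, req);
        }
    }
    for (GList *l = reqs; l; l = l->next) {
        fn(l->data, opaque);
        scsi_req_unref(l->data);
    }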
10
7
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Message-ID: <20230605085711.21261-11-kwolf@redhat.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-ID: <20250311132616.1049687-5-stefanha@redhat.com>
14
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
---
12
---
16
include/block/graph-lock.h | 6 ++++--
13
include/hw/scsi/scsi.h | 7 ++-
17
block.c | 4 ++--
14
hw/scsi/scsi-bus.c | 120 +++++++++++++++++++++++++++++------------
18
block/graph-lock.c | 23 ++++++++++++++++++++++-
15
2 files changed, 88 insertions(+), 39 deletions(-)
19
3 files changed, 28 insertions(+), 5 deletions(-)
20
16
21
diff --git a/include/block/graph-lock.h b/include/block/graph-lock.h
17
diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
22
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
23
--- a/include/block/graph-lock.h
19
--- a/include/hw/scsi/scsi.h
24
+++ b/include/block/graph-lock.h
20
+++ b/include/hw/scsi/scsi.h
25
@@ -XXX,XX +XXX,XX @@ void unregister_aiocontext(AioContext *ctx);
21
@@ -XXX,XX +XXX,XX @@ struct SCSIRequest {
26
* The wrlock can only be taken from the main loop, with BQL held, as only the
22
bool dma_started;
27
* main loop is allowed to modify the graph.
23
BlockAIOCB *aiocb;
28
*
24
QEMUSGList *sg;
29
+ * If @bs is non-NULL, its AioContext is temporarily released.
25
+
26
+ /* Protected by SCSIDevice->requests_lock */
27
QTAILQ_ENTRY(SCSIRequest) next;
28
};
29
30
@@ -XXX,XX +XXX,XX @@ struct SCSIDevice
31
uint8_t sense[SCSI_SENSE_BUF_SIZE];
32
uint32_t sense_len;
33
34
- /*
35
- * The requests list is only accessed from the AioContext that executes
36
- * requests or from the main loop when IOThread processing is stopped.
37
- */
38
+ QemuMutex requests_lock; /* protects the requests list */
39
QTAILQ_HEAD(, SCSIRequest) requests;
40
41
uint32_t channel;
42
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
43
index XXXXXXX..XXXXXXX 100644
44
--- a/hw/scsi/scsi-bus.c
45
+++ b/hw/scsi/scsi-bus.c
46
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_sync(SCSIDevice *s,
47
assert(!runstate_is_running());
48
assert(qemu_in_main_thread());
49
50
- QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
51
- fn(req, opaque);
52
+ /*
53
+ * Locking is not necessary because the guest is stopped and no other
54
+ * threads can be accessing the requests list, but take the lock for
55
+ * consistency.
56
+ */
57
+ WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
58
+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
59
+ fn(req, opaque);
60
+ }
61
}
62
}
63
64
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async_bh(void *opaque)
65
{
66
g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
67
SCSIDevice *s = data->s;
68
- AioContext *ctx;
69
- SCSIRequest *req;
70
- SCSIRequest *next;
71
+ g_autoptr(GList) reqs = NULL;
72
73
/*
74
- * The BB cannot have changed contexts between this BH being scheduled and
75
- * now: BBs' AioContexts, when they have a node attached, can only be
76
- * changed via bdrv_try_change_aio_context(), in a drained section. While
77
- * we have the in-flight counter incremented, that drain must block.
78
+ * Build a list of requests in this AioContext so fn() can be invoked later
79
+ * outside requests_lock.
80
*/
81
- ctx = blk_get_aio_context(s->conf.blk);
82
- assert(ctx == qemu_get_current_aio_context());
83
+ WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
84
+ AioContext *ctx = qemu_get_current_aio_context();
85
+ SCSIRequest *req;
86
+ SCSIRequest *next;
87
+
88
+ QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
89
+ if (req->ctx == ctx) {
90
+ scsi_req_ref(req); /* dropped after calling fn() */
91
+ reqs = g_list_prepend(reqs, req);
92
+ }
93
+ }
94
+ }
95
96
- QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
97
- data->fn(req, data->fn_opaque);
98
+ /* Call fn() on each request */
99
+ for (GList *elem = g_list_first(reqs); elem; elem = g_list_next(elem)) {
100
+ data->fn(elem->data, data->fn_opaque);
101
+ scsi_req_unref(elem->data);
102
}
103
104
/* Drop the reference taken by scsi_device_for_each_req_async() */
105
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async_bh(void *opaque)
106
blk_dec_in_flight(s->conf.blk);
107
}
108
109
+static void scsi_device_for_each_req_async_do_ctx(gpointer key, gpointer value,
110
+ gpointer user_data)
111
+{
112
+ AioContext *ctx = key;
113
+ SCSIDeviceForEachReqAsyncData *params = user_data;
114
+ SCSIDeviceForEachReqAsyncData *data;
115
+
116
+ data = g_new(SCSIDeviceForEachReqAsyncData, 1);
117
+ data->s = params->s;
118
+ data->fn = params->fn;
119
+ data->fn_opaque = params->fn_opaque;
120
+
121
+ /*
122
+ * Hold a reference to the SCSIDevice until
123
+ * scsi_device_for_each_req_async_bh() finishes.
124
+ */
125
+ object_ref(OBJECT(data->s));
126
+
127
+ /* Paired with scsi_device_for_each_req_async_bh() */
128
+ blk_inc_in_flight(data->s->conf.blk);
129
+
130
+ aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);
131
+}
132
+
133
/*
134
* Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
135
- * runs in the AioContext that is executing the request.
136
+ * must be thread-safe because it runs concurrently in each AioContext that is
137
+ * executing a request.
30
+ *
138
+ *
31
* This function polls. Callers must not hold the lock of any AioContext other
139
* Keeps the BlockBackend's in-flight counter incremented until everything is
32
- * than the current one.
140
* done, so draining it will settle all scheduled @fn() calls.
33
+ * than the current one and the one of @bs.
34
*/
141
*/
35
-void bdrv_graph_wrlock(void) TSA_ACQUIRE(graph_lock) TSA_NO_TSA;
142
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async(SCSIDevice *s,
36
+void bdrv_graph_wrlock(BlockDriverState *bs) TSA_ACQUIRE(graph_lock) TSA_NO_TSA;
37
38
/*
39
* bdrv_graph_wrunlock:
40
diff --git a/block.c b/block.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/block.c
43
+++ b/block.c
44
@@ -XXX,XX +XXX,XX @@ uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm)
45
* Replaces the node that a BdrvChild points to without updating permissions.
46
*
47
* If @new_bs is non-NULL, the parent of @child must already be drained through
48
- * @child.
49
+ * @child and the caller must hold the AioContext lock for @new_bs.
50
*/
51
static void bdrv_replace_child_noperm(BdrvChild *child,
52
BlockDriverState *new_bs)
53
@@ -XXX,XX +XXX,XX @@ static void bdrv_replace_child_noperm(BdrvChild *child,
54
}
55
56
/* TODO Pull this up into the callers to avoid polling here */
57
- bdrv_graph_wrlock();
58
+ bdrv_graph_wrlock(new_bs);
59
if (old_bs) {
60
if (child->klass->detach) {
61
child->klass->detach(child);
62
diff --git a/block/graph-lock.c b/block/graph-lock.c
63
index XXXXXXX..XXXXXXX 100644
64
--- a/block/graph-lock.c
65
+++ b/block/graph-lock.c
66
@@ -XXX,XX +XXX,XX @@ static uint32_t reader_count(void)
67
}
68
#endif
69
70
-void bdrv_graph_wrlock(void)
71
+void bdrv_graph_wrlock(BlockDriverState *bs)
72
{
143
{
73
+ AioContext *ctx = NULL;
144
assert(qemu_in_main_thread());
74
+
145
75
GLOBAL_STATE_CODE();
146
- SCSIDeviceForEachReqAsyncData *data =
76
/*
147
- g_new(SCSIDeviceForEachReqAsyncData, 1);
77
* TODO Some callers hold an AioContext lock when this is called, which
148
-
78
@@ -XXX,XX +XXX,XX @@ void bdrv_graph_wrlock(void)
149
- data->s = s;
79
*/
150
- data->fn = fn;
80
#if 0
151
- data->fn_opaque = opaque;
81
assert(!qatomic_read(&has_writer));
152
-
82
+#endif
153
- /*
83
+
154
- * Hold a reference to the SCSIDevice until
84
+ /*
155
- * scsi_device_for_each_req_async_bh() finishes.
85
+ * Release only non-mainloop AioContext. The mainloop often relies on the
156
- */
86
+ * BQL and doesn't lock the main AioContext before doing things.
157
- object_ref(OBJECT(s));
87
+ */
158
+ /* The set of AioContexts where the requests are being processed */
88
+ if (bs) {
159
+ g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
89
+ ctx = bdrv_get_aio_context(bs);
160
+ WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
90
+ if (ctx != qemu_get_aio_context()) {
161
+ SCSIRequest *req;
91
+ aio_context_release(ctx);
162
+ QTAILQ_FOREACH(req, &s->requests, next) {
92
+ } else {
163
+ g_hash_table_add(aio_contexts, req->ctx);
93
+ ctx = NULL;
94
+ }
164
+ }
95
+ }
165
+ }
96
166
97
+#if 0
167
- /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */
98
/* Make sure that constantly arriving new I/O doesn't cause starvation */
168
- blk_inc_in_flight(s->conf.blk);
99
bdrv_drain_all_begin_nopoll();
169
- aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
100
170
- scsi_device_for_each_req_async_bh,
101
@@ -XXX,XX +XXX,XX @@ void bdrv_graph_wrlock(void)
171
- data);
102
172
+ /* Schedule a BH for each AioContext */
103
bdrv_drain_all_end();
173
+ SCSIDeviceForEachReqAsyncData params = {
104
#endif
174
+ .s = s,
105
+
175
+ .fn = fn,
106
+ if (ctx) {
176
+ .fn_opaque = opaque,
107
+ aio_context_acquire(bdrv_get_aio_context(bs));
177
+ };
178
+ g_hash_table_foreach(
179
+ aio_contexts,
180
+ scsi_device_for_each_req_async_do_ctx,
181
+ &params
182
+ );
183
}
184
185
static void scsi_device_realize(SCSIDevice *s, Error **errp)
186
@@ -XXX,XX +XXX,XX @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp)
187
dev->lun = lun;
188
}
189
190
+ qemu_mutex_init(&dev->requests_lock);
191
QTAILQ_INIT(&dev->requests);
192
scsi_device_realize(dev, &local_err);
193
if (local_err) {
194
@@ -XXX,XX +XXX,XX @@ static void scsi_qdev_unrealize(DeviceState *qdev)
195
196
scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE));
197
198
+ qemu_mutex_destroy(&dev->requests_lock);
199
+
200
scsi_device_unrealize(dev);
201
202
blockdev_mark_auto_del(dev->conf.blk);
203
@@ -XXX,XX +XXX,XX @@ static void scsi_req_enqueue_internal(SCSIRequest *req)
204
req->sg = NULL;
205
}
206
req->enqueued = true;
207
- QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
208
+
209
+ WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
210
+ QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
108
+ }
211
+ }
109
}
212
}
110
213
111
void bdrv_graph_wrunlock(void)
214
int32_t scsi_req_enqueue(SCSIRequest *req)
215
@@ -XXX,XX +XXX,XX @@ static void scsi_req_dequeue(SCSIRequest *req)
216
trace_scsi_req_dequeue(req->dev->id, req->lun, req->tag);
217
req->retry = false;
218
if (req->enqueued) {
219
- QTAILQ_REMOVE(&req->dev->requests, req, next);
220
+ WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
221
+ QTAILQ_REMOVE(&req->dev->requests, req, next);
222
+ }
223
req->enqueued = false;
224
scsi_req_unref(req);
225
}
226
@@ -XXX,XX +XXX,XX @@ static void scsi_device_class_init(ObjectClass *klass, void *data)
227
228
static void scsi_dev_instance_init(Object *obj)
229
{
230
- DeviceState *dev = DEVICE(obj);
231
- SCSIDevice *s = SCSI_DEVICE(dev);
232
+ SCSIDevice *s = SCSI_DEVICE(obj);
233
234
device_add_bootindex_property(obj, &s->conf.bootindex,
235
"bootindex", NULL,
112
--
236
--
113
2.41.0
237
2.48.1
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
Mark functions as coroutine_fn when they are only called by other coroutine_fns
3
Virtqueues are not thread-safe. Until now this was not a major issue
4
and they can suspend. Change calls to co_wrappers to use the non-wrapped
4
since all virtqueue processing happened in the same thread. The ctrl
5
functions, which in turn requires adding GRAPH_RDLOCK annotations.
5
queue's Task Management Function (TMF) requests sometimes need the main
6
6
loop, so a BH was used to schedule the virtqueue completion back in the
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
thread that has virtqueue access.
8
Message-ID: <20230601115145.196465-4-pbonzini@redhat.com>
8
9
When IOThread Virtqueue Mapping is introduced in later commits, event
10
and ctrl virtqueue accesses from other threads will become necessary.
11
Introduce an optional per-virtqueue lock so the event and ctrl
12
virtqueues can be protected in the commits that follow.
13
14
The addition of the ctrl virtqueue lock makes
15
virtio_scsi_complete_req_from_main_loop() and its BH unnecessary.
16
Instead, take the ctrl virtqueue lock from the main loop thread.
17
18
The cmd virtqueue does not have a lock because the entirety of SCSI
19
command processing happens in one thread. Only one thread accesses the
20
cmd virtqueue and a lock is unnecessary.
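
The convention reads roughly like this (a hedged sketch; complete_locked()
is a hypothetical stand-in for virtio_scsi_complete_req() in the hunks
below): passing NULL means the caller already has exclusive access to the
virtqueue:

    static void complete_locked(VirtQueue *vq, VirtQueueElement *elem,
                                unsigned int len, QemuMutex *vq_lock)
    {
        if (vq_lock) {
            qemu_mutex_lock(vq_lock);
        }
        virtqueue_push(vq, elem, len); /* not thread-safe by itself */
        if (vq_lock) {
            qemu_mutex_unlock(vq_lock);
        }
    }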
21
22
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
23
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
24
Message-ID: <20250311132616.1049687-6-stefanha@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
25
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
26
---
12
block/vpc.c | 52 ++++++++++++++++++++++++++--------------------------
27
include/hw/virtio/virtio-scsi.h | 3 ++
13
1 file changed, 26 insertions(+), 26 deletions(-)
28
hw/scsi/virtio-scsi.c | 84 ++++++++++++++++++---------------
14
29
2 files changed, 49 insertions(+), 38 deletions(-)
15
diff --git a/block/vpc.c b/block/vpc.c
30
31
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
16
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
17
--- a/block/vpc.c
33
--- a/include/hw/virtio/virtio-scsi.h
18
+++ b/block/vpc.c
34
+++ b/include/hw/virtio/virtio-scsi.h
19
@@ -XXX,XX +XXX,XX @@ static int vpc_reopen_prepare(BDRVReopenState *state,
35
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
20
* operation (the block bitmaps is updated then), 0 otherwise.
36
int resetting; /* written from main loop thread, read from any thread */
21
* If write is true then err must not be NULL.
37
bool events_dropped;
22
*/
38
23
-static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
39
+ QemuMutex ctrl_lock; /* protects ctrl_vq */
24
- bool write, int *err)
40
+ QemuMutex event_lock; /* protects event_vq */
25
+static int64_t coroutine_fn GRAPH_RDLOCK
41
+
26
+get_image_offset(BlockDriverState *bs, uint64_t offset, bool write, int *err)
42
/*
27
{
43
* TMFs deferred to main loop BH. These fields are protected by
28
BDRVVPCState *s = bs->opaque;
44
* tmf_bh_lock.
29
uint64_t bitmap_offset, block_offset;
45
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
30
@@ -XXX,XX +XXX,XX @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
46
index XXXXXXX..XXXXXXX 100644
31
47
--- a/hw/scsi/virtio-scsi.c
32
s->last_bitmap_offset = bitmap_offset;
48
+++ b/hw/scsi/virtio-scsi.c
33
memset(bitmap, 0xff, s->bitmap_size);
49
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_free_req(VirtIOSCSIReq *req)
34
- r = bdrv_pwrite_sync(bs->file, bitmap_offset, s->bitmap_size, bitmap,
50
g_free(req);
35
- 0);
51
}
36
+ r = bdrv_co_pwrite_sync(bs->file, bitmap_offset, s->bitmap_size, bitmap, 0);
52
37
if (r < 0) {
53
-static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
38
*err = r;
54
+static void virtio_scsi_complete_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
39
return -2;
55
{
40
@@ -XXX,XX +XXX,XX @@ static inline int64_t get_image_offset(BlockDriverState *bs, uint64_t offset,
56
VirtIOSCSI *s = req->dev;
41
*
57
VirtQueue *vq = req->vq;
42
* Returns 0 on success and < 0 on error
58
VirtIODevice *vdev = VIRTIO_DEVICE(s);
43
*/
59
44
-static int rewrite_footer(BlockDriverState *bs)
60
qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size);
45
+static int coroutine_fn GRAPH_RDLOCK rewrite_footer(BlockDriverState *bs)
61
+
46
{
62
+ if (vq_lock) {
47
int ret;
63
+ qemu_mutex_lock(vq_lock);
48
BDRVVPCState *s = bs->opaque;
64
+ }
49
int64_t offset = s->free_data_block_offset;
65
+
50
66
virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size);
51
- ret = bdrv_pwrite_sync(bs->file, offset, sizeof(s->footer), &s->footer, 0);
67
if (s->dataplane_started && !s->dataplane_fenced) {
52
+ ret = bdrv_co_pwrite_sync(bs->file, offset, sizeof(s->footer), &s->footer, 0);
68
virtio_notify_irqfd(vdev, vq);
53
if (ret < 0)
69
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
54
return ret;
70
virtio_notify(vdev, vq);
55
71
}
56
@@ -XXX,XX +XXX,XX @@ static int rewrite_footer(BlockDriverState *bs)
72
57
*
73
+ if (vq_lock) {
58
* Returns the sectors' offset in the image file on success and < 0 on error
74
+ qemu_mutex_unlock(vq_lock);
59
*/
75
+ }
60
-static int64_t alloc_block(BlockDriverState *bs, int64_t offset)
76
+
61
+static int64_t coroutine_fn GRAPH_RDLOCK
77
if (req->sreq) {
62
+alloc_block(BlockDriverState *bs, int64_t offset)
78
req->sreq->hba_private = NULL;
63
{
79
scsi_req_unref(req->sreq);
64
BDRVVPCState *s = bs->opaque;
80
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
65
int64_t bat_offset;
81
virtio_scsi_free_req(req);
66
@@ -XXX,XX +XXX,XX @@ static int64_t alloc_block(BlockDriverState *bs, int64_t offset)
82
}
67
83
68
/* Initialize the block's bitmap */
84
-static void virtio_scsi_complete_req_bh(void *opaque)
69
memset(bitmap, 0xff, s->bitmap_size);
85
+static void virtio_scsi_bad_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
70
- ret = bdrv_pwrite_sync(bs->file, s->free_data_block_offset,
86
{
71
- s->bitmap_size, bitmap, 0);
87
- VirtIOSCSIReq *req = opaque;
72
+ ret = bdrv_co_pwrite_sync(bs->file, s->free_data_block_offset,
88
+ virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");
73
+ s->bitmap_size, bitmap, 0);
89
74
if (ret < 0) {
90
- virtio_scsi_complete_req(req);
75
return ret;
91
-}
76
}
92
+ if (vq_lock) {
77
@@ -XXX,XX +XXX,XX @@ static int64_t alloc_block(BlockDriverState *bs, int64_t offset)
93
+ qemu_mutex_lock(vq_lock);
78
/* Write BAT entry to disk */
94
+ }
79
bat_offset = s->bat_offset + (4 * index);
95
80
bat_value = cpu_to_be32(s->pagetable[index]);
96
-/*
81
- ret = bdrv_pwrite_sync(bs->file, bat_offset, 4, &bat_value, 0);
97
- * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop
82
+ ret = bdrv_co_pwrite_sync(bs->file, bat_offset, 4, &bat_value, 0);
98
- * thread cannot touch the virtqueue since that could race with an IOThread.
83
if (ret < 0)
99
- */
84
goto fail;
100
-static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req)
85
101
-{
86
@@ -XXX,XX +XXX,XX @@ fail:
102
- VirtIOSCSI *s = req->dev;
87
return ret;
103
+ virtqueue_detach_element(req->vq, &req->elem, 0);
88
}
104
89
105
- if (!s->ctx || s->ctx == qemu_get_aio_context()) {
90
-static int coroutine_fn vpc_co_block_status(BlockDriverState *bs,
106
- /* No need to schedule a BH when there is no IOThread */
91
- bool want_zero,
107
- virtio_scsi_complete_req(req);
92
- int64_t offset, int64_t bytes,
108
- } else {
93
- int64_t *pnum, int64_t *map,
109
- /* Run request completion in the IOThread */
94
- BlockDriverState **file)
110
- aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req);
95
+static int coroutine_fn GRAPH_RDLOCK
111
+ if (vq_lock) {
96
+vpc_co_block_status(BlockDriverState *bs, bool want_zero,
112
+ qemu_mutex_unlock(vq_lock);
97
+ int64_t offset, int64_t bytes,
113
}
98
+ int64_t *pnum, int64_t *map,
114
-}
99
+ BlockDriverState **file)
115
100
{
116
-static void virtio_scsi_bad_req(VirtIOSCSIReq *req)
101
BDRVVPCState *s = bs->opaque;
117
-{
102
int64_t image_offset;
118
- virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");
103
@@ -XXX,XX +XXX,XX @@ static int calculate_geometry(int64_t total_sectors, uint16_t *cyls,
119
- virtqueue_detach_element(req->vq, &req->elem, 0);
120
virtio_scsi_free_req(req);
121
}
122
123
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_parse_req(VirtIOSCSIReq *req,
104
return 0;
124
return 0;
105
}
125
}
106
126
107
-static int create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
127
-static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq)
108
- int64_t total_sectors)
128
+static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq, QemuMutex *vq_lock)
109
+static int coroutine_fn create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
129
{
110
+ int64_t total_sectors)
130
VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s;
111
{
131
VirtIOSCSIReq *req;
112
VHDDynDiskHeader dyndisk_header;
132
113
uint8_t bat_sector[512];
133
+ if (vq_lock) {
114
@@ -XXX,XX +XXX,XX @@ static int create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
134
+ qemu_mutex_lock(vq_lock);
115
block_size = 0x200000;
135
+ }
116
num_bat_entries = DIV_ROUND_UP(total_sectors, block_size / 512);
136
+
117
137
req = virtqueue_pop(vq, sizeof(VirtIOSCSIReq) + vs->cdb_size);
118
- ret = blk_pwrite(blk, offset, sizeof(*footer), footer, 0);
138
+
119
+ ret = blk_co_pwrite(blk, offset, sizeof(*footer), footer, 0);
139
+ if (vq_lock) {
120
if (ret < 0) {
140
+ qemu_mutex_unlock(vq_lock);
121
goto fail;
141
+ }
122
}
142
+
123
143
if (!req) {
124
offset = 1536 + ((num_bat_entries * 4 + 511) & ~511);
144
return NULL;
125
- ret = blk_pwrite(blk, offset, sizeof(*footer), footer, 0);
145
}
126
+ ret = blk_co_pwrite(blk, offset, sizeof(*footer), footer, 0);
146
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
127
if (ret < 0) {
147
128
goto fail;
148
trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(req->req.tmf.lun),
129
}
149
req->req.tmf.tag, req->resp.tmf.response);
130
@@ -XXX,XX +XXX,XX @@ static int create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
150
- virtio_scsi_complete_req(req);
131
151
+ virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
132
memset(bat_sector, 0xFF, 512);
152
}
133
for (i = 0; i < DIV_ROUND_UP(num_bat_entries * 4, 512); i++) {
153
g_free(n);
134
- ret = blk_pwrite(blk, offset, 512, bat_sector, 0);
154
}
135
+ ret = blk_co_pwrite(blk, offset, 512, bat_sector, 0);
155
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
136
if (ret < 0) {
156
137
goto fail;
157
out:
158
object_unref(OBJECT(d));
159
- virtio_scsi_complete_req_from_main_loop(req);
160
+ virtio_scsi_complete_req(req, &s->ctrl_lock);
161
}
162
163
/* Some TMFs must be processed from the main loop thread */
164
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
165
166
/* SAM-6 6.3.2 Hard reset */
167
req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
168
- virtio_scsi_complete_req(req);
169
+ virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
170
}
171
}
172
173
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
174
175
if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0,
176
&type, sizeof(type)) < sizeof(type)) {
177
- virtio_scsi_bad_req(req);
178
+ virtio_scsi_bad_req(req, &s->ctrl_lock);
179
return;
180
}
181
182
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
183
if (type == VIRTIO_SCSI_T_TMF) {
184
if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq),
185
sizeof(VirtIOSCSICtrlTMFResp)) < 0) {
186
- virtio_scsi_bad_req(req);
187
+ virtio_scsi_bad_req(req, &s->ctrl_lock);
188
return;
189
} else {
190
r = virtio_scsi_do_tmf(s, req);
191
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
192
type == VIRTIO_SCSI_T_AN_SUBSCRIBE) {
193
if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq),
194
sizeof(VirtIOSCSICtrlANResp)) < 0) {
195
- virtio_scsi_bad_req(req);
196
+ virtio_scsi_bad_req(req, &s->ctrl_lock);
197
return;
198
} else {
199
req->req.an.event_requested =
200
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
201
type == VIRTIO_SCSI_T_AN_SUBSCRIBE)
202
trace_virtio_scsi_an_resp(virtio_scsi_get_lun(req->req.an.lun),
203
req->resp.an.response);
204
- virtio_scsi_complete_req(req);
205
+ virtio_scsi_complete_req(req, &s->ctrl_lock);
206
} else {
207
assert(r == -EINPROGRESS);
208
}
209
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
210
{
211
VirtIOSCSIReq *req;
212
213
- while ((req = virtio_scsi_pop_req(s, vq))) {
214
+ while ((req = virtio_scsi_pop_req(s, vq, &s->ctrl_lock))) {
215
virtio_scsi_handle_ctrl_req(s, req);
216
}
217
}
218
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req)
219
* in virtio_scsi_command_complete.
220
*/
221
req->resp_size = sizeof(VirtIOSCSICmdResp);
222
- virtio_scsi_complete_req(req);
223
+ virtio_scsi_complete_req(req, NULL);
224
}
225
226
static void virtio_scsi_command_failed(SCSIRequest *r)
227
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
228
virtio_scsi_fail_cmd_req(req);
229
return -ENOTSUP;
230
} else {
231
- virtio_scsi_bad_req(req);
232
+ virtio_scsi_bad_req(req, NULL);
233
return -EINVAL;
138
}
234
}
139
@@ -XXX,XX +XXX,XX @@ static int create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
235
}
140
/* Write the header */
236
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
141
offset = 512;
237
virtio_queue_set_notification(vq, 0);
142
238
}
143
- ret = blk_pwrite(blk, offset, sizeof(dyndisk_header), &dyndisk_header, 0);
239
144
+ ret = blk_co_pwrite(blk, offset, sizeof(dyndisk_header), &dyndisk_header, 0);
240
- while ((req = virtio_scsi_pop_req(s, vq))) {
145
if (ret < 0) {
241
+ while ((req = virtio_scsi_pop_req(s, vq, NULL))) {
146
goto fail;
242
ret = virtio_scsi_handle_cmd_req_prepare(s, req);
147
}
243
if (!ret) {
148
@@ -XXX,XX +XXX,XX @@ static int create_dynamic_disk(BlockBackend *blk, VHDFooter *footer,
244
QTAILQ_INSERT_TAIL(&reqs, req, next);
149
return ret;
245
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
150
}
246
return;
151
247
}
152
-static int create_fixed_disk(BlockBackend *blk, VHDFooter *footer,
248
153
- int64_t total_size, Error **errp)
249
- req = virtio_scsi_pop_req(s, vs->event_vq);
154
+static int coroutine_fn create_fixed_disk(BlockBackend *blk, VHDFooter *footer,
250
+ req = virtio_scsi_pop_req(s, vs->event_vq, &s->event_lock);
155
+ int64_t total_size, Error **errp)
251
if (!req) {
156
{
252
s->events_dropped = true;
157
int ret;
253
return;
158
254
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
159
/* Add footer to total size */
255
}
160
total_size += sizeof(*footer);
256
161
257
if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
162
- ret = blk_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
258
- virtio_scsi_bad_req(req);
163
+ ret = blk_co_truncate(blk, total_size, false, PREALLOC_MODE_OFF, 0, errp);
259
+ virtio_scsi_bad_req(req, &s->event_lock);
164
if (ret < 0) {
260
return;
165
return ret;
261
}
166
}
262
167
263
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
168
- ret = blk_pwrite(blk, total_size - sizeof(*footer), sizeof(*footer),
264
}
169
- footer, 0);
265
trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);
170
+ ret = blk_co_pwrite(blk, total_size - sizeof(*footer), sizeof(*footer),
266
171
+ footer, 0);
267
- virtio_scsi_complete_req(req);
172
if (ret < 0) {
268
+ virtio_scsi_complete_req(req, &s->event_lock);
173
error_setg_errno(errp, -ret, "Unable to write VHD header");
269
}
174
return ret;
270
271
static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
272
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
273
Error *err = NULL;
274
275
QTAILQ_INIT(&s->tmf_bh_list);
276
+ qemu_mutex_init(&s->ctrl_lock);
277
+ qemu_mutex_init(&s->event_lock);
278
qemu_mutex_init(&s->tmf_bh_lock);
279
280
virtio_scsi_common_realize(dev,
281
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
282
qbus_set_hotplug_handler(BUS(&s->bus), NULL);
283
virtio_scsi_common_unrealize(dev);
284
qemu_mutex_destroy(&s->tmf_bh_lock);
285
+ qemu_mutex_destroy(&s->event_lock);
286
+ qemu_mutex_destroy(&s->ctrl_lock);
287
}
288
289
static const Property virtio_scsi_properties[] = {
175
--
290
--
176
2.41.0
291
2.48.1
1
bdrv_set_file_or_backing_noperm() requires the caller to hold the
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
AioContext lock for the child node, but we hold the one for the parent
3
node in bdrv_reopen_parse_file_or_backing(). Take the other one
4
temporarily.
5
2
6
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
The block layer can invoke the resize callback from any AioContext that
7
Message-ID: <20230605085711.21261-7-kwolf@redhat.com>
4
is processing requests. The virtqueue is already protected but the
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
5
events_dropped field also needs to be protected against races. Cover it
6
using the event virtqueue lock because it is closely associated with
7
accesses to the virtqueue.
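
In sketch form (field and lock names as in the hunks below), readers and
writers of events_dropped now use the same guard:

    bool dropped;

    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
        dropped = s->events_dropped; /* read under event_lock */
    }
    if (dropped) {
        /* ...push a VIRTIO_SCSI_T_EVENTS_MISSED event, as below... */
    }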
8
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
11
Message-ID: <20250311132616.1049687-7-stefanha@redhat.com>
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
13
---
11
block.c | 35 +++++++++++++++++++++++++++++++++--
14
include/hw/virtio/virtio-scsi.h | 3 ++-
12
1 file changed, 33 insertions(+), 2 deletions(-)
15
hw/scsi/virtio-scsi.c | 29 ++++++++++++++++++++---------
16
2 files changed, 22 insertions(+), 10 deletions(-)
13
17
14
diff --git a/block.c b/block.c
18
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
15
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
16
--- a/block.c
20
--- a/include/hw/virtio/virtio-scsi.h
17
+++ b/block.c
21
+++ b/include/hw/virtio/virtio-scsi.h
18
@@ -XXX,XX +XXX,XX @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
22
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
19
* callers which don't need their own reference any more must call bdrv_unref().
23
20
*
24
SCSIBus bus;
21
* Function doesn't update permissions, caller is responsible for this.
25
int resetting; /* written from main loop thread, read from any thread */
22
+ *
26
+
23
+ * The caller must hold the AioContext lock for @child_bs. Both @parent_bs and
27
+ QemuMutex event_lock; /* protects event_vq and events_dropped */
24
+ * @child_bs can move to a different AioContext in this function. Callers must
28
bool events_dropped;
25
+ * make sure that their AioContext locking is still correct after this.
29
26
*/
30
QemuMutex ctrl_lock; /* protects ctrl_vq */
27
static int bdrv_set_file_or_backing_noperm(BlockDriverState *parent_bs,
31
- QemuMutex event_lock; /* protects event_vq */
28
BlockDriverState *child_bs,
32
29
@@ -XXX,XX +XXX,XX @@ out:
33
/*
30
return 0;
34
* TMFs deferred to main loop BH. These fields are protected by
35
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/hw/scsi/virtio-scsi.c
38
+++ b/hw/scsi/virtio-scsi.c
39
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
40
41
vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
42
vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
43
- s->events_dropped = false;
44
+
45
+ WITH_QEMU_LOCK_GUARD(&s->event_lock) {
46
+ s->events_dropped = false;
47
+ }
31
}
48
}
32
49
33
+/*
50
typedef struct {
34
+ * The caller must hold the AioContext lock for @backing_hd. Both @bs and
51
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
35
+ * @backing_hd can move to a different AioContext in this function. Callers must
36
+ * make sure that their AioContext locking is still correct after this.
37
+ */
38
static int bdrv_set_backing_noperm(BlockDriverState *bs,
39
BlockDriverState *backing_hd,
40
Transaction *tran, Error **errp)
41
@@ -XXX,XX +XXX,XX @@ int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
42
* backing BlockDriverState (or NULL).
43
*
44
* Return 0 on success, otherwise return < 0 and set @errp.
45
+ *
46
+ * The caller must hold the AioContext lock of @reopen_state->bs.
47
+ * @reopen_state->bs can move to a different AioContext in this function.
48
+ * Callers must make sure that their AioContext locking is still correct after
49
+ * this.
50
*/
51
static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
52
bool is_backing, Transaction *tran,
53
@@ -XXX,XX +XXX,XX @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
54
const char *child_name = is_backing ? "backing" : "file";
55
QObject *value;
56
const char *str;
57
+ AioContext *ctx, *old_ctx;
58
+ int ret;
59
60
GLOBAL_STATE_CODE();
61
62
@@ -XXX,XX +XXX,XX @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
63
reopen_state->old_file_bs = old_child_bs;
64
}
52
}
65
53
66
- return bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
54
req = virtio_scsi_pop_req(s, vs->event_vq, &s->event_lock);
67
- tran, errp);
55
- if (!req) {
68
+ old_ctx = bdrv_get_aio_context(bs);
56
- s->events_dropped = true;
69
+ ctx = bdrv_get_aio_context(new_child_bs);
57
- return;
70
+ if (old_ctx != ctx) {
58
- }
71
+ aio_context_release(old_ctx);
59
+ WITH_QEMU_LOCK_GUARD(&s->event_lock) {
72
+ aio_context_acquire(ctx);
60
+ if (!req) {
61
+ s->events_dropped = true;
62
+ return;
63
+ }
64
65
- if (s->events_dropped) {
66
- event |= VIRTIO_SCSI_T_EVENTS_MISSED;
67
- s->events_dropped = false;
68
+ if (s->events_dropped) {
69
+ event |= VIRTIO_SCSI_T_EVENTS_MISSED;
70
+ s->events_dropped = false;
71
+ }
72
}
73
74
if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
75
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
76
77
static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
78
{
79
- if (s->events_dropped) {
80
+ bool events_dropped;
81
+
82
+ WITH_QEMU_LOCK_GUARD(&s->event_lock) {
83
+ events_dropped = s->events_dropped;
73
+ }
84
+ }
74
+
85
+
75
+ ret = bdrv_set_file_or_backing_noperm(bs, new_child_bs, is_backing,
86
+ if (events_dropped) {
76
+ tran, errp);
87
VirtIOSCSIEventInfo info = {
77
+
88
.event = VIRTIO_SCSI_T_NO_EVENT,
78
+ if (old_ctx != ctx) {
89
};
79
+ aio_context_release(ctx);
80
+ aio_context_acquire(old_ctx);
81
+ }
82
+
83
+ return ret;
84
}
85
86
/*
87
@@ -XXX,XX +XXX,XX @@ static int bdrv_reopen_parse_file_or_backing(BDRVReopenState *reopen_state,
88
* It is the responsibility of the caller to then call the abort() or
89
* commit() for any other BDS that have been left in a prepare() state
90
*
91
+ * The caller must hold the AioContext lock of @reopen_state->bs.
92
*/
93
static int bdrv_reopen_prepare(BDRVReopenState *reopen_state,
94
BlockReopenQueue *queue,
95
--
90
--
96
2.41.0
91
2.48.1
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
Mark functions as coroutine_fn when they are only called by other coroutine_fns
3
With IOThread Virtqueue Mapping there will be multiple AioContexts
4
and they can suspend. Change calls to co_wrappers to use the non-wrapped
4
processing SCSI requests. scsi_req_cancel() and other SCSI request
5
functions, which in turn requires adding GRAPH_RDLOCK annotations.
5
operations must be performed from the AioContext where the request is
6
6
running.
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
8
Message-ID: <20230601115145.196465-7-pbonzini@redhat.com>
8
Introduce a virtio_scsi_defer_tmf_to_aio_context() function and the
9
necessary VirtIOSCSIReq->remaining refcount infrastructure to move the
10
TMF code into the AioContext where the request is running.
11
12
For the time being there is still just one AioContext: the main loop or
13
the IOThread. When the iothread-vq-mapping parameter is added in a later
14
patch this will be changed to per-virtqueue AioContexts.
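
The refcount handshake at the core of this change looks roughly like the
following (a minimal sketch using names from the hunks below; the last
decrement sends the TMF response from its own AioContext):

    /* One reference per request being cancelled... */
    qatomic_inc(&tmf->remaining);

    /* ...dropped in virtio_scsi_cancel_notify(); whoever brings the
     * counter to zero completes the TMF. */
    if (qatomic_fetch_dec(&tmf->remaining) == 1) {
        virtio_scsi_complete_req(tmf, &tmf->dev->ctrl_lock);
    }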
15
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
17
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
18
Message-ID: <20250311132616.1049687-8-stefanha@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
19
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
20
---
12
block/cloop.c | 9 +++++----
21
hw/scsi/virtio-scsi.c | 270 ++++++++++++++++++++++++++++++++----------
13
1 file changed, 5 insertions(+), 4 deletions(-)
22
1 file changed, 206 insertions(+), 64 deletions(-)
14
23
15
diff --git a/block/cloop.c b/block/cloop.c
24
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
16
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
17
--- a/block/cloop.c
26
--- a/hw/scsi/virtio-scsi.c
18
+++ b/block/cloop.c
27
+++ b/hw/scsi/virtio-scsi.c
19
@@ -XXX,XX +XXX,XX @@ static void cloop_refresh_limits(BlockDriverState *bs, Error **errp)
28
@@ -XXX,XX +XXX,XX @@ typedef struct VirtIOSCSIReq {
20
bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
29
/* Used for two-stage request submission and TMFs deferred to BH */
30
QTAILQ_ENTRY(VirtIOSCSIReq) next;
31
32
- /* Used for cancellation of request during TMFs */
33
+ /* Used for cancellation of request during TMFs. Atomic. */
34
int remaining;
35
36
SCSIRequest *sreq;
37
@@ -XXX,XX +XXX,XX @@ typedef struct {
38
VirtIOSCSIReq *tmf_req;
39
} VirtIOSCSICancelNotifier;
40
41
+static void virtio_scsi_tmf_dec_remaining(VirtIOSCSIReq *tmf)
42
+{
43
+ if (qatomic_fetch_dec(&tmf->remaining) == 1) {
44
+ trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(tmf->req.tmf.lun),
45
+ tmf->req.tmf.tag, tmf->resp.tmf.response);
46
+
47
+ virtio_scsi_complete_req(tmf, &tmf->dev->ctrl_lock);
48
+ }
49
+}
50
+
51
static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
52
{
53
VirtIOSCSICancelNotifier *n = container_of(notifier,
54
VirtIOSCSICancelNotifier,
55
notifier);
56
57
- if (--n->tmf_req->remaining == 0) {
58
- VirtIOSCSIReq *req = n->tmf_req;
59
-
60
- trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(req->req.tmf.lun),
61
- req->req.tmf.tag, req->resp.tmf.response);
62
- virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
63
- }
64
+ virtio_scsi_tmf_dec_remaining(n->tmf_req);
65
g_free(n);
21
}
66
}
22
67
23
-static inline int cloop_read_block(BlockDriverState *bs, int block_num)
68
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
24
+static int coroutine_fn GRAPH_RDLOCK
69
}
25
+cloop_read_block(BlockDriverState *bs, int block_num)
70
}
71
72
-static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
73
+static void virtio_scsi_defer_tmf_to_main_loop(VirtIOSCSIReq *req)
26
{
74
{
27
BDRVCloopState *s = bs->opaque;
75
VirtIOSCSI *s = req->dev;
28
76
29
@@ -XXX,XX +XXX,XX @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
77
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
30
int ret;
78
}
31
uint32_t bytes = s->offsets[block_num + 1] - s->offsets[block_num];
32
33
- ret = bdrv_pread(bs->file, s->offsets[block_num], bytes,
34
- s->compressed_block, 0);
35
+ ret = bdrv_co_pread(bs->file, s->offsets[block_num], bytes,
36
+ s->compressed_block, 0);
37
if (ret < 0) {
38
return -1;
39
}
40
@@ -XXX,XX +XXX,XX @@ static inline int cloop_read_block(BlockDriverState *bs, int block_num)
41
return 0;
42
}
79
}
43
80
44
-static int coroutine_fn
81
+static void virtio_scsi_tmf_cancel_req(VirtIOSCSIReq *tmf, SCSIRequest *r)
45
+static int coroutine_fn GRAPH_RDLOCK
82
+{
46
cloop_co_preadv(BlockDriverState *bs, int64_t offset, int64_t bytes,
83
+ VirtIOSCSICancelNotifier *notifier;
47
QEMUIOVector *qiov, BdrvRequestFlags flags)
84
+
85
+ assert(r->ctx == qemu_get_current_aio_context());
86
+
87
+ /* Decremented in virtio_scsi_cancel_notify() */
88
+ qatomic_inc(&tmf->remaining);
89
+
90
+ notifier = g_new(VirtIOSCSICancelNotifier, 1);
91
+ notifier->notifier.notify = virtio_scsi_cancel_notify;
92
+ notifier->tmf_req = tmf;
93
+ scsi_req_cancel_async(r, &notifier->notifier);
94
+}
95
+
96
+/* Execute a TMF on the requests in the current AioContext */
97
+static void virtio_scsi_do_tmf_aio_context(void *opaque)
98
+{
99
+ AioContext *ctx = qemu_get_current_aio_context();
100
+ VirtIOSCSIReq *tmf = opaque;
101
+ VirtIOSCSI *s = tmf->dev;
102
+ SCSIDevice *d = virtio_scsi_device_get(s, tmf->req.tmf.lun);
103
+ SCSIRequest *r;
104
+ bool match_tag;
105
+
106
+ if (!d) {
107
+ tmf->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
108
+ virtio_scsi_tmf_dec_remaining(tmf);
109
+ return;
110
+ }
111
+
112
+ /*
113
+ * This function could handle other subtypes that need to be processed in
114
+ * the request's AioContext in the future, but for now only request
115
+ * cancelation subtypes are performed here.
116
+ */
117
+ switch (tmf->req.tmf.subtype) {
118
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK:
119
+ match_tag = true;
120
+ break;
121
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
122
+ case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
123
+ match_tag = false;
124
+ break;
125
+ default:
126
+ g_assert_not_reached();
127
+ }
128
+
129
+ WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
130
+ QTAILQ_FOREACH(r, &d->requests, next) {
131
+ VirtIOSCSIReq *cmd_req = r->hba_private;
132
+ assert(cmd_req); /* request has hba_private while enqueued */
133
+
134
+ if (r->ctx != ctx) {
135
+ continue;
136
+ }
137
+ if (match_tag && cmd_req->req.cmd.tag != tmf->req.tmf.tag) {
138
+ continue;
139
+ }
140
+ virtio_scsi_tmf_cancel_req(tmf, r);
141
+ }
142
+ }
143
+
144
+ /* Incremented by virtio_scsi_do_tmf() */
145
+ virtio_scsi_tmf_dec_remaining(tmf);
146
+
147
+ object_unref(d);
148
+}
149
+
150
+static void dummy_bh(void *opaque)
151
+{
152
+ /* Do nothing */
153
+}
154
+
155
+/*
156
+ * Wait for pending virtio_scsi_defer_tmf_to_aio_context() BHs.
157
+ */
158
+static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s)
159
+{
160
+ GLOBAL_STATE_CODE();
161
+
162
+ assert(!s->dataplane_started);
163
+
164
+ if (s->ctx) {
165
+ /* Our BH only runs after previously scheduled BHs */
166
+ aio_wait_bh_oneshot(s->ctx, dummy_bh, NULL);
167
+ }
168
+}
169
+
170
+/*
171
+ * Run the TMF in a specific AioContext, handling only requests in that
172
+ * AioContext. This is necessary because requests can run in different
173
+ * AioContext and it is only possible to cancel them from the AioContext where
174
+ * they are running.
175
+ */
176
+static void virtio_scsi_defer_tmf_to_aio_context(VirtIOSCSIReq *tmf,
177
+ AioContext *ctx)
178
+{
179
+ /* Decremented in virtio_scsi_do_tmf_aio_context() */
180
+ qatomic_inc(&tmf->remaining);
181
+
182
+ /* See virtio_scsi_flush_defer_tmf_to_aio_context() cleanup during reset */
183
+ aio_bh_schedule_oneshot(ctx, virtio_scsi_do_tmf_aio_context, tmf);
184
+}
185
+
186
+/*
187
+ * Returns the AioContext for a given TMF's tag field or NULL. Note that the
188
+ * request identified by the tag may have completed by the time you can execute
189
+ * a BH in the AioContext, so don't assume the request still exists in your BH.
190
+ */
191
+static AioContext *find_aio_context_for_tmf_tag(SCSIDevice *d,
192
+ VirtIOSCSIReq *tmf)
193
+{
194
+ WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
195
+ SCSIRequest *r;
196
+ SCSIRequest *next;
197
+
198
+ QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
199
+ VirtIOSCSIReq *cmd_req = r->hba_private;
200
+
201
+ /* hba_private is non-NULL while the request is enqueued */
202
+ assert(cmd_req);
203
+
204
+ if (cmd_req->req.cmd.tag == tmf->req.tmf.tag) {
205
+ return r->ctx;
206
+ }
207
+ }
208
+ }
209
+ return NULL;
210
+}
211
+
212
/* Return 0 if the request is ready to be completed and return to guest;
213
* -EINPROGRESS if the request is submitted and will be completed later, in the
214
* case of async cancellation. */
215
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
48
{
216
{
217
SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
218
SCSIRequest *r, *next;
219
+ AioContext *ctx;
220
int ret = 0;
221
222
virtio_scsi_ctx_check(s, d);
223
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
224
req->req.tmf.tag, req->req.tmf.subtype);
225
226
switch (req->req.tmf.subtype) {
227
- case VIRTIO_SCSI_T_TMF_ABORT_TASK:
228
- case VIRTIO_SCSI_T_TMF_QUERY_TASK:
229
+ case VIRTIO_SCSI_T_TMF_ABORT_TASK: {
230
if (!d) {
231
goto fail;
232
}
233
if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
234
goto incorrect_lun;
235
}
236
- QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
237
- VirtIOSCSIReq *cmd_req = r->hba_private;
238
- if (cmd_req && cmd_req->req.cmd.tag == req->req.tmf.tag) {
239
- break;
240
- }
241
+
242
+ ctx = find_aio_context_for_tmf_tag(d, req);
243
+ if (ctx) {
244
+ virtio_scsi_defer_tmf_to_aio_context(req, ctx);
245
+ ret = -EINPROGRESS;
246
}
247
- if (r) {
248
- /*
249
- * Assert that the request has not been completed yet, we
250
- * check for it in the loop above.
251
- */
252
- assert(r->hba_private);
253
- if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) {
254
- /* "If the specified command is present in the task set, then
255
- * return a service response set to FUNCTION SUCCEEDED".
256
- */
257
- req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
258
- } else {
259
- VirtIOSCSICancelNotifier *notifier;
260
-
261
- req->remaining = 1;
262
- notifier = g_new(VirtIOSCSICancelNotifier, 1);
263
- notifier->tmf_req = req;
264
- notifier->notifier.notify = virtio_scsi_cancel_notify;
265
- scsi_req_cancel_async(r, &notifier->notifier);
266
- ret = -EINPROGRESS;
267
+ break;
268
+ }
269
+
270
+ case VIRTIO_SCSI_T_TMF_QUERY_TASK:
271
+ if (!d) {
272
+ goto fail;
273
+ }
274
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
275
+ goto incorrect_lun;
276
+ }
277
+
278
+ WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
279
+ QTAILQ_FOREACH(r, &d->requests, next) {
280
+ VirtIOSCSIReq *cmd_req = r->hba_private;
281
+ assert(cmd_req); /* request has hba_private while enqueued */
282
+
283
+ if (cmd_req->req.cmd.tag == req->req.tmf.tag) {
284
+ /*
285
+ * "If the specified command is present in the task set,
286
+ * then return a service response set to FUNCTION
287
+ * SUCCEEDED".
288
+ */
289
+ req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
290
+ }
291
}
292
}
293
break;
294
295
case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
296
case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
297
- virtio_scsi_defer_tmf_to_bh(req);
298
+ virtio_scsi_defer_tmf_to_main_loop(req);
299
ret = -EINPROGRESS;
300
break;
301
302
case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
303
- case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
304
+ case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
305
+ if (!d) {
306
+ goto fail;
307
+ }
308
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
309
+ goto incorrect_lun;
310
+ }
311
+
312
+ qatomic_inc(&req->remaining);
313
+
314
+ ctx = s->ctx ?: qemu_get_aio_context();
315
+ virtio_scsi_defer_tmf_to_aio_context(req, ctx);
316
+
317
+ virtio_scsi_tmf_dec_remaining(req);
318
+ ret = -EINPROGRESS;
319
+ break;
320
+ }
321
+
322
case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
323
if (!d) {
324
goto fail;
325
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
326
goto incorrect_lun;
327
}
328
329
- /* Add 1 to "remaining" until virtio_scsi_do_tmf returns.
330
- * This way, if the bus starts calling back to the notifiers
331
- * even before we finish the loop, virtio_scsi_cancel_notify
332
- * will not complete the TMF too early.
333
- */
334
- req->remaining = 1;
335
- QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
336
- if (r->hba_private) {
337
- if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) {
338
- /* "If there is any command present in the task set, then
339
- * return a service response set to FUNCTION SUCCEEDED".
340
- */
341
- req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
342
- break;
343
- } else {
344
- VirtIOSCSICancelNotifier *notifier;
345
-
346
- req->remaining++;
347
- notifier = g_new(VirtIOSCSICancelNotifier, 1);
348
- notifier->notifier.notify = virtio_scsi_cancel_notify;
349
- notifier->tmf_req = req;
350
- scsi_req_cancel_async(r, &notifier->notifier);
351
- }
352
+ WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
353
+ QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
354
+ /* Request has hba_private while enqueued */
355
+ assert(r->hba_private);
356
+
357
+ /*
358
+ * "If there is any command present in the task set, then
359
+ * return a service response set to FUNCTION SUCCEEDED".
360
+ */
361
+ req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
362
+ break;
363
}
364
}
365
- if (--req->remaining > 0) {
366
- ret = -EINPROGRESS;
367
- }
368
break;
369
370
case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
371
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
372
assert(!s->dataplane_started);
373
374
virtio_scsi_reset_tmf_bh(s);
375
+ virtio_scsi_flush_defer_tmf_to_aio_context(s);
376
377
qatomic_inc(&s->resetting);
378
bus_cold_reset(BUS(&s->bus));
49
--
379
--
50
2.41.0
380
2.48.1
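The "remaining" refcounting scheme introduced above is worth seeing in isolation: the count starts at 1 for the submitter, each in-flight cancellation adds one, and whoever drops the count from 1 to 0 completes the TMF, regardless of which AioContext it runs in. A self-contained sketch follows, with C11 atomics standing in for QEMU's qatomic_inc()/qatomic_fetch_dec(); all names are illustrative, not from the series.

/* Last-reference-completes pattern for fanned-out async cancellations. */
#include <stdatomic.h>
#include <stdio.h>

typedef struct {
    atomic_int remaining; /* 1 owner ref + 1 per in-flight cancellation */
} Tmf;

static void tmf_dec_remaining(Tmf *tmf)
{
    /* fetch_sub returns the old value: the thread that sees 1 is last */
    if (atomic_fetch_sub(&tmf->remaining, 1) == 1) {
        printf("complete TMF\n");
    }
}

static void cancel_one_request(Tmf *tmf)
{
    /* Taken here, dropped by the cancellation notifier */
    atomic_fetch_add(&tmf->remaining, 1);
    /* scsi_req_cancel_async() would run here; simulate its notifier: */
    tmf_dec_remaining(tmf);
}

int main(void)
{
    Tmf tmf = { .remaining = 1 }; /* owner reference */
    cancel_one_request(&tmf);
    cancel_one_request(&tmf);
    tmf_dec_remaining(&tmf); /* drop the owner ref; completes here */
    return 0;
}

The owner reference is what prevents the TMF from completing while the cancellation loop is still fanning out, exactly the role qatomic_inc(&req->remaining) plays in virtio_scsi_do_tmf() above.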
From: Paolo Bonzini <pbonzini@redhat.com>

Mark functions as coroutine_fn when they are only called by other coroutine_fns
and they can suspend. Change calls to co_wrappers to use the non-wrapped
functions, which in turn requires adding GRAPH_RDLOCK annotations.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-ID: <20230601115145.196465-3-pbonzini@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/qed-check.c | 5 +++--
 block/qed.c       | 7 ++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/block/qed-check.c b/block/qed-check.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed-check.c
+++ b/block/qed-check.c
@@ -XXX,XX +XXX,XX @@ static void qed_check_for_leaks(QEDCheck *check)
 /**
  * Mark an image clean once it passes check or has been repaired
  */
-static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
+static void coroutine_fn GRAPH_RDLOCK
+qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)
 {
     /* Skip if there were unfixable corruptions or I/O errors */
     if (result->corruptions > 0 || result->check_errors > 0) {
@@ -XXX,XX +XXX,XX @@ static void qed_check_mark_clean(BDRVQEDState *s, BdrvCheckResult *result)

     /* Ensure fixes reach storage before clearing check bit */
-    bdrv_flush(s->bs);
+    bdrv_co_flush(s->bs);

     s->header.features &= ~QED_F_NEED_CHECK;
     qed_write_header_sync(s);
diff --git a/block/qed.c b/block/qed.c
index XXXXXXX..XXXXXXX 100644
--- a/block/qed.c
+++ b/block/qed.c
@@ -XXX,XX +XXX,XX @@ static bool qed_is_image_size_valid(uint64_t image_size, uint32_t cluster_size,
  *
  * The string is NUL-terminated.
  */
-static int qed_read_string(BdrvChild *file, uint64_t offset, size_t n,
-                           char *buf, size_t buflen)
+static int coroutine_fn GRAPH_RDLOCK
+qed_read_string(BdrvChild *file, uint64_t offset,
+                size_t n, char *buf, size_t buflen)
 {
     int ret;
     if (n >= buflen) {
         return -EINVAL;
     }
-    ret = bdrv_pread(file, offset, n, buf, 0);
+    ret = bdrv_co_pread(file, offset, n, buf, 0);
     if (ret < 0) {
         return ret;
     }
--
2.41.0

From: Stefan Hajnoczi <stefanha@redhat.com>

This is the cleanup function that must be called after
apply_iothread_vq_mapping() succeeds. virtio-scsi will need this
function too, so extract it.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-9-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/virtio-blk.c | 27 +++++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
  * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
  * the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
  *
+ * cleanup_iothread_vq_mapping() must be called to free IOThread object
+ * references after this function returns success.
+ *
  * Returns: %true on success, %false on failure.
  **/
 static bool apply_iothread_vq_mapping(
@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
     return true;
 }

+/**
+ * cleanup_iothread_vq_mapping:
+ * @list: The mapping of virtqueues to IOThreads.
+ *
+ * Release IOThread object references that were acquired by
+ * apply_iothread_vq_mapping().
+ */
+static void cleanup_iothread_vq_mapping(IOThreadVirtQueueMappingList *list)
+{
+    IOThreadVirtQueueMappingList *node;
+
+    for (node = list; node; node = node->next) {
+        IOThread *iothread = iothread_by_id(node->value->iothread);
+        object_unref(OBJECT(iothread));
+    }
+}
+
 /* Context: BQL held */
 static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
 {
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
     assert(!s->ioeventfd_started);

     if (conf->iothread_vq_mapping_list) {
-        IOThreadVirtQueueMappingList *node;
-
-        for (node = conf->iothread_vq_mapping_list; node; node = node->next) {
-            IOThread *iothread = iothread_by_id(node->value->iothread);
-            object_unref(OBJECT(iothread));
-        }
+        cleanup_iothread_vq_mapping(conf->iothread_vq_mapping_list);
     }
 }

     if (conf->iothread) {
--
2.48.1
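The apply()/cleanup() pairing the commit message describes is a simple but easy-to-break invariant: every reference taken while applying the mapping is released by exactly one dedicated cleanup function that walks the same list. A toy, self-contained illustration (all names invented for this sketch):

/* Symmetric acquire/release over the same list, split into two helpers. */
#include <stdio.h>

typedef struct { int refcount; } Obj;

static void obj_ref(Obj *o)   { o->refcount++; }
static void obj_unref(Obj *o) { o->refcount--; }

static int mapping_apply(Obj *objs, int n)
{
    for (int i = 0; i < n; i++) {
        obj_ref(&objs[i]); /* released in mapping_cleanup() */
    }
    return 0;
}

static void mapping_cleanup(Obj *objs, int n)
{
    /* Walk the same list again and drop exactly the refs apply() took */
    for (int i = 0; i < n; i++) {
        obj_unref(&objs[i]);
    }
}

int main(void)
{
    Obj objs[2] = { { 1 }, { 1 } };
    if (mapping_apply(objs, 2) == 0) {
        mapping_cleanup(objs, 2);
    }
    printf("refcounts back to %d and %d\n",
           objs[0].refcount, objs[1].refcount);
    return 0;
}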
bdrv_open_inherit() calls several functions for which it needs to hold
the AioContext lock, but currently doesn't. This includes calls in
bdrv_append_temp_snapshot(), for which bdrv_open_inherit() is the only
caller. Fix the locking in these places.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20230605085711.21261-8-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block.c | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
     int64_t total_size;
     QemuOpts *opts = NULL;
     BlockDriverState *bs_snapshot = NULL;
+    AioContext *ctx = bdrv_get_aio_context(bs);
     int ret;

     GLOBAL_STATE_CODE();
@@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
        instead of opening 'filename' directly */

     /* Get the required size from the image */
+    aio_context_acquire(ctx);
     total_size = bdrv_getlength(bs);
+    aio_context_release(ctx);
+
     if (total_size < 0) {
         error_setg_errno(errp, -total_size, "Could not get image size");
         goto out;
@@ -XXX,XX +XXX,XX @@ static BlockDriverState *bdrv_append_temp_snapshot(BlockDriverState *bs,
         goto out;
     }

+    aio_context_acquire(ctx);
     ret = bdrv_append(bs_snapshot, bs, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         bs_snapshot = NULL;
         goto out;
@@ -XXX,XX +XXX,XX @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
     Error *local_err = NULL;
     QDict *snapshot_options = NULL;
     int snapshot_flags = 0;
+    AioContext *ctx = qemu_get_aio_context();

     assert(!child_class || !flags);
     assert(!child_class == !parent);
@@ -XXX,XX +XXX,XX @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
         /* Not requesting BLK_PERM_CONSISTENT_READ because we're only
          * looking at the header to guess the image format. This works even
          * in cases where a guest would not see a consistent state. */
-        file = blk_new(bdrv_get_aio_context(file_bs), 0, BLK_PERM_ALL);
+        ctx = bdrv_get_aio_context(file_bs);
+        aio_context_acquire(ctx);
+        file = blk_new(ctx, 0, BLK_PERM_ALL);
         blk_insert_bs(file, file_bs, &local_err);
         bdrv_unref(file_bs);
+        aio_context_release(ctx);
+
         if (local_err) {
             goto fail;
         }
@@ -XXX,XX +XXX,XX @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
         goto fail;
     }

+    /* The AioContext could have changed during bdrv_open_common() */
+    ctx = bdrv_get_aio_context(bs);
+
     if (file) {
+        aio_context_acquire(ctx);
         blk_unref(file);
+        aio_context_release(ctx);
         file = NULL;
     }

@@ -XXX,XX +XXX,XX @@ bdrv_open_inherit(const char *filename, const char *reference, QDict *options,
          * (snapshot_bs); thus, we have to drop the strong reference to bs
          * (which we obtained by calling bdrv_new()). bs will not be deleted,
          * though, because the overlay still has a reference to it. */
+        aio_context_acquire(ctx);
        bdrv_unref(bs);
+        aio_context_release(ctx);
        bs = snapshot_bs;
    }

    return bs;

fail:
+    aio_context_acquire(ctx);
    blk_unref(file);
    qobject_unref(snapshot_options);
    qobject_unref(bs->explicit_options);
@@ -XXX,XX +XXX,XX @@ fail:
    bs->options = NULL;
    bs->explicit_options = NULL;
    bdrv_unref(bs);
+    aio_context_release(ctx);
    error_propagate(errp, local_err);
    return NULL;

close_and_fail:
+    aio_context_acquire(ctx);
    bdrv_unref(bs);
+    aio_context_release(ctx);
    qobject_unref(snapshot_options);
    qobject_unref(options);
    error_propagate(errp, local_err);
--
2.41.0

From: Stefan Hajnoczi <stefanha@redhat.com>

Use noun_verb() function naming instead of verb_noun() because the
former is the most common naming style for APIs. The next commit will
move these functions into a header file so that virtio-scsi can call
them.

Shorten iothread_vq_mapping_apply()'s iothread_vq_mapping_list argument
to just "list" like in the other functions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-10-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/virtio-blk.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ static const BlockDevOps virtio_block_ops = {
 };

 static bool
-validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
-                                  uint16_t num_queues, Error **errp)
+iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
+                             num_queues, Error **errp)
 {
     g_autofree unsigned long *vqs = bitmap_new(num_queues);
     g_autoptr(GHashTable) iothreads =
@@ -XXX,XX +XXX,XX @@ validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
 }

 /**
- * apply_iothread_vq_mapping:
- * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads.
+ * iothread_vq_mapping_apply:
+ * @list: The mapping of virtqueues to IOThreads.
  * @vq_aio_context: The array of AioContext pointers to fill in.
  * @num_queues: The length of @vq_aio_context.
  * @errp: If an error occurs, a pointer to the area to store the error.
  *
  * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
- * the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
+ * the iothread-vq-mapping parameter in @list.
  *
- * cleanup_iothread_vq_mapping() must be called to free IOThread object
+ * iothread_vq_mapping_cleanup() must be called to free IOThread object
  * references after this function returns success.
  *
  * Returns: %true on success, %false on failure.
  **/
-static bool apply_iothread_vq_mapping(
-    IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
+static bool iothread_vq_mapping_apply(
+    IOThreadVirtQueueMappingList *list,
     AioContext **vq_aio_context,
     uint16_t num_queues,
     Error **errp)
@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
     size_t num_iothreads = 0;
     size_t cur_iothread = 0;

-    if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list,
-                                           num_queues, errp)) {
+    if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
         return false;
     }

-    for (node = iothread_vq_mapping_list; node; node = node->next) {
+    for (node = list; node; node = node->next) {
         num_iothreads++;
     }

-    for (node = iothread_vq_mapping_list; node; node = node->next) {
+    for (node = list; node; node = node->next) {
         IOThread *iothread = iothread_by_id(node->value->iothread);
         AioContext *ctx = iothread_get_aio_context(iothread);

@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
 }

 /**
- * cleanup_iothread_vq_mapping:
+ * iothread_vq_mapping_cleanup:
  * @list: The mapping of virtqueues to IOThreads.
  *
  * Release IOThread object references that were acquired by
- * apply_iothread_vq_mapping().
+ * iothread_vq_mapping_apply().
  */
-static void cleanup_iothread_vq_mapping(IOThreadVirtQueueMappingList *list)
+static void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
 {
     IOThreadVirtQueueMappingList *node;

@@ -XXX,XX +XXX,XX @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
     s->vq_aio_context = g_new(AioContext *, conf->num_queues);

     if (conf->iothread_vq_mapping_list) {
-        if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list,
+        if (!iothread_vq_mapping_apply(conf->iothread_vq_mapping_list,
                                        s->vq_aio_context,
                                        conf->num_queues,
                                        errp)) {
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
     assert(!s->ioeventfd_started);

     if (conf->iothread_vq_mapping_list) {
-        cleanup_iothread_vq_mapping(conf->iothread_vq_mapping_list);
+        iothread_vq_mapping_cleanup(conf->iothread_vq_mapping_list);
     }

     if (conf->iothread) {
--
2.48.1
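The round-robin branch of iothread_vq_mapping_apply() shown above stripes virtqueues across IOThreads: IOThread i gets virtqueues i, i + num_iothreads, i + 2*num_iothreads, and so on. A standalone demo of just that assignment arithmetic (array indices in place of real AioContext pointers; names are illustrative):

/* Round-robin vq:IOThread striping, as done when no explicit "vqs"
 * lists are given in iothread-vq-mapping. */
#include <stdio.h>

int main(void)
{
    const unsigned num_queues = 8;
    const unsigned num_iothreads = 3;
    unsigned vq_to_iothread[8];

    for (unsigned cur = 0; cur < num_iothreads; cur++) {
        /* IOThread 'cur' takes every num_iothreads-th queue from 'cur' */
        for (unsigned i = cur; i < num_queues; i += num_iothreads) {
            vq_to_iothread[i] = cur;
        }
    }

    for (unsigned i = 0; i < num_queues; i++) {
        printf("vq %u -> iothread %u\n", i, vq_to_iothread[i]);
    }
    return 0;
}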
This is a better regression test for the bugs hidden by commit 80fc5d26
('graph-lock: Disable locking for now'). With that commit reverted, it
hangs instantaneously and reliably for me.

It is important to have a reliable test like this, because the following
commits will set out to fix the actual root cause of the deadlocks and
then finally revert commit 80fc5d26, which was only a stopgap solution.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20230605085711.21261-2-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 .../tests/iothreads-commit-active             | 85 +++++++++++++++++++
 .../tests/iothreads-commit-active.out         | 23 +++++
 2 files changed, 108 insertions(+)
 create mode 100755 tests/qemu-iotests/tests/iothreads-commit-active
 create mode 100644 tests/qemu-iotests/tests/iothreads-commit-active.out

diff --git a/tests/qemu-iotests/tests/iothreads-commit-active b/tests/qemu-iotests/tests/iothreads-commit-active
new file mode 100755
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/tests/iothreads-commit-active
@@ -XXX,XX +XXX,XX @@
+#!/usr/bin/env python3
+# group: rw quick auto
+#
+# Copyright (C) 2023 Red Hat, Inc.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Creator/Owner: Kevin Wolf <kwolf@redhat.com>
+
+import asyncio
+import iotests
+
+iotests.script_initialize(supported_fmts=['qcow2'],
+                          supported_platforms=['linux'])
+iotests.verify_virtio_scsi_pci_or_ccw()
+
+with iotests.FilePath('disk0.img') as img_path, \
+     iotests.FilePath('disk0-snap.img') as snap_path, \
+     iotests.FilePath('mirror-src.img') as src_path, \
+     iotests.FilePath('mirror-dst.img') as dst_path, \
+     iotests.VM() as vm:
+
+    img_size = '10M'
+    iotests.qemu_img_create('-f', iotests.imgfmt, img_path, img_size)
+    iotests.qemu_img_create('-f', iotests.imgfmt, '-b', img_path,
+                            '-F', iotests.imgfmt, snap_path)
+    iotests.qemu_img_create('-f', iotests.imgfmt, src_path, img_size)
+    iotests.qemu_img_create('-f', iotests.imgfmt, dst_path, img_size)
+
+    iotests.qemu_io_log('-c', 'write 0 64k', img_path)
+    iotests.qemu_io_log('-c', 'write 1M 64k', snap_path)
+    iotests.qemu_io_log('-c', 'write 3M 64k', snap_path)
+
+    iotests.qemu_io_log('-c', f'write 0 {img_size}', src_path)
+
+    iotests.log('Launching VM...')
+    vm.add_object('iothread,id=iothread0')
+    vm.add_object('throttle-group,x-bps-write=1048576,id=tg0')
+    vm.add_blockdev(f'file,node-name=disk0-file,filename={img_path}')
+    vm.add_blockdev('qcow2,node-name=disk0-fmt,file=disk0-file')
+    vm.add_drive(snap_path, 'backing=disk0-fmt,node-name=disk0',
+                 interface='none')
+    vm.add_device('virtio-scsi,iothread=iothread0')
+    vm.add_device('scsi-hd,drive=drive0')
+
+    vm.add_blockdev(f'file,filename={src_path},node-name=mirror-src-file')
+    vm.add_blockdev('qcow2,file=mirror-src-file,node-name=mirror-src')
+    vm.add_blockdev(f'file,filename={dst_path},node-name=mirror-dst-file')
+    vm.add_blockdev('qcow2,file=mirror-dst-file,node-name=mirror-dst-fmt')
+    vm.add_blockdev('throttle,throttle-group=tg0,file=mirror-dst-fmt,'
+                    'node-name=mirror-dst')
+    vm.add_device('scsi-hd,drive=mirror-src')
+
+    vm.launch()
+
+    # The background I/O is created on unrelated nodes (so that they won't be
+    # drained together with the other ones), but on the same iothread
+    iotests.log('Creating some background I/O...')
+    iotests.log(vm.qmp('blockdev-mirror', job_id='job0', sync='full',
+                       device='mirror-src', target='mirror-dst',
+                       auto_dismiss=False))
+
+    iotests.log('Starting active commit...')
+    iotests.log(vm.qmp('block-commit', device='disk0', job_id='job1',
+                       auto_dismiss=False))
+
+    # Should succeed and not time out
+    try:
+        vm.run_job('job1', wait=5.0)
+        vm.shutdown()
+    except asyncio.TimeoutError:
+        # VM may be stuck, kill it
+        vm.kill()
+        raise
diff --git a/tests/qemu-iotests/tests/iothreads-commit-active.out b/tests/qemu-iotests/tests/iothreads-commit-active.out
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/tests/qemu-iotests/tests/iothreads-commit-active.out
@@ -XXX,XX +XXX,XX @@
+wrote 65536/65536 bytes at offset 0
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 65536/65536 bytes at offset 1048576
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 65536/65536 bytes at offset 3145728
+64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+wrote 10485760/10485760 bytes at offset 0
+10 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
+
+Launching VM...
+Creating some background I/O...
+{"return": {}}
+Starting active commit...
+{"return": {}}
+{"execute": "job-complete", "arguments": {"id": "job1"}}
+{"return": {}}
+{"data": {"device": "job1", "len": 131072, "offset": 131072, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"data": {"device": "job1", "len": 131072, "offset": 131072, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
+{"execute": "job-dismiss", "arguments": {"id": "job1"}}
+{"return": {}}
--
2.41.0

From: Stefan Hajnoczi <stefanha@redhat.com>

The code that builds an array of AioContext pointers indexed by the
virtqueue is not specific to virtio-blk. virtio-scsi will need to do the
same thing, so extract the functions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-11-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/iothread-vq-mapping.h |  45 ++++++++
 hw/block/virtio-blk.c                   | 142 +-----------------------
 hw/virtio/iothread-vq-mapping.c         | 131 ++++++++++++++++++++++
 hw/virtio/meson.build                   |   1 +
 4 files changed, 178 insertions(+), 141 deletions(-)
 create mode 100644 include/hw/virtio/iothread-vq-mapping.h
 create mode 100644 hw/virtio/iothread-vq-mapping.c

diff --git a/include/hw/virtio/iothread-vq-mapping.h b/include/hw/virtio/iothread-vq-mapping.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/include/hw/virtio/iothread-vq-mapping.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * IOThread Virtqueue Mapping
+ *
+ * Copyright Red Hat, Inc
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#ifndef HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
+#define HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
+
+#include "qapi/error.h"
+#include "qapi/qapi-types-virtio.h"
+
+/**
+ * iothread_vq_mapping_apply:
+ * @list: The mapping of virtqueues to IOThreads.
+ * @vq_aio_context: The array of AioContext pointers to fill in.
+ * @num_queues: The length of @vq_aio_context.
+ * @errp: If an error occurs, a pointer to the area to store the error.
+ *
+ * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
+ * the iothread-vq-mapping parameter in @list.
+ *
+ * iothread_vq_mapping_cleanup() must be called to free IOThread object
+ * references after this function returns success.
+ *
+ * Returns: %true on success, %false on failure.
+ **/
+bool iothread_vq_mapping_apply(
+    IOThreadVirtQueueMappingList *list,
+    AioContext **vq_aio_context,
+    uint16_t num_queues,
+    Error **errp);
+
+/**
+ * iothread_vq_mapping_cleanup:
+ * @list: The mapping of virtqueues to IOThreads.
+ *
+ * Release IOThread object references that were acquired by
+ * iothread_vq_mapping_apply().
+ */
+void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list);
+
+#endif /* HW_VIRTIO_IOTHREAD_VQ_MAPPING_H */
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@
 #endif
 #include "hw/virtio/virtio-bus.h"
 #include "migration/qemu-file-types.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-access.h"
 #include "hw/virtio/virtio-blk-common.h"
 #include "qemu/coroutine.h"
@@ -XXX,XX +XXX,XX @@ static const BlockDevOps virtio_block_ops = {
     .drained_end = virtio_blk_drained_end,
 };

-static bool
-iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
-                             num_queues, Error **errp)
-{
-    g_autofree unsigned long *vqs = bitmap_new(num_queues);
-    g_autoptr(GHashTable) iothreads =
-        g_hash_table_new(g_str_hash, g_str_equal);
-
-    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
-        const char *name = node->value->iothread;
-        uint16List *vq;
-
-        if (!iothread_by_id(name)) {
-            error_setg(errp, "IOThread \"%s\" object does not exist", name);
-            return false;
-        }
-
-        if (!g_hash_table_add(iothreads, (gpointer)name)) {
-            error_setg(errp,
-                       "duplicate IOThread name \"%s\" in iothread-vq-mapping",
-                       name);
-            return false;
-        }
-
-        if (node != list) {
-            if (!!node->value->vqs != !!list->value->vqs) {
-                error_setg(errp, "either all items in iothread-vq-mapping "
-                                 "must have vqs or none of them must have it");
-                return false;
-            }
-        }
-
-        for (vq = node->value->vqs; vq; vq = vq->next) {
-            if (vq->value >= num_queues) {
-                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
-                           "less than num_queues %u in iothread-vq-mapping",
-                           vq->value, name, num_queues);
-                return false;
-            }
-
-            if (test_and_set_bit(vq->value, vqs)) {
-                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
-                           "because it is already assigned", vq->value, name);
-                return false;
-            }
-        }
-    }
-
-    if (list->value->vqs) {
-        for (uint16_t i = 0; i < num_queues; i++) {
-            if (!test_bit(i, vqs)) {
-                error_setg(errp,
-                        "missing vq %u IOThread assignment in iothread-vq-mapping",
-                        i);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-/**
- * iothread_vq_mapping_apply:
- * @list: The mapping of virtqueues to IOThreads.
- * @vq_aio_context: The array of AioContext pointers to fill in.
- * @num_queues: The length of @vq_aio_context.
- * @errp: If an error occurs, a pointer to the area to store the error.
- *
- * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
- * the iothread-vq-mapping parameter in @list.
- *
- * iothread_vq_mapping_cleanup() must be called to free IOThread object
- * references after this function returns success.
- *
- * Returns: %true on success, %false on failure.
- **/
-static bool iothread_vq_mapping_apply(
-    IOThreadVirtQueueMappingList *list,
-    AioContext **vq_aio_context,
-    uint16_t num_queues,
-    Error **errp)
-{
-    IOThreadVirtQueueMappingList *node;
-    size_t num_iothreads = 0;
-    size_t cur_iothread = 0;
-
-    if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
-        return false;
-    }
-
-    for (node = list; node; node = node->next) {
-        num_iothreads++;
-    }
-
-    for (node = list; node; node = node->next) {
-        IOThread *iothread = iothread_by_id(node->value->iothread);
-        AioContext *ctx = iothread_get_aio_context(iothread);
-
-        /* Released in virtio_blk_vq_aio_context_cleanup() */
-        object_ref(OBJECT(iothread));
-
-        if (node->value->vqs) {
-            uint16List *vq;
-
-            /* Explicit vq:IOThread assignment */
-            for (vq = node->value->vqs; vq; vq = vq->next) {
-                assert(vq->value < num_queues);
-                vq_aio_context[vq->value] = ctx;
-            }
-        } else {
-            /* Round-robin vq:IOThread assignment */
-            for (unsigned i = cur_iothread; i < num_queues;
-                 i += num_iothreads) {
-                vq_aio_context[i] = ctx;
-            }
-        }
-
-        cur_iothread++;
-    }
-
-    return true;
-}
-
-/**
- * iothread_vq_mapping_cleanup:
- * @list: The mapping of virtqueues to IOThreads.
- *
- * Release IOThread object references that were acquired by
- * iothread_vq_mapping_apply().
- */
-static void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
-{
-    IOThreadVirtQueueMappingList *node;
-
-    for (node = list; node; node = node->next) {
-        IOThread *iothread = iothread_by_id(node->value->iothread);
-        object_unref(OBJECT(iothread));
-    }
-}
-
 /* Context: BQL held */
 static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
 {
diff --git a/hw/virtio/iothread-vq-mapping.c b/hw/virtio/iothread-vq-mapping.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/iothread-vq-mapping.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * IOThread Virtqueue Mapping
+ *
+ * Copyright Red Hat, Inc
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#include "qemu/osdep.h"
+#include "system/iothread.h"
+#include "hw/virtio/iothread-vq-mapping.h"
+
+static bool
+iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
+                             num_queues, Error **errp)
+{
+    g_autofree unsigned long *vqs = bitmap_new(num_queues);
+    g_autoptr(GHashTable) iothreads =
+        g_hash_table_new(g_str_hash, g_str_equal);
+
+    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
+        const char *name = node->value->iothread;
+        uint16List *vq;
+
+        if (!iothread_by_id(name)) {
+            error_setg(errp, "IOThread \"%s\" object does not exist", name);
+            return false;
+        }
+
+        if (!g_hash_table_add(iothreads, (gpointer)name)) {
+            error_setg(errp,
+                       "duplicate IOThread name \"%s\" in iothread-vq-mapping",
+                       name);
+            return false;
+        }
+
+        if (node != list) {
+            if (!!node->value->vqs != !!list->value->vqs) {
+                error_setg(errp, "either all items in iothread-vq-mapping "
+                                 "must have vqs or none of them must have it");
+                return false;
+            }
+        }
+
+        for (vq = node->value->vqs; vq; vq = vq->next) {
+            if (vq->value >= num_queues) {
+                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
+                           "less than num_queues %u in iothread-vq-mapping",
+                           vq->value, name, num_queues);
+                return false;
+            }
+
+            if (test_and_set_bit(vq->value, vqs)) {
+                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
+                           "because it is already assigned", vq->value, name);
+                return false;
+            }
+        }
+    }
+
+    if (list->value->vqs) {
+        for (uint16_t i = 0; i < num_queues; i++) {
+            if (!test_bit(i, vqs)) {
+                error_setg(errp,
+                        "missing vq %u IOThread assignment in iothread-vq-mapping",
+                        i);
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+bool iothread_vq_mapping_apply(
+    IOThreadVirtQueueMappingList *list,
+    AioContext **vq_aio_context,
+    uint16_t num_queues,
+    Error **errp)
+{
+    IOThreadVirtQueueMappingList *node;
+    size_t num_iothreads = 0;
+    size_t cur_iothread = 0;
+
+    if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
+        return false;
+    }
+
+    for (node = list; node; node = node->next) {
+        num_iothreads++;
+    }
+
+    for (node = list; node; node = node->next) {
+        IOThread *iothread = iothread_by_id(node->value->iothread);
+        AioContext *ctx = iothread_get_aio_context(iothread);
+
+        /* Released in virtio_blk_vq_aio_context_cleanup() */
+        object_ref(OBJECT(iothread));
+
+        if (node->value->vqs) {
+            uint16List *vq;
+
+            /* Explicit vq:IOThread assignment */
+            for (vq = node->value->vqs; vq; vq = vq->next) {
+                assert(vq->value < num_queues);
+                vq_aio_context[vq->value] = ctx;
+            }
+        } else {
+            /* Round-robin vq:IOThread assignment */
+            for (unsigned i = cur_iothread; i < num_queues;
+                 i += num_iothreads) {
+                vq_aio_context[i] = ctx;
+            }
+        }
+
+        cur_iothread++;
+    }
+
+    return true;
+}
+
+void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
+{
+    IOThreadVirtQueueMappingList *node;
+
+    for (node = list; node; node = node->next) {
+        IOThread *iothread = iothread_by_id(node->value->iothread);
+        object_unref(OBJECT(iothread));
+    }
+}
+
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -XXX,XX +XXX,XX @@
 system_virtio_ss = ss.source_set()
 system_virtio_ss.add(files('virtio-bus.c'))
+system_virtio_ss.add(files('iothread-vq-mapping.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
--
2.48.1
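The validation rules enforced by iothread_vq_mapping_validate() above can be summarized as: no duplicate IOThread names, no vq index out of range or assigned twice, and (when explicit vq lists are used) full coverage of all queues. A condensed, self-contained version of those checks follows; plain C arrays replace the GLib bitmap and hash table, and for brevity each entry maps exactly one vq, whereas the real code allows one IOThread to take several. Names are illustrative only.

/* Condensed sketch of the iothread-vq-mapping validation rules. */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool validate(const char *names[], const int *vqs, int n,
                     int num_queues)
{
    bool seen_vq[16] = { false };

    if (num_queues > 16) {
        return false; /* sketch limit, not a rule from the series */
    }
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < i; j++) {
            if (strcmp(names[i], names[j]) == 0) {
                return false; /* duplicate IOThread name */
            }
        }
        if (vqs[i] >= num_queues || seen_vq[vqs[i]]) {
            return false; /* vq out of range or assigned twice */
        }
        seen_vq[vqs[i]] = true;
    }
    for (int q = 0; q < num_queues; q++) {
        if (!seen_vq[q]) {
            return false; /* missing vq assignment */
        }
    }
    return true;
}

int main(void)
{
    const char *names[] = { "iothread0", "iothread1" };
    int vqs[] = { 0, 1 };
    printf("valid: %d\n", validate(names, vqs, 2, 2));
    return 0;
}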
Deleted patch

blk_insert_bs() requires that callers hold the AioContext lock for the
node that should be inserted. Take it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20230605085711.21261-3-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/core/qdev-properties-system.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/core/qdev-properties-system.c
+++ b/hw/core/qdev-properties-system.c
@@ -XXX,XX +XXX,XX @@ static void set_drive_helper(Object *obj, Visitor *v, const char *name,
      * aware of iothreads require their BlockBackends to be in the main
      * AioContext.
      */
-    ctx = iothread ? bdrv_get_aio_context(bs) : qemu_get_aio_context();
-    blk = blk_new(ctx, 0, BLK_PERM_ALL);
+    ctx = bdrv_get_aio_context(bs);
+    blk = blk_new(iothread ? ctx : qemu_get_aio_context(),
+                  0, BLK_PERM_ALL);
     blk_created = true;

+    aio_context_acquire(ctx);
     ret = blk_insert_bs(blk, bs, errp);
+    aio_context_release(ctx);
+
     if (ret < 0) {
         goto fail;
     }
--
2.41.0
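The final patch below adds the iothread-vq-mapping parameter to virtio-scsi. As a usage sketch only (assuming the same JSON -device syntax as the virtio-blk feature linked from the commit message; exact spelling should be checked against the QEMU documentation), a guest with two IOThreads and virtqueues spread round-robin across them could be started like this:

qemu-system-x86_64 \
    -object iothread,id=iothread0 \
    -object iothread,id=iothread1 \
    -device '{"driver": "virtio-scsi-pci", "id": "scsi0",
              "iothread-vq-mapping": [{"iothread": "iothread0"},
                                      {"iothread": "iothread1"}]}' \
    ...

Each mapping entry may also carry an explicit "vqs" index list instead of relying on round-robin assignment, mirroring the virtio-blk behavior.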
blk_insert_bs() requires that callers hold the AioContext lock for the
node that should be inserted. Take it.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20230605085711.21261-4-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/unit/test-block-iothread.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test-block-iothread.c b/tests/unit/test-block-iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/tests/unit/test-block-iothread.c
+++ b/tests/unit/test-block-iothread.c
@@ -XXX,XX +XXX,XX @@ static void test_attach_second_node(void)
     BlockDriverState *bs, *filter;
     QDict *options;

+    aio_context_acquire(main_ctx);
     blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     blk_insert_bs(blk, bs, &error_abort);
@@ -XXX,XX +XXX,XX @@ static void test_attach_second_node(void)
     qdict_put_str(options, "driver", "raw");
     qdict_put_str(options, "file", "base");

-    aio_context_acquire(main_ctx);
     filter = bdrv_open(NULL, NULL, options, BDRV_O_RDWR, &error_abort);
     aio_context_release(main_ctx);

@@ -XXX,XX +XXX,XX @@ static void test_attach_preserve_blk_ctx(void)
 {
     IOThread *iothread = iothread_new();
     AioContext *ctx = iothread_get_aio_context(iothread);
+    AioContext *main_ctx = qemu_get_aio_context();
     BlockBackend *blk;
     BlockDriverState *bs;

+    aio_context_acquire(main_ctx);
     blk = blk_new(ctx, BLK_PERM_ALL, BLK_PERM_ALL);
     bs = bdrv_new_open_driver(&bdrv_test, "base", BDRV_O_RDWR, &error_abort);
     bs->total_sectors = 65536 / BDRV_SECTOR_SIZE;
@@ -XXX,XX +XXX,XX @@ static void test_attach_preserve_blk_ctx(void)
     blk_insert_bs(blk, bs, &error_abort);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs) == ctx);
+    aio_context_release(main_ctx);

     /* Remove the node again */
     aio_context_acquire(ctx);
@@ -XXX,XX +XXX,XX @@ static void test_attach_preserve_blk_ctx(void)
     g_assert(bdrv_get_aio_context(bs) == qemu_get_aio_context());

     /* Re-attach the node */
+    aio_context_acquire(main_ctx);
     blk_insert_bs(blk, bs, &error_abort);
+    aio_context_release(main_ctx);
     g_assert(blk_get_aio_context(blk) == ctx);
     g_assert(bdrv_get_aio_context(bs) == ctx);

From: Stefan Hajnoczi <stefanha@redhat.com>

Allow virtio-scsi virtqueues to be assigned to different IOThreads. This
makes it possible to take advantage of host multi-queue block layer
scalability by assigning virtqueues that have affinity with vCPUs to
different IOThreads that have affinity with host CPUs. The same feature
was introduced for virtio-blk in the past:
https://developers.redhat.com/articles/2024/09/05/scaling-virtio-blk-disk-io-iothread-virtqueue-mapping

Here are fio randread 4k iodepth=64 results from a 4 vCPU guest with an
Intel P4800X SSD:

 iothreads  IOPS
 ------------------------------
 1          189576
 2          312698
 4          346744

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20250311132616.1049687-12-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/virtio-scsi.h |  5 +-
 hw/scsi/virtio-scsi-dataplane.c | 90 ++++++++++++++++++++++---------
 hw/scsi/virtio-scsi.c           | 63 ++++++++++++++---------
 3 files changed, 107 insertions(+), 51 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/virtio.h"
 #include "hw/scsi/scsi.h"
 #include "chardev/char-fe.h"
+#include "qapi/qapi-types-virtio.h"
 #include "system/iothread.h"

 #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common"
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSIConf {
     CharBackend chardev;
     uint32_t boot_tpgt;
     IOThread *iothread;
+    IOThreadVirtQueueMappingList *iothread_vq_mapping_list;
 };

 struct VirtIOSCSI;
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
     QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;

     /* Fields for dataplane below */
-    AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
+    AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */

     bool dataplane_started;
     bool dataplane_starting;
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_common_realize(DeviceState *dev,
 void virtio_scsi_common_unrealize(DeviceState *dev);

 void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp);
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s);
 int virtio_scsi_dataplane_start(VirtIODevice *s);
 void virtio_scsi_dataplane_stop(VirtIODevice *s);

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@
 #include "system/block-backend.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"

 /* Context: BQL held */
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    uint16_t num_vqs = vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED;

-    if (vs->conf.iothread) {
+    if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) {
+        error_setg(errp,
+                   "iothread and iothread-vq-mapping properties cannot be set "
+                   "at the same time");
+        return;
+    }
+
+    if (vs->conf.iothread || vs->conf.iothread_vq_mapping_list) {
         if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
             error_setg(errp,
                        "device is incompatible with iothread "
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
             error_setg(errp, "ioeventfd is required for iothread");
             return;
         }
-        s->ctx = iothread_get_aio_context(vs->conf.iothread);
-    } else {
-        if (!virtio_device_ioeventfd_enabled(vdev)) {
+    }
+
+    s->vq_aio_context = g_new(AioContext *, num_vqs);
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list,
+                                       s->vq_aio_context, num_vqs, errp)) {
+            g_free(s->vq_aio_context);
+            s->vq_aio_context = NULL;
             return;
         }
-        s->ctx = qemu_get_aio_context();
+    } else if (vs->conf.iothread) {
+        AioContext *ctx = iothread_get_aio_context(vs->conf.iothread);
+        for (uint16_t i = 0; i < num_vqs; i++) {
+            s->vq_aio_context[i] = ctx;
+        }
+
+        /* Released in virtio_scsi_dataplane_cleanup() */
+        object_ref(OBJECT(vs->conf.iothread));
+    } else {
+        AioContext *ctx = qemu_get_aio_context();
+        for (unsigned i = 0; i < num_vqs; i++) {
+            s->vq_aio_context[i] = ctx;
+        }
+    }
+}
+
+/* Context: BQL held */
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s)
+{
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        iothread_vq_mapping_cleanup(vs->conf.iothread_vq_mapping_list);
     }
+
+    if (vs->conf.iothread) {
+        object_unref(OBJECT(vs->conf.iothread));
+    }
+
+    g_free(s->vq_aio_context);
+    s->vq_aio_context = NULL;
 }

 static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
 }

 /* Context: BH in IOThread */
-static void virtio_scsi_dataplane_stop_bh(void *opaque)
+static void virtio_scsi_dataplane_stop_vq_bh(void *opaque)
 {
-    VirtIOSCSI *s = opaque;
-    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    AioContext *ctx = qemu_get_current_aio_context();
+    VirtQueue *vq = opaque;
     EventNotifier *host_notifier;
-    int i;

-    virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq);
+    virtio_queue_aio_detach_host_notifier(vq, ctx);
+    host_notifier = virtio_queue_get_host_notifier(vq);

     /*
      * Test and clear notifier after disabling event, in case poll callback
      * didn't have time to run.
      */
     virtio_queue_host_notifier_read(host_notifier);
-
-    virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->event_vq);
-    virtio_queue_host_notifier_read(host_notifier);
-
-    for (i = 0; i < vs->conf.num_queues; i++) {
-        virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx);
-        host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]);
-        virtio_queue_host_notifier_read(host_notifier);
-    }
 }

 /* Context: BQL held */
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     smp_wmb(); /* paired with aio_notify_accept() */

     if (s->bus.drain_count == 0) {
-        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
-        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
+        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq,
+                                              s->vq_aio_context[0]);
+        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq,
+                                                      s->vq_aio_context[1]);

         for (i = 0; i < vs->conf.num_queues; i++) {
-            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
+            AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], ctx);
         }
     }
     return 0;
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     s->dataplane_stopping = true;

     if (s->bus.drain_count == 0) {
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
+        for (i = 0; i < vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED; i++) {
+            VirtQueue *vq = virtio_get_queue(&vs->parent_obj, i);
+            AioContext *ctx = s->vq_aio_context[i];
+            aio_wait_bh_oneshot(ctx, virtio_scsi_dataplane_stop_vq_bh, vq);
+        }
     }

     blk_drain_all(); /* ensure there are no in-flight requests */
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/qdev-properties.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
 #include "trace.h"
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
     g_free(n);
 }

-static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
-{
-    if (s->dataplane_started && d && blk_is_available(d->conf.blk)) {
-        assert(blk_get_aio_context(d->conf.blk) == s->ctx);
-    }
-}
-
 static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
 {
     VirtIOSCSI *s = req->dev;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s)

     assert(!s->dataplane_started);

-    if (s->ctx) {
+    for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+        AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+
         /* Our BH only runs after previously scheduled BHs */
-        aio_wait_bh_oneshot(s->ctx, dummy_bh, NULL);
+        aio_wait_bh_oneshot(ctx, dummy_bh, NULL);
     }
 }

@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
     AioContext *ctx;
     int ret = 0;

-    virtio_scsi_ctx_check(s, d);
     /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */
     req->resp.tmf.response = VIRTIO_SCSI_S_OK;

@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)

     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
     case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
+        g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
+
         if (!d) {
             goto fail;
         }
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)

         qatomic_inc(&req->remaining);

-        ctx = s->ctx ?: qemu_get_aio_context();
-        virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+        for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+            ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+
+            if (!g_hash_table_add(aio_contexts, ctx)) {
+                continue; /* skip previously added AioContext */
+            }
+
+            virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+        }

         virtio_scsi_tmf_dec_remaining(req);
         ret = -EINPROGRESS;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
  */
 static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s)
 {
-    if (!s->ctx || s->dataplane_started) {
+    if (s->dataplane_started) {
         return false;
     }
+    if (s->vq_aio_context[0] == qemu_get_aio_context()) {
+        return false; /* not using IOThreads */
+    }

     virtio_device_start_ioeventfd(&s->parent_obj.parent_obj);
     return !s->dataplane_fenced;
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
         virtio_scsi_complete_cmd_req(req);
         return -ENOENT;
     }
-    virtio_scsi_ctx_check(s, d);
     req->sreq = scsi_req_new(d, req->req.cmd.tag,
                              virtio_scsi_get_lun(req->req.cmd.lun),
                              req->req.cmd.cdb, vs->cdb_size, req);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
+    AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED];
     SCSIDevice *sd = SCSI_DEVICE(dev);
-    int ret;

-    if (s->ctx && !s->dataplane_fenced) {
-        ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp);
-        if (ret < 0) {
-            return;
-        }
+    if (ctx != qemu_get_aio_context() && !s->dataplane_fenced) {
+        /*
+         * Try to make the BlockBackend's AioContext match ours. Ignore failure
+         * because I/O will still work although block jobs and other users
+         * might be slower when multiple AioContexts use a BlockBackend.
+         */
+        blk_set_aio_context(sd->conf.blk, ctx, errp);
     }

     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,

     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);

-    if (s->ctx) {
+    if (s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED] != qemu_get_aio_context()) {
         /* If other users keep the BlockBackend in the iothread, that's ok */
         blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_drained_begin(SCSIBus *bus)

     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
-        virtio_queue_aio_detach_host_notifier(vq, s->ctx);
+        virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
     }
 }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_drained_end(SCSIBus *bus)

     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
+        AioContext *ctx = s->vq_aio_context[i];
+
         if (vq == vs->event_vq) {
-            virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier_no_poll(vq, ctx);
         } else {
-            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier(vq, ctx);
         }
     }
 }
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_common_unrealize(DeviceState *dev)
     virtio_cleanup(vdev);
370
}
371
372
+/* main loop */
373
static void virtio_scsi_device_unrealize(DeviceState *dev)
374
{
375
VirtIOSCSI *s = VIRTIO_SCSI(dev);
376
377
virtio_scsi_reset_tmf_bh(s);
378
-
379
+ virtio_scsi_dataplane_cleanup(s);
380
qbus_set_hotplug_handler(BUS(&s->bus), NULL);
381
virtio_scsi_common_unrealize(dev);
382
qemu_mutex_destroy(&s->tmf_bh_lock);
383
@@ -XXX,XX +XXX,XX @@ static const Property virtio_scsi_properties[] = {
384
VIRTIO_SCSI_F_CHANGE, true),
385
DEFINE_PROP_LINK("iothread", VirtIOSCSI, parent_obj.conf.iothread,
386
TYPE_IOTHREAD, IOThread *),
387
+ DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOSCSI,
388
+ parent_obj.conf.iothread_vq_mapping_list),
389
};
390
391
static const VMStateDescription vmstate_virtio_scsi = {
62
--
392
--
63
2.41.0
393
2.48.1
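
Aside (not part of the patch): the per-virtqueue AioContext array used
throughout this series has a fixed layout: index 0 is the ctrl virtqueue,
index 1 is the event virtqueue, and the command virtqueues start at
VIRTIO_SCSI_VQ_NUM_FIXED, as seen in the hunks above. A minimal standalone
C sketch of that indexing; the num_queues value is made up for the demo:

    #include <stdio.h>

    #define VIRTIO_SCSI_VQ_NUM_FIXED 2 /* ctrl vq + event vq */

    int main(void)
    {
        unsigned num_queues = 4; /* hypothetical num_queues=4 device */

        for (unsigned i = 0; i < VIRTIO_SCSI_VQ_NUM_FIXED + num_queues; i++) {
            if (i == 0) {
                printf("vq %u: ctrl  -> vq_aio_context[0]\n", i);
            } else if (i == 1) {
                printf("vq %u: event -> vq_aio_context[1]\n", i);
            } else {
                /* command queue j is stored at index NUM_FIXED + j */
                printf("vq %u: cmd %u -> vq_aio_context[%u]\n",
                       i, i - VIRTIO_SCSI_VQ_NUM_FIXED, i);
            }
        }
        return 0;
    }
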
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
Mark functions as coroutine_fn when they are only called by other coroutine_fns
3
Previously the ctrl virtqueue was handled in the AioContext where SCSI
4
and they can suspend. Change calls to co_wrappers to use the non-wrapped
4
requests are processed. When IOThread Virtqueue Mapping was added things
5
functions, which in turn requires adding GRAPH_RDLOCK annotations.
5
became more complicated because SCSI requests could run in other
6
6
AioContexts.
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
7
8
Message-ID: <20230601115145.196465-5-pbonzini@redhat.com>
8
Simplify by handling the ctrl virtqueue in the main loop where reset
9
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
9
operations can be performed. Note that BHs are still used for canceling SCSI
10
requests in their AioContexts, but at least the main loop activity
11
doesn't need BHs anymore.
12
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Message-ID: <20250311132616.1049687-13-stefanha@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
15
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
16
---
12
block/bochs.c | 7 ++++---
17
include/hw/virtio/virtio-scsi.h | 8 --
13
1 file changed, 4 insertions(+), 3 deletions(-)
18
hw/scsi/virtio-scsi-dataplane.c | 6 ++
14
19
hw/scsi/virtio-scsi.c | 144 ++++++--------------------------
15
diff --git a/block/bochs.c b/block/bochs.c
20
3 files changed, 33 insertions(+), 125 deletions(-)
21
22
diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
16
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
17
--- a/block/bochs.c
24
--- a/include/hw/virtio/virtio-scsi.h
18
+++ b/block/bochs.c
25
+++ b/include/hw/virtio/virtio-scsi.h
19
@@ -XXX,XX +XXX,XX @@ static void bochs_refresh_limits(BlockDriverState *bs, Error **errp)
26
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
20
bs->bl.request_alignment = BDRV_SECTOR_SIZE; /* No sub-sector I/O */
27
28
QemuMutex ctrl_lock; /* protects ctrl_vq */
29
30
- /*
31
- * TMFs deferred to main loop BH. These fields are protected by
32
- * tmf_bh_lock.
33
- */
34
- QemuMutex tmf_bh_lock;
35
- QEMUBH *tmf_bh;
36
- QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
37
-
38
/* Fields for dataplane below */
39
AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */
40
41
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
42
index XXXXXXX..XXXXXXX 100644
43
--- a/hw/scsi/virtio-scsi-dataplane.c
44
+++ b/hw/scsi/virtio-scsi-dataplane.c
45
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
46
s->vq_aio_context[i] = ctx;
47
}
48
}
49
+
50
+ /*
51
+ * Always handle the ctrl virtqueue in the main loop thread where device
52
+ * resets can be performed.
53
+ */
54
+ s->vq_aio_context[0] = qemu_get_aio_context();
21
}
55
}
22
56
23
-static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
57
/* Context: BQL held */
24
+static int64_t coroutine_fn GRAPH_RDLOCK
58
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
25
+seek_to_sector(BlockDriverState *bs, int64_t sector_num)
59
index XXXXXXX..XXXXXXX 100644
60
--- a/hw/scsi/virtio-scsi.c
61
+++ b/hw/scsi/virtio-scsi.c
62
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
63
g_free(n);
64
}
65
66
-static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
67
-{
68
- VirtIOSCSI *s = req->dev;
69
- SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
70
- BusChild *kid;
71
- int target;
72
-
73
- switch (req->req.tmf.subtype) {
74
- case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
75
- if (!d) {
76
- req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
77
- goto out;
78
- }
79
- if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
80
- req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
81
- goto out;
82
- }
83
- qatomic_inc(&s->resetting);
84
- device_cold_reset(&d->qdev);
85
- qatomic_dec(&s->resetting);
86
- break;
87
-
88
- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
89
- target = req->req.tmf.lun[1];
90
- qatomic_inc(&s->resetting);
91
-
92
- rcu_read_lock();
93
- QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
94
- SCSIDevice *d1 = SCSI_DEVICE(kid->child);
95
- if (d1->channel == 0 && d1->id == target) {
96
- device_cold_reset(&d1->qdev);
97
- }
98
- }
99
- rcu_read_unlock();
100
-
101
- qatomic_dec(&s->resetting);
102
- break;
103
-
104
- default:
105
- g_assert_not_reached();
106
- }
107
-
108
-out:
109
- object_unref(OBJECT(d));
110
- virtio_scsi_complete_req(req, &s->ctrl_lock);
111
-}
112
-
113
-/* Some TMFs must be processed from the main loop thread */
114
-static void virtio_scsi_do_tmf_bh(void *opaque)
115
-{
116
- VirtIOSCSI *s = opaque;
117
- QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
118
- VirtIOSCSIReq *req;
119
- VirtIOSCSIReq *tmp;
120
-
121
- GLOBAL_STATE_CODE();
122
-
123
- WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
124
- QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
125
- QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
126
- QTAILQ_INSERT_TAIL(&reqs, req, next);
127
- }
128
-
129
- qemu_bh_delete(s->tmf_bh);
130
- s->tmf_bh = NULL;
131
- }
132
-
133
- QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
134
- QTAILQ_REMOVE(&reqs, req, next);
135
- virtio_scsi_do_one_tmf_bh(req);
136
- }
137
-}
138
-
139
-static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
140
-{
141
- VirtIOSCSIReq *req;
142
- VirtIOSCSIReq *tmp;
143
-
144
- GLOBAL_STATE_CODE();
145
-
146
- /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */
147
- if (s->tmf_bh) {
148
- qemu_bh_delete(s->tmf_bh);
149
- s->tmf_bh = NULL;
150
- }
151
-
152
- QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
153
- QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
154
-
155
- /* SAM-6 6.3.2 Hard reset */
156
- req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
157
- virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
158
- }
159
-}
160
-
161
-static void virtio_scsi_defer_tmf_to_main_loop(VirtIOSCSIReq *req)
162
-{
163
- VirtIOSCSI *s = req->dev;
164
-
165
- WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
166
- QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
167
-
168
- if (!s->tmf_bh) {
169
- s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
170
- qemu_bh_schedule(s->tmf_bh);
171
- }
172
- }
173
-}
174
-
175
static void virtio_scsi_tmf_cancel_req(VirtIOSCSIReq *tmf, SCSIRequest *r)
26
{
176
{
27
BDRVBochsState *s = bs->opaque;
177
VirtIOSCSICancelNotifier *notifier;
28
uint64_t offset = sector_num * 512;
178
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
29
@@ -XXX,XX +XXX,XX @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num)
179
break;
30
(s->extent_blocks + s->bitmap_blocks));
180
31
181
case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
32
/* read in bitmap for current extent */
182
- case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
33
- ret = bdrv_pread(bs->file, bitmap_offset + (extent_offset / 8), 1,
183
- virtio_scsi_defer_tmf_to_main_loop(req);
34
- &bitmap_entry, 0);
184
- ret = -EINPROGRESS;
35
+ ret = bdrv_co_pread(bs->file, bitmap_offset + (extent_offset / 8), 1,
185
+ if (!d) {
36
+ &bitmap_entry, 0);
186
+ goto fail;
37
if (ret < 0) {
187
+ }
38
return ret;
188
+ if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
39
}
189
+ goto incorrect_lun;
190
+ }
191
+ qatomic_inc(&s->resetting);
192
+ device_cold_reset(&d->qdev);
193
+ qatomic_dec(&s->resetting);
194
break;
195
196
+ case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: {
197
+ BusChild *kid;
198
+ int target = req->req.tmf.lun[1];
199
+ qatomic_inc(&s->resetting);
200
+
201
+ rcu_read_lock();
202
+ QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
203
+ SCSIDevice *d1 = SCSI_DEVICE(kid->child);
204
+ if (d1->channel == 0 && d1->id == target) {
205
+ device_cold_reset(&d1->qdev);
206
+ }
207
+ }
208
+ rcu_read_unlock();
209
+
210
+ qatomic_dec(&s->resetting);
211
+ break;
212
+ }
213
+
214
case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
215
case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
216
g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
217
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
218
219
assert(!s->dataplane_started);
220
221
- virtio_scsi_reset_tmf_bh(s);
222
virtio_scsi_flush_defer_tmf_to_aio_context(s);
223
224
qatomic_inc(&s->resetting);
225
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
226
VirtIOSCSI *s = VIRTIO_SCSI(dev);
227
Error *err = NULL;
228
229
- QTAILQ_INIT(&s->tmf_bh_list);
230
qemu_mutex_init(&s->ctrl_lock);
231
qemu_mutex_init(&s->event_lock);
232
- qemu_mutex_init(&s->tmf_bh_lock);
233
234
virtio_scsi_common_realize(dev,
235
virtio_scsi_handle_ctrl,
236
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
237
{
238
VirtIOSCSI *s = VIRTIO_SCSI(dev);
239
240
- virtio_scsi_reset_tmf_bh(s);
241
virtio_scsi_dataplane_cleanup(s);
242
qbus_set_hotplug_handler(BUS(&s->bus), NULL);
243
virtio_scsi_common_unrealize(dev);
244
- qemu_mutex_destroy(&s->tmf_bh_lock);
245
qemu_mutex_destroy(&s->event_lock);
246
qemu_mutex_destroy(&s->ctrl_lock);
247
}
40
--
248
--
41
2.41.0
249
2.48.1
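
Aside (not part of the patch): the ABORT_TASK_SET/CLEAR_TASK_SET path in
this patch fans a TMF out to every command queue's AioContext exactly
once, using a GHashTable as a set. g_hash_table_add() returns FALSE when
the key was already present, which is what skips duplicate contexts. A
minimal standalone GLib sketch of that dedup loop; the pointer values
stand in for real AioContexts:

    #include <glib.h>
    #include <stdio.h>

    int main(void)
    {
        /* hypothetical mapping: four command queues over two contexts */
        gpointer vq_aio_context[] = {
            GINT_TO_POINTER(1), GINT_TO_POINTER(2),
            GINT_TO_POINTER(1), GINT_TO_POINTER(2),
        };
        g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);

        for (gsize i = 0; i < G_N_ELEMENTS(vq_aio_context); i++) {
            if (!g_hash_table_add(aio_contexts, vq_aio_context[i])) {
                continue; /* already deferred to this AioContext */
            }
            printf("defer TMF to context %p\n", vq_aio_context[i]);
        }
        return 0;
    }

This prints two "defer" lines for the four queues, mirroring how the
patch schedules one BH per distinct AioContext rather than one per queue.
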
1
Now that bdrv_graph_wrlock() temporarily drops the AioContext lock that
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
its caller holds, it can poll without causing deadlocks. We can now
3
re-enable graph locking.
4
2
5
This reverts commit ad128dff0bf4b6f971d05eb4335a627883a19c1d.
3
Peter Krempa and Kevin Wolf observed that iothread-vq-mapping is
4
confusing to use because the control and event virtqueues have a fixed
5
location before the command virtqueues but need to be treated
6
differently.
6
7
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Only expose the command virtqueues via iothread-vq-mapping so that the
8
Message-ID: <20230605085711.21261-12-kwolf@redhat.com>
9
command-line parameter is intuitive: it controls where SCSI requests are
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
processed.
11
12
The control virtqueue needs to be hardcoded to the main loop thread for
13
technical reasons anyway. Kevin also pointed out that it's better to
14
place the event virtqueue in the main loop thread since its no poll
15
behavior would prevent polling if assigned to an IOThread.
16
17
This change is its own commit to avoid squashing the previous commit.
18
19
Suggested-by: Kevin Wolf <kwolf@redhat.com>
20
Suggested-by: Peter Krempa <pkrempa@redhat.com>
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
Message-ID: <20250311132616.1049687-14-stefanha@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
23
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
11
---
24
---
12
block/graph-lock.c | 26 --------------------------
25
hw/scsi/virtio-scsi-dataplane.c | 33 ++++++++++++++++++++-------------
13
1 file changed, 26 deletions(-)
26
1 file changed, 20 insertions(+), 13 deletions(-)
14
27
15
diff --git a/block/graph-lock.c b/block/graph-lock.c
28
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
16
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
17
--- a/block/graph-lock.c
30
--- a/hw/scsi/virtio-scsi-dataplane.c
18
+++ b/block/graph-lock.c
31
+++ b/hw/scsi/virtio-scsi-dataplane.c
19
@@ -XXX,XX +XXX,XX @@ BdrvGraphLock graph_lock;
32
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
20
/* Protects the list of aiocontext and orphaned_reader_count */
33
VirtIODevice *vdev = VIRTIO_DEVICE(s);
21
static QemuMutex aio_context_list_lock;
34
BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
22
35
VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
23
-#if 0
36
- uint16_t num_vqs = vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED;
24
/* Written and read with atomic operations. */
37
25
static int has_writer;
38
if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) {
26
-#endif
39
error_setg(errp,
27
40
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
28
/*
29
* A reader coroutine could move from an AioContext to another.
30
@@ -XXX,XX +XXX,XX @@ void unregister_aiocontext(AioContext *ctx)
31
g_free(ctx->bdrv_graph);
32
}
33
34
-#if 0
35
static uint32_t reader_count(void)
36
{
37
BdrvGraphRWlock *brdv_graph;
38
@@ -XXX,XX +XXX,XX @@ static uint32_t reader_count(void)
39
assert((int32_t)rd >= 0);
40
return rd;
41
}
42
-#endif
43
44
void bdrv_graph_wrlock(BlockDriverState *bs)
45
{
46
AioContext *ctx = NULL;
47
48
GLOBAL_STATE_CODE();
49
- /*
50
- * TODO Some callers hold an AioContext lock when this is called, which
51
- * causes deadlocks. Reenable once the AioContext locking is cleaned up (or
52
- * AioContext locks are gone).
53
- */
54
-#if 0
55
assert(!qatomic_read(&has_writer));
56
-#endif
57
58
/*
59
* Release only non-mainloop AioContext. The mainloop often relies on the
60
@@ -XXX,XX +XXX,XX @@ void bdrv_graph_wrlock(BlockDriverState *bs)
61
}
41
}
62
}
42
}
63
43
64
-#if 0
44
- s->vq_aio_context = g_new(AioContext *, num_vqs);
65
/* Make sure that constantly arriving new I/O doesn't cause starvation */
45
+ s->vq_aio_context = g_new(AioContext *, vs->conf.num_queues +
66
bdrv_drain_all_begin_nopoll();
46
+ VIRTIO_SCSI_VQ_NUM_FIXED);
67
47
+
68
@@ -XXX,XX +XXX,XX @@ void bdrv_graph_wrlock(BlockDriverState *bs)
48
+ /*
69
} while (reader_count() >= 1);
49
+ * Handle the ctrl virtqueue in the main loop thread where device resets
70
50
+ * can be performed.
71
bdrv_drain_all_end();
51
+ */
72
-#endif
52
+ s->vq_aio_context[0] = qemu_get_aio_context();
73
53
+
74
if (ctx) {
54
+ /*
75
aio_context_acquire(bdrv_get_aio_context(bs));
55
+ * Handle the event virtqueue in the main loop thread where its no_poll
76
@@ -XXX,XX +XXX,XX @@ void bdrv_graph_wrlock(BlockDriverState *bs)
56
+ * behavior won't stop IOThread polling.
77
void bdrv_graph_wrunlock(void)
57
+ */
78
{
58
+ s->vq_aio_context[1] = qemu_get_aio_context();
79
GLOBAL_STATE_CODE();
59
80
-#if 0
60
if (vs->conf.iothread_vq_mapping_list) {
81
QEMU_LOCK_GUARD(&aio_context_list_lock);
61
if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list,
82
assert(qatomic_read(&has_writer));
62
- s->vq_aio_context, num_vqs, errp)) {
83
63
+ &s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED],
84
@@ -XXX,XX +XXX,XX @@ void bdrv_graph_wrunlock(void)
64
+ vs->conf.num_queues, errp)) {
85
65
g_free(s->vq_aio_context);
86
/* Wake up all coroutine that are waiting to read the graph */
66
s->vq_aio_context = NULL;
87
qemu_co_enter_all(&reader_queue, &aio_context_list_lock);
67
return;
88
-#endif
68
}
89
}
69
} else if (vs->conf.iothread) {
90
70
AioContext *ctx = iothread_get_aio_context(vs->conf.iothread);
91
void coroutine_fn bdrv_graph_co_rdlock(void)
71
- for (uint16_t i = 0; i < num_vqs; i++) {
92
{
72
- s->vq_aio_context[i] = ctx;
93
- /* TODO Reenable when wrlock is reenabled */
73
+ for (uint16_t i = 0; i < vs->conf.num_queues; i++) {
94
-#if 0
74
+ s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
95
BdrvGraphRWlock *bdrv_graph;
75
}
96
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
76
97
77
/* Released in virtio_scsi_dataplane_cleanup() */
98
@@ -XXX,XX +XXX,XX @@ void coroutine_fn bdrv_graph_co_rdlock(void)
78
object_ref(OBJECT(vs->conf.iothread));
99
qemu_co_queue_wait(&reader_queue, &aio_context_list_lock);
79
} else {
80
AioContext *ctx = qemu_get_aio_context();
81
- for (unsigned i = 0; i < num_vqs; i++) {
82
- s->vq_aio_context[i] = ctx;
83
+ for (unsigned i = 0; i < vs->conf.num_queues; i++) {
84
+ s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
100
}
85
}
101
}
86
}
102
-#endif
87
-
88
- /*
89
- * Always handle the ctrl virtqueue in the main loop thread where device
90
- * resets can be performed.
91
- */
92
- s->vq_aio_context[0] = qemu_get_aio_context();
103
}
93
}
104
94
105
void coroutine_fn bdrv_graph_co_rdunlock(void)
95
/* Context: BQL held */
106
{
107
-#if 0
108
BdrvGraphRWlock *bdrv_graph;
109
bdrv_graph = qemu_get_current_aio_context()->bdrv_graph;
110
111
@@ -XXX,XX +XXX,XX @@ void coroutine_fn bdrv_graph_co_rdunlock(void)
112
if (qatomic_read(&has_writer)) {
113
aio_wait_kick();
114
}
115
-#endif
116
}
117
118
void bdrv_graph_rdlock_main_loop(void)
119
@@ -XXX,XX +XXX,XX @@ void bdrv_graph_rdunlock_main_loop(void)
120
void assert_bdrv_graph_readable(void)
121
{
122
/* reader_count() is slow due to aio_context_list_lock lock contention */
123
- /* TODO Reenable when wrlock is reenabled */
124
-#if 0
125
#ifdef CONFIG_DEBUG_GRAPH_LOCK
126
assert(qemu_in_main_thread() || reader_count());
127
#endif
128
-#endif
129
}
130
131
void assert_bdrv_graph_writable(void)
132
{
133
assert(qemu_in_main_thread());
134
- /* TODO Reenable when wrlock is reenabled */
135
-#if 0
136
assert(qatomic_read(&has_writer));
137
-#endif
138
}
139
--
96
--
140
2.41.0
97
2.48.1
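
Aside (not part of the patch): after this change the fixed virtqueues are
always pinned to the main loop and a user-supplied iothread-vq-mapping
list only covers the command virtqueues, i.e. the tail of the
vq_aio_context array. A minimal standalone C sketch of that assignment;
AioContext is an opaque stand-in and the pointer values are dummies:

    #include <stdio.h>

    #define VIRTIO_SCSI_VQ_NUM_FIXED 2

    typedef struct AioContext AioContext; /* opaque stand-in */

    /* Hypothetical helper mirroring the setup logic in this patch. */
    static void assign_vq_contexts(AioContext **vq_aio_context,
                                   AioContext *main_ctx,
                                   AioContext **cmd_ctxs,
                                   unsigned num_queues)
    {
        vq_aio_context[0] = main_ctx; /* ctrl vq: resets run in main loop */
        vq_aio_context[1] = main_ctx; /* event vq: no_poll stays off IOThreads */
        for (unsigned i = 0; i < num_queues; i++) {
            vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = cmd_ctxs[i];
        }
    }

    int main(void)
    {
        AioContext *main_ctx = (AioContext *)0x1; /* dummy pointers */
        AioContext *iothreads[] = { (AioContext *)0x2, (AioContext *)0x3 };
        AioContext *vqs[VIRTIO_SCSI_VQ_NUM_FIXED + 2];

        assign_vq_contexts(vqs, main_ctx, iothreads, 2);
        for (unsigned i = 0; i < VIRTIO_SCSI_VQ_NUM_FIXED + 2; i++) {
            printf("vq %u -> %p\n", i, (void *)vqs[i]);
        }
        return 0;
    }
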