The following changes since commit 825b96dbcee23d134b691fc75618b59c5f53da32:

  Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-03-11 09:32:07 +0800)

are available in the Git repository at:

  https://repo.or.cz/qemu/kevin.git tags/for-upstream

for you to fetch changes up to 42a48128417b3bfade93d1a4721348cc480e9e50:

  virtio-scsi: only expose cmd vqs via iothread-vq-mapping (2025-03-11 15:49:22 +0100)

----------------------------------------------------------------
Block layer patches

- virtio-scsi: add iothread-vq-mapping parameter (usage sketch below)
- Improve writethrough performance
- Fix missing zero init in bdrv_snapshot_goto()
- Code cleanup and iotests fixes
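
A usage sketch for the new virtio-scsi parameter (the iothread ids and
the JSON -device syntax shown here are illustrative assumptions, not
taken from this series):

  qemu-system-x86_64 ... \
      -object iothread,id=iot0 \
      -object iothread,id=iot1 \
      -device '{"driver": "virtio-scsi-pci", "id": "scsi0",
                "iothread-vq-mapping": [{"iothread": "iot0"},
                                        {"iothread": "iot1"}]}'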

----------------------------------------------------------------
Kevin Wolf (8):
      block: Remove unused blk_op_is_blocked()
      block: Zero block driver state before reopening
      file-posix: Support FUA writes
      block/io: Ignore FUA with cache.no-flush=on
      aio: Create AioPolledEvent
      aio-posix: Factor out adjust_polling_time()
      aio-posix: Separate AioPolledEvent per AioHandler
      aio-posix: Adjust polling time also for new handlers

Stefan Hajnoczi (13):
      scsi-disk: drop unused SCSIDiskState->bh field
      dma: use current AioContext for dma_blk_io()
      scsi: track per-SCSIRequest AioContext
      scsi: introduce requests_lock
      virtio-scsi: introduce event and ctrl virtqueue locks
      virtio-scsi: protect events_dropped field
      virtio-scsi: perform TMFs in appropriate AioContexts
      virtio-blk: extract cleanup_iothread_vq_mapping() function
      virtio-blk: tidy up iothread_vq_mapping functions
      virtio: extract iothread-vq-mapping.h API
      virtio-scsi: add iothread-vq-mapping parameter
      virtio-scsi: handle ctrl virtqueue in main loop
      virtio-scsi: only expose cmd vqs via iothread-vq-mapping

Thomas Huth (1):
      iotests: Limit qsd-migrate to working formats

 include/block/aio.h                         |   5 +-
 include/block/raw-aio.h                     |   8 +-
 include/hw/scsi/scsi.h                      |   8 +-
 include/hw/virtio/iothread-vq-mapping.h     |  45 +++
 include/hw/virtio/virtio-scsi.h             |  15 +-
 include/system/block-backend-global-state.h |   1 -
 include/system/dma.h                        |   3 +-
 util/aio-posix.h                            |   1 +
 block/block-backend.c                       |  12 -
 block/file-posix.c                          |  26 +-
 block/io.c                                  |   4 +
 block/io_uring.c                            |  13 +-
 block/linux-aio.c                           |  24 +-
 block/snapshot.c                            |   1 +
 hw/block/virtio-blk.c                       | 132 +-------
 hw/ide/core.c                               |   3 +-
 hw/ide/macio.c                              |   3 +-
 hw/scsi/scsi-bus.c                          | 121 +++++--
 hw/scsi/scsi-disk.c                         |  24 +-
 hw/scsi/virtio-scsi-dataplane.c             | 103 ++++--
 hw/scsi/virtio-scsi.c                       | 502 ++++++++++++++------------
 hw/virtio/iothread-vq-mapping.c             | 131 ++++++++
 system/dma-helpers.c                        |   8 +-
 util/aio-posix.c                            | 114 ++++---
 util/async.c                                |   1 -
 hw/virtio/meson.build                       |   1 +
 meson.build                                 |   4 +
 tests/qemu-iotests/tests/qsd-migrate        |   2 +-
 28 files changed, 803 insertions(+), 512 deletions(-)
 create mode 100644 include/hw/virtio/iothread-vq-mapping.h
 create mode 100644 hw/virtio/iothread-vq-mapping.c

Commit fc4e394b28 removed the last caller of blk_op_is_blocked(). Remove
the now unused function.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250206165331.379033-1-kwolf@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/system/block-backend-global-state.h |  1 -
 block/block-backend.c                       | 12 ------------
 2 files changed, 13 deletions(-)

diff --git a/include/system/block-backend-global-state.h b/include/system/block-backend-global-state.h
index XXXXXXX..XXXXXXX 100644
--- a/include/system/block-backend-global-state.h
+++ b/include/system/block-backend-global-state.h
@@ -XXX,XX +XXX,XX @@ bool blk_supports_write_perm(BlockBackend *blk);
 bool blk_is_sg(BlockBackend *blk);
 void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
 int blk_get_flags(BlockBackend *blk);
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
 int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
                         Error **errp);
 void blk_add_aio_context_notifier(BlockBackend *blk,
diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ void *blk_blockalign(BlockBackend *blk, size_t size)
     return qemu_blockalign(blk ? blk_bs(blk) : NULL, size);
 }
 
-bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp)
-{
-    BlockDriverState *bs = blk_bs(blk);
-    GLOBAL_STATE_CODE();
-    GRAPH_RDLOCK_GUARD_MAINLOOP();
-
-    if (!bs) {
-        return false;
-    }
-
-    return bdrv_op_is_blocked(bs, op, errp);
-}
 
 /**
  * Return BB's current AioContext. Note that this context may change
-- 
2.48.1

Block drivers assume in their .bdrv_open() implementation that their
state in bs->opaque has been zeroed; it is initially allocated with
g_malloc0() in bdrv_open_driver().

bdrv_snapshot_goto() needs to make sure that it is zeroed again before
calling drv->bdrv_open() so that block drivers don't use stale values.

One symptom of this bug is VMDK running into a double free when the user
tries to apply an internal snapshot like 'qemu-img snapshot -a test
test.vmdk'. This should be a graceful error because VMDK doesn't support
internal snapshots.
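
A minimal reproducer based on that command (image name and size are
arbitrary; the snapshot name doesn't need to exist, since VMDK rejects
internal snapshots either way):

  qemu-img create -f vmdk test.vmdk 64M
  qemu-img snapshot -a test test.vmdk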

==25507== Invalid free() / delete / delete[] / realloc()
==25507==    at 0x484B347: realloc (vg_replace_malloc.c:1801)
==25507==    by 0x54B592A: g_realloc (gmem.c:171)
==25507==    by 0x1B221D: vmdk_add_extent (../block/vmdk.c:570)
==25507==    by 0x1B1084: vmdk_open_sparse (../block/vmdk.c:1059)
==25507==    by 0x1AF3D8: vmdk_open (../block/vmdk.c:1371)
==25507==    by 0x1A2AE0: bdrv_snapshot_goto (../block/snapshot.c:299)
==25507==    by 0x205C77: img_snapshot (../qemu-img.c:3500)
==25507==    by 0x58FA087: (below main) (libc_start_call_main.h:58)
==25507==  Address 0x832f3e0 is 0 bytes inside a block of size 272 free'd
==25507==    at 0x4846B83: free (vg_replace_malloc.c:989)
==25507==    by 0x54AEAC4: g_free (gmem.c:208)
==25507==    by 0x1AF629: vmdk_close (../block/vmdk.c:2889)
==25507==    by 0x1A2A9C: bdrv_snapshot_goto (../block/snapshot.c:290)
==25507==    by 0x205C77: img_snapshot (../qemu-img.c:3500)
==25507==    by 0x58FA087: (below main) (libc_start_call_main.h:58)

This error was discovered by fuzzing qemu-img.

Cc: qemu-stable@nongnu.org
Closes: https://gitlab.com/qemu-project/qemu/-/issues/2853
Closes: https://gitlab.com/qemu-project/qemu/-/issues/2851
Reported-by: Denis Rastyogin <gerben@altlinux.org>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250310104858.28221-1-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 block/snapshot.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/block/snapshot.c b/block/snapshot.c
index XXXXXXX..XXXXXXX 100644
--- a/block/snapshot.c
+++ b/block/snapshot.c
@@ -XXX,XX +XXX,XX @@ int bdrv_snapshot_goto(BlockDriverState *bs,
         bdrv_graph_wrunlock();
 
         ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
+        memset(bs->opaque, 0, drv->instance_size);
         open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
         qobject_unref(options);
         if (open_ret < 0) {
-- 
2.48.1

Until now, FUA was always emulated with a separate flush after the write
for file-posix. The overhead of processing a second request can reduce
performance significantly for a guest disk that has disabled the write
cache, especially if the host disk is already write through, too, and
the flush isn't actually doing anything.

Advertise support for REQ_FUA in write requests and implement it for
Linux AIO and io_uring using the RWF_DSYNC flag for write requests. The
thread pool still performs a separate fdatasync() call. This can be
improved later by using the pwritev2() syscall if available.
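
To make the mechanism concrete, here is a minimal standalone sketch of
a FUA-style write at the syscall level (illustrative only, not part of
the patch; the file name is arbitrary):

  #define _GNU_SOURCE
  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/uio.h>

  int main(void)
  {
      char buf[4096];
      memset(buf, 0xab, sizeof(buf));
      struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };

      int fd = open("fua-demo.img", O_RDWR | O_CREAT, 0644);
      if (fd < 0) {
          perror("open");
          return 1;
      }

      /* RWF_DSYNC makes this single write durable, like an O_DSYNC
       * write, so no separate fdatasync() is needed afterwards. This
       * is the primitive that io_uring_prep_writev2() and
       * io_prep_pwritev2() request on the patch's behalf. */
      if (pwritev2(fd, &iov, 1, 0, RWF_DSYNC) < 0) {
          perror("pwritev2");
          return 1;
      }
      return 0;
  }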

As an example, this is how fio numbers can be improved in some scenarios
with this patch (all using virtio-blk with cache=directsync on an nvme
block device for the VM, fio with ioengine=libaio,direct=1,sync=1; a
matching invocation is sketched below the table):

                              |      old      | with FUA support
------------------------------+---------------+-------------------
bs=4k, iodepth=1, numjobs=1   |  45.6k iops   |  56.1k iops
bs=4k, iodepth=1, numjobs=16  | 183.3k iops   | 236.0k iops
bs=4k, iodepth=16, numjobs=1  | 258.4k iops   | 311.1k iops
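
A fio invocation matching these parameters might look like this (the
guest device path and runtime are assumptions):

  fio --name=fua-test --filename=/dev/vdb --ioengine=libaio --direct=1 \
      --sync=1 --rw=randwrite --bs=4k --iodepth=1 --numjobs=1 \
      --runtime=30 --time_based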

However, not all scenarios are clear wins. On another slower disk I saw
little to no improvement. In fact, in two corner case scenarios, I even
observed a regression, which I however consider acceptable:

1. On slow host disks in a write through cache mode, when the guest is
   using virtio-blk in a separate iothread so that polling can be
   enabled, and each completion is quickly followed up with a new
   request (so that polling gets it), it can happen that enabling FUA
   makes things slower - the additional very fast no-op flush we used to
   have gave the adaptive polling algorithm a success so that it kept
   polling. Without it, we only have the slow write request, which
   disables polling. This is a problem in the polling algorithm that
   will be fixed later in this series.

2. With a high queue depth, it can be beneficial to have flush requests
   for another reason: The optimisation in bdrv_co_flush() that flushes
   only once per write generation acts as a synchronisation mechanism
   that lets all requests complete at the same time. This can result in
   better batching and if the disk is very fast (I only saw this with a
   null_blk backend), this can make up for the overhead of the flush and
   improve throughput. In theory, we could optionally introduce a
   similar artificial latency in the normal completion path to achieve
   the same kind of completion batching. This is not implemented in this
   series.

Compatibility is not a concern for io_uring, it has supported RWF_DSYNC
from the start. Linux AIO started supporting it in Linux 4.13 and libaio
0.3.111. The kernel is not a problem for any supported build platform,
so it's not necessary to add runtime checks. However, openSUSE is still
stuck with an older libaio version that would break the build. We must
detect this at build time to avoid build failures.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-2-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/raw-aio.h |  8 ++++++--
 block/file-posix.c      | 26 ++++++++++++++++++--------
 block/io_uring.c        | 13 ++++++++-----
 block/linux-aio.c       | 24 +++++++++++++++++++++---
 meson.build             |  4 ++++
 5 files changed, 57 insertions(+), 18 deletions(-)

diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/raw-aio.h
+++ b/include/block/raw-aio.h
@@ -XXX,XX +XXX,XX @@
 #define QEMU_RAW_AIO_H
 
 #include "block/aio.h"
+#include "block/block-common.h"
 #include "qemu/iov.h"
 
 /* AIO request types */
@@ -XXX,XX +XXX,XX @@ void laio_cleanup(LinuxAioState *s);
 
 /* laio_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch);
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch);
 
 bool laio_has_fdsync(int);
+bool laio_has_fua(void);
 void laio_detach_aio_context(LinuxAioState *s, AioContext *old_context);
 void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
 #endif
@@ -XXX,XX +XXX,XX @@ void luring_cleanup(LuringState *s);
 
 /* luring_co_submit: submit I/O requests in the thread's current AioContext. */
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type);
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags);
 void luring_detach_aio_context(LuringState *s, AioContext *old_context);
 void luring_attach_aio_context(LuringState *s, AioContext *new_context);
 #endif
diff --git a/block/file-posix.c b/block/file-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -XXX,XX +XXX,XX @@ static int fd_open(BlockDriverState *bs)
 }
 
 static int64_t raw_getlength(BlockDriverState *bs);
+static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs);
 
 typedef struct RawPosixAIOData {
     BlockDriverState *bs;
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
 #endif
     s->needs_alignment = raw_needs_alignment(bs);
 
+    if (!s->use_linux_aio || laio_has_fua()) {
+        bs->supported_write_flags = BDRV_REQ_FUA;
+    }
+
     bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
     if (S_ISREG(st.st_mode)) {
         /* When extending regular files, we get zeros from the OS */
@@ -XXX,XX +XXX,XX @@ static inline bool raw_check_linux_aio(BDRVRawState *s)
 #endif
 
 static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
-                                   uint64_t bytes, QEMUIOVector *qiov, int type)
+                                   uint64_t bytes, QEMUIOVector *qiov, int type,
+                                   int flags)
 {
     BDRVRawState *s = bs->opaque;
     RawPosixAIOData acb;
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 #ifdef CONFIG_LINUX_IO_URING
     } else if (raw_check_linux_io_uring(s)) {
         assert(qiov->size == bytes);
-        ret = luring_co_submit(bs, s->fd, offset, qiov, type);
+        ret = luring_co_submit(bs, s->fd, offset, qiov, type, flags);
         goto out;
 #endif
 #ifdef CONFIG_LINUX_AIO
     } else if (raw_check_linux_aio(s)) {
         assert(qiov->size == bytes);
-        ret = laio_co_submit(s->fd, offset, qiov, type,
+        ret = laio_co_submit(s->fd, offset, qiov, type, flags,
                              s->aio_max_batch);
         goto out;
 #endif
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, int64_t *offset_ptr,
 
     assert(qiov->size == bytes);
     ret = raw_thread_pool_submit(handle_aiocb_rw, &acb);
+    if (ret == 0 && (flags & BDRV_REQ_FUA)) {
+        /* TODO Use pwritev2() instead if it's available */
+        ret = raw_co_flush_to_disk(bs);
+    }
     goto out; /* Avoid the compiler err of unused label */
 
 out:
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_preadv(BlockDriverState *bs, int64_t offset,
                                       int64_t bytes, QEMUIOVector *qiov,
                                       BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_READ, flags);
 }
 
 static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, int64_t offset,
                                        int64_t bytes, QEMUIOVector *qiov,
                                        BdrvRequestFlags flags)
 {
-    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE);
+    return raw_co_prw(bs, &offset, bytes, qiov, QEMU_AIO_WRITE, flags);
 }
 
 static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_flush_to_disk(BlockDriverState *bs)
 
 #ifdef CONFIG_LINUX_IO_URING
     if (raw_check_linux_io_uring(s)) {
-        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH);
+        return luring_co_submit(bs, s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
     }
 #endif
 #ifdef CONFIG_LINUX_AIO
     if (s->has_laio_fdsync && raw_check_linux_aio(s)) {
-        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0);
+        return laio_co_submit(s->fd, 0, NULL, QEMU_AIO_FLUSH, 0, 0);
     }
 #endif
     return raw_thread_pool_submit(handle_aiocb_flush, &acb);
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_zone_append(BlockDriverState *bs,
     }
 
     trace_zbd_zone_append(bs, *offset >> BDRV_SECTOR_BITS);
-    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND);
+    return raw_co_prw(bs, offset, len, qiov, QEMU_AIO_ZONE_APPEND, 0);
 }
 #endif
 
diff --git a/block/io_uring.c b/block/io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -XXX,XX +XXX,XX @@ static void luring_deferred_fn(void *opaque)
  *
  */
 static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
-                            uint64_t offset, int type)
+                            uint64_t offset, int type, BdrvRequestFlags flags)
 {
     int ret;
     struct io_uring_sqe *sqes = &luringcb->sqeq;
+    int luring_flags;
 
     switch (type) {
     case QEMU_AIO_WRITE:
-        io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
-                             luringcb->qiov->niov, offset);
+        luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_uring_prep_writev2(sqes, fd, luringcb->qiov->iov,
+                              luringcb->qiov->niov, offset, luring_flags);
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
@@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
 }
 
 int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
-                                  QEMUIOVector *qiov, int type)
+                                  QEMUIOVector *qiov, int type,
+                                  BdrvRequestFlags flags)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, int fd, uint64_t offset,
     };
     trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
                            type);
-    ret = luring_do_submit(fd, &luringcb, s, offset, type);
+    ret = luring_do_submit(fd, &luringcb, s, offset, type, flags);
 
     if (ret < 0) {
         return ret;
diff --git a/block/linux-aio.c b/block/linux-aio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/linux-aio.c
+++ b/block/linux-aio.c
@@ -XXX,XX +XXX,XX @@ static void laio_deferred_fn(void *opaque)
 }
 
 static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
-                          int type, uint64_t dev_max_batch)
+                          int type, BdrvRequestFlags flags,
+                          uint64_t dev_max_batch)
 {
     LinuxAioState *s = laiocb->ctx;
     struct iocb *iocbs = &laiocb->iocb;
     QEMUIOVector *qiov = laiocb->qiov;
+    int laio_flags;
 
     switch (type) {
     case QEMU_AIO_WRITE:
+#ifdef HAVE_IO_PREP_PWRITEV2
+        laio_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
+        io_prep_pwritev2(iocbs, fd, qiov->iov, qiov->niov, offset, laio_flags);
+#else
+        assert(flags == 0);
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
+#endif
         break;
     case QEMU_AIO_ZONE_APPEND:
         io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
@@ -XXX,XX +XXX,XX @@ static int laio_do_submit(int fd, struct qemu_laiocb *laiocb, off_t offset,
 }
 
 int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
-                                int type, uint64_t dev_max_batch)
+                                int type, BdrvRequestFlags flags,
+                                uint64_t dev_max_batch)
 {
     int ret;
     AioContext *ctx = qemu_get_current_aio_context();
@@ -XXX,XX +XXX,XX @@ int coroutine_fn laio_co_submit(int fd, uint64_t offset, QEMUIOVector *qiov,
         .qiov = qiov,
     };
 
-    ret = laio_do_submit(fd, &laiocb, offset, type, dev_max_batch);
+    ret = laio_do_submit(fd, &laiocb, offset, type, flags, dev_max_batch);
     if (ret < 0) {
         return ret;
     }
@@ -XXX,XX +XXX,XX @@ bool laio_has_fdsync(int fd)
     io_destroy(ctx);
     return (ret == -EINVAL) ? false : true;
 }
+
+bool laio_has_fua(void)
+{
+#ifdef HAVE_IO_PREP_PWRITEV2
+    return true;
+#else
+    return false;
+#endif
+}
diff --git a/meson.build b/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/meson.build
+++ b/meson.build
@@ -XXX,XX +XXX,XX @@ config_host_data.set('HAVE_OPTRESET',
                      cc.has_header_symbol('getopt.h', 'optreset'))
 config_host_data.set('HAVE_IPPROTO_MPTCP',
                      cc.has_header_symbol('netinet/in.h', 'IPPROTO_MPTCP'))
+if libaio.found()
+  config_host_data.set('HAVE_IO_PREP_PWRITEV2',
+                       cc.has_header_symbol('libaio.h', 'io_prep_pwritev2'))
+endif
 
 # has_member
 config_host_data.set('HAVE_SIGEV_NOTIFY_THREAD_ID',
-- 
2.48.1

For block drivers that don't advertise FUA support, we already call
bdrv_co_flush(), which considers BDRV_O_NO_FLUSH. However, drivers that
do support FUA still see the FUA flag with BDRV_O_NO_FLUSH and get the
associated performance penalty that cache.no-flush=on was supposed to
avoid.

Clear FUA for write requests if BDRV_O_NO_FLUSH is set.
9
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Message-ID: <20250307221634.71951-3-kwolf@redhat.com>
11
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
14
block/io.c | 4 ++++
15
1 file changed, 4 insertions(+)
16
17
diff --git a/block/io.c b/block/io.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/io.c
20
+++ b/block/io.c
21
@@ -XXX,XX +XXX,XX @@ bdrv_driver_pwritev(BlockDriverState *bs, int64_t offset, int64_t bytes,
22
return -ENOMEDIUM;
23
}
24
25
+ if (bs->open_flags & BDRV_O_NO_FLUSH) {
26
+ flags &= ~BDRV_REQ_FUA;
27
+ }
28
+
29
if ((flags & BDRV_REQ_FUA) &&
30
(~bs->supported_write_flags & BDRV_REQ_FUA)) {
31
flags &= ~BDRV_REQ_FUA;
32
--
33
2.48.1
diff view generated by jsdifflib
As a preparation for having multiple adaptive polling states per
AioContext, move the 'ns' field into a separate struct.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-4-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/block/aio.h |  6 +++++-
 util/aio-posix.c    | 31 ++++++++++++++++---------------
 util/async.c        |  3 ++-
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -XXX,XX +XXX,XX @@ struct BHListSlice {
 
 typedef QSLIST_HEAD(, AioHandler) AioHandlerSList;
 
+typedef struct AioPolledEvent {
+    int64_t ns;        /* current polling time in nanoseconds */
+} AioPolledEvent;
+
 struct AioContext {
     GSource source;
 
@@ -XXX,XX +XXX,XX @@ struct AioContext {
     int poll_disable_cnt;
 
     /* Polling mode parameters */
-    int64_t poll_ns;        /* current polling time in nanoseconds */
+    AioPolledEvent poll;
     int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
     int64_t poll_grow;      /* polling time growth factor */
     int64_t poll_shrink;    /* polling time shrink factor */
diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
         return false;
     }
 
-    max_ns = qemu_soonest_timeout(*timeout, ctx->poll_ns);
+    max_ns = qemu_soonest_timeout(*timeout, ctx->poll.ns);
     if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
         /*
          * Enable poll mode. It pairs with the poll_set_started() in
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
 
-        if (block_ns <= ctx->poll_ns) {
+        if (block_ns <= ctx->poll.ns) {
             /* This is the sweet spot, no adjustment needed */
         } else if (block_ns > ctx->poll_max_ns) {
             /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll_ns;
+            int64_t old = ctx->poll.ns;
 
             if (ctx->poll_shrink) {
-                ctx->poll_ns /= ctx->poll_shrink;
+                ctx->poll.ns /= ctx->poll_shrink;
             } else {
-                ctx->poll_ns = 0;
+                ctx->poll.ns = 0;
             }
 
-            trace_poll_shrink(ctx, old, ctx->poll_ns);
-        } else if (ctx->poll_ns < ctx->poll_max_ns &&
+            trace_poll_shrink(ctx, old, ctx->poll.ns);
+        } else if (ctx->poll.ns < ctx->poll_max_ns &&
                    block_ns < ctx->poll_max_ns) {
             /* There is room to grow, poll longer */
-            int64_t old = ctx->poll_ns;
+            int64_t old = ctx->poll.ns;
             int64_t grow = ctx->poll_grow;
 
             if (grow == 0) {
                 grow = 2;
             }
 
-            if (ctx->poll_ns) {
-                ctx->poll_ns *= grow;
+            if (ctx->poll.ns) {
+                ctx->poll.ns *= grow;
             } else {
-                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
+                ctx->poll.ns = 4000; /* start polling at 4 microseconds */
             }
 
-            if (ctx->poll_ns > ctx->poll_max_ns) {
-                ctx->poll_ns = ctx->poll_max_ns;
+            if (ctx->poll.ns > ctx->poll_max_ns) {
+                ctx->poll.ns = ctx->poll_max_ns;
             }
 
-            trace_poll_grow(ctx, old, ctx->poll_ns);
+            trace_poll_grow(ctx, old, ctx->poll.ns);
         }
     }
 
@@ -XXX,XX +XXX,XX @@ void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
     /* No thread synchronization here, it doesn't matter if an incorrect value
      * is used once.
      */
+    ctx->poll.ns = 0;
+
     ctx->poll_max_ns = max_ns;
-    ctx->poll_ns = 0;
     ctx->poll_grow = grow;
     ctx->poll_shrink = shrink;
 
diff --git a/util/async.c b/util/async.c
index XXXXXXX..XXXXXXX 100644
--- a/util/async.c
+++ b/util/async.c
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
     qemu_rec_mutex_init(&ctx->lock);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
-    ctx->poll_ns = 0;
+    ctx->poll.ns = 0;
+
     ctx->poll_max_ns = 0;
     ctx->poll_grow = 0;
     ctx->poll_shrink = 0;
-- 
2.48.1

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250307221634.71951-5-kwolf@redhat.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/aio-posix.c | 77 ++++++++++++++++++++++++++----------------------
 1 file changed, 41 insertions(+), 36 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@ static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
     return false;
 }
 
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns)
+{
+    if (block_ns <= poll->ns) {
+        /* This is the sweet spot, no adjustment needed */
+    } else if (block_ns > ctx->poll_max_ns) {
+        /* We'd have to poll for too long, poll less */
+        int64_t old = poll->ns;
+
+        if (ctx->poll_shrink) {
+            poll->ns /= ctx->poll_shrink;
+        } else {
+            poll->ns = 0;
+        }
+
+        trace_poll_shrink(ctx, old, poll->ns);
+    } else if (poll->ns < ctx->poll_max_ns &&
+               block_ns < ctx->poll_max_ns) {
+        /* There is room to grow, poll longer */
+        int64_t old = poll->ns;
+        int64_t grow = ctx->poll_grow;
+
+        if (grow == 0) {
+            grow = 2;
+        }
+
+        if (poll->ns) {
+            poll->ns *= grow;
+        } else {
+            poll->ns = 4000; /* start polling at 4 microseconds */
+        }
+
+        if (poll->ns > ctx->poll_max_ns) {
+            poll->ns = ctx->poll_max_ns;
+        }
+
+        trace_poll_grow(ctx, old, poll->ns);
+    }
+}
+
 bool aio_poll(AioContext *ctx, bool blocking)
 {
     AioHandlerList ready_list = QLIST_HEAD_INITIALIZER(ready_list);
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     /* Adjust polling time */
     if (ctx->poll_max_ns) {
         int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        if (block_ns <= ctx->poll.ns) {
-            /* This is the sweet spot, no adjustment needed */
-        } else if (block_ns > ctx->poll_max_ns) {
-            /* We'd have to poll for too long, poll less */
-            int64_t old = ctx->poll.ns;
-
-            if (ctx->poll_shrink) {
-                ctx->poll.ns /= ctx->poll_shrink;
-            } else {
-                ctx->poll.ns = 0;
-            }
-
-            trace_poll_shrink(ctx, old, ctx->poll.ns);
-        } else if (ctx->poll.ns < ctx->poll_max_ns &&
-                   block_ns < ctx->poll_max_ns) {
-            /* There is room to grow, poll longer */
-            int64_t old = ctx->poll.ns;
-            int64_t grow = ctx->poll_grow;
-
-            if (grow == 0) {
-                grow = 2;
-            }
-
-            if (ctx->poll.ns) {
-                ctx->poll.ns *= grow;
-            } else {
-                ctx->poll.ns = 4000; /* start polling at 4 microseconds */
-            }
-
-            if (ctx->poll.ns > ctx->poll_max_ns) {
-                ctx->poll.ns = ctx->poll_max_ns;
-            }
-
-            trace_poll_grow(ctx, old, ctx->poll.ns);
-        }
+        adjust_polling_time(ctx, &ctx->poll, block_ns);
     }
 
     progress |= aio_bh_poll(ctx);
-- 
2.48.1

Adaptive polling has a big problem: It doesn't consider that an event
loop can wait for many different events that may have very different
typical latencies.

For example, think of a guest that tends to send a new I/O request soon
after the previous I/O request completes, but the storage on the host is
rather slow. In this case, getting the new request from guest quickly
means that polling is enabled, but the next thing is performing the I/O
request on the backend, which is slow and disables polling again for the
next guest request. This means that in such a scenario, polling could
help for every other event, but is only ever enabled when it can't
succeed.

In order to fix this, keep a separate AioPolledEvent for each
AioHandler. We will then know that the backend file descriptor always
has a high latency and isn't worth polling for, but we also know that
the guest is always fast and we should poll for it. This solves at least
half of the problem, we can now keep polling for those cases where it
makes sense and get the improved performance from it.

Since the event loop doesn't know which event will be next, we still do
some unnecessary polling while we're waiting for the slow disk. I made
some attempts to be more clever than just randomly growing and shrinking
the polling time, and even to let callers be explicit about when they
expect a new event, but so far this hasn't resulted in improved
performance or even caused performance regressions. For now, let's just
fix the part that is easy enough to fix, we can revisit the rest later.
27
fix the part that is easy enough to fix, we can revisit the rest later.
4
28
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
29
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Reviewed-by: Alberto Garcia <berto@igalia.com>
30
Message-ID: <20250307221634.71951-6-kwolf@redhat.com>
7
Reviewed-by: Kashyap Chamarthy <kchamart@redhat.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
31
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Jeff Cody <jcody@redhat.com>
32
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
---
33
---
11
tests/qemu-iotests/030 | 4 ++++
34
include/block/aio.h | 1 -
12
tests/qemu-iotests/030.out | 4 ++--
35
util/aio-posix.h | 1 +
13
2 files changed, 6 insertions(+), 2 deletions(-)
36
util/aio-posix.c | 26 ++++++++++++++++++++++----
37
util/async.c | 2 --
38
4 files changed, 23 insertions(+), 7 deletions(-)
14
39
15
diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030
40
diff --git a/include/block/aio.h b/include/block/aio.h
16
index XXXXXXX..XXXXXXX 100755
41
index XXXXXXX..XXXXXXX 100644
17
--- a/tests/qemu-iotests/030
42
--- a/include/block/aio.h
18
+++ b/tests/qemu-iotests/030
43
+++ b/include/block/aio.h
19
@@ -XXX,XX +XXX,XX @@ class TestSingleDrive(iotests.QMPTestCase):
44
@@ -XXX,XX +XXX,XX @@ struct AioContext {
20
result = self.vm.qmp('block-stream', device='nonexistent')
45
int poll_disable_cnt;
21
self.assert_qmp(result, 'error/class', 'GenericError')
46
22
47
/* Polling mode parameters */
23
+ def test_job_id_missing(self):
48
- AioPolledEvent poll;
24
+ result = self.vm.qmp('block-stream', device='mid')
49
int64_t poll_max_ns; /* maximum polling time in nanoseconds */
25
+ self.assert_qmp(result, 'error/class', 'GenericError')
50
int64_t poll_grow; /* polling time growth factor */
51
int64_t poll_shrink; /* polling time shrink factor */
52
diff --git a/util/aio-posix.h b/util/aio-posix.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/util/aio-posix.h
55
+++ b/util/aio-posix.h
56
@@ -XXX,XX +XXX,XX @@ struct AioHandler {
57
#endif
58
int64_t poll_idle_timeout; /* when to stop userspace polling */
59
bool poll_ready; /* has polling detected an event? */
60
+ AioPolledEvent poll;
61
};
62
63
/* Add a handler to a ready list */
64
diff --git a/util/aio-posix.c b/util/aio-posix.c
65
index XXXXXXX..XXXXXXX 100644
66
--- a/util/aio-posix.c
67
+++ b/util/aio-posix.c
68
@@ -XXX,XX +XXX,XX @@ static bool run_poll_handlers(AioContext *ctx, AioHandlerList *ready_list,
69
static bool try_poll_mode(AioContext *ctx, AioHandlerList *ready_list,
70
int64_t *timeout)
71
{
72
+ AioHandler *node;
73
int64_t max_ns;
74
75
if (QLIST_EMPTY_RCU(&ctx->poll_aio_handlers)) {
76
return false;
77
}
78
79
- max_ns = qemu_soonest_timeout(*timeout, ctx->poll.ns);
80
+ max_ns = 0;
81
+ QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
82
+ max_ns = MAX(max_ns, node->poll.ns);
83
+ }
84
+ max_ns = qemu_soonest_timeout(*timeout, max_ns);
26
+
85
+
27
86
if (max_ns && !ctx->fdmon_ops->need_wait(ctx)) {
28
class TestParallelOps(iotests.QMPTestCase):
87
/*
29
num_ops = 4 # Number of parallel block-stream operations
88
* Enable poll mode. It pairs with the poll_set_started() in
30
diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out
89
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
90
91
/* Adjust polling time */
92
if (ctx->poll_max_ns) {
93
+ AioHandler *node;
94
int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
95
- adjust_polling_time(ctx, &ctx->poll, block_ns);
96
+
97
+ QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
98
+ if (QLIST_IS_INSERTED(node, node_ready)) {
99
+ adjust_polling_time(ctx, &node->poll, block_ns);
100
+ }
101
+ }
102
}
103
104
progress |= aio_bh_poll(ctx);
105
@@ -XXX,XX +XXX,XX @@ void aio_context_use_g_source(AioContext *ctx)
106
void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
107
int64_t grow, int64_t shrink, Error **errp)
108
{
109
+ AioHandler *node;
110
+
111
+ qemu_lockcnt_inc(&ctx->list_lock);
112
+ QLIST_FOREACH(node, &ctx->aio_handlers, node) {
113
+ node->poll.ns = 0;
114
+ }
115
+ qemu_lockcnt_dec(&ctx->list_lock);
116
+
117
/* No thread synchronization here, it doesn't matter if an incorrect value
118
* is used once.
119
*/
120
- ctx->poll.ns = 0;
121
-
122
ctx->poll_max_ns = max_ns;
123
ctx->poll_grow = grow;
124
ctx->poll_shrink = shrink;
125
diff --git a/util/async.c b/util/async.c
31
index XXXXXXX..XXXXXXX 100644
126
index XXXXXXX..XXXXXXX 100644
32
--- a/tests/qemu-iotests/030.out
127
--- a/util/async.c
33
+++ b/tests/qemu-iotests/030.out
128
+++ b/util/async.c
34
@@ -XXX,XX +XXX,XX @@
129
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
35
-......................
130
qemu_rec_mutex_init(&ctx->lock);
36
+.......................
131
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
37
----------------------------------------------------------------------
132
38
-Ran 22 tests
133
- ctx->poll.ns = 0;
39
+Ran 23 tests
134
-
40
135
ctx->poll_max_ns = 0;
41
OK
136
ctx->poll_grow = 0;
137
ctx->poll_shrink = 0;
42
--
138
--
43
1.8.3.1
139
2.48.1
44
45
diff view generated by jsdifflib
aio_dispatch_handler() adds handlers to ctx->poll_aio_handlers if
polling should be enabled. If we call adjust_polling_time() for all
polling handlers before this, new polling handlers are still left at
poll->ns = 0 and polling is only actually enabled after the next event.
Move the adjust_polling_time() call after aio_dispatch_handler().

This fixes test-nested-aio-poll, which expects that polling becomes
effective the first time around.

Signed-off-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311141912.135657-1-kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 util/aio-posix.c | 28 +++++++++++++++++-----------
 1 file changed, 17 insertions(+), 11 deletions(-)

diff --git a/util/aio-posix.c b/util/aio-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-posix.c
+++ b/util/aio-posix.c
@@ -XXX,XX +XXX,XX @@
 /* Stop userspace polling on a handler if it isn't active for some time */
 #define POLL_IDLE_INTERVAL_NS (7 * NANOSECONDS_PER_SECOND)
 
+static void adjust_polling_time(AioContext *ctx, AioPolledEvent *poll,
+                                int64_t block_ns);
+
 bool aio_poll_disabled(AioContext *ctx)
 {
     return qatomic_read(&ctx->poll_disable_cnt);
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_handler(AioContext *ctx, AioHandler *node)
  * scanning all handlers with aio_dispatch_handlers().
  */
 static bool aio_dispatch_ready_handlers(AioContext *ctx,
-                                        AioHandlerList *ready_list)
+                                        AioHandlerList *ready_list,
+                                        int64_t block_ns)
 {
     bool progress = false;
     AioHandler *node;
@@ -XXX,XX +XXX,XX @@ static bool aio_dispatch_ready_handlers(AioContext *ctx,
     while ((node = QLIST_FIRST(ready_list))) {
         QLIST_REMOVE(node, node_ready);
         progress = aio_dispatch_handler(ctx, node) || progress;
+
+        /*
+         * Adjust polling time only after aio_dispatch_handler(), which can
+         * add the handler to ctx->poll_aio_handlers.
+         */
+        if (ctx->poll_max_ns && QLIST_IS_INSERTED(node, node_poll)) {
+            adjust_polling_time(ctx, &node->poll, block_ns);
+        }
     }
 
     return progress;
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
     bool use_notify_me;
     int64_t timeout;
     int64_t start = 0;
+    int64_t block_ns = 0;
 
     /*
      * There cannot be two concurrent aio_poll calls for the same AioContext (or
@@ -XXX,XX +XXX,XX @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     aio_notify_accept(ctx);
 
-    /* Adjust polling time */
+    /* Calculate blocked time for adaptive polling */
     if (ctx->poll_max_ns) {
-        AioHandler *node;
-        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
-
-        QLIST_FOREACH(node, &ctx->poll_aio_handlers, node_poll) {
-            if (QLIST_IS_INSERTED(node, node_ready)) {
-                adjust_polling_time(ctx, &node->poll, block_ns);
-            }
-        }
+        block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
     }
 
     progress |= aio_bh_poll(ctx);
-    progress |= aio_dispatch_ready_handlers(ctx, &ready_list);
+    progress |= aio_dispatch_ready_handlers(ctx, &ready_list, block_ns);
 
     aio_free_deleted_handlers(ctx);
 
-- 
2.48.1

From: Thomas Huth <thuth@redhat.com>

qsd-migrate is currently only working for raw, qcow2 and qed.
Other formats are failing, e.g. because they don't support migration.
Thus let's limit this test to the three usable formats now.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Thomas Huth <thuth@redhat.com>
Message-ID: <20250224214058.205889-1-thuth@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 tests/qemu-iotests/tests/qsd-migrate | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/qemu-iotests/tests/qsd-migrate b/tests/qemu-iotests/tests/qsd-migrate
index XXXXXXX..XXXXXXX 100755
--- a/tests/qemu-iotests/tests/qsd-migrate
+++ b/tests/qemu-iotests/tests/qsd-migrate
@@ -XXX,XX +XXX,XX @@ import iotests

 from iotests import filter_qemu_io, filter_qtest

-iotests.script_initialize(supported_fmts=['generic'],
+iotests.script_initialize(supported_fmts=['qcow2', 'qed', 'raw'],
                           supported_protocols=['file'],
                           supported_platforms=['linux'])

--
2.48.1

From: Stefan Hajnoczi <stefanha@redhat.com>

Commit 71544d30a6f8 ("scsi: push request restart to SCSIDevice") removed
the only user of SCSIDiskState->bh.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-2-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/scsi-disk.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -XXX,XX +XXX,XX @@ struct SCSIDiskState {
     uint64_t max_unmap_size;
     uint64_t max_io_size;
     uint32_t quirks;
-    QEMUBH *bh;
     char *version;
     char *serial;
     char *vendor;
--
2.48.1

From: Stefan Hajnoczi <stefanha@redhat.com>

In the past a single AioContext was used for block I/O and it was
fetched using blk_get_aio_context(). Nowadays the block layer supports
running I/O from any AioContext and multiple AioContexts at the same
time. Remove the dma_blk_io() AioContext argument and use the current
AioContext instead.

This makes calling the function easier and enables multiple IOThreads to
use dma_blk_io() concurrently for the same block device.
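
To illustrate the new calling convention (a sketch based on the hunks
below; the variable names are illustrative, not part of the patch):

    /* Before: the caller had to fetch the BlockBackend's AioContext */
    aiocb = dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
                       io_func, io_func_opaque, cb, opaque, dir);

    /* After: the current thread's AioContext is used implicitly */
    aiocb = dma_blk_io(sg, offset, align,
                       io_func, io_func_opaque, cb, opaque, dir);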

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-3-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/system/dma.h | 3 +--
 hw/ide/core.c | 3 +--
 hw/ide/macio.c | 3 +--
 hw/scsi/scsi-disk.c | 6 ++----
 system/dma-helpers.c | 8 ++++----
 5 files changed, 9 insertions(+), 14 deletions(-)

diff --git a/include/system/dma.h b/include/system/dma.h
index XXXXXXX..XXXXXXX 100644
--- a/include/system/dma.h
+++ b/include/system/dma.h
@@ -XXX,XX +XXX,XX @@ typedef BlockAIOCB *DMAIOFunc(int64_t offset, QEMUIOVector *iov,
                               BlockCompletionFunc *cb, void *cb_opaque,
                               void *opaque);

-BlockAIOCB *dma_blk_io(AioContext *ctx,
-                       QEMUSGList *sg, uint64_t offset, uint32_t align,
+BlockAIOCB *dma_blk_io(QEMUSGList *sg, uint64_t offset, uint32_t align,
                        DMAIOFunc *io_func, void *io_func_opaque,
                        BlockCompletionFunc *cb, void *opaque, DMADirection dir);
 BlockAIOCB *dma_blk_read(BlockBackend *blk,
diff --git a/hw/ide/core.c b/hw/ide/core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -XXX,XX +XXX,XX @@ static void ide_dma_cb(void *opaque, int ret)
                                           BDRV_SECTOR_SIZE, ide_dma_cb, s);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk),
-                                        &s->sg, offset, BDRV_SECTOR_SIZE,
+        s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, BDRV_SECTOR_SIZE,
                                         ide_issue_trim, s, ide_dma_cb, s,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
diff --git a/hw/ide/macio.c b/hw/ide/macio.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/ide/macio.c
+++ b/hw/ide/macio.c
@@ -XXX,XX +XXX,XX @@ static void pmac_ide_transfer_cb(void *opaque, int ret)
                              pmac_ide_transfer_cb, io);
         break;
     case IDE_DMA_TRIM:
-        s->bus->dma->aiocb = dma_blk_io(blk_get_aio_context(s->blk), &s->sg,
-                                        offset, 0x1, ide_issue_trim, s,
+        s->bus->dma->aiocb = dma_blk_io(&s->sg, offset, 0x1, ide_issue_trim, s,
                                         pmac_ide_transfer_cb, io,
                                         DMA_DIRECTION_TO_DEVICE);
         break;
diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -XXX,XX +XXX,XX @@ static void scsi_do_read(SCSIDiskReq *r, int ret)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_READ);
         r->req.residual -= r->req.sg->size;
-        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
-                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+        r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                   BDRV_SECTOR_SIZE,
                                   sdc->dma_readv, r, scsi_dma_complete, r,
                                   DMA_DIRECTION_FROM_DEVICE);
@@ -XXX,XX +XXX,XX @@ static void scsi_write_data(SCSIRequest *req)
     if (r->req.sg) {
         dma_acct_start(s->qdev.conf.blk, &r->acct, r->req.sg, BLOCK_ACCT_WRITE);
         r->req.residual -= r->req.sg->size;
-        r->req.aiocb = dma_blk_io(blk_get_aio_context(s->qdev.conf.blk),
-                                  r->req.sg, r->sector << BDRV_SECTOR_BITS,
+        r->req.aiocb = dma_blk_io(r->req.sg, r->sector << BDRV_SECTOR_BITS,
                                   BDRV_SECTOR_SIZE,
                                   sdc->dma_writev, r, scsi_dma_complete, r,
                                   DMA_DIRECTION_TO_DEVICE);
diff --git a/system/dma-helpers.c b/system/dma-helpers.c
index XXXXXXX..XXXXXXX 100644
--- a/system/dma-helpers.c
+++ b/system/dma-helpers.c
@@ -XXX,XX +XXX,XX @@ static const AIOCBInfo dma_aiocb_info = {
     .cancel_async = dma_aio_cancel,
 };

-BlockAIOCB *dma_blk_io(AioContext *ctx,
+BlockAIOCB *dma_blk_io(
     QEMUSGList *sg, uint64_t offset, uint32_t align,
     DMAIOFunc *io_func, void *io_func_opaque,
     BlockCompletionFunc *cb,
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_io(AioContext *ctx,

     dbs->acb = NULL;
     dbs->sg = sg;
-    dbs->ctx = ctx;
+    dbs->ctx = qemu_get_current_aio_context();
     dbs->offset = offset;
     dbs->align = align;
     dbs->sg_cur_index = 0;
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_read(BlockBackend *blk,
                          QEMUSGList *sg, uint64_t offset, uint32_t align,
                          void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+    return dma_blk_io(sg, offset, align,
                       dma_blk_read_io_func, blk, cb, opaque,
                       DMA_DIRECTION_FROM_DEVICE);
 }
@@ -XXX,XX +XXX,XX @@ BlockAIOCB *dma_blk_write(BlockBackend *blk,
                           QEMUSGList *sg, uint64_t offset, uint32_t align,
                           void (*cb)(void *opaque, int ret), void *opaque)
 {
-    return dma_blk_io(blk_get_aio_context(blk), sg, offset, align,
+    return dma_blk_io(sg, offset, align,
                       dma_blk_write_io_func, blk, cb, opaque,
                       DMA_DIRECTION_TO_DEVICE);
 }
--
2.48.1

From: Stefan Hajnoczi <stefanha@redhat.com>

Until now, a SCSIDevice's I/O requests have run in a single AioContext.
In order to support multiple IOThreads it will be necessary to move to
the concept of a per-SCSIRequest AioContext.
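
The pattern, sketched from the hunks below: the AioContext is recorded
when the request is created and asserted in the completion paths:

    req->ctx = qemu_get_current_aio_context();    /* at creation time */

    /* later, in a completion callback: */
    assert(r->req.ctx == qemu_get_current_aio_context());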

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-4-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/scsi/scsi.h | 1 +
 hw/scsi/scsi-bus.c | 1 +
 hw/scsi/scsi-disk.c | 17 ++++++-----------
 3 files changed, 8 insertions(+), 11 deletions(-)

diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -XXX,XX +XXX,XX @@ struct SCSIRequest {
     SCSIBus *bus;
     SCSIDevice *dev;
     const SCSIReqOps *ops;
+    AioContext *ctx;
     uint32_t refcount;
     uint32_t tag;
     uint32_t lun;
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -XXX,XX +XXX,XX @@ invalid_opcode:
         }
     }

+    req->ctx = qemu_get_current_aio_context();
     req->cmd = cmd;
     req->residual = req->cmd.xfer;

diff --git a/hw/scsi/scsi-disk.c b/hw/scsi/scsi-disk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-disk.c
+++ b/hw/scsi/scsi-disk.c
@@ -XXX,XX +XXX,XX @@ static void scsi_aio_complete(void *opaque, int ret)
     SCSIDiskReq *r = (SCSIDiskReq *)opaque;
     SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);

-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());

     assert(r->req.aiocb != NULL);
     r->req.aiocb = NULL;
@@ -XXX,XX +XXX,XX @@ static void scsi_dma_complete(void *opaque, int ret)

 static void scsi_read_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;

-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());

     assert(r->req.aiocb == NULL);
     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
@@ -XXX,XX +XXX,XX @@ static void scsi_read_data(SCSIRequest *req)

 static void scsi_write_complete_noio(SCSIDiskReq *r, int ret)
 {
-    SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev);
     uint32_t n;

-    /* The request must only run in the BlockBackend's AioContext */
-    assert(blk_get_aio_context(s->qdev.conf.blk) ==
-           qemu_get_current_aio_context());
+    /* The request must run in its AioContext */
+    assert(r->req.ctx == qemu_get_current_aio_context());

     assert (r->req.aiocb == NULL);
     if (scsi_disk_req_check_error(r, ret, ret > 0)) {
--
2.48.1

From: Stefan Hajnoczi <stefanha@redhat.com>

SCSIDevice keeps track of in-flight requests for device reset and Task
Management Functions (TMFs). The request list requires protection so
that multi-threaded SCSI emulation can be implemented in commits that
follow.
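
The resulting access pattern, sketched from the hunks below:

    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
        QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
            fn(req, opaque);
        }
    }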

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-5-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/scsi/scsi.h | 7 ++-
 hw/scsi/scsi-bus.c | 120 +++++++++++++++++++++++++++++------------
 2 files changed, 88 insertions(+), 39 deletions(-)

diff --git a/include/hw/scsi/scsi.h b/include/hw/scsi/scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/scsi/scsi.h
+++ b/include/hw/scsi/scsi.h
@@ -XXX,XX +XXX,XX @@ struct SCSIRequest {
     bool dma_started;
     BlockAIOCB *aiocb;
     QEMUSGList *sg;
+
+    /* Protected by SCSIDevice->requests_lock */
     QTAILQ_ENTRY(SCSIRequest) next;
 };

@@ -XXX,XX +XXX,XX @@ struct SCSIDevice
     uint8_t sense[SCSI_SENSE_BUF_SIZE];
     uint32_t sense_len;

-    /*
-     * The requests list is only accessed from the AioContext that executes
-     * requests or from the main loop when IOThread processing is stopped.
-     */
+    QemuMutex requests_lock; /* protects the requests list */
     QTAILQ_HEAD(, SCSIRequest) requests;

     uint32_t channel;
diff --git a/hw/scsi/scsi-bus.c b/hw/scsi/scsi-bus.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/scsi-bus.c
+++ b/hw/scsi/scsi-bus.c
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_sync(SCSIDevice *s,
     assert(!runstate_is_running());
     assert(qemu_in_main_thread());

-    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
-        fn(req, opaque);
+    /*
+     * Locking is not necessary because the guest is stopped and no other
+     * threads can be accessing the requests list, but take the lock for
+     * consistency.
+     */
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        QTAILQ_FOREACH_SAFE(req, &s->requests, next, next_req) {
+            fn(req, opaque);
+        }
     }
 }

@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async_bh(void *opaque)
 {
     g_autofree SCSIDeviceForEachReqAsyncData *data = opaque;
     SCSIDevice *s = data->s;
-    AioContext *ctx;
-    SCSIRequest *req;
-    SCSIRequest *next;
+    g_autoptr(GList) reqs = NULL;

     /*
-     * The BB cannot have changed contexts between this BH being scheduled and
-     * now: BBs' AioContexts, when they have a node attached, can only be
-     * changed via bdrv_try_change_aio_context(), in a drained section. While
-     * we have the in-flight counter incremented, that drain must block.
+     * Build a list of requests in this AioContext so fn() can be invoked later
+     * outside requests_lock.
      */
-    ctx = blk_get_aio_context(s->conf.blk);
-    assert(ctx == qemu_get_current_aio_context());
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        AioContext *ctx = qemu_get_current_aio_context();
+        SCSIRequest *req;
+        SCSIRequest *next;
+
+        QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
+            if (req->ctx == ctx) {
+                scsi_req_ref(req); /* dropped after calling fn() */
+                reqs = g_list_prepend(reqs, req);
+            }
+        }
+    }

-    QTAILQ_FOREACH_SAFE(req, &s->requests, next, next) {
-        data->fn(req, data->fn_opaque);
+    /* Call fn() on each request */
+    for (GList *elem = g_list_first(reqs); elem; elem = g_list_next(elem)) {
+        data->fn(elem->data, data->fn_opaque);
+        scsi_req_unref(elem->data);
     }

     /* Drop the reference taken by scsi_device_for_each_req_async() */
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async_bh(void *opaque)
     blk_dec_in_flight(s->conf.blk);
 }

+static void scsi_device_for_each_req_async_do_ctx(gpointer key, gpointer value,
+                                                  gpointer user_data)
+{
+    AioContext *ctx = key;
+    SCSIDeviceForEachReqAsyncData *params = user_data;
+    SCSIDeviceForEachReqAsyncData *data;
+
+    data = g_new(SCSIDeviceForEachReqAsyncData, 1);
+    data->s = params->s;
+    data->fn = params->fn;
+    data->fn_opaque = params->fn_opaque;
+
+    /*
+     * Hold a reference to the SCSIDevice until
+     * scsi_device_for_each_req_async_bh() finishes.
+     */
+    object_ref(OBJECT(data->s));
+
+    /* Paired with scsi_device_for_each_req_async_bh() */
+    blk_inc_in_flight(data->s->conf.blk);
+
+    aio_bh_schedule_oneshot(ctx, scsi_device_for_each_req_async_bh, data);
+}
+
 /*
  * Schedule @fn() to be invoked for each enqueued request in device @s. @fn()
- * runs in the AioContext that is executing the request.
+ * must be thread-safe because it runs concurrently in each AioContext that is
+ * executing a request.
+ *
  * Keeps the BlockBackend's in-flight counter incremented until everything is
  * done, so draining it will settle all scheduled @fn() calls.
  */
@@ -XXX,XX +XXX,XX @@ static void scsi_device_for_each_req_async(SCSIDevice *s,
 {
     assert(qemu_in_main_thread());

-    SCSIDeviceForEachReqAsyncData *data =
-        g_new(SCSIDeviceForEachReqAsyncData, 1);
-
-    data->s = s;
-    data->fn = fn;
-    data->fn_opaque = opaque;
-
-    /*
-     * Hold a reference to the SCSIDevice until
-     * scsi_device_for_each_req_async_bh() finishes.
-     */
-    object_ref(OBJECT(s));
+    /* The set of AioContexts where the requests are being processed */
+    g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
+    WITH_QEMU_LOCK_GUARD(&s->requests_lock) {
+        SCSIRequest *req;
+        QTAILQ_FOREACH(req, &s->requests, next) {
+            g_hash_table_add(aio_contexts, req->ctx);
+        }
+    }

-    /* Paired with blk_dec_in_flight() in scsi_device_for_each_req_async_bh() */
-    blk_inc_in_flight(s->conf.blk);
-    aio_bh_schedule_oneshot(blk_get_aio_context(s->conf.blk),
-                            scsi_device_for_each_req_async_bh,
-                            data);
+    /* Schedule a BH for each AioContext */
+    SCSIDeviceForEachReqAsyncData params = {
+        .s = s,
+        .fn = fn,
+        .fn_opaque = opaque,
+    };
+    g_hash_table_foreach(
+        aio_contexts,
+        scsi_device_for_each_req_async_do_ctx,
+        &params
+    );
 }

 static void scsi_device_realize(SCSIDevice *s, Error **errp)
@@ -XXX,XX +XXX,XX @@ static void scsi_qdev_realize(DeviceState *qdev, Error **errp)
         dev->lun = lun;
     }

+    qemu_mutex_init(&dev->requests_lock);
     QTAILQ_INIT(&dev->requests);
     scsi_device_realize(dev, &local_err);
     if (local_err) {
@@ -XXX,XX +XXX,XX @@ static void scsi_qdev_unrealize(DeviceState *qdev)

     scsi_device_purge_requests(dev, SENSE_CODE(NO_SENSE));

+    qemu_mutex_destroy(&dev->requests_lock);
+
     scsi_device_unrealize(dev);

     blockdev_mark_auto_del(dev->conf.blk);
@@ -XXX,XX +XXX,XX @@ static void scsi_req_enqueue_internal(SCSIRequest *req)
         req->sg = NULL;
     }
     req->enqueued = true;
-    QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
+
+    WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
+        QTAILQ_INSERT_TAIL(&req->dev->requests, req, next);
+    }
 }

 int32_t scsi_req_enqueue(SCSIRequest *req)
@@ -XXX,XX +XXX,XX @@ static void scsi_req_dequeue(SCSIRequest *req)
     trace_scsi_req_dequeue(req->dev->id, req->lun, req->tag);
     req->retry = false;
     if (req->enqueued) {
-        QTAILQ_REMOVE(&req->dev->requests, req, next);
+        WITH_QEMU_LOCK_GUARD(&req->dev->requests_lock) {
+            QTAILQ_REMOVE(&req->dev->requests, req, next);
+        }
         req->enqueued = false;
         scsi_req_unref(req);
     }
@@ -XXX,XX +XXX,XX @@ static void scsi_device_class_init(ObjectClass *klass, void *data)

 static void scsi_dev_instance_init(Object *obj)
 {
-    DeviceState *dev = DEVICE(obj);
-    SCSIDevice *s = SCSI_DEVICE(dev);
+    SCSIDevice *s = SCSI_DEVICE(obj);

     device_add_bootindex_property(obj, &s->conf.bootindex,
                                   "bootindex", NULL,
--
2.48.1

From: Stefan Hajnoczi <stefanha@redhat.com>

Virtqueues are not thread-safe. Until now this was not a major issue
since all virtqueue processing happened in the same thread. The ctrl
queue's Task Management Function (TMF) requests sometimes need the main
loop, so a BH was used to schedule the virtqueue completion back in the
thread that has virtqueue access.

When IOThread Virtqueue Mapping is introduced in later commits, event
and ctrl virtqueue accesses from other threads will become necessary.
Introduce an optional per-virtqueue lock so the event and ctrl
virtqueues can be protected in the commits that follow.

The addition of the ctrl virtqueue lock makes
virtio_scsi_complete_req_from_main_loop() and its BH unnecessary.
Instead, take the ctrl virtqueue lock from the main loop thread.

The cmd virtqueue does not have a lock because the entirety of SCSI
command processing happens in one thread. Only one thread accesses the
cmd virtqueue and a lock is unnecessary.
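
For illustration, callers now pass the lock that matches the virtqueue
they operate on, or NULL when no locking is needed (a sketch of the
call sites changed below):

    virtio_scsi_complete_req(req, &s->ctrl_lock);   /* ctrl virtqueue */
    virtio_scsi_complete_req(req, &s->event_lock);  /* event virtqueue */
    virtio_scsi_complete_req(req, NULL);            /* cmd virtqueue */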

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-6-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/virtio-scsi.h | 3 ++
 hw/scsi/virtio-scsi.c | 84 ++++++++++++++++++---------
 2 files changed, 49 insertions(+), 38 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
     int resetting; /* written from main loop thread, read from any thread */
     bool events_dropped;

+    QemuMutex ctrl_lock; /* protects ctrl_vq */
+    QemuMutex event_lock; /* protects event_vq */
+
     /*
      * TMFs deferred to main loop BH. These fields are protected by
      * tmf_bh_lock.
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_free_req(VirtIOSCSIReq *req)
     g_free(req);
 }

-static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
+static void virtio_scsi_complete_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
 {
     VirtIOSCSI *s = req->dev;
     VirtQueue *vq = req->vq;
     VirtIODevice *vdev = VIRTIO_DEVICE(s);

     qemu_iovec_from_buf(&req->resp_iov, 0, &req->resp, req->resp_size);
+
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     virtqueue_push(vq, &req->elem, req->qsgl.size + req->resp_iov.size);
     if (s->dataplane_started && !s->dataplane_fenced) {
         virtio_notify_irqfd(vdev, vq);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
         virtio_notify(vdev, vq);
     }

+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     if (req->sreq) {
         req->sreq->hba_private = NULL;
         scsi_req_unref(req->sreq);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_req(VirtIOSCSIReq *req)
     virtio_scsi_free_req(req);
 }

-static void virtio_scsi_complete_req_bh(void *opaque)
+static void virtio_scsi_bad_req(VirtIOSCSIReq *req, QemuMutex *vq_lock)
 {
-    VirtIOSCSIReq *req = opaque;
+    virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");

-    virtio_scsi_complete_req(req);
-}
+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }

-/*
- * Called from virtio_scsi_do_one_tmf_bh() in main loop thread. The main loop
- * thread cannot touch the virtqueue since that could race with an IOThread.
- */
-static void virtio_scsi_complete_req_from_main_loop(VirtIOSCSIReq *req)
-{
-    VirtIOSCSI *s = req->dev;
+    virtqueue_detach_element(req->vq, &req->elem, 0);

-    if (!s->ctx || s->ctx == qemu_get_aio_context()) {
-        /* No need to schedule a BH when there is no IOThread */
-        virtio_scsi_complete_req(req);
-    } else {
-        /* Run request completion in the IOThread */
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_complete_req_bh, req);
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
     }
-}

-static void virtio_scsi_bad_req(VirtIOSCSIReq *req)
-{
-    virtio_error(VIRTIO_DEVICE(req->dev), "wrong size for virtio-scsi headers");
-    virtqueue_detach_element(req->vq, &req->elem, 0);
     virtio_scsi_free_req(req);
 }

@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_parse_req(VirtIOSCSIReq *req,
     return 0;
 }

-static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq)
+static VirtIOSCSIReq *virtio_scsi_pop_req(VirtIOSCSI *s, VirtQueue *vq, QemuMutex *vq_lock)
 {
     VirtIOSCSICommon *vs = (VirtIOSCSICommon *)s;
     VirtIOSCSIReq *req;

+    if (vq_lock) {
+        qemu_mutex_lock(vq_lock);
+    }
+
     req = virtqueue_pop(vq, sizeof(VirtIOSCSIReq) + vs->cdb_size);
+
+    if (vq_lock) {
+        qemu_mutex_unlock(vq_lock);
+    }
+
     if (!req) {
         return NULL;
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)

         trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(req->req.tmf.lun),
                                    req->req.tmf.tag, req->resp.tmf.response);
-        virtio_scsi_complete_req(req);
+        virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
     }
     g_free(n);
 }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)

 out:
     object_unref(OBJECT(d));
-    virtio_scsi_complete_req_from_main_loop(req);
+    virtio_scsi_complete_req(req, &s->ctrl_lock);
 }

 /* Some TMFs must be processed from the main loop thread */
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)

         /* SAM-6 6.3.2 Hard reset */
         req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
-        virtio_scsi_complete_req(req);
+        virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
     }
 }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)

     if (iov_to_buf(req->elem.out_sg, req->elem.out_num, 0,
                    &type, sizeof(type)) < sizeof(type)) {
-        virtio_scsi_bad_req(req);
+        virtio_scsi_bad_req(req, &s->ctrl_lock);
         return;
     }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
     if (type == VIRTIO_SCSI_T_TMF) {
         if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlTMFReq),
                     sizeof(VirtIOSCSICtrlTMFResp)) < 0) {
-            virtio_scsi_bad_req(req);
+            virtio_scsi_bad_req(req, &s->ctrl_lock);
             return;
         } else {
             r = virtio_scsi_do_tmf(s, req);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
                type == VIRTIO_SCSI_T_AN_SUBSCRIBE) {
         if (virtio_scsi_parse_req(req, sizeof(VirtIOSCSICtrlANReq),
                                   sizeof(VirtIOSCSICtrlANResp)) < 0) {
-            virtio_scsi_bad_req(req);
+            virtio_scsi_bad_req(req, &s->ctrl_lock);
             return;
         } else {
             req->req.an.event_requested =
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_req(VirtIOSCSI *s, VirtIOSCSIReq *req)
         type == VIRTIO_SCSI_T_AN_SUBSCRIBE)
             trace_virtio_scsi_an_resp(virtio_scsi_get_lun(req->req.an.lun),
                                       req->resp.an.response);
-        virtio_scsi_complete_req(req);
+        virtio_scsi_complete_req(req, &s->ctrl_lock);
     } else {
         assert(r == -EINPROGRESS);
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
     VirtIOSCSIReq *req;

-    while ((req = virtio_scsi_pop_req(s, vq))) {
+    while ((req = virtio_scsi_pop_req(s, vq, &s->ctrl_lock))) {
         virtio_scsi_handle_ctrl_req(s, req);
     }
 }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_complete_cmd_req(VirtIOSCSIReq *req)
      * in virtio_scsi_command_complete.
      */
     req->resp_size = sizeof(VirtIOSCSICmdResp);
-    virtio_scsi_complete_req(req);
+    virtio_scsi_complete_req(req, NULL);
 }

 static void virtio_scsi_command_failed(SCSIRequest *r)
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
         virtio_scsi_fail_cmd_req(req);
         return -ENOTSUP;
     } else {
-        virtio_scsi_bad_req(req);
+        virtio_scsi_bad_req(req, NULL);
         return -EINVAL;
     }
 }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_cmd_vq(VirtIOSCSI *s, VirtQueue *vq)
         virtio_queue_set_notification(vq, 0);
     }

-    while ((req = virtio_scsi_pop_req(s, vq))) {
+    while ((req = virtio_scsi_pop_req(s, vq, NULL))) {
         ret = virtio_scsi_handle_cmd_req_prepare(s, req);
         if (!ret) {
             QTAILQ_INSERT_TAIL(&reqs, req, next);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
         return;
     }

-    req = virtio_scsi_pop_req(s, vs->event_vq);
+    req = virtio_scsi_pop_req(s, vs->event_vq, &s->event_lock);
     if (!req) {
         s->events_dropped = true;
         return;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
     }

     if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
-        virtio_scsi_bad_req(req);
+        virtio_scsi_bad_req(req, &s->event_lock);
         return;
     }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
     }
     trace_virtio_scsi_event(virtio_scsi_get_lun(evt->lun), event, reason);

-    virtio_scsi_complete_req(req);
+    virtio_scsi_complete_req(req, &s->event_lock);
 }

 static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
     Error *err = NULL;

     QTAILQ_INIT(&s->tmf_bh_list);
+    qemu_mutex_init(&s->ctrl_lock);
+    qemu_mutex_init(&s->event_lock);
     qemu_mutex_init(&s->tmf_bh_lock);

     virtio_scsi_common_realize(dev,
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
     qemu_mutex_destroy(&s->tmf_bh_lock);
+    qemu_mutex_destroy(&s->event_lock);
+    qemu_mutex_destroy(&s->ctrl_lock);
 }

 static const Property virtio_scsi_properties[] = {
--
2.48.1

From: Stefan Hajnoczi <stefanha@redhat.com>

The block layer can invoke the resize callback from any AioContext that
is processing requests. The virtqueue is already protected but the
events_dropped field also needs to be protected against races. Cover it
using the event virtqueue lock because it is closely associated with
accesses to the virtqueue.
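
A sketch of the access pattern introduced below: the flag is only read
or written under event_lock, and the value is copied out before use:

    bool events_dropped;

    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
        events_dropped = s->events_dropped;
    }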

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-7-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/virtio-scsi.h | 3 ++-
 hw/scsi/virtio-scsi.c | 29 ++++++++++++++++++++---------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {

     SCSIBus bus;
     int resetting; /* written from main loop thread, read from any thread */
+
+    QemuMutex event_lock; /* protects event_vq and events_dropped */
     bool events_dropped;

     QemuMutex ctrl_lock; /* protects ctrl_vq */
-    QemuMutex event_lock; /* protects event_vq */

     /*
      * TMFs deferred to main loop BH. These fields are protected by
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)

     vs->sense_size = VIRTIO_SCSI_SENSE_DEFAULT_SIZE;
     vs->cdb_size = VIRTIO_SCSI_CDB_DEFAULT_SIZE;
-    s->events_dropped = false;
+
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        s->events_dropped = false;
+    }
 }

 typedef struct {
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,
     }

     req = virtio_scsi_pop_req(s, vs->event_vq, &s->event_lock);
-    if (!req) {
-        s->events_dropped = true;
-        return;
-    }
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        if (!req) {
+            s->events_dropped = true;
+            return;
+        }

-    if (s->events_dropped) {
-        event |= VIRTIO_SCSI_T_EVENTS_MISSED;
-        s->events_dropped = false;
+        if (s->events_dropped) {
+            event |= VIRTIO_SCSI_T_EVENTS_MISSED;
+            s->events_dropped = false;
+        }
     }

     if (virtio_scsi_parse_req(req, 0, sizeof(VirtIOSCSIEvent))) {
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_push_event(VirtIOSCSI *s,

 static void virtio_scsi_handle_event_vq(VirtIOSCSI *s, VirtQueue *vq)
 {
-    if (s->events_dropped) {
+    bool events_dropped;
+
+    WITH_QEMU_LOCK_GUARD(&s->event_lock) {
+        events_dropped = s->events_dropped;
+    }
+
+    if (events_dropped) {
         VirtIOSCSIEventInfo info = {
             .event = VIRTIO_SCSI_T_NO_EVENT,
         };
--
2.48.1

From: Stefan Hajnoczi <stefanha@redhat.com>

With IOThread Virtqueue Mapping there will be multiple AioContexts
processing SCSI requests. scsi_req_cancel() and other SCSI request
operations must be performed from the AioContext where the request is
running.

Introduce a virtio_scsi_defer_tmf_to_aio_context() function and the
necessary VirtIOSCSIReq->remaining refcount infrastructure to move the
TMF code into the AioContext where the request is running.

For the time being there is still just one AioContext: the main loop or
the IOThread. When the iothread-vq-mapping parameter is added in a later
patch this will be changed to per-virtqueue AioContexts.
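
The refcount protocol, sketched from the hunks below: a reference is
taken before deferring work to another AioContext, and whoever drops
the last reference completes the TMF:

    /* before scheduling work in another AioContext: */
    qatomic_inc(&tmf->remaining);
    aio_bh_schedule_oneshot(ctx, virtio_scsi_do_tmf_aio_context, tmf);

    /* each participant eventually drops its reference: */
    if (qatomic_fetch_dec(&tmf->remaining) == 1) {
        virtio_scsi_complete_req(tmf, &tmf->dev->ctrl_lock);
    }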

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-8-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/virtio-scsi.c | 270 ++++++++++++++++++++++++++++++----------
 1 file changed, 206 insertions(+), 64 deletions(-)

diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ typedef struct VirtIOSCSIReq {
     /* Used for two-stage request submission and TMFs deferred to BH */
     QTAILQ_ENTRY(VirtIOSCSIReq) next;

-    /* Used for cancellation of request during TMFs */
+    /* Used for cancellation of request during TMFs. Atomic. */
     int remaining;

     SCSIRequest *sreq;
@@ -XXX,XX +XXX,XX @@ typedef struct {
     VirtIOSCSIReq *tmf_req;
 } VirtIOSCSICancelNotifier;

+static void virtio_scsi_tmf_dec_remaining(VirtIOSCSIReq *tmf)
+{
+    if (qatomic_fetch_dec(&tmf->remaining) == 1) {
+        trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(tmf->req.tmf.lun),
+                                   tmf->req.tmf.tag, tmf->resp.tmf.response);
+
+        virtio_scsi_complete_req(tmf, &tmf->dev->ctrl_lock);
+    }
+}
+
 static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
 {
     VirtIOSCSICancelNotifier *n = container_of(notifier,
                                                VirtIOSCSICancelNotifier,
                                                notifier);

-    if (--n->tmf_req->remaining == 0) {
-        VirtIOSCSIReq *req = n->tmf_req;
-
-        trace_virtio_scsi_tmf_resp(virtio_scsi_get_lun(req->req.tmf.lun),
-                                   req->req.tmf.tag, req->resp.tmf.response);
-        virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
-    }
+    virtio_scsi_tmf_dec_remaining(n->tmf_req);
     g_free(n);
 }

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
     }
 }

-static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
+static void virtio_scsi_defer_tmf_to_main_loop(VirtIOSCSIReq *req)
 {
     VirtIOSCSI *s = req->dev;

@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_defer_tmf_to_bh(VirtIOSCSIReq *req)
     }
 }

+static void virtio_scsi_tmf_cancel_req(VirtIOSCSIReq *tmf, SCSIRequest *r)
+{
+    VirtIOSCSICancelNotifier *notifier;
+
+    assert(r->ctx == qemu_get_current_aio_context());
+
+    /* Decremented in virtio_scsi_cancel_notify() */
+    qatomic_inc(&tmf->remaining);
+
+    notifier = g_new(VirtIOSCSICancelNotifier, 1);
+    notifier->notifier.notify = virtio_scsi_cancel_notify;
+    notifier->tmf_req = tmf;
+    scsi_req_cancel_async(r, &notifier->notifier);
+}
+
+/* Execute a TMF on the requests in the current AioContext */
+static void virtio_scsi_do_tmf_aio_context(void *opaque)
+{
+    AioContext *ctx = qemu_get_current_aio_context();
+    VirtIOSCSIReq *tmf = opaque;
+    VirtIOSCSI *s = tmf->dev;
+    SCSIDevice *d = virtio_scsi_device_get(s, tmf->req.tmf.lun);
+    SCSIRequest *r;
+    bool match_tag;
+
+    if (!d) {
+        tmf->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
+        virtio_scsi_tmf_dec_remaining(tmf);
+        return;
+    }
+
+    /*
+     * This function could handle other subtypes that need to be processed in
+     * the request's AioContext in the future, but for now only request
+     * cancelation subtypes are performed here.
+     */
+    switch (tmf->req.tmf.subtype) {
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK:
+        match_tag = true;
+        break;
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
+    case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+        match_tag = false;
+        break;
+    default:
+        g_assert_not_reached();
+    }
+
+    WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+        QTAILQ_FOREACH(r, &d->requests, next) {
+            VirtIOSCSIReq *cmd_req = r->hba_private;
+            assert(cmd_req); /* request has hba_private while enqueued */
+
+            if (r->ctx != ctx) {
+                continue;
+            }
+            if (match_tag && cmd_req->req.cmd.tag != tmf->req.tmf.tag) {
+                continue;
+            }
+            virtio_scsi_tmf_cancel_req(tmf, r);
+        }
+    }
+
+    /* Incremented by virtio_scsi_do_tmf() */
+    virtio_scsi_tmf_dec_remaining(tmf);
+
+    object_unref(d);
+}
+
+static void dummy_bh(void *opaque)
+{
+    /* Do nothing */
+}
+
+/*
+ * Wait for pending virtio_scsi_defer_tmf_to_aio_context() BHs.
+ */
+static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s)
+{
+    GLOBAL_STATE_CODE();
+
+    assert(!s->dataplane_started);
+
+    if (s->ctx) {
+        /* Our BH only runs after previously scheduled BHs */
+        aio_wait_bh_oneshot(s->ctx, dummy_bh, NULL);
+    }
+}
+
+/*
+ * Run the TMF in a specific AioContext, handling only requests in that
+ * AioContext. This is necessary because requests can run in different
+ * AioContext and it is only possible to cancel them from the AioContext where
+ * they are running.
+ */
+static void virtio_scsi_defer_tmf_to_aio_context(VirtIOSCSIReq *tmf,
+                                                 AioContext *ctx)
+{
+    /* Decremented in virtio_scsi_do_tmf_aio_context() */
+    qatomic_inc(&tmf->remaining);
+
+    /* See virtio_scsi_flush_defer_tmf_to_aio_context() cleanup during reset */
+    aio_bh_schedule_oneshot(ctx, virtio_scsi_do_tmf_aio_context, tmf);
+}
+
+/*
+ * Returns the AioContext for a given TMF's tag field or NULL. Note that the
+ * request identified by the tag may have completed by the time you can execute
+ * a BH in the AioContext, so don't assume the request still exists in your BH.
+ */
+static AioContext *find_aio_context_for_tmf_tag(SCSIDevice *d,
+                                                VirtIOSCSIReq *tmf)
+{
+    WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+        SCSIRequest *r;
+        SCSIRequest *next;
+
+        QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+            VirtIOSCSIReq *cmd_req = r->hba_private;
+
+            /* hba_private is non-NULL while the request is enqueued */
+            assert(cmd_req);
+
+            if (cmd_req->req.cmd.tag == tmf->req.tmf.tag) {
+                return r->ctx;
+            }
+        }
+    }
+    return NULL;
+}
+
 /* Return 0 if the request is ready to be completed and return to guest;
  * -EINPROGRESS if the request is submitted and will be completed later, in the
  * case of async cancellation. */
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
 {
     SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
     SCSIRequest *r, *next;
+    AioContext *ctx;
     int ret = 0;

     virtio_scsi_ctx_check(s, d);
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
                               req->req.tmf.tag, req->req.tmf.subtype);

     switch (req->req.tmf.subtype) {
-    case VIRTIO_SCSI_T_TMF_ABORT_TASK:
-    case VIRTIO_SCSI_T_TMF_QUERY_TASK:
+    case VIRTIO_SCSI_T_TMF_ABORT_TASK: {
         if (!d) {
             goto fail;
         }
         if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
             goto incorrect_lun;
         }
-        QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
-            VirtIOSCSIReq *cmd_req = r->hba_private;
-            if (cmd_req && cmd_req->req.cmd.tag == req->req.tmf.tag) {
-                break;
-            }
+
+        ctx = find_aio_context_for_tmf_tag(d, req);
+        if (ctx) {
+            virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+            ret = -EINPROGRESS;
         }
-        if (r) {
-            /*
-             * Assert that the request has not been completed yet, we
-             * check for it in the loop above.
-             */
-            assert(r->hba_private);
-            if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK) {
-                /* "If the specified command is present in the task set, then
-                 * return a service response set to FUNCTION SUCCEEDED".
-                 */
-                req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
-            } else {
-                VirtIOSCSICancelNotifier *notifier;
-
-                req->remaining = 1;
-                notifier = g_new(VirtIOSCSICancelNotifier, 1);
-                notifier->tmf_req = req;
-                notifier->notifier.notify = virtio_scsi_cancel_notify;
-                scsi_req_cancel_async(r, &notifier->notifier);
-                ret = -EINPROGRESS;
+        break;
+    }
+
+    case VIRTIO_SCSI_T_TMF_QUERY_TASK:
+        if (!d) {
+            goto fail;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            goto incorrect_lun;
+        }
+
+        WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+            QTAILQ_FOREACH(r, &d->requests, next) {
+                VirtIOSCSIReq *cmd_req = r->hba_private;
+                assert(cmd_req); /* request has hba_private while enqueued */
+
+                if (cmd_req->req.cmd.tag == req->req.tmf.tag) {
+                    /*
+                     * "If the specified command is present in the task set,
+                     * then return a service response set to FUNCTION
+                     * SUCCEEDED".
+                     */
+                    req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+                }
             }
         }
         break;

     case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
     case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        virtio_scsi_defer_tmf_to_bh(req);
+        virtio_scsi_defer_tmf_to_main_loop(req);
         ret = -EINPROGRESS;
         break;

     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
-    case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET:
+    case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
+        if (!d) {
+            goto fail;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            goto incorrect_lun;
+        }
+
+        qatomic_inc(&req->remaining);
+
+        ctx = s->ctx ?: qemu_get_aio_context();
+        virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+
+        virtio_scsi_tmf_dec_remaining(req);
+        ret = -EINPROGRESS;
+        break;
+    }
+
     case VIRTIO_SCSI_T_TMF_QUERY_TASK_SET:
         if (!d) {
             goto fail;
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
             goto incorrect_lun;
         }

-        /* Add 1 to "remaining" until virtio_scsi_do_tmf returns.
-         * This way, if the bus starts calling back to the notifiers
-         * even before we finish the loop, virtio_scsi_cancel_notify
-         * will not complete the TMF too early.
-         */
-        req->remaining = 1;
-        QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
-            if (r->hba_private) {
-                if (req->req.tmf.subtype == VIRTIO_SCSI_T_TMF_QUERY_TASK_SET) {
-                    /* "If there is any command present in the task set, then
-                     * return a service response set to FUNCTION SUCCEEDED".
-                     */
-                    req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
-                    break;
-                } else {
-                    VirtIOSCSICancelNotifier *notifier;
-
-                    req->remaining++;
-                    notifier = g_new(VirtIOSCSICancelNotifier, 1);
-                    notifier->notifier.notify = virtio_scsi_cancel_notify;
-                    notifier->tmf_req = req;
-                    scsi_req_cancel_async(r, &notifier->notifier);
-                }
+        WITH_QEMU_LOCK_GUARD(&d->requests_lock) {
+            QTAILQ_FOREACH_SAFE(r, &d->requests, next, next) {
+                /* Request has hba_private while enqueued */
+                assert(r->hba_private);
+
+                /*
+                 * "If there is any command present in the task set, then
+                 * return a service response set to FUNCTION SUCCEEDED".
+                 */
+                req->resp.tmf.response = VIRTIO_SCSI_S_FUNCTION_SUCCEEDED;
+                break;
             }
         }
-        if (--req->remaining > 0) {
-            ret = -EINPROGRESS;
-        }
         break;

     case VIRTIO_SCSI_T_TMF_CLEAR_ACA:
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
     assert(!s->dataplane_started);

     virtio_scsi_reset_tmf_bh(s);
+    virtio_scsi_flush_defer_tmf_to_aio_context(s);

     qatomic_inc(&s->resetting);
     bus_cold_reset(BUS(&s->bus));
--
2.48.1

From: Stefan Hajnoczi <stefanha@redhat.com>

This is the cleanup function that must be called after
apply_iothread_vq_mapping() succeeds. virtio-scsi will need this
function too, so extract it.
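
The intended pairing looks like this (a sketch only; the argument list
of apply_iothread_vq_mapping() is abbreviated here and the names are
illustrative, not taken from the patch):

    if (!apply_iothread_vq_mapping(list, vq_aio_context, num_queues, errp)) {
        return false;   /* hypothetical argument names, for illustration */
    }
    /* ... and on the teardown path: */
    cleanup_iothread_vq_mapping(list);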
6
filename which looks like it should be managed using the "foo" protocol.
7
This is especially troublesome if you then try to resolve a backing
8
filename based on "foo:bar".
9
6
10
This issue can only occur if the stripped part is a relative filename
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
("file:/foo:bar" will be shortened to "/foo:bar" and having a slash
8
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
12
before the first colon means that "/foo" is not recognized as a protocol
9
Message-ID: <20250311132616.1049687-9-stefanha@redhat.com>
13
part). Therefore, we can easily fix it by prepending "./" to such
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
filenames.
11
---
12
hw/block/virtio-blk.c | 27 +++++++++++++++++++++------
13
1 file changed, 21 insertions(+), 6 deletions(-)
15
14
16
Before this patch:
15
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
17
$ ./qemu-img create -f qcow2 backing.qcow2 64M
18
Formatting 'backing.qcow2', fmt=qcow2 size=67108864 encryption=off
19
cluster_size=65536 lazy_refcounts=off refcount_bits=16
20
$ ./qemu-img create -f qcow2 -b backing.qcow2 file:top:image.qcow2
21
Formatting 'file:top:image.qcow2', fmt=qcow2 size=67108864
22
backing_file=backing.qcow2 encryption=off cluster_size=65536
23
lazy_refcounts=off refcount_bits=16
24
$ ./qemu-io file:top:image.qcow2
25
can't open device file:top:image.qcow2: Could not open backing file:
26
Unknown protocol 'top'
27
28
After this patch:
29
$ ./qemu-io file:top:image.qcow2
30
[no error]
31
32
Signed-off-by: Max Reitz <mreitz@redhat.com>
33
Message-id: 20170522195217.12991-3-mreitz@redhat.com
34
Reviewed-by: Eric Blake <eblake@redhat.com>
35
Signed-off-by: Max Reitz <mreitz@redhat.com>
36
---
37
block.c | 35 +++++++++++++++++++++++++++++++++++
38
block/file-posix.c | 17 +++--------------
39
block/file-win32.c | 12 ++----------
40
include/block/block_int.h | 3 +++
41
4 files changed, 43 insertions(+), 24 deletions(-)
42
43
diff --git a/block.c b/block.c
44
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
45
--- a/block.c
17
--- a/hw/block/virtio-blk.c
46
+++ b/block.c
18
+++ b/hw/block/virtio-blk.c
47
@@ -XXX,XX +XXX,XX @@ void path_combine(char *dest, int dest_size,
19
@@ -XXX,XX +XXX,XX @@ validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
48
}
20
* Fill in the AioContext for each virtqueue in the @vq_aio_context array given
21
* the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
22
*
23
+ * cleanup_iothread_vq_mapping() must be called to free IOThread object
24
+ * references after this function returns success.
25
+ *
26
* Returns: %true on success, %false on failure.
27
**/
28
static bool apply_iothread_vq_mapping(
29
@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
30
return true;
49
}
31
}
50
32
51
+/*
33
+/**
52
+ * Helper function for bdrv_parse_filename() implementations to remove optional
34
+ * cleanup_iothread_vq_mapping:
53
+ * protocol prefixes (especially "file:") from a filename and for putting the
35
+ * @list: The mapping of virtqueues to IOThreads.
54
+ * stripped filename into the options QDict if there is such a prefix.
36
+ *
37
+ * Release IOThread object references that were acquired by
+ * apply_iothread_vq_mapping().
+ */
+static void cleanup_iothread_vq_mapping(IOThreadVirtQueueMappingList *list)
+{
+    IOThreadVirtQueueMappingList *node;
+
+    for (node = list; node; node = node->next) {
+        IOThread *iothread = iothread_by_id(node->value->iothread);
+        object_unref(OBJECT(iothread));
+    }
+}
+
 /* Context: BQL held */
 static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
 {
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
     assert(!s->ioeventfd_started);
 
     if (conf->iothread_vq_mapping_list) {
-        IOThreadVirtQueueMappingList *node;
-
-        for (node = conf->iothread_vq_mapping_list; node; node = node->next) {
-            IOThread *iothread = iothread_by_id(node->value->iothread);
-            object_unref(OBJECT(iothread));
-        }
+        cleanup_iothread_vq_mapping(conf->iothread_vq_mapping_list);
    }
 
     if (conf->iothread) {
-- 
2.48.1

+void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
+                                      QDict *options)
+{
+    if (strstart(filename, prefix, &filename)) {
+        /* Stripping the explicit protocol prefix may result in a protocol
+         * prefix being (wrongly) detected (if the filename contains a colon) */
+        if (path_has_protocol(filename)) {
+            QString *fat_filename;
+
+            /* This means there is some colon before the first slash; therefore,
+             * this cannot be an absolute path */
+            assert(!path_is_absolute(filename));
+
+            /* And we can thus fix the protocol detection issue by prefixing it
+             * by "./" */
+            fat_filename = qstring_from_str("./");
+            qstring_append(fat_filename, filename);
+
+            assert(!path_has_protocol(qstring_get_str(fat_filename)));
+
+            qdict_put(options, "filename", fat_filename);
+        } else {
+            /* If no protocol prefix was detected, we can use the shortened
+             * filename as-is */
+            qdict_put_str(options, "filename", filename);
+        }
+    }
+}
+
+
 /* Returns whether the image file is opened as read-only. Note that this can
  * return false and writing to the image file is still not possible because the
  * image is inactivated. */
diff --git a/block/file-posix.c b/block/file-posix.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-posix.c
+++ b/block/file-posix.c
@@ -XXX,XX +XXX,XX @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
 static void raw_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    /* The filename does not have to be prefixed by the protocol name, since
-     * "file" is the default protocol; therefore, the return value of this
-     * function call can be ignored. */
-    strstart(filename, "file:", &filename);
-
-    qdict_put_str(options, "filename", filename);
+    bdrv_parse_filename_strip_prefix(filename, "file:", options);
 }
 
 static QemuOptsList raw_runtime_opts = {
@@ -XXX,XX +XXX,XX @@ static int check_hdev_writable(BDRVRawState *s)
 static void hdev_parse_filename(const char *filename, QDict *options,
                                 Error **errp)
 {
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_device:", &filename);
-
-    qdict_put_str(options, "filename", filename);
+    bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
 }
 
 static bool hdev_is_sg(BlockDriverState *bs)
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_host_device = {
 static void cdrom_parse_filename(const char *filename, QDict *options,
                                  Error **errp)
 {
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_cdrom:", &filename);
-
-    qdict_put_str(options, "filename", filename);
+    bdrv_parse_filename_strip_prefix(filename, "host_cdrom:", options);
 }
 #endif
 
diff --git a/block/file-win32.c b/block/file-win32.c
index XXXXXXX..XXXXXXX 100644
--- a/block/file-win32.c
+++ b/block/file-win32.c
@@ -XXX,XX +XXX,XX @@ static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
 static void raw_parse_filename(const char *filename, QDict *options,
                                Error **errp)
 {
-    /* The filename does not have to be prefixed by the protocol name, since
-     * "file" is the default protocol; therefore, the return value of this
-     * function call can be ignored. */
-    strstart(filename, "file:", &filename);
-
-    qdict_put_str(options, "filename", filename);
+    bdrv_parse_filename_strip_prefix(filename, "file:", options);
 }
 
 static QemuOptsList raw_runtime_opts = {
@@ -XXX,XX +XXX,XX @@ static int hdev_probe_device(const char *filename)
 static void hdev_parse_filename(const char *filename, QDict *options,
                                 Error **errp)
 {
-    /* The prefix is optional, just as for "file". */
-    strstart(filename, "host_device:", &filename);
-
-    qdict_put_str(options, "filename", filename);
+    bdrv_parse_filename_strip_prefix(filename, "host_device:", options);
 }
 
 static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
diff --git a/include/block/block_int.h b/include/block/block_int.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -XXX,XX +XXX,XX @@ int get_tmp_filename(char *filename, int size);
 BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
                             const char *filename);
 
+void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
+                                      QDict *options);
+
 
 /**
  * bdrv_add_before_write_notifier:
-- 
1.8.3.1
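As a behavior sketch for the new helper (hypothetical filename, not part of the patch itself): bdrv_parse_filename_strip_prefix() strips an optional protocol prefix and, if a colon still appears before the first slash, stores the name with a "./" prefix so it cannot be misdetected as a protocol:

  $ qemu-img info 'file:my:image.qcow2'    # stored internally as ./my:image.qcow2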
From: "Daniel P. Berrange" <berrange@redhat.com>

The '--image-opts' flag indicates whether the source filename
includes options. The target filename has to remain in the
plain filename format though, since it needs to be passed to
bdrv_create(). When using --skip-create though, it would be
possible to use image-opts syntax. This adds --target-image-opts
to indicate that the target filename includes options. Currently
this mandates use of the --skip-create flag too.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170515164712.6643-4-berrange@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 qemu-img-cmds.hx |  4 +--
 qemu-img.c       | 84 ++++++++++++++++++++++++++++++++++++++------------------
 qemu-img.texi    | 12 ++++++--
 3 files changed, 69 insertions(+), 31 deletions(-)

diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img-cmds.hx
+++ b/qemu-img-cmds.hx
@@ -XXX,XX +XXX,XX @@ STEXI
 ETEXI
 
 DEF("convert", img_convert,
-    "convert [--object objectdef] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename")
+    "convert [--object objectdef] [--image-opts] [--target-image-opts] [-U] [-c] [-p] [-q] [-n] [-f fmt] [-t cache] [-T src_cache] [-O output_fmt] [-B backing_file] [-o options] [-s snapshot_id_or_name] [-l snapshot_param] [-S sparse_size] [-m num_coroutines] [-W] filename [filename2 [...]] output_filename")
 STEXI
-@item convert [--object @var{objectdef}] [--image-opts] [-U] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename}
+@item convert [--object @var{objectdef}] [--image-opts] [--target-image-opts] [-U] [-c] [-p] [-q] [-n] [-f @var{fmt}] [-t @var{cache}] [-T @var{src_cache}] [-O @var{output_fmt}] [-B @var{backing_file}] [-o @var{options}] [-s @var{snapshot_id_or_name}] [-l @var{snapshot_param}] [-S @var{sparse_size}] [-m @var{num_coroutines}] [-W] @var{filename} [@var{filename2} [...]] @var{output_filename}
 ETEXI
 
 DEF("dd", img_dd,
diff --git a/qemu-img.c b/qemu-img.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -XXX,XX +XXX,XX @@ enum {
     OPTION_PATTERN = 260,
     OPTION_FLUSH_INTERVAL = 261,
     OPTION_NO_DRAIN = 262,
+    OPTION_TARGET_IMAGE_OPTS = 263,
 };
 
 typedef enum OutputFormat {
@@ -XXX,XX +XXX,XX @@ static int convert_do_copy(ImgConvertState *s)
 static int img_convert(int argc, char **argv)
 {
     int c, bs_i, flags, src_flags = 0;
-    const char *fmt = NULL, *out_fmt = "raw", *cache = "unsafe",
+    const char *fmt = NULL, *out_fmt = NULL, *cache = "unsafe",
                *src_cache = BDRV_DEFAULT_CACHE, *out_baseimg = NULL,
                *out_filename, *out_baseimg_param, *snapshot_name = NULL;
-    BlockDriver *drv, *proto_drv;
+    BlockDriver *drv = NULL, *proto_drv = NULL;
     BlockDriverInfo bdi;
     BlockDriverState *out_bs;
     QemuOpts *opts = NULL, *sn_opts = NULL;
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
     char *options = NULL;
     Error *local_err = NULL;
     bool writethrough, src_writethrough, quiet = false, image_opts = false,
-         skip_create = false, progress = false;
+         skip_create = false, progress = false, tgt_image_opts = false;
     int64_t ret = -EINVAL;
     bool force_share = false;
 
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
             {"object", required_argument, 0, OPTION_OBJECT},
             {"image-opts", no_argument, 0, OPTION_IMAGE_OPTS},
             {"force-share", no_argument, 0, 'U'},
+            {"target-image-opts", no_argument, 0, OPTION_TARGET_IMAGE_OPTS},
             {0, 0, 0, 0}
         };
         c = getopt_long(argc, argv, ":hf:O:B:ce6o:s:l:S:pt:T:qnm:WU",
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
         case OPTION_IMAGE_OPTS:
             image_opts = true;
             break;
+        case OPTION_TARGET_IMAGE_OPTS:
+            tgt_image_opts = true;
+            break;
         }
     }
 
+    if (!out_fmt && !tgt_image_opts) {
+        out_fmt = "raw";
+    }
+
     if (qemu_opts_foreach(&qemu_object_opts,
                           user_creatable_add_opts_foreach,
                           NULL, NULL)) {
         goto fail_getopt;
     }
 
+    if (tgt_image_opts && !skip_create) {
+        error_report("--target-image-opts requires use of -n flag");
+        goto fail_getopt;
+    }
+
     s.src_num = argc - optind - 1;
     out_filename = s.src_num >= 1 ? argv[argc - 1] : NULL;
 
     if (options && has_help_option(options)) {
-        ret = print_block_option_help(out_filename, out_fmt);
-        goto fail_getopt;
+        if (out_fmt) {
+            ret = print_block_option_help(out_filename, out_fmt);
+            goto fail_getopt;
+        } else {
+            error_report("Option help requires a format be specified");
+            goto fail_getopt;
+        }
     }
 
     if (s.src_num < 1) {
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
         goto out;
     }
 
-    /* Find driver and parse its options */
-    drv = bdrv_find_format(out_fmt);
-    if (!drv) {
-        error_report("Unknown file format '%s'", out_fmt);
-        ret = -1;
-        goto out;
-    }
+    if (!skip_create) {
+        /* Find driver and parse its options */
+        drv = bdrv_find_format(out_fmt);
+        if (!drv) {
+            error_report("Unknown file format '%s'", out_fmt);
+            ret = -1;
+            goto out;
+        }
 
-    proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
-    if (!proto_drv) {
-        error_report_err(local_err);
-        ret = -1;
-        goto out;
-    }
+        proto_drv = bdrv_find_protocol(out_filename, true, &local_err);
+        if (!proto_drv) {
+            error_report_err(local_err);
+            ret = -1;
+            goto out;
+        }
 
-    if (!skip_create) {
         if (!drv->create_opts) {
             error_report("Format driver '%s' does not support image creation",
                          drv->format_name);
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
             const char *preallocation =
                 qemu_opt_get(opts, BLOCK_OPT_PREALLOC);
 
-    if (!drv->bdrv_co_pwritev_compressed) {
+    if (drv && !drv->bdrv_co_pwritev_compressed) {
         error_report("Compression not supported for this file format");
         ret = -1;
         goto out;
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
         goto out;
     }
 
-    /* XXX we should allow --image-opts to trigger use of
-     * img_open() here, but then we have trouble with
-     * the bdrv_create() call which takes different params.
-     * Not critical right now, so fix can wait...
-     */
-    s.target = img_open_file(out_filename, out_fmt, flags, writethrough, quiet,
-                             false);
+    if (skip_create) {
+        s.target = img_open(tgt_image_opts, out_filename, out_fmt,
+                            flags, writethrough, quiet, false);
+    } else {
+        /* TODO ultimately we should allow --target-image-opts
+         * to be used even when -n is not given.
+         * That has to wait for bdrv_create to be improved
+         * to allow filenames in option syntax
+         */
+        s.target = img_open_file(out_filename, out_fmt, flags,
+                                 writethrough, quiet, false);
+    }
     if (!s.target) {
         ret = -1;
         goto out;
     }
     out_bs = blk_bs(s.target);
 
+    if (s.compressed && !out_bs->drv->bdrv_co_pwritev_compressed) {
+        error_report("Compression not supported for this file format");
+        ret = -1;
+        goto out;
+    }
+
     /* increase bufsectors from the default 4096 (2M) if opt_transfer
      * or discard_alignment of the out_bs is greater. Limit to 32768 (16MB)
      * as maximum. */
diff --git a/qemu-img.texi b/qemu-img.texi
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img.texi
+++ b/qemu-img.texi
@@ -XXX,XX +XXX,XX @@ keys.
 
 @item --image-opts
 
-Indicates that the @var{filename} parameter is to be interpreted as a
+Indicates that the source @var{filename} parameter is to be interpreted as a
 full option string, not a plain filename. This parameter is mutually
-exclusive with the @var{-f} and @var{-F} parameters.
+exclusive with the @var{-f} parameter.
+
+@item --target-image-opts
+
+Indicates that the @var{output_filename} parameter(s) are to be interpreted as
+a full option string, not a plain filename. This parameter is mutually
+exclusive with the @var{-O} parameters. It is currently required to also use
+the @var{-n} parameter to skip image creation. This restriction may be relaxed
+in a future release.
 
 @item fmt
 is the disk image format. It is guessed automatically in most cases. See below
-- 
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

Use noun_verb() function naming instead of verb_noun() because the
former is the most common naming style for APIs. The next commit will
move these functions into a header file so that virtio-scsi can call
them.

Shorten iothread_vq_mapping_apply()'s iothread_vq_mapping_list argument
to just "list" like in the other functions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-10-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/block/virtio-blk.c | 33 ++++++++++++++++-----------------
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ static const BlockDevOps virtio_block_ops = {
 };
 
 static bool
-validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
-                                  uint16_t num_queues, Error **errp)
+iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
+                             num_queues, Error **errp)
 {
     g_autofree unsigned long *vqs = bitmap_new(num_queues);
     g_autoptr(GHashTable) iothreads =
@@ -XXX,XX +XXX,XX @@ validate_iothread_vq_mapping_list(IOThreadVirtQueueMappingList *list,
 }
 
 /**
- * apply_iothread_vq_mapping:
- * @iothread_vq_mapping_list: The mapping of virtqueues to IOThreads.
+ * iothread_vq_mapping_apply:
+ * @list: The mapping of virtqueues to IOThreads.
  * @vq_aio_context: The array of AioContext pointers to fill in.
  * @num_queues: The length of @vq_aio_context.
  * @errp: If an error occurs, a pointer to the area to store the error.
  *
  * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
- * the iothread-vq-mapping parameter in @iothread_vq_mapping_list.
+ * the iothread-vq-mapping parameter in @list.
  *
- * cleanup_iothread_vq_mapping() must be called to free IOThread object
+ * iothread_vq_mapping_cleanup() must be called to free IOThread object
  * references after this function returns success.
  *
  * Returns: %true on success, %false on failure.
  **/
-static bool apply_iothread_vq_mapping(
-    IOThreadVirtQueueMappingList *iothread_vq_mapping_list,
+static bool iothread_vq_mapping_apply(
+    IOThreadVirtQueueMappingList *list,
     AioContext **vq_aio_context,
     uint16_t num_queues,
     Error **errp)
@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
     size_t num_iothreads = 0;
     size_t cur_iothread = 0;
 
-    if (!validate_iothread_vq_mapping_list(iothread_vq_mapping_list,
-                                           num_queues, errp)) {
+    if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
         return false;
     }
 
-    for (node = iothread_vq_mapping_list; node; node = node->next) {
+    for (node = list; node; node = node->next) {
         num_iothreads++;
     }
 
-    for (node = iothread_vq_mapping_list; node; node = node->next) {
+    for (node = list; node; node = node->next) {
         IOThread *iothread = iothread_by_id(node->value->iothread);
         AioContext *ctx = iothread_get_aio_context(iothread);
 
@@ -XXX,XX +XXX,XX @@ static bool apply_iothread_vq_mapping(
 }
 
 /**
- * cleanup_iothread_vq_mapping:
+ * iothread_vq_mapping_cleanup:
  * @list: The mapping of virtqueues to IOThreads.
  *
  * Release IOThread object references that were acquired by
- * apply_iothread_vq_mapping().
+ * iothread_vq_mapping_apply().
  */
-static void cleanup_iothread_vq_mapping(IOThreadVirtQueueMappingList *list)
+static void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
 {
     IOThreadVirtQueueMappingList *node;
 
@@ -XXX,XX +XXX,XX @@ static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
     s->vq_aio_context = g_new(AioContext *, conf->num_queues);
 
     if (conf->iothread_vq_mapping_list) {
-        if (!apply_iothread_vq_mapping(conf->iothread_vq_mapping_list,
+        if (!iothread_vq_mapping_apply(conf->iothread_vq_mapping_list,
                                        s->vq_aio_context,
                                        conf->num_queues,
                                        errp)) {
@@ -XXX,XX +XXX,XX @@ static void virtio_blk_vq_aio_context_cleanup(VirtIOBlock *s)
     assert(!s->ioeventfd_started);
 
     if (conf->iothread_vq_mapping_list) {
-        cleanup_iothread_vq_mapping(conf->iothread_vq_mapping_list);
+        iothread_vq_mapping_cleanup(conf->iothread_vq_mapping_list);
     }
 
     if (conf->iothread) {
-- 
2.48.1
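A usage sketch for the new --target-image-opts flag (hypothetical file names, not part of the patch): with -n skipping creation, both source and target can be given in option syntax:

  $ qemu-img convert -n --image-opts --target-image-opts \
        driver=qcow2,file.filename=src.qcow2 \
        driver=raw,file.filename=dst.img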
From: "Daniel P. Berrange" <berrange@redhat.com>

The qemu-img dd/convert commands will create an image file and
then try to open it. Historically it has been possible to open
new files without passing any options. With encrypted files
though, the *key-secret options are mandatory, so we need to
provide those options when opening the newly created file.

Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170515164712.6643-5-berrange@redhat.com
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 qemu-img.c | 42 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 37 insertions(+), 5 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -XXX,XX +XXX,XX @@ static BlockBackend *img_open_opts(const char *optstr,
 }
 
 static BlockBackend *img_open_file(const char *filename,
+                                   QDict *options,
                                    const char *fmt, int flags,
                                    bool writethrough, bool quiet,
                                    bool force_share)
 {
     BlockBackend *blk;
     Error *local_err = NULL;
-    QDict *options = qdict_new();
 
+    if (!options) {
+        options = qdict_new();
+    }
     if (fmt) {
         qdict_put_str(options, "driver", fmt);
     }
@@ -XXX,XX +XXX,XX @@ static BlockBackend *img_open_file(const char *filename,
 }
 
 
+static int img_add_key_secrets(void *opaque,
+                               const char *name, const char *value,
+                               Error **errp)
+{
+    QDict *options = opaque;
+
+    if (g_str_has_suffix(name, "key-secret")) {
+        qdict_put(options, name, qstring_from_str(value));
+    }
+
+    return 0;
+}
+
+static BlockBackend *img_open_new_file(const char *filename,
+                                       QemuOpts *create_opts,
+                                       const char *fmt, int flags,
+                                       bool writethrough, bool quiet,
+                                       bool force_share)
+{
+    QDict *options = NULL;
+
+    options = qdict_new();
+    qemu_opt_foreach(create_opts, img_add_key_secrets, options, &error_abort);
+
+    return img_open_file(filename, options, fmt, flags, writethrough, quiet,
+                         force_share);
+}
+
+
 static BlockBackend *img_open(bool image_opts,
                               const char *filename,
                               const char *fmt, int flags, bool writethrough,
@@ -XXX,XX +XXX,XX @@ static BlockBackend *img_open(bool image_opts,
         blk = img_open_opts(filename, opts, flags, writethrough, quiet,
                             force_share);
     } else {
-        blk = img_open_file(filename, fmt, flags, writethrough, quiet,
+        blk = img_open_file(filename, NULL, fmt, flags, writethrough, quiet,
                             force_share);
     }
     return blk;
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
          * That has to wait for bdrv_create to be improved
          * to allow filenames in option syntax
          */
-        s.target = img_open_file(out_filename, out_fmt, flags,
-                                 writethrough, quiet, false);
+        s.target = img_open_new_file(out_filename, opts, out_fmt,
+                                     flags, writethrough, quiet, false);
     }
     if (!s.target) {
         ret = -1;
@@ -XXX,XX +XXX,XX @@ static int img_dd(int argc, char **argv)
      * with the bdrv_create() call above which does not
      * support image-opts style.
      */
-    blk2 = img_open_file(out.filename, out_fmt, BDRV_O_RDWR,
+    blk2 = img_open_file(out.filename, NULL, out_fmt, BDRV_O_RDWR,
                          false, false, false);
 
     if (!blk2) {
-- 
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

The code that builds an array of AioContext pointers indexed by the
virtqueue is not specific to virtio-blk. virtio-scsi will need to do the
same thing, so extract the functions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Message-ID: <20250311132616.1049687-11-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/iothread-vq-mapping.h |  45 ++++++++
 hw/block/virtio-blk.c                   | 142 +-----------------------
 hw/virtio/iothread-vq-mapping.c         | 131 ++++++++++++++++++++++
 hw/virtio/meson.build                   |   1 +
 4 files changed, 178 insertions(+), 141 deletions(-)
 create mode 100644 include/hw/virtio/iothread-vq-mapping.h
 create mode 100644 hw/virtio/iothread-vq-mapping.c

diff --git a/include/hw/virtio/iothread-vq-mapping.h b/include/hw/virtio/iothread-vq-mapping.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/include/hw/virtio/iothread-vq-mapping.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * IOThread Virtqueue Mapping
+ *
+ * Copyright Red Hat, Inc
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#ifndef HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
+#define HW_VIRTIO_IOTHREAD_VQ_MAPPING_H
+
+#include "qapi/error.h"
+#include "qapi/qapi-types-virtio.h"
+
+/**
+ * iothread_vq_mapping_apply:
+ * @list: The mapping of virtqueues to IOThreads.
+ * @vq_aio_context: The array of AioContext pointers to fill in.
+ * @num_queues: The length of @vq_aio_context.
+ * @errp: If an error occurs, a pointer to the area to store the error.
+ *
+ * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
+ * the iothread-vq-mapping parameter in @list.
+ *
+ * iothread_vq_mapping_cleanup() must be called to free IOThread object
+ * references after this function returns success.
+ *
+ * Returns: %true on success, %false on failure.
+ **/
+bool iothread_vq_mapping_apply(
+        IOThreadVirtQueueMappingList *list,
+        AioContext **vq_aio_context,
+        uint16_t num_queues,
+        Error **errp);
+
+/**
+ * iothread_vq_mapping_cleanup:
+ * @list: The mapping of virtqueues to IOThreads.
+ *
+ * Release IOThread object references that were acquired by
+ * iothread_vq_mapping_apply().
+ */
+void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list);
+
+#endif /* HW_VIRTIO_IOTHREAD_VQ_MAPPING_H */
diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -XXX,XX +XXX,XX @@
 #endif
 #include "hw/virtio/virtio-bus.h"
 #include "migration/qemu-file-types.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-access.h"
 #include "hw/virtio/virtio-blk-common.h"
 #include "qemu/coroutine.h"
@@ -XXX,XX +XXX,XX @@ static const BlockDevOps virtio_block_ops = {
     .drained_end = virtio_blk_drained_end,
 };
 
-static bool
-iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
-                             num_queues, Error **errp)
-{
-    g_autofree unsigned long *vqs = bitmap_new(num_queues);
-    g_autoptr(GHashTable) iothreads =
-        g_hash_table_new(g_str_hash, g_str_equal);
-
-    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
-        const char *name = node->value->iothread;
-        uint16List *vq;
-
-        if (!iothread_by_id(name)) {
-            error_setg(errp, "IOThread \"%s\" object does not exist", name);
-            return false;
-        }
-
-        if (!g_hash_table_add(iothreads, (gpointer)name)) {
-            error_setg(errp,
-                       "duplicate IOThread name \"%s\" in iothread-vq-mapping",
-                       name);
-            return false;
-        }
-
-        if (node != list) {
-            if (!!node->value->vqs != !!list->value->vqs) {
-                error_setg(errp, "either all items in iothread-vq-mapping "
-                                 "must have vqs or none of them must have it");
-                return false;
-            }
-        }
-
-        for (vq = node->value->vqs; vq; vq = vq->next) {
-            if (vq->value >= num_queues) {
-                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
-                           "less than num_queues %u in iothread-vq-mapping",
-                           vq->value, name, num_queues);
-                return false;
-            }
-
-            if (test_and_set_bit(vq->value, vqs)) {
-                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
-                           "because it is already assigned", vq->value, name);
-                return false;
-            }
-        }
-    }
-
-    if (list->value->vqs) {
-        for (uint16_t i = 0; i < num_queues; i++) {
-            if (!test_bit(i, vqs)) {
-                error_setg(errp,
-                        "missing vq %u IOThread assignment in iothread-vq-mapping",
-                        i);
-                return false;
-            }
-        }
-    }
-
-    return true;
-}
-
-/**
- * iothread_vq_mapping_apply:
- * @list: The mapping of virtqueues to IOThreads.
- * @vq_aio_context: The array of AioContext pointers to fill in.
- * @num_queues: The length of @vq_aio_context.
- * @errp: If an error occurs, a pointer to the area to store the error.
- *
- * Fill in the AioContext for each virtqueue in the @vq_aio_context array given
- * the iothread-vq-mapping parameter in @list.
- *
- * iothread_vq_mapping_cleanup() must be called to free IOThread object
- * references after this function returns success.
- *
- * Returns: %true on success, %false on failure.
- **/
-static bool iothread_vq_mapping_apply(
-    IOThreadVirtQueueMappingList *list,
-    AioContext **vq_aio_context,
-    uint16_t num_queues,
-    Error **errp)
-{
-    IOThreadVirtQueueMappingList *node;
-    size_t num_iothreads = 0;
-    size_t cur_iothread = 0;
-
-    if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
-        return false;
-    }
-
-    for (node = list; node; node = node->next) {
-        num_iothreads++;
-    }
-
-    for (node = list; node; node = node->next) {
-        IOThread *iothread = iothread_by_id(node->value->iothread);
-        AioContext *ctx = iothread_get_aio_context(iothread);
-
-        /* Released in virtio_blk_vq_aio_context_cleanup() */
-        object_ref(OBJECT(iothread));
-
-        if (node->value->vqs) {
-            uint16List *vq;
-
-            /* Explicit vq:IOThread assignment */
-            for (vq = node->value->vqs; vq; vq = vq->next) {
-                assert(vq->value < num_queues);
-                vq_aio_context[vq->value] = ctx;
-            }
-        } else {
-            /* Round-robin vq:IOThread assignment */
-            for (unsigned i = cur_iothread; i < num_queues;
-                 i += num_iothreads) {
-                vq_aio_context[i] = ctx;
-            }
-        }
-
-        cur_iothread++;
-    }
-
-    return true;
-}
-
-/**
- * iothread_vq_mapping_cleanup:
- * @list: The mapping of virtqueues to IOThreads.
- *
- * Release IOThread object references that were acquired by
- * iothread_vq_mapping_apply().
- */
-static void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
-{
-    IOThreadVirtQueueMappingList *node;
-
-    for (node = list; node; node = node->next) {
-        IOThread *iothread = iothread_by_id(node->value->iothread);
-        object_unref(OBJECT(iothread));
-    }
-}
-
 /* Context: BQL held */
 static bool virtio_blk_vq_aio_context_init(VirtIOBlock *s, Error **errp)
 {
diff --git a/hw/virtio/iothread-vq-mapping.c b/hw/virtio/iothread-vq-mapping.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/iothread-vq-mapping.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * IOThread Virtqueue Mapping
+ *
+ * Copyright Red Hat, Inc
+ *
+ * SPDX-License-Identifier: GPL-2.0-only
+ */
+
+#include "qemu/osdep.h"
+#include "system/iothread.h"
+#include "hw/virtio/iothread-vq-mapping.h"
+
+static bool
+iothread_vq_mapping_validate(IOThreadVirtQueueMappingList *list, uint16_t
+                             num_queues, Error **errp)
+{
+    g_autofree unsigned long *vqs = bitmap_new(num_queues);
+    g_autoptr(GHashTable) iothreads =
+        g_hash_table_new(g_str_hash, g_str_equal);
+
+    for (IOThreadVirtQueueMappingList *node = list; node; node = node->next) {
+        const char *name = node->value->iothread;
+        uint16List *vq;
+
+        if (!iothread_by_id(name)) {
+            error_setg(errp, "IOThread \"%s\" object does not exist", name);
+            return false;
+        }
+
+        if (!g_hash_table_add(iothreads, (gpointer)name)) {
+            error_setg(errp,
+                       "duplicate IOThread name \"%s\" in iothread-vq-mapping",
+                       name);
+            return false;
+        }
+
+        if (node != list) {
+            if (!!node->value->vqs != !!list->value->vqs) {
+                error_setg(errp, "either all items in iothread-vq-mapping "
+                                 "must have vqs or none of them must have it");
+                return false;
+            }
+        }
+
+        for (vq = node->value->vqs; vq; vq = vq->next) {
+            if (vq->value >= num_queues) {
+                error_setg(errp, "vq index %u for IOThread \"%s\" must be "
+                           "less than num_queues %u in iothread-vq-mapping",
+                           vq->value, name, num_queues);
+                return false;
+            }
+
+            if (test_and_set_bit(vq->value, vqs)) {
+                error_setg(errp, "cannot assign vq %u to IOThread \"%s\" "
+                           "because it is already assigned", vq->value, name);
+                return false;
+            }
+        }
+    }
+
+    if (list->value->vqs) {
+        for (uint16_t i = 0; i < num_queues; i++) {
+            if (!test_bit(i, vqs)) {
+                error_setg(errp,
+                        "missing vq %u IOThread assignment in iothread-vq-mapping",
+                        i);
+                return false;
+            }
+        }
+    }
+
+    return true;
+}
+
+bool iothread_vq_mapping_apply(
+        IOThreadVirtQueueMappingList *list,
+        AioContext **vq_aio_context,
+        uint16_t num_queues,
+        Error **errp)
+{
+    IOThreadVirtQueueMappingList *node;
+    size_t num_iothreads = 0;
+    size_t cur_iothread = 0;
+
+    if (!iothread_vq_mapping_validate(list, num_queues, errp)) {
+        return false;
+    }
+
+    for (node = list; node; node = node->next) {
+        num_iothreads++;
+    }
+
+    for (node = list; node; node = node->next) {
+        IOThread *iothread = iothread_by_id(node->value->iothread);
+        AioContext *ctx = iothread_get_aio_context(iothread);
+
+        /* Released in virtio_blk_vq_aio_context_cleanup() */
+        object_ref(OBJECT(iothread));
+
+        if (node->value->vqs) {
+            uint16List *vq;
+
+            /* Explicit vq:IOThread assignment */
+            for (vq = node->value->vqs; vq; vq = vq->next) {
+                assert(vq->value < num_queues);
+                vq_aio_context[vq->value] = ctx;
+            }
+        } else {
+            /* Round-robin vq:IOThread assignment */
+            for (unsigned i = cur_iothread; i < num_queues;
+                 i += num_iothreads) {
+                vq_aio_context[i] = ctx;
+            }
+        }
+
+        cur_iothread++;
+    }
+
+    return true;
+}
+
+void iothread_vq_mapping_cleanup(IOThreadVirtQueueMappingList *list)
+{
+    IOThreadVirtQueueMappingList *node;
+
+    for (node = list; node; node = node->next) {
+        IOThread *iothread = iothread_by_id(node->value->iothread);
+        object_unref(OBJECT(iothread));
+    }
+}
+
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -XXX,XX +XXX,XX @@
 system_virtio_ss = ss.source_set()
 system_virtio_ss.add(files('virtio-bus.c'))
+system_virtio_ss.add(files('iothread-vq-mapping.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_PCI', if_true: files('virtio-pci.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_MMIO', if_true: files('virtio-mmio.c'))
 system_virtio_ss.add(when: 'CONFIG_VIRTIO_CRYPTO', if_true: files('virtio-crypto.c'))
-- 
2.48.1
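A hedged example of the case the *key-secret patch fixes (hypothetical names): the secret passed via --object for image creation is now also used when qemu-img re-opens the newly created file:

  $ qemu-img convert --object secret,id=sec0,file=passphrase.txt \
        -O luks -o key-secret=sec0 plain.img encrypted.luks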
From: Stefan Hajnoczi <stefanha@redhat.com>

Allow virtio-scsi virtqueues to be assigned to different IOThreads. This
makes it possible to take advantage of host multi-queue block layer
scalability by assigning virtqueues that have affinity with vCPUs to
different IOThreads that have affinity with host CPUs. The same feature
was introduced for virtio-blk in the past:
https://developers.redhat.com/articles/2024/09/05/scaling-virtio-blk-disk-io-iothread-virtqueue-mapping

Here are fio randread 4k iodepth=64 results from a 4 vCPU guest with an
Intel P4800X SSD:

  iothreads  IOPS
  ------------------------------
  1          189576
  2          312698
  4          346744

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20250311132616.1049687-12-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/virtio-scsi.h |  5 +-
 hw/scsi/virtio-scsi-dataplane.c | 90 ++++++++++++++++++++++++---------
 hw/scsi/virtio-scsi.c           | 63 ++++++++++++++---------
 3 files changed, 107 insertions(+), 51 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/virtio.h"
 #include "hw/scsi/scsi.h"
 #include "chardev/char-fe.h"
+#include "qapi/qapi-types-virtio.h"
 #include "system/iothread.h"
 
 #define TYPE_VIRTIO_SCSI_COMMON "virtio-scsi-common"
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSIConf {
     CharBackend chardev;
     uint32_t boot_tpgt;
     IOThread *iothread;
+    IOThreadVirtQueueMappingList *iothread_vq_mapping_list;
 };
 
 struct VirtIOSCSI;
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
     QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
 
     /* Fields for dataplane below */
-    AioContext *ctx; /* one iothread per virtio-scsi-pci for now */
+    AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */
 
     bool dataplane_started;
     bool dataplane_starting;
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_common_realize(DeviceState *dev,
 void virtio_scsi_common_unrealize(DeviceState *dev);
 
 void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp);
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s);
 int virtio_scsi_dataplane_start(VirtIODevice *s);
 void virtio_scsi_dataplane_stop(VirtIODevice *s);
 
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@
 #include "system/block-backend.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"
 
 /* Context: BQL held */
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
+    uint16_t num_vqs = vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED;
 
-    if (vs->conf.iothread) {
+    if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) {
+        error_setg(errp,
+                   "iothread and iothread-vq-mapping properties cannot be set "
+                   "at the same time");
+        return;
+    }
+
+    if (vs->conf.iothread || vs->conf.iothread_vq_mapping_list) {
         if (!k->set_guest_notifiers || !k->ioeventfd_assign) {
             error_setg(errp,
                        "device is incompatible with iothread "
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
             error_setg(errp, "ioeventfd is required for iothread");
             return;
         }
-        s->ctx = iothread_get_aio_context(vs->conf.iothread);
-    } else {
-        if (!virtio_device_ioeventfd_enabled(vdev)) {
+    }
+
+    s->vq_aio_context = g_new(AioContext *, num_vqs);
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list,
+                                       s->vq_aio_context, num_vqs, errp)) {
+            g_free(s->vq_aio_context);
+            s->vq_aio_context = NULL;
             return;
         }
-        s->ctx = qemu_get_aio_context();
+    } else if (vs->conf.iothread) {
+        AioContext *ctx = iothread_get_aio_context(vs->conf.iothread);
+        for (uint16_t i = 0; i < num_vqs; i++) {
+            s->vq_aio_context[i] = ctx;
+        }
+
+        /* Released in virtio_scsi_dataplane_cleanup() */
+        object_ref(OBJECT(vs->conf.iothread));
+    } else {
+        AioContext *ctx = qemu_get_aio_context();
+        for (unsigned i = 0; i < num_vqs; i++) {
+            s->vq_aio_context[i] = ctx;
+        }
+    }
+}
+
+/* Context: BQL held */
+void virtio_scsi_dataplane_cleanup(VirtIOSCSI *s)
+{
+    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+
+    if (vs->conf.iothread_vq_mapping_list) {
+        iothread_vq_mapping_cleanup(vs->conf.iothread_vq_mapping_list);
     }
+
+    if (vs->conf.iothread) {
+        object_unref(OBJECT(vs->conf.iothread));
+    }
+
+    g_free(s->vq_aio_context);
+    s->vq_aio_context = NULL;
 }
 
 static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_set_host_notifier(VirtIOSCSI *s, VirtQueue *vq, int n)
 }
 
 /* Context: BH in IOThread */
-static void virtio_scsi_dataplane_stop_bh(void *opaque)
+static void virtio_scsi_dataplane_stop_vq_bh(void *opaque)
 {
-    VirtIOSCSI *s = opaque;
-    VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
+    AioContext *ctx = qemu_get_current_aio_context();
+    VirtQueue *vq = opaque;
     EventNotifier *host_notifier;
-    int i;
 
-    virtio_queue_aio_detach_host_notifier(vs->ctrl_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->ctrl_vq);
+    virtio_queue_aio_detach_host_notifier(vq, ctx);
+    host_notifier = virtio_queue_get_host_notifier(vq);
 
     /*
      * Test and clear notifier after disabling event, in case poll callback
      * didn't have time to run.
      */
     virtio_queue_host_notifier_read(host_notifier);
-
-    virtio_queue_aio_detach_host_notifier(vs->event_vq, s->ctx);
-    host_notifier = virtio_queue_get_host_notifier(vs->event_vq);
-    virtio_queue_host_notifier_read(host_notifier);
-
-    for (i = 0; i < vs->conf.num_queues; i++) {
-        virtio_queue_aio_detach_host_notifier(vs->cmd_vqs[i], s->ctx);
-        host_notifier = virtio_queue_get_host_notifier(vs->cmd_vqs[i]);
-        virtio_queue_host_notifier_read(host_notifier);
-    }
 }
 
 /* Context: BQL held */
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     smp_wmb(); /* paired with aio_notify_accept() */
 
     if (s->bus.drain_count == 0) {
-        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq, s->ctx);
-        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq, s->ctx);
+        virtio_queue_aio_attach_host_notifier(vs->ctrl_vq,
+                                              s->vq_aio_context[0]);
+        virtio_queue_aio_attach_host_notifier_no_poll(vs->event_vq,
+                                                      s->vq_aio_context[1]);
 
         for (i = 0; i < vs->conf.num_queues; i++) {
-            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], s->ctx);
+            AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+            virtio_queue_aio_attach_host_notifier(vs->cmd_vqs[i], ctx);
         }
     }
     return 0;
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     s->dataplane_stopping = true;
 
     if (s->bus.drain_count == 0) {
-        aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
+        for (i = 0; i < vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED; i++) {
+            VirtQueue *vq = virtio_get_queue(&vs->parent_obj, i);
+            AioContext *ctx = s->vq_aio_context[i];
+            aio_wait_bh_oneshot(ctx, virtio_scsi_dataplane_stop_vq_bh, vq);
+        }
     }
 
     blk_drain_all(); /* ensure there are no in-flight requests */
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/qdev-properties.h"
 #include "hw/scsi/scsi.h"
 #include "scsi/constants.h"
+#include "hw/virtio/iothread-vq-mapping.h"
 #include "hw/virtio/virtio-bus.h"
 #include "hw/virtio/virtio-access.h"
 #include "trace.h"
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
     g_free(n);
 }
 
-static inline void virtio_scsi_ctx_check(VirtIOSCSI *s, SCSIDevice *d)
-{
-    if (s->dataplane_started && d && blk_is_available(d->conf.blk)) {
-        assert(blk_get_aio_context(d->conf.blk) == s->ctx);
-    }
-}
-
 static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
 {
     VirtIOSCSI *s = req->dev;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_flush_defer_tmf_to_aio_context(VirtIOSCSI *s)
 
     assert(!s->dataplane_started);
 
-    if (s->ctx) {
+    for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+        AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+
         /* Our BH only runs after previously scheduled BHs */
-        aio_wait_bh_oneshot(s->ctx, dummy_bh, NULL);
+        aio_wait_bh_oneshot(ctx, dummy_bh, NULL);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
     AioContext *ctx;
     int ret = 0;
 
-    virtio_scsi_ctx_check(s, d);
     /* Here VIRTIO_SCSI_S_OK means "FUNCTION COMPLETE". */
     req->resp.tmf.response = VIRTIO_SCSI_S_OK;
 
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
 
     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
     case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
+        g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
+
         if (!d) {
             goto fail;
         }
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
 
         qatomic_inc(&req->remaining);
 
-        ctx = s->ctx ?: qemu_get_aio_context();
-        virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+        for (uint32_t i = 0; i < s->parent_obj.conf.num_queues; i++) {
+            ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i];
+
+            if (!g_hash_table_add(aio_contexts, ctx)) {
+                continue; /* skip previously added AioContext */
+            }
+
+            virtio_scsi_defer_tmf_to_aio_context(req, ctx);
+        }
 
         virtio_scsi_tmf_dec_remaining(req);
         ret = -EINPROGRESS;
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_handle_ctrl_vq(VirtIOSCSI *s, VirtQueue *vq)
  */
 static bool virtio_scsi_defer_to_dataplane(VirtIOSCSI *s)
 {
-    if (!s->ctx || s->dataplane_started) {
+    if (s->dataplane_started) {
         return false;
     }
+    if (s->vq_aio_context[0] == qemu_get_aio_context()) {
+        return false; /* not using IOThreads */
+    }
 
     virtio_device_start_ioeventfd(&s->parent_obj.parent_obj);
     return !s->dataplane_fenced;
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_handle_cmd_req_prepare(VirtIOSCSI *s, VirtIOSCSIReq *req)
         virtio_scsi_complete_cmd_req(req);
         return -ENOENT;
     }
-    virtio_scsi_ctx_check(s, d);
     req->sreq = scsi_req_new(d, req->req.cmd.tag,
                              virtio_scsi_get_lun(req->req.cmd.lun),
                              req->req.cmd.cdb, vs->cdb_size, req);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 {
     VirtIODevice *vdev = VIRTIO_DEVICE(hotplug_dev);
     VirtIOSCSI *s = VIRTIO_SCSI(vdev);
+    AioContext *ctx = s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED];
     SCSIDevice *sd = SCSI_DEVICE(dev);
-    int ret;
 
-    if (s->ctx && !s->dataplane_fenced) {
-        ret = blk_set_aio_context(sd->conf.blk, s->ctx, errp);
-        if (ret < 0) {
-            return;
-        }
+    if (ctx != qemu_get_aio_context() && !s->dataplane_fenced) {
+        /*
+         * Try to make the BlockBackend's AioContext match ours. Ignore failure
+         * because I/O will still work although block jobs and other users
+         * might be slower when multiple AioContexts use a BlockBackend.
+         */
+        blk_set_aio_context(sd->conf.blk, ctx, errp);
     }
 
     if (virtio_vdev_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG)) {
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_hotunplug(HotplugHandler *hotplug_dev, DeviceState *dev,
 
     qdev_simple_device_unplug_cb(hotplug_dev, dev, errp);
 
-    if (s->ctx) {
+    if (s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED] != qemu_get_aio_context()) {
         /* If other users keep the BlockBackend in the iothread, that's ok */
         blk_set_aio_context(sd->conf.blk, qemu_get_aio_context(), NULL);
     }
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_drained_begin(SCSIBus *bus)
 
     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
-        virtio_queue_aio_detach_host_notifier(vq, s->ctx);
+        virtio_queue_aio_detach_host_notifier(vq, s->vq_aio_context[i]);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_drained_end(SCSIBus *bus)
 
     for (uint32_t i = 0; i < total_queues; i++) {
         VirtQueue *vq = virtio_get_queue(vdev, i);
+        AioContext *ctx = s->vq_aio_context[i];
+
         if (vq == vs->event_vq) {
-            virtio_queue_aio_attach_host_notifier_no_poll(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier_no_poll(vq, ctx);
         } else {
-            virtio_queue_aio_attach_host_notifier(vq, s->ctx);
+            virtio_queue_aio_attach_host_notifier(vq, ctx);
         }
     }
 }
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_common_unrealize(DeviceState *dev)
     virtio_cleanup(vdev);
 }
 
+/* main loop */
 static void virtio_scsi_device_unrealize(DeviceState *dev)
 {
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
 
     virtio_scsi_reset_tmf_bh(s);
-
+    virtio_scsi_dataplane_cleanup(s);
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
     qemu_mutex_destroy(&s->tmf_bh_lock);
@@ -XXX,XX +XXX,XX @@ static const Property virtio_scsi_properties[] = {
                       VIRTIO_SCSI_F_CHANGE, true),
     DEFINE_PROP_LINK("iothread", VirtIOSCSI, parent_obj.conf.iothread,
                      TYPE_IOTHREAD, IOThread *),
+    DEFINE_PROP_IOTHREAD_VQ_MAPPING_LIST("iothread-vq-mapping", VirtIOSCSI,
+                                         parent_obj.conf.iothread_vq_mapping_list),
 };
 
 static const VMStateDescription vmstate_virtio_scsi = {
-- 
2.48.1

From: Max Reitz <mreitz@redhat.com>

path_combine() naturally tries to preserve a protocol prefix. However,
it recognizes such a prefix by scanning for the first colon; which is
different from what path_has_protocol() does: There only is a protocol
prefix if there is a colon before the first slash.

A prefix that path_has_protocol() does not recognize is not a protocol
prefix at all, and should thus not be treated as one.

Case in point, before this patch:
$ ./qemu-img create -f qcow2 -b backing.qcow2 ./top:image.qcow2
qemu-img: ./top:image.qcow2: Could not open './top:backing.qcow2':
No such file or directory

Afterwards:
$ ./qemu-img create -f qcow2 -b backing.qcow2 ./top:image.qcow2
qemu-img: ./top:image.qcow2: Could not open './backing.qcow2':
No such file or directory

Reported-by: yangyang <yangyang@redhat.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
Reviewed-by: Eric Blake <eblake@redhat.com>
Message-id: 20170522195217.12991-2-mreitz@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/block.c b/block.c
index XXXXXXX..XXXXXXX 100644
--- a/block.c
+++ b/block.c
@@ -XXX,XX +XXX,XX @@ void path_combine(char *dest, int dest_size,
     if (path_is_absolute(filename)) {
         pstrcpy(dest, dest_size, filename);
     } else {
-        p = strchr(base_path, ':');
-        if (p)
-            p++;
-        else
-            p = base_path;
+        const char *protocol_stripped = NULL;
+
+        if (path_has_protocol(base_path)) {
+            protocol_stripped = strchr(base_path, ':');
+            if (protocol_stripped) {
+                protocol_stripped++;
+            }
+        }
+        p = protocol_stripped ?: base_path;
+
         p1 = strrchr(base_path, '/');
 #ifdef _WIN32
         {
-- 
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

Previously the ctrl virtqueue was handled in the AioContext where SCSI
requests are processed. When IOThread Virtqueue Mapping was added things
became more complicated because SCSI requests could run in other
AioContexts.

Simplify by handling the ctrl virtqueue in the main loop where reset
operations can be performed. Note that BHs are still used for canceling
SCSI requests in their AioContexts, but at least the main loop activity
doesn't need BHs anymore.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20250311132616.1049687-13-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 include/hw/virtio/virtio-scsi.h |   8 --
 hw/scsi/virtio-scsi-dataplane.c |   6 ++
 hw/scsi/virtio-scsi.c           | 144 ++++++--------------------------
 3 files changed, 33 insertions(+), 125 deletions(-)

diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/virtio-scsi.h
+++ b/include/hw/virtio/virtio-scsi.h
@@ -XXX,XX +XXX,XX @@ struct VirtIOSCSI {
 
     QemuMutex ctrl_lock; /* protects ctrl_vq */
 
-    /*
-     * TMFs deferred to main loop BH. These fields are protected by
-     * tmf_bh_lock.
-     */
-    QemuMutex tmf_bh_lock;
-    QEMUBH *tmf_bh;
-    QTAILQ_HEAD(, VirtIOSCSIReq) tmf_bh_list;
-
     /* Fields for dataplane below */
     AioContext **vq_aio_context; /* per-virtqueue AioContext pointer */
 
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
             s->vq_aio_context[i] = ctx;
         }
     }
+
+    /*
+     * Always handle the ctrl virtqueue in the main loop thread where device
+     * resets can be performed.
+     */
+    s->vq_aio_context[0] = qemu_get_aio_context();
 }
 
 /* Context: BQL held */
diff --git a/hw/scsi/virtio-scsi.c b/hw/scsi/virtio-scsi.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi.c
+++ b/hw/scsi/virtio-scsi.c
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_cancel_notify(Notifier *notifier, void *data)
     g_free(n);
 }
 
-static void virtio_scsi_do_one_tmf_bh(VirtIOSCSIReq *req)
-{
-    VirtIOSCSI *s = req->dev;
-    SCSIDevice *d = virtio_scsi_device_get(s, req->req.tmf.lun);
-    BusChild *kid;
-    int target;
-
-    switch (req->req.tmf.subtype) {
-    case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-        if (!d) {
-            req->resp.tmf.response = VIRTIO_SCSI_S_BAD_TARGET;
-            goto out;
-        }
-        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
-            req->resp.tmf.response = VIRTIO_SCSI_S_INCORRECT_LUN;
-            goto out;
-        }
-        qatomic_inc(&s->resetting);
-        device_cold_reset(&d->qdev);
-        qatomic_dec(&s->resetting);
-        break;
-
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        target = req->req.tmf.lun[1];
-        qatomic_inc(&s->resetting);
-
-        rcu_read_lock();
-        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
-            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
-            if (d1->channel == 0 && d1->id == target) {
-                device_cold_reset(&d1->qdev);
-            }
-        }
-        rcu_read_unlock();
-
-        qatomic_dec(&s->resetting);
-        break;
-
-    default:
-        g_assert_not_reached();
-    }
-
-out:
-    object_unref(OBJECT(d));
-    virtio_scsi_complete_req(req, &s->ctrl_lock);
-}
-
-/* Some TMFs must be processed from the main loop thread */
-static void virtio_scsi_do_tmf_bh(void *opaque)
-{
-    VirtIOSCSI *s = opaque;
-    QTAILQ_HEAD(, VirtIOSCSIReq) reqs = QTAILQ_HEAD_INITIALIZER(reqs);
-    VirtIOSCSIReq *req;
-    VirtIOSCSIReq *tmp;
-
-    GLOBAL_STATE_CODE();
-
-    WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
-        QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
-            QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
-            QTAILQ_INSERT_TAIL(&reqs, req, next);
-        }
-
-        qemu_bh_delete(s->tmf_bh);
-        s->tmf_bh = NULL;
-    }
-
-    QTAILQ_FOREACH_SAFE(req, &reqs, next, tmp) {
-        QTAILQ_REMOVE(&reqs, req, next);
-        virtio_scsi_do_one_tmf_bh(req);
-    }
-}
-
-static void virtio_scsi_reset_tmf_bh(VirtIOSCSI *s)
-{
-    VirtIOSCSIReq *req;
-    VirtIOSCSIReq *tmp;
-
-    GLOBAL_STATE_CODE();
-
-    /* Called after ioeventfd has been stopped, so tmf_bh_lock is not needed */
-    if (s->tmf_bh) {
-        qemu_bh_delete(s->tmf_bh);
-        s->tmf_bh = NULL;
-    }
-
-    QTAILQ_FOREACH_SAFE(req, &s->tmf_bh_list, next, tmp) {
-        QTAILQ_REMOVE(&s->tmf_bh_list, req, next);
-
-        /* SAM-6 6.3.2 Hard reset */
-        req->resp.tmf.response = VIRTIO_SCSI_S_TARGET_FAILURE;
-        virtio_scsi_complete_req(req, &req->dev->ctrl_lock);
-    }
-}
-
-static void virtio_scsi_defer_tmf_to_main_loop(VirtIOSCSIReq *req)
-{
-    VirtIOSCSI *s = req->dev;
-
-    WITH_QEMU_LOCK_GUARD(&s->tmf_bh_lock) {
-        QTAILQ_INSERT_TAIL(&s->tmf_bh_list, req, next);
-
-        if (!s->tmf_bh) {
-            s->tmf_bh = qemu_bh_new(virtio_scsi_do_tmf_bh, s);
-            qemu_bh_schedule(s->tmf_bh);
-        }
-    }
-}
-
 static void virtio_scsi_tmf_cancel_req(VirtIOSCSIReq *tmf, SCSIRequest *r)
 {
     VirtIOSCSICancelNotifier *notifier;
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_do_tmf(VirtIOSCSI *s, VirtIOSCSIReq *req)
         break;
 
     case VIRTIO_SCSI_T_TMF_LOGICAL_UNIT_RESET:
-    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET:
-        virtio_scsi_defer_tmf_to_main_loop(req);
-        ret = -EINPROGRESS;
+        if (!d) {
+            goto fail;
+        }
+        if (d->lun != virtio_scsi_get_lun(req->req.tmf.lun)) {
+            goto incorrect_lun;
+        }
+        qatomic_inc(&s->resetting);
+        device_cold_reset(&d->qdev);
+        qatomic_dec(&s->resetting);
         break;
 
+    case VIRTIO_SCSI_T_TMF_I_T_NEXUS_RESET: {
+        BusChild *kid;
+        int target = req->req.tmf.lun[1];
+        qatomic_inc(&s->resetting);
+
+        rcu_read_lock();
+        QTAILQ_FOREACH_RCU(kid, &s->bus.qbus.children, sibling) {
+            SCSIDevice *d1 = SCSI_DEVICE(kid->child);
+            if (d1->channel == 0 && d1->id == target) {
+                device_cold_reset(&d1->qdev);
+            }
+        }
+        rcu_read_unlock();
+
+        qatomic_dec(&s->resetting);
+        break;
+    }
+
     case VIRTIO_SCSI_T_TMF_ABORT_TASK_SET:
     case VIRTIO_SCSI_T_TMF_CLEAR_TASK_SET: {
         g_autoptr(GHashTable) aio_contexts = g_hash_table_new(NULL, NULL);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_reset(VirtIODevice *vdev)
 
     assert(!s->dataplane_started);
 
-    virtio_scsi_reset_tmf_bh(s);
     virtio_scsi_flush_defer_tmf_to_aio_context(s);
 
     qatomic_inc(&s->resetting);
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_realize(DeviceState *dev, Error **errp)
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
     Error *err = NULL;
 
-    QTAILQ_INIT(&s->tmf_bh_list);
     qemu_mutex_init(&s->ctrl_lock);
     qemu_mutex_init(&s->event_lock);
-    qemu_mutex_init(&s->tmf_bh_lock);
 
     virtio_scsi_common_realize(dev,
                                virtio_scsi_handle_ctrl,
@@ -XXX,XX +XXX,XX @@ static void virtio_scsi_device_unrealize(DeviceState *dev)
 {
     VirtIOSCSI *s = VIRTIO_SCSI(dev);
 
-    virtio_scsi_reset_tmf_bh(s);
     virtio_scsi_dataplane_cleanup(s);
     qbus_set_hotplug_handler(BUS(&s->bus), NULL);
     virtio_scsi_common_unrealize(dev);
-    qemu_mutex_destroy(&s->tmf_bh_lock);
     qemu_mutex_destroy(&s->event_lock);
     qemu_mutex_destroy(&s->ctrl_lock);
 }
-- 
2.48.1
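A configuration sketch for the new iothread-vq-mapping parameter introduced above (hypothetical IDs; JSON -device syntax by analogy with the virtio-blk article linked in the commit message):

  $ qemu-system-x86_64 \
        -object iothread,id=iothread0 \
        -object iothread,id=iothread1 \
        -device '{"driver":"virtio-scsi-pci","iothread-vq-mapping":[{"iothread":"iothread0"},{"iothread":"iothread1"}]}'

Without an explicit "vqs" list, virtqueues are assigned round-robin across the listed IOThreads.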
From: "Daniel P. Berrange" <berrange@redhat.com>

The --image-opts flag can only be used to affect the parsing
of the source image. The target image has to be specified in
the traditional style regardless, since it needs to be passed
to the bdrv_create() API which does not support the new style
opts.

Reviewed-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Signed-off-by: Daniel P. Berrange <berrange@redhat.com>
Message-id: 20170515164712.6643-3-berrange@redhat.com
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 qemu-img.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/qemu-img.c b/qemu-img.c
index XXXXXXX..XXXXXXX 100644
--- a/qemu-img.c
+++ b/qemu-img.c
@@ -XXX,XX +XXX,XX @@ static int img_dd(int argc, char **argv)
         goto out;
     }
 
-    blk2 = img_open(image_opts, out.filename, out_fmt, BDRV_O_RDWR,
-                    false, false, false);
+    /* TODO, we can't honour --image-opts for the target,
+     * since it needs to be given in a format compatible
+     * with the bdrv_create() call above which does not
+     * support image-opts style.
+     */
+    blk2 = img_open_file(out.filename, out_fmt, BDRV_O_RDWR,
+                         false, false, false);
 
     if (!blk2) {
         ret = -1;
-- 
1.8.3.1

From: Stefan Hajnoczi <stefanha@redhat.com>

Peter Krempa and Kevin Wolf observed that iothread-vq-mapping is
confusing to use because the control and event virtqueues have a fixed
location before the command virtqueues but need to be treated
differently.

Only expose the command virtqueues via iothread-vq-mapping so that the
command-line parameter is intuitive: it controls where SCSI requests are
processed.

The control virtqueue needs to be hardcoded to the main loop thread for
technical reasons anyway. Kevin also pointed out that it's better to
place the event virtqueue in the main loop thread since its no-poll
behavior would prevent polling if assigned to an IOThread.

This change is its own commit to avoid squashing the previous commit.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Suggested-by: Peter Krempa <pkrempa@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-ID: <20250311132616.1049687-14-stefanha@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
---
 hw/scsi/virtio-scsi-dataplane.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
     VirtIODevice *vdev = VIRTIO_DEVICE(s);
     BusState *qbus = qdev_get_parent_bus(DEVICE(vdev));
     VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus);
-    uint16_t num_vqs = vs->conf.num_queues + VIRTIO_SCSI_VQ_NUM_FIXED;
 
     if (vs->conf.iothread && vs->conf.iothread_vq_mapping_list) {
         error_setg(errp,
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_setup(VirtIOSCSI *s, Error **errp)
         }
     }
 
-    s->vq_aio_context = g_new(AioContext *, num_vqs);
+    s->vq_aio_context = g_new(AioContext *, vs->conf.num_queues +
+                                            VIRTIO_SCSI_VQ_NUM_FIXED);
+
+    /*
+     * Handle the ctrl virtqueue in the main loop thread where device resets
+     * can be performed.
+     */
+    s->vq_aio_context[0] = qemu_get_aio_context();
+
+    /*
+     * Handle the event virtqueue in the main loop thread where its no_poll
+     * behavior won't stop IOThread polling.
+     */
+    s->vq_aio_context[1] = qemu_get_aio_context();
 
     if (vs->conf.iothread_vq_mapping_list) {
         if (!iothread_vq_mapping_apply(vs->conf.iothread_vq_mapping_list,
-                                       s->vq_aio_context, num_vqs, errp)) {
+                                       &s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED],
+                                       vs->conf.num_queues, errp)) {
             g_free(s->vq_aio_context);
             s->vq_aio_context = NULL;
             return;
         }
     } else if (vs->conf.iothread) {
         AioContext *ctx = iothread_get_aio_context(vs->conf.iothread);
-        for (uint16_t i = 0; i < num_vqs; i++) {
-            s->vq_aio_context[i] = ctx;
+        for (uint16_t i = 0; i < vs->conf.num_queues; i++) {
+            s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
         }
 
         /* Released in virtio_scsi_dataplane_cleanup() */
         object_ref(OBJECT(vs->conf.iothread));
     } else {
         AioContext *ctx = qemu_get_aio_context();
-        for (unsigned i = 0; i < num_vqs; i++) {
-            s->vq_aio_context[i] = ctx;
+        for (unsigned i = 0; i < vs->conf.num_queues; i++) {
+            s->vq_aio_context[VIRTIO_SCSI_VQ_NUM_FIXED + i] = ctx;
         }
     }
-
-    /*
-     * Always handle the ctrl virtqueue in the main loop thread where device
-     * resets can be performed.
-     */
-    s->vq_aio_context[0] = qemu_get_aio_context();
 }
 
 /* Context: BQL held */
-- 
2.48.1
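Under the final semantics above, "vqs" indices count command virtqueues only; the ctrl and event virtqueues always stay in the main loop thread and are not listed. A sketch with hypothetical IDs:

  -device '{"driver":"virtio-scsi-pci","num_queues":4,"iothread-vq-mapping":[{"iothread":"iothread0","vqs":[0,1]},{"iothread":"iothread1","vqs":[2,3]}]}'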