The following changes since commit 0ab4537f08e09b13788db67efd760592fb7db769:

  Merge remote-tracking branch 'remotes/stefanberger/tags/pull-tpm-2018-03-07-1' into staging (2018-03-08 12:56:39 +0000)

are available in the Git repository at:

  git://github.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 4486e89c219c0d1b9bd8dfa0b1dd5b0d51ff2268:

  vl: introduce vm_shutdown() (2018-03-08 17:38:51 +0000)

----------------------------------------------------------------

----------------------------------------------------------------

Deepa Srinivasan (1):
  block: Fix qemu crash when using scsi-block

Fam Zheng (1):
  README: Fix typo 'git-publish'

Sergio Lopez (1):
  virtio-blk: dataplane: Don't batch notifications if EVENT_IDX is
    present

Stefan Hajnoczi (4):
  block: add aio_wait_bh_oneshot()
  virtio-blk: fix race between .ioeventfd_stop() and vq handler
  virtio-scsi: fix race between .ioeventfd_stop() and vq handler
  vl: introduce vm_shutdown()

 include/block/aio-wait.h        | 13 +++++++++++
 include/sysemu/iothread.h       |  1 -
 include/sysemu/sysemu.h         |  1 +
 block/block-backend.c           | 51 ++++++++++++++++++++---------------------
 cpus.c                          | 16 ++++++++++---
 hw/block/dataplane/virtio-blk.c | 39 +++++++++++++++++++++++--------
 hw/scsi/virtio-scsi-dataplane.c |  9 ++++----
 iothread.c                      | 31 -------------------------
 util/aio-wait.c                 | 31 +++++++++++++++++++++++++
 vl.c                            | 13 +++--------
 README                          |  2 +-
 11 files changed, 122 insertions(+), 85 deletions(-)

--
2.14.3

The following changes since commit 7208429223963c405c62fa2611398f1aa8033593:

  Merge tag 'mem-2022-10-28' of https://github.com/davidhildenbrand/qemu into staging (2022-10-30 18:31:59 -0400)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 6c32fc0df9cd901add75618c831fb26a9eb742cb:

  block/blkio: Make driver nvme-io_uring take a "path" instead of a "filename" (2022-10-31 14:35:14 -0400)

----------------------------------------------------------------
Pull request

Note that we're still discussing "block/blkio: Make driver nvme-io_uring take a
"path" instead of a "filename"". I have sent the pull request now so everything
is ready for the soft freeze tomorrow if we decide to go ahead with the patch.

----------------------------------------------------------------

Alberto Faria (3):
  block/blkio: Add virtio-blk-vfio-pci BlockDriver
  block/blkio: Tolerate device size changes
  block/blkio: Make driver nvme-io_uring take a "path" instead of a
    "filename"

 qapi/block-core.json | 22 +++++++++++++++++++--
 block/blkio.c        | 47 ++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 63 insertions(+), 6 deletions(-)

--
2.38.1
Deleted patch
From: Deepa Srinivasan <deepa.srinivasan@oracle.com>

Starting qemu with the following arguments causes qemu to segfault:
... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name=
iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1

This patch fixes blk_aio_ioctl() so it does not pass stack addresses to
blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More
details about the bug follow.

blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the
coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter().

When blk_aio_ioctl() is executed from within a coroutine context (e.g.
iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to
the current coroutine's wakeup queue. blk_aio_ioctl() then returns.

When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer:
....
    BlkRwCo *rwco = &acb->rwco;

    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
                             rwco->qiov->iov[0].iov_base);  <--- qiov is
                                                                 invalid here
...

In the case when blk_aio_ioctl() is called from a non-coroutine context,
blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls
qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine
execution is complete, control returns to blk_aio_ioctl_entry() after the call
to blk_co_ioctl(). There is no invalid reference after this point, but the
function is still holding on to invalid pointers.

The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer
rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the
coroutine function casts it to QEMUIOVector or uses the void pointer directly.

Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/block-backend.c | 51 +++++++++++++++++++++++++--------------------------
 1 file changed, 25 insertions(+), 26 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index XXXXXXX..XXXXXXX 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
 typedef struct BlkRwCo {
     BlockBackend *blk;
     int64_t offset;
-    QEMUIOVector *qiov;
+    void *iobuf;
     int ret;
     BdrvRequestFlags flags;
 } BlkRwCo;
@@ -XXX,XX +XXX,XX @@ typedef struct BlkRwCo {
 static void blk_read_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
-                              rwco->qiov, rwco->flags);
+    rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
+                              qiov, rwco->flags);
 }
 
 static void blk_write_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
-                               rwco->qiov, rwco->flags);
+    rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
+                               qiov, rwco->flags);
 }
 
 static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
     rwco = (BlkRwCo) {
         .blk = blk,
         .offset = offset,
-        .qiov = &qiov,
+        .iobuf = &qiov,
         .flags = flags,
         .ret = NOT_DONE,
     };
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete_bh(void *opaque)
 }
 
 static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
-                                QEMUIOVector *qiov, CoroutineEntry co_entry,
+                                void *iobuf, CoroutineEntry co_entry,
                                 BdrvRequestFlags flags,
                                 BlockCompletionFunc *cb, void *opaque)
 {
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
     acb->rwco = (BlkRwCo) {
         .blk = blk,
         .offset = offset,
-        .qiov = qiov,
+        .iobuf = iobuf,
         .flags = flags,
         .ret = NOT_DONE,
     };
@@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    assert(rwco->qiov->size == acb->bytes);
+    assert(qiov->size == acb->bytes);
     rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
-                              rwco->qiov, rwco->flags);
+                              qiov, rwco->flags);
     blk_aio_complete(acb);
 }
 
@@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque)
 {
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
+    QEMUIOVector *qiov = rwco->iobuf;
 
-    assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
+    assert(!qiov || qiov->size == acb->bytes);
     rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
-                               rwco->qiov, rwco->flags);
+                               qiov, rwco->flags);
     blk_aio_complete(acb);
 }
 
@@ -XXX,XX +XXX,XX @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
 static void blk_ioctl_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
+    QEMUIOVector *qiov = rwco->iobuf;
+
     rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+                             qiov->iov[0].iov_base);
 }
 
 int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
@@ -XXX,XX +XXX,XX @@ static void blk_aio_ioctl_entry(void *opaque)
     BlkAioEmAIOCB *acb = opaque;
     BlkRwCo *rwco = &acb->rwco;
 
-    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
-                             rwco->qiov->iov[0].iov_base);
+    rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
+
     blk_aio_complete(acb);
 }
 
 BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
                           BlockCompletionFunc *cb, void *opaque)
 {
-    QEMUIOVector qiov;
-    struct iovec iov;
-
-    iov = (struct iovec) {
-        .iov_base = buf,
-        .iov_len = 0,
-    };
-    qemu_iovec_init_external(&qiov, &iov, 1);
-
-    return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
+    return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
 }
 
 int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
@@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
 static void blk_pdiscard_entry(void *opaque)
 {
     BlkRwCo *rwco = opaque;
-    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
+    QEMUIOVector *qiov = rwco->iobuf;
+
+    rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
 }
 
 int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
--
2.14.3
Deleted patch
From: Fam Zheng <famz@redhat.com>

Reported-by: Alberto Garcia <berto@igalia.com>
Signed-off-by: Fam Zheng <famz@redhat.com>
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
Message-id: 20180306024328.19195-1-famz@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 README | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README b/README
index XXXXXXX..XXXXXXX 100644
--- a/README
+++ b/README
@@ -XXX,XX +XXX,XX @@ The QEMU website is also maintained under source control.
   git clone git://git.qemu.org/qemu-web.git
   https://www.qemu.org/2017/02/04/the-new-qemu-website-is-up/
 
-A 'git-profile' utility was created to make above process less
+A 'git-publish' utility was created to make above process less
 cumbersome, and is highly recommended for making regular contributions,
 or even just for sending consecutive patch series revisions. It also
 requires a working 'git send-email' setup, and by default doesn't
--
2.14.3
Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before
stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa
("iothread: Stop threads before main() quits") tried to work around the
fact that emulation was still active during termination by stopping
iothreads. They suffer from race conditions:
1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the
   virtio_scsi_ctx_check() assertion failure because the BDS AioContext
   has been modified by iothread_stop_all().
2. Guest vq kick racing with main loop termination leaves a readable
   ioeventfd that is handled by the next aio_poll() when external
   clients are enabled again, resulting in unwanted emulation activity.

This patch obsoletes those commits by fully disabling emulation activity
when vcpus are stopped.

Use the new vm_shutdown() function instead of pause_all_vcpus() so that
vm change state handlers are invoked too. Virtio devices will now stop
their ioeventfds, preventing further emulation activity after vm_stop().

Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a
QMP STOP event that may affect existing clients.

It is no longer necessary to call replay_disable_events() directly since
vm_shutdown() does so already.

Drop iothread_stop_all() since it is no longer used.

Cc: Fam Zheng <famz@redhat.com>
Cc: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180307144205.20619-5-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/sysemu/iothread.h |  1 -
 include/sysemu/sysemu.h   |  1 +
 cpus.c                    | 16 +++++++++++++---
 iothread.c                | 31 -------------------------------
 vl.c                      | 13 +++----------
 5 files changed, 17 insertions(+), 45 deletions(-)

diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/iothread.h
+++ b/include/sysemu/iothread.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
 char *iothread_get_id(IOThread *iothread);
 IOThread *iothread_by_id(const char *id);
 AioContext *iothread_get_aio_context(IOThread *iothread);
-void iothread_stop_all(void);
 GMainContext *iothread_get_g_main_context(IOThread *iothread);
 
 /*
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
index XXXXXXX..XXXXXXX 100644
--- a/include/sysemu/sysemu.h
+++ b/include/sysemu/sysemu.h
@@ -XXX,XX +XXX,XX @@ void vm_start(void);
 int vm_prepare_start(void);
 int vm_stop(RunState state);
 int vm_stop_force_state(RunState state);
+int vm_shutdown(void);
 
 typedef enum WakeupReason {
     /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */
diff --git a/cpus.c b/cpus.c
index XXXXXXX..XXXXXXX 100644
--- a/cpus.c
+++ b/cpus.c
@@ -XXX,XX +XXX,XX @@ void cpu_synchronize_all_pre_loadvm(void)
     }
 }
 
-static int do_vm_stop(RunState state)
+static int do_vm_stop(RunState state, bool send_stop)
 {
     int ret = 0;
 
@@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state)
         pause_all_vcpus();
         runstate_set(state);
         vm_state_notify(0, state);
-        qapi_event_send_stop(&error_abort);
+        if (send_stop) {
+            qapi_event_send_stop(&error_abort);
+        }
     }
 
     bdrv_drain_all();
@@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state)
     return ret;
 }
 
+/* Special vm_stop() variant for terminating the process. Historically clients
+ * did not expect a QMP STOP event and so we need to retain compatibility.
+ */
+int vm_shutdown(void)
+{
+    return do_vm_stop(RUN_STATE_SHUTDOWN, false);
+}
+
 static bool cpu_can_run(CPUState *cpu)
 {
     if (cpu->stop) {
@@ -XXX,XX +XXX,XX @@ int vm_stop(RunState state)
         return 0;
     }
 
-    return do_vm_stop(state);
+    return do_vm_stop(state, true);
 }
 
 /**
diff --git a/iothread.c b/iothread.c
index XXXXXXX..XXXXXXX 100644
--- a/iothread.c
+++ b/iothread.c
@@ -XXX,XX +XXX,XX @@ void iothread_stop(IOThread *iothread)
     qemu_thread_join(&iothread->thread);
 }
 
-static int iothread_stop_iter(Object *object, void *opaque)
-{
-    IOThread *iothread;
-
-    iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
-    if (!iothread) {
-        return 0;
-    }
-    iothread_stop(iothread);
-    return 0;
-}
-
 static void iothread_instance_init(Object *obj)
 {
     IOThread *iothread = IOTHREAD(obj);
@@ -XXX,XX +XXX,XX @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
     return head;
 }
 
-void iothread_stop_all(void)
-{
-    Object *container = object_get_objects_root();
-    BlockDriverState *bs;
-    BdrvNextIterator it;
-
-    for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
-        AioContext *ctx = bdrv_get_aio_context(bs);
-        if (ctx == qemu_get_aio_context()) {
-            continue;
-        }
-        aio_context_acquire(ctx);
-        bdrv_set_aio_context(bs, qemu_get_aio_context());
-        aio_context_release(ctx);
-    }
-
-    object_child_foreach(container, iothread_stop_iter, NULL);
-}
-
 static gpointer iothread_g_main_context_init(gpointer opaque)
 {
     AioContext *ctx;
diff --git a/vl.c b/vl.c
index XXXXXXX..XXXXXXX 100644
--- a/vl.c
+++ b/vl.c
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
     os_setup_post();
 
     main_loop();
-    replay_disable_events();
 
-    /* The ordering of the following is delicate. Stop vcpus to prevent new
-     * I/O requests being queued by the guest. Then stop IOThreads (this
-     * includes a drain operation and completes all request processing). At
-     * this point emulated devices are still associated with their IOThreads
-     * (if any) but no longer have any work to do. Only then can we close
-     * block devices safely because we know there is no more I/O coming.
-     */
-    pause_all_vcpus();
-    iothread_stop_all();
+    /* No more vcpu or device emulation activity beyond this point */
+    vm_shutdown();
+
     bdrv_close_all();
 
     res_free();
--
2.14.3

From: Alberto Faria <afaria@redhat.com>

libblkio 1.1.0 [1] introduces a virtio-blk-vfio-pci driver, which
accesses a virtio-blk PCI device using VFIO. Add a corresponding
BlockDriver.

[1] https://gitlab.com/libblkio/libblkio/-/tree/v1.1.0

Signed-off-by: Alberto Faria <afaria@redhat.com>
Message-id: 20221028131635.710267-1-afaria@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/block-core.json | 18 ++++++++++++++++++
 block/blkio.c        |  8 ++++++++
 2 files changed, 26 insertions(+)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -XXX,XX +XXX,XX @@
             'raw', 'rbd',
             { 'name': 'replication', 'if': 'CONFIG_REPLICATION' },
             'ssh', 'throttle', 'vdi', 'vhdx',
+            { 'name': 'virtio-blk-vfio-pci', 'if': 'CONFIG_BLKIO' },
             { 'name': 'virtio-blk-vhost-user', 'if': 'CONFIG_BLKIO' },
             { 'name': 'virtio-blk-vhost-vdpa', 'if': 'CONFIG_BLKIO' },
             'vmdk', 'vpc', 'vvfat' ] }
@@ -XXX,XX +XXX,XX @@
   'data': { 'filename': 'str' },
   'if': 'CONFIG_BLKIO' }
 
+##
+# @BlockdevOptionsVirtioBlkVfioPci:
+#
+# Driver specific block device options for the virtio-blk-vfio-pci backend.
+#
+# @path: path to the PCI device's sysfs directory (e.g.
+#        /sys/bus/pci/devices/0000:00:01.0).
+#
+# Since: 7.2
+##
+{ 'struct': 'BlockdevOptionsVirtioBlkVfioPci',
+  'data': { 'path': 'str' },
+  'if': 'CONFIG_BLKIO' }
+
 ##
 # @BlockdevOptionsVirtioBlkVhostUser:
 #
@@ -XXX,XX +XXX,XX @@
       'throttle': 'BlockdevOptionsThrottle',
       'vdi': 'BlockdevOptionsGenericFormat',
       'vhdx': 'BlockdevOptionsGenericFormat',
+      'virtio-blk-vfio-pci':
+                  { 'type': 'BlockdevOptionsVirtioBlkVfioPci',
+                    'if': 'CONFIG_BLKIO' },
       'virtio-blk-vhost-user':
                   { 'type': 'BlockdevOptionsVirtioBlkVhostUser',
                     'if': 'CONFIG_BLKIO' },
diff --git a/block/blkio.c b/block/blkio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -XXX,XX +XXX,XX @@
  */
 #define DRIVER_IO_URING "io_uring"
 #define DRIVER_NVME_IO_URING "nvme-io_uring"
+#define DRIVER_VIRTIO_BLK_VFIO_PCI "virtio-blk-vfio-pci"
 #define DRIVER_VIRTIO_BLK_VHOST_USER "virtio-blk-vhost-user"
 #define DRIVER_VIRTIO_BLK_VHOST_VDPA "virtio-blk-vhost-vdpa"
 
@@ -XXX,XX +XXX,XX @@ static int blkio_file_open(BlockDriverState *bs, QDict *options, int flags,
         ret = blkio_io_uring_open(bs, options, flags, errp);
     } else if (strcmp(blkio_driver, DRIVER_NVME_IO_URING) == 0) {
         ret = blkio_nvme_io_uring(bs, options, flags, errp);
+    } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VFIO_PCI) == 0) {
+        ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
     } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_USER) == 0) {
         ret = blkio_virtio_blk_common_open(bs, options, flags, errp);
     } else if (strcmp(blkio_driver, DRIVER_VIRTIO_BLK_VHOST_VDPA) == 0) {
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
     .bdrv_needs_filename = true,
 );
 
+static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
+    DRIVER_VIRTIO_BLK_VFIO_PCI
+);
+
 static BlockDriver bdrv_virtio_blk_vhost_user = BLKIO_DRIVER(
     DRIVER_VIRTIO_BLK_VHOST_USER
 );
@@ -XXX,XX +XXX,XX @@ static void bdrv_blkio_init(void)
 {
     bdrv_register(&bdrv_io_uring);
     bdrv_register(&bdrv_nvme_io_uring);
+    bdrv_register(&bdrv_virtio_blk_vfio_pci);
    bdrv_register(&bdrv_virtio_blk_vhost_user);
     bdrv_register(&bdrv_virtio_blk_vhost_vdpa);
 }
--
2.38.1
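For illustration only (this example is not part of the patch; the node-name is
made up, and the sysfs path is the one quoted in the QAPI documentation above),
a virtio-blk-vfio-pci node could be added over QMP along these lines:

  { "execute": "blockdev-add",
    "arguments": {
      "driver": "virtio-blk-vfio-pci",
      "node-name": "blkio0",
      "path": "/sys/bus/pci/devices/0000:00:01.0"
    } }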
If the main loop thread invokes .ioeventfd_stop() just as the vq handler
function begins in the IOThread then the handler may lose the race for
the AioContext lock. By the time the vq handler is able to acquire the
AioContext lock the ioeventfd has already been removed and the handler
isn't supposed to run anymore!

Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal
from within the IOThread. This way no races with the vq handler are
possible.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180307144205.20619-3-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 24 +++++++++++++++++-------
 1 file changed, 17 insertions(+), 7 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
     return -ENOSYS;
 }
 
+/* Stop notifications for new requests from guest.
+ *
+ * Context: BH in IOThread
+ */
+static void virtio_blk_data_plane_stop_bh(void *opaque)
+{
+    VirtIOBlockDataPlane *s = opaque;
+    unsigned i;
+
+    for (i = 0; i < s->conf->num_queues; i++) {
+        VirtQueue *vq = virtio_get_queue(s->vdev, i);
+
+        virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
+    }
+}
+
 /* Context: QEMU global mutex held */
 void virtio_blk_data_plane_stop(VirtIODevice *vdev)
 {
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
     trace_virtio_blk_data_plane_stop(s);
 
     aio_context_acquire(s->ctx);
-
-    /* Stop notifications for new requests from guest */
-    for (i = 0; i < nvqs; i++) {
-        VirtQueue *vq = virtio_get_queue(s->vdev, i);
-
-        virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
-    }
+    aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
 
     /* Drain and switch bs back to the QEMU main loop */
     blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
--
2.14.3

From: Alberto Faria <afaria@redhat.com>

Some libblkio drivers may be able to work with regular files (e.g.,
io_uring) or otherwise resizable devices. Conservatively set
BlockDriver::has_variable_length to true to ensure bdrv_nb_sectors()
always gives up-to-date results.

Also implement BlockDriver::bdrv_co_truncate for the case where no
preallocation is needed and the device already has a size compatible
with what was requested.

Signed-off-by: Alberto Faria <afaria@redhat.com>
Message-id: 20221029122031.975273-1-afaria@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 block/blkio.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/block/blkio.c b/block/blkio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -XXX,XX +XXX,XX @@ static int64_t blkio_getlength(BlockDriverState *bs)
     return capacity;
 }
 
+static int coroutine_fn blkio_truncate(BlockDriverState *bs, int64_t offset,
+                                       bool exact, PreallocMode prealloc,
+                                       BdrvRequestFlags flags, Error **errp)
+{
+    int64_t current_length;
+
+    if (prealloc != PREALLOC_MODE_OFF) {
+        error_setg(errp, "Unsupported preallocation mode '%s'",
+                   PreallocMode_str(prealloc));
+        return -ENOTSUP;
+    }
+
+    current_length = blkio_getlength(bs);
+
+    if (offset > current_length) {
+        error_setg(errp, "Cannot grow device");
+        return -EINVAL;
+    } else if (exact && offset != current_length) {
+        error_setg(errp, "Cannot resize device");
+        return -ENOTSUP;
+    }
+
+    return 0;
+}
+
 static int blkio_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
 {
     return 0;
@@ -XXX,XX +XXX,XX @@ static void blkio_refresh_limits(BlockDriverState *bs, Error **errp)
     { \
         .format_name = name, \
         .protocol_name = name, \
+        .has_variable_length = true, \
         .instance_size = sizeof(BDRVBlkioState), \
         .bdrv_file_open = blkio_file_open, \
         .bdrv_close = blkio_close, \
         .bdrv_getlength = blkio_getlength, \
+        .bdrv_co_truncate = blkio_truncate, \
         .bdrv_get_info = blkio_get_info, \
         .bdrv_attach_aio_context = blkio_attach_aio_context, \
         .bdrv_detach_aio_context = blkio_detach_aio_context, \
--
2.38.1
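As a sketch of the resulting behaviour (not part of the patch; the node-name
and size below are made-up examples), a QMP block_resize whose size matches
the node's current length can now succeed, while any request to grow the
device is still rejected with "Cannot grow device":

  { "execute": "block_resize",
    "arguments": {
      "node-name": "blkio0",
      "size": 10737418240
    } }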
From: Sergio Lopez <slp@redhat.com>

Commit 5b2ffbe4d99843fd8305c573a100047a8c962327 ("virtio-blk: dataplane:
notify guest as a batch") deferred guest notification to a BH in order
to batch notifications, with the purpose of avoiding flooding the guest
with interruptions.

This optimization came with a cost. The average latency perceived in the
guest is increased by a few microseconds, but also when multiple IO
operations finish at the same time, the guest won't be notified until
all completions from each operation have been run. On the contrary,
virtio-scsi issues the notification at the end of each completion.

On the other hand, nowadays we have the EVENT_IDX feature that allows
better coordination between QEMU and the Guest OS to avoid sending
unnecessary interruptions.

With this change, virtio-blk/dataplane only batches notifications if the
EVENT_IDX feature is not present.

Some numbers obtained with fio (ioengine=sync, iodepth=1, direct=1):
- Test specs:
  * fio-3.4 (ioengine=sync, iodepth=1, direct=1)
  * qemu master
  * virtio-blk with a dedicated iothread (default poll-max-ns)
  * backend: null_blk nr_devices=1 irqmode=2 completion_nsec=280000
  * 8 vCPUs pinned to isolated physical cores
  * Emulator and iothread also pinned to separate isolated cores
  * variance between runs < 1%

- Not patched
  * numjobs=1: lat_avg=327.32 irqs=29998
  * numjobs=4: lat_avg=337.89 irqs=29073
  * numjobs=8: lat_avg=342.98 irqs=28643

- Patched:
  * numjobs=1: lat_avg=323.92 irqs=30262
  * numjobs=4: lat_avg=332.65 irqs=29520
  * numjobs=8: lat_avg=335.54 irqs=29323

Signed-off-by: Sergio Lopez <slp@redhat.com>
Message-id: 20180307114459.26636-1-slp@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/block/dataplane/virtio-blk.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/block/dataplane/virtio-blk.c
+++ b/hw/block/dataplane/virtio-blk.c
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane {
     VirtIODevice *vdev;
     QEMUBH *bh; /* bh for guest notification */
     unsigned long *batch_notify_vqs;
+    bool batch_notifications;
 
     /* Note that these EventNotifiers are assigned by value. This is
      * fine as long as you do not call event_notifier_cleanup on them
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane {
 /* Raise an interrupt to signal guest, if necessary */
 void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
 {
-    set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
-    qemu_bh_schedule(s->bh);
+    if (s->batch_notifications) {
+        set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
+        qemu_bh_schedule(s->bh);
+    } else {
+        virtio_notify_irqfd(s->vdev, vq);
+    }
 }
 
 static void notify_guest_bh(void *opaque)
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
 
     s->starting = true;
 
+    if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
+        s->batch_notifications = true;
+    } else {
+        s->batch_notifications = false;
+    }
+
     /* Set up guest notifier (irq) */
     r = k->set_guest_notifiers(qbus->parent, nvqs, true);
     if (r != 0) {
--
2.14.3

From: Alberto Faria <afaria@redhat.com>

The nvme-io_uring driver expects a character special file such as
/dev/ng0n1. Follow the convention of having a "filename" option when a
regular file is expected, and a "path" option otherwise.

This makes io_uring the only libblkio-based driver with a "filename"
option, as it accepts a regular file (even though it can also take a
block special file).

Signed-off-by: Alberto Faria <afaria@redhat.com>
Message-id: 20221028233854.839933-1-afaria@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/block-core.json |  4 ++--
 block/blkio.c        | 12 ++++++++----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/qapi/block-core.json b/qapi/block-core.json
index XXXXXXX..XXXXXXX 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -XXX,XX +XXX,XX @@
 #
 # Driver specific block device options for the nvme-io_uring backend.
 #
-# @filename: path to the image file
+# @path: path to the image file
 #
 # Since: 7.2
 ##
 { 'struct': 'BlockdevOptionsNvmeIoUring',
-  'data': { 'filename': 'str' },
+  'data': { 'path': 'str' },
   'if': 'CONFIG_BLKIO' }
 
 ##
diff --git a/block/blkio.c b/block/blkio.c
index XXXXXXX..XXXXXXX 100644
--- a/block/blkio.c
+++ b/block/blkio.c
@@ -XXX,XX +XXX,XX @@ static int blkio_io_uring_open(BlockDriverState *bs, QDict *options, int flags,
 static int blkio_nvme_io_uring(BlockDriverState *bs, QDict *options, int flags,
                                Error **errp)
 {
-    const char *filename = qdict_get_str(options, "filename");
+    const char *path = qdict_get_try_str(options, "path");
     BDRVBlkioState *s = bs->opaque;
     int ret;
 
-    ret = blkio_set_str(s->blkio, "path", filename);
-    qdict_del(options, "filename");
+    if (!path) {
+        error_setg(errp, "missing 'path' option");
+        return -EINVAL;
+    }
+
+    ret = blkio_set_str(s->blkio, "path", path);
+    qdict_del(options, "path");
     if (ret < 0) {
         error_setg_errno(errp, -ret, "failed to set path: %s",
                          blkio_get_error_msg());
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_io_uring = BLKIO_DRIVER(
 
 static BlockDriver bdrv_nvme_io_uring = BLKIO_DRIVER(
     DRIVER_NVME_IO_URING,
-    .bdrv_needs_filename = true,
 );
 
 static BlockDriver bdrv_virtio_blk_vfio_pci = BLKIO_DRIVER(
--
2.38.1
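For illustration only (not part of the patch; the node-name is made up, and
/dev/ng0n1 is the example character device from the commit message), an
nvme-io_uring node would now be opened with "path" rather than "filename":

  { "execute": "blockdev-add",
    "arguments": {
      "driver": "nvme-io_uring",
      "node-name": "nvme0",
      "path": "/dev/ng0n1"
    } }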
Deleted patch
Sometimes it's necessary for the main loop thread to run a BH in an
IOThread and wait for its completion. This primitive is useful during
startup/shutdown to synchronize and avoid race conditions.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180307144205.20619-2-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 include/block/aio-wait.h | 13 +++++++++++++
 util/aio-wait.c          | 31 +++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+)

diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
index XXXXXXX..XXXXXXX 100644
--- a/include/block/aio-wait.h
+++ b/include/block/aio-wait.h
@@ -XXX,XX +XXX,XX @@ typedef struct {
  */
 void aio_wait_kick(AioWait *wait);
 
+/**
+ * aio_wait_bh_oneshot:
+ * @ctx: the aio context
+ * @cb: the BH callback function
+ * @opaque: user data for the BH callback function
+ *
+ * Run a BH in @ctx and wait for it to complete.
+ *
+ * Must be called from the main loop thread with @ctx acquired exactly once.
+ * Note that main loop event processing may occur.
+ */
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
+
 #endif /* QEMU_AIO_WAIT */
diff --git a/util/aio-wait.c b/util/aio-wait.c
index XXXXXXX..XXXXXXX 100644
--- a/util/aio-wait.c
+++ b/util/aio-wait.c
@@ -XXX,XX +XXX,XX @@ void aio_wait_kick(AioWait *wait)
         aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
     }
 }
+
+typedef struct {
+    AioWait wait;
+    bool done;
+    QEMUBHFunc *cb;
+    void *opaque;
+} AioWaitBHData;
+
+/* Context: BH in IOThread */
+static void aio_wait_bh(void *opaque)
+{
+    AioWaitBHData *data = opaque;
+
+    data->cb(data->opaque);
+
+    data->done = true;
+    aio_wait_kick(&data->wait);
+}
+
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
+{
+    AioWaitBHData data = {
+        .cb = cb,
+        .opaque = opaque,
+    };
+
+    assert(qemu_get_current_aio_context() == qemu_get_aio_context());
+
+    aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
+    AIO_WAIT_WHILE(&data.wait, ctx, !data.done);
+}
--
2.14.3
Deleted patch
If the main loop thread invokes .ioeventfd_stop() just as the vq handler
function begins in the IOThread then the handler may lose the race for
the AioContext lock. By the time the vq handler is able to acquire the
AioContext lock the ioeventfd has already been removed and the handler
isn't supposed to run anymore!

Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal
from within the IOThread. This way no races with the vq handler are
possible.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20180307144205.20619-4-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 hw/scsi/virtio-scsi-dataplane.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/scsi/virtio-scsi-dataplane.c
+++ b/hw/scsi/virtio-scsi-dataplane.c
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
     return 0;
 }
 
-/* assumes s->ctx held */
-static void virtio_scsi_clear_aio(VirtIOSCSI *s)
+/* Context: BH in IOThread */
+static void virtio_scsi_dataplane_stop_bh(void *opaque)
 {
+    VirtIOSCSI *s = opaque;
     VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
     int i;
 
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
     return 0;
 
 fail_vrings:
-    virtio_scsi_clear_aio(s);
+    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
     aio_context_release(s->ctx);
     for (i = 0; i < vs->conf.num_queues + 2; i++) {
         virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
     s->dataplane_stopping = true;
 
     aio_context_acquire(s->ctx);
-    virtio_scsi_clear_aio(s);
+    aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
     aio_context_release(s->ctx);
 
     blk_drain_all(); /* ensure there are no in-flight requests */
--
2.14.3