1
The following changes since commit 0ab4537f08e09b13788db67efd760592fb7db769:
1
The following changes since commit 711c0418c8c1ce3a24346f058b001c4c5a2f0f81:
2
2
3
Merge remote-tracking branch 'remotes/stefanberger/tags/pull-tpm-2018-03-07-1' into staging (2018-03-08 12:56:39 +0000)
3
Merge remote-tracking branch 'remotes/philmd/tags/mips-20210702' into staging (2021-07-04 14:04:12 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
git://github.com/stefanha/qemu.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 4486e89c219c0d1b9bd8dfa0b1dd5b0d51ff2268:
9
for you to fetch changes up to 9f460c64e13897117f35ffb61f6f5e0102cabc70:
10
10
11
vl: introduce vm_shutdown() (2018-03-08 17:38:51 +0000)
11
block/io: Merge discard request alignments (2021-07-06 14:28:55 +0100)
12
13
----------------------------------------------------------------
14
Pull request
12
15
13
----------------------------------------------------------------
16
----------------------------------------------------------------
14
17
15
----------------------------------------------------------------
18
Akihiko Odaki (3):
19
block/file-posix: Optimize for macOS
20
block: Add backend_defaults property
21
block/io: Merge discard request alignments
16
22
17
Deepa Srinivasan (1):
23
Stefan Hajnoczi (2):
18
block: Fix qemu crash when using scsi-block
24
util/async: add a human-readable name to BHs for debugging
25
util/async: print leaked BH name when AioContext finalizes
19
26
20
Fam Zheng (1):
27
include/block/aio.h | 31 ++++++++++++++++++++++---
21
README: Fix typo 'git-publish'
28
include/hw/block/block.h | 3 +++
22
29
include/qemu/main-loop.h | 4 +++-
23
Sergio Lopez (1):
30
block/file-posix.c | 27 ++++++++++++++++++++--
24
virtio-blk: dataplane: Don't batch notifications if EVENT_IDX is
31
block/io.c | 2 ++
25
present
32
hw/block/block.c | 42 ++++++++++++++++++++++++++++++----
26
33
tests/unit/ptimer-test-stubs.c | 2 +-
27
Stefan Hajnoczi (4):
34
util/async.c | 25 ++++++++++++++++----
28
block: add aio_wait_bh_oneshot()
35
util/main-loop.c | 4 ++--
29
virtio-blk: fix race between .ioeventfd_stop() and vq handler
36
tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++
30
virtio-scsi: fix race between .ioeventfd_stop() and vq handler
37
10 files changed, 161 insertions(+), 17 deletions(-)
31
vl: introduce vm_shutdown()
32
33
include/block/aio-wait.h | 13 +++++++++++
34
include/sysemu/iothread.h | 1 -
35
include/sysemu/sysemu.h | 1 +
36
block/block-backend.c | 51 ++++++++++++++++++++---------------------
37
cpus.c | 16 ++++++++++---
38
hw/block/dataplane/virtio-blk.c | 39 +++++++++++++++++++++++--------
39
hw/scsi/virtio-scsi-dataplane.c | 9 ++++----
40
iothread.c | 31 -------------------------
41
util/aio-wait.c | 31 +++++++++++++++++++++++++
42
vl.c | 13 +++--------
43
README | 2 +-
44
11 files changed, 122 insertions(+), 85 deletions(-)
45
38
46
--
39
--
47
2.14.3
40
2.31.1
48
41
49
diff view generated by jsdifflib
Deleted patch
1
From: Deepa Srinivasan <deepa.srinivasan@oracle.com>
2
1
3
Starting qemu with the following arguments causes qemu to segfault:
4
... -device lsi,id=lsi0 -drive file=iscsi:<...>,format=raw,if=none,node-name=
5
iscsi1 -device scsi-block,bus=lsi0.0,id=<...>,drive=iscsi1
6
7
This patch fixes blk_aio_ioctl() so it does not pass stack addresses to
8
blk_aio_ioctl_entry() which may be invoked after blk_aio_ioctl() returns. More
9
details about the bug follow.
10
11
blk_aio_ioctl() invokes blk_aio_prwv() with blk_aio_ioctl_entry as the
12
coroutine parameter. blk_aio_prwv() ultimately calls aio_co_enter().
13
14
When blk_aio_ioctl() is executed from within a coroutine context (e.g.
15
iscsi_bh_cb()), aio_co_enter() adds the coroutine (blk_aio_ioctl_entry) to
16
the current coroutine's wakeup queue. blk_aio_ioctl() then returns.
17
18
When blk_aio_ioctl_entry() executes later, it accesses an invalid pointer:
19
....
20
BlkRwCo *rwco = &acb->rwco;
21
22
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
23
rwco->qiov->iov[0].iov_base); <--- qiov is
24
invalid here
25
...
26
27
In the case when blk_aio_ioctl() is called from a non-coroutine context,
28
blk_aio_ioctl_entry() executes immediately. But if bdrv_co_ioctl() calls
29
qemu_coroutine_yield(), blk_aio_ioctl() will return. When the coroutine
30
execution is complete, control returns to blk_aio_ioctl_entry() after the call
31
to blk_co_ioctl(). There is no invalid reference after this point, but the
32
function is still holding on to invalid pointers.
33
34
The fix is to change blk_aio_prwv() to accept a void pointer for the IO buffer
35
rather than a QEMUIOVector. blk_aio_prwv() passes this through in BlkRwCo and the
36
coroutine function casts it to QEMUIOVector or uses the void pointer directly.
37
38
Signed-off-by: Deepa Srinivasan <deepa.srinivasan@oracle.com>
39
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
40
Reviewed-by: Mark Kanda <mark.kanda@oracle.com>
41
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
42
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
43
---
44
block/block-backend.c | 51 +++++++++++++++++++++++++--------------------------
45
1 file changed, 25 insertions(+), 26 deletions(-)
46
47
diff --git a/block/block-backend.c b/block/block-backend.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/block/block-backend.c
50
+++ b/block/block-backend.c
51
@@ -XXX,XX +XXX,XX @@ int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
52
typedef struct BlkRwCo {
53
BlockBackend *blk;
54
int64_t offset;
55
- QEMUIOVector *qiov;
56
+ void *iobuf;
57
int ret;
58
BdrvRequestFlags flags;
59
} BlkRwCo;
60
@@ -XXX,XX +XXX,XX @@ typedef struct BlkRwCo {
61
static void blk_read_entry(void *opaque)
62
{
63
BlkRwCo *rwco = opaque;
64
+ QEMUIOVector *qiov = rwco->iobuf;
65
66
- rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, rwco->qiov->size,
67
- rwco->qiov, rwco->flags);
68
+ rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, qiov->size,
69
+ qiov, rwco->flags);
70
}
71
72
static void blk_write_entry(void *opaque)
73
{
74
BlkRwCo *rwco = opaque;
75
+ QEMUIOVector *qiov = rwco->iobuf;
76
77
- rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, rwco->qiov->size,
78
- rwco->qiov, rwco->flags);
79
+ rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, qiov->size,
80
+ qiov, rwco->flags);
81
}
82
83
static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
84
@@ -XXX,XX +XXX,XX @@ static int blk_prw(BlockBackend *blk, int64_t offset, uint8_t *buf,
85
rwco = (BlkRwCo) {
86
.blk = blk,
87
.offset = offset,
88
- .qiov = &qiov,
89
+ .iobuf = &qiov,
90
.flags = flags,
91
.ret = NOT_DONE,
92
};
93
@@ -XXX,XX +XXX,XX @@ static void blk_aio_complete_bh(void *opaque)
94
}
95
96
static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
97
- QEMUIOVector *qiov, CoroutineEntry co_entry,
98
+ void *iobuf, CoroutineEntry co_entry,
99
BdrvRequestFlags flags,
100
BlockCompletionFunc *cb, void *opaque)
101
{
102
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *blk_aio_prwv(BlockBackend *blk, int64_t offset, int bytes,
103
acb->rwco = (BlkRwCo) {
104
.blk = blk,
105
.offset = offset,
106
- .qiov = qiov,
107
+ .iobuf = iobuf,
108
.flags = flags,
109
.ret = NOT_DONE,
110
};
111
@@ -XXX,XX +XXX,XX @@ static void blk_aio_read_entry(void *opaque)
112
{
113
BlkAioEmAIOCB *acb = opaque;
114
BlkRwCo *rwco = &acb->rwco;
115
+ QEMUIOVector *qiov = rwco->iobuf;
116
117
- assert(rwco->qiov->size == acb->bytes);
118
+ assert(qiov->size == acb->bytes);
119
rwco->ret = blk_co_preadv(rwco->blk, rwco->offset, acb->bytes,
120
- rwco->qiov, rwco->flags);
121
+ qiov, rwco->flags);
122
blk_aio_complete(acb);
123
}
124
125
@@ -XXX,XX +XXX,XX @@ static void blk_aio_write_entry(void *opaque)
126
{
127
BlkAioEmAIOCB *acb = opaque;
128
BlkRwCo *rwco = &acb->rwco;
129
+ QEMUIOVector *qiov = rwco->iobuf;
130
131
- assert(!rwco->qiov || rwco->qiov->size == acb->bytes);
132
+ assert(!qiov || qiov->size == acb->bytes);
133
rwco->ret = blk_co_pwritev(rwco->blk, rwco->offset, acb->bytes,
134
- rwco->qiov, rwco->flags);
135
+ qiov, rwco->flags);
136
blk_aio_complete(acb);
137
}
138
139
@@ -XXX,XX +XXX,XX @@ int blk_co_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
140
static void blk_ioctl_entry(void *opaque)
141
{
142
BlkRwCo *rwco = opaque;
143
+ QEMUIOVector *qiov = rwco->iobuf;
144
+
145
rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
146
- rwco->qiov->iov[0].iov_base);
147
+ qiov->iov[0].iov_base);
148
}
149
150
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf)
151
@@ -XXX,XX +XXX,XX @@ static void blk_aio_ioctl_entry(void *opaque)
152
BlkAioEmAIOCB *acb = opaque;
153
BlkRwCo *rwco = &acb->rwco;
154
155
- rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset,
156
- rwco->qiov->iov[0].iov_base);
157
+ rwco->ret = blk_co_ioctl(rwco->blk, rwco->offset, rwco->iobuf);
158
+
159
blk_aio_complete(acb);
160
}
161
162
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
163
BlockCompletionFunc *cb, void *opaque)
164
{
165
- QEMUIOVector qiov;
166
- struct iovec iov;
167
-
168
- iov = (struct iovec) {
169
- .iov_base = buf,
170
- .iov_len = 0,
171
- };
172
- qemu_iovec_init_external(&qiov, &iov, 1);
173
-
174
- return blk_aio_prwv(blk, req, 0, &qiov, blk_aio_ioctl_entry, 0, cb, opaque);
175
+ return blk_aio_prwv(blk, req, 0, buf, blk_aio_ioctl_entry, 0, cb, opaque);
176
}
177
178
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
179
@@ -XXX,XX +XXX,XX @@ int blk_truncate(BlockBackend *blk, int64_t offset, PreallocMode prealloc,
180
static void blk_pdiscard_entry(void *opaque)
181
{
182
BlkRwCo *rwco = opaque;
183
- rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, rwco->qiov->size);
184
+ QEMUIOVector *qiov = rwco->iobuf;
185
+
186
+ rwco->ret = blk_co_pdiscard(rwco->blk, rwco->offset, qiov->size);
187
}
188
189
int blk_pdiscard(BlockBackend *blk, int64_t offset, int bytes)
190
--
191
2.14.3
192
193
diff view generated by jsdifflib
Deleted patch
1
From: Fam Zheng <famz@redhat.com>
2
1
3
Reported-by: Alberto Garcia <berto@igalia.com>
4
Signed-off-by: Fam Zheng <famz@redhat.com>
5
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Message-id: 20180306024328.19195-1-famz@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
README | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
11
12
diff --git a/README b/README
13
index XXXXXXX..XXXXXXX 100644
14
--- a/README
15
+++ b/README
16
@@ -XXX,XX +XXX,XX @@ The QEMU website is also maintained under source control.
17
git clone git://git.qemu.org/qemu-web.git
18
https://www.qemu.org/2017/02/04/the-new-qemu-website-is-up/
19
20
-A 'git-profile' utility was created to make above process less
21
+A 'git-publish' utility was created to make above process less
22
cumbersome, and is highly recommended for making regular contributions,
23
or even just for sending consecutive patch series revisions. It also
24
requires a working 'git send-email' setup, and by default doesn't
25
--
26
2.14.3
27
28
diff view generated by jsdifflib
1
If the main loop thread invokes .ioeventfd_stop() just as the vq handler
1
It can be difficult to debug issues with BHs in production environments.
2
function begins in the IOThread then the handler may lose the race for
2
Although BHs can usually be identified by looking up their ->cb()
3
the AioContext lock. By the time the vq handler is able to acquire the
3
function pointer, this requires debug information for the program. It is
4
AioContext lock the ioeventfd has already been removed and the handler
4
also not possible to print human-readable diagnostics about BHs because
5
isn't supposed to run anymore!
5
they have no identifier.
6
6
7
Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal
7
This patch adds a name to each BH. The name is not unique per instance
8
from within the IOThread. This way no races with the vq handler are
8
but differentiates between cb() functions, which is usually enough. It's
9
possible.
9
done by changing aio_bh_new() and friends to macros that stringify cb.
10
11
The next patch will use the name field when reporting leaked BHs.
10
12
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Reviewed-by: Fam Zheng <famz@redhat.com>
14
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
13
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
15
Message-Id: <20210414200247.917496-2-stefanha@redhat.com>
14
Message-id: 20180307144205.20619-4-stefanha@redhat.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
16
---
17
hw/scsi/virtio-scsi-dataplane.c | 9 +++++----
17
include/block/aio.h | 31 ++++++++++++++++++++++++++++---
18
1 file changed, 5 insertions(+), 4 deletions(-)
18
include/qemu/main-loop.h | 4 +++-
19
tests/unit/ptimer-test-stubs.c | 2 +-
20
util/async.c | 9 +++++++--
21
util/main-loop.c | 4 ++--
22
5 files changed, 41 insertions(+), 9 deletions(-)
19
23
20
diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c
24
diff --git a/include/block/aio.h b/include/block/aio.h
21
index XXXXXXX..XXXXXXX 100644
25
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/scsi/virtio-scsi-dataplane.c
26
--- a/include/block/aio.h
23
+++ b/hw/scsi/virtio-scsi-dataplane.c
27
+++ b/include/block/aio.h
24
@@ -XXX,XX +XXX,XX @@ static int virtio_scsi_vring_init(VirtIOSCSI *s, VirtQueue *vq, int n,
28
@@ -XXX,XX +XXX,XX @@ void aio_context_acquire(AioContext *ctx);
25
return 0;
29
/* Relinquish ownership of the AioContext. */
30
void aio_context_release(AioContext *ctx);
31
32
+/**
33
+ * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will
34
+ * run only once and as soon as possible.
35
+ *
36
+ * @name: A human-readable identifier for debugging purposes.
37
+ */
38
+void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
39
+ const char *name);
40
+
41
/**
42
* aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run
43
* only once and as soon as possible.
44
+ *
45
+ * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the
46
+ * name string.
47
*/
48
-void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
49
+#define aio_bh_schedule_oneshot(ctx, cb, opaque) \
50
+ aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb)))
51
52
/**
53
- * aio_bh_new: Allocate a new bottom half structure.
54
+ * aio_bh_new_full: Allocate a new bottom half structure.
55
*
56
* Bottom halves are lightweight callbacks whose invocation is guaranteed
57
* to be wait-free, thread-safe and signal-safe. The #QEMUBH structure
58
* is opaque and must be allocated prior to its use.
59
+ *
60
+ * @name: A human-readable identifier for debugging purposes.
61
*/
62
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
63
+QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
64
+ const char *name);
65
+
66
+/**
67
+ * aio_bh_new: Allocate a new bottom half structure
68
+ *
69
+ * A convenience wrapper for aio_bh_new_full() that uses the cb as the name
70
+ * string.
71
+ */
72
+#define aio_bh_new(ctx, cb, opaque) \
73
+ aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb)))
74
75
/**
76
* aio_notify: Force processing of pending events.
77
diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h
78
index XXXXXXX..XXXXXXX 100644
79
--- a/include/qemu/main-loop.h
80
+++ b/include/qemu/main-loop.h
81
@@ -XXX,XX +XXX,XX @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms);
82
83
void qemu_fd_register(int fd);
84
85
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque);
86
+#define qemu_bh_new(cb, opaque) \
87
+ qemu_bh_new_full((cb), (opaque), (stringify(cb)))
88
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name);
89
void qemu_bh_schedule_idle(QEMUBH *bh);
90
91
enum {
92
diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/tests/unit/ptimer-test-stubs.c
95
+++ b/tests/unit/ptimer-test-stubs.c
96
@@ -XXX,XX +XXX,XX @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask)
97
return deadline;
26
}
98
}
27
99
28
-/* assumes s->ctx held */
100
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
29
-static void virtio_scsi_clear_aio(VirtIOSCSI *s)
101
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
30
+/* Context: BH in IOThread */
31
+static void virtio_scsi_dataplane_stop_bh(void *opaque)
32
{
102
{
33
+ VirtIOSCSI *s = opaque;
103
QEMUBH *bh = g_new(QEMUBH, 1);
34
VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(s);
104
35
int i;
105
diff --git a/util/async.c b/util/async.c
36
106
index XXXXXXX..XXXXXXX 100644
37
@@ -XXX,XX +XXX,XX @@ int virtio_scsi_dataplane_start(VirtIODevice *vdev)
107
--- a/util/async.c
38
return 0;
108
+++ b/util/async.c
39
109
@@ -XXX,XX +XXX,XX @@ enum {
40
fail_vrings:
110
41
- virtio_scsi_clear_aio(s);
111
struct QEMUBH {
42
+ aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
112
AioContext *ctx;
43
aio_context_release(s->ctx);
113
+ const char *name;
44
for (i = 0; i < vs->conf.num_queues + 2; i++) {
114
QEMUBHFunc *cb;
45
virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
115
void *opaque;
46
@@ -XXX,XX +XXX,XX @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev)
116
QSLIST_ENTRY(QEMUBH) next;
47
s->dataplane_stopping = true;
117
@@ -XXX,XX +XXX,XX @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags)
48
118
return bh;
49
aio_context_acquire(s->ctx);
119
}
50
- virtio_scsi_clear_aio(s);
120
51
+ aio_wait_bh_oneshot(s->ctx, virtio_scsi_dataplane_stop_bh, s);
121
-void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
52
aio_context_release(s->ctx);
122
+void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb,
53
123
+ void *opaque, const char *name)
54
blk_drain_all(); /* ensure there are no in-flight requests */
124
{
125
QEMUBH *bh;
126
bh = g_new(QEMUBH, 1);
127
@@ -XXX,XX +XXX,XX @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
128
.ctx = ctx,
129
.cb = cb,
130
.opaque = opaque,
131
+ .name = name,
132
};
133
aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT);
134
}
135
136
-QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
137
+QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque,
138
+ const char *name)
139
{
140
QEMUBH *bh;
141
bh = g_new(QEMUBH, 1);
142
@@ -XXX,XX +XXX,XX @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
143
.ctx = ctx,
144
.cb = cb,
145
.opaque = opaque,
146
+ .name = name,
147
};
148
return bh;
149
}
150
diff --git a/util/main-loop.c b/util/main-loop.c
151
index XXXXXXX..XXXXXXX 100644
152
--- a/util/main-loop.c
153
+++ b/util/main-loop.c
154
@@ -XXX,XX +XXX,XX @@ void main_loop_wait(int nonblocking)
155
156
/* Functions to operate on the main QEMU AioContext. */
157
158
-QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
159
+QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name)
160
{
161
- return aio_bh_new(qemu_aio_context, cb, opaque);
162
+ return aio_bh_new_full(qemu_aio_context, cb, opaque, name);
163
}
164
165
/*
55
--
166
--
56
2.14.3
167
2.31.1
57
168
58
diff view generated by jsdifflib
1
Sometimes it's necessary for the main loop thread to run a BH in an
1
BHs must be deleted before the AioContext is finalized. If not, it's a
2
IOThread and wait for its completion. This primitive is useful during
2
bug and probably indicates that some part of the program still expects
3
startup/shutdown to synchronize and avoid race conditions.
3
the BH to run in the future. That can lead to memory leaks, inconsistent
4
state, or just hangs.
4
5
6
Unfortunately the assert(flags & BH_DELETED) call in aio_ctx_finalize()
7
is difficult to debug because the assertion failure contains no
8
information about the BH!
9
10
Use the QEMUBH name field added in the previous patch to show a useful
11
error when a leaked BH is detected.
12
13
Suggested-by: Eric Ernst <eric.g.ernst@gmail.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Fam Zheng <famz@redhat.com>
15
Message-Id: <20210414200247.917496-3-stefanha@redhat.com>
7
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
8
Message-id: 20180307144205.20619-2-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
16
---
11
include/block/aio-wait.h | 13 +++++++++++++
17
util/async.c | 16 ++++++++++++++--
12
util/aio-wait.c | 31 +++++++++++++++++++++++++++++++
18
1 file changed, 14 insertions(+), 2 deletions(-)
13
2 files changed, 44 insertions(+)
14
19
15
diff --git a/include/block/aio-wait.h b/include/block/aio-wait.h
20
diff --git a/util/async.c b/util/async.c
16
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
17
--- a/include/block/aio-wait.h
22
--- a/util/async.c
18
+++ b/include/block/aio-wait.h
23
+++ b/util/async.c
19
@@ -XXX,XX +XXX,XX @@ typedef struct {
24
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
20
*/
25
assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list));
21
void aio_wait_kick(AioWait *wait);
26
22
27
while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) {
23
+/**
28
- /* qemu_bh_delete() must have been called on BHs in this AioContext */
24
+ * aio_wait_bh_oneshot:
29
- assert(flags & BH_DELETED);
25
+ * @ctx: the aio context
30
+ /*
26
+ * @cb: the BH callback function
31
+ * qemu_bh_delete() must have been called on BHs in this AioContext. In
27
+ * @opaque: user data for the BH callback function
32
+ * many cases memory leaks, hangs, or inconsistent state occur when a
28
+ *
33
+ * BH is leaked because something still expects it to run.
29
+ * Run a BH in @ctx and wait for it to complete.
34
+ *
30
+ *
35
+ * If you hit this, fix the lifecycle of the BH so that
31
+ * Must be called from the main loop thread with @ctx acquired exactly once.
36
+ * qemu_bh_delete() and any associated cleanup is called before the
32
+ * Note that main loop event processing may occur.
37
+ * AioContext is finalized.
33
+ */
38
+ */
34
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque);
39
+ if (unlikely(!(flags & BH_DELETED))) {
35
+
40
+ fprintf(stderr, "%s: BH '%s' leaked, aborting...\n",
36
#endif /* QEMU_AIO_WAIT */
41
+ __func__, bh->name);
37
diff --git a/util/aio-wait.c b/util/aio-wait.c
42
+ abort();
38
index XXXXXXX..XXXXXXX 100644
43
+ }
39
--- a/util/aio-wait.c
44
40
+++ b/util/aio-wait.c
45
g_free(bh);
41
@@ -XXX,XX +XXX,XX @@ void aio_wait_kick(AioWait *wait)
42
aio_bh_schedule_oneshot(qemu_get_aio_context(), dummy_bh_cb, NULL);
43
}
46
}
44
}
45
+
46
+typedef struct {
47
+ AioWait wait;
48
+ bool done;
49
+ QEMUBHFunc *cb;
50
+ void *opaque;
51
+} AioWaitBHData;
52
+
53
+/* Context: BH in IOThread */
54
+static void aio_wait_bh(void *opaque)
55
+{
56
+ AioWaitBHData *data = opaque;
57
+
58
+ data->cb(data->opaque);
59
+
60
+ data->done = true;
61
+ aio_wait_kick(&data->wait);
62
+}
63
+
64
+void aio_wait_bh_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
65
+{
66
+ AioWaitBHData data = {
67
+ .cb = cb,
68
+ .opaque = opaque,
69
+ };
70
+
71
+ assert(qemu_get_current_aio_context() == qemu_get_aio_context());
72
+
73
+ aio_bh_schedule_oneshot(ctx, aio_wait_bh, &data);
74
+ AIO_WAIT_WHILE(&data.wait, ctx, !data.done);
75
+}
76
--
47
--
77
2.14.3
48
2.31.1
78
49
79
diff view generated by jsdifflib
1
Commit 00d09fdbbae5f7864ce754913efc84c12fdf9f1a ("vl: pause vcpus before
1
From: Akihiko Odaki <akihiko.odaki@gmail.com>
2
stopping iothreads") and commit dce8921b2baaf95974af8176406881872067adfa
3
("iothread: Stop threads before main() quits") tried to work around the
4
fact that emulation was still active during termination by stopping
5
iothreads. They suffer from race conditions:
6
1. virtio_scsi_handle_cmd_vq() racing with iothread_stop_all() hits the
7
virtio_scsi_ctx_check() assertion failure because the BDS AioContext
8
has been modified by iothread_stop_all().
9
2. Guest vq kick racing with main loop termination leaves a readable
10
ioeventfd that is handled by the next aio_poll() when external
11
clients are enabled again, resulting in unwanted emulation activity.
12
2
13
This patch obsoletes those commits by fully disabling emulation activity
3
This commit introduces a "punch hole" operation and optimizes the transfer
14
when vcpus are stopped.
4
block size for macOS.
15
5
16
Use the new vm_shutdown() function instead of pause_all_vcpus() so that
6
Thanks to Konstantin Nazarov for a detailed analysis of a flaw in an
17
vm change state handlers are invoked too. Virtio devices will now stop
7
old version of this change:
18
their ioeventfds, preventing further emulation activity after vm_stop().
8
https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667
19
9
20
Note that vm_stop(RUN_STATE_SHUTDOWN) cannot be used because it emits a
10
Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
21
QMP STOP event that may affect existing clients.
11
Message-id: 20210705130458.97642-1-akihiko.odaki@gmail.com
22
23
It is no longer necessary to call replay_disable_events() directly since
24
vm_shutdown() does so already.
25
26
Drop iothread_stop_all() since it is no longer used.
27
28
Cc: Fam Zheng <famz@redhat.com>
29
Cc: Kevin Wolf <kwolf@redhat.com>
30
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
31
Reviewed-by: Fam Zheng <famz@redhat.com>
32
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
33
Message-id: 20180307144205.20619-5-stefanha@redhat.com
34
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
35
---
13
---
36
include/sysemu/iothread.h | 1 -
14
block/file-posix.c | 27 +++++++++++++++++++++++++--
37
include/sysemu/sysemu.h | 1 +
15
1 file changed, 25 insertions(+), 2 deletions(-)
38
cpus.c | 16 +++++++++++++---
39
iothread.c | 31 -------------------------------
40
vl.c | 13 +++----------
41
5 files changed, 17 insertions(+), 45 deletions(-)
42
16
43
diff --git a/include/sysemu/iothread.h b/include/sysemu/iothread.h
17
diff --git a/block/file-posix.c b/block/file-posix.c
44
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
45
--- a/include/sysemu/iothread.h
19
--- a/block/file-posix.c
46
+++ b/include/sysemu/iothread.h
20
+++ b/block/file-posix.c
47
@@ -XXX,XX +XXX,XX @@ typedef struct {
21
@@ -XXX,XX +XXX,XX @@
48
char *iothread_get_id(IOThread *iothread);
22
#if defined(HAVE_HOST_BLOCK_DEVICE)
49
IOThread *iothread_by_id(const char *id);
23
#include <paths.h>
50
AioContext *iothread_get_aio_context(IOThread *iothread);
24
#include <sys/param.h>
51
-void iothread_stop_all(void);
25
+#include <sys/mount.h>
52
GMainContext *iothread_get_g_main_context(IOThread *iothread);
26
#include <IOKit/IOKitLib.h>
53
27
#include <IOKit/IOBSD.h>
54
/*
28
#include <IOKit/storage/IOMediaBSDClient.h>
55
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
29
@@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp)
56
index XXXXXXX..XXXXXXX 100644
30
return;
57
--- a/include/sysemu/sysemu.h
31
}
58
+++ b/include/sysemu/sysemu.h
32
59
@@ -XXX,XX +XXX,XX @@ void vm_start(void);
33
+#if defined(__APPLE__) && (__MACH__)
60
int vm_prepare_start(void);
34
+ struct statfs buf;
61
int vm_stop(RunState state);
35
+
62
int vm_stop_force_state(RunState state);
36
+ if (!fstatfs(s->fd, &buf)) {
63
+int vm_shutdown(void);
37
+ bs->bl.opt_transfer = buf.f_iosize;
64
38
+ bs->bl.pdiscard_alignment = buf.f_bsize;
65
typedef enum WakeupReason {
39
+ }
66
/* Always keep QEMU_WAKEUP_REASON_NONE = 0 */
40
+#endif
67
diff --git a/cpus.c b/cpus.c
41
+
68
index XXXXXXX..XXXXXXX 100644
42
if (bs->sg || S_ISBLK(st.st_mode)) {
69
--- a/cpus.c
43
int ret = hdev_get_max_hw_transfer(s->fd, &st);
70
+++ b/cpus.c
44
71
@@ -XXX,XX +XXX,XX @@ void cpu_synchronize_all_pre_loadvm(void)
45
@@ -XXX,XX +XXX,XX @@ out:
72
}
46
}
73
}
47
}
74
48
75
-static int do_vm_stop(RunState state)
49
+#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD)
76
+static int do_vm_stop(RunState state, bool send_stop)
50
static int translate_err(int err)
77
{
51
{
78
int ret = 0;
52
if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP ||
79
53
@@ -XXX,XX +XXX,XX @@ static int translate_err(int err)
80
@@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state)
54
}
81
pause_all_vcpus();
55
return err;
82
runstate_set(state);
56
}
83
vm_state_notify(0, state);
57
+#endif
84
- qapi_event_send_stop(&error_abort);
58
85
+ if (send_stop) {
59
#ifdef CONFIG_FALLOCATE
86
+ qapi_event_send_stop(&error_abort);
60
static int do_fallocate(int fd, int mode, off_t offset, off_t len)
61
@@ -XXX,XX +XXX,XX @@ static int handle_aiocb_discard(void *opaque)
62
}
63
} while (errno == EINTR);
64
65
- ret = -errno;
66
+ ret = translate_err(-errno);
67
#endif
68
} else {
69
#ifdef CONFIG_FALLOCATE_PUNCH_HOLE
70
ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
71
aiocb->aio_offset, aiocb->aio_nbytes);
72
+ ret = translate_err(-errno);
73
+#elif defined(__APPLE__) && (__MACH__)
74
+ fpunchhole_t fpunchhole;
75
+ fpunchhole.fp_flags = 0;
76
+ fpunchhole.reserved = 0;
77
+ fpunchhole.fp_offset = aiocb->aio_offset;
78
+ fpunchhole.fp_length = aiocb->aio_nbytes;
79
+ if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) {
80
+ ret = errno == ENODEV ? -ENOTSUP : -errno;
81
+ } else {
82
+ ret = 0;
87
+ }
83
+ }
84
#endif
88
}
85
}
89
86
90
bdrv_drain_all();
87
- ret = translate_err(ret);
91
@@ -XXX,XX +XXX,XX @@ static int do_vm_stop(RunState state)
88
if (ret == -ENOTSUP) {
92
return ret;
89
s->has_discard = false;
93
}
94
95
+/* Special vm_stop() variant for terminating the process. Historically clients
96
+ * did not expect a QMP STOP event and so we need to retain compatibility.
97
+ */
98
+int vm_shutdown(void)
99
+{
100
+ return do_vm_stop(RUN_STATE_SHUTDOWN, false);
101
+}
102
+
103
static bool cpu_can_run(CPUState *cpu)
104
{
105
if (cpu->stop) {
106
@@ -XXX,XX +XXX,XX @@ int vm_stop(RunState state)
107
return 0;
108
}
90
}
109
110
- return do_vm_stop(state);
111
+ return do_vm_stop(state, true);
112
}
113
114
/**
115
diff --git a/iothread.c b/iothread.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/iothread.c
118
+++ b/iothread.c
119
@@ -XXX,XX +XXX,XX @@ void iothread_stop(IOThread *iothread)
120
qemu_thread_join(&iothread->thread);
121
}
122
123
-static int iothread_stop_iter(Object *object, void *opaque)
124
-{
125
- IOThread *iothread;
126
-
127
- iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD);
128
- if (!iothread) {
129
- return 0;
130
- }
131
- iothread_stop(iothread);
132
- return 0;
133
-}
134
-
135
static void iothread_instance_init(Object *obj)
136
{
137
IOThread *iothread = IOTHREAD(obj);
138
@@ -XXX,XX +XXX,XX @@ IOThreadInfoList *qmp_query_iothreads(Error **errp)
139
return head;
140
}
141
142
-void iothread_stop_all(void)
143
-{
144
- Object *container = object_get_objects_root();
145
- BlockDriverState *bs;
146
- BdrvNextIterator it;
147
-
148
- for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) {
149
- AioContext *ctx = bdrv_get_aio_context(bs);
150
- if (ctx == qemu_get_aio_context()) {
151
- continue;
152
- }
153
- aio_context_acquire(ctx);
154
- bdrv_set_aio_context(bs, qemu_get_aio_context());
155
- aio_context_release(ctx);
156
- }
157
-
158
- object_child_foreach(container, iothread_stop_iter, NULL);
159
-}
160
-
161
static gpointer iothread_g_main_context_init(gpointer opaque)
162
{
163
AioContext *ctx;
164
diff --git a/vl.c b/vl.c
165
index XXXXXXX..XXXXXXX 100644
166
--- a/vl.c
167
+++ b/vl.c
168
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
169
os_setup_post();
170
171
main_loop();
172
- replay_disable_events();
173
174
- /* The ordering of the following is delicate. Stop vcpus to prevent new
175
- * I/O requests being queued by the guest. Then stop IOThreads (this
176
- * includes a drain operation and completes all request processing). At
177
- * this point emulated devices are still associated with their IOThreads
178
- * (if any) but no longer have any work to do. Only then can we close
179
- * block devices safely because we know there is no more I/O coming.
180
- */
181
- pause_all_vcpus();
182
- iothread_stop_all();
183
+ /* No more vcpu or device emulation activity beyond this point */
184
+ vm_shutdown();
185
+
186
bdrv_close_all();
187
188
res_free();
189
--
91
--
190
2.14.3
92
2.31.1
191
93
192
diff view generated by jsdifflib
1
If the main loop thread invokes .ioeventfd_stop() just as the vq handler
1
From: Akihiko Odaki <akihiko.odaki@gmail.com>
2
function begins in the IOThread then the handler may lose the race for
2
3
the AioContext lock. By the time the vq handler is able to acquire the
3
backend_defaults property allows users to control if default block
4
AioContext lock the ioeventfd has already been removed and the handler
4
properties should be decided with backend information.
5
isn't supposed to run anymore!
5
6
6
If it is off, any backend information will be discarded, which is
7
Use the new aio_wait_bh_oneshot() function to perform ioeventfd removal
7
suitable if you plan to perform live migration to a different disk backend.
8
from within the IOThread. This way no races with the vq handler are
8
9
possible.
9
If it is on, a block device may utilize backend information more
10
10
aggressively.
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
12
Reviewed-by: Fam Zheng <famz@redhat.com>
12
By default, it is auto, which uses backend information for block
13
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
13
sizes and ignores the others, which is consistent with the older
14
Message-id: 20180307144205.20619-3-stefanha@redhat.com
14
versions.
15
16
Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
17
Message-id: 20210705130458.97642-2-akihiko.odaki@gmail.com
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
19
---
17
hw/block/dataplane/virtio-blk.c | 24 +++++++++++++++++-------
20
include/hw/block/block.h | 3 +++
18
1 file changed, 17 insertions(+), 7 deletions(-)
21
hw/block/block.c | 42 ++++++++++++++++++++++++++++++++++----
19
22
tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++++++
20
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
23
3 files changed, 79 insertions(+), 4 deletions(-)
24
25
diff --git a/include/hw/block/block.h b/include/hw/block/block.h
21
index XXXXXXX..XXXXXXX 100644
26
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/block/dataplane/virtio-blk.c
27
--- a/include/hw/block/block.h
23
+++ b/hw/block/dataplane/virtio-blk.c
28
+++ b/include/hw/block/block.h
24
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
29
@@ -XXX,XX +XXX,XX @@
25
return -ENOSYS;
30
31
typedef struct BlockConf {
32
BlockBackend *blk;
33
+ OnOffAuto backend_defaults;
34
uint32_t physical_block_size;
35
uint32_t logical_block_size;
36
uint32_t min_io_size;
37
@@ -XXX,XX +XXX,XX @@ static inline unsigned int get_physical_block_exp(BlockConf *conf)
26
}
38
}
27
39
28
+/* Stop notifications for new requests from guest.
40
#define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \
29
+ *
41
+ DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \
30
+ * Context: BH in IOThread
42
+ _conf.backend_defaults, ON_OFF_AUTO_AUTO), \
31
+ */
43
DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \
32
+static void virtio_blk_data_plane_stop_bh(void *opaque)
44
_conf.logical_block_size), \
33
+{
45
DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \
34
+ VirtIOBlockDataPlane *s = opaque;
46
diff --git a/hw/block/block.c b/hw/block/block.c
35
+ unsigned i;
47
index XXXXXXX..XXXXXXX 100644
48
--- a/hw/block/block.c
49
+++ b/hw/block/block.c
50
@@ -XXX,XX +XXX,XX @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp)
51
{
52
BlockBackend *blk = conf->blk;
53
BlockSizes blocksizes;
54
- int backend_ret;
55
+ BlockDriverState *bs;
56
+ bool use_blocksizes;
57
+ bool use_bs;
36
+
58
+
37
+ for (i = 0; i < s->conf->num_queues; i++) {
59
+ switch (conf->backend_defaults) {
38
+ VirtQueue *vq = virtio_get_queue(s->vdev, i);
60
+ case ON_OFF_AUTO_AUTO:
61
+ use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes);
62
+ use_bs = false;
63
+ break;
39
+
64
+
40
+ virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
65
+ case ON_OFF_AUTO_ON:
66
+ use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes);
67
+ bs = blk_bs(blk);
68
+ use_bs = bs;
69
+ break;
70
+
71
+ case ON_OFF_AUTO_OFF:
72
+ use_blocksizes = false;
73
+ use_bs = false;
74
+ break;
75
+
76
+ default:
77
+ abort();
41
+ }
78
+ }
42
+}
79
43
+
80
- backend_ret = blk_probe_blocksizes(blk, &blocksizes);
44
/* Context: QEMU global mutex held */
81
/* fill in detected values if they are not defined via qemu command line */
45
void virtio_blk_data_plane_stop(VirtIODevice *vdev)
82
if (!conf->physical_block_size) {
46
{
83
- if (!backend_ret) {
47
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
84
+ if (use_blocksizes) {
48
trace_virtio_blk_data_plane_stop(s);
85
conf->physical_block_size = blocksizes.phys;
49
86
} else {
50
aio_context_acquire(s->ctx);
87
conf->physical_block_size = BDRV_SECTOR_SIZE;
51
-
88
}
52
- /* Stop notifications for new requests from guest */
89
}
53
- for (i = 0; i < nvqs; i++) {
90
if (!conf->logical_block_size) {
54
- VirtQueue *vq = virtio_get_queue(s->vdev, i);
91
- if (!backend_ret) {
55
-
92
+ if (use_blocksizes) {
56
- virtio_queue_aio_set_host_notifier_handler(vq, s->ctx, NULL);
93
conf->logical_block_size = blocksizes.log;
57
- }
94
} else {
58
+ aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
95
conf->logical_block_size = BDRV_SECTOR_SIZE;
59
96
}
60
/* Drain and switch bs back to the QEMU main loop */
97
}
61
blk_set_aio_context(s->conf->conf.blk, qemu_get_aio_context());
98
+ if (use_bs) {
99
+ if (!conf->opt_io_size) {
100
+ conf->opt_io_size = bs->bl.opt_transfer;
101
+ }
102
+ if (conf->discard_granularity == -1) {
103
+ if (bs->bl.pdiscard_alignment) {
104
+ conf->discard_granularity = bs->bl.pdiscard_alignment;
105
+ } else if (bs->bl.request_alignment != 1) {
106
+ conf->discard_granularity = bs->bl.request_alignment;
107
+ }
108
+ }
109
+ }
110
111
if (conf->logical_block_size > conf->physical_block_size) {
112
error_setg(errp,
113
diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out
114
index XXXXXXX..XXXXXXX 100644
115
--- a/tests/qemu-iotests/172.out
116
+++ b/tests/qemu-iotests/172.out
117
@@ -XXX,XX +XXX,XX @@ Testing:
118
dev: floppy, id ""
119
unit = 0 (0x0)
120
drive = "floppy0"
121
+ backend_defaults = "auto"
122
logical_block_size = 512 (512 B)
123
physical_block_size = 512 (512 B)
124
min_io_size = 0 (0 B)
125
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2
126
dev: floppy, id ""
127
unit = 0 (0x0)
128
drive = "floppy0"
129
+ backend_defaults = "auto"
130
logical_block_size = 512 (512 B)
131
physical_block_size = 512 (512 B)
132
min_io_size = 0 (0 B)
133
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2
134
dev: floppy, id ""
135
unit = 1 (0x1)
136
drive = "floppy1"
137
+ backend_defaults = "auto"
138
logical_block_size = 512 (512 B)
139
physical_block_size = 512 (512 B)
140
min_io_size = 0 (0 B)
141
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2
142
dev: floppy, id ""
143
unit = 0 (0x0)
144
drive = "floppy0"
145
+ backend_defaults = "auto"
146
logical_block_size = 512 (512 B)
147
physical_block_size = 512 (512 B)
148
min_io_size = 0 (0 B)
149
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
150
dev: floppy, id ""
151
unit = 1 (0x1)
152
drive = "floppy1"
153
+ backend_defaults = "auto"
154
logical_block_size = 512 (512 B)
155
physical_block_size = 512 (512 B)
156
min_io_size = 0 (0 B)
157
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2
158
dev: floppy, id ""
159
unit = 0 (0x0)
160
drive = "floppy0"
161
+ backend_defaults = "auto"
162
logical_block_size = 512 (512 B)
163
physical_block_size = 512 (512 B)
164
min_io_size = 0 (0 B)
165
@@ -XXX,XX +XXX,XX @@ Testing: -fdb
166
dev: floppy, id ""
167
unit = 1 (0x1)
168
drive = "floppy1"
169
+ backend_defaults = "auto"
170
logical_block_size = 512 (512 B)
171
physical_block_size = 512 (512 B)
172
min_io_size = 0 (0 B)
173
@@ -XXX,XX +XXX,XX @@ Testing: -fdb
174
dev: floppy, id ""
175
unit = 0 (0x0)
176
drive = "floppy0"
177
+ backend_defaults = "auto"
178
logical_block_size = 512 (512 B)
179
physical_block_size = 512 (512 B)
180
min_io_size = 0 (0 B)
181
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2
182
dev: floppy, id ""
183
unit = 0 (0x0)
184
drive = "floppy0"
185
+ backend_defaults = "auto"
186
logical_block_size = 512 (512 B)
187
physical_block_size = 512 (512 B)
188
min_io_size = 0 (0 B)
189
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
190
dev: floppy, id ""
191
unit = 1 (0x1)
192
drive = "floppy1"
193
+ backend_defaults = "auto"
194
logical_block_size = 512 (512 B)
195
physical_block_size = 512 (512 B)
196
min_io_size = 0 (0 B)
197
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1
198
dev: floppy, id ""
199
unit = 0 (0x0)
200
drive = "floppy0"
201
+ backend_defaults = "auto"
202
logical_block_size = 512 (512 B)
203
physical_block_size = 512 (512 B)
204
min_io_size = 0 (0 B)
205
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
206
dev: floppy, id ""
207
unit = 1 (0x1)
208
drive = "floppy1"
209
+ backend_defaults = "auto"
210
logical_block_size = 512 (512 B)
211
physical_block_size = 512 (512 B)
212
min_io_size = 0 (0 B)
213
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t
214
dev: floppy, id ""
215
unit = 0 (0x0)
216
drive = "floppy0"
217
+ backend_defaults = "auto"
218
logical_block_size = 512 (512 B)
219
physical_block_size = 512 (512 B)
220
min_io_size = 0 (0 B)
221
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0
222
dev: floppy, id ""
223
unit = 0 (0x0)
224
drive = "none0"
225
+ backend_defaults = "auto"
226
logical_block_size = 512 (512 B)
227
physical_block_size = 512 (512 B)
228
min_io_size = 0 (0 B)
229
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1
230
dev: floppy, id ""
231
unit = 1 (0x1)
232
drive = "none0"
233
+ backend_defaults = "auto"
234
logical_block_size = 512 (512 B)
235
physical_block_size = 512 (512 B)
236
min_io_size = 0 (0 B)
237
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
238
dev: floppy, id ""
239
unit = 1 (0x1)
240
drive = "none1"
241
+ backend_defaults = "auto"
242
logical_block_size = 512 (512 B)
243
physical_block_size = 512 (512 B)
244
min_io_size = 0 (0 B)
245
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco
246
dev: floppy, id ""
247
unit = 0 (0x0)
248
drive = "none0"
249
+ backend_defaults = "auto"
250
logical_block_size = 512 (512 B)
251
physical_block_size = 512 (512 B)
252
min_io_size = 0 (0 B)
253
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
254
dev: floppy, id ""
255
unit = 1 (0x1)
256
drive = "none0"
257
+ backend_defaults = "auto"
258
logical_block_size = 512 (512 B)
259
physical_block_size = 512 (512 B)
260
min_io_size = 0 (0 B)
261
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
262
dev: floppy, id ""
263
unit = 0 (0x0)
264
drive = "floppy0"
265
+ backend_defaults = "auto"
266
logical_block_size = 512 (512 B)
267
physical_block_size = 512 (512 B)
268
min_io_size = 0 (0 B)
269
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
270
dev: floppy, id ""
271
unit = 1 (0x1)
272
drive = "none0"
273
+ backend_defaults = "auto"
274
logical_block_size = 512 (512 B)
275
physical_block_size = 512 (512 B)
276
min_io_size = 0 (0 B)
277
@@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
278
dev: floppy, id ""
279
unit = 0 (0x0)
280
drive = "floppy0"
281
+ backend_defaults = "auto"
282
logical_block_size = 512 (512 B)
283
physical_block_size = 512 (512 B)
284
min_io_size = 0 (0 B)
285
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
286
dev: floppy, id ""
287
unit = 0 (0x0)
288
drive = "none0"
289
+ backend_defaults = "auto"
290
logical_block_size = 512 (512 B)
291
physical_block_size = 512 (512 B)
292
min_io_size = 0 (0 B)
293
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
294
dev: floppy, id ""
295
unit = 1 (0x1)
296
drive = "floppy1"
297
+ backend_defaults = "auto"
298
logical_block_size = 512 (512 B)
299
physical_block_size = 512 (512 B)
300
min_io_size = 0 (0 B)
301
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
302
dev: floppy, id ""
303
unit = 0 (0x0)
304
drive = "none0"
305
+ backend_defaults = "auto"
306
logical_block_size = 512 (512 B)
307
physical_block_size = 512 (512 B)
308
min_io_size = 0 (0 B)
309
@@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl
310
dev: floppy, id ""
311
unit = 1 (0x1)
312
drive = "floppy1"
313
+ backend_defaults = "auto"
314
logical_block_size = 512 (512 B)
315
physical_block_size = 512 (512 B)
316
min_io_size = 0 (0 B)
317
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
318
dev: floppy, id ""
319
unit = 1 (0x1)
320
drive = "none0"
321
+ backend_defaults = "auto"
322
logical_block_size = 512 (512 B)
323
physical_block_size = 512 (512 B)
324
min_io_size = 0 (0 B)
325
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
326
dev: floppy, id ""
327
unit = 0 (0x0)
328
drive = "floppy0"
329
+ backend_defaults = "auto"
330
logical_block_size = 512 (512 B)
331
physical_block_size = 512 (512 B)
332
min_io_size = 0 (0 B)
333
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
334
dev: floppy, id ""
335
unit = 1 (0x1)
336
drive = "none0"
337
+ backend_defaults = "auto"
338
logical_block_size = 512 (512 B)
339
physical_block_size = 512 (512 B)
340
min_io_size = 0 (0 B)
341
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q
342
dev: floppy, id ""
343
unit = 0 (0x0)
344
drive = "floppy0"
345
+ backend_defaults = "auto"
346
logical_block_size = 512 (512 B)
347
physical_block_size = 512 (512 B)
348
min_io_size = 0 (0 B)
349
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global floppy.drive=none0 -device
350
dev: floppy, id ""
351
unit = 0 (0x0)
352
drive = "none0"
353
+ backend_defaults = "auto"
354
logical_block_size = 512 (512 B)
355
physical_block_size = 512 (512 B)
356
min_io_size = 0 (0 B)
357
@@ -XXX,XX +XXX,XX @@ Testing: -device floppy
358
dev: floppy, id ""
359
unit = 0 (0x0)
360
drive = ""
361
+ backend_defaults = "auto"
362
logical_block_size = 512 (512 B)
363
physical_block_size = 512 (512 B)
364
min_io_size = 0 (0 B)
365
@@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=120
366
dev: floppy, id ""
367
unit = 0 (0x0)
368
drive = ""
369
+ backend_defaults = "auto"
370
logical_block_size = 512 (512 B)
371
physical_block_size = 512 (512 B)
372
min_io_size = 0 (0 B)
373
@@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=144
374
dev: floppy, id ""
375
unit = 0 (0x0)
376
drive = ""
377
+ backend_defaults = "auto"
378
logical_block_size = 512 (512 B)
379
physical_block_size = 512 (512 B)
380
min_io_size = 0 (0 B)
381
@@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=288
382
dev: floppy, id ""
383
unit = 0 (0x0)
384
drive = ""
385
+ backend_defaults = "auto"
386
logical_block_size = 512 (512 B)
387
physical_block_size = 512 (512 B)
388
min_io_size = 0 (0 B)
389
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
390
dev: floppy, id ""
391
unit = 0 (0x0)
392
drive = "none0"
393
+ backend_defaults = "auto"
394
logical_block_size = 512 (512 B)
395
physical_block_size = 512 (512 B)
396
min_io_size = 0 (0 B)
397
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t
398
dev: floppy, id ""
399
unit = 0 (0x0)
400
drive = "none0"
401
+ backend_defaults = "auto"
402
logical_block_size = 512 (512 B)
403
physical_block_size = 512 (512 B)
404
min_io_size = 0 (0 B)
405
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical
406
dev: floppy, id ""
407
unit = 0 (0x0)
408
drive = "none0"
409
+ backend_defaults = "auto"
410
logical_block_size = 512 (512 B)
411
physical_block_size = 512 (512 B)
412
min_io_size = 0 (0 B)
413
@@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica
414
dev: floppy, id ""
415
unit = 0 (0x0)
416
drive = "none0"
417
+ backend_defaults = "auto"
418
logical_block_size = 512 (512 B)
419
physical_block_size = 512 (512 B)
420
min_io_size = 0 (0 B)
62
--
421
--
63
2.14.3
422
2.31.1
64
423
65
diff view generated by jsdifflib
1
From: Sergio Lopez <slp@redhat.com>
1
From: Akihiko Odaki <akihiko.odaki@gmail.com>
2
2
3
Commit 5b2ffbe4d99843fd8305c573a100047a8c962327 ("virtio-blk: dataplane:
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com>
4
notify guest as a batch") deferred guest notification to a BH in order
4
Message-id: 20210705130458.97642-3-akihiko.odaki@gmail.com
5
to batch notifications, with the purpose of avoiding flooding the guest with
6
interruptions.
7
8
This optimization came with a cost. The average latency perceived in the
9
guest is increased by a few microseconds, but also when multiple IO
10
operations finish at the same time, the guest won't be notified until
11
all completions from each operation have been run. On the contrary,
12
virtio-scsi issues the notification at the end of each completion.
13
14
On the other hand, nowadays we have the EVENT_IDX feature that allows a
15
better coordination between QEMU and the Guest OS to avoid sending
16
unnecessary interruptions.
17
18
With this change, virtio-blk/dataplane only batches notifications if the
19
EVENT_IDX feature is not present.
20
21
Some numbers obtained with fio (ioengine=sync, iodepth=1, direct=1):
22
- Test specs:
23
* fio-3.4 (ioengine=sync, iodepth=1, direct=1)
24
* qemu master
25
* virtio-blk with a dedicated iothread (default poll-max-ns)
26
* backend: null_blk nr_devices=1 irqmode=2 completion_nsec=280000
27
* 8 vCPUs pinned to isolated physical cores
28
* Emulator and iothread also pinned to separate isolated cores
29
* variance between runs < 1%
30
31
- Not patched
32
* numjobs=1: lat_avg=327.32 irqs=29998
33
* numjobs=4: lat_avg=337.89 irqs=29073
34
* numjobs=8: lat_avg=342.98 irqs=28643
35
36
- Patched:
37
* numjobs=1: lat_avg=323.92 irqs=30262
38
* numjobs=4: lat_avg=332.65 irqs=29520
39
* numjobs=8: lat_avg=335.54 irqs=29323
40
41
Signed-off-by: Sergio Lopez <slp@redhat.com>
42
Message-id: 20180307114459.26636-1-slp@redhat.com
43
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
44
---
6
---
45
hw/block/dataplane/virtio-blk.c | 15 +++++++++++++--
7
block/io.c | 2 ++
46
1 file changed, 13 insertions(+), 2 deletions(-)
8
1 file changed, 2 insertions(+)
47
9
48
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
10
diff --git a/block/io.c b/block/io.c
49
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
50
--- a/hw/block/dataplane/virtio-blk.c
12
--- a/block/io.c
51
+++ b/hw/block/dataplane/virtio-blk.c
13
+++ b/block/io.c
52
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane {
14
@@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
53
VirtIODevice *vdev;
15
54
QEMUBH *bh; /* bh for guest notification */
16
static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
55
unsigned long *batch_notify_vqs;
56
+ bool batch_notifications;
57
58
/* Note that these EventNotifiers are assigned by value. This is
59
* fine as long as you do not call event_notifier_cleanup on them
60
@@ -XXX,XX +XXX,XX @@ struct VirtIOBlockDataPlane {
61
/* Raise an interrupt to signal guest, if necessary */
62
void virtio_blk_data_plane_notify(VirtIOBlockDataPlane *s, VirtQueue *vq)
63
{
17
{
64
- set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
18
+ dst->pdiscard_alignment = MAX(dst->pdiscard_alignment,
65
- qemu_bh_schedule(s->bh);
19
+ src->pdiscard_alignment);
66
+ if (s->batch_notifications) {
20
dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer);
67
+ set_bit(virtio_get_queue_index(vq), s->batch_notify_vqs);
21
dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer);
68
+ qemu_bh_schedule(s->bh);
22
dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer,
69
+ } else {
70
+ virtio_notify_irqfd(s->vdev, vq);
71
+ }
72
}
73
74
static void notify_guest_bh(void *opaque)
75
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
76
77
s->starting = true;
78
79
+ if (!virtio_vdev_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX)) {
80
+ s->batch_notifications = true;
81
+ } else {
82
+ s->batch_notifications = false;
83
+ }
84
+
85
/* Set up guest notifier (irq) */
86
r = k->set_guest_notifiers(qbus->parent, nvqs, true);
87
if (r != 0) {
88
--
23
--
89
2.14.3
24
2.31.1
90
25
91
diff view generated by jsdifflib