1
The following changes since commit fe8ee082db5038a05dbd8872e946049e9a9c550e:
1
The following changes since commit 887cba855bb6ff4775256f7968409281350b568c:
2
2
3
Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-22' into staging (2017-02-24 15:00:51 +0000)
3
configure: Fix cross-building for RISCV host (v5) (2023-07-11 17:56:09 +0100)
4
4
5
are available in the git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 1d393bdeae22fde2cb83c1ea719675747c85c40e:
9
for you to fetch changes up to 75dcb4d790bbe5327169fd72b185960ca58e2fa6:
10
10
11
RBD: Add support readv,writev for rbd (2017-02-24 12:43:01 -0500)
11
virtio-blk: fix host notifier issues during dataplane start/stop (2023-07-12 15:20:32 -0400)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block patches for 2.9
14
Pull request
15
15
----------------------------------------------------------------
16
----------------------------------------------------------------
16
17
17
Peter Lieven (2):
18
Stefan Hajnoczi (1):
18
block/nfs: convert to preadv / pwritev
19
virtio-blk: fix host notifier issues during dataplane start/stop
19
block/nfs: try to avoid the bounce buffer in pwritev
20
20
21
tianqing (1):
21
hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
22
RBD: Add support readv,writev for rbd
22
1 file changed, 38 insertions(+), 29 deletions(-)
23
24
block/nfs.c | 62 +++++++++++++++++++++++++----------------------
25
block/rbd.c | 80 ++++++++++++++++++++++++++++++++++++++++++-------------------
26
2 files changed, 90 insertions(+), 52 deletions(-)
27
23
28
--
24
--
29
2.9.3
25
2.40.1
30
31
diff view generated by jsdifflib
Deleted patch
1
From: Peter Lieven <pl@kamp.de>
2
1
3
Signed-off-by: Peter Lieven <pl@kamp.de>
4
Reviewed-by: Jeff Cody <jcody@redhat.com>
5
Message-id: 1487349541-10201-2-git-send-email-pl@kamp.de
6
Signed-off-by: Jeff Cody <jcody@redhat.com>
7
---
8
block/nfs.c | 33 +++++++++++++++------------------
9
1 file changed, 15 insertions(+), 18 deletions(-)
10
11
diff --git a/block/nfs.c b/block/nfs.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/block/nfs.c
14
+++ b/block/nfs.c
15
@@ -XXX,XX +XXX,XX @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
16
nfs_co_generic_bh_cb, task);
17
}
18
19
-static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
20
- int64_t sector_num, int nb_sectors,
21
- QEMUIOVector *iov)
22
+static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
23
+ uint64_t bytes, QEMUIOVector *iov,
24
+ int flags)
25
{
26
NFSClient *client = bs->opaque;
27
NFSRPC task;
28
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
29
task.iov = iov;
30
31
if (nfs_pread_async(client->context, client->fh,
32
- sector_num * BDRV_SECTOR_SIZE,
33
- nb_sectors * BDRV_SECTOR_SIZE,
34
- nfs_co_generic_cb, &task) != 0) {
35
+ offset, bytes, nfs_co_generic_cb, &task) != 0) {
36
return -ENOMEM;
37
}
38
39
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
40
return 0;
41
}
42
43
-static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
44
- int64_t sector_num, int nb_sectors,
45
- QEMUIOVector *iov)
46
+static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
47
+ uint64_t bytes, QEMUIOVector *iov,
48
+ int flags)
49
{
50
NFSClient *client = bs->opaque;
51
NFSRPC task;
52
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
53
54
nfs_co_init_task(bs, &task);
55
56
- buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
57
- if (nb_sectors && buf == NULL) {
58
+ buf = g_try_malloc(bytes);
59
+ if (bytes && buf == NULL) {
60
return -ENOMEM;
61
}
62
63
- qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
64
+ qemu_iovec_to_buf(iov, 0, buf, bytes);
65
66
if (nfs_pwrite_async(client->context, client->fh,
67
- sector_num * BDRV_SECTOR_SIZE,
68
- nb_sectors * BDRV_SECTOR_SIZE,
69
- buf, nfs_co_generic_cb, &task) != 0) {
70
+ offset, bytes, buf,
71
+ nfs_co_generic_cb, &task) != 0) {
72
g_free(buf);
73
return -ENOMEM;
74
}
75
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
76
77
g_free(buf);
78
79
- if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
80
+ if (task.ret != bytes) {
81
return task.ret < 0 ? task.ret : -EIO;
82
}
83
84
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nfs = {
85
.bdrv_create = nfs_file_create,
86
.bdrv_reopen_prepare = nfs_reopen_prepare,
87
88
- .bdrv_co_readv = nfs_co_readv,
89
- .bdrv_co_writev = nfs_co_writev,
90
+ .bdrv_co_preadv = nfs_co_preadv,
91
+ .bdrv_co_pwritev = nfs_co_pwritev,
92
.bdrv_co_flush_to_disk = nfs_co_flush,
93
94
.bdrv_detach_aio_context = nfs_detach_aio_context,
95
--
96
2.9.3
97
98
diff view generated by jsdifflib
1
From: Peter Lieven <pl@kamp.de>
1
The main loop thread can consume 100% CPU when using --device
2
virtio-blk-pci,iothread=<iothread>. ppoll() constantly returns but
3
reading virtqueue host notifiers fails with EAGAIN. The file descriptors
4
are stale and remain registered with the AioContext because of bugs in
5
the virtio-blk dataplane start/stop code.
2
6
3
If the passed qiov contains exactly one iov, we can
7
The problem is that the dataplane start/stop code involves drain
4
pass the buffer directly.
8
operations, which call virtio_blk_drained_begin() and
9
virtio_blk_drained_end() at points where the host notifier is not
10
operational:
11
- In virtio_blk_data_plane_start(), blk_set_aio_context() drains after
12
vblk->dataplane_started has been set to true but the host notifier has
13
not been attached yet.
14
- In virtio_blk_data_plane_stop(), blk_drain() and blk_set_aio_context()
15
drain after the host notifier has already been detached but with
16
vblk->dataplane_started still set to true.
5
17
6
Signed-off-by: Peter Lieven <pl@kamp.de>
18
I would like to simplify ->ioeventfd_start/stop() to avoid interactions
7
Reviewed-by: Jeff Cody <jcody@redhat.com>
19
with drain entirely, but couldn't find a way to do that. Instead, this
8
Message-id: 1487349541-10201-3-git-send-email-pl@kamp.de
20
patch accepts the fragile nature of the code and reorders it so that
9
Signed-off-by: Jeff Cody <jcody@redhat.com>
21
vblk->dataplane_started is false during drain operations. This way the
22
virtio_blk_drained_begin() and virtio_blk_drained_end() calls don't
23
touch the host notifier. The result is that
24
virtio_blk_data_plane_start() and virtio_blk_data_plane_stop() have
25
complete control over the host notifier and stale file descriptors are
26
no longer left in the AioContext.
27
28
This patch fixes the 100% CPU consumption in the main loop thread and
29
correctly moves host notifier processing to the IOThread.
30
31
Fixes: 1665d9326fd2 ("virtio-blk: implement BlockDevOps->drained_begin()")
32
Reported-by: Lukáš Doktor <ldoktor@redhat.com>
33
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
34
Tested-by: Lukas Doktor <ldoktor@redhat.com>
35
Message-id: 20230704151527.193586-1-stefanha@redhat.com
36
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
37
---
11
block/nfs.c | 35 ++++++++++++++++++++++-------------
38
hw/block/dataplane/virtio-blk.c | 67 +++++++++++++++++++--------------
12
1 file changed, 22 insertions(+), 13 deletions(-)
39
1 file changed, 38 insertions(+), 29 deletions(-)
13
40
14
diff --git a/block/nfs.c b/block/nfs.c
41
diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c
15
index XXXXXXX..XXXXXXX 100644
42
index XXXXXXX..XXXXXXX 100644
16
--- a/block/nfs.c
43
--- a/hw/block/dataplane/virtio-blk.c
17
+++ b/block/nfs.c
44
+++ b/hw/block/dataplane/virtio-blk.c
18
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
45
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
19
NFSClient *client = bs->opaque;
46
20
NFSRPC task;
47
memory_region_transaction_commit();
21
char *buf = NULL;
48
22
+ bool my_buffer = false;
49
- /*
23
50
- * These fields are visible to the IOThread so we rely on implicit barriers
24
nfs_co_init_task(bs, &task);
51
- * in aio_context_acquire() on the write side and aio_notify_accept() on
25
52
- * the read side.
26
- buf = g_try_malloc(bytes);
53
- */
27
- if (bytes && buf == NULL) {
54
- s->starting = false;
28
- return -ENOMEM;
55
- vblk->dataplane_started = true;
29
+ if (iov->niov != 1) {
56
trace_virtio_blk_data_plane_start(s);
30
+ buf = g_try_malloc(bytes);
57
31
+ if (bytes && buf == NULL) {
58
old_context = blk_get_aio_context(s->conf->conf.blk);
32
+ return -ENOMEM;
59
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
33
+ }
60
event_notifier_set(virtio_queue_get_host_notifier(vq));
34
+ qemu_iovec_to_buf(iov, 0, buf, bytes);
35
+ my_buffer = true;
36
+ } else {
37
+ buf = iov->iov[0].iov_base;
38
}
61
}
39
62
40
- qemu_iovec_to_buf(iov, 0, buf, bytes);
63
+ /*
41
-
64
+ * These fields must be visible to the IOThread when it processes the
42
if (nfs_pwrite_async(client->context, client->fh,
65
+ * virtqueue, otherwise it will think dataplane has not started yet.
43
offset, bytes, buf,
66
+ *
44
nfs_co_generic_cb, &task) != 0) {
67
+ * Make sure ->dataplane_started is false when blk_set_aio_context() is
45
+ if (my_buffer) {
68
+ * called above so that draining does not cause the host notifier to be
46
+ g_free(buf);
69
+ * detached/attached prematurely.
47
+ }
70
+ */
48
+ return -ENOMEM;
71
+ s->starting = false;
72
+ vblk->dataplane_started = true;
73
+ smp_wmb(); /* paired with aio_notify_accept() on the read side */
74
+
75
/* Get this show started by hooking up our callbacks */
76
if (!blk_in_drain(s->conf->conf.blk)) {
77
aio_context_acquire(s->ctx);
78
@@ -XXX,XX +XXX,XX @@ int virtio_blk_data_plane_start(VirtIODevice *vdev)
79
fail_guest_notifiers:
80
vblk->dataplane_disabled = true;
81
s->starting = false;
82
- vblk->dataplane_started = true;
83
return -ENOSYS;
84
}
85
86
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
87
aio_wait_bh_oneshot(s->ctx, virtio_blk_data_plane_stop_bh, s);
88
}
89
90
+ /*
91
+ * Batch all the host notifiers in a single transaction to avoid
92
+ * quadratic time complexity in address_space_update_ioeventfds().
93
+ */
94
+ memory_region_transaction_begin();
95
+
96
+ for (i = 0; i < nvqs; i++) {
97
+ virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
49
+ }
98
+ }
50
+
99
+
51
+ nfs_set_events(client);
100
+ /*
52
+ while (!task.complete) {
101
+ * The transaction expects the ioeventfds to be open when it
53
+ qemu_coroutine_yield();
102
+ * commits. Do it now, before the cleanup loop.
103
+ */
104
+ memory_region_transaction_commit();
105
+
106
+ for (i = 0; i < nvqs; i++) {
107
+ virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
54
+ }
108
+ }
55
+
109
+
56
+ if (my_buffer) {
110
+ /*
57
g_free(buf);
111
+ * Set ->dataplane_started to false before draining so that host notifiers
58
- return -ENOMEM;
112
+ * are not detached/attached anymore.
59
}
113
+ */
60
114
+ vblk->dataplane_started = false;
61
- nfs_set_events(client);
115
+
62
- while (!task.complete) {
116
aio_context_acquire(s->ctx);
63
- qemu_coroutine_yield();
117
118
/* Wait for virtio_blk_dma_restart_bh() and in flight I/O to complete */
119
@@ -XXX,XX +XXX,XX @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev)
120
121
aio_context_release(s->ctx);
122
123
- /*
124
- * Batch all the host notifiers in a single transaction to avoid
125
- * quadratic time complexity in address_space_update_ioeventfds().
126
- */
127
- memory_region_transaction_begin();
128
-
129
- for (i = 0; i < nvqs; i++) {
130
- virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false);
64
- }
131
- }
65
-
132
-
66
- g_free(buf);
133
- /*
134
- * The transaction expects the ioeventfds to be open when it
135
- * commits. Do it now, before the cleanup loop.
136
- */
137
- memory_region_transaction_commit();
67
-
138
-
68
if (task.ret != bytes) {
139
- for (i = 0; i < nvqs; i++) {
69
return task.ret < 0 ? task.ret : -EIO;
140
- virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i);
70
}
141
- }
142
-
143
qemu_bh_cancel(s->bh);
144
notify_guest_bh(s); /* final chance to notify guest */
145
146
/* Clean up guest notifier (irq) */
147
k->set_guest_notifiers(qbus->parent, nvqs, false);
148
149
- vblk->dataplane_started = false;
150
s->stopping = false;
151
}
71
--
152
--
72
2.9.3
153
2.40.1
73
154
74
155
diff view generated by jsdifflib
Deleted patch
1
From: tianqing <tianqing@unitedstack.com>
2
1
3
Rbd can do readv and writev directly, so we do not need to transform
4
iov to buf or vice versa any more.
5
6
Signed-off-by: tianqing <tianqing@unitedstack.com>
7
Reviewed-by: Jeff Cody <jcody@redhat.com>
8
Signed-off-by: Jeff Cody <jcody@redhat.com>
9
---
10
block/rbd.c | 80 ++++++++++++++++++++++++++++++++++++++++++-------------------
11
1 file changed, 56 insertions(+), 24 deletions(-)
12
13
diff --git a/block/rbd.c b/block/rbd.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/block/rbd.c
16
+++ b/block/rbd.c
17
@@ -XXX,XX +XXX,XX @@
18
#define RBD_MAX_SNAP_NAME_SIZE 128
19
#define RBD_MAX_SNAPS 100
20
21
+/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
22
+#ifdef LIBRBD_SUPPORTS_IOVEC
23
+#define LIBRBD_USE_IOVEC 1
24
+#else
25
+#define LIBRBD_USE_IOVEC 0
26
+#endif
27
+
28
typedef enum {
29
RBD_AIO_READ,
30
RBD_AIO_WRITE,
31
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
32
return ret;
33
}
34
35
+static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
36
+{
37
+ if (LIBRBD_USE_IOVEC) {
38
+ RBDAIOCB *acb = rcb->acb;
39
+ iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
40
+ acb->qiov->size - offs);
41
+ } else {
42
+ memset(rcb->buf + offs, 0, rcb->size - offs);
43
+ }
44
+}
45
+
46
static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
47
{
48
Error *local_err = NULL;
49
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
50
}
51
} else {
52
if (r < 0) {
53
- memset(rcb->buf, 0, rcb->size);
54
+ qemu_rbd_memset(rcb, 0);
55
acb->ret = r;
56
acb->error = 1;
57
} else if (r < rcb->size) {
58
- memset(rcb->buf + r, 0, rcb->size - r);
59
+ qemu_rbd_memset(rcb, r);
60
if (!acb->error) {
61
acb->ret = rcb->size;
62
}
63
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
64
65
g_free(rcb);
66
67
- if (acb->cmd == RBD_AIO_READ) {
68
- qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
69
+ if (!LIBRBD_USE_IOVEC) {
70
+ if (acb->cmd == RBD_AIO_READ) {
71
+ qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
72
+ }
73
+ qemu_vfree(acb->bounce);
74
}
75
- qemu_vfree(acb->bounce);
76
+
77
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
78
79
qemu_aio_unref(acb);
80
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
81
RBDAIOCB *acb;
82
RADOSCB *rcb = NULL;
83
rbd_completion_t c;
84
- char *buf;
85
int r;
86
87
BDRVRBDState *s = bs->opaque;
88
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
89
acb->cmd = cmd;
90
acb->qiov = qiov;
91
assert(!qiov || qiov->size == size);
92
- if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
93
- acb->bounce = NULL;
94
- } else {
95
- acb->bounce = qemu_try_blockalign(bs, qiov->size);
96
- if (acb->bounce == NULL) {
97
- goto failed;
98
+
99
+ rcb = g_new(RADOSCB, 1);
100
+
101
+ if (!LIBRBD_USE_IOVEC) {
102
+ if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
103
+ acb->bounce = NULL;
104
+ } else {
105
+ acb->bounce = qemu_try_blockalign(bs, qiov->size);
106
+ if (acb->bounce == NULL) {
107
+ goto failed;
108
+ }
109
}
110
+ if (cmd == RBD_AIO_WRITE) {
111
+ qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
112
+ }
113
+ rcb->buf = acb->bounce;
114
}
115
+
116
acb->ret = 0;
117
acb->error = 0;
118
acb->s = s;
119
120
- if (cmd == RBD_AIO_WRITE) {
121
- qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
122
- }
123
-
124
- buf = acb->bounce;
125
-
126
- rcb = g_new(RADOSCB, 1);
127
rcb->acb = acb;
128
- rcb->buf = buf;
129
rcb->s = acb->s;
130
rcb->size = size;
131
r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
132
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
133
134
switch (cmd) {
135
case RBD_AIO_WRITE:
136
- r = rbd_aio_write(s->image, off, size, buf, c);
137
+#ifdef LIBRBD_SUPPORTS_IOVEC
138
+ r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
139
+#else
140
+ r = rbd_aio_write(s->image, off, size, rcb->buf, c);
141
+#endif
142
break;
143
case RBD_AIO_READ:
144
- r = rbd_aio_read(s->image, off, size, buf, c);
145
+#ifdef LIBRBD_SUPPORTS_IOVEC
146
+ r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
147
+#else
148
+ r = rbd_aio_read(s->image, off, size, rcb->buf, c);
149
+#endif
150
break;
151
case RBD_AIO_DISCARD:
152
r = rbd_aio_discard_wrapper(s->image, off, size, c);
153
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
154
if (r < 0) {
155
goto failed_completion;
156
}
157
-
158
return &acb->common;
159
160
failed_completion:
161
rbd_aio_release(c);
162
failed:
163
g_free(rcb);
164
- qemu_vfree(acb->bounce);
165
+ if (!LIBRBD_USE_IOVEC) {
166
+ qemu_vfree(acb->bounce);
167
+ }
168
+
169
qemu_aio_unref(acb);
170
return NULL;
171
}
172
--
173
2.9.3
174
175
diff view generated by jsdifflib