The following changes since commit fe8ee082db5038a05dbd8872e946049e9a9c550e:

  Merge remote-tracking branch 'remotes/armbru/tags/pull-qapi-2017-02-22' into staging (2017-02-24 15:00:51 +0000)

are available in the git repository at:

  https://github.com/codyprime/qemu-kvm-jtc.git tags/block-pull-request

for you to fetch changes up to 1d393bdeae22fde2cb83c1ea719675747c85c40e:

  RBD: Add support readv,writev for rbd (2017-02-24 12:43:01 -0500)

----------------------------------------------------------------
Block patches for 2.9
----------------------------------------------------------------

Peter Lieven (2):
  block/nfs: convert to preadv / pwritev
  block/nfs: try to avoid the bounce buffer in pwritev

tianqing (1):
  RBD: Add support readv,writev for rbd

 block/nfs.c | 62 +++++++++++++++++++++++++----------------------
 block/rbd.c | 80 ++++++++++++++++++++++++++++++++++++++++++-------------------
 2 files changed, 90 insertions(+), 52 deletions(-)

--
2.9.3

The following changes since commit 3521ade3510eb5cefb2e27a101667f25dad89935:

  Merge remote-tracking branch 'remotes/thuth-gitlab/tags/pull-request-2021-07-29' into staging (2021-07-29 13:17:20 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to cc8eecd7f105a1dff5876adeb238a14696061a4a:

  MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver (2021-07-29 17:17:34 +0100)

----------------------------------------------------------------
Pull request

The main fix here is for io_uring. Spurious -EAGAIN errors can happen and the
request needs to be resubmitted.

The MAINTAINERS changes carry no risk and we might as well include them in QEMU
6.1.

----------------------------------------------------------------

Fabian Ebner (1):
  block/io_uring: resubmit when result is -EAGAIN

Philippe Mathieu-Daudé (1):
  MAINTAINERS: Added myself as a reviewer for the NVMe Block Driver

Stefano Garzarella (1):
  MAINTAINERS: add Stefano Garzarella as io_uring reviewer

 MAINTAINERS      |  2 ++
 block/io_uring.c | 16 +++++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

--
2.31.1

From: tianqing <tianqing@unitedstack.com>

RBD can do readv and writev directly, so we do not need to transform
iov to buf or vice versa any more.
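
For illustration only (not part of the patch): a minimal sketch of the two
write paths, assuming a connected rbd_image_t and a prepared
rbd_completion_t. The helper name submit_write() is made up;
rbd_aio_writev() and rbd_aio_write() are the librbd calls used in the diff
below.

    /* Sketch: submit a vectored write directly when librbd supports iovecs,
     * otherwise fall back to flattening the request into a bounce buffer.
     * Error handling and freeing of the bounce buffer are omitted. */
    static int submit_write(rbd_image_t image, QEMUIOVector *qiov,
                            uint64_t off, uint64_t size, rbd_completion_t c)
    {
    #ifdef LIBRBD_SUPPORTS_IOVEC
        /* No copy: hand the caller's iovec straight to librbd. */
        return rbd_aio_writev(image, qiov->iov, qiov->niov, off, c);
    #else
        /* Old behaviour: copy the iovec into a contiguous bounce buffer. */
        char *buf = g_malloc(size);
        qemu_iovec_to_buf(qiov, 0, buf, size);
        return rbd_aio_write(image, off, size, buf, c);
    #endif
    }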

Signed-off-by: tianqing <tianqing@unitedstack.com>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
block/rbd.c | 80 ++++++++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 56 insertions(+), 24 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index XXXXXXX..XXXXXXX 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -XXX,XX +XXX,XX @@
#define RBD_MAX_SNAP_NAME_SIZE 128
#define RBD_MAX_SNAPS 100

+/* The LIBRBD_SUPPORTS_IOVEC is defined in librbd.h */
+#ifdef LIBRBD_SUPPORTS_IOVEC
+#define LIBRBD_USE_IOVEC 1
+#else
+#define LIBRBD_USE_IOVEC 0
+#endif
+
typedef enum {
RBD_AIO_READ,
RBD_AIO_WRITE,
@@ -XXX,XX +XXX,XX @@ static int qemu_rbd_set_conf(rados_t cluster, const char *conf,
return ret;
}

+static void qemu_rbd_memset(RADOSCB *rcb, int64_t offs)
+{
+ if (LIBRBD_USE_IOVEC) {
+ RBDAIOCB *acb = rcb->acb;
+ iov_memset(acb->qiov->iov, acb->qiov->niov, offs, 0,
+ acb->qiov->size - offs);
+ } else {
+ memset(rcb->buf + offs, 0, rcb->size - offs);
+ }
+}
+
static int qemu_rbd_create(const char *filename, QemuOpts *opts, Error **errp)
{
Error *local_err = NULL;
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
}
} else {
if (r < 0) {
- memset(rcb->buf, 0, rcb->size);
+ qemu_rbd_memset(rcb, 0);
acb->ret = r;
acb->error = 1;
} else if (r < rcb->size) {
- memset(rcb->buf + r, 0, rcb->size - r);
+ qemu_rbd_memset(rcb, r);
if (!acb->error) {
acb->ret = rcb->size;
}
@@ -XXX,XX +XXX,XX @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)

g_free(rcb);

- if (acb->cmd == RBD_AIO_READ) {
- qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+ if (!LIBRBD_USE_IOVEC) {
+ if (acb->cmd == RBD_AIO_READ) {
+ qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
+ }
+ qemu_vfree(acb->bounce);
}
- qemu_vfree(acb->bounce);
+
acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));

qemu_aio_unref(acb);
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
RBDAIOCB *acb;
RADOSCB *rcb = NULL;
rbd_completion_t c;
- char *buf;
int r;

BDRVRBDState *s = bs->opaque;
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
acb->cmd = cmd;
acb->qiov = qiov;
assert(!qiov || qiov->size == size);
- if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
- acb->bounce = NULL;
- } else {
- acb->bounce = qemu_try_blockalign(bs, qiov->size);
- if (acb->bounce == NULL) {
- goto failed;
+
+ rcb = g_new(RADOSCB, 1);
+
+ if (!LIBRBD_USE_IOVEC) {
+ if (cmd == RBD_AIO_DISCARD || cmd == RBD_AIO_FLUSH) {
+ acb->bounce = NULL;
+ } else {
+ acb->bounce = qemu_try_blockalign(bs, qiov->size);
+ if (acb->bounce == NULL) {
+ goto failed;
+ }
}
+ if (cmd == RBD_AIO_WRITE) {
+ qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
+ }
+ rcb->buf = acb->bounce;
}
+
acb->ret = 0;
acb->error = 0;
acb->s = s;

- if (cmd == RBD_AIO_WRITE) {
- qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
- }
-
- buf = acb->bounce;
-
- rcb = g_new(RADOSCB, 1);
rcb->acb = acb;
- rcb->buf = buf;
rcb->s = acb->s;
rcb->size = size;
r = rbd_aio_create_completion(rcb, (rbd_callback_t) rbd_finish_aiocb, &c);
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,

switch (cmd) {
case RBD_AIO_WRITE:
- r = rbd_aio_write(s->image, off, size, buf, c);
+#ifdef LIBRBD_SUPPORTS_IOVEC
+ r = rbd_aio_writev(s->image, qiov->iov, qiov->niov, off, c);
+#else
+ r = rbd_aio_write(s->image, off, size, rcb->buf, c);
+#endif
break;
case RBD_AIO_READ:
- r = rbd_aio_read(s->image, off, size, buf, c);
+#ifdef LIBRBD_SUPPORTS_IOVEC
+ r = rbd_aio_readv(s->image, qiov->iov, qiov->niov, off, c);
+#else
+ r = rbd_aio_read(s->image, off, size, rcb->buf, c);
+#endif
break;
case RBD_AIO_DISCARD:
r = rbd_aio_discard_wrapper(s->image, off, size, c);
@@ -XXX,XX +XXX,XX @@ static BlockAIOCB *rbd_start_aio(BlockDriverState *bs,
if (r < 0) {
goto failed_completion;
}
-
return &acb->common;

failed_completion:
rbd_aio_release(c);
failed:
g_free(rcb);
- qemu_vfree(acb->bounce);
+ if (!LIBRBD_USE_IOVEC) {
+ qemu_vfree(acb->bounce);
+ }
+
qemu_aio_unref(acb);
return NULL;
}
--
2.9.3

From: Stefano Garzarella <sgarzare@redhat.com>

I've been working with io_uring for a while so I'd like to help
with reviews.

Signed-off-by: Stefano Garzarella <sgarzare@redhat.com>
Message-Id: <20210728131515.131045-1-sgarzare@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
MAINTAINERS | 1 +
1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index XXXXXXX..XXXXXXX 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -XXX,XX +XXX,XX @@ Linux io_uring
M: Aarushi Mehta <mehta.aaru20@gmail.com>
M: Julia Suvorova <jusual@redhat.com>
M: Stefan Hajnoczi <stefanha@redhat.com>
+R: Stefano Garzarella <sgarzare@redhat.com>
L: qemu-block@nongnu.org
S: Maintained
F: block/io_uring.c
--
2.31.1

From: Peter Lieven <pl@kamp.de>

If the passed qiov contains exactly one iov, we can
pass the buffer directly.
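
For illustration only (not part of the patch): a small sketch of the idea.
The helper name write_buf_for() is made up; QEMUIOVector, g_try_malloc() and
qemu_iovec_to_buf() are the interfaces used in the diff below.

    /* Sketch: choose the data pointer for a write. A single-element iovec
     * can be passed through as-is; only scattered requests need a bounce
     * buffer. *need_free tells the caller whether it owns the result. */
    static char *write_buf_for(QEMUIOVector *qiov, size_t bytes, bool *need_free)
    {
        if (qiov->niov == 1) {
            *need_free = false;
            return qiov->iov[0].iov_base;   /* no copy needed */
        }

        char *buf = g_try_malloc(bytes);    /* may be NULL on failure */
        if (buf) {
            qemu_iovec_to_buf(qiov, 0, buf, bytes);
        }
        *need_free = true;
        return buf;
    }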

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1487349541-10201-3-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
block/nfs.c | 35 ++++++++++++++++++++++-------------
1 file changed, 22 insertions(+), 13 deletions(-)

diff --git a/block/nfs.c b/block/nfs.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
NFSClient *client = bs->opaque;
NFSRPC task;
char *buf = NULL;
+ bool my_buffer = false;

nfs_co_init_task(bs, &task);

- buf = g_try_malloc(bytes);
- if (bytes && buf == NULL) {
- return -ENOMEM;
+ if (iov->niov != 1) {
+ buf = g_try_malloc(bytes);
+ if (bytes && buf == NULL) {
+ return -ENOMEM;
+ }
+ qemu_iovec_to_buf(iov, 0, buf, bytes);
+ my_buffer = true;
+ } else {
+ buf = iov->iov[0].iov_base;
}

- qemu_iovec_to_buf(iov, 0, buf, bytes);
-
if (nfs_pwrite_async(client->context, client->fh,
offset, bytes, buf,
nfs_co_generic_cb, &task) != 0) {
+ if (my_buffer) {
+ g_free(buf);
+ }
+ return -ENOMEM;
+ }
+
+ nfs_set_events(client);
+ while (!task.complete) {
+ qemu_coroutine_yield();
+ }
+
+ if (my_buffer) {
g_free(buf);
- return -ENOMEM;
}

- nfs_set_events(client);
- while (!task.complete) {
- qemu_coroutine_yield();
- }
-
- g_free(buf);
-
if (task.ret != bytes) {
return task.ret < 0 ? task.ret : -EIO;
}
--
2.9.3

From: Fabian Ebner <f.ebner@proxmox.com>

Linux SCSI can throw spurious -EAGAIN in some corner cases in its
completion path, which will end up being the result in the completed
io_uring request.

Resubmitting such requests should allow block jobs to complete, even
if such spurious errors are encountered.
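
For illustration only (not part of the patch): the gist of the workaround.
The helper name below is made up; the patch itself simply extends the
existing -EINTR check in luring_process_completions().

    /* Sketch: decide whether a completion result is spurious and the request
     * should be resubmitted instead of failed. -EINTR was already handled
     * this way; -EAGAIN is added because Linux SCSI can return it spuriously
     * in rare corner cases. */
    static inline bool luring_result_is_spurious(int ret)
    {
        return ret == -EINTR || ret == -EAGAIN;
    }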

Co-authored-by: Stefan Hajnoczi <stefanha@gmail.com>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Fabian Ebner <f.ebner@proxmox.com>
Message-id: 20210729091029.65369-1-f.ebner@proxmox.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
block/io_uring.c | 16 +++++++++++++++-
1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/block/io_uring.c b/block/io_uring.c
index XXXXXXX..XXXXXXX 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s)
total_bytes = ret + luringcb->total_read;

if (ret < 0) {
- if (ret == -EINTR) {
+ /*
+ * Only writev/readv/fsync requests on regular files or host block
+ * devices are submitted. Therefore -EAGAIN is not expected but it's
+ * known to happen sometimes with Linux SCSI. Submit again and hope
+ * the request completes successfully.
+ *
+ * For more information, see:
+ * https://lore.kernel.org/io-uring/20210727165811.284510-3-axboe@kernel.dk/T/#u
+ *
+ * If the code is changed to submit other types of requests in the
+ * future, then this workaround may need to be extended to deal with
+ * genuine -EAGAIN results that should not be resubmitted
+ * immediately.
+ */
+ if (ret == -EINTR || ret == -EAGAIN) {
luring_resubmit(s, luringcb);
continue;
}
--
2.31.1

From: Peter Lieven <pl@kamp.de>

Signed-off-by: Peter Lieven <pl@kamp.de>
Reviewed-by: Jeff Cody <jcody@redhat.com>
Message-id: 1487349541-10201-2-git-send-email-pl@kamp.de
Signed-off-by: Jeff Cody <jcody@redhat.com>
---
block/nfs.c | 33 +++++++++++++++------------------
1 file changed, 15 insertions(+), 18 deletions(-)

diff --git a/block/nfs.c b/block/nfs.c
index XXXXXXX..XXXXXXX 100644
--- a/block/nfs.c
+++ b/block/nfs.c
@@ -XXX,XX +XXX,XX @@ nfs_co_generic_cb(int ret, struct nfs_context *nfs, void *data,
nfs_co_generic_bh_cb, task);
}

-static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
+static int coroutine_fn nfs_co_preadv(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *iov,
+ int flags)
{
NFSClient *client = bs->opaque;
NFSRPC task;
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
task.iov = iov;

if (nfs_pread_async(client->context, client->fh,
- sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE,
- nfs_co_generic_cb, &task) != 0) {
+ offset, bytes, nfs_co_generic_cb, &task) != 0) {
return -ENOMEM;
}

@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_readv(BlockDriverState *bs,
return 0;
}

-static int coroutine_fn nfs_co_writev(BlockDriverState *bs,
- int64_t sector_num, int nb_sectors,
- QEMUIOVector *iov)
+static int coroutine_fn nfs_co_pwritev(BlockDriverState *bs, uint64_t offset,
+ uint64_t bytes, QEMUIOVector *iov,
+ int flags)
{
NFSClient *client = bs->opaque;
NFSRPC task;
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,

nfs_co_init_task(bs, &task);

- buf = g_try_malloc(nb_sectors * BDRV_SECTOR_SIZE);
- if (nb_sectors && buf == NULL) {
+ buf = g_try_malloc(bytes);
+ if (bytes && buf == NULL) {
return -ENOMEM;
}

- qemu_iovec_to_buf(iov, 0, buf, nb_sectors * BDRV_SECTOR_SIZE);
+ qemu_iovec_to_buf(iov, 0, buf, bytes);

if (nfs_pwrite_async(client->context, client->fh,
- sector_num * BDRV_SECTOR_SIZE,
- nb_sectors * BDRV_SECTOR_SIZE,
- buf, nfs_co_generic_cb, &task) != 0) {
+ offset, bytes, buf,
+ nfs_co_generic_cb, &task) != 0) {
g_free(buf);
return -ENOMEM;
}
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn nfs_co_writev(BlockDriverState *bs,

g_free(buf);

- if (task.ret != nb_sectors * BDRV_SECTOR_SIZE) {
+ if (task.ret != bytes) {
return task.ret < 0 ? task.ret : -EIO;
}

@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nfs = {
.bdrv_create = nfs_file_create,
.bdrv_reopen_prepare = nfs_reopen_prepare,

- .bdrv_co_readv = nfs_co_readv,
- .bdrv_co_writev = nfs_co_writev,
+ .bdrv_co_preadv = nfs_co_preadv,
+ .bdrv_co_pwritev = nfs_co_pwritev,
.bdrv_co_flush_to_disk = nfs_co_flush,

.bdrv_detach_aio_context = nfs_detach_aio_context,
--
2.9.3

From: Philippe Mathieu-Daudé <philmd@redhat.com>

I'm interested in following the activity around the NVMe bdrv.

Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Message-id: 20210728183340.2018313-1-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
MAINTAINERS | 1 +
1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index XXXXXXX..XXXXXXX 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -XXX,XX +XXX,XX @@ F: block/null.c
NVMe Block Driver
M: Stefan Hajnoczi <stefanha@redhat.com>
R: Fam Zheng <fam@euphon.net>
+R: Philippe Mathieu-Daudé <philmd@redhat.com>
L: qemu-block@nongnu.org
S: Supported
F: block/nvme*
--
2.31.1