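The io_uring_prep_readv2/writev2() man pages recommend using the
non-vectored read/write operations when possible for performance
reasons. Use io_uring_prep_read()/io_uring_prep_write() when the request
has no more than one iovec and, for writes, no RWF_* flags are needed.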
I didn't measure a significant difference, but it doesn't hurt to have
this optimization in place.
Suggested-by: Eric Blake <eblake@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
v5:
- Reduce #ifdef HAVE_IO_URING_PREP_WRITEV2 code duplication [Kevin]
---
block/io_uring.c | 34 ++++++++++++++++++++++++++--------
1 file changed, 26 insertions(+), 8 deletions(-)
diff --git a/block/io_uring.c b/block/io_uring.c
index dd930ee57e..f1514cf024 100644
--- a/block/io_uring.c
+++ b/block/io_uring.c
@@ -46,17 +46,28 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
switch (req->type) {
case QEMU_AIO_WRITE:
-#ifdef HAVE_IO_URING_PREP_WRITEV2
{
int luring_flags = (flags & BDRV_REQ_FUA) ? RWF_DSYNC : 0;
- io_uring_prep_writev2(sqe, fd, qiov->iov,
- qiov->niov, offset, luring_flags);
- }
+ if (luring_flags != 0 || qiov->niov > 1) {
+#ifdef HAVE_IO_URING_PREP_WRITEV2
+ io_uring_prep_writev2(sqe, fd, qiov->iov,
+ qiov->niov, offset, luring_flags);
#else
- assert(flags == 0);
- io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
+ /*
+ * FUA should only be enabled with HAVE_IO_URING_PREP_WRITEV2, see
+ * luring_has_fua().
+ */
+ assert(luring_flags == 0);
+
+ io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
#endif
+ } else {
+ /* The man page says non-vectored is faster than vectored */
+ struct iovec *iov = qiov->iov;
+ io_uring_prep_write(sqe, fd, iov->iov_base, iov->iov_len, offset);
+ }
break;
+ }
case QEMU_AIO_ZONE_APPEND:
io_uring_prep_writev(sqe, fd, qiov->iov, qiov->niov, offset);
break;
@@ -65,8 +76,15 @@ static void luring_prep_sqe(struct io_uring_sqe *sqe, void *opaque)
if (req->resubmit_qiov.iov != NULL) {
qiov = &req->resubmit_qiov;
}
- io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
- offset + req->total_read);
+ if (qiov->niov > 1) {
+ io_uring_prep_readv(sqe, fd, qiov->iov, qiov->niov,
+ offset + req->total_read);
+ } else {
+ /* The man page says non-vectored is faster than vectored */
+ struct iovec *iov = qiov->iov;
+ io_uring_prep_read(sqe, fd, iov->iov_base, iov->iov_len,
+ offset + req->total_read);
+ }
break;
}
case QEMU_AIO_FLUSH:
--
2.51.1