1
The following changes since commit 928173659d6e5dc368284f73f90ea1d129e1f57d:
1
The following changes since commit 661c2e1ab29cd9c4d268ae3f44712e8d421c0e56:
2
2
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200130' into staging (2020-01-30 16:19:04 +0000)
3
scripts/checkpatch: Fix a typo (2025-03-04 09:30:26 +0800)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/stefanha/qemu.git tags/block-pull-request
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to 8dff69b9415b4287e900358744b732195e1ab2e2:
9
for you to fetch changes up to 2ad638a3d160923ef3dbf87c73944e6e44bdc724:
10
10
11
tests/qemu-iotests: use AIOMODE with various tests (2020-01-30 21:01:40 +0000)
11
block/qed: fix use-after-free by nullifying timer pointer after free (2025-03-06 10:19:54 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Pull request
15
15
16
QED need_check_timer use-after-free fix
17
16
----------------------------------------------------------------
18
----------------------------------------------------------------
17
19
18
Aarushi Mehta (15):
20
Denis Rastyogin (1):
19
configure: permit use of io_uring
21
block/qed: fix use-after-free by nullifying timer pointer after free
20
qapi/block-core: add option for io_uring
21
block/block: add BDRV flag for io_uring
22
block/io_uring: implements interfaces for io_uring
23
stubs: add stubs for io_uring interface
24
util/async: add aio interfaces for io_uring
25
blockdev: adds bdrv_parse_aio to use io_uring
26
block/file-posix.c: extend to use io_uring
27
block: add trace events for io_uring
28
block/io_uring: adds userspace completion polling
29
qemu-io: adds option to use aio engine
30
qemu-img: adds option to use aio engine for benchmarking
31
qemu-nbd: adds option for aio engines
32
tests/qemu-iotests: enable testing with aio options
33
tests/qemu-iotests: use AIOMODE with various tests
34
22
35
Paolo Bonzini (3):
23
block/qed.c | 1 +
36
block: eliminate BDRV_REQ_NO_SERIALISING
24
1 file changed, 1 insertion(+)
37
block/io: wait for serialising requests when a request becomes
38
serialising
39
block/io: take bs->reqs_lock in bdrv_mark_request_serialising
40
41
MAINTAINERS | 9 +
42
block.c | 22 ++
43
block/Makefile.objs | 3 +
44
block/file-posix.c | 99 ++++++--
45
block/io.c | 162 +++++++------
46
block/io_uring.c | 433 ++++++++++++++++++++++++++++++++++
47
block/trace-events | 12 +
48
blockdev.c | 12 +-
49
configure | 27 +++
50
docs/interop/qemu-nbd.rst | 4 +-
51
include/block/aio.h | 16 +-
52
include/block/block.h | 14 +-
53
include/block/block_int.h | 3 +-
54
include/block/raw-aio.h | 12 +
55
qapi/block-core.json | 4 +-
56
qemu-img-cmds.hx | 4 +-
57
qemu-img.c | 11 +-
58
qemu-img.texi | 5 +-
59
qemu-io.c | 25 +-
60
qemu-nbd.c | 12 +-
61
stubs/Makefile.objs | 1 +
62
stubs/io_uring.c | 32 +++
63
tests/qemu-iotests/028 | 2 +-
64
tests/qemu-iotests/058 | 2 +-
65
tests/qemu-iotests/089 | 4 +-
66
tests/qemu-iotests/091 | 4 +-
67
tests/qemu-iotests/109 | 2 +-
68
tests/qemu-iotests/147 | 5 +-
69
tests/qemu-iotests/181 | 8 +-
70
tests/qemu-iotests/183 | 4 +-
71
tests/qemu-iotests/185 | 10 +-
72
tests/qemu-iotests/200 | 2 +-
73
tests/qemu-iotests/201 | 8 +-
74
tests/qemu-iotests/check | 15 +-
75
tests/qemu-iotests/common.rc | 14 ++
76
tests/qemu-iotests/iotests.py | 12 +-
77
util/async.c | 36 +++
78
37 files changed, 878 insertions(+), 172 deletions(-)
79
create mode 100644 block/io_uring.c
80
create mode 100644 stubs/io_uring.c
81
25
82
--
26
--
83
2.24.1
27
2.48.1
84
85
diff view generated by jsdifflib
Deleted patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
1
3
It is unused since commit 00e30f0 ("block/backup: use backup-top instead
4
of write notifiers", 2019-10-01), drop it to simplify the code.
5
6
While at it, drop redundant assertions on flags.
7
8
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
9
Message-id: 1578495356-46219-2-git-send-email-pbonzini@redhat.com
10
Message-Id: <1578495356-46219-2-git-send-email-pbonzini@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/io.c | 18 ++++--------------
14
include/block/block.h | 12 ------------
15
2 files changed, 4 insertions(+), 26 deletions(-)
16
17
diff --git a/block/io.c b/block/io.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/block/io.c
20
+++ b/block/io.c
21
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
22
* potential fallback support, if we ever implement any read flags
23
* to pass through to drivers. For now, there aren't any
24
* passthrough flags. */
25
- assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ |
26
- BDRV_REQ_PREFETCH)));
27
+ assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH)));
28
29
/* Handle Copy on Read and associated serialisation */
30
if (flags & BDRV_REQ_COPY_ON_READ) {
31
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
32
bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
33
}
34
35
- /* BDRV_REQ_SERIALISING is only for write operation */
36
- assert(!(flags & BDRV_REQ_SERIALISING));
37
-
38
- if (!(flags & BDRV_REQ_NO_SERIALISING)) {
39
- bdrv_wait_serialising_requests(req);
40
- }
41
+ bdrv_wait_serialising_requests(req);
42
43
if (flags & BDRV_REQ_COPY_ON_READ) {
44
int64_t pnum;
45
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
46
bdrv_inc_in_flight(bs);
47
48
/* Don't do copy-on-read if we read data before write operation */
49
- if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) {
50
+ if (atomic_read(&bs->copy_on_read)) {
51
flags |= BDRV_REQ_COPY_ON_READ;
52
}
53
54
@@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
55
return -EPERM;
56
}
57
58
- /* BDRV_REQ_NO_SERIALISING is only for read operation */
59
- assert(!(flags & BDRV_REQ_NO_SERIALISING));
60
assert(!(bs->open_flags & BDRV_O_INACTIVE));
61
assert((bs->open_flags & BDRV_O_NO_IO) == 0);
62
assert(!(flags & ~BDRV_REQ_MASK));
63
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(
64
65
/* BDRV_REQ_SERIALISING is only for write operation */
66
assert(!(read_flags & BDRV_REQ_SERIALISING));
67
- if (!(read_flags & BDRV_REQ_NO_SERIALISING)) {
68
- bdrv_wait_serialising_requests(&req);
69
- }
70
+ bdrv_wait_serialising_requests(&req);
71
72
ret = src->bs->drv->bdrv_co_copy_range_from(src->bs,
73
src, src_offset,
74
diff --git a/include/block/block.h b/include/block/block.h
75
index XXXXXXX..XXXXXXX 100644
76
--- a/include/block/block.h
77
+++ b/include/block/block.h
78
@@ -XXX,XX +XXX,XX @@ typedef enum {
79
*/
80
BDRV_REQ_MAY_UNMAP = 0x4,
81
82
- /*
83
- * The BDRV_REQ_NO_SERIALISING flag is only valid for reads and means that
84
- * we don't want wait_serialising_requests() during the read operation.
85
- *
86
- * This flag is used for backup copy-on-write operations, when we need to
87
- * read old data before write (write notifier triggered). It is okay since
88
- * we already waited for other serializing requests in the initiating write
89
- * (see bdrv_aligned_pwritev), and it is necessary if the initiating write
90
- * is already serializing (without the flag, the read would deadlock
91
- * waiting for the serialising write to complete).
92
- */
93
- BDRV_REQ_NO_SERIALISING = 0x8,
94
BDRV_REQ_FUA = 0x10,
95
BDRV_REQ_WRITE_COMPRESSED = 0x20,
96
97
--
98
2.24.1
99
100
diff view generated by jsdifflib
1
From: Paolo Bonzini <pbonzini@redhat.com>
1
From: Denis Rastyogin <gerben@altlinux.org>
2
2
3
Marking without waiting would not result in actual serialising behavior.
3
This error was discovered by fuzzing qemu-img.
4
Thus, make a call to bdrv_mark_request_serialising sufficient for
5
serialisation to happen.
6
4
7
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
5
In the QED block driver, the need_check_timer timer is freed in
8
Message-id: 1578495356-46219-3-git-send-email-pbonzini@redhat.com
6
bdrv_qed_detach_aio_context, but the pointer to the timer is not
9
Message-Id: <1578495356-46219-3-git-send-email-pbonzini@redhat.com>
7
set to NULL. This can lead to a use-after-free scenario
8
in bdrv_qed_drain_begin().
9
10
The need_check_timer pointer is set to NULL after freeing the timer.
11
This helps catch the condition when checking in bdrv_qed_drain_begin().
12
13
Closes: https://gitlab.com/qemu-project/qemu/-/issues/2852
14
Signed-off-by: Denis Rastyogin <gerben@altlinux.org>
15
Message-ID: <20250304083927.37681-1-gerben@altlinux.org>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
17
---
12
block/file-posix.c | 1 -
18
block/qed.c | 1 +
13
block/io.c | 40 +++++++++++++++++----------------------
19
1 file changed, 1 insertion(+)
14
include/block/block_int.h | 3 +--
15
3 files changed, 18 insertions(+), 26 deletions(-)
16
20
17
diff --git a/block/file-posix.c b/block/file-posix.c
21
diff --git a/block/qed.c b/block/qed.c
18
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
19
--- a/block/file-posix.c
23
--- a/block/qed.c
20
+++ b/block/file-posix.c
24
+++ b/block/qed.c
21
@@ -XXX,XX +XXX,XX @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
25
@@ -XXX,XX +XXX,XX @@ static void bdrv_qed_detach_aio_context(BlockDriverState *bs)
22
req->overlap_bytes = req->bytes;
26
23
27
qed_cancel_need_check_timer(s);
24
bdrv_mark_request_serialising(req, bs->bl.request_alignment);
28
timer_free(s->need_check_timer);
25
- bdrv_wait_serialising_requests(req);
29
+ s->need_check_timer = NULL;
26
}
27
#endif
28
29
diff --git a/block/io.c b/block/io.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/block/io.c
32
+++ b/block/io.c
33
@@ -XXX,XX +XXX,XX @@
34
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
35
36
static void bdrv_parent_cb_resize(BlockDriverState *bs);
37
+static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
38
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
39
int64_t offset, int bytes, BdrvRequestFlags flags);
40
41
@@ -XXX,XX +XXX,XX @@ static void tracked_request_begin(BdrvTrackedRequest *req,
42
qemu_co_mutex_unlock(&bs->reqs_lock);
43
}
30
}
44
31
45
-void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
32
static void bdrv_qed_attach_aio_context(BlockDriverState *bs,
46
+bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
47
{
48
int64_t overlap_offset = req->offset & ~(align - 1);
49
uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
50
@@ -XXX,XX +XXX,XX @@ void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
51
52
req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
53
req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
54
-}
55
-
56
-static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req)
57
-{
58
- /*
59
- * If the request is serialising, overlap_offset and overlap_bytes are set,
60
- * so we can check if the request is aligned. Otherwise, don't care and
61
- * return false.
62
- */
63
-
64
- return req->serialising && (req->offset == req->overlap_offset) &&
65
- (req->bytes == req->overlap_bytes);
66
+ return bdrv_wait_serialising_requests(req);
67
}
68
69
/**
70
@@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs)
71
bdrv_wakeup(bs);
72
}
73
74
-bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
75
+static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
76
{
77
BlockDriverState *bs = self->bs;
78
BdrvTrackedRequest *req;
79
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child,
80
* it ensures that the CoR read and write operations are atomic and
81
* guest writes cannot interleave between them. */
82
bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
83
+ } else {
84
+ bdrv_wait_serialising_requests(req);
85
}
86
87
- bdrv_wait_serialising_requests(req);
88
-
89
if (flags & BDRV_REQ_COPY_ON_READ) {
90
int64_t pnum;
91
92
@@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes,
93
assert(!(flags & ~BDRV_REQ_MASK));
94
95
if (flags & BDRV_REQ_SERIALISING) {
96
- bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
97
+ waited = bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs));
98
+ /*
99
+ * For a misaligned request we should have already waited earlier,
100
+ * because we come after bdrv_padding_rmw_read which must be called
101
+ * with the request already marked as serialising.
102
+ */
103
+ assert(!waited ||
104
+ (req->offset == req->overlap_offset &&
105
+ req->bytes == req->overlap_bytes));
106
+ } else {
107
+ bdrv_wait_serialising_requests(req);
108
}
109
110
- waited = bdrv_wait_serialising_requests(req);
111
-
112
- assert(!waited || !req->serialising ||
113
- is_request_serialising_and_aligned(req));
114
assert(req->overlap_offset <= offset);
115
assert(offset + bytes <= req->overlap_offset + req->overlap_bytes);
116
assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE);
117
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child,
118
padding = bdrv_init_padding(bs, offset, bytes, &pad);
119
if (padding) {
120
bdrv_mark_request_serialising(req, align);
121
- bdrv_wait_serialising_requests(req);
122
123
bdrv_padding_rmw_read(child, req, &pad, true);
124
125
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
126
127
if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) {
128
bdrv_mark_request_serialising(&req, align);
129
- bdrv_wait_serialising_requests(&req);
130
bdrv_padding_rmw_read(child, &req, &pad, false);
131
}
132
133
diff --git a/include/block/block_int.h b/include/block/block_int.h
134
index XXXXXXX..XXXXXXX 100644
135
--- a/include/block/block_int.h
136
+++ b/include/block/block_int.h
137
@@ -XXX,XX +XXX,XX @@ extern unsigned int bdrv_drain_all_count;
138
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
139
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
140
141
-bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
142
-void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
143
+bool coroutine_fn bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align);
144
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
145
146
int get_tmp_filename(char *filename, int size);
147
--
33
--
148
2.24.1
34
2.48.1
149
150
diff view generated by jsdifflib
Deleted patch
1
From: Paolo Bonzini <pbonzini@redhat.com>
2
1
3
bdrv_mark_request_serialising is writing the overlap_offset and
4
overlap_bytes fields of BdrvTrackedRequest. Take bs->reqs_lock
5
for the whole duration of it, and not just when waiting for
6
serialising requests, so that tracked_request_overlaps does not
7
look at a half-updated request.
8
9
The new code does not unlock/relock around retries. This is unnecessary
10
because a retry is always preceded by a CoQueue wait, which already
11
releases and reacquires bs->reqs_lock.
12
13
Reported-by: Peter Lieven <pl@kamp.de>
14
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
15
Message-id: 1578495356-46219-4-git-send-email-pbonzini@redhat.com
16
Message-Id: <1578495356-46219-4-git-send-email-pbonzini@redhat.com>
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
---
19
block/io.c | 112 ++++++++++++++++++++++++++++++-----------------------
20
1 file changed, 63 insertions(+), 49 deletions(-)
21
22
diff --git a/block/io.c b/block/io.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/block/io.c
25
+++ b/block/io.c
26
@@ -XXX,XX +XXX,XX @@
27
#define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS)
28
29
static void bdrv_parent_cb_resize(BlockDriverState *bs);
30
-static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self);
31
static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
32
int64_t offset, int bytes, BdrvRequestFlags flags);
33
34
@@ -XXX,XX +XXX,XX @@ static void tracked_request_begin(BdrvTrackedRequest *req,
35
qemu_co_mutex_unlock(&bs->reqs_lock);
36
}
37
38
+static bool tracked_request_overlaps(BdrvTrackedRequest *req,
39
+ int64_t offset, uint64_t bytes)
40
+{
41
+ /* aaaa bbbb */
42
+ if (offset >= req->overlap_offset + req->overlap_bytes) {
43
+ return false;
44
+ }
45
+ /* bbbb aaaa */
46
+ if (req->overlap_offset >= offset + bytes) {
47
+ return false;
48
+ }
49
+ return true;
50
+}
51
+
52
+static bool coroutine_fn
53
+bdrv_wait_serialising_requests_locked(BlockDriverState *bs,
54
+ BdrvTrackedRequest *self)
55
+{
56
+ BdrvTrackedRequest *req;
57
+ bool retry;
58
+ bool waited = false;
59
+
60
+ do {
61
+ retry = false;
62
+ QLIST_FOREACH(req, &bs->tracked_requests, list) {
63
+ if (req == self || (!req->serialising && !self->serialising)) {
64
+ continue;
65
+ }
66
+ if (tracked_request_overlaps(req, self->overlap_offset,
67
+ self->overlap_bytes))
68
+ {
69
+ /* Hitting this means there was a reentrant request, for
70
+ * example, a block driver issuing nested requests. This must
71
+ * never happen since it means deadlock.
72
+ */
73
+ assert(qemu_coroutine_self() != req->co);
74
+
75
+ /* If the request is already (indirectly) waiting for us, or
76
+ * will wait for us as soon as it wakes up, then just go on
77
+ * (instead of producing a deadlock in the former case). */
78
+ if (!req->waiting_for) {
79
+ self->waiting_for = req;
80
+ qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
81
+ self->waiting_for = NULL;
82
+ retry = true;
83
+ waited = true;
84
+ break;
85
+ }
86
+ }
87
+ }
88
+ } while (retry);
89
+ return waited;
90
+}
91
+
92
bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
93
{
94
+ BlockDriverState *bs = req->bs;
95
int64_t overlap_offset = req->offset & ~(align - 1);
96
uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align)
97
- overlap_offset;
98
+ bool waited;
99
100
+ qemu_co_mutex_lock(&bs->reqs_lock);
101
if (!req->serialising) {
102
atomic_inc(&req->bs->serialising_in_flight);
103
req->serialising = true;
104
@@ -XXX,XX +XXX,XX @@ bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align)
105
106
req->overlap_offset = MIN(req->overlap_offset, overlap_offset);
107
req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes);
108
- return bdrv_wait_serialising_requests(req);
109
+ waited = bdrv_wait_serialising_requests_locked(bs, req);
110
+ qemu_co_mutex_unlock(&bs->reqs_lock);
111
+ return waited;
112
}
113
114
/**
115
@@ -XXX,XX +XXX,XX @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
116
}
117
}
118
119
-static bool tracked_request_overlaps(BdrvTrackedRequest *req,
120
- int64_t offset, uint64_t bytes)
121
-{
122
- /* aaaa bbbb */
123
- if (offset >= req->overlap_offset + req->overlap_bytes) {
124
- return false;
125
- }
126
- /* bbbb aaaa */
127
- if (req->overlap_offset >= offset + bytes) {
128
- return false;
129
- }
130
- return true;
131
-}
132
-
133
void bdrv_inc_in_flight(BlockDriverState *bs)
134
{
135
atomic_inc(&bs->in_flight);
136
@@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs)
137
static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self)
138
{
139
BlockDriverState *bs = self->bs;
140
- BdrvTrackedRequest *req;
141
- bool retry;
142
bool waited = false;
143
144
if (!atomic_read(&bs->serialising_in_flight)) {
145
return false;
146
}
147
148
- do {
149
- retry = false;
150
- qemu_co_mutex_lock(&bs->reqs_lock);
151
- QLIST_FOREACH(req, &bs->tracked_requests, list) {
152
- if (req == self || (!req->serialising && !self->serialising)) {
153
- continue;
154
- }
155
- if (tracked_request_overlaps(req, self->overlap_offset,
156
- self->overlap_bytes))
157
- {
158
- /* Hitting this means there was a reentrant request, for
159
- * example, a block driver issuing nested requests. This must
160
- * never happen since it means deadlock.
161
- */
162
- assert(qemu_coroutine_self() != req->co);
163
-
164
- /* If the request is already (indirectly) waiting for us, or
165
- * will wait for us as soon as it wakes up, then just go on
166
- * (instead of producing a deadlock in the former case). */
167
- if (!req->waiting_for) {
168
- self->waiting_for = req;
169
- qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock);
170
- self->waiting_for = NULL;
171
- retry = true;
172
- waited = true;
173
- break;
174
- }
175
- }
176
- }
177
- qemu_co_mutex_unlock(&bs->reqs_lock);
178
- } while (retry);
179
+ qemu_co_mutex_lock(&bs->reqs_lock);
180
+ waited = bdrv_wait_serialising_requests_locked(bs, self);
181
+ qemu_co_mutex_unlock(&bs->reqs_lock);
182
183
return waited;
184
}
185
--
186
2.24.1
187
188
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com>
5
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-id: 20200120141858.587874-2-stefanha@redhat.com
8
Message-Id: <20200120141858.587874-2-stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
configure | 27 +++++++++++++++++++++++++++
12
1 file changed, 27 insertions(+)
13
14
diff --git a/configure b/configure
15
index XXXXXXX..XXXXXXX 100755
16
--- a/configure
17
+++ b/configure
18
@@ -XXX,XX +XXX,XX @@ xen=""
19
xen_ctrl_version=""
20
xen_pci_passthrough=""
21
linux_aio=""
22
+linux_io_uring=""
23
cap_ng=""
24
attr=""
25
libattr=""
26
@@ -XXX,XX +XXX,XX @@ for opt do
27
;;
28
--enable-linux-aio) linux_aio="yes"
29
;;
30
+ --disable-linux-io-uring) linux_io_uring="no"
31
+ ;;
32
+ --enable-linux-io-uring) linux_io_uring="yes"
33
+ ;;
34
--disable-attr) attr="no"
35
;;
36
--enable-attr) attr="yes"
37
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
38
vde support for vde network
39
netmap support for netmap network
40
linux-aio Linux AIO support
41
+ linux-io-uring Linux io_uring support
42
cap-ng libcap-ng support
43
attr attr and xattr support
44
vhost-net vhost-net kernel acceleration support
45
@@ -XXX,XX +XXX,XX @@ EOF
46
linux_aio=no
47
fi
48
fi
49
+##########################################
50
+# linux-io-uring probe
51
+
52
+if test "$linux_io_uring" != "no" ; then
53
+ if $pkg_config liburing; then
54
+ linux_io_uring_cflags=$($pkg_config --cflags liburing)
55
+ linux_io_uring_libs=$($pkg_config --libs liburing)
56
+ linux_io_uring=yes
57
+ else
58
+ if test "$linux_io_uring" = "yes" ; then
59
+ feature_not_found "linux io_uring" "Install liburing devel"
60
+ fi
61
+ linux_io_uring=no
62
+ fi
63
+fi
64
65
##########################################
66
# TPM emulation is only on POSIX
67
@@ -XXX,XX +XXX,XX @@ echo "PIE $pie"
68
echo "vde support $vde"
69
echo "netmap support $netmap"
70
echo "Linux AIO support $linux_aio"
71
+echo "Linux io_uring support $linux_io_uring"
72
echo "ATTR/XATTR support $attr"
73
echo "Install blobs $blobs"
74
echo "KVM support $kvm"
75
@@ -XXX,XX +XXX,XX @@ fi
76
if test "$linux_aio" = "yes" ; then
77
echo "CONFIG_LINUX_AIO=y" >> $config_host_mak
78
fi
79
+if test "$linux_io_uring" = "yes" ; then
80
+ echo "CONFIG_LINUX_IO_URING=y" >> $config_host_mak
81
+ echo "LINUX_IO_URING_CFLAGS=$linux_io_uring_cflags" >> $config_host_mak
82
+ echo "LINUX_IO_URING_LIBS=$linux_io_uring_libs" >> $config_host_mak
83
+fi
84
if test "$attr" = "yes" ; then
85
echo "CONFIG_ATTR=y" >> $config_host_mak
86
fi
87
--
88
2.24.1
89
90
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Since io_uring is the actual name of the Linux API, we use it as enum
4
value even though the QAPI schema conventions would prefer io-uring.
5
6
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
7
Acked-by: Markus Armbruster <armbru@redhat.com>
8
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-id: 20200120141858.587874-3-stefanha@redhat.com
11
Message-Id: <20200120141858.587874-3-stefanha@redhat.com>
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
qapi/block-core.json | 4 +++-
15
1 file changed, 3 insertions(+), 1 deletion(-)
16
17
diff --git a/qapi/block-core.json b/qapi/block-core.json
18
index XXXXXXX..XXXXXXX 100644
19
--- a/qapi/block-core.json
20
+++ b/qapi/block-core.json
21
@@ -XXX,XX +XXX,XX @@
22
#
23
# @threads: Use qemu's thread pool
24
# @native: Use native AIO backend (only Linux and Windows)
25
+# @io_uring: Use linux io_uring (since 5.0)
26
#
27
# Since: 2.9
28
##
29
{ 'enum': 'BlockdevAioOptions',
30
- 'data': [ 'threads', 'native' ] }
31
+ 'data': [ 'threads', 'native',
32
+ { 'name': 'io_uring', 'if': 'defined(CONFIG_LINUX_IO_URING)' } ] }
33
34
##
35
# @BlockdevCacheOptions:
36
--
37
2.24.1
38
39
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com>
5
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-id: 20200120141858.587874-4-stefanha@redhat.com
8
Message-Id: <20200120141858.587874-4-stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
include/block/block.h | 1 +
12
1 file changed, 1 insertion(+)
13
14
diff --git a/include/block/block.h b/include/block/block.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/block/block.h
17
+++ b/include/block/block.h
18
@@ -XXX,XX +XXX,XX @@ typedef struct HDGeometry {
19
ignoring the format layer */
20
#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
21
#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */
22
+#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
23
24
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
25
26
--
27
2.24.1
28
29
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Aborts when sqe fails to be set as sqes cannot be returned to the
4
ring. Adds slow path for short reads for older kernels.
5
6
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
7
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Message-id: 20200120141858.587874-5-stefanha@redhat.com
10
Message-Id: <20200120141858.587874-5-stefanha@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
MAINTAINERS | 8 +
14
block/Makefile.objs | 3 +
15
block/io_uring.c | 401 ++++++++++++++++++++++++++++++++++++++++
16
include/block/aio.h | 16 +-
17
include/block/raw-aio.h | 12 ++
18
5 files changed, 439 insertions(+), 1 deletion(-)
19
create mode 100644 block/io_uring.c
20
21
diff --git a/MAINTAINERS b/MAINTAINERS
22
index XXXXXXX..XXXXXXX 100644
23
--- a/MAINTAINERS
24
+++ b/MAINTAINERS
25
@@ -XXX,XX +XXX,XX @@ F: block/file-posix.c
26
F: block/file-win32.c
27
F: block/win32-aio.c
28
29
+Linux io_uring
30
+M: Aarushi Mehta <mehta.aaru20@gmail.com>
31
+M: Julia Suvorova <jusual@redhat.com>
32
+M: Stefan Hajnoczi <stefanha@redhat.com>
33
+L: qemu-block@nongnu.org
34
+S: Maintained
35
+F: block/io_uring.c
36
+
37
qcow2
38
M: Kevin Wolf <kwolf@redhat.com>
39
M: Max Reitz <mreitz@redhat.com>
40
diff --git a/block/Makefile.objs b/block/Makefile.objs
41
index XXXXXXX..XXXXXXX 100644
42
--- a/block/Makefile.objs
43
+++ b/block/Makefile.objs
44
@@ -XXX,XX +XXX,XX @@ block-obj-y += block-backend.o snapshot.o qapi.o
45
block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
46
block-obj-$(CONFIG_POSIX) += file-posix.o
47
block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
48
+block-obj-$(CONFIG_LINUX_IO_URING) += io_uring.o
49
block-obj-y += null.o mirror.o commit.o io.o create.o
50
block-obj-y += throttle-groups.o
51
block-obj-$(CONFIG_LINUX) += nvme.o
52
@@ -XXX,XX +XXX,XX @@ block-obj-$(if $(CONFIG_LZFSE),m,n) += dmg-lzfse.o
53
dmg-lzfse.o-libs := $(LZFSE_LIBS)
54
qcow.o-libs := -lz
55
linux-aio.o-libs := -laio
56
+io_uring.o-cflags := $(LINUX_IO_URING_CFLAGS)
57
+io_uring.o-libs := $(LINUX_IO_URING_LIBS)
58
parallels.o-cflags := $(LIBXML2_CFLAGS)
59
parallels.o-libs := $(LIBXML2_LIBS)
60
diff --git a/block/io_uring.c b/block/io_uring.c
61
new file mode 100644
62
index XXXXXXX..XXXXXXX
63
--- /dev/null
64
+++ b/block/io_uring.c
65
@@ -XXX,XX +XXX,XX @@
66
+/*
67
+ * Linux io_uring support.
68
+ *
69
+ * Copyright (C) 2009 IBM, Corp.
70
+ * Copyright (C) 2009 Red Hat, Inc.
71
+ * Copyright (C) 2019 Aarushi Mehta
72
+ *
73
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
74
+ * See the COPYING file in the top-level directory.
75
+ */
76
+#include "qemu/osdep.h"
77
+#include <liburing.h>
78
+#include "qemu-common.h"
79
+#include "block/aio.h"
80
+#include "qemu/queue.h"
81
+#include "block/block.h"
82
+#include "block/raw-aio.h"
83
+#include "qemu/coroutine.h"
84
+#include "qapi/error.h"
85
+
86
+/* io_uring ring size */
87
+#define MAX_ENTRIES 128
88
+
89
+typedef struct LuringAIOCB {
90
+ Coroutine *co;
91
+ struct io_uring_sqe sqeq;
92
+ ssize_t ret;
93
+ QEMUIOVector *qiov;
94
+ bool is_read;
95
+ QSIMPLEQ_ENTRY(LuringAIOCB) next;
96
+
97
+ /*
98
+ * Buffered reads may require resubmission, see
99
+ * luring_resubmit_short_read().
100
+ */
101
+ int total_read;
102
+ QEMUIOVector resubmit_qiov;
103
+} LuringAIOCB;
104
+
105
+typedef struct LuringQueue {
106
+ int plugged;
107
+ unsigned int in_queue;
108
+ unsigned int in_flight;
109
+ bool blocked;
110
+ QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue;
111
+} LuringQueue;
112
+
113
+typedef struct LuringState {
114
+ AioContext *aio_context;
115
+
116
+ struct io_uring ring;
117
+
118
+ /* I/O queue for batched submission. Protected by AioContext lock. */
119
+ LuringQueue io_q;
120
+
121
+ /* I/O completion processing. Only runs in I/O thread. */
122
+ QEMUBH *completion_bh;
123
+} LuringState;
124
+
125
+/**
126
+ * luring_resubmit:
127
+ *
128
+ * Resubmit a request by appending it to submit_queue. The caller must ensure
129
+ * that ioq_submit() is called later so that submit_queue requests are started.
130
+ */
131
+static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb)
132
+{
133
+ QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
134
+ s->io_q.in_queue++;
135
+}
136
+
137
+/**
138
+ * luring_resubmit_short_read:
139
+ *
140
+ * Before Linux commit 9d93a3f5a0c ("io_uring: punt short reads to async
141
+ * context") a buffered I/O request with the start of the file range in the
142
+ * page cache could result in a short read. Applications need to resubmit the
143
+ * remaining read request.
144
+ *
145
+ * This is a slow path but recent kernels never take it.
146
+ */
147
+static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
148
+ int nread)
149
+{
150
+ QEMUIOVector *resubmit_qiov;
151
+ size_t remaining;
152
+
153
+ /* Update read position */
154
+ luringcb->total_read = nread;
155
+ remaining = luringcb->qiov->size - luringcb->total_read;
156
+
157
+ /* Shorten qiov */
158
+ resubmit_qiov = &luringcb->resubmit_qiov;
159
+ if (resubmit_qiov->iov == NULL) {
160
+ qemu_iovec_init(resubmit_qiov, luringcb->qiov->niov);
161
+ } else {
162
+ qemu_iovec_reset(resubmit_qiov);
163
+ }
164
+ qemu_iovec_concat(resubmit_qiov, luringcb->qiov, luringcb->total_read,
165
+ remaining);
166
+
167
+ /* Update sqe */
168
+ luringcb->sqeq.off = nread;
169
+ luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov;
170
+ luringcb->sqeq.len = luringcb->resubmit_qiov.niov;
171
+
172
+ luring_resubmit(s, luringcb);
173
+}
174
+
175
+/**
176
+ * luring_process_completions:
177
+ * @s: AIO state
178
+ *
179
+ * Fetches completed I/O requests, consumes cqes and invokes their callbacks.
180
+ * The function is somewhat tricky because it supports nested event loops, for
181
+ * example when a request callback invokes aio_poll().
182
+ *
183
+ * Function schedules BH completion so it can be called again in a nested
184
+ * event loop. When there are no events left to complete, the BH is
185
+ * canceled.
186
+ *
187
+ */
188
+static void luring_process_completions(LuringState *s)
189
+{
190
+ struct io_uring_cqe *cqes;
191
+ int total_bytes;
192
+ /*
193
+ * Request completion callbacks can run the nested event loop.
194
+ * Schedule ourselves so the nested event loop will "see" remaining
195
+ * completed requests and process them. Without this, completion
196
+ * callbacks that wait for other requests using a nested event loop
197
+ * would hang forever.
198
+ *
199
+ * This workaround is needed because io_uring uses poll_wait, which
200
+ * is woken up when new events are added to the uring, thus polling on
201
+ * the same uring fd will block unless more events are received.
202
+ *
203
+ * Other leaf block drivers (drivers that access the data themselves)
204
+ * are networking based, so they poll sockets for data and run the
205
+ * correct coroutine.
206
+ */
207
+ qemu_bh_schedule(s->completion_bh);
208
+
209
+ while (io_uring_peek_cqe(&s->ring, &cqes) == 0) {
210
+ LuringAIOCB *luringcb;
211
+ int ret;
212
+
213
+ if (!cqes) {
214
+ break;
215
+ }
216
+
217
+ luringcb = io_uring_cqe_get_data(cqes);
218
+ ret = cqes->res;
219
+ io_uring_cqe_seen(&s->ring, cqes);
220
+ cqes = NULL;
221
+
222
+ /* Change counters one-by-one because we can be nested. */
223
+ s->io_q.in_flight--;
224
+
225
+ /* total_read is non-zero only for resubmitted read requests */
226
+ total_bytes = ret + luringcb->total_read;
227
+
228
+ if (ret < 0) {
229
+ if (ret == -EINTR) {
230
+ luring_resubmit(s, luringcb);
231
+ continue;
232
+ }
233
+ } else if (!luringcb->qiov) {
234
+ goto end;
235
+ } else if (total_bytes == luringcb->qiov->size) {
236
+ ret = 0;
237
+ /* Only read/write */
238
+ } else {
239
+ /* Short Read/Write */
240
+ if (luringcb->is_read) {
241
+ if (ret > 0) {
242
+ luring_resubmit_short_read(s, luringcb, ret);
243
+ continue;
244
+ } else {
245
+ /* Pad with zeroes */
246
+ qemu_iovec_memset(luringcb->qiov, total_bytes, 0,
247
+ luringcb->qiov->size - total_bytes);
248
+ ret = 0;
249
+ }
250
+ } else {
251
+ ret = -ENOSPC;;
252
+ }
253
+ }
254
+end:
255
+ luringcb->ret = ret;
256
+ qemu_iovec_destroy(&luringcb->resubmit_qiov);
257
+
258
+ /*
259
+ * If the coroutine is already entered it must be in ioq_submit()
260
+ * and will notice luringcb->ret has been filled in when it
261
+ * eventually runs later. Coroutines cannot be entered recursively
262
+ * so avoid doing that!
263
+ */
264
+ if (!qemu_coroutine_entered(luringcb->co)) {
265
+ aio_co_wake(luringcb->co);
266
+ }
267
+ }
268
+ qemu_bh_cancel(s->completion_bh);
269
+}
270
+
271
+static int ioq_submit(LuringState *s)
272
+{
273
+ int ret = 0;
274
+ LuringAIOCB *luringcb, *luringcb_next;
275
+
276
+ while (s->io_q.in_queue > 0) {
277
+ /*
278
+ * Try to fetch sqes from the ring for requests waiting in
279
+ * the overflow queue
280
+ */
281
+ QSIMPLEQ_FOREACH_SAFE(luringcb, &s->io_q.submit_queue, next,
282
+ luringcb_next) {
283
+ struct io_uring_sqe *sqes = io_uring_get_sqe(&s->ring);
284
+ if (!sqes) {
285
+ break;
286
+ }
287
+ /* Prep sqe for submission */
288
+ *sqes = luringcb->sqeq;
289
+ QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next);
290
+ }
291
+ ret = io_uring_submit(&s->ring);
292
+ /* Prevent infinite loop if submission is refused */
293
+ if (ret <= 0) {
294
+ if (ret == -EAGAIN) {
295
+ continue;
296
+ }
297
+ break;
298
+ }
299
+ s->io_q.in_flight += ret;
300
+ s->io_q.in_queue -= ret;
301
+ }
302
+ s->io_q.blocked = (s->io_q.in_queue > 0);
303
+
304
+ if (s->io_q.in_flight) {
305
+ /*
306
+ * We can try to complete something just right away if there are
307
+ * still requests in-flight.
308
+ */
309
+ luring_process_completions(s);
310
+ }
311
+ return ret;
312
+}
313
+
314
+static void luring_process_completions_and_submit(LuringState *s)
315
+{
316
+ aio_context_acquire(s->aio_context);
317
+ luring_process_completions(s);
318
+
319
+ if (!s->io_q.plugged && s->io_q.in_queue > 0) {
320
+ ioq_submit(s);
321
+ }
322
+ aio_context_release(s->aio_context);
323
+}
324
+
325
+static void qemu_luring_completion_bh(void *opaque)
326
+{
327
+ LuringState *s = opaque;
328
+ luring_process_completions_and_submit(s);
329
+}
330
+
331
+static void qemu_luring_completion_cb(void *opaque)
332
+{
333
+ LuringState *s = opaque;
334
+ luring_process_completions_and_submit(s);
335
+}
336
+
337
+static void ioq_init(LuringQueue *io_q)
338
+{
339
+ QSIMPLEQ_INIT(&io_q->submit_queue);
340
+ io_q->plugged = 0;
341
+ io_q->in_queue = 0;
342
+ io_q->in_flight = 0;
343
+ io_q->blocked = false;
344
+}
345
+
346
+void luring_io_plug(BlockDriverState *bs, LuringState *s)
347
+{
348
+ s->io_q.plugged++;
349
+}
350
+
351
+void luring_io_unplug(BlockDriverState *bs, LuringState *s)
352
+{
353
+ assert(s->io_q.plugged);
354
+ if (--s->io_q.plugged == 0 &&
355
+ !s->io_q.blocked && s->io_q.in_queue > 0) {
356
+ ioq_submit(s);
357
+ }
358
+}
359
+
360
+/**
361
+ * luring_do_submit:
362
+ * @fd: file descriptor for I/O
363
+ * @luringcb: AIO control block
364
+ * @s: AIO state
365
+ * @offset: offset for request
366
+ * @type: type of request
367
+ *
368
+ * Fetches sqes from ring, adds to pending queue and preps them
369
+ *
370
+ */
371
+static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
372
+ uint64_t offset, int type)
373
+{
374
+ struct io_uring_sqe *sqes = &luringcb->sqeq;
375
+
376
+ switch (type) {
377
+ case QEMU_AIO_WRITE:
378
+ io_uring_prep_writev(sqes, fd, luringcb->qiov->iov,
379
+ luringcb->qiov->niov, offset);
380
+ break;
381
+ case QEMU_AIO_READ:
382
+ io_uring_prep_readv(sqes, fd, luringcb->qiov->iov,
383
+ luringcb->qiov->niov, offset);
384
+ break;
385
+ case QEMU_AIO_FLUSH:
386
+ io_uring_prep_fsync(sqes, fd, IORING_FSYNC_DATASYNC);
387
+ break;
388
+ default:
389
+ fprintf(stderr, "%s: invalid AIO request type, aborting 0x%x.\n",
390
+ __func__, type);
391
+ abort();
392
+ }
393
+ io_uring_sqe_set_data(sqes, luringcb);
394
+
395
+ QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
396
+ s->io_q.in_queue++;
397
+
398
+ if (!s->io_q.blocked &&
399
+ (!s->io_q.plugged ||
400
+ s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) {
401
+ return ioq_submit(s);
402
+ }
403
+ return 0;
404
+}
405
+
406
+int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
407
+ uint64_t offset, QEMUIOVector *qiov, int type)
408
+{
409
+ int ret;
410
+ LuringAIOCB luringcb = {
411
+ .co = qemu_coroutine_self(),
412
+ .ret = -EINPROGRESS,
413
+ .qiov = qiov,
414
+ .is_read = (type == QEMU_AIO_READ),
415
+ };
416
+
417
+ ret = luring_do_submit(fd, &luringcb, s, offset, type);
418
+ if (ret < 0) {
419
+ return ret;
420
+ }
421
+
422
+ if (luringcb.ret == -EINPROGRESS) {
423
+ qemu_coroutine_yield();
424
+ }
425
+ return luringcb.ret;
426
+}
427
+
428
+void luring_detach_aio_context(LuringState *s, AioContext *old_context)
429
+{
430
+ aio_set_fd_handler(old_context, s->ring.ring_fd, false, NULL, NULL, NULL,
431
+ s);
432
+ qemu_bh_delete(s->completion_bh);
433
+ s->aio_context = NULL;
434
+}
435
+
436
+void luring_attach_aio_context(LuringState *s, AioContext *new_context)
437
+{
438
+ s->aio_context = new_context;
439
+ s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
440
+ aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
441
+ qemu_luring_completion_cb, NULL, NULL, s);
442
+}
443
+
444
+LuringState *luring_init(Error **errp)
445
+{
446
+ int rc;
447
+ LuringState *s = g_new0(LuringState, 1);
448
+ struct io_uring *ring = &s->ring;
449
+
450
+ rc = io_uring_queue_init(MAX_ENTRIES, ring, 0);
451
+ if (rc < 0) {
452
+ error_setg_errno(errp, errno, "failed to init linux io_uring ring");
453
+ g_free(s);
454
+ return NULL;
455
+ }
456
+
457
+ ioq_init(&s->io_q);
458
+ return s;
459
+
460
+}
461
+
462
+void luring_cleanup(LuringState *s)
463
+{
464
+ io_uring_queue_exit(&s->ring);
465
+ g_free(s);
466
+}
467
diff --git a/include/block/aio.h b/include/block/aio.h
468
index XXXXXXX..XXXXXXX 100644
469
--- a/include/block/aio.h
470
+++ b/include/block/aio.h
471
@@ -XXX,XX +XXX,XX @@ typedef void IOHandler(void *opaque);
472
struct Coroutine;
473
struct ThreadPool;
474
struct LinuxAioState;
475
+struct LuringState;
476
477
struct AioContext {
478
GSource source;
479
@@ -XXX,XX +XXX,XX @@ struct AioContext {
480
struct ThreadPool *thread_pool;
481
482
#ifdef CONFIG_LINUX_AIO
483
- /* State for native Linux AIO. Uses aio_context_acquire/release for
484
+ /*
485
+ * State for native Linux AIO. Uses aio_context_acquire/release for
486
* locking.
487
*/
488
struct LinuxAioState *linux_aio;
489
#endif
490
+#ifdef CONFIG_LINUX_IO_URING
491
+ /*
492
+ * State for Linux io_uring. Uses aio_context_acquire/release for
493
+ * locking.
494
+ */
495
+ struct LuringState *linux_io_uring;
496
+#endif
497
498
/* TimerLists for calling timers - one per clock type. Has its own
499
* locking.
500
@@ -XXX,XX +XXX,XX @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp);
501
/* Return the LinuxAioState bound to this AioContext */
502
struct LinuxAioState *aio_get_linux_aio(AioContext *ctx);
503
504
+/* Setup the LuringState bound to this AioContext */
505
+struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp);
506
+
507
+/* Return the LuringState bound to this AioContext */
508
+struct LuringState *aio_get_linux_io_uring(AioContext *ctx);
509
/**
510
* aio_timer_new_with_attrs:
511
* @ctx: the aio context
512
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
513
index XXXXXXX..XXXXXXX 100644
514
--- a/include/block/raw-aio.h
515
+++ b/include/block/raw-aio.h
516
@@ -XXX,XX +XXX,XX @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context);
517
void laio_io_plug(BlockDriverState *bs, LinuxAioState *s);
518
void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s);
519
#endif
520
+/* io_uring.c - Linux io_uring implementation */
521
+#ifdef CONFIG_LINUX_IO_URING
522
+typedef struct LuringState LuringState;
523
+LuringState *luring_init(Error **errp);
524
+void luring_cleanup(LuringState *s);
525
+int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
526
+ uint64_t offset, QEMUIOVector *qiov, int type);
527
+void luring_detach_aio_context(LuringState *s, AioContext *old_context);
528
+void luring_attach_aio_context(LuringState *s, AioContext *new_context);
529
+void luring_io_plug(BlockDriverState *bs, LuringState *s);
530
+void luring_io_unplug(BlockDriverState *bs, LuringState *s);
531
+#endif
532
533
#ifdef _WIN32
534
typedef struct QEMUWin32AIOState QEMUWin32AIOState;
535
--
536
2.24.1
537
538
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Follow linux-aio.o and stub out the block/io_uring.o APIs that will be
4
missing when a binary is linked with obj-util-y but without
5
block-util-y (e.g. vhost-user-gpu).
6
7
For example, the stubs are necessary so that a binary using util/async.o
8
from obj-util-y for qemu_bh_new() links successfully. In this case
9
block/io_uring.o from block-util-y isn't needed and we can avoid
10
dragging in the block layer by linking the stubs instead. The stub
11
functions never get called.
12
13
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
14
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Message-id: 20200120141858.587874-6-stefanha@redhat.com
17
Message-Id: <20200120141858.587874-6-stefanha@redhat.com>
18
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
19
---
20
MAINTAINERS | 1 +
21
stubs/Makefile.objs | 1 +
22
stubs/io_uring.c | 32 ++++++++++++++++++++++++++++++++
23
3 files changed, 34 insertions(+)
24
create mode 100644 stubs/io_uring.c
25
26
diff --git a/MAINTAINERS b/MAINTAINERS
27
index XXXXXXX..XXXXXXX 100644
28
--- a/MAINTAINERS
29
+++ b/MAINTAINERS
30
@@ -XXX,XX +XXX,XX @@ M: Stefan Hajnoczi <stefanha@redhat.com>
31
L: qemu-block@nongnu.org
32
S: Maintained
33
F: block/io_uring.c
34
+F: stubs/io_uring.c
35
36
qcow2
37
M: Kevin Wolf <kwolf@redhat.com>
38
diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs
39
index XXXXXXX..XXXXXXX 100644
40
--- a/stubs/Makefile.objs
41
+++ b/stubs/Makefile.objs
42
@@ -XXX,XX +XXX,XX @@ stub-obj-y += iothread.o
43
stub-obj-y += iothread-lock.o
44
stub-obj-y += is-daemonized.o
45
stub-obj-$(CONFIG_LINUX_AIO) += linux-aio.o
46
+stub-obj-$(CONFIG_LINUX_IO_URING) += io_uring.o
47
stub-obj-y += machine-init-done.o
48
stub-obj-y += migr-blocker.o
49
stub-obj-y += change-state-handler.o
50
diff --git a/stubs/io_uring.c b/stubs/io_uring.c
51
new file mode 100644
52
index XXXXXXX..XXXXXXX
53
--- /dev/null
54
+++ b/stubs/io_uring.c
55
@@ -XXX,XX +XXX,XX @@
56
+/*
57
+ * Linux io_uring support.
58
+ *
59
+ * Copyright (C) 2009 IBM, Corp.
60
+ * Copyright (C) 2009 Red Hat, Inc.
61
+ *
62
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
63
+ * See the COPYING file in the top-level directory.
64
+ */
65
+#include "qemu/osdep.h"
66
+#include "block/aio.h"
67
+#include "block/raw-aio.h"
68
+
69
+void luring_detach_aio_context(LuringState *s, AioContext *old_context)
70
+{
71
+ abort();
72
+}
73
+
74
+void luring_attach_aio_context(LuringState *s, AioContext *new_context)
75
+{
76
+ abort();
77
+}
78
+
79
+LuringState *luring_init(Error **errp)
80
+{
81
+ abort();
82
+}
83
+
84
+void luring_cleanup(LuringState *s)
85
+{
86
+ abort();
87
+}
88
--
89
2.24.1
90
91
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-7-stefanha@redhat.com
7
Message-Id: <20200120141858.587874-7-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
util/async.c | 36 ++++++++++++++++++++++++++++++++++++
11
1 file changed, 36 insertions(+)
12
13
diff --git a/util/async.c b/util/async.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/util/async.c
16
+++ b/util/async.c
17
@@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source)
18
}
19
#endif
20
21
+#ifdef CONFIG_LINUX_IO_URING
22
+ if (ctx->linux_io_uring) {
23
+ luring_detach_aio_context(ctx->linux_io_uring, ctx);
24
+ luring_cleanup(ctx->linux_io_uring);
25
+ ctx->linux_io_uring = NULL;
26
+ }
27
+#endif
28
+
29
assert(QSLIST_EMPTY(&ctx->scheduled_coroutines));
30
qemu_bh_delete(ctx->co_schedule_bh);
31
32
@@ -XXX,XX +XXX,XX @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx)
33
}
34
#endif
35
36
+#ifdef CONFIG_LINUX_IO_URING
37
+LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp)
38
+{
39
+ if (ctx->linux_io_uring) {
40
+ return ctx->linux_io_uring;
41
+ }
42
+
43
+ ctx->linux_io_uring = luring_init(errp);
44
+ if (!ctx->linux_io_uring) {
45
+ return NULL;
46
+ }
47
+
48
+ luring_attach_aio_context(ctx->linux_io_uring, ctx);
49
+ return ctx->linux_io_uring;
50
+}
51
+
52
+LuringState *aio_get_linux_io_uring(AioContext *ctx)
53
+{
54
+ assert(ctx->linux_io_uring);
55
+ return ctx->linux_io_uring;
56
+}
57
+#endif
58
+
59
void aio_notify(AioContext *ctx)
60
{
61
/* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs
62
@@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp)
63
#ifdef CONFIG_LINUX_AIO
64
ctx->linux_aio = NULL;
65
#endif
66
+
67
+#ifdef CONFIG_LINUX_IO_URING
68
+ ctx->linux_io_uring = NULL;
69
+#endif
70
+
71
ctx->thread_pool = NULL;
72
qemu_rec_mutex_init(&ctx->lock);
73
timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
74
--
75
2.24.1
76
77
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-8-stefanha@redhat.com
7
Message-Id: <20200120141858.587874-8-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block.c | 22 ++++++++++++++++++++++
11
blockdev.c | 12 ++++--------
12
include/block/block.h | 1 +
13
3 files changed, 27 insertions(+), 8 deletions(-)
14
15
diff --git a/block.c b/block.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/block.c
18
+++ b/block.c
19
@@ -XXX,XX +XXX,XX @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts,
20
return detect_zeroes;
21
}
22
23
+/**
24
+ * Set open flags for aio engine
25
+ *
26
+ * Return 0 on success, -1 if the engine specified is invalid
27
+ */
28
+int bdrv_parse_aio(const char *mode, int *flags)
29
+{
30
+ if (!strcmp(mode, "threads")) {
31
+ /* do nothing, default */
32
+ } else if (!strcmp(mode, "native")) {
33
+ *flags |= BDRV_O_NATIVE_AIO;
34
+#ifdef CONFIG_LINUX_IO_URING
35
+ } else if (!strcmp(mode, "io_uring")) {
36
+ *flags |= BDRV_O_IO_URING;
37
+#endif
38
+ } else {
39
+ return -1;
40
+ }
41
+
42
+ return 0;
43
+}
44
+
45
/**
46
* Set open flags for a given discard mode
47
*
48
diff --git a/blockdev.c b/blockdev.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/blockdev.c
51
+++ b/blockdev.c
52
@@ -XXX,XX +XXX,XX @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
53
}
54
55
if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
56
- if (!strcmp(aio, "native")) {
57
- *bdrv_flags |= BDRV_O_NATIVE_AIO;
58
- } else if (!strcmp(aio, "threads")) {
59
- /* this is the default */
60
- } else {
61
- error_setg(errp, "invalid aio option");
62
- return;
63
+ if (bdrv_parse_aio(aio, bdrv_flags) < 0) {
64
+ error_setg(errp, "invalid aio option");
65
+ return;
66
}
67
}
68
}
69
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_common_drive_opts = {
70
},{
71
.name = "aio",
72
.type = QEMU_OPT_STRING,
73
- .help = "host AIO implementation (threads, native)",
74
+ .help = "host AIO implementation (threads, native, io_uring)",
75
},{
76
.name = BDRV_OPT_CACHE_WB,
77
.type = QEMU_OPT_BOOL,
78
diff --git a/include/block/block.h b/include/block/block.h
79
index XXXXXXX..XXXXXXX 100644
80
--- a/include/block/block.h
81
+++ b/include/block/block.h
82
@@ -XXX,XX +XXX,XX @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
83
void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
84
Error **errp);
85
86
+int bdrv_parse_aio(const char *mode, int *flags);
87
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
88
int bdrv_parse_discard_flags(const char *mode, int *flags);
89
BdrvChild *bdrv_open_child(const char *filename,
90
--
91
2.24.1
92
93
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com>
5
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-id: 20200120141858.587874-9-stefanha@redhat.com
8
Message-Id: <20200120141858.587874-9-stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
block/file-posix.c | 98 +++++++++++++++++++++++++++++++++++++---------
12
1 file changed, 79 insertions(+), 19 deletions(-)
13
14
diff --git a/block/file-posix.c b/block/file-posix.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/file-posix.c
17
+++ b/block/file-posix.c
18
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState {
19
bool has_write_zeroes:1;
20
bool discard_zeroes:1;
21
bool use_linux_aio:1;
22
+ bool use_linux_io_uring:1;
23
bool page_cache_inconsistent:1;
24
bool has_fallocate;
25
bool needs_alignment;
26
@@ -XXX,XX +XXX,XX @@ static QemuOptsList raw_runtime_opts = {
27
{
28
.name = "aio",
29
.type = QEMU_OPT_STRING,
30
- .help = "host AIO implementation (threads, native)",
31
+ .help = "host AIO implementation (threads, native, io_uring)",
32
},
33
{
34
.name = "locking",
35
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
36
goto fail;
37
}
38
39
- aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO)
40
- ? BLOCKDEV_AIO_OPTIONS_NATIVE
41
- : BLOCKDEV_AIO_OPTIONS_THREADS;
42
+ if (bdrv_flags & BDRV_O_NATIVE_AIO) {
43
+ aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE;
44
+#ifdef CONFIG_LINUX_IO_URING
45
+ } else if (bdrv_flags & BDRV_O_IO_URING) {
46
+ aio_default = BLOCKDEV_AIO_OPTIONS_IO_URING;
47
+#endif
48
+ } else {
49
+ aio_default = BLOCKDEV_AIO_OPTIONS_THREADS;
50
+ }
51
+
52
aio = qapi_enum_parse(&BlockdevAioOptions_lookup,
53
qemu_opt_get(opts, "aio"),
54
aio_default, &local_err);
55
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
56
ret = -EINVAL;
57
goto fail;
58
}
59
+
60
s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
61
+#ifdef CONFIG_LINUX_IO_URING
62
+ s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING);
63
+#endif
64
65
locking = qapi_enum_parse(&OnOffAuto_lookup,
66
qemu_opt_get(opts, "locking"),
67
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
68
}
69
#endif /* !defined(CONFIG_LINUX_AIO) */
70
71
+#ifdef CONFIG_LINUX_IO_URING
72
+ if (s->use_linux_io_uring) {
73
+ if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) {
74
+ error_prepend(errp, "Unable to use io_uring: ");
75
+ goto fail;
76
+ }
77
+ }
78
+#else
79
+ if (s->use_linux_io_uring) {
80
+ error_setg(errp, "aio=io_uring was specified, but is not supported "
81
+ "in this build.");
82
+ ret = -EINVAL;
83
+ goto fail;
84
+ }
85
+#endif /* !defined(CONFIG_LINUX_IO_URING) */
86
+
87
s->has_discard = true;
88
s->has_write_zeroes = true;
89
if ((bs->open_flags & BDRV_O_NOCACHE) != 0) {
90
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
91
return -EIO;
92
93
/*
94
- * Check if the underlying device requires requests to be aligned,
95
- * and if the request we are trying to submit is aligned or not.
96
- * If this is the case tell the low-level driver that it needs
97
- * to copy the buffer.
98
+ * When using O_DIRECT, the request must be aligned to be able to use
99
+ * either libaio or io_uring interface. If not, fall back to regular thread
100
+ * pool read/write code which emulates this for us if we
101
+ * set QEMU_AIO_MISALIGNED.
102
*/
103
- if (s->needs_alignment) {
104
- if (!bdrv_qiov_is_aligned(bs, qiov)) {
105
- type |= QEMU_AIO_MISALIGNED;
106
+ if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) {
107
+ type |= QEMU_AIO_MISALIGNED;
108
+#ifdef CONFIG_LINUX_IO_URING
109
+ } else if (s->use_linux_io_uring) {
110
+ LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
111
+ assert(qiov->size == bytes);
112
+ return luring_co_submit(bs, aio, s->fd, offset, qiov, type);
113
+#endif
114
#ifdef CONFIG_LINUX_AIO
115
- } else if (s->use_linux_aio) {
116
- LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
117
- assert(qiov->size == bytes);
118
- return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
119
+ } else if (s->use_linux_aio) {
120
+ LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
121
+ assert(qiov->size == bytes);
122
+ return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
123
#endif
124
- }
125
}
126
127
acb = (RawPosixAIOData) {
128
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
129
130
static void raw_aio_plug(BlockDriverState *bs)
131
{
132
+ BDRVRawState __attribute__((unused)) *s = bs->opaque;
133
#ifdef CONFIG_LINUX_AIO
134
- BDRVRawState *s = bs->opaque;
135
if (s->use_linux_aio) {
136
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
137
laio_io_plug(bs, aio);
138
}
139
#endif
140
+#ifdef CONFIG_LINUX_IO_URING
141
+ if (s->use_linux_io_uring) {
142
+ LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
143
+ luring_io_plug(bs, aio);
144
+ }
145
+#endif
146
}
147
148
static void raw_aio_unplug(BlockDriverState *bs)
149
{
150
+ BDRVRawState __attribute__((unused)) *s = bs->opaque;
151
#ifdef CONFIG_LINUX_AIO
152
- BDRVRawState *s = bs->opaque;
153
if (s->use_linux_aio) {
154
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
155
laio_io_unplug(bs, aio);
156
}
157
#endif
158
+#ifdef CONFIG_LINUX_IO_URING
159
+ if (s->use_linux_io_uring) {
160
+ LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
161
+ luring_io_unplug(bs, aio);
162
+ }
163
+#endif
164
}
165
166
static int raw_co_flush_to_disk(BlockDriverState *bs)
167
@@ -XXX,XX +XXX,XX @@ static int raw_co_flush_to_disk(BlockDriverState *bs)
168
.aio_type = QEMU_AIO_FLUSH,
169
};
170
171
+#ifdef CONFIG_LINUX_IO_URING
172
+ if (s->use_linux_io_uring) {
173
+ LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs));
174
+ return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH);
175
+ }
176
+#endif
177
return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb);
178
}
179
180
static void raw_aio_attach_aio_context(BlockDriverState *bs,
181
AioContext *new_context)
182
{
183
+ BDRVRawState __attribute__((unused)) *s = bs->opaque;
184
#ifdef CONFIG_LINUX_AIO
185
- BDRVRawState *s = bs->opaque;
186
if (s->use_linux_aio) {
187
Error *local_err = NULL;
188
if (!aio_setup_linux_aio(new_context, &local_err)) {
189
@@ -XXX,XX +XXX,XX @@ static void raw_aio_attach_aio_context(BlockDriverState *bs,
190
}
191
}
192
#endif
193
+#ifdef CONFIG_LINUX_IO_URING
194
+ if (s->use_linux_io_uring) {
195
+ Error *local_err;
196
+ if (!aio_setup_linux_io_uring(new_context, &local_err)) {
197
+ error_reportf_err(local_err, "Unable to use linux io_uring, "
198
+ "falling back to thread pool: ");
199
+ s->use_linux_io_uring = false;
200
+ }
201
+ }
202
+#endif
203
}
204
205
static void raw_close(BlockDriverState *bs)
206
--
207
2.24.1
208
209
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-10-stefanha@redhat.com
7
Message-Id: <20200120141858.587874-10-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block/io_uring.c | 23 ++++++++++++++++++++---
11
block/trace-events | 12 ++++++++++++
12
2 files changed, 32 insertions(+), 3 deletions(-)
13
14
diff --git a/block/io_uring.c b/block/io_uring.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/io_uring.c
17
+++ b/block/io_uring.c
18
@@ -XXX,XX +XXX,XX @@
19
#include "block/raw-aio.h"
20
#include "qemu/coroutine.h"
21
#include "qapi/error.h"
22
+#include "trace.h"
23
24
/* io_uring ring size */
25
#define MAX_ENTRIES 128
26
@@ -XXX,XX +XXX,XX @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb,
27
QEMUIOVector *resubmit_qiov;
28
size_t remaining;
29
30
+ trace_luring_resubmit_short_read(s, luringcb, nread);
31
+
32
/* Update read position */
33
luringcb->total_read = nread;
34
remaining = luringcb->qiov->size - luringcb->total_read;
35
@@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s)
36
37
/* Change counters one-by-one because we can be nested. */
38
s->io_q.in_flight--;
39
+ trace_luring_process_completion(s, luringcb, ret);
40
41
/* total_read is non-zero only for resubmitted read requests */
42
total_bytes = ret + luringcb->total_read;
43
@@ -XXX,XX +XXX,XX @@ static int ioq_submit(LuringState *s)
44
QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next);
45
}
46
ret = io_uring_submit(&s->ring);
47
+ trace_luring_io_uring_submit(s, ret);
48
/* Prevent infinite loop if submission is refused */
49
if (ret <= 0) {
50
if (ret == -EAGAIN) {
51
@@ -XXX,XX +XXX,XX @@ static void ioq_init(LuringQueue *io_q)
52
53
void luring_io_plug(BlockDriverState *bs, LuringState *s)
54
{
55
+ trace_luring_io_plug(s);
56
s->io_q.plugged++;
57
}
58
59
void luring_io_unplug(BlockDriverState *bs, LuringState *s)
60
{
61
assert(s->io_q.plugged);
62
+ trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged,
63
+ s->io_q.in_queue, s->io_q.in_flight);
64
if (--s->io_q.plugged == 0 &&
65
!s->io_q.blocked && s->io_q.in_queue > 0) {
66
ioq_submit(s);
67
@@ -XXX,XX +XXX,XX @@ void luring_io_unplug(BlockDriverState *bs, LuringState *s)
68
static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
69
uint64_t offset, int type)
70
{
71
+ int ret;
72
struct io_uring_sqe *sqes = &luringcb->sqeq;
73
74
switch (type) {
75
@@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s,
76
77
QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next);
78
s->io_q.in_queue++;
79
-
80
+ trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged,
81
+ s->io_q.in_queue, s->io_q.in_flight);
82
if (!s->io_q.blocked &&
83
(!s->io_q.plugged ||
84
s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) {
85
- return ioq_submit(s);
86
+ ret = ioq_submit(s);
87
+ trace_luring_do_submit_done(s, ret);
88
+ return ret;
89
}
90
return 0;
91
}
92
@@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd,
93
.qiov = qiov,
94
.is_read = (type == QEMU_AIO_READ),
95
};
96
-
97
+ trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0,
98
+ type);
99
ret = luring_do_submit(fd, &luringcb, s, offset, type);
100
+
101
if (ret < 0) {
102
return ret;
103
}
104
@@ -XXX,XX +XXX,XX @@ LuringState *luring_init(Error **errp)
105
LuringState *s = g_new0(LuringState, 1);
106
struct io_uring *ring = &s->ring;
107
108
+ trace_luring_init_state(s, sizeof(*s));
109
+
110
rc = io_uring_queue_init(MAX_ENTRIES, ring, 0);
111
if (rc < 0) {
112
error_setg_errno(errp, errno, "failed to init linux io_uring ring");
113
@@ -XXX,XX +XXX,XX @@ void luring_cleanup(LuringState *s)
114
{
115
io_uring_queue_exit(&s->ring);
116
g_free(s);
117
+ trace_luring_cleanup_state(s);
118
}
119
diff --git a/block/trace-events b/block/trace-events
120
index XXXXXXX..XXXXXXX 100644
121
--- a/block/trace-events
122
+++ b/block/trace-events
123
@@ -XXX,XX +XXX,XX @@ qmp_block_stream(void *bs) "bs %p"
124
file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d"
125
file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_off, int64_t bytes, int flags, int64_t ret) "bs %p src_fd %d offset %"PRIu64" dst_fd %d offset %"PRIu64" bytes %"PRIu64" flags %d ret %"PRId64
126
127
+# io_uring.c
128
+luring_init_state(void *s, size_t size) "s %p size %zu"
129
+luring_cleanup_state(void *s) "%p freed"
130
+luring_io_plug(void *s) "LuringState %p plug"
131
+luring_io_unplug(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d"
132
+luring_do_submit(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d"
133
+luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d"
134
+luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d"
135
+luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d"
136
+luring_io_uring_submit(void *s, int ret) "LuringState %p ret %d"
137
+luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p luringcb %p nread %d"
138
+
139
# qcow2.c
140
qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t file_cluster_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu"
141
qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d"
142
--
143
2.24.1
144
145
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-11-stefanha@redhat.com
7
Message-Id: <20200120141858.587874-11-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
block/io_uring.c | 17 ++++++++++++++++-
11
1 file changed, 16 insertions(+), 1 deletion(-)
12
13
diff --git a/block/io_uring.c b/block/io_uring.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/block/io_uring.c
16
+++ b/block/io_uring.c
17
@@ -XXX,XX +XXX,XX @@ static void qemu_luring_completion_cb(void *opaque)
18
luring_process_completions_and_submit(s);
19
}
20
21
+static bool qemu_luring_poll_cb(void *opaque)
22
+{
23
+ LuringState *s = opaque;
24
+ struct io_uring_cqe *cqes;
25
+
26
+ if (io_uring_peek_cqe(&s->ring, &cqes) == 0) {
27
+ if (cqes) {
28
+ luring_process_completions_and_submit(s);
29
+ return true;
30
+ }
31
+ }
32
+
33
+ return false;
34
+}
35
+
36
static void ioq_init(LuringQueue *io_q)
37
{
38
QSIMPLEQ_INIT(&io_q->submit_queue);
39
@@ -XXX,XX +XXX,XX @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context)
40
s->aio_context = new_context;
41
s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s);
42
aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false,
43
- qemu_luring_completion_cb, NULL, NULL, s);
44
+ qemu_luring_completion_cb, NULL, qemu_luring_poll_cb, s);
45
}
46
47
LuringState *luring_init(Error **errp)
48
--
49
2.24.1
50
51
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-12-stefanha@redhat.com
7
Message-Id: <20200120141858.587874-12-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
qemu-io.c | 25 +++++++++++++++++++++----
11
1 file changed, 21 insertions(+), 4 deletions(-)
12
13
diff --git a/qemu-io.c b/qemu-io.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/qemu-io.c
16
+++ b/qemu-io.c
17
@@ -XXX,XX +XXX,XX @@ static void open_help(void)
18
" -C, -- use copy-on-read\n"
19
" -n, -- disable host cache, short for -t none\n"
20
" -U, -- force shared permissions\n"
21
-" -k, -- use kernel AIO implementation (on Linux only)\n"
22
+" -k, -- use kernel AIO implementation (Linux only, prefer use of -i)\n"
23
+" -i, -- use AIO mode (threads, native or io_uring)\n"
24
" -t, -- use the given cache mode for the image\n"
25
" -d, -- use the given discard mode for the image\n"
26
" -o, -- options to be given to the block driver"
27
@@ -XXX,XX +XXX,XX @@ static int open_f(BlockBackend *blk, int argc, char **argv)
28
QDict *opts;
29
bool force_share = false;
30
31
- while ((c = getopt(argc, argv, "snCro:kt:d:U")) != -1) {
32
+ while ((c = getopt(argc, argv, "snCro:ki:t:d:U")) != -1) {
33
switch (c) {
34
case 's':
35
flags |= BDRV_O_SNAPSHOT;
36
@@ -XXX,XX +XXX,XX @@ static int open_f(BlockBackend *blk, int argc, char **argv)
37
return -EINVAL;
38
}
39
break;
40
+ case 'i':
41
+ if (bdrv_parse_aio(optarg, &flags) < 0) {
42
+ error_report("Invalid aio option: %s", optarg);
43
+ qemu_opts_reset(&empty_opts);
44
+ return -EINVAL;
45
+ }
46
+ break;
47
case 'o':
48
if (imageOpts) {
49
printf("--image-opts and 'open -o' are mutually exclusive\n");
50
@@ -XXX,XX +XXX,XX @@ static void usage(const char *name)
51
" -n, --nocache disable host cache, short for -t none\n"
52
" -C, --copy-on-read enable copy-on-read\n"
53
" -m, --misalign misalign allocations for O_DIRECT\n"
54
-" -k, --native-aio use kernel AIO implementation (on Linux only)\n"
55
+" -k, --native-aio use kernel AIO implementation\n"
56
+" (Linux only, prefer use of -i)\n"
57
+" -i, --aio=MODE use AIO mode (threads, native or io_uring)\n"
58
" -t, --cache=MODE use the given cache mode for the image\n"
59
" -d, --discard=MODE use the given discard mode for the image\n"
60
" -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
61
@@ -XXX,XX +XXX,XX @@ static QemuOptsList file_opts = {
62
int main(int argc, char **argv)
63
{
64
int readonly = 0;
65
- const char *sopt = "hVc:d:f:rsnCmkt:T:U";
66
+ const char *sopt = "hVc:d:f:rsnCmki:t:T:U";
67
const struct option lopt[] = {
68
{ "help", no_argument, NULL, 'h' },
69
{ "version", no_argument, NULL, 'V' },
70
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
71
{ "copy-on-read", no_argument, NULL, 'C' },
72
{ "misalign", no_argument, NULL, 'm' },
73
{ "native-aio", no_argument, NULL, 'k' },
74
+ { "aio", required_argument, NULL, 'i' },
75
{ "discard", required_argument, NULL, 'd' },
76
{ "cache", required_argument, NULL, 't' },
77
{ "trace", required_argument, NULL, 'T' },
78
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
79
case 'k':
80
flags |= BDRV_O_NATIVE_AIO;
81
break;
82
+ case 'i':
83
+ if (bdrv_parse_aio(optarg, &flags) < 0) {
84
+ error_report("Invalid aio option: %s", optarg);
85
+ exit(1);
86
+ }
87
+ break;
88
case 't':
89
if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) < 0) {
90
error_report("Invalid cache option: %s", optarg);
91
--
92
2.24.1
93
94
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-13-stefanha@redhat.com
7
Message-Id: <20200120141858.587874-13-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
qemu-img-cmds.hx | 4 ++--
11
qemu-img.c | 11 ++++++++++-
12
qemu-img.texi | 5 ++++-
13
3 files changed, 16 insertions(+), 4 deletions(-)
14
15
diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx
16
index XXXXXXX..XXXXXXX 100644
17
--- a/qemu-img-cmds.hx
18
+++ b/qemu-img-cmds.hx
19
@@ -XXX,XX +XXX,XX @@ STEXI
20
ETEXI
21
22
DEF("bench", img_bench,
23
- "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] [-U] filename")
24
+ "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-i aio] [-w] [-U] filename")
25
STEXI
26
-@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename}
27
+@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-i @var{aio}] [-w] [-U] @var{filename}
28
ETEXI
29
30
DEF("check", img_check,
31
diff --git a/qemu-img.c b/qemu-img.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/qemu-img.c
34
+++ b/qemu-img.c
35
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
36
{"force-share", no_argument, 0, 'U'},
37
{0, 0, 0, 0}
38
};
39
- c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL);
40
+ c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options,
41
+ NULL);
42
if (c == -1) {
43
break;
44
}
45
@@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv)
46
case 'n':
47
flags |= BDRV_O_NATIVE_AIO;
48
break;
49
+ case 'i':
50
+ ret = bdrv_parse_aio(optarg, &flags);
51
+ if (ret < 0) {
52
+ error_report("Invalid aio option: %s", optarg);
53
+ ret = -1;
54
+ goto out;
55
+ }
56
+ break;
57
case 'o':
58
{
59
offset = cvtnum(optarg);
60
diff --git a/qemu-img.texi b/qemu-img.texi
61
index XXXXXXX..XXXXXXX 100644
62
--- a/qemu-img.texi
63
+++ b/qemu-img.texi
64
@@ -XXX,XX +XXX,XX @@ Command description:
65
Amends the image format specific @var{options} for the image file
66
@var{filename}. Not all file formats support this operation.
67
68
-@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename}
69
+@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [-i @var{aio}] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename}
70
71
Run a simple sequential I/O benchmark on the specified image. If @code{-w} is
72
specified, a write test is performed, otherwise a read test is performed.
73
@@ -XXX,XX +XXX,XX @@ If @code{-n} is specified, the native AIO backend is used if possible. On
74
Linux, this option only works if @code{-t none} or @code{-t directsync} is
75
specified as well.
76
77
+If @code{-i} is specified, the @var{aio} argument selects the AIO backend to
78
+use: @var{threads}, @var{native} or @var{io_uring}.
79
+
80
For write tests, by default a buffer filled with zeros is written. This can be
81
overridden with a pattern byte specified by @var{pattern}.
82
83
--
84
2.24.1
85
86
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Eric Blake <eblake@redhat.com>
5
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-id: 20200120141858.587874-14-stefanha@redhat.com
8
Message-Id: <20200120141858.587874-14-stefanha@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
docs/interop/qemu-nbd.rst | 4 ++--
12
qemu-nbd.c | 12 ++++--------
13
2 files changed, 6 insertions(+), 10 deletions(-)
14
15
diff --git a/docs/interop/qemu-nbd.rst b/docs/interop/qemu-nbd.rst
16
index XXXXXXX..XXXXXXX 100644
17
--- a/docs/interop/qemu-nbd.rst
18
+++ b/docs/interop/qemu-nbd.rst
19
@@ -XXX,XX +XXX,XX @@ driver options if ``--image-opts`` is specified.
20
21
.. option:: --aio=AIO
22
23
- Set the asynchronous I/O mode between ``threads`` (the default)
24
- and ``native`` (Linux only).
25
+ Set the asynchronous I/O mode between ``threads`` (the default),
26
+ ``native`` (Linux only), and ``io_uring`` (Linux 5.1+).
27
28
.. option:: --discard=DISCARD
29
30
diff --git a/qemu-nbd.c b/qemu-nbd.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/qemu-nbd.c
33
+++ b/qemu-nbd.c
34
@@ -XXX,XX +XXX,XX @@ static void usage(const char *name)
35
" '[ID_OR_NAME]'\n"
36
" -n, --nocache disable host cache\n"
37
" --cache=MODE set cache mode (none, writeback, ...)\n"
38
-" --aio=MODE set AIO mode (native or threads)\n"
39
+" --aio=MODE set AIO mode (native, io_uring or threads)\n"
40
" --discard=MODE set discard mode (ignore, unmap)\n"
41
" --detect-zeroes=MODE set detect-zeroes mode (off, on, unmap)\n"
42
" --image-opts treat FILE as a full set of image options\n"
43
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
44
exit(EXIT_FAILURE);
45
}
46
seen_aio = true;
47
- if (!strcmp(optarg, "native")) {
48
- flags |= BDRV_O_NATIVE_AIO;
49
- } else if (!strcmp(optarg, "threads")) {
50
- /* this is the default */
51
- } else {
52
- error_report("invalid aio mode `%s'", optarg);
53
- exit(EXIT_FAILURE);
54
+ if (bdrv_parse_aio(optarg, &flags) < 0) {
55
+ error_report("Invalid aio mode '%s'", optarg);
56
+ exit(EXIT_FAILURE);
57
}
58
break;
59
case QEMU_NBD_OPT_DISCARD:
60
--
61
2.24.1
62
63
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-15-stefanha@redhat.com
7
Message-Id: <20200120141858.587874-15-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
tests/qemu-iotests/check | 15 ++++++++++++++-
11
tests/qemu-iotests/common.rc | 14 ++++++++++++++
12
tests/qemu-iotests/iotests.py | 12 ++++++++++--
13
3 files changed, 38 insertions(+), 3 deletions(-)
14
15
diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check
16
index XXXXXXX..XXXXXXX 100755
17
--- a/tests/qemu-iotests/check
18
+++ b/tests/qemu-iotests/check
19
@@ -XXX,XX +XXX,XX @@ sortme=false
20
expunge=true
21
have_test_arg=false
22
cachemode=false
23
+aiomode=false
24
25
tmp="${TEST_DIR}"/$$
26
rm -f $tmp.list $tmp.tmp $tmp.sed
27
@@ -XXX,XX +XXX,XX @@ export IMGFMT_GENERIC=true
28
export IMGPROTO=file
29
export IMGOPTS=""
30
export CACHEMODE="writeback"
31
+export AIOMODE="threads"
32
export QEMU_IO_OPTIONS=""
33
export QEMU_IO_OPTIONS_NO_FMT=""
34
export CACHEMODE_IS_DEFAULT=true
35
@@ -XXX,XX +XXX,XX @@ s/ .*//p
36
CACHEMODE_IS_DEFAULT=false
37
cachemode=false
38
continue
39
+ elif $aiomode
40
+ then
41
+ AIOMODE="$r"
42
+ aiomode=false
43
+ continue
44
fi
45
46
xpand=true
47
@@ -XXX,XX +XXX,XX @@ other options
48
-n show me, do not run tests
49
-o options -o options to pass to qemu-img create/convert
50
-c mode cache mode
51
+ -i mode AIO mode
52
-makecheck pretty print output for make check
53
54
testlist options
55
@@ -XXX,XX +XXX,XX @@ testlist options
56
cachemode=true
57
xpand=false
58
;;
59
+ -i)
60
+ aiomode=true
61
+ xpand=false
62
+ ;;
63
-T) # deprecated timestamp option
64
xpand=false
65
;;
66
-
67
-v)
68
verbose=true
69
xpand=false
70
@@ -XXX,XX +XXX,XX @@ done
71
72
# Set qemu-io cache mode with $CACHEMODE we have
73
QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --cache $CACHEMODE"
74
+# Set qemu-io aio mode with $AIOMODE we have
75
+QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --aio $AIOMODE"
76
77
QEMU_IO_OPTIONS_NO_FMT="$QEMU_IO_OPTIONS"
78
if [ "$IMGOPTSSYNTAX" != "true" ]; then
79
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
80
index XXXXXXX..XXXXXXX 100644
81
--- a/tests/qemu-iotests/common.rc
82
+++ b/tests/qemu-iotests/common.rc
83
@@ -XXX,XX +XXX,XX @@ _default_cache_mode()
84
return
85
fi
86
}
87
+_supported_aio_modes()
88
+{
89
+ for mode; do
90
+ if [ "$mode" = "$AIOMODE" ]; then
91
+ return
92
+ fi
93
+ done
94
+ _notrun "not suitable for aio mode: $AIOMODE"
95
+}
96
+_default_aio_mode()
97
+{
98
+ AIOMODE="$1"
99
+ QEMU_IO="$QEMU_IO --aio $1"
100
+}
101
102
_unsupported_imgopts()
103
{
104
diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py
105
index XXXXXXX..XXXXXXX 100644
106
--- a/tests/qemu-iotests/iotests.py
107
+++ b/tests/qemu-iotests/iotests.py
108
@@ -XXX,XX +XXX,XX @@ test_dir = os.environ.get('TEST_DIR')
109
sock_dir = os.environ.get('SOCK_DIR')
110
output_dir = os.environ.get('OUTPUT_DIR', '.')
111
cachemode = os.environ.get('CACHEMODE')
112
+aiomode = os.environ.get('AIOMODE')
113
qemu_default_machine = os.environ.get('QEMU_DEFAULT_MACHINE')
114
115
socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper')
116
@@ -XXX,XX +XXX,XX @@ class VM(qtest.QEMUQtestMachine):
117
options.append('file=%s' % path)
118
options.append('format=%s' % format)
119
options.append('cache=%s' % cachemode)
120
+ options.append('aio=%s' % aiomode)
121
122
if opts:
123
options.append(opts)
124
@@ -XXX,XX +XXX,XX @@ def verify_cache_mode(supported_cache_modes=[]):
125
if supported_cache_modes and (cachemode not in supported_cache_modes):
126
notrun('not suitable for this cache mode: %s' % cachemode)
127
128
+def verify_aio_mode(supported_aio_modes=[]):
129
+ if supported_aio_modes and (aiomode not in supported_aio_modes):
130
+ notrun('not suitable for this aio mode: %s' % aiomode)
131
+
132
def supports_quorum():
133
return 'quorum' in qemu_img_pipe('--help')
134
135
@@ -XXX,XX +XXX,XX @@ def execute_unittest(output, verbosity, debug):
136
137
def execute_test(test_function=None,
138
supported_fmts=[], supported_oses=['linux'],
139
- supported_cache_modes=[], unsupported_fmts=[],
140
- supported_protocols=[], unsupported_protocols=[]):
141
+ supported_cache_modes=[], supported_aio_modes={},
142
+ unsupported_fmts=[], supported_protocols=[],
143
+ unsupported_protocols=[]):
144
"""Run either unittest or script-style tests."""
145
146
# We are using TEST_DIR and QEMU_DEFAULT_MACHINE as proxies to
147
@@ -XXX,XX +XXX,XX @@ def execute_test(test_function=None,
148
verify_protocol(supported_protocols, unsupported_protocols)
149
verify_platform(supported_oses)
150
verify_cache_mode(supported_cache_modes)
151
+ verify_aio_mode(supported_aio_modes)
152
153
if debug:
154
output = sys.stdout
155
--
156
2.24.1
157
158
diff view generated by jsdifflib
Deleted patch
1
From: Aarushi Mehta <mehta.aaru20@gmail.com>
2
1
3
Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com>
4
Acked-by: Stefano Garzarella <sgarzare@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200120141858.587874-16-stefanha@redhat.com
7
Message-Id: <20200120141858.587874-16-stefanha@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
tests/qemu-iotests/028 | 2 +-
11
tests/qemu-iotests/058 | 2 +-
12
tests/qemu-iotests/089 | 4 ++--
13
tests/qemu-iotests/091 | 4 ++--
14
tests/qemu-iotests/109 | 2 +-
15
tests/qemu-iotests/147 | 5 +++--
16
tests/qemu-iotests/181 | 8 ++++----
17
tests/qemu-iotests/183 | 4 ++--
18
tests/qemu-iotests/185 | 10 +++++-----
19
tests/qemu-iotests/200 | 2 +-
20
tests/qemu-iotests/201 | 8 ++++----
21
11 files changed, 26 insertions(+), 25 deletions(-)
22
23
diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028
24
index XXXXXXX..XXXXXXX 100755
25
--- a/tests/qemu-iotests/028
26
+++ b/tests/qemu-iotests/028
27
@@ -XXX,XX +XXX,XX @@ echo block-backup
28
echo
29
30
qemu_comm_method="monitor"
31
-_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk
32
+_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk
33
h=$QEMU_HANDLE
34
if [ "${VALGRIND_QEMU}" == "y" ]; then
35
QEMU_COMM_TIMEOUT=7
36
diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058
37
index XXXXXXX..XXXXXXX 100755
38
--- a/tests/qemu-iotests/058
39
+++ b/tests/qemu-iotests/058
40
@@ -XXX,XX +XXX,XX @@ nbd_snapshot_img="nbd:unix:$nbd_unix_socket"
41
converted_image=$TEST_IMG.converted
42
43
# Use -f raw instead of -f $IMGFMT for the NBD connection
44
-QEMU_IO_NBD="$QEMU_IO -f raw --cache=$CACHEMODE"
45
+QEMU_IO_NBD="$QEMU_IO -f raw --cache=$CACHEMODE --aio=$AIOMODE"
46
47
echo
48
echo "== preparing image =="
49
diff --git a/tests/qemu-iotests/089 b/tests/qemu-iotests/089
50
index XXXXXXX..XXXXXXX 100755
51
--- a/tests/qemu-iotests/089
52
+++ b/tests/qemu-iotests/089
53
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'write -P 42 0 512' -c 'write -P 23 512 512' \
54
55
$QEMU_IMG convert -f raw -O $IMGFMT "$TEST_IMG.base" "$TEST_IMG"
56
57
-$QEMU_IO_PROG --cache $CACHEMODE \
58
+$QEMU_IO_PROG --cache $CACHEMODE --aio $AIOMODE \
59
-c 'read -P 42 0 512' -c 'read -P 23 512 512' \
60
-c 'read -P 66 1024 512' "json:{
61
\"driver\": \"$IMGFMT\",
62
@@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'write -P 42 0x38000 512' "$TEST_IMG" | _filter_qemu_io
63
64
# The "image.filename" part tests whether "a": { "b": "c" } and "a.b": "c" do
65
# the same (which they should).
66
-$QEMU_IO_PROG --cache $CACHEMODE \
67
+$QEMU_IO_PROG --cache $CACHEMODE --aio $AIOMODE \
68
-c 'read -P 42 0x38000 512' "json:{
69
\"driver\": \"$IMGFMT\",
70
\"file\": {
71
diff --git a/tests/qemu-iotests/091 b/tests/qemu-iotests/091
72
index XXXXXXX..XXXXXXX 100755
73
--- a/tests/qemu-iotests/091
74
+++ b/tests/qemu-iotests/091
75
@@ -XXX,XX +XXX,XX @@ echo === Starting QEMU VM1 ===
76
echo
77
78
qemu_comm_method="monitor"
79
-_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk
80
+_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk
81
h1=$QEMU_HANDLE
82
83
echo
84
echo === Starting QEMU VM2 ===
85
echo
86
-_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk \
87
+_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk \
88
-incoming "exec: cat '${MIG_FIFO}'"
89
h2=$QEMU_HANDLE
90
91
diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109
92
index XXXXXXX..XXXXXXX 100755
93
--- a/tests/qemu-iotests/109
94
+++ b/tests/qemu-iotests/109
95
@@ -XXX,XX +XXX,XX @@ run_qemu()
96
local qmp_format="$3"
97
local qmp_event="$4"
98
99
- _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},id=src
100
+ _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},aio=${AIOMODE},id=src
101
_send_qemu_cmd $QEMU_HANDLE "{ 'execute': 'qmp_capabilities' }" "return"
102
103
_send_qemu_cmd $QEMU_HANDLE \
104
diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147
105
index XXXXXXX..XXXXXXX 100755
106
--- a/tests/qemu-iotests/147
107
+++ b/tests/qemu-iotests/147
108
@@ -XXX,XX +XXX,XX @@ import socket
109
import stat
110
import time
111
import iotests
112
-from iotests import cachemode, imgfmt, qemu_img, qemu_nbd, qemu_nbd_early_pipe
113
+from iotests import cachemode, aiomode, imgfmt, qemu_img, qemu_nbd, qemu_nbd_early_pipe
114
115
NBD_PORT_START = 32768
116
NBD_PORT_END = NBD_PORT_START + 1024
117
@@ -XXX,XX +XXX,XX @@ class BuiltinNBD(NBDBlockdevAddBase):
118
self.server.add_drive_raw('if=none,id=nbd-export,' +
119
'file=%s,' % test_img +
120
'format=%s,' % imgfmt +
121
- 'cache=%s' % cachemode)
122
+ 'cache=%s' % cachemode +
123
+ 'aio=%s' % aiomode)
124
self.server.launch()
125
126
def tearDown(self):
127
diff --git a/tests/qemu-iotests/181 b/tests/qemu-iotests/181
128
index XXXXXXX..XXXXXXX 100755
129
--- a/tests/qemu-iotests/181
130
+++ b/tests/qemu-iotests/181
131
@@ -XXX,XX +XXX,XX @@ qemu_comm_method="monitor"
132
133
if [ "$IMGOPTSSYNTAX" = "true" ]; then
134
_launch_qemu \
135
- -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk
136
+ -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk
137
else
138
_launch_qemu \
139
- -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk
140
+ -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk
141
fi
142
src=$QEMU_HANDLE
143
144
if [ "$IMGOPTSSYNTAX" = "true" ]; then
145
_launch_qemu \
146
- -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk \
147
+ -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk \
148
-incoming "unix:${MIG_SOCKET}"
149
else
150
_launch_qemu \
151
- -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk \
152
+ -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk \
153
-incoming "unix:${MIG_SOCKET}"
154
fi
155
dest=$QEMU_HANDLE
156
diff --git a/tests/qemu-iotests/183 b/tests/qemu-iotests/183
157
index XXXXXXX..XXXXXXX 100755
158
--- a/tests/qemu-iotests/183
159
+++ b/tests/qemu-iotests/183
160
@@ -XXX,XX +XXX,XX @@ echo
161
qemu_comm_method="qmp"
162
163
_launch_qemu \
164
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
165
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
166
src=$QEMU_HANDLE
167
_send_qemu_cmd $src "{ 'execute': 'qmp_capabilities' }" 'return'
168
169
_launch_qemu \
170
- -drive file="${TEST_IMG}.dest",cache=$CACHEMODE,driver=$IMGFMT,id=disk \
171
+ -drive file="${TEST_IMG}.dest",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk \
172
-incoming "unix:${MIG_SOCKET}"
173
dest=$QEMU_HANDLE
174
_send_qemu_cmd $dest "{ 'execute': 'qmp_capabilities' }" 'return'
175
diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185
176
index XXXXXXX..XXXXXXX 100755
177
--- a/tests/qemu-iotests/185
178
+++ b/tests/qemu-iotests/185
179
@@ -XXX,XX +XXX,XX @@ echo
180
qemu_comm_method="qmp"
181
182
_launch_qemu \
183
- -drive file="${TEST_IMG}.base",cache=$CACHEMODE,driver=$IMGFMT,id=disk
184
+ -drive file="${TEST_IMG}.base",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
185
h=$QEMU_HANDLE
186
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
187
188
@@ -XXX,XX +XXX,XX @@ echo === Start active commit job and exit qemu ===
189
echo
190
191
_launch_qemu \
192
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
193
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
194
h=$QEMU_HANDLE
195
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
196
197
@@ -XXX,XX +XXX,XX @@ echo === Start mirror job and exit qemu ===
198
echo
199
200
_launch_qemu \
201
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
202
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
203
h=$QEMU_HANDLE
204
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
205
206
@@ -XXX,XX +XXX,XX @@ echo === Start backup job and exit qemu ===
207
echo
208
209
_launch_qemu \
210
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
211
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
212
h=$QEMU_HANDLE
213
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
214
215
@@ -XXX,XX +XXX,XX @@ echo === Start streaming job and exit qemu ===
216
echo
217
218
_launch_qemu \
219
- -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk
220
+ -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk
221
h=$QEMU_HANDLE
222
_send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return'
223
224
diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200
225
index XXXXXXX..XXXXXXX 100755
226
--- a/tests/qemu-iotests/200
227
+++ b/tests/qemu-iotests/200
228
@@ -XXX,XX +XXX,XX @@ echo === Starting QEMU VM ===
229
echo
230
qemu_comm_method="qmp"
231
_launch_qemu -object iothread,id=iothread0 $virtio_scsi \
232
- -drive file="${TEST_IMG}",media=disk,if=none,cache=$CACHEMODE,id=drive_sysdisk,format=$IMGFMT \
233
+ -drive file="${TEST_IMG}",media=disk,if=none,cache=$CACHEMODE,aio=$AIOMODE,id=drive_sysdisk,format=$IMGFMT \
234
-device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0
235
h1=$QEMU_HANDLE
236
237
diff --git a/tests/qemu-iotests/201 b/tests/qemu-iotests/201
238
index XXXXXXX..XXXXXXX 100755
239
--- a/tests/qemu-iotests/201
240
+++ b/tests/qemu-iotests/201
241
@@ -XXX,XX +XXX,XX @@ qemu_comm_method="monitor"
242
243
if [ "$IMGOPTSSYNTAX" = "true" ]; then
244
_launch_qemu \
245
- -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk
246
+ -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk
247
else
248
_launch_qemu \
249
- -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk
250
+ -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk
251
fi
252
src=$QEMU_HANDLE
253
254
if [ "$IMGOPTSSYNTAX" = "true" ]; then
255
_launch_qemu \
256
- -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk \
257
+ -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk \
258
-incoming "unix:${MIG_SOCKET}"
259
else
260
_launch_qemu \
261
- -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk \
262
+ -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk \
263
-incoming "unix:${MIG_SOCKET}"
264
fi
265
dest=$QEMU_HANDLE
266
--
267
2.24.1
268
269
diff view generated by jsdifflib