1 | The following changes since commit 928173659d6e5dc368284f73f90ea1d129e1f57d: | 1 | The following changes since commit 661c2e1ab29cd9c4d268ae3f44712e8d421c0e56: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20200130' into staging (2020-01-30 16:19:04 +0000) | 3 | scripts/checkpatch: Fix a typo (2025-03-04 09:30:26 +0800) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 8dff69b9415b4287e900358744b732195e1ab2e2: | 9 | for you to fetch changes up to 2ad638a3d160923ef3dbf87c73944e6e44bdc724: |
10 | 10 | ||
11 | tests/qemu-iotests: use AIOMODE with various tests (2020-01-30 21:01:40 +0000) | 11 | block/qed: fix use-after-free by nullifying timer pointer after free (2025-03-06 10:19:54 +0800) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Pull request |
15 | 15 | ||
16 | QED need_check_timer use-after-free fix | ||
17 | |||
16 | ---------------------------------------------------------------- | 18 | ---------------------------------------------------------------- |
17 | 19 | ||
18 | Aarushi Mehta (15): | 20 | Denis Rastyogin (1): |
19 | configure: permit use of io_uring | 21 | block/qed: fix use-after-free by nullifying timer pointer after free |
20 | qapi/block-core: add option for io_uring | ||
21 | block/block: add BDRV flag for io_uring | ||
22 | block/io_uring: implements interfaces for io_uring | ||
23 | stubs: add stubs for io_uring interface | ||
24 | util/async: add aio interfaces for io_uring | ||
25 | blockdev: adds bdrv_parse_aio to use io_uring | ||
26 | block/file-posix.c: extend to use io_uring | ||
27 | block: add trace events for io_uring | ||
28 | block/io_uring: adds userspace completion polling | ||
29 | qemu-io: adds option to use aio engine | ||
30 | qemu-img: adds option to use aio engine for benchmarking | ||
31 | qemu-nbd: adds option for aio engines | ||
32 | tests/qemu-iotests: enable testing with aio options | ||
33 | tests/qemu-iotests: use AIOMODE with various tests | ||
34 | 22 | ||
35 | Paolo Bonzini (3): | 23 | block/qed.c | 1 + |
36 | block: eliminate BDRV_REQ_NO_SERIALISING | 24 | 1 file changed, 1 insertion(+) |
37 | block/io: wait for serialising requests when a request becomes | ||
38 | serialising | ||
39 | block/io: take bs->reqs_lock in bdrv_mark_request_serialising | ||
40 | |||
41 | MAINTAINERS | 9 + | ||
42 | block.c | 22 ++ | ||
43 | block/Makefile.objs | 3 + | ||
44 | block/file-posix.c | 99 ++++++-- | ||
45 | block/io.c | 162 +++++++------ | ||
46 | block/io_uring.c | 433 ++++++++++++++++++++++++++++++++++ | ||
47 | block/trace-events | 12 + | ||
48 | blockdev.c | 12 +- | ||
49 | configure | 27 +++ | ||
50 | docs/interop/qemu-nbd.rst | 4 +- | ||
51 | include/block/aio.h | 16 +- | ||
52 | include/block/block.h | 14 +- | ||
53 | include/block/block_int.h | 3 +- | ||
54 | include/block/raw-aio.h | 12 + | ||
55 | qapi/block-core.json | 4 +- | ||
56 | qemu-img-cmds.hx | 4 +- | ||
57 | qemu-img.c | 11 +- | ||
58 | qemu-img.texi | 5 +- | ||
59 | qemu-io.c | 25 +- | ||
60 | qemu-nbd.c | 12 +- | ||
61 | stubs/Makefile.objs | 1 + | ||
62 | stubs/io_uring.c | 32 +++ | ||
63 | tests/qemu-iotests/028 | 2 +- | ||
64 | tests/qemu-iotests/058 | 2 +- | ||
65 | tests/qemu-iotests/089 | 4 +- | ||
66 | tests/qemu-iotests/091 | 4 +- | ||
67 | tests/qemu-iotests/109 | 2 +- | ||
68 | tests/qemu-iotests/147 | 5 +- | ||
69 | tests/qemu-iotests/181 | 8 +- | ||
70 | tests/qemu-iotests/183 | 4 +- | ||
71 | tests/qemu-iotests/185 | 10 +- | ||
72 | tests/qemu-iotests/200 | 2 +- | ||
73 | tests/qemu-iotests/201 | 8 +- | ||
74 | tests/qemu-iotests/check | 15 +- | ||
75 | tests/qemu-iotests/common.rc | 14 ++ | ||
76 | tests/qemu-iotests/iotests.py | 12 +- | ||
77 | util/async.c | 36 +++ | ||
78 | 37 files changed, 878 insertions(+), 172 deletions(-) | ||
79 | create mode 100644 block/io_uring.c | ||
80 | create mode 100644 stubs/io_uring.c | ||
81 | 25 | ||
82 | -- | 26 | -- |
83 | 2.24.1 | 27 | 2.48.1 |
84 | |||
85 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Paolo Bonzini <pbonzini@redhat.com> | ||
2 | 1 | ||
3 | It has been unused since commit 00e30f0 ("block/backup: use backup-top instead | ||
4 | of write notifiers", 2019-10-01); drop it to simplify the code. | ||
5 | |||
6 | While at it, drop redundant assertions on flags. | ||
7 | |||
8 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
9 | Message-id: 1578495356-46219-2-git-send-email-pbonzini@redhat.com | ||
10 | Message-Id: <1578495356-46219-2-git-send-email-pbonzini@redhat.com> | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | --- | ||
13 | block/io.c | 18 ++++-------------- | ||
14 | include/block/block.h | 12 ------------ | ||
15 | 2 files changed, 4 insertions(+), 26 deletions(-) | ||
16 | |||
17 | diff --git a/block/io.c b/block/io.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/block/io.c | ||
20 | +++ b/block/io.c | ||
21 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
22 | * potential fallback support, if we ever implement any read flags | ||
23 | * to pass through to drivers. For now, there aren't any | ||
24 | * passthrough flags. */ | ||
25 | - assert(!(flags & ~(BDRV_REQ_NO_SERIALISING | BDRV_REQ_COPY_ON_READ | | ||
26 | - BDRV_REQ_PREFETCH))); | ||
27 | + assert(!(flags & ~(BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH))); | ||
28 | |||
29 | /* Handle Copy on Read and associated serialisation */ | ||
30 | if (flags & BDRV_REQ_COPY_ON_READ) { | ||
31 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
32 | bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); | ||
33 | } | ||
34 | |||
35 | - /* BDRV_REQ_SERIALISING is only for write operation */ | ||
36 | - assert(!(flags & BDRV_REQ_SERIALISING)); | ||
37 | - | ||
38 | - if (!(flags & BDRV_REQ_NO_SERIALISING)) { | ||
39 | - bdrv_wait_serialising_requests(req); | ||
40 | - } | ||
41 | + bdrv_wait_serialising_requests(req); | ||
42 | |||
43 | if (flags & BDRV_REQ_COPY_ON_READ) { | ||
44 | int64_t pnum; | ||
45 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, | ||
46 | bdrv_inc_in_flight(bs); | ||
47 | |||
48 | /* Don't do copy-on-read if we read data before write operation */ | ||
49 | - if (atomic_read(&bs->copy_on_read) && !(flags & BDRV_REQ_NO_SERIALISING)) { | ||
50 | + if (atomic_read(&bs->copy_on_read)) { | ||
51 | flags |= BDRV_REQ_COPY_ON_READ; | ||
52 | } | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes, | ||
55 | return -EPERM; | ||
56 | } | ||
57 | |||
58 | - /* BDRV_REQ_NO_SERIALISING is only for read operation */ | ||
59 | - assert(!(flags & BDRV_REQ_NO_SERIALISING)); | ||
60 | assert(!(bs->open_flags & BDRV_O_INACTIVE)); | ||
61 | assert((bs->open_flags & BDRV_O_NO_IO) == 0); | ||
62 | assert(!(flags & ~BDRV_REQ_MASK)); | ||
63 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal( | ||
64 | |||
65 | /* BDRV_REQ_SERIALISING is only for write operation */ | ||
66 | assert(!(read_flags & BDRV_REQ_SERIALISING)); | ||
67 | - if (!(read_flags & BDRV_REQ_NO_SERIALISING)) { | ||
68 | - bdrv_wait_serialising_requests(&req); | ||
69 | - } | ||
70 | + bdrv_wait_serialising_requests(&req); | ||
71 | |||
72 | ret = src->bs->drv->bdrv_co_copy_range_from(src->bs, | ||
73 | src, src_offset, | ||
74 | diff --git a/include/block/block.h b/include/block/block.h | ||
75 | index XXXXXXX..XXXXXXX 100644 | ||
76 | --- a/include/block/block.h | ||
77 | +++ b/include/block/block.h | ||
78 | @@ -XXX,XX +XXX,XX @@ typedef enum { | ||
79 | */ | ||
80 | BDRV_REQ_MAY_UNMAP = 0x4, | ||
81 | |||
82 | - /* | ||
83 | - * The BDRV_REQ_NO_SERIALISING flag is only valid for reads and means that | ||
84 | - * we don't want wait_serialising_requests() during the read operation. | ||
85 | - * | ||
86 | - * This flag is used for backup copy-on-write operations, when we need to | ||
87 | - * read old data before write (write notifier triggered). It is okay since | ||
88 | - * we already waited for other serializing requests in the initiating write | ||
89 | - * (see bdrv_aligned_pwritev), and it is necessary if the initiating write | ||
90 | - * is already serializing (without the flag, the read would deadlock | ||
91 | - * waiting for the serialising write to complete). | ||
92 | - */ | ||
93 | - BDRV_REQ_NO_SERIALISING = 0x8, | ||
94 | BDRV_REQ_FUA = 0x10, | ||
95 | BDRV_REQ_WRITE_COMPRESSED = 0x20, | ||
96 | |||
97 | -- | ||
98 | 2.24.1 | ||
99 | |||
100 | diff view generated by jsdifflib |
1 | From: Paolo Bonzini <pbonzini@redhat.com> | 1 | From: Denis Rastyogin <gerben@altlinux.org> |
---|---|---|---|
2 | 2 | ||
3 | Marking without waiting would not result in actual serialising behavior. | 3 | This error was discovered by fuzzing qemu-img. |
4 | Thus, make a call to bdrv_mark_request_serialising sufficient for | ||
5 | serialisation to happen. | ||
6 | 4 | ||
7 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | 5 | In the QED block driver, the need_check_timer timer is freed in |
8 | Message-id: 1578495356-46219-3-git-send-email-pbonzini@redhat.com | 6 | bdrv_qed_detach_aio_context, but the pointer to the timer is not |
9 | Message-Id: <1578495356-46219-3-git-send-email-pbonzini@redhat.com> | 7 | set to NULL. This can lead to a use-after-free scenario |
8 | in bdrv_qed_drain_begin(). | ||
9 | |||
10 | The need_check_timer pointer is set to NULL after freeing the timer, | ||
11 | which helps catch this condition when checking in bdrv_qed_drain_begin(). | ||
12 | |||
13 | Closes: https://gitlab.com/qemu-project/qemu/-/issues/2852 | ||
14 | Signed-off-by: Denis Rastyogin <gerben@altlinux.org> | ||
15 | Message-ID: <20250304083927.37681-1-gerben@altlinux.org> | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 16 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
11 | --- | 17 | --- |
12 | block/file-posix.c | 1 - | 18 | block/qed.c | 1 + |
13 | block/io.c | 40 +++++++++++++++++---------------------- | 19 | 1 file changed, 1 insertion(+) |
14 | include/block/block_int.h | 3 +-- | ||
15 | 3 files changed, 18 insertions(+), 26 deletions(-) | ||
16 | 20 | ||
17 | diff --git a/block/file-posix.c b/block/file-posix.c | 21 | diff --git a/block/qed.c b/block/qed.c |
18 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/block/file-posix.c | 23 | --- a/block/qed.c |
20 | +++ b/block/file-posix.c | 24 | +++ b/block/qed.c |
21 | @@ -XXX,XX +XXX,XX @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, | 25 | @@ -XXX,XX +XXX,XX @@ static void bdrv_qed_detach_aio_context(BlockDriverState *bs) |
22 | req->overlap_bytes = req->bytes; | 26 | |
23 | 27 | qed_cancel_need_check_timer(s); | |
24 | bdrv_mark_request_serialising(req, bs->bl.request_alignment); | 28 | timer_free(s->need_check_timer); |
25 | - bdrv_wait_serialising_requests(req); | 29 | + s->need_check_timer = NULL; |
26 | } | ||
27 | #endif | ||
28 | |||
29 | diff --git a/block/io.c b/block/io.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/block/io.c | ||
32 | +++ b/block/io.c | ||
33 | @@ -XXX,XX +XXX,XX @@ | ||
34 | #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) | ||
35 | |||
36 | static void bdrv_parent_cb_resize(BlockDriverState *bs); | ||
37 | +static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); | ||
38 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, | ||
39 | int64_t offset, int bytes, BdrvRequestFlags flags); | ||
40 | |||
41 | @@ -XXX,XX +XXX,XX @@ static void tracked_request_begin(BdrvTrackedRequest *req, | ||
42 | qemu_co_mutex_unlock(&bs->reqs_lock); | ||
43 | } | 30 | } |
44 | 31 | ||
45 | -void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) | 32 | static void bdrv_qed_attach_aio_context(BlockDriverState *bs, |
46 | +bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) | ||
47 | { | ||
48 | int64_t overlap_offset = req->offset & ~(align - 1); | ||
49 | uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align) | ||
50 | @@ -XXX,XX +XXX,XX @@ void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) | ||
51 | |||
52 | req->overlap_offset = MIN(req->overlap_offset, overlap_offset); | ||
53 | req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); | ||
54 | -} | ||
55 | - | ||
56 | -static bool is_request_serialising_and_aligned(BdrvTrackedRequest *req) | ||
57 | -{ | ||
58 | - /* | ||
59 | - * If the request is serialising, overlap_offset and overlap_bytes are set, | ||
60 | - * so we can check if the request is aligned. Otherwise, don't care and | ||
61 | - * return false. | ||
62 | - */ | ||
63 | - | ||
64 | - return req->serialising && (req->offset == req->overlap_offset) && | ||
65 | - (req->bytes == req->overlap_bytes); | ||
66 | + return bdrv_wait_serialising_requests(req); | ||
67 | } | ||
68 | |||
69 | /** | ||
70 | @@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs) | ||
71 | bdrv_wakeup(bs); | ||
72 | } | ||
73 | |||
74 | -bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self) | ||
75 | +static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self) | ||
76 | { | ||
77 | BlockDriverState *bs = self->bs; | ||
78 | BdrvTrackedRequest *req; | ||
79 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
80 | * it ensures that the CoR read and write operations are atomic and | ||
81 | * guest writes cannot interleave between them. */ | ||
82 | bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); | ||
83 | + } else { | ||
84 | + bdrv_wait_serialising_requests(req); | ||
85 | } | ||
86 | |||
87 | - bdrv_wait_serialising_requests(req); | ||
88 | - | ||
89 | if (flags & BDRV_REQ_COPY_ON_READ) { | ||
90 | int64_t pnum; | ||
91 | |||
92 | @@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_prepare(BdrvChild *child, int64_t offset, uint64_t bytes, | ||
93 | assert(!(flags & ~BDRV_REQ_MASK)); | ||
94 | |||
95 | if (flags & BDRV_REQ_SERIALISING) { | ||
96 | - bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); | ||
97 | + waited = bdrv_mark_request_serialising(req, bdrv_get_cluster_size(bs)); | ||
98 | + /* | ||
99 | + * For a misaligned request we should have already waited earlier, | ||
100 | + * because we come after bdrv_padding_rmw_read which must be called | ||
101 | + * with the request already marked as serialising. | ||
102 | + */ | ||
103 | + assert(!waited || | ||
104 | + (req->offset == req->overlap_offset && | ||
105 | + req->bytes == req->overlap_bytes)); | ||
106 | + } else { | ||
107 | + bdrv_wait_serialising_requests(req); | ||
108 | } | ||
109 | |||
110 | - waited = bdrv_wait_serialising_requests(req); | ||
111 | - | ||
112 | - assert(!waited || !req->serialising || | ||
113 | - is_request_serialising_and_aligned(req)); | ||
114 | assert(req->overlap_offset <= offset); | ||
115 | assert(offset + bytes <= req->overlap_offset + req->overlap_bytes); | ||
116 | assert(end_sector <= bs->total_sectors || child->perm & BLK_PERM_RESIZE); | ||
117 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, | ||
118 | padding = bdrv_init_padding(bs, offset, bytes, &pad); | ||
119 | if (padding) { | ||
120 | bdrv_mark_request_serialising(req, align); | ||
121 | - bdrv_wait_serialising_requests(req); | ||
122 | |||
123 | bdrv_padding_rmw_read(child, req, &pad, true); | ||
124 | |||
125 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, | ||
126 | |||
127 | if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) { | ||
128 | bdrv_mark_request_serialising(&req, align); | ||
129 | - bdrv_wait_serialising_requests(&req); | ||
130 | bdrv_padding_rmw_read(child, &req, &pad, false); | ||
131 | } | ||
132 | |||
133 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
134 | index XXXXXXX..XXXXXXX 100644 | ||
135 | --- a/include/block/block_int.h | ||
136 | +++ b/include/block/block_int.h | ||
137 | @@ -XXX,XX +XXX,XX @@ extern unsigned int bdrv_drain_all_count; | ||
138 | void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent); | ||
139 | void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent); | ||
140 | |||
141 | -bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); | ||
142 | -void bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); | ||
143 | +bool coroutine_fn bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align); | ||
144 | BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs); | ||
145 | |||
146 | int get_tmp_filename(char *filename, int size); | ||
147 | -- | 33 | -- |
148 | 2.24.1 | 34 | 2.48.1 |
149 | |||
150 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Paolo Bonzini <pbonzini@redhat.com> | ||
2 | 1 | ||
3 | bdrv_mark_request_serialising is writing the overlap_offset and | ||
4 | overlap_bytes fields of BdrvTrackedRequest. Take bs->reqs_lock | ||
5 | for the whole duration of it, and not just when waiting for | ||
6 | serialising requests, so that tracked_request_overlaps does not | ||
7 | look at a half-updated request. | ||
8 | |||
9 | The new code does not unlock/relock around retries. Doing so is unnecessary | ||
10 | because a retry is always preceded by a CoQueue wait, which already | ||
11 | releases and reacquires bs->reqs_lock. | ||
12 | |||
13 | Reported-by: Peter Lieven <pl@kamp.de> | ||
14 | Signed-off-by: Paolo Bonzini <pbonzini@redhat.com> | ||
15 | Message-id: 1578495356-46219-4-git-send-email-pbonzini@redhat.com | ||
16 | Message-Id: <1578495356-46219-4-git-send-email-pbonzini@redhat.com> | ||
17 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
18 | --- | ||
19 | block/io.c | 112 ++++++++++++++++++++++++++++++----------------------- | ||
20 | 1 file changed, 63 insertions(+), 49 deletions(-) | ||
21 | |||
22 | diff --git a/block/io.c b/block/io.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/block/io.c | ||
25 | +++ b/block/io.c | ||
26 | @@ -XXX,XX +XXX,XX @@ | ||
27 | #define MAX_BOUNCE_BUFFER (32768 << BDRV_SECTOR_BITS) | ||
28 | |||
29 | static void bdrv_parent_cb_resize(BlockDriverState *bs); | ||
30 | -static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self); | ||
31 | static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, | ||
32 | int64_t offset, int bytes, BdrvRequestFlags flags); | ||
33 | |||
34 | @@ -XXX,XX +XXX,XX @@ static void tracked_request_begin(BdrvTrackedRequest *req, | ||
35 | qemu_co_mutex_unlock(&bs->reqs_lock); | ||
36 | } | ||
37 | |||
38 | +static bool tracked_request_overlaps(BdrvTrackedRequest *req, | ||
39 | + int64_t offset, uint64_t bytes) | ||
40 | +{ | ||
41 | + /* aaaa bbbb */ | ||
42 | + if (offset >= req->overlap_offset + req->overlap_bytes) { | ||
43 | + return false; | ||
44 | + } | ||
45 | + /* bbbb aaaa */ | ||
46 | + if (req->overlap_offset >= offset + bytes) { | ||
47 | + return false; | ||
48 | + } | ||
49 | + return true; | ||
50 | +} | ||
51 | + | ||
52 | +static bool coroutine_fn | ||
53 | +bdrv_wait_serialising_requests_locked(BlockDriverState *bs, | ||
54 | + BdrvTrackedRequest *self) | ||
55 | +{ | ||
56 | + BdrvTrackedRequest *req; | ||
57 | + bool retry; | ||
58 | + bool waited = false; | ||
59 | + | ||
60 | + do { | ||
61 | + retry = false; | ||
62 | + QLIST_FOREACH(req, &bs->tracked_requests, list) { | ||
63 | + if (req == self || (!req->serialising && !self->serialising)) { | ||
64 | + continue; | ||
65 | + } | ||
66 | + if (tracked_request_overlaps(req, self->overlap_offset, | ||
67 | + self->overlap_bytes)) | ||
68 | + { | ||
69 | + /* Hitting this means there was a reentrant request, for | ||
70 | + * example, a block driver issuing nested requests. This must | ||
71 | + * never happen since it means deadlock. | ||
72 | + */ | ||
73 | + assert(qemu_coroutine_self() != req->co); | ||
74 | + | ||
75 | + /* If the request is already (indirectly) waiting for us, or | ||
76 | + * will wait for us as soon as it wakes up, then just go on | ||
77 | + * (instead of producing a deadlock in the former case). */ | ||
78 | + if (!req->waiting_for) { | ||
79 | + self->waiting_for = req; | ||
80 | + qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock); | ||
81 | + self->waiting_for = NULL; | ||
82 | + retry = true; | ||
83 | + waited = true; | ||
84 | + break; | ||
85 | + } | ||
86 | + } | ||
87 | + } | ||
88 | + } while (retry); | ||
89 | + return waited; | ||
90 | +} | ||
91 | + | ||
92 | bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) | ||
93 | { | ||
94 | + BlockDriverState *bs = req->bs; | ||
95 | int64_t overlap_offset = req->offset & ~(align - 1); | ||
96 | uint64_t overlap_bytes = ROUND_UP(req->offset + req->bytes, align) | ||
97 | - overlap_offset; | ||
98 | + bool waited; | ||
99 | |||
100 | + qemu_co_mutex_lock(&bs->reqs_lock); | ||
101 | if (!req->serialising) { | ||
102 | atomic_inc(&req->bs->serialising_in_flight); | ||
103 | req->serialising = true; | ||
104 | @@ -XXX,XX +XXX,XX @@ bool bdrv_mark_request_serialising(BdrvTrackedRequest *req, uint64_t align) | ||
105 | |||
106 | req->overlap_offset = MIN(req->overlap_offset, overlap_offset); | ||
107 | req->overlap_bytes = MAX(req->overlap_bytes, overlap_bytes); | ||
108 | - return bdrv_wait_serialising_requests(req); | ||
109 | + waited = bdrv_wait_serialising_requests_locked(bs, req); | ||
110 | + qemu_co_mutex_unlock(&bs->reqs_lock); | ||
111 | + return waited; | ||
112 | } | ||
113 | |||
114 | /** | ||
115 | @@ -XXX,XX +XXX,XX @@ static int bdrv_get_cluster_size(BlockDriverState *bs) | ||
116 | } | ||
117 | } | ||
118 | |||
119 | -static bool tracked_request_overlaps(BdrvTrackedRequest *req, | ||
120 | - int64_t offset, uint64_t bytes) | ||
121 | -{ | ||
122 | - /* aaaa bbbb */ | ||
123 | - if (offset >= req->overlap_offset + req->overlap_bytes) { | ||
124 | - return false; | ||
125 | - } | ||
126 | - /* bbbb aaaa */ | ||
127 | - if (req->overlap_offset >= offset + bytes) { | ||
128 | - return false; | ||
129 | - } | ||
130 | - return true; | ||
131 | -} | ||
132 | - | ||
133 | void bdrv_inc_in_flight(BlockDriverState *bs) | ||
134 | { | ||
135 | atomic_inc(&bs->in_flight); | ||
136 | @@ -XXX,XX +XXX,XX @@ void bdrv_dec_in_flight(BlockDriverState *bs) | ||
137 | static bool coroutine_fn bdrv_wait_serialising_requests(BdrvTrackedRequest *self) | ||
138 | { | ||
139 | BlockDriverState *bs = self->bs; | ||
140 | - BdrvTrackedRequest *req; | ||
141 | - bool retry; | ||
142 | bool waited = false; | ||
143 | |||
144 | if (!atomic_read(&bs->serialising_in_flight)) { | ||
145 | return false; | ||
146 | } | ||
147 | |||
148 | - do { | ||
149 | - retry = false; | ||
150 | - qemu_co_mutex_lock(&bs->reqs_lock); | ||
151 | - QLIST_FOREACH(req, &bs->tracked_requests, list) { | ||
152 | - if (req == self || (!req->serialising && !self->serialising)) { | ||
153 | - continue; | ||
154 | - } | ||
155 | - if (tracked_request_overlaps(req, self->overlap_offset, | ||
156 | - self->overlap_bytes)) | ||
157 | - { | ||
158 | - /* Hitting this means there was a reentrant request, for | ||
159 | - * example, a block driver issuing nested requests. This must | ||
160 | - * never happen since it means deadlock. | ||
161 | - */ | ||
162 | - assert(qemu_coroutine_self() != req->co); | ||
163 | - | ||
164 | - /* If the request is already (indirectly) waiting for us, or | ||
165 | - * will wait for us as soon as it wakes up, then just go on | ||
166 | - * (instead of producing a deadlock in the former case). */ | ||
167 | - if (!req->waiting_for) { | ||
168 | - self->waiting_for = req; | ||
169 | - qemu_co_queue_wait(&req->wait_queue, &bs->reqs_lock); | ||
170 | - self->waiting_for = NULL; | ||
171 | - retry = true; | ||
172 | - waited = true; | ||
173 | - break; | ||
174 | - } | ||
175 | - } | ||
176 | - } | ||
177 | - qemu_co_mutex_unlock(&bs->reqs_lock); | ||
178 | - } while (retry); | ||
179 | + qemu_co_mutex_lock(&bs->reqs_lock); | ||
180 | + waited = bdrv_wait_serialising_requests_locked(bs, self); | ||
181 | + qemu_co_mutex_unlock(&bs->reqs_lock); | ||
182 | |||
183 | return waited; | ||
184 | } | ||
185 | -- | ||
186 | 2.24.1 | ||
187 | |||
188 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com> | ||
5 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20200120141858.587874-2-stefanha@redhat.com | ||
8 | Message-Id: <20200120141858.587874-2-stefanha@redhat.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | configure | 27 +++++++++++++++++++++++++++ | ||
12 | 1 file changed, 27 insertions(+) | ||
13 | |||
14 | diff --git a/configure b/configure | ||
15 | index XXXXXXX..XXXXXXX 100755 | ||
16 | --- a/configure | ||
17 | +++ b/configure | ||
18 | @@ -XXX,XX +XXX,XX @@ xen="" | ||
19 | xen_ctrl_version="" | ||
20 | xen_pci_passthrough="" | ||
21 | linux_aio="" | ||
22 | +linux_io_uring="" | ||
23 | cap_ng="" | ||
24 | attr="" | ||
25 | libattr="" | ||
26 | @@ -XXX,XX +XXX,XX @@ for opt do | ||
27 | ;; | ||
28 | --enable-linux-aio) linux_aio="yes" | ||
29 | ;; | ||
30 | + --disable-linux-io-uring) linux_io_uring="no" | ||
31 | + ;; | ||
32 | + --enable-linux-io-uring) linux_io_uring="yes" | ||
33 | + ;; | ||
34 | --disable-attr) attr="no" | ||
35 | ;; | ||
36 | --enable-attr) attr="yes" | ||
37 | @@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available: | ||
38 | vde support for vde network | ||
39 | netmap support for netmap network | ||
40 | linux-aio Linux AIO support | ||
41 | + linux-io-uring Linux io_uring support | ||
42 | cap-ng libcap-ng support | ||
43 | attr attr and xattr support | ||
44 | vhost-net vhost-net kernel acceleration support | ||
45 | @@ -XXX,XX +XXX,XX @@ EOF | ||
46 | linux_aio=no | ||
47 | fi | ||
48 | fi | ||
49 | +########################################## | ||
50 | +# linux-io-uring probe | ||
51 | + | ||
52 | +if test "$linux_io_uring" != "no" ; then | ||
53 | + if $pkg_config liburing; then | ||
54 | + linux_io_uring_cflags=$($pkg_config --cflags liburing) | ||
55 | + linux_io_uring_libs=$($pkg_config --libs liburing) | ||
56 | + linux_io_uring=yes | ||
57 | + else | ||
58 | + if test "$linux_io_uring" = "yes" ; then | ||
59 | + feature_not_found "linux io_uring" "Install liburing devel" | ||
60 | + fi | ||
61 | + linux_io_uring=no | ||
62 | + fi | ||
63 | +fi | ||
64 | |||
65 | ########################################## | ||
66 | # TPM emulation is only on POSIX | ||
67 | @@ -XXX,XX +XXX,XX @@ echo "PIE $pie" | ||
68 | echo "vde support $vde" | ||
69 | echo "netmap support $netmap" | ||
70 | echo "Linux AIO support $linux_aio" | ||
71 | +echo "Linux io_uring support $linux_io_uring" | ||
72 | echo "ATTR/XATTR support $attr" | ||
73 | echo "Install blobs $blobs" | ||
74 | echo "KVM support $kvm" | ||
75 | @@ -XXX,XX +XXX,XX @@ fi | ||
76 | if test "$linux_aio" = "yes" ; then | ||
77 | echo "CONFIG_LINUX_AIO=y" >> $config_host_mak | ||
78 | fi | ||
79 | +if test "$linux_io_uring" = "yes" ; then | ||
80 | + echo "CONFIG_LINUX_IO_URING=y" >> $config_host_mak | ||
81 | + echo "LINUX_IO_URING_CFLAGS=$linux_io_uring_cflags" >> $config_host_mak | ||
82 | + echo "LINUX_IO_URING_LIBS=$linux_io_uring_libs" >> $config_host_mak | ||
83 | +fi | ||
84 | if test "$attr" = "yes" ; then | ||
85 | echo "CONFIG_ATTR=y" >> $config_host_mak | ||
86 | fi | ||
87 | -- | ||
88 | 2.24.1 | ||
89 | |||
90 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Since io_uring is the actual name of the Linux API, we use it as enum | ||
4 | value even though the QAPI schema conventions would prefer io-uring. | ||
5 | |||
6 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
7 | Acked-by: Markus Armbruster <armbru@redhat.com> | ||
8 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Message-id: 20200120141858.587874-3-stefanha@redhat.com | ||
11 | Message-Id: <20200120141858.587874-3-stefanha@redhat.com> | ||
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | --- | ||
14 | qapi/block-core.json | 4 +++- | ||
15 | 1 file changed, 3 insertions(+), 1 deletion(-) | ||
16 | |||
17 | diff --git a/qapi/block-core.json b/qapi/block-core.json | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/qapi/block-core.json | ||
20 | +++ b/qapi/block-core.json | ||
21 | @@ -XXX,XX +XXX,XX @@ | ||
22 | # | ||
23 | # @threads: Use qemu's thread pool | ||
24 | # @native: Use native AIO backend (only Linux and Windows) | ||
25 | +# @io_uring: Use linux io_uring (since 5.0) | ||
26 | # | ||
27 | # Since: 2.9 | ||
28 | ## | ||
29 | { 'enum': 'BlockdevAioOptions', | ||
30 | - 'data': [ 'threads', 'native' ] } | ||
31 | + 'data': [ 'threads', 'native', | ||
32 | + { 'name': 'io_uring', 'if': 'defined(CONFIG_LINUX_IO_URING)' } ] } | ||
33 | |||
34 | ## | ||
35 | # @BlockdevCacheOptions: | ||
36 | -- | ||
37 | 2.24.1 | ||
38 | |||
39 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com> | ||
5 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20200120141858.587874-4-stefanha@redhat.com | ||
8 | Message-Id: <20200120141858.587874-4-stefanha@redhat.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | include/block/block.h | 1 + | ||
12 | 1 file changed, 1 insertion(+) | ||
13 | |||
14 | diff --git a/include/block/block.h b/include/block/block.h | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/include/block/block.h | ||
17 | +++ b/include/block/block.h | ||
18 | @@ -XXX,XX +XXX,XX @@ typedef struct HDGeometry { | ||
19 | ignoring the format layer */ | ||
20 | #define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */ | ||
21 | #define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */ | ||
22 | +#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */ | ||
23 | |||
24 | #define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH) | ||
25 | |||
26 | -- | ||
27 | 2.24.1 | ||
28 | |||
29 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Aborts when sqe fails to be set as sqes cannot be returned to the | ||
4 | ring. Adds slow path for short reads for older kernels. | ||
5 | |||
6 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
7 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | Message-id: 20200120141858.587874-5-stefanha@redhat.com | ||
10 | Message-Id: <20200120141858.587874-5-stefanha@redhat.com> | ||
11 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
12 | --- | ||
13 | MAINTAINERS | 8 + | ||
14 | block/Makefile.objs | 3 + | ||
15 | block/io_uring.c | 401 ++++++++++++++++++++++++++++++++++++++++ | ||
16 | include/block/aio.h | 16 +- | ||
17 | include/block/raw-aio.h | 12 ++ | ||
18 | 5 files changed, 439 insertions(+), 1 deletion(-) | ||
19 | create mode 100644 block/io_uring.c | ||
20 | |||
21 | diff --git a/MAINTAINERS b/MAINTAINERS | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/MAINTAINERS | ||
24 | +++ b/MAINTAINERS | ||
25 | @@ -XXX,XX +XXX,XX @@ F: block/file-posix.c | ||
26 | F: block/file-win32.c | ||
27 | F: block/win32-aio.c | ||
28 | |||
29 | +Linux io_uring | ||
30 | +M: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
31 | +M: Julia Suvorova <jusual@redhat.com> | ||
32 | +M: Stefan Hajnoczi <stefanha@redhat.com> | ||
33 | +L: qemu-block@nongnu.org | ||
34 | +S: Maintained | ||
35 | +F: block/io_uring.c | ||
36 | + | ||
37 | qcow2 | ||
38 | M: Kevin Wolf <kwolf@redhat.com> | ||
39 | M: Max Reitz <mreitz@redhat.com> | ||
40 | diff --git a/block/Makefile.objs b/block/Makefile.objs | ||
41 | index XXXXXXX..XXXXXXX 100644 | ||
42 | --- a/block/Makefile.objs | ||
43 | +++ b/block/Makefile.objs | ||
44 | @@ -XXX,XX +XXX,XX @@ block-obj-y += block-backend.o snapshot.o qapi.o | ||
45 | block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o | ||
46 | block-obj-$(CONFIG_POSIX) += file-posix.o | ||
47 | block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o | ||
48 | +block-obj-$(CONFIG_LINUX_IO_URING) += io_uring.o | ||
49 | block-obj-y += null.o mirror.o commit.o io.o create.o | ||
50 | block-obj-y += throttle-groups.o | ||
51 | block-obj-$(CONFIG_LINUX) += nvme.o | ||
52 | @@ -XXX,XX +XXX,XX @@ block-obj-$(if $(CONFIG_LZFSE),m,n) += dmg-lzfse.o | ||
53 | dmg-lzfse.o-libs := $(LZFSE_LIBS) | ||
54 | qcow.o-libs := -lz | ||
55 | linux-aio.o-libs := -laio | ||
56 | +io_uring.o-cflags := $(LINUX_IO_URING_CFLAGS) | ||
57 | +io_uring.o-libs := $(LINUX_IO_URING_LIBS) | ||
58 | parallels.o-cflags := $(LIBXML2_CFLAGS) | ||
59 | parallels.o-libs := $(LIBXML2_LIBS) | ||
60 | diff --git a/block/io_uring.c b/block/io_uring.c | ||
61 | new file mode 100644 | ||
62 | index XXXXXXX..XXXXXXX | ||
63 | --- /dev/null | ||
64 | +++ b/block/io_uring.c | ||
65 | @@ -XXX,XX +XXX,XX @@ | ||
66 | +/* | ||
67 | + * Linux io_uring support. | ||
68 | + * | ||
69 | + * Copyright (C) 2009 IBM, Corp. | ||
70 | + * Copyright (C) 2009 Red Hat, Inc. | ||
71 | + * Copyright (C) 2019 Aarushi Mehta | ||
72 | + * | ||
73 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
74 | + * See the COPYING file in the top-level directory. | ||
75 | + */ | ||
76 | +#include "qemu/osdep.h" | ||
77 | +#include <liburing.h> | ||
78 | +#include "qemu-common.h" | ||
79 | +#include "block/aio.h" | ||
80 | +#include "qemu/queue.h" | ||
81 | +#include "block/block.h" | ||
82 | +#include "block/raw-aio.h" | ||
83 | +#include "qemu/coroutine.h" | ||
84 | +#include "qapi/error.h" | ||
85 | + | ||
86 | +/* io_uring ring size */ | ||
87 | +#define MAX_ENTRIES 128 | ||
88 | + | ||
89 | +typedef struct LuringAIOCB { | ||
90 | + Coroutine *co; | ||
91 | + struct io_uring_sqe sqeq; | ||
92 | + ssize_t ret; | ||
93 | + QEMUIOVector *qiov; | ||
94 | + bool is_read; | ||
95 | + QSIMPLEQ_ENTRY(LuringAIOCB) next; | ||
96 | + | ||
97 | + /* | ||
98 | + * Buffered reads may require resubmission, see | ||
99 | + * luring_resubmit_short_read(). | ||
100 | + */ | ||
101 | + int total_read; | ||
102 | + QEMUIOVector resubmit_qiov; | ||
103 | +} LuringAIOCB; | ||
104 | + | ||
105 | +typedef struct LuringQueue { | ||
106 | + int plugged; | ||
107 | + unsigned int in_queue; | ||
108 | + unsigned int in_flight; | ||
109 | + bool blocked; | ||
110 | + QSIMPLEQ_HEAD(, LuringAIOCB) submit_queue; | ||
111 | +} LuringQueue; | ||
112 | + | ||
113 | +typedef struct LuringState { | ||
114 | + AioContext *aio_context; | ||
115 | + | ||
116 | + struct io_uring ring; | ||
117 | + | ||
118 | + /* io queue for submit at batch. Protected by AioContext lock. */ | ||
119 | + LuringQueue io_q; | ||
120 | + | ||
121 | + /* I/O completion processing. Only runs in I/O thread. */ | ||
122 | + QEMUBH *completion_bh; | ||
123 | +} LuringState; | ||
124 | + | ||
125 | +/** | ||
126 | + * luring_resubmit: | ||
127 | + * | ||
128 | + * Resubmit a request by appending it to submit_queue. The caller must ensure | ||
129 | + * that ioq_submit() is called later so that submit_queue requests are started. | ||
130 | + */ | ||
131 | +static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb) | ||
132 | +{ | ||
133 | + QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next); | ||
134 | + s->io_q.in_queue++; | ||
135 | +} | ||
136 | + | ||
137 | +/** | ||
138 | + * luring_resubmit_short_read: | ||
139 | + * | ||
140 | + * Before Linux commit 9d93a3f5a0c ("io_uring: punt short reads to async | ||
141 | + * context") a buffered I/O request with the start of the file range in the | ||
142 | + * page cache could result in a short read. Applications need to resubmit the | ||
143 | + * remaining read request. | ||
144 | + * | ||
145 | + * This is a slow path but recent kernels never take it. | ||
146 | + */ | ||
147 | +static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb, | ||
148 | + int nread) | ||
149 | +{ | ||
150 | + QEMUIOVector *resubmit_qiov; | ||
151 | + size_t remaining; | ||
152 | + | ||
153 | + /* Accumulate read position: nread only counts the latest completion */ | ||
154 | + luringcb->total_read += nread; | ||
155 | + remaining = luringcb->qiov->size - luringcb->total_read; | ||
156 | + | ||
157 | + /* Shorten qiov */ | ||
158 | + resubmit_qiov = &luringcb->resubmit_qiov; | ||
159 | + if (resubmit_qiov->iov == NULL) { | ||
160 | + qemu_iovec_init(resubmit_qiov, luringcb->qiov->niov); | ||
161 | + } else { | ||
162 | + qemu_iovec_reset(resubmit_qiov); | ||
163 | + } | ||
164 | + qemu_iovec_concat(resubmit_qiov, luringcb->qiov, luringcb->total_read, | ||
165 | + remaining); | ||
166 | + | ||
167 | + /* Update sqe: advance the file offset, point at the remaining iovecs */ | ||
168 | + luringcb->sqeq.off += nread; | ||
169 | + luringcb->sqeq.addr = (__u64)(uintptr_t)luringcb->resubmit_qiov.iov; | ||
170 | + luringcb->sqeq.len = luringcb->resubmit_qiov.niov; | ||
171 | + | ||
172 | + luring_resubmit(s, luringcb); | ||
173 | +} | ||
174 | + | ||
175 | +/** | ||
176 | + * luring_process_completions: | ||
177 | + * @s: AIO state | ||
178 | + * | ||
179 | + * Fetches completed I/O requests, consumes cqes and invokes their callbacks. | ||
180 | + * The function is somewhat tricky because it supports nested event loops, for | ||
181 | + * example when a request callback invokes aio_poll(). | ||
182 | + * | ||
183 | + * Function schedules BH completion so it can be called again in a nested | ||
184 | + * event loop. When there are no events left to complete the BH is being | ||
185 | + * canceled. | ||
186 | + * | ||
187 | + */ | ||
188 | +static void luring_process_completions(LuringState *s) | ||
189 | +{ | ||
190 | + struct io_uring_cqe *cqes; | ||
191 | + int total_bytes; | ||
192 | + /* | ||
193 | + * Request completion callbacks can run the nested event loop. | ||
194 | + * Schedule ourselves so the nested event loop will "see" remaining | ||
195 | + * completed requests and process them. Without this, completion | ||
196 | + * callbacks that wait for other requests using a nested event loop | ||
197 | + * would hang forever. | ||
198 | + * | ||
199 | + * This workaround is needed because io_uring uses poll_wait, which | ||
200 | + * is woken up when new events are added to the uring, thus polling on | ||
201 | + * the same uring fd will block unless more events are received. | ||
202 | + * | ||
203 | + * Other leaf block drivers (drivers that access the data themselves) | ||
204 | + * are networking based, so they poll sockets for data and run the | ||
205 | + * correct coroutine. | ||
206 | + */ | ||
207 | + qemu_bh_schedule(s->completion_bh); | ||
208 | + | ||
209 | + while (io_uring_peek_cqe(&s->ring, &cqes) == 0) { | ||
210 | + LuringAIOCB *luringcb; | ||
211 | + int ret; | ||
212 | + | ||
213 | + if (!cqes) { | ||
214 | + break; | ||
215 | + } | ||
216 | + | ||
217 | + luringcb = io_uring_cqe_get_data(cqes); | ||
218 | + ret = cqes->res; | ||
219 | + io_uring_cqe_seen(&s->ring, cqes); | ||
220 | + cqes = NULL; | ||
221 | + | ||
222 | + /* Change counters one-by-one because we can be nested. */ | ||
223 | + s->io_q.in_flight--; | ||
224 | + | ||
225 | + /* total_read is non-zero only for resubmitted read requests */ | ||
226 | + total_bytes = ret + luringcb->total_read; | ||
227 | + | ||
228 | + if (ret < 0) { | ||
229 | + if (ret == -EINTR) { | ||
230 | + luring_resubmit(s, luringcb); | ||
231 | + continue; | ||
232 | + } | ||
233 | + } else if (!luringcb->qiov) { | ||
234 | + goto end; | ||
235 | + } else if (total_bytes == luringcb->qiov->size) { | ||
236 | + ret = 0; | ||
237 | + /* Only read/write */ | ||
238 | + } else { | ||
239 | + /* Short Read/Write */ | ||
240 | + if (luringcb->is_read) { | ||
241 | + if (ret > 0) { | ||
242 | + luring_resubmit_short_read(s, luringcb, ret); | ||
243 | + continue; | ||
244 | + } else { | ||
245 | + /* Pad with zeroes */ | ||
246 | + qemu_iovec_memset(luringcb->qiov, total_bytes, 0, | ||
247 | + luringcb->qiov->size - total_bytes); | ||
248 | + ret = 0; | ||
249 | + } | ||
250 | + } else { | ||
251 | + ret = -ENOSPC;; | ||
252 | + } | ||
253 | + } | ||
254 | +end: | ||
255 | + luringcb->ret = ret; | ||
256 | + qemu_iovec_destroy(&luringcb->resubmit_qiov); | ||
257 | + | ||
258 | + /* | ||
259 | + * If the coroutine is already entered it must be in ioq_submit() | ||
260 | + * and will notice luringcb->ret has been filled in when it | ||
261 | + * eventually runs later. Coroutines cannot be entered recursively | ||
262 | + * so avoid doing that! | ||
263 | + */ | ||
264 | + if (!qemu_coroutine_entered(luringcb->co)) { | ||
265 | + aio_co_wake(luringcb->co); | ||
266 | + } | ||
267 | + } | ||
268 | + qemu_bh_cancel(s->completion_bh); | ||
269 | +} | ||
270 | + | ||
271 | +static int ioq_submit(LuringState *s) | ||
272 | +{ | ||
273 | + int ret = 0; | ||
274 | + LuringAIOCB *luringcb, *luringcb_next; | ||
275 | + | ||
276 | + while (s->io_q.in_queue > 0) { | ||
277 | + /* | ||
278 | + * Try to fetch sqes from the ring for requests waiting in | ||
279 | + * the overflow queue | ||
280 | + */ | ||
281 | + QSIMPLEQ_FOREACH_SAFE(luringcb, &s->io_q.submit_queue, next, | ||
282 | + luringcb_next) { | ||
283 | + struct io_uring_sqe *sqes = io_uring_get_sqe(&s->ring); | ||
284 | + if (!sqes) { | ||
285 | + break; | ||
286 | + } | ||
287 | + /* Prep sqe for submission */ | ||
288 | + *sqes = luringcb->sqeq; | ||
289 | + QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next); | ||
290 | + } | ||
291 | + ret = io_uring_submit(&s->ring); | ||
292 | + /* Prevent infinite loop if submission is refused */ | ||
293 | + if (ret <= 0) { | ||
294 | + if (ret == -EAGAIN) { | ||
295 | + continue; | ||
296 | + } | ||
297 | + break; | ||
298 | + } | ||
299 | + s->io_q.in_flight += ret; | ||
300 | + s->io_q.in_queue -= ret; | ||
301 | + } | ||
302 | + s->io_q.blocked = (s->io_q.in_queue > 0); | ||
303 | + | ||
304 | + if (s->io_q.in_flight) { | ||
305 | + /* | ||
306 | + * We can try to complete something just right away if there are | ||
307 | + * still requests in-flight. | ||
308 | + */ | ||
309 | + luring_process_completions(s); | ||
310 | + } | ||
311 | + return ret; | ||
312 | +} | ||
313 | + | ||
314 | +static void luring_process_completions_and_submit(LuringState *s) | ||
315 | +{ | ||
316 | + aio_context_acquire(s->aio_context); | ||
317 | + luring_process_completions(s); | ||
318 | + | ||
319 | + if (!s->io_q.plugged && s->io_q.in_queue > 0) { | ||
320 | + ioq_submit(s); | ||
321 | + } | ||
322 | + aio_context_release(s->aio_context); | ||
323 | +} | ||
324 | + | ||
325 | +static void qemu_luring_completion_bh(void *opaque) | ||
326 | +{ | ||
327 | + LuringState *s = opaque; | ||
328 | + luring_process_completions_and_submit(s); | ||
329 | +} | ||
330 | + | ||
331 | +static void qemu_luring_completion_cb(void *opaque) | ||
332 | +{ | ||
333 | + LuringState *s = opaque; | ||
334 | + luring_process_completions_and_submit(s); | ||
335 | +} | ||
336 | + | ||
337 | +static void ioq_init(LuringQueue *io_q) | ||
338 | +{ | ||
339 | + QSIMPLEQ_INIT(&io_q->submit_queue); | ||
340 | + io_q->plugged = 0; | ||
341 | + io_q->in_queue = 0; | ||
342 | + io_q->in_flight = 0; | ||
343 | + io_q->blocked = false; | ||
344 | +} | ||
345 | + | ||
346 | +void luring_io_plug(BlockDriverState *bs, LuringState *s) | ||
347 | +{ | ||
348 | + s->io_q.plugged++; | ||
349 | +} | ||
350 | + | ||
351 | +void luring_io_unplug(BlockDriverState *bs, LuringState *s) | ||
352 | +{ | ||
353 | + assert(s->io_q.plugged); | ||
354 | + if (--s->io_q.plugged == 0 && | ||
355 | + !s->io_q.blocked && s->io_q.in_queue > 0) { | ||
356 | + ioq_submit(s); | ||
357 | + } | ||
358 | +} | ||
359 | + | ||
360 | +/** | ||
361 | + * luring_do_submit: | ||
362 | + * @fd: file descriptor for I/O | ||
363 | + * @luringcb: AIO control block | ||
364 | + * @s: AIO state | ||
365 | + * @offset: offset for request | ||
366 | + * @type: type of request | ||
367 | + * | ||
368 | + * Fetches sqes from ring, adds to pending queue and preps them | ||
369 | + * | ||
370 | + */ | ||
371 | +static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, | ||
372 | + uint64_t offset, int type) | ||
373 | +{ | ||
374 | + struct io_uring_sqe *sqes = &luringcb->sqeq; | ||
375 | + | ||
376 | + switch (type) { | ||
377 | + case QEMU_AIO_WRITE: | ||
378 | + io_uring_prep_writev(sqes, fd, luringcb->qiov->iov, | ||
379 | + luringcb->qiov->niov, offset); | ||
380 | + break; | ||
381 | + case QEMU_AIO_READ: | ||
382 | + io_uring_prep_readv(sqes, fd, luringcb->qiov->iov, | ||
383 | + luringcb->qiov->niov, offset); | ||
384 | + break; | ||
385 | + case QEMU_AIO_FLUSH: | ||
386 | + io_uring_prep_fsync(sqes, fd, IORING_FSYNC_DATASYNC); | ||
387 | + break; | ||
388 | + default: | ||
389 | + fprintf(stderr, "%s: invalid AIO request type, aborting 0x%x.\n", | ||
390 | + __func__, type); | ||
391 | + abort(); | ||
392 | + } | ||
393 | + io_uring_sqe_set_data(sqes, luringcb); | ||
394 | + | ||
395 | + QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next); | ||
396 | + s->io_q.in_queue++; | ||
397 | + | ||
398 | + if (!s->io_q.blocked && | ||
399 | + (!s->io_q.plugged || | ||
400 | + s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) { | ||
401 | + return ioq_submit(s); | ||
402 | + } | ||
403 | + return 0; | ||
404 | +} | ||
405 | + | ||
406 | +int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, | ||
407 | + uint64_t offset, QEMUIOVector *qiov, int type) | ||
408 | +{ | ||
409 | + int ret; | ||
410 | + LuringAIOCB luringcb = { | ||
411 | + .co = qemu_coroutine_self(), | ||
412 | + .ret = -EINPROGRESS, | ||
413 | + .qiov = qiov, | ||
414 | + .is_read = (type == QEMU_AIO_READ), | ||
415 | + }; | ||
416 | + | ||
417 | + ret = luring_do_submit(fd, &luringcb, s, offset, type); | ||
418 | + if (ret < 0) { | ||
419 | + return ret; | ||
420 | + } | ||
421 | + | ||
422 | + if (luringcb.ret == -EINPROGRESS) { | ||
423 | + qemu_coroutine_yield(); | ||
424 | + } | ||
425 | + return luringcb.ret; | ||
426 | +} | ||
427 | + | ||
428 | +void luring_detach_aio_context(LuringState *s, AioContext *old_context) | ||
429 | +{ | ||
430 | + aio_set_fd_handler(old_context, s->ring.ring_fd, false, NULL, NULL, NULL, | ||
431 | + s); | ||
432 | + qemu_bh_delete(s->completion_bh); | ||
433 | + s->aio_context = NULL; | ||
434 | +} | ||
435 | + | ||
436 | +void luring_attach_aio_context(LuringState *s, AioContext *new_context) | ||
437 | +{ | ||
438 | + s->aio_context = new_context; | ||
439 | + s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s); | ||
440 | + aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false, | ||
441 | + qemu_luring_completion_cb, NULL, NULL, s); | ||
442 | +} | ||
443 | + | ||
444 | +LuringState *luring_init(Error **errp) | ||
445 | +{ | ||
446 | + int rc; | ||
447 | + LuringState *s = g_new0(LuringState, 1); | ||
448 | + struct io_uring *ring = &s->ring; | ||
449 | + | ||
450 | + rc = io_uring_queue_init(MAX_ENTRIES, ring, 0); | ||
451 | + if (rc < 0) { | ||
452 | + error_setg_errno(errp, -rc, "failed to init linux io_uring ring"); | ||
453 | + g_free(s); | ||
454 | + return NULL; | ||
455 | + } | ||
456 | + | ||
457 | + ioq_init(&s->io_q); | ||
458 | + return s; | ||
459 | + | ||
460 | +} | ||
461 | + | ||
462 | +void luring_cleanup(LuringState *s) | ||
463 | +{ | ||
464 | + io_uring_queue_exit(&s->ring); | ||
465 | + g_free(s); | ||
466 | +} | ||
467 | diff --git a/include/block/aio.h b/include/block/aio.h | ||
468 | index XXXXXXX..XXXXXXX 100644 | ||
469 | --- a/include/block/aio.h | ||
470 | +++ b/include/block/aio.h | ||
471 | @@ -XXX,XX +XXX,XX @@ typedef void IOHandler(void *opaque); | ||
472 | struct Coroutine; | ||
473 | struct ThreadPool; | ||
474 | struct LinuxAioState; | ||
475 | +struct LuringState; | ||
476 | |||
477 | struct AioContext { | ||
478 | GSource source; | ||
479 | @@ -XXX,XX +XXX,XX @@ struct AioContext { | ||
480 | struct ThreadPool *thread_pool; | ||
481 | |||
482 | #ifdef CONFIG_LINUX_AIO | ||
483 | - /* State for native Linux AIO. Uses aio_context_acquire/release for | ||
484 | + /* | ||
485 | + * State for native Linux AIO. Uses aio_context_acquire/release for | ||
486 | * locking. | ||
487 | */ | ||
488 | struct LinuxAioState *linux_aio; | ||
489 | #endif | ||
490 | +#ifdef CONFIG_LINUX_IO_URING | ||
491 | + /* | ||
492 | + * State for Linux io_uring. Uses aio_context_acquire/release for | ||
493 | + * locking. | ||
494 | + */ | ||
495 | + struct LuringState *linux_io_uring; | ||
496 | +#endif | ||
497 | |||
498 | /* TimerLists for calling timers - one per clock type. Has its own | ||
499 | * locking. | ||
500 | @@ -XXX,XX +XXX,XX @@ struct LinuxAioState *aio_setup_linux_aio(AioContext *ctx, Error **errp); | ||
501 | /* Return the LinuxAioState bound to this AioContext */ | ||
502 | struct LinuxAioState *aio_get_linux_aio(AioContext *ctx); | ||
503 | |||
504 | +/* Setup the LuringState bound to this AioContext */ | ||
505 | +struct LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp); | ||
506 | + | ||
507 | +/* Return the LuringState bound to this AioContext */ | ||
508 | +struct LuringState *aio_get_linux_io_uring(AioContext *ctx); | ||
509 | /** | ||
510 | * aio_timer_new_with_attrs: | ||
511 | * @ctx: the aio context | ||
512 | diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h | ||
513 | index XXXXXXX..XXXXXXX 100644 | ||
514 | --- a/include/block/raw-aio.h | ||
515 | +++ b/include/block/raw-aio.h | ||
516 | @@ -XXX,XX +XXX,XX @@ void laio_attach_aio_context(LinuxAioState *s, AioContext *new_context); | ||
517 | void laio_io_plug(BlockDriverState *bs, LinuxAioState *s); | ||
518 | void laio_io_unplug(BlockDriverState *bs, LinuxAioState *s); | ||
519 | #endif | ||
520 | +/* io_uring.c - Linux io_uring implementation */ | ||
521 | +#ifdef CONFIG_LINUX_IO_URING | ||
522 | +typedef struct LuringState LuringState; | ||
523 | +LuringState *luring_init(Error **errp); | ||
524 | +void luring_cleanup(LuringState *s); | ||
525 | +int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, | ||
526 | + uint64_t offset, QEMUIOVector *qiov, int type); | ||
527 | +void luring_detach_aio_context(LuringState *s, AioContext *old_context); | ||
528 | +void luring_attach_aio_context(LuringState *s, AioContext *new_context); | ||
529 | +void luring_io_plug(BlockDriverState *bs, LuringState *s); | ||
530 | +void luring_io_unplug(BlockDriverState *bs, LuringState *s); | ||
531 | +#endif | ||
532 | |||
533 | #ifdef _WIN32 | ||
534 | typedef struct QEMUWin32AIOState QEMUWin32AIOState; | ||
535 | -- | ||
536 | 2.24.1 | ||
537 | |||
538 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Follow linux-aio.o and stub out the block/io_uring.o APIs that will be | ||
4 | missing when a binary is linked with obj-util-y but without | ||
5 | block-util-y (e.g. vhost-user-gpu). | ||
6 | |||
7 | For example, the stubs are necessary so that a binary using util/async.o | ||
8 | from obj-util-y for qemu_bh_new() links successfully. In this case | ||
9 | block/io_uring.o from block-util-y isn't needed and we can avoid | ||
10 | dragging in the block layer by linking the stubs instead. The stub | ||
11 | functions never get called. | ||
12 | |||
13 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
14 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
16 | Message-id: 20200120141858.587874-6-stefanha@redhat.com | ||
17 | Message-Id: <20200120141858.587874-6-stefanha@redhat.com> | ||
18 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
19 | --- | ||
20 | MAINTAINERS | 1 + | ||
21 | stubs/Makefile.objs | 1 + | ||
22 | stubs/io_uring.c | 32 ++++++++++++++++++++++++++++++++ | ||
23 | 3 files changed, 34 insertions(+) | ||
24 | create mode 100644 stubs/io_uring.c | ||
25 | |||
26 | diff --git a/MAINTAINERS b/MAINTAINERS | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/MAINTAINERS | ||
29 | +++ b/MAINTAINERS | ||
30 | @@ -XXX,XX +XXX,XX @@ M: Stefan Hajnoczi <stefanha@redhat.com> | ||
31 | L: qemu-block@nongnu.org | ||
32 | S: Maintained | ||
33 | F: block/io_uring.c | ||
34 | +F: stubs/io_uring.c | ||
35 | |||
36 | qcow2 | ||
37 | M: Kevin Wolf <kwolf@redhat.com> | ||
38 | diff --git a/stubs/Makefile.objs b/stubs/Makefile.objs | ||
39 | index XXXXXXX..XXXXXXX 100644 | ||
40 | --- a/stubs/Makefile.objs | ||
41 | +++ b/stubs/Makefile.objs | ||
42 | @@ -XXX,XX +XXX,XX @@ stub-obj-y += iothread.o | ||
43 | stub-obj-y += iothread-lock.o | ||
44 | stub-obj-y += is-daemonized.o | ||
45 | stub-obj-$(CONFIG_LINUX_AIO) += linux-aio.o | ||
46 | +stub-obj-$(CONFIG_LINUX_IO_URING) += io_uring.o | ||
47 | stub-obj-y += machine-init-done.o | ||
48 | stub-obj-y += migr-blocker.o | ||
49 | stub-obj-y += change-state-handler.o | ||
50 | diff --git a/stubs/io_uring.c b/stubs/io_uring.c | ||
51 | new file mode 100644 | ||
52 | index XXXXXXX..XXXXXXX | ||
53 | --- /dev/null | ||
54 | +++ b/stubs/io_uring.c | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | +/* | ||
57 | + * Linux io_uring support. | ||
58 | + * | ||
59 | + * Copyright (C) 2009 IBM, Corp. | ||
60 | + * Copyright (C) 2009 Red Hat, Inc. | ||
61 | + * | ||
62 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
63 | + * See the COPYING file in the top-level directory. | ||
64 | + */ | ||
65 | +#include "qemu/osdep.h" | ||
66 | +#include "block/aio.h" | ||
67 | +#include "block/raw-aio.h" | ||
68 | + | ||
69 | +void luring_detach_aio_context(LuringState *s, AioContext *old_context) | ||
70 | +{ | ||
71 | + abort(); | ||
72 | +} | ||
73 | + | ||
74 | +void luring_attach_aio_context(LuringState *s, AioContext *new_context) | ||
75 | +{ | ||
76 | + abort(); | ||
77 | +} | ||
78 | + | ||
79 | +LuringState *luring_init(Error **errp) | ||
80 | +{ | ||
81 | + abort(); | ||
82 | +} | ||
83 | + | ||
84 | +void luring_cleanup(LuringState *s) | ||
85 | +{ | ||
86 | + abort(); | ||
87 | +} | ||
88 | -- | ||
89 | 2.24.1 | ||
90 | |||
91 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Message-id: 20200120141858.587874-7-stefanha@redhat.com | ||
7 | Message-Id: <20200120141858.587874-7-stefanha@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | util/async.c | 36 ++++++++++++++++++++++++++++++++++++ | ||
11 | 1 file changed, 36 insertions(+) | ||
12 | |||
13 | diff --git a/util/async.c b/util/async.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/util/async.c | ||
16 | +++ b/util/async.c | ||
17 | @@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source) | ||
18 | } | ||
19 | #endif | ||
20 | |||
21 | +#ifdef CONFIG_LINUX_IO_URING | ||
22 | + if (ctx->linux_io_uring) { | ||
23 | + luring_detach_aio_context(ctx->linux_io_uring, ctx); | ||
24 | + luring_cleanup(ctx->linux_io_uring); | ||
25 | + ctx->linux_io_uring = NULL; | ||
26 | + } | ||
27 | +#endif | ||
28 | + | ||
29 | assert(QSLIST_EMPTY(&ctx->scheduled_coroutines)); | ||
30 | qemu_bh_delete(ctx->co_schedule_bh); | ||
31 | |||
32 | @@ -XXX,XX +XXX,XX @@ LinuxAioState *aio_get_linux_aio(AioContext *ctx) | ||
33 | } | ||
34 | #endif | ||
35 | |||
36 | +#ifdef CONFIG_LINUX_IO_URING | ||
37 | +LuringState *aio_setup_linux_io_uring(AioContext *ctx, Error **errp) | ||
38 | +{ | ||
39 | + if (ctx->linux_io_uring) { | ||
40 | + return ctx->linux_io_uring; | ||
41 | + } | ||
42 | + | ||
43 | + ctx->linux_io_uring = luring_init(errp); | ||
44 | + if (!ctx->linux_io_uring) { | ||
45 | + return NULL; | ||
46 | + } | ||
47 | + | ||
48 | + luring_attach_aio_context(ctx->linux_io_uring, ctx); | ||
49 | + return ctx->linux_io_uring; | ||
50 | +} | ||
51 | + | ||
52 | +LuringState *aio_get_linux_io_uring(AioContext *ctx) | ||
53 | +{ | ||
54 | + assert(ctx->linux_io_uring); | ||
55 | + return ctx->linux_io_uring; | ||
56 | +} | ||
57 | +#endif | ||
58 | + | ||
59 | void aio_notify(AioContext *ctx) | ||
60 | { | ||
61 | /* Write e.g. bh->scheduled before reading ctx->notify_me. Pairs | ||
62 | @@ -XXX,XX +XXX,XX @@ AioContext *aio_context_new(Error **errp) | ||
63 | #ifdef CONFIG_LINUX_AIO | ||
64 | ctx->linux_aio = NULL; | ||
65 | #endif | ||
66 | + | ||
67 | +#ifdef CONFIG_LINUX_IO_URING | ||
68 | + ctx->linux_io_uring = NULL; | ||
69 | +#endif | ||
70 | + | ||
71 | ctx->thread_pool = NULL; | ||
72 | qemu_rec_mutex_init(&ctx->lock); | ||
73 | timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx); | ||
74 | -- | ||
75 | 2.24.1 | ||
76 | |||
77 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Message-id: 20200120141858.587874-8-stefanha@redhat.com | ||
7 | Message-Id: <20200120141858.587874-8-stefanha@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | block.c | 22 ++++++++++++++++++++++ | ||
11 | blockdev.c | 12 ++++-------- | ||
12 | include/block/block.h | 1 + | ||
13 | 3 files changed, 27 insertions(+), 8 deletions(-) | ||
14 | |||
15 | diff --git a/block.c b/block.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/block.c | ||
18 | +++ b/block.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static BlockdevDetectZeroesOptions bdrv_parse_detect_zeroes(QemuOpts *opts, | ||
20 | return detect_zeroes; | ||
21 | } | ||
22 | |||
23 | +/** | ||
24 | + * Set open flags for aio engine | ||
25 | + * | ||
26 | + * Return 0 on success, -1 if the engine specified is invalid | ||
27 | + */ | ||
28 | +int bdrv_parse_aio(const char *mode, int *flags) | ||
29 | +{ | ||
30 | + if (!strcmp(mode, "threads")) { | ||
31 | + /* do nothing, default */ | ||
32 | + } else if (!strcmp(mode, "native")) { | ||
33 | + *flags |= BDRV_O_NATIVE_AIO; | ||
34 | +#ifdef CONFIG_LINUX_IO_URING | ||
35 | + } else if (!strcmp(mode, "io_uring")) { | ||
36 | + *flags |= BDRV_O_IO_URING; | ||
37 | +#endif | ||
38 | + } else { | ||
39 | + return -1; | ||
40 | + } | ||
41 | + | ||
42 | + return 0; | ||
43 | +} | ||
44 | + | ||
45 | /** | ||
46 | * Set open flags for a given discard mode | ||
47 | * | ||
48 | diff --git a/blockdev.c b/blockdev.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/blockdev.c | ||
51 | +++ b/blockdev.c | ||
52 | @@ -XXX,XX +XXX,XX @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, | ||
53 | } | ||
54 | |||
55 | if ((aio = qemu_opt_get(opts, "aio")) != NULL) { | ||
56 | - if (!strcmp(aio, "native")) { | ||
57 | - *bdrv_flags |= BDRV_O_NATIVE_AIO; | ||
58 | - } else if (!strcmp(aio, "threads")) { | ||
59 | - /* this is the default */ | ||
60 | - } else { | ||
61 | - error_setg(errp, "invalid aio option"); | ||
62 | - return; | ||
63 | + if (bdrv_parse_aio(aio, bdrv_flags) < 0) { | ||
64 | + error_setg(errp, "invalid aio option"); | ||
65 | + return; | ||
66 | } | ||
67 | } | ||
68 | } | ||
69 | @@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_common_drive_opts = { | ||
70 | },{ | ||
71 | .name = "aio", | ||
72 | .type = QEMU_OPT_STRING, | ||
73 | - .help = "host AIO implementation (threads, native)", | ||
74 | + .help = "host AIO implementation (threads, native, io_uring)", | ||
75 | },{ | ||
76 | .name = BDRV_OPT_CACHE_WB, | ||
77 | .type = QEMU_OPT_BOOL, | ||
78 | diff --git a/include/block/block.h b/include/block/block.h | ||
79 | index XXXXXXX..XXXXXXX 100644 | ||
80 | --- a/include/block/block.h | ||
81 | +++ b/include/block/block.h | ||
82 | @@ -XXX,XX +XXX,XX @@ void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top, | ||
83 | void bdrv_replace_node(BlockDriverState *from, BlockDriverState *to, | ||
84 | Error **errp); | ||
85 | |||
86 | +int bdrv_parse_aio(const char *mode, int *flags); | ||
87 | int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough); | ||
88 | int bdrv_parse_discard_flags(const char *mode, int *flags); | ||
89 | BdrvChild *bdrv_open_child(const char *filename, | ||
90 | -- | ||
91 | 2.24.1 | ||
92 | |||
93 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Reviewed-by: Maxim Levitsky <maximlevitsky@gmail.com> | ||
5 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20200120141858.587874-9-stefanha@redhat.com | ||
8 | Message-Id: <20200120141858.587874-9-stefanha@redhat.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | block/file-posix.c | 98 +++++++++++++++++++++++++++++++++++++--------- | ||
12 | 1 file changed, 79 insertions(+), 19 deletions(-) | ||
13 | |||
14 | diff --git a/block/file-posix.c b/block/file-posix.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/block/file-posix.c | ||
17 | +++ b/block/file-posix.c | ||
18 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVRawState { | ||
19 | bool has_write_zeroes:1; | ||
20 | bool discard_zeroes:1; | ||
21 | bool use_linux_aio:1; | ||
22 | + bool use_linux_io_uring:1; | ||
23 | bool page_cache_inconsistent:1; | ||
24 | bool has_fallocate; | ||
25 | bool needs_alignment; | ||
26 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList raw_runtime_opts = { | ||
27 | { | ||
28 | .name = "aio", | ||
29 | .type = QEMU_OPT_STRING, | ||
30 | - .help = "host AIO implementation (threads, native)", | ||
31 | + .help = "host AIO implementation (threads, native, io_uring)", | ||
32 | }, | ||
33 | { | ||
34 | .name = "locking", | ||
35 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, | ||
36 | goto fail; | ||
37 | } | ||
38 | |||
39 | - aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO) | ||
40 | - ? BLOCKDEV_AIO_OPTIONS_NATIVE | ||
41 | - : BLOCKDEV_AIO_OPTIONS_THREADS; | ||
42 | + if (bdrv_flags & BDRV_O_NATIVE_AIO) { | ||
43 | + aio_default = BLOCKDEV_AIO_OPTIONS_NATIVE; | ||
44 | +#ifdef CONFIG_LINUX_IO_URING | ||
45 | + } else if (bdrv_flags & BDRV_O_IO_URING) { | ||
46 | + aio_default = BLOCKDEV_AIO_OPTIONS_IO_URING; | ||
47 | +#endif | ||
48 | + } else { | ||
49 | + aio_default = BLOCKDEV_AIO_OPTIONS_THREADS; | ||
50 | + } | ||
51 | + | ||
52 | aio = qapi_enum_parse(&BlockdevAioOptions_lookup, | ||
53 | qemu_opt_get(opts, "aio"), | ||
54 | aio_default, &local_err); | ||
55 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, | ||
56 | ret = -EINVAL; | ||
57 | goto fail; | ||
58 | } | ||
59 | + | ||
60 | s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); | ||
61 | +#ifdef CONFIG_LINUX_IO_URING | ||
62 | + s->use_linux_io_uring = (aio == BLOCKDEV_AIO_OPTIONS_IO_URING); | ||
63 | +#endif | ||
64 | |||
65 | locking = qapi_enum_parse(&OnOffAuto_lookup, | ||
66 | qemu_opt_get(opts, "locking"), | ||
67 | @@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options, | ||
68 | } | ||
69 | #endif /* !defined(CONFIG_LINUX_AIO) */ | ||
70 | |||
71 | +#ifdef CONFIG_LINUX_IO_URING | ||
72 | + if (s->use_linux_io_uring) { | ||
73 | + if (!aio_setup_linux_io_uring(bdrv_get_aio_context(bs), errp)) { | ||
74 | + error_prepend(errp, "Unable to use io_uring: "); | ||
75 | + goto fail; | ||
76 | + } | ||
77 | + } | ||
78 | +#else | ||
79 | + if (s->use_linux_io_uring) { | ||
80 | + error_setg(errp, "aio=io_uring was specified, but is not supported " | ||
81 | + "in this build."); | ||
82 | + ret = -EINVAL; | ||
83 | + goto fail; | ||
84 | + } | ||
85 | +#endif /* !defined(CONFIG_LINUX_IO_URING) */ | ||
86 | + | ||
87 | s->has_discard = true; | ||
88 | s->has_write_zeroes = true; | ||
89 | if ((bs->open_flags & BDRV_O_NOCACHE) != 0) { | ||
90 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, | ||
91 | return -EIO; | ||
92 | |||
93 | /* | ||
94 | - * Check if the underlying device requires requests to be aligned, | ||
95 | - * and if the request we are trying to submit is aligned or not. | ||
96 | - * If this is the case tell the low-level driver that it needs | ||
97 | - * to copy the buffer. | ||
98 | + * When using O_DIRECT, the request must be aligned to be able to use | ||
99 | + * either libaio or io_uring interface. If not, fall back to the regular | ||
100 | + * thread pool read/write code which emulates this for us if we | ||
101 | + * set QEMU_AIO_MISALIGNED. | ||
102 | */ | ||
103 | - if (s->needs_alignment) { | ||
104 | - if (!bdrv_qiov_is_aligned(bs, qiov)) { | ||
105 | - type |= QEMU_AIO_MISALIGNED; | ||
106 | + if (s->needs_alignment && !bdrv_qiov_is_aligned(bs, qiov)) { | ||
107 | + type |= QEMU_AIO_MISALIGNED; | ||
108 | +#ifdef CONFIG_LINUX_IO_URING | ||
109 | + } else if (s->use_linux_io_uring) { | ||
110 | + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); | ||
111 | + assert(qiov->size == bytes); | ||
112 | + return luring_co_submit(bs, aio, s->fd, offset, qiov, type); | ||
113 | +#endif | ||
114 | #ifdef CONFIG_LINUX_AIO | ||
115 | - } else if (s->use_linux_aio) { | ||
116 | - LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); | ||
117 | - assert(qiov->size == bytes); | ||
118 | - return laio_co_submit(bs, aio, s->fd, offset, qiov, type); | ||
119 | + } else if (s->use_linux_aio) { | ||
120 | + LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); | ||
121 | + assert(qiov->size == bytes); | ||
122 | + return laio_co_submit(bs, aio, s->fd, offset, qiov, type); | ||
123 | #endif | ||
124 | - } | ||
125 | } | ||
126 | |||
127 | acb = (RawPosixAIOData) { | ||
128 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, | ||
129 | |||
130 | static void raw_aio_plug(BlockDriverState *bs) | ||
131 | { | ||
132 | + BDRVRawState __attribute__((unused)) *s = bs->opaque; | ||
133 | #ifdef CONFIG_LINUX_AIO | ||
134 | - BDRVRawState *s = bs->opaque; | ||
135 | if (s->use_linux_aio) { | ||
136 | LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); | ||
137 | laio_io_plug(bs, aio); | ||
138 | } | ||
139 | #endif | ||
140 | +#ifdef CONFIG_LINUX_IO_URING | ||
141 | + if (s->use_linux_io_uring) { | ||
142 | + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); | ||
143 | + luring_io_plug(bs, aio); | ||
144 | + } | ||
145 | +#endif | ||
146 | } | ||
147 | |||
148 | static void raw_aio_unplug(BlockDriverState *bs) | ||
149 | { | ||
150 | + BDRVRawState __attribute__((unused)) *s = bs->opaque; | ||
151 | #ifdef CONFIG_LINUX_AIO | ||
152 | - BDRVRawState *s = bs->opaque; | ||
153 | if (s->use_linux_aio) { | ||
154 | LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); | ||
155 | laio_io_unplug(bs, aio); | ||
156 | } | ||
157 | #endif | ||
158 | +#ifdef CONFIG_LINUX_IO_URING | ||
159 | + if (s->use_linux_io_uring) { | ||
160 | + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); | ||
161 | + luring_io_unplug(bs, aio); | ||
162 | + } | ||
163 | +#endif | ||
164 | } | ||
165 | |||
166 | static int raw_co_flush_to_disk(BlockDriverState *bs) | ||
167 | @@ -XXX,XX +XXX,XX @@ static int raw_co_flush_to_disk(BlockDriverState *bs) | ||
168 | .aio_type = QEMU_AIO_FLUSH, | ||
169 | }; | ||
170 | |||
171 | +#ifdef CONFIG_LINUX_IO_URING | ||
172 | + if (s->use_linux_io_uring) { | ||
173 | + LuringState *aio = aio_get_linux_io_uring(bdrv_get_aio_context(bs)); | ||
174 | + return luring_co_submit(bs, aio, s->fd, 0, NULL, QEMU_AIO_FLUSH); | ||
175 | + } | ||
176 | +#endif | ||
177 | return raw_thread_pool_submit(bs, handle_aiocb_flush, &acb); | ||
178 | } | ||
179 | |||
180 | static void raw_aio_attach_aio_context(BlockDriverState *bs, | ||
181 | AioContext *new_context) | ||
182 | { | ||
183 | + BDRVRawState __attribute__((unused)) *s = bs->opaque; | ||
184 | #ifdef CONFIG_LINUX_AIO | ||
185 | - BDRVRawState *s = bs->opaque; | ||
186 | if (s->use_linux_aio) { | ||
187 | Error *local_err = NULL; | ||
188 | if (!aio_setup_linux_aio(new_context, &local_err)) { | ||
189 | @@ -XXX,XX +XXX,XX @@ static void raw_aio_attach_aio_context(BlockDriverState *bs, | ||
190 | } | ||
191 | } | ||
192 | #endif | ||
193 | +#ifdef CONFIG_LINUX_IO_URING | ||
194 | + if (s->use_linux_io_uring) { | ||
195 | + Error *local_err; | ||
196 | + if (!aio_setup_linux_io_uring(new_context, &local_err)) { | ||
197 | + error_reportf_err(local_err, "Unable to use linux io_uring, " | ||
198 | + "falling back to thread pool: "); | ||
199 | + s->use_linux_io_uring = false; | ||
200 | + } | ||
201 | + } | ||
202 | +#endif | ||
203 | } | ||
204 | |||
205 | static void raw_close(BlockDriverState *bs) | ||
206 | -- | ||
207 | 2.24.1 | ||
208 | |||
209 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Message-id: 20200120141858.587874-10-stefanha@redhat.com | ||
7 | Message-Id: <20200120141858.587874-10-stefanha@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | block/io_uring.c | 23 ++++++++++++++++++++--- | ||
11 | block/trace-events | 12 ++++++++++++ | ||
12 | 2 files changed, 32 insertions(+), 3 deletions(-) | ||
13 | |||
14 | diff --git a/block/io_uring.c b/block/io_uring.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/block/io_uring.c | ||
17 | +++ b/block/io_uring.c | ||
18 | @@ -XXX,XX +XXX,XX @@ | ||
19 | #include "block/raw-aio.h" | ||
20 | #include "qemu/coroutine.h" | ||
21 | #include "qapi/error.h" | ||
22 | +#include "trace.h" | ||
23 | |||
24 | /* io_uring ring size */ | ||
25 | #define MAX_ENTRIES 128 | ||
26 | @@ -XXX,XX +XXX,XX @@ static void luring_resubmit_short_read(LuringState *s, LuringAIOCB *luringcb, | ||
27 | QEMUIOVector *resubmit_qiov; | ||
28 | size_t remaining; | ||
29 | |||
30 | + trace_luring_resubmit_short_read(s, luringcb, nread); | ||
31 | + | ||
32 | /* Update read position */ | ||
33 | luringcb->total_read = nread; | ||
34 | remaining = luringcb->qiov->size - luringcb->total_read; | ||
35 | @@ -XXX,XX +XXX,XX @@ static void luring_process_completions(LuringState *s) | ||
36 | |||
37 | /* Change counters one-by-one because we can be nested. */ | ||
38 | s->io_q.in_flight--; | ||
39 | + trace_luring_process_completion(s, luringcb, ret); | ||
40 | |||
41 | /* total_read is non-zero only for resubmitted read requests */ | ||
42 | total_bytes = ret + luringcb->total_read; | ||
43 | @@ -XXX,XX +XXX,XX @@ static int ioq_submit(LuringState *s) | ||
44 | QSIMPLEQ_REMOVE_HEAD(&s->io_q.submit_queue, next); | ||
45 | } | ||
46 | ret = io_uring_submit(&s->ring); | ||
47 | + trace_luring_io_uring_submit(s, ret); | ||
48 | /* Prevent infinite loop if submission is refused */ | ||
49 | if (ret <= 0) { | ||
50 | if (ret == -EAGAIN) { | ||
51 | @@ -XXX,XX +XXX,XX @@ static void ioq_init(LuringQueue *io_q) | ||
52 | |||
53 | void luring_io_plug(BlockDriverState *bs, LuringState *s) | ||
54 | { | ||
55 | + trace_luring_io_plug(s); | ||
56 | s->io_q.plugged++; | ||
57 | } | ||
58 | |||
59 | void luring_io_unplug(BlockDriverState *bs, LuringState *s) | ||
60 | { | ||
61 | assert(s->io_q.plugged); | ||
62 | + trace_luring_io_unplug(s, s->io_q.blocked, s->io_q.plugged, | ||
63 | + s->io_q.in_queue, s->io_q.in_flight); | ||
64 | if (--s->io_q.plugged == 0 && | ||
65 | !s->io_q.blocked && s->io_q.in_queue > 0) { | ||
66 | ioq_submit(s); | ||
67 | @@ -XXX,XX +XXX,XX @@ void luring_io_unplug(BlockDriverState *bs, LuringState *s) | ||
68 | static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, | ||
69 | uint64_t offset, int type) | ||
70 | { | ||
71 | + int ret; | ||
72 | struct io_uring_sqe *sqes = &luringcb->sqeq; | ||
73 | |||
74 | switch (type) { | ||
75 | @@ -XXX,XX +XXX,XX @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, | ||
76 | |||
77 | QSIMPLEQ_INSERT_TAIL(&s->io_q.submit_queue, luringcb, next); | ||
78 | s->io_q.in_queue++; | ||
79 | - | ||
80 | + trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged, | ||
81 | + s->io_q.in_queue, s->io_q.in_flight); | ||
82 | if (!s->io_q.blocked && | ||
83 | (!s->io_q.plugged || | ||
84 | s->io_q.in_flight + s->io_q.in_queue >= MAX_ENTRIES)) { | ||
85 | - return ioq_submit(s); | ||
86 | + ret = ioq_submit(s); | ||
87 | + trace_luring_do_submit_done(s, ret); | ||
88 | + return ret; | ||
89 | } | ||
90 | return 0; | ||
91 | } | ||
92 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn luring_co_submit(BlockDriverState *bs, LuringState *s, int fd, | ||
93 | .qiov = qiov, | ||
94 | .is_read = (type == QEMU_AIO_READ), | ||
95 | }; | ||
96 | - | ||
97 | + trace_luring_co_submit(bs, s, &luringcb, fd, offset, qiov ? qiov->size : 0, | ||
98 | + type); | ||
99 | ret = luring_do_submit(fd, &luringcb, s, offset, type); | ||
100 | + | ||
101 | if (ret < 0) { | ||
102 | return ret; | ||
103 | } | ||
104 | @@ -XXX,XX +XXX,XX @@ LuringState *luring_init(Error **errp) | ||
105 | LuringState *s = g_new0(LuringState, 1); | ||
106 | struct io_uring *ring = &s->ring; | ||
107 | |||
108 | + trace_luring_init_state(s, sizeof(*s)); | ||
109 | + | ||
110 | rc = io_uring_queue_init(MAX_ENTRIES, ring, 0); | ||
111 | if (rc < 0) { | ||
112 | error_setg_errno(errp, errno, "failed to init linux io_uring ring"); | ||
113 | @@ -XXX,XX +XXX,XX @@ void luring_cleanup(LuringState *s) | ||
114 | { | ||
115 | io_uring_queue_exit(&s->ring); | ||
116 | g_free(s); | ||
117 | + trace_luring_cleanup_state(s); | ||
118 | } | ||
119 | diff --git a/block/trace-events b/block/trace-events | ||
120 | index XXXXXXX..XXXXXXX 100644 | ||
121 | --- a/block/trace-events | ||
122 | +++ b/block/trace-events | ||
123 | @@ -XXX,XX +XXX,XX @@ qmp_block_stream(void *bs) "bs %p" | ||
124 | file_paio_submit(void *acb, void *opaque, int64_t offset, int count, int type) "acb %p opaque %p offset %"PRId64" count %d type %d" | ||
125 | file_copy_file_range(void *bs, int src, int64_t src_off, int dst, int64_t dst_off, int64_t bytes, int flags, int64_t ret) "bs %p src_fd %d offset %"PRIu64" dst_fd %d offset %"PRIu64" bytes %"PRIu64" flags %d ret %"PRId64 | ||
126 | |||
127 | +# io_uring.c | ||
128 | +luring_init_state(void *s, size_t size) "s %p size %zu" | ||
129 | +luring_cleanup_state(void *s) "%p freed" | ||
130 | +luring_io_plug(void *s) "LuringState %p plug" | ||
131 | +luring_io_unplug(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d" | ||
132 | +luring_do_submit(void *s, int blocked, int plugged, int queued, int inflight) "LuringState %p blocked %d plugged %d queued %d inflight %d" | ||
133 | +luring_do_submit_done(void *s, int ret) "LuringState %p submitted to kernel %d" | ||
134 | +luring_co_submit(void *bs, void *s, void *luringcb, int fd, uint64_t offset, size_t nbytes, int type) "bs %p s %p luringcb %p fd %d offset %" PRId64 " nbytes %zd type %d" | ||
135 | +luring_process_completion(void *s, void *aiocb, int ret) "LuringState %p luringcb %p ret %d" | ||
136 | +luring_io_uring_submit(void *s, int ret) "LuringState %p ret %d" | ||
137 | +luring_resubmit_short_read(void *s, void *luringcb, int nread) "LuringState %p luringcb %p nread %d" | ||
138 | + | ||
139 | # qcow2.c | ||
140 | qcow2_add_task(void *co, void *bs, void *pool, const char *action, int cluster_type, uint64_t file_cluster_offset, uint64_t offset, uint64_t bytes, void *qiov, size_t qiov_offset) "co %p bs %p pool %p: %s: cluster_type %d file_cluster_offset %" PRIu64 " offset %" PRIu64 " bytes %" PRIu64 " qiov %p qiov_offset %zu" | ||
141 | qcow2_writev_start_req(void *co, int64_t offset, int bytes) "co %p offset 0x%" PRIx64 " bytes %d" | ||
142 | -- | ||
143 | 2.24.1 | ||
144 | |||
145 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Message-id: 20200120141858.587874-11-stefanha@redhat.com | ||
7 | Message-Id: <20200120141858.587874-11-stefanha@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | block/io_uring.c | 17 ++++++++++++++++- | ||
11 | 1 file changed, 16 insertions(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/block/io_uring.c b/block/io_uring.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/block/io_uring.c | ||
16 | +++ b/block/io_uring.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void qemu_luring_completion_cb(void *opaque) | ||
18 | luring_process_completions_and_submit(s); | ||
19 | } | ||
20 | |||
21 | +static bool qemu_luring_poll_cb(void *opaque) | ||
22 | +{ | ||
23 | + LuringState *s = opaque; | ||
24 | + struct io_uring_cqe *cqes; | ||
25 | + | ||
26 | + if (io_uring_peek_cqe(&s->ring, &cqes) == 0) { | ||
27 | + if (cqes) { | ||
28 | + luring_process_completions_and_submit(s); | ||
29 | + return true; | ||
30 | + } | ||
31 | + } | ||
32 | + | ||
33 | + return false; | ||
34 | +} | ||
35 | + | ||
36 | static void ioq_init(LuringQueue *io_q) | ||
37 | { | ||
38 | QSIMPLEQ_INIT(&io_q->submit_queue); | ||
39 | @@ -XXX,XX +XXX,XX @@ void luring_attach_aio_context(LuringState *s, AioContext *new_context) | ||
40 | s->aio_context = new_context; | ||
41 | s->completion_bh = aio_bh_new(new_context, qemu_luring_completion_bh, s); | ||
42 | aio_set_fd_handler(s->aio_context, s->ring.ring_fd, false, | ||
43 | - qemu_luring_completion_cb, NULL, NULL, s); | ||
44 | + qemu_luring_completion_cb, NULL, qemu_luring_poll_cb, s); | ||
45 | } | ||
46 | |||
47 | LuringState *luring_init(Error **errp) | ||
48 | -- | ||
49 | 2.24.1 | ||
50 | |||
51 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Message-id: 20200120141858.587874-12-stefanha@redhat.com | ||
7 | Message-Id: <20200120141858.587874-12-stefanha@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | qemu-io.c | 25 +++++++++++++++++++++---- | ||
11 | 1 file changed, 21 insertions(+), 4 deletions(-) | ||
12 | |||
13 | diff --git a/qemu-io.c b/qemu-io.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/qemu-io.c | ||
16 | +++ b/qemu-io.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void open_help(void) | ||
18 | " -C, -- use copy-on-read\n" | ||
19 | " -n, -- disable host cache, short for -t none\n" | ||
20 | " -U, -- force shared permissions\n" | ||
21 | -" -k, -- use kernel AIO implementation (on Linux only)\n" | ||
22 | +" -k, -- use kernel AIO implementation (Linux only, prefer use of -i)\n" | ||
23 | +" -i, -- use AIO mode (threads, native or io_uring)\n" | ||
24 | " -t, -- use the given cache mode for the image\n" | ||
25 | " -d, -- use the given discard mode for the image\n" | ||
26 | " -o, -- options to be given to the block driver" | ||
27 | @@ -XXX,XX +XXX,XX @@ static int open_f(BlockBackend *blk, int argc, char **argv) | ||
28 | QDict *opts; | ||
29 | bool force_share = false; | ||
30 | |||
31 | - while ((c = getopt(argc, argv, "snCro:kt:d:U")) != -1) { | ||
32 | + while ((c = getopt(argc, argv, "snCro:ki:t:d:U")) != -1) { | ||
33 | switch (c) { | ||
34 | case 's': | ||
35 | flags |= BDRV_O_SNAPSHOT; | ||
36 | @@ -XXX,XX +XXX,XX @@ static int open_f(BlockBackend *blk, int argc, char **argv) | ||
37 | return -EINVAL; | ||
38 | } | ||
39 | break; | ||
40 | + case 'i': | ||
41 | + if (bdrv_parse_aio(optarg, &flags) < 0) { | ||
42 | + error_report("Invalid aio option: %s", optarg); | ||
43 | + qemu_opts_reset(&empty_opts); | ||
44 | + return -EINVAL; | ||
45 | + } | ||
46 | + break; | ||
47 | case 'o': | ||
48 | if (imageOpts) { | ||
49 | printf("--image-opts and 'open -o' are mutually exclusive\n"); | ||
50 | @@ -XXX,XX +XXX,XX @@ static void usage(const char *name) | ||
51 | " -n, --nocache disable host cache, short for -t none\n" | ||
52 | " -C, --copy-on-read enable copy-on-read\n" | ||
53 | " -m, --misalign misalign allocations for O_DIRECT\n" | ||
54 | -" -k, --native-aio use kernel AIO implementation (on Linux only)\n" | ||
55 | +" -k, --native-aio use kernel AIO implementation\n" | ||
56 | +" (Linux only, prefer use of -i)\n" | ||
57 | +" -i, --aio=MODE use AIO mode (threads, native or io_uring)\n" | ||
58 | " -t, --cache=MODE use the given cache mode for the image\n" | ||
59 | " -d, --discard=MODE use the given discard mode for the image\n" | ||
60 | " -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n" | ||
61 | @@ -XXX,XX +XXX,XX @@ static QemuOptsList file_opts = { | ||
62 | int main(int argc, char **argv) | ||
63 | { | ||
64 | int readonly = 0; | ||
65 | - const char *sopt = "hVc:d:f:rsnCmkt:T:U"; | ||
66 | + const char *sopt = "hVc:d:f:rsnCmki:t:T:U"; | ||
67 | const struct option lopt[] = { | ||
68 | { "help", no_argument, NULL, 'h' }, | ||
69 | { "version", no_argument, NULL, 'V' }, | ||
70 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
71 | { "copy-on-read", no_argument, NULL, 'C' }, | ||
72 | { "misalign", no_argument, NULL, 'm' }, | ||
73 | { "native-aio", no_argument, NULL, 'k' }, | ||
74 | + { "aio", required_argument, NULL, 'i' }, | ||
75 | { "discard", required_argument, NULL, 'd' }, | ||
76 | { "cache", required_argument, NULL, 't' }, | ||
77 | { "trace", required_argument, NULL, 'T' }, | ||
78 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
79 | case 'k': | ||
80 | flags |= BDRV_O_NATIVE_AIO; | ||
81 | break; | ||
82 | + case 'i': | ||
83 | + if (bdrv_parse_aio(optarg, &flags) < 0) { | ||
84 | + error_report("Invalid aio option: %s", optarg); | ||
85 | + exit(1); | ||
86 | + } | ||
87 | + break; | ||
88 | case 't': | ||
89 | if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) < 0) { | ||
90 | error_report("Invalid cache option: %s", optarg); | ||
91 | -- | ||
92 | 2.24.1 | ||
93 | |||
94 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Message-id: 20200120141858.587874-13-stefanha@redhat.com | ||
7 | Message-Id: <20200120141858.587874-13-stefanha@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | qemu-img-cmds.hx | 4 ++-- | ||
11 | qemu-img.c | 11 ++++++++++- | ||
12 | qemu-img.texi | 5 ++++- | ||
13 | 3 files changed, 16 insertions(+), 4 deletions(-) | ||
14 | |||
15 | diff --git a/qemu-img-cmds.hx b/qemu-img-cmds.hx | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/qemu-img-cmds.hx | ||
18 | +++ b/qemu-img-cmds.hx | ||
19 | @@ -XXX,XX +XXX,XX @@ STEXI | ||
20 | ETEXI | ||
21 | |||
22 | DEF("bench", img_bench, | ||
23 | - "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-w] [-U] filename") | ||
24 | + "bench [-c count] [-d depth] [-f fmt] [--flush-interval=flush_interval] [-n] [--no-drain] [-o offset] [--pattern=pattern] [-q] [-s buffer_size] [-S step_size] [-t cache] [-i aio] [-w] [-U] filename") | ||
25 | STEXI | ||
26 | -@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename} | ||
27 | +@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-i @var{aio}] [-w] [-U] @var{filename} | ||
28 | ETEXI | ||
29 | |||
30 | DEF("check", img_check, | ||
31 | diff --git a/qemu-img.c b/qemu-img.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/qemu-img.c | ||
34 | +++ b/qemu-img.c | ||
35 | @@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv) | ||
36 | {"force-share", no_argument, 0, 'U'}, | ||
37 | {0, 0, 0, 0} | ||
38 | }; | ||
39 | - c = getopt_long(argc, argv, ":hc:d:f:no:qs:S:t:wU", long_options, NULL); | ||
40 | + c = getopt_long(argc, argv, ":hc:d:f:ni:o:qs:S:t:wU", long_options, | ||
41 | + NULL); | ||
42 | if (c == -1) { | ||
43 | break; | ||
44 | } | ||
45 | @@ -XXX,XX +XXX,XX @@ static int img_bench(int argc, char **argv) | ||
46 | case 'n': | ||
47 | flags |= BDRV_O_NATIVE_AIO; | ||
48 | break; | ||
49 | + case 'i': | ||
50 | + ret = bdrv_parse_aio(optarg, &flags); | ||
51 | + if (ret < 0) { | ||
52 | + error_report("Invalid aio option: %s", optarg); | ||
53 | + ret = -1; | ||
54 | + goto out; | ||
55 | + } | ||
56 | + break; | ||
57 | case 'o': | ||
58 | { | ||
59 | offset = cvtnum(optarg); | ||
60 | diff --git a/qemu-img.texi b/qemu-img.texi | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/qemu-img.texi | ||
63 | +++ b/qemu-img.texi | ||
64 | @@ -XXX,XX +XXX,XX @@ Command description: | ||
65 | Amends the image format specific @var{options} for the image file | ||
66 | @var{filename}. Not all file formats support this operation. | ||
67 | |||
68 | -@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename} | ||
69 | +@item bench [-c @var{count}] [-d @var{depth}] [-f @var{fmt}] [--flush-interval=@var{flush_interval}] [-n] [-i @var{aio}] [--no-drain] [-o @var{offset}] [--pattern=@var{pattern}] [-q] [-s @var{buffer_size}] [-S @var{step_size}] [-t @var{cache}] [-w] [-U] @var{filename} | ||
70 | |||
71 | Run a simple sequential I/O benchmark on the specified image. If @code{-w} is | ||
72 | specified, a write test is performed, otherwise a read test is performed. | ||
73 | @@ -XXX,XX +XXX,XX @@ If @code{-n} is specified, the native AIO backend is used if possible. On | ||
74 | Linux, this option only works if @code{-t none} or @code{-t directsync} is | ||
75 | specified as well. | ||
76 | |||
77 | +If @code{-i} is specified, the @var{aio} argument selects which AIO backend | ||
78 | +is used: @var{threads}, @var{native} or @var{io_uring}. | ||
79 | + | ||
80 | For write tests, by default a buffer filled with zeros is written. This can be | ||
81 | overridden with a pattern byte specified by @var{pattern}. | ||
82 | |||
83 | -- | ||
84 | 2.24.1 | ||
85 | |||
86 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Eric Blake <eblake@redhat.com> | ||
5 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
6 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20200120141858.587874-14-stefanha@redhat.com | ||
8 | Message-Id: <20200120141858.587874-14-stefanha@redhat.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | docs/interop/qemu-nbd.rst | 4 ++-- | ||
12 | qemu-nbd.c | 12 ++++-------- | ||
13 | 2 files changed, 6 insertions(+), 10 deletions(-) | ||
14 | |||
15 | diff --git a/docs/interop/qemu-nbd.rst b/docs/interop/qemu-nbd.rst | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/docs/interop/qemu-nbd.rst | ||
18 | +++ b/docs/interop/qemu-nbd.rst | ||
19 | @@ -XXX,XX +XXX,XX @@ driver options if ``--image-opts`` is specified. | ||
20 | |||
21 | .. option:: --aio=AIO | ||
22 | |||
23 | - Set the asynchronous I/O mode between ``threads`` (the default) | ||
24 | - and ``native`` (Linux only). | ||
25 | + Set the asynchronous I/O mode between ``threads`` (the default), | ||
26 | + ``native`` (Linux only), and ``io_uring`` (Linux 5.1+). | ||
27 | |||
28 | .. option:: --discard=DISCARD | ||
29 | |||
30 | diff --git a/qemu-nbd.c b/qemu-nbd.c | ||
31 | index XXXXXXX..XXXXXXX 100644 | ||
32 | --- a/qemu-nbd.c | ||
33 | +++ b/qemu-nbd.c | ||
34 | @@ -XXX,XX +XXX,XX @@ static void usage(const char *name) | ||
35 | " '[ID_OR_NAME]'\n" | ||
36 | " -n, --nocache disable host cache\n" | ||
37 | " --cache=MODE set cache mode (none, writeback, ...)\n" | ||
38 | -" --aio=MODE set AIO mode (native or threads)\n" | ||
39 | +" --aio=MODE set AIO mode (native, io_uring or threads)\n" | ||
40 | " --discard=MODE set discard mode (ignore, unmap)\n" | ||
41 | " --detect-zeroes=MODE set detect-zeroes mode (off, on, unmap)\n" | ||
42 | " --image-opts treat FILE as a full set of image options\n" | ||
43 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) | ||
44 | exit(EXIT_FAILURE); | ||
45 | } | ||
46 | seen_aio = true; | ||
47 | - if (!strcmp(optarg, "native")) { | ||
48 | - flags |= BDRV_O_NATIVE_AIO; | ||
49 | - } else if (!strcmp(optarg, "threads")) { | ||
50 | - /* this is the default */ | ||
51 | - } else { | ||
52 | - error_report("invalid aio mode `%s'", optarg); | ||
53 | - exit(EXIT_FAILURE); | ||
54 | + if (bdrv_parse_aio(optarg, &flags) < 0) { | ||
55 | + error_report("Invalid aio mode '%s'", optarg); | ||
56 | + exit(EXIT_FAILURE); | ||
57 | } | ||
58 | break; | ||
59 | case QEMU_NBD_OPT_DISCARD: | ||
60 | -- | ||
61 | 2.24.1 | ||
62 | |||
63 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Message-id: 20200120141858.587874-15-stefanha@redhat.com | ||
7 | Message-Id: <20200120141858.587874-15-stefanha@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | tests/qemu-iotests/check | 15 ++++++++++++++- | ||
11 | tests/qemu-iotests/common.rc | 14 ++++++++++++++ | ||
12 | tests/qemu-iotests/iotests.py | 12 ++++++++++-- | ||
13 | 3 files changed, 38 insertions(+), 3 deletions(-) | ||
14 | |||
15 | diff --git a/tests/qemu-iotests/check b/tests/qemu-iotests/check | ||
16 | index XXXXXXX..XXXXXXX 100755 | ||
17 | --- a/tests/qemu-iotests/check | ||
18 | +++ b/tests/qemu-iotests/check | ||
19 | @@ -XXX,XX +XXX,XX @@ sortme=false | ||
20 | expunge=true | ||
21 | have_test_arg=false | ||
22 | cachemode=false | ||
23 | +aiomode=false | ||
24 | |||
25 | tmp="${TEST_DIR}"/$$ | ||
26 | rm -f $tmp.list $tmp.tmp $tmp.sed | ||
27 | @@ -XXX,XX +XXX,XX @@ export IMGFMT_GENERIC=true | ||
28 | export IMGPROTO=file | ||
29 | export IMGOPTS="" | ||
30 | export CACHEMODE="writeback" | ||
31 | +export AIOMODE="threads" | ||
32 | export QEMU_IO_OPTIONS="" | ||
33 | export QEMU_IO_OPTIONS_NO_FMT="" | ||
34 | export CACHEMODE_IS_DEFAULT=true | ||
35 | @@ -XXX,XX +XXX,XX @@ s/ .*//p | ||
36 | CACHEMODE_IS_DEFAULT=false | ||
37 | cachemode=false | ||
38 | continue | ||
39 | + elif $aiomode | ||
40 | + then | ||
41 | + AIOMODE="$r" | ||
42 | + aiomode=false | ||
43 | + continue | ||
44 | fi | ||
45 | |||
46 | xpand=true | ||
47 | @@ -XXX,XX +XXX,XX @@ other options | ||
48 | -n show me, do not run tests | ||
49 | -o options -o options to pass to qemu-img create/convert | ||
50 | -c mode cache mode | ||
51 | + -i mode AIO mode | ||
52 | -makecheck pretty print output for make check | ||
53 | |||
54 | testlist options | ||
55 | @@ -XXX,XX +XXX,XX @@ testlist options | ||
56 | cachemode=true | ||
57 | xpand=false | ||
58 | ;; | ||
59 | + -i) | ||
60 | + aiomode=true | ||
61 | + xpand=false | ||
62 | + ;; | ||
63 | -T) # deprecated timestamp option | ||
64 | xpand=false | ||
65 | ;; | ||
66 | - | ||
67 | -v) | ||
68 | verbose=true | ||
69 | xpand=false | ||
70 | @@ -XXX,XX +XXX,XX @@ done | ||
71 | |||
72 | # Set qemu-io cache mode with $CACHEMODE we have | ||
73 | QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --cache $CACHEMODE" | ||
74 | +# Set qemu-io aio mode with $AIOMODE we have | ||
75 | +QEMU_IO_OPTIONS="$QEMU_IO_OPTIONS --aio $AIOMODE" | ||
76 | |||
77 | QEMU_IO_OPTIONS_NO_FMT="$QEMU_IO_OPTIONS" | ||
78 | if [ "$IMGOPTSSYNTAX" != "true" ]; then | ||
79 | diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc | ||
80 | index XXXXXXX..XXXXXXX 100644 | ||
81 | --- a/tests/qemu-iotests/common.rc | ||
82 | +++ b/tests/qemu-iotests/common.rc | ||
83 | @@ -XXX,XX +XXX,XX @@ _default_cache_mode() | ||
84 | return | ||
85 | fi | ||
86 | } | ||
87 | +_supported_aio_modes() | ||
88 | +{ | ||
89 | + for mode; do | ||
90 | + if [ "$mode" = "$AIOMODE" ]; then | ||
91 | + return | ||
92 | + fi | ||
93 | + done | ||
94 | + _notrun "not suitable for aio mode: $AIOMODE" | ||
95 | +} | ||
96 | +_default_aio_mode() | ||
97 | +{ | ||
98 | + AIOMODE="$1" | ||
99 | + QEMU_IO="$QEMU_IO --aio $1" | ||
100 | +} | ||
101 | |||
102 | _unsupported_imgopts() | ||
103 | { | ||
104 | diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py | ||
105 | index XXXXXXX..XXXXXXX 100644 | ||
106 | --- a/tests/qemu-iotests/iotests.py | ||
107 | +++ b/tests/qemu-iotests/iotests.py | ||
108 | @@ -XXX,XX +XXX,XX @@ test_dir = os.environ.get('TEST_DIR') | ||
109 | sock_dir = os.environ.get('SOCK_DIR') | ||
110 | output_dir = os.environ.get('OUTPUT_DIR', '.') | ||
111 | cachemode = os.environ.get('CACHEMODE') | ||
112 | +aiomode = os.environ.get('AIOMODE') | ||
113 | qemu_default_machine = os.environ.get('QEMU_DEFAULT_MACHINE') | ||
114 | |||
115 | socket_scm_helper = os.environ.get('SOCKET_SCM_HELPER', 'socket_scm_helper') | ||
116 | @@ -XXX,XX +XXX,XX @@ class VM(qtest.QEMUQtestMachine): | ||
117 | options.append('file=%s' % path) | ||
118 | options.append('format=%s' % format) | ||
119 | options.append('cache=%s' % cachemode) | ||
120 | + options.append('aio=%s' % aiomode) | ||
121 | |||
122 | if opts: | ||
123 | options.append(opts) | ||
124 | @@ -XXX,XX +XXX,XX @@ def verify_cache_mode(supported_cache_modes=[]): | ||
125 | if supported_cache_modes and (cachemode not in supported_cache_modes): | ||
126 | notrun('not suitable for this cache mode: %s' % cachemode) | ||
127 | |||
128 | +def verify_aio_mode(supported_aio_modes=[]): | ||
129 | + if supported_aio_modes and (aiomode not in supported_aio_modes): | ||
130 | + notrun('not suitable for this aio mode: %s' % aiomode) | ||
131 | + | ||
132 | def supports_quorum(): | ||
133 | return 'quorum' in qemu_img_pipe('--help') | ||
134 | |||
135 | @@ -XXX,XX +XXX,XX @@ def execute_unittest(output, verbosity, debug): | ||
136 | |||
137 | def execute_test(test_function=None, | ||
138 | supported_fmts=[], supported_oses=['linux'], | ||
139 | - supported_cache_modes=[], unsupported_fmts=[], | ||
140 | - supported_protocols=[], unsupported_protocols=[]): | ||
141 | + supported_cache_modes=[], supported_aio_modes={}, | ||
142 | + unsupported_fmts=[], supported_protocols=[], | ||
143 | + unsupported_protocols=[]): | ||
144 | """Run either unittest or script-style tests.""" | ||
145 | |||
146 | # We are using TEST_DIR and QEMU_DEFAULT_MACHINE as proxies to | ||
147 | @@ -XXX,XX +XXX,XX @@ def execute_test(test_function=None, | ||
148 | verify_protocol(supported_protocols, unsupported_protocols) | ||
149 | verify_platform(supported_oses) | ||
150 | verify_cache_mode(supported_cache_modes) | ||
151 | + verify_aio_mode(supported_aio_modes) | ||
152 | |||
153 | if debug: | ||
154 | output = sys.stdout | ||
155 | -- | ||
156 | 2.24.1 | ||
157 | |||
158 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
2 | 1 | ||
3 | Signed-off-by: Aarushi Mehta <mehta.aaru20@gmail.com> | ||
4 | Acked-by: Stefano Garzarella <sgarzare@redhat.com> | ||
5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
6 | Message-id: 20200120141858.587874-16-stefanha@redhat.com | ||
7 | Message-Id: <20200120141858.587874-16-stefanha@redhat.com> | ||
8 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
9 | --- | ||
10 | tests/qemu-iotests/028 | 2 +- | ||
11 | tests/qemu-iotests/058 | 2 +- | ||
12 | tests/qemu-iotests/089 | 4 ++-- | ||
13 | tests/qemu-iotests/091 | 4 ++-- | ||
14 | tests/qemu-iotests/109 | 2 +- | ||
15 | tests/qemu-iotests/147 | 5 +++-- | ||
16 | tests/qemu-iotests/181 | 8 ++++---- | ||
17 | tests/qemu-iotests/183 | 4 ++-- | ||
18 | tests/qemu-iotests/185 | 10 +++++----- | ||
19 | tests/qemu-iotests/200 | 2 +- | ||
20 | tests/qemu-iotests/201 | 8 ++++---- | ||
21 | 11 files changed, 26 insertions(+), 25 deletions(-) | ||
22 | |||
23 | diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028 | ||
24 | index XXXXXXX..XXXXXXX 100755 | ||
25 | --- a/tests/qemu-iotests/028 | ||
26 | +++ b/tests/qemu-iotests/028 | ||
27 | @@ -XXX,XX +XXX,XX @@ echo block-backup | ||
28 | echo | ||
29 | |||
30 | qemu_comm_method="monitor" | ||
31 | -_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk | ||
32 | +_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk | ||
33 | h=$QEMU_HANDLE | ||
34 | if [ "${VALGRIND_QEMU}" == "y" ]; then | ||
35 | QEMU_COMM_TIMEOUT=7 | ||
36 | diff --git a/tests/qemu-iotests/058 b/tests/qemu-iotests/058 | ||
37 | index XXXXXXX..XXXXXXX 100755 | ||
38 | --- a/tests/qemu-iotests/058 | ||
39 | +++ b/tests/qemu-iotests/058 | ||
40 | @@ -XXX,XX +XXX,XX @@ nbd_snapshot_img="nbd:unix:$nbd_unix_socket" | ||
41 | converted_image=$TEST_IMG.converted | ||
42 | |||
43 | # Use -f raw instead of -f $IMGFMT for the NBD connection | ||
44 | -QEMU_IO_NBD="$QEMU_IO -f raw --cache=$CACHEMODE" | ||
45 | +QEMU_IO_NBD="$QEMU_IO -f raw --cache=$CACHEMODE --aio=$AIOMODE" | ||
46 | |||
47 | echo | ||
48 | echo "== preparing image ==" | ||
49 | diff --git a/tests/qemu-iotests/089 b/tests/qemu-iotests/089 | ||
50 | index XXXXXXX..XXXXXXX 100755 | ||
51 | --- a/tests/qemu-iotests/089 | ||
52 | +++ b/tests/qemu-iotests/089 | ||
53 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'write -P 42 0 512' -c 'write -P 23 512 512' \ | ||
54 | |||
55 | $QEMU_IMG convert -f raw -O $IMGFMT "$TEST_IMG.base" "$TEST_IMG" | ||
56 | |||
57 | -$QEMU_IO_PROG --cache $CACHEMODE \ | ||
58 | +$QEMU_IO_PROG --cache $CACHEMODE --aio $AIOMODE \ | ||
59 | -c 'read -P 42 0 512' -c 'read -P 23 512 512' \ | ||
60 | -c 'read -P 66 1024 512' "json:{ | ||
61 | \"driver\": \"$IMGFMT\", | ||
62 | @@ -XXX,XX +XXX,XX @@ $QEMU_IO -c 'write -P 42 0x38000 512' "$TEST_IMG" | _filter_qemu_io | ||
63 | |||
64 | # The "image.filename" part tests whether "a": { "b": "c" } and "a.b": "c" do | ||
65 | # the same (which they should). | ||
66 | -$QEMU_IO_PROG --cache $CACHEMODE \ | ||
67 | +$QEMU_IO_PROG --cache $CACHEMODE --aio $AIOMODE \ | ||
68 | -c 'read -P 42 0x38000 512' "json:{ | ||
69 | \"driver\": \"$IMGFMT\", | ||
70 | \"file\": { | ||
71 | diff --git a/tests/qemu-iotests/091 b/tests/qemu-iotests/091 | ||
72 | index XXXXXXX..XXXXXXX 100755 | ||
73 | --- a/tests/qemu-iotests/091 | ||
74 | +++ b/tests/qemu-iotests/091 | ||
75 | @@ -XXX,XX +XXX,XX @@ echo === Starting QEMU VM1 === | ||
76 | echo | ||
77 | |||
78 | qemu_comm_method="monitor" | ||
79 | -_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk | ||
80 | +_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk | ||
81 | h1=$QEMU_HANDLE | ||
82 | |||
83 | echo | ||
84 | echo === Starting QEMU VM2 === | ||
85 | echo | ||
86 | -_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk \ | ||
87 | +_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=${AIOMODE},id=disk \ | ||
88 | -incoming "exec: cat '${MIG_FIFO}'" | ||
89 | h2=$QEMU_HANDLE | ||
90 | |||
91 | diff --git a/tests/qemu-iotests/109 b/tests/qemu-iotests/109 | ||
92 | index XXXXXXX..XXXXXXX 100755 | ||
93 | --- a/tests/qemu-iotests/109 | ||
94 | +++ b/tests/qemu-iotests/109 | ||
95 | @@ -XXX,XX +XXX,XX @@ run_qemu() | ||
96 | local qmp_format="$3" | ||
97 | local qmp_event="$4" | ||
98 | |||
99 | - _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},id=src | ||
100 | + _launch_qemu -drive file="${source_img}",format=raw,cache=${CACHEMODE},aio=${AIOMODE},id=src | ||
101 | _send_qemu_cmd $QEMU_HANDLE "{ 'execute': 'qmp_capabilities' }" "return" | ||
102 | |||
103 | _send_qemu_cmd $QEMU_HANDLE \ | ||
104 | diff --git a/tests/qemu-iotests/147 b/tests/qemu-iotests/147 | ||
105 | index XXXXXXX..XXXXXXX 100755 | ||
106 | --- a/tests/qemu-iotests/147 | ||
107 | +++ b/tests/qemu-iotests/147 | ||
108 | @@ -XXX,XX +XXX,XX @@ import socket | ||
109 | import stat | ||
110 | import time | ||
111 | import iotests | ||
112 | -from iotests import cachemode, imgfmt, qemu_img, qemu_nbd, qemu_nbd_early_pipe | ||
113 | +from iotests import cachemode, aiomode, imgfmt, qemu_img, qemu_nbd, qemu_nbd_early_pipe | ||
114 | |||
115 | NBD_PORT_START = 32768 | ||
116 | NBD_PORT_END = NBD_PORT_START + 1024 | ||
117 | @@ -XXX,XX +XXX,XX @@ class BuiltinNBD(NBDBlockdevAddBase): | ||
118 | self.server.add_drive_raw('if=none,id=nbd-export,' + | ||
119 | 'file=%s,' % test_img + | ||
120 | 'format=%s,' % imgfmt + | ||
121 | - 'cache=%s' % cachemode) | ||
121 | + 'cache=%s,' % cachemode + | ||
123 | + 'aio=%s' % aiomode) | ||
124 | self.server.launch() | ||
125 | |||
126 | def tearDown(self): | ||
127 | diff --git a/tests/qemu-iotests/181 b/tests/qemu-iotests/181 | ||
128 | index XXXXXXX..XXXXXXX 100755 | ||
129 | --- a/tests/qemu-iotests/181 | ||
130 | +++ b/tests/qemu-iotests/181 | ||
131 | @@ -XXX,XX +XXX,XX @@ qemu_comm_method="monitor" | ||
132 | |||
133 | if [ "$IMGOPTSSYNTAX" = "true" ]; then | ||
134 | _launch_qemu \ | ||
135 | - -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk | ||
136 | + -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk | ||
137 | else | ||
138 | _launch_qemu \ | ||
139 | - -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk | ||
140 | + -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk | ||
141 | fi | ||
142 | src=$QEMU_HANDLE | ||
143 | |||
144 | if [ "$IMGOPTSSYNTAX" = "true" ]; then | ||
145 | _launch_qemu \ | ||
146 | - -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk \ | ||
147 | + -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk \ | ||
148 | -incoming "unix:${MIG_SOCKET}" | ||
149 | else | ||
150 | _launch_qemu \ | ||
151 | - -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk \ | ||
152 | + -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk \ | ||
153 | -incoming "unix:${MIG_SOCKET}" | ||
154 | fi | ||
155 | dest=$QEMU_HANDLE | ||
156 | diff --git a/tests/qemu-iotests/183 b/tests/qemu-iotests/183 | ||
157 | index XXXXXXX..XXXXXXX 100755 | ||
158 | --- a/tests/qemu-iotests/183 | ||
159 | +++ b/tests/qemu-iotests/183 | ||
160 | @@ -XXX,XX +XXX,XX @@ echo | ||
161 | qemu_comm_method="qmp" | ||
162 | |||
163 | _launch_qemu \ | ||
164 | - -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
165 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk | ||
166 | src=$QEMU_HANDLE | ||
167 | _send_qemu_cmd $src "{ 'execute': 'qmp_capabilities' }" 'return' | ||
168 | |||
169 | _launch_qemu \ | ||
170 | - -drive file="${TEST_IMG}.dest",cache=$CACHEMODE,driver=$IMGFMT,id=disk \ | ||
171 | + -drive file="${TEST_IMG}.dest",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk \ | ||
172 | -incoming "unix:${MIG_SOCKET}" | ||
173 | dest=$QEMU_HANDLE | ||
174 | _send_qemu_cmd $dest "{ 'execute': 'qmp_capabilities' }" 'return' | ||
175 | diff --git a/tests/qemu-iotests/185 b/tests/qemu-iotests/185 | ||
176 | index XXXXXXX..XXXXXXX 100755 | ||
177 | --- a/tests/qemu-iotests/185 | ||
178 | +++ b/tests/qemu-iotests/185 | ||
179 | @@ -XXX,XX +XXX,XX @@ echo | ||
180 | qemu_comm_method="qmp" | ||
181 | |||
182 | _launch_qemu \ | ||
183 | - -drive file="${TEST_IMG}.base",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
184 | + -drive file="${TEST_IMG}.base",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk | ||
185 | h=$QEMU_HANDLE | ||
186 | _send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
187 | |||
188 | @@ -XXX,XX +XXX,XX @@ echo === Start active commit job and exit qemu === | ||
189 | echo | ||
190 | |||
191 | _launch_qemu \ | ||
192 | - -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
193 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk | ||
194 | h=$QEMU_HANDLE | ||
195 | _send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
196 | |||
197 | @@ -XXX,XX +XXX,XX @@ echo === Start mirror job and exit qemu === | ||
198 | echo | ||
199 | |||
200 | _launch_qemu \ | ||
201 | - -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
202 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk | ||
203 | h=$QEMU_HANDLE | ||
204 | _send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
205 | |||
206 | @@ -XXX,XX +XXX,XX @@ echo === Start backup job and exit qemu === | ||
207 | echo | ||
208 | |||
209 | _launch_qemu \ | ||
210 | - -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
211 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk | ||
212 | h=$QEMU_HANDLE | ||
213 | _send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
214 | |||
215 | @@ -XXX,XX +XXX,XX @@ echo === Start streaming job and exit qemu === | ||
216 | echo | ||
217 | |||
218 | _launch_qemu \ | ||
219 | - -drive file="${TEST_IMG}",cache=$CACHEMODE,driver=$IMGFMT,id=disk | ||
220 | + -drive file="${TEST_IMG}",cache=$CACHEMODE,aio=$AIOMODE,driver=$IMGFMT,id=disk | ||
221 | h=$QEMU_HANDLE | ||
222 | _send_qemu_cmd $h "{ 'execute': 'qmp_capabilities' }" 'return' | ||
223 | |||
224 | diff --git a/tests/qemu-iotests/200 b/tests/qemu-iotests/200 | ||
225 | index XXXXXXX..XXXXXXX 100755 | ||
226 | --- a/tests/qemu-iotests/200 | ||
227 | +++ b/tests/qemu-iotests/200 | ||
228 | @@ -XXX,XX +XXX,XX @@ echo === Starting QEMU VM === | ||
229 | echo | ||
230 | qemu_comm_method="qmp" | ||
231 | _launch_qemu -object iothread,id=iothread0 $virtio_scsi \ | ||
232 | - -drive file="${TEST_IMG}",media=disk,if=none,cache=$CACHEMODE,id=drive_sysdisk,format=$IMGFMT \ | ||
233 | + -drive file="${TEST_IMG}",media=disk,if=none,cache=$CACHEMODE,aio=$AIOMODE,id=drive_sysdisk,format=$IMGFMT \ | ||
234 | -device scsi-hd,drive=drive_sysdisk,bus=scsi0.0,id=sysdisk,bootindex=0 | ||
235 | h1=$QEMU_HANDLE | ||
236 | |||
237 | diff --git a/tests/qemu-iotests/201 b/tests/qemu-iotests/201 | ||
238 | index XXXXXXX..XXXXXXX 100755 | ||
239 | --- a/tests/qemu-iotests/201 | ||
240 | +++ b/tests/qemu-iotests/201 | ||
241 | @@ -XXX,XX +XXX,XX @@ qemu_comm_method="monitor" | ||
242 | |||
243 | if [ "$IMGOPTSSYNTAX" = "true" ]; then | ||
244 | _launch_qemu \ | ||
245 | - -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk | ||
246 | + -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk | ||
247 | else | ||
248 | _launch_qemu \ | ||
249 | - -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk | ||
250 | + -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk | ||
251 | fi | ||
252 | src=$QEMU_HANDLE | ||
253 | |||
254 | if [ "$IMGOPTSSYNTAX" = "true" ]; then | ||
255 | _launch_qemu \ | ||
256 | - -drive "${TEST_IMG}",cache=${CACHEMODE},id=disk \ | ||
257 | + -drive "${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,id=disk \ | ||
258 | -incoming "unix:${MIG_SOCKET}" | ||
259 | else | ||
260 | _launch_qemu \ | ||
261 | - -drive file="${TEST_IMG}",cache=${CACHEMODE},driver=$IMGFMT,id=disk \ | ||
262 | + -drive file="${TEST_IMG}",cache=${CACHEMODE},aio=$AIOMODE,driver=$IMGFMT,id=disk \ | ||
263 | -incoming "unix:${MIG_SOCKET}" | ||
264 | fi | ||
265 | dest=$QEMU_HANDLE | ||
266 | -- | ||
267 | 2.24.1 | ||
268 | |||
269 | diff view generated by jsdifflib |