1 | The following changes since commit dac03af5d5482ec7ee9c23db467bb7230b33c0d9: | 1 | The following changes since commit 711c0418c8c1ce3a24346f058b001c4c5a2f0f81: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/rth/tags/pull-axp-20190825' into staging (2019-08-27 10:00:51 +0100) | 3 | Merge remote-tracking branch 'remotes/philmd/tags/mips-20210702' into staging (2021-07-04 14:04:12 +0100) |
4 | 4 | ||
5 | are available in the Git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | https://github.com/stefanha/qemu.git tags/block-pull-request | 7 | https://gitlab.com/stefanha/qemu.git tags/block-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 5396234b96a2ac743f48644529771498e036e698: | 9 | for you to fetch changes up to 9f460c64e13897117f35ffb61f6f5e0102cabc70: |
10 | 10 | ||
11 | block/qcow2: implement .bdrv_co_pwritev(_compressed)_part (2019-08-27 14:58:42 +0100) | 11 | block/io: Merge discard request alignments (2021-07-06 14:28:55 +0100) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Pull request | 14 | Pull request |
15 | 15 | ||
16 | ---------------------------------------------------------------- | 16 | ---------------------------------------------------------------- |
17 | 17 | ||
18 | Vladimir Sementsov-Ogievskiy (12): | 18 | Akihiko Odaki (3): |
19 | util/iov: introduce qemu_iovec_init_extended | 19 | block/file-posix: Optimize for macOS |
20 | util/iov: improve qemu_iovec_is_zero | 20 | block: Add backend_defaults property |
21 | block/io: refactor padding | 21 | block/io: Merge discard request alignments |
22 | block: define .*_part io handlers in BlockDriver | ||
23 | block/io: bdrv_co_do_copy_on_readv: use and support qiov_offset | ||
24 | block/io: bdrv_co_do_copy_on_readv: lazy allocation | ||
25 | block/io: bdrv_aligned_preadv: use and support qiov_offset | ||
26 | block/io: bdrv_aligned_pwritev: use and support qiov_offset | ||
27 | block/io: introduce bdrv_co_p{read, write}v_part | ||
28 | block/qcow2: refactor qcow2_co_preadv to use buffer-based io | ||
29 | block/qcow2: implement .bdrv_co_preadv_part | ||
30 | block/qcow2: implement .bdrv_co_pwritev(_compressed)_part | ||
31 | 22 | ||
32 | block/qcow2.h | 1 + | 23 | Stefan Hajnoczi (2): |
33 | include/block/block_int.h | 21 ++ | 24 | util/async: add a human-readable name to BHs for debugging |
34 | include/qemu/iov.h | 10 +- | 25 | util/async: print leaked BH name when AioContext finalizes |
35 | block/backup.c | 2 +- | 26 | |
36 | block/io.c | 541 +++++++++++++++++++++++--------------- | 27 | include/block/aio.h | 31 ++++++++++++++++++++++--- |
37 | block/qcow2-cluster.c | 14 +- | 28 | include/hw/block/block.h | 3 +++ |
38 | block/qcow2.c | 131 +++++---- | 29 | include/qemu/main-loop.h | 4 +++- |
39 | qemu-img.c | 4 +- | 30 | block/file-posix.c | 27 ++++++++++++++++++++-- |
40 | util/iov.c | 153 +++++++++-- | 31 | block/io.c | 2 ++ |
41 | 9 files changed, 568 insertions(+), 309 deletions(-) | 32 | hw/block/block.c | 42 ++++++++++++++++++++++++++++++---- |
33 | tests/unit/ptimer-test-stubs.c | 2 +- | ||
34 | util/async.c | 25 ++++++++++++++++---- | ||
35 | util/main-loop.c | 4 ++-- | ||
36 | tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++ | ||
37 | 10 files changed, 161 insertions(+), 17 deletions(-) | ||
42 | 38 | ||
43 | -- | 39 | -- |
44 | 2.21.0 | 40 | 2.31.1 |
45 | 41 | ||
46 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Introduce new initialization API, to create requests with padding. Will | ||
4 | be used in the following patch. New API uses qemu_iovec_init_buf if | ||
5 | resulting io vector has only one element, to avoid extra allocations. | ||
6 | So, we need to update qemu_iovec_destroy to support destroying such | ||
7 | QIOVs. | ||
8 | |||
9 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
10 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Message-id: 20190604161514.262241-2-vsementsov@virtuozzo.com | ||
12 | Message-Id: <20190604161514.262241-2-vsementsov@virtuozzo.com> | ||
13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
14 | --- | ||
15 | include/qemu/iov.h | 7 +++ | ||
16 | util/iov.c | 112 +++++++++++++++++++++++++++++++++++++++++++-- | ||
17 | 2 files changed, 114 insertions(+), 5 deletions(-) | ||
18 | |||
19 | diff --git a/include/qemu/iov.h b/include/qemu/iov.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/include/qemu/iov.h | ||
22 | +++ b/include/qemu/iov.h | ||
23 | @@ -XXX,XX +XXX,XX @@ static inline void *qemu_iovec_buf(QEMUIOVector *qiov) | ||
24 | |||
25 | void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint); | ||
26 | void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov); | ||
27 | +void qemu_iovec_init_extended( | ||
28 | + QEMUIOVector *qiov, | ||
29 | + void *head_buf, size_t head_len, | ||
30 | + QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, | ||
31 | + void *tail_buf, size_t tail_len); | ||
32 | +void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, | ||
33 | + size_t offset, size_t len); | ||
34 | void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); | ||
35 | void qemu_iovec_concat(QEMUIOVector *dst, | ||
36 | QEMUIOVector *src, size_t soffset, size_t sbytes); | ||
37 | diff --git a/util/iov.c b/util/iov.c | ||
38 | index XXXXXXX..XXXXXXX 100644 | ||
39 | --- a/util/iov.c | ||
40 | +++ b/util/iov.c | ||
41 | @@ -XXX,XX +XXX,XX @@ void qemu_iovec_concat(QEMUIOVector *dst, | ||
42 | qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes); | ||
43 | } | ||
44 | |||
45 | +/* | ||
46 | + * iov_skip_offset | ||
47 | + * | ||
48 | + * Return a pointer to the iovec structure that contains the byte at @offset | ||
49 | + * in the original vector @iov. | ||
50 | + * Set @remaining_offset to the offset of that same byte inside that iovec. | ||
51 | + */ | ||
52 | +static struct iovec *iov_skip_offset(struct iovec *iov, size_t offset, | ||
53 | + size_t *remaining_offset) | ||
54 | +{ | ||
55 | + while (offset > 0 && offset >= iov->iov_len) { | ||
56 | + offset -= iov->iov_len; | ||
57 | + iov++; | ||
58 | + } | ||
59 | + *remaining_offset = offset; | ||
60 | + | ||
61 | + return iov; | ||
62 | +} | ||
63 | + | ||
64 | +/* | ||
65 | + * qiov_slice | ||
66 | + * | ||
67 | + * Find the subarray of iovecs containing the requested range. @head is | ||
68 | + * set to the offset in the first iovec (returned by the function), @tail | ||
69 | + * to the count of extra bytes in the last iovec (returned iov + @niov - 1). | ||
70 | + */ | ||
71 | +static struct iovec *qiov_slice(QEMUIOVector *qiov, | ||
72 | + size_t offset, size_t len, | ||
73 | + size_t *head, size_t *tail, int *niov) | ||
74 | +{ | ||
75 | + struct iovec *iov, *end_iov; | ||
76 | + | ||
77 | + assert(offset + len <= qiov->size); | ||
78 | + | ||
79 | + iov = iov_skip_offset(qiov->iov, offset, head); | ||
80 | + end_iov = iov_skip_offset(iov, *head + len, tail); | ||
81 | + | ||
82 | + if (*tail > 0) { | ||
83 | + assert(*tail < end_iov->iov_len); | ||
84 | + *tail = end_iov->iov_len - *tail; | ||
85 | + end_iov++; | ||
86 | + } | ||
87 | + | ||
88 | + *niov = end_iov - iov; | ||
89 | + | ||
90 | + return iov; | ||
91 | +} | ||
92 | + | ||
93 | +/* | ||
94 | + * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, | ||
95 | + * and @tail_buf buffer into new qiov. | ||
96 | + */ | ||
97 | +void qemu_iovec_init_extended( | ||
98 | + QEMUIOVector *qiov, | ||
99 | + void *head_buf, size_t head_len, | ||
100 | + QEMUIOVector *mid_qiov, size_t mid_offset, size_t mid_len, | ||
101 | + void *tail_buf, size_t tail_len) | ||
102 | +{ | ||
103 | + size_t mid_head, mid_tail; | ||
104 | + int total_niov, mid_niov = 0; | ||
105 | + struct iovec *p, *mid_iov; | ||
106 | + | ||
107 | + if (mid_len) { | ||
108 | + mid_iov = qiov_slice(mid_qiov, mid_offset, mid_len, | ||
109 | + &mid_head, &mid_tail, &mid_niov); | ||
110 | + } | ||
111 | + | ||
112 | + total_niov = !!head_len + mid_niov + !!tail_len; | ||
113 | + if (total_niov == 1) { | ||
114 | + qemu_iovec_init_buf(qiov, NULL, 0); | ||
115 | + p = &qiov->local_iov; | ||
116 | + } else { | ||
117 | + qiov->niov = qiov->nalloc = total_niov; | ||
118 | + qiov->size = head_len + mid_len + tail_len; | ||
119 | + p = qiov->iov = g_new(struct iovec, qiov->niov); | ||
120 | + } | ||
121 | + | ||
122 | + if (head_len) { | ||
123 | + p->iov_base = head_buf; | ||
124 | + p->iov_len = head_len; | ||
125 | + p++; | ||
126 | + } | ||
127 | + | ||
128 | + if (mid_len) { | ||
129 | + memcpy(p, mid_iov, mid_niov * sizeof(*p)); | ||
130 | + p[0].iov_base = (uint8_t *)p[0].iov_base + mid_head; | ||
131 | + p[0].iov_len -= mid_head; | ||
132 | + p[mid_niov - 1].iov_len -= mid_tail; | ||
133 | + p += mid_niov; | ||
134 | + } | ||
135 | + | ||
136 | + if (tail_len) { | ||
137 | + p->iov_base = tail_buf; | ||
138 | + p->iov_len = tail_len; | ||
139 | + } | ||
140 | +} | ||
141 | + | ||
142 | /* | ||
143 | * Check if the contents of the iovecs are all zero | ||
144 | */ | ||
145 | @@ -XXX,XX +XXX,XX @@ bool qemu_iovec_is_zero(QEMUIOVector *qiov) | ||
146 | return true; | ||
147 | } | ||
148 | |||
149 | +void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, | ||
150 | + size_t offset, size_t len) | ||
151 | +{ | ||
152 | + qemu_iovec_init_extended(qiov, NULL, 0, source, offset, len, NULL, 0); | ||
153 | +} | ||
154 | + | ||
155 | void qemu_iovec_destroy(QEMUIOVector *qiov) | ||
156 | { | ||
157 | - assert(qiov->nalloc != -1); | ||
158 | + if (qiov->nalloc != -1) { | ||
159 | + g_free(qiov->iov); | ||
160 | + } | ||
161 | |||
162 | - qemu_iovec_reset(qiov); | ||
163 | - g_free(qiov->iov); | ||
164 | - qiov->nalloc = 0; | ||
165 | - qiov->iov = NULL; | ||
166 | + memset(qiov, 0, sizeof(*qiov)); | ||
167 | } | ||
168 | |||
169 | void qemu_iovec_reset(QEMUIOVector *qiov) | ||
170 | -- | ||
171 | 2.21.0 | ||
172 | |||
173 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | We'll need to check a part of qiov soon, so implement it now. | ||
4 | |||
5 | Optimization with align down to 4 * sizeof(long) is dropped due to: | ||
6 | 1. It is strange: it aligns length of the buffer, but where is a | ||
7 | guarantee that buffer pointer is aligned itself? | ||
8 | 2. buffer_is_zero() is a better place for optimizations and it has | ||
9 | them. | ||
10 | |||
11 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
12 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Message-id: 20190604161514.262241-3-vsementsov@virtuozzo.com | ||
14 | Message-Id: <20190604161514.262241-3-vsementsov@virtuozzo.com> | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
16 | --- | ||
17 | include/qemu/iov.h | 2 +- | ||
18 | block/io.c | 2 +- | ||
19 | util/iov.c | 31 +++++++++++++++++++------------ | ||
20 | 3 files changed, 21 insertions(+), 14 deletions(-) | ||
21 | |||
22 | diff --git a/include/qemu/iov.h b/include/qemu/iov.h | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/include/qemu/iov.h | ||
25 | +++ b/include/qemu/iov.h | ||
26 | @@ -XXX,XX +XXX,XX @@ void qemu_iovec_concat(QEMUIOVector *dst, | ||
27 | size_t qemu_iovec_concat_iov(QEMUIOVector *dst, | ||
28 | struct iovec *src_iov, unsigned int src_cnt, | ||
29 | size_t soffset, size_t sbytes); | ||
30 | -bool qemu_iovec_is_zero(QEMUIOVector *qiov); | ||
31 | +bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t qiov_offeset, size_t bytes); | ||
32 | void qemu_iovec_destroy(QEMUIOVector *qiov); | ||
33 | void qemu_iovec_reset(QEMUIOVector *qiov); | ||
34 | size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, | ||
35 | diff --git a/block/io.c b/block/io.c | ||
36 | index XXXXXXX..XXXXXXX 100644 | ||
37 | --- a/block/io.c | ||
38 | +++ b/block/io.c | ||
39 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, | ||
40 | |||
41 | if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && | ||
42 | !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && | ||
43 | - qemu_iovec_is_zero(qiov)) { | ||
44 | + qemu_iovec_is_zero(qiov, 0, qiov->size)) { | ||
45 | flags |= BDRV_REQ_ZERO_WRITE; | ||
46 | if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { | ||
47 | flags |= BDRV_REQ_MAY_UNMAP; | ||
48 | diff --git a/util/iov.c b/util/iov.c | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/util/iov.c | ||
51 | +++ b/util/iov.c | ||
52 | @@ -XXX,XX +XXX,XX @@ void qemu_iovec_init_extended( | ||
53 | } | ||
54 | |||
55 | /* | ||
56 | - * Check if the contents of the iovecs are all zero | ||
57 | + * Check if the contents of a subrange of the qiov data are all zeroes. | ||
58 | */ | ||
59 | -bool qemu_iovec_is_zero(QEMUIOVector *qiov) | ||
60 | +bool qemu_iovec_is_zero(QEMUIOVector *qiov, size_t offset, size_t bytes) | ||
61 | { | ||
62 | - int i; | ||
63 | - for (i = 0; i < qiov->niov; i++) { | ||
64 | - size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long)); | ||
65 | - uint8_t *ptr = qiov->iov[i].iov_base; | ||
66 | - if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) { | ||
67 | + struct iovec *iov; | ||
68 | + size_t current_offset; | ||
69 | + | ||
70 | + assert(offset + bytes <= qiov->size); | ||
71 | + | ||
72 | + iov = iov_skip_offset(qiov->iov, offset, &current_offset); | ||
73 | + | ||
74 | + while (bytes) { | ||
75 | + uint8_t *base = (uint8_t *)iov->iov_base + current_offset; | ||
76 | + size_t len = MIN(iov->iov_len - current_offset, bytes); | ||
77 | + | ||
78 | + if (!buffer_is_zero(base, len)) { | ||
79 | return false; | ||
80 | } | ||
81 | - for (; offs < qiov->iov[i].iov_len; offs++) { | ||
82 | - if (ptr[offs]) { | ||
83 | - return false; | ||
84 | - } | ||
85 | - } | ||
86 | + | ||
87 | + current_offset = 0; | ||
88 | + bytes -= len; | ||
89 | + iov++; | ||
90 | } | ||
91 | + | ||
92 | return true; | ||
93 | } | ||
94 | |||
95 | -- | ||
96 | 2.21.0 | ||
97 | |||
98 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | It can be difficult to debug issues with BHs in production environments. |
---|---|---|---|
2 | Although BHs can usually be identified by looking up their ->cb() | ||
3 | function pointer, this requires debug information for the program. It is | ||
4 | also not possible to print human-readable diagnostics about BHs because | ||
5 | they have no identifier. | ||
2 | 6 | ||
3 | Implement and use new interface to get rid of hd_qiov. | 7 | This patch adds a name to each BH. The name is not unique per instance |
8 | but differentiates between cb() functions, which is usually enough. It's | ||
9 | done by changing aio_bh_new() and friends to macros that stringify cb. | ||
4 | 10 | ||
5 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 11 | The next patch will use the name field when reporting leaked BHs. |
6 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | 12 | |
7 | Message-id: 20190604161514.262241-13-vsementsov@virtuozzo.com | ||
8 | Message-Id: <20190604161514.262241-13-vsementsov@virtuozzo.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 13 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
14 | Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
15 | Message-Id: <20210414200247.917496-2-stefanha@redhat.com> | ||
10 | --- | 16 | --- |
11 | block/qcow2.h | 1 + | 17 | include/block/aio.h | 31 ++++++++++++++++++++++++++++--- |
12 | include/qemu/iov.h | 1 + | 18 | include/qemu/main-loop.h | 4 +++- |
13 | block/qcow2-cluster.c | 9 ++++--- | 19 | tests/unit/ptimer-test-stubs.c | 2 +- |
14 | block/qcow2.c | 60 +++++++++++++++++++++---------------------- | 20 | util/async.c | 9 +++++++-- |
15 | util/iov.c | 10 ++++++++ | 21 | util/main-loop.c | 4 ++-- |
16 | 5 files changed, 48 insertions(+), 33 deletions(-) | 22 | 5 files changed, 41 insertions(+), 9 deletions(-) |
17 | 23 | ||
18 | diff --git a/block/qcow2.h b/block/qcow2.h | 24 | diff --git a/include/block/aio.h b/include/block/aio.h |
19 | index XXXXXXX..XXXXXXX 100644 | 25 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/block/qcow2.h | 26 | --- a/include/block/aio.h |
21 | +++ b/block/qcow2.h | 27 | +++ b/include/block/aio.h |
22 | @@ -XXX,XX +XXX,XX @@ typedef struct QCowL2Meta | 28 | @@ -XXX,XX +XXX,XX @@ void aio_context_acquire(AioContext *ctx); |
23 | * from @cow_start and @cow_end into one single write operation. | 29 | /* Relinquish ownership of the AioContext. */ |
24 | */ | 30 | void aio_context_release(AioContext *ctx); |
25 | QEMUIOVector *data_qiov; | 31 | |
26 | + size_t data_qiov_offset; | 32 | +/** |
27 | 33 | + * aio_bh_schedule_oneshot_full: Allocate a new bottom half structure that will | |
28 | /** Pointer to next L2Meta of the same write request */ | 34 | + * run only once and as soon as possible. |
29 | struct QCowL2Meta *next; | 35 | + * |
30 | diff --git a/include/qemu/iov.h b/include/qemu/iov.h | 36 | + * @name: A human-readable identifier for debugging purposes. |
37 | + */ | ||
38 | +void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, | ||
39 | + const char *name); | ||
40 | + | ||
41 | /** | ||
42 | * aio_bh_schedule_oneshot: Allocate a new bottom half structure that will run | ||
43 | * only once and as soon as possible. | ||
44 | + * | ||
45 | + * A convenience wrapper for aio_bh_schedule_oneshot_full() that uses cb as the | ||
46 | + * name string. | ||
47 | */ | ||
48 | -void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque); | ||
49 | +#define aio_bh_schedule_oneshot(ctx, cb, opaque) \ | ||
50 | + aio_bh_schedule_oneshot_full((ctx), (cb), (opaque), (stringify(cb))) | ||
51 | |||
52 | /** | ||
53 | - * aio_bh_new: Allocate a new bottom half structure. | ||
54 | + * aio_bh_new_full: Allocate a new bottom half structure. | ||
55 | * | ||
56 | * Bottom halves are lightweight callbacks whose invocation is guaranteed | ||
57 | * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure | ||
58 | * is opaque and must be allocated prior to its use. | ||
59 | + * | ||
60 | + * @name: A human-readable identifier for debugging purposes. | ||
61 | */ | ||
62 | -QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); | ||
63 | +QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, | ||
64 | + const char *name); | ||
65 | + | ||
66 | +/** | ||
67 | + * aio_bh_new: Allocate a new bottom half structure | ||
68 | + * | ||
69 | + * A convenience wrapper for aio_bh_new_full() that uses the cb as the name | ||
70 | + * string. | ||
71 | + */ | ||
72 | +#define aio_bh_new(ctx, cb, opaque) \ | ||
73 | + aio_bh_new_full((ctx), (cb), (opaque), (stringify(cb))) | ||
74 | |||
75 | /** | ||
76 | * aio_notify: Force processing of pending events. | ||
77 | diff --git a/include/qemu/main-loop.h b/include/qemu/main-loop.h | ||
31 | index XXXXXXX..XXXXXXX 100644 | 78 | index XXXXXXX..XXXXXXX 100644 |
32 | --- a/include/qemu/iov.h | 79 | --- a/include/qemu/main-loop.h |
33 | +++ b/include/qemu/iov.h | 80 | +++ b/include/qemu/main-loop.h |
34 | @@ -XXX,XX +XXX,XX @@ void qemu_iovec_init_extended( | 81 | @@ -XXX,XX +XXX,XX @@ void qemu_cond_timedwait_iothread(QemuCond *cond, int ms); |
35 | void *tail_buf, size_t tail_len); | 82 | |
36 | void qemu_iovec_init_slice(QEMUIOVector *qiov, QEMUIOVector *source, | 83 | void qemu_fd_register(int fd); |
37 | size_t offset, size_t len); | 84 | |
38 | +int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len); | 85 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque); |
39 | void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len); | 86 | +#define qemu_bh_new(cb, opaque) \ |
40 | void qemu_iovec_concat(QEMUIOVector *dst, | 87 | + qemu_bh_new_full((cb), (opaque), (stringify(cb))) |
41 | QEMUIOVector *src, size_t soffset, size_t sbytes); | 88 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name); |
42 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 89 | void qemu_bh_schedule_idle(QEMUBH *bh); |
90 | |||
91 | enum { | ||
92 | diff --git a/tests/unit/ptimer-test-stubs.c b/tests/unit/ptimer-test-stubs.c | ||
43 | index XXXXXXX..XXXXXXX 100644 | 93 | index XXXXXXX..XXXXXXX 100644 |
44 | --- a/block/qcow2-cluster.c | 94 | --- a/tests/unit/ptimer-test-stubs.c |
45 | +++ b/block/qcow2-cluster.c | 95 | +++ b/tests/unit/ptimer-test-stubs.c |
46 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | 96 | @@ -XXX,XX +XXX,XX @@ int64_t qemu_clock_deadline_ns_all(QEMUClockType type, int attr_mask) |
47 | assert(start->nb_bytes <= UINT_MAX - end->nb_bytes); | 97 | return deadline; |
48 | assert(start->nb_bytes + end->nb_bytes <= UINT_MAX - data_bytes); | 98 | } |
49 | assert(start->offset + start->nb_bytes <= end->offset); | 99 | |
50 | - assert(!m->data_qiov || m->data_qiov->size == data_bytes); | 100 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) |
51 | 101 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) | |
52 | if ((start->nb_bytes == 0 && end->nb_bytes == 0) || m->skip_cow) { | 102 | { |
53 | return 0; | 103 | QEMUBH *bh = g_new(QEMUBH, 1); |
54 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | 104 | |
55 | /* The part of the buffer where the end region is located */ | 105 | diff --git a/util/async.c b/util/async.c |
56 | end_buffer = start_buffer + buffer_size - end->nb_bytes; | ||
57 | |||
58 | - qemu_iovec_init(&qiov, 2 + (m->data_qiov ? m->data_qiov->niov : 0)); | ||
59 | + qemu_iovec_init(&qiov, 2 + (m->data_qiov ? | ||
60 | + qemu_iovec_subvec_niov(m->data_qiov, | ||
61 | + m->data_qiov_offset, | ||
62 | + data_bytes) | ||
63 | + : 0)); | ||
64 | |||
65 | qemu_co_mutex_unlock(&s->lock); | ||
66 | /* First we read the existing data from both COW regions. We | ||
67 | @@ -XXX,XX +XXX,XX @@ static int perform_cow(BlockDriverState *bs, QCowL2Meta *m) | ||
68 | if (start->nb_bytes) { | ||
69 | qemu_iovec_add(&qiov, start_buffer, start->nb_bytes); | ||
70 | } | ||
71 | - qemu_iovec_concat(&qiov, m->data_qiov, 0, data_bytes); | ||
72 | + qemu_iovec_concat(&qiov, m->data_qiov, m->data_qiov_offset, data_bytes); | ||
73 | if (end->nb_bytes) { | ||
74 | qemu_iovec_add(&qiov, end_buffer, end->nb_bytes); | ||
75 | } | ||
76 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
77 | index XXXXXXX..XXXXXXX 100644 | 106 | index XXXXXXX..XXXXXXX 100644 |
78 | --- a/block/qcow2.c | 107 | --- a/util/async.c |
79 | +++ b/block/qcow2.c | 108 | +++ b/util/async.c |
80 | @@ -XXX,XX +XXX,XX @@ fail: | 109 | @@ -XXX,XX +XXX,XX @@ enum { |
81 | /* Check if it's possible to merge a write request with the writing of | 110 | |
82 | * the data from the COW regions */ | 111 | struct QEMUBH { |
83 | static bool merge_cow(uint64_t offset, unsigned bytes, | 112 | AioContext *ctx; |
84 | - QEMUIOVector *hd_qiov, QCowL2Meta *l2meta) | 113 | + const char *name; |
85 | + QEMUIOVector *qiov, size_t qiov_offset, | 114 | QEMUBHFunc *cb; |
86 | + QCowL2Meta *l2meta) | 115 | void *opaque; |
116 | QSLIST_ENTRY(QEMUBH) next; | ||
117 | @@ -XXX,XX +XXX,XX @@ static QEMUBH *aio_bh_dequeue(BHList *head, unsigned *flags) | ||
118 | return bh; | ||
119 | } | ||
120 | |||
121 | -void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) | ||
122 | +void aio_bh_schedule_oneshot_full(AioContext *ctx, QEMUBHFunc *cb, | ||
123 | + void *opaque, const char *name) | ||
87 | { | 124 | { |
88 | QCowL2Meta *m; | 125 | QEMUBH *bh; |
89 | 126 | bh = g_new(QEMUBH, 1); | |
90 | @@ -XXX,XX +XXX,XX @@ static bool merge_cow(uint64_t offset, unsigned bytes, | 127 | @@ -XXX,XX +XXX,XX @@ void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque) |
91 | 128 | .ctx = ctx, | |
92 | /* Make sure that adding both COW regions to the QEMUIOVector | 129 | .cb = cb, |
93 | * does not exceed IOV_MAX */ | 130 | .opaque = opaque, |
94 | - if (hd_qiov->niov > IOV_MAX - 2) { | 131 | + .name = name, |
95 | + if (qemu_iovec_subvec_niov(qiov, qiov_offset, bytes) > IOV_MAX - 2) { | 132 | }; |
96 | continue; | 133 | aio_bh_enqueue(bh, BH_SCHEDULED | BH_ONESHOT); |
97 | } | ||
98 | |||
99 | - m->data_qiov = hd_qiov; | ||
100 | + m->data_qiov = qiov; | ||
101 | + m->data_qiov_offset = qiov_offset; | ||
102 | return true; | ||
103 | } | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static int handle_alloc_space(BlockDriverState *bs, QCowL2Meta *l2meta) | ||
106 | return 0; | ||
107 | } | 134 | } |
108 | 135 | ||
109 | -static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | 136 | -QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) |
110 | - uint64_t bytes, QEMUIOVector *qiov, | 137 | +QEMUBH *aio_bh_new_full(AioContext *ctx, QEMUBHFunc *cb, void *opaque, |
111 | - int flags) | 138 | + const char *name) |
112 | +static coroutine_fn int qcow2_co_pwritev_part( | ||
113 | + BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||
114 | + QEMUIOVector *qiov, size_t qiov_offset, int flags) | ||
115 | { | 139 | { |
116 | BDRVQcow2State *s = bs->opaque; | 140 | QEMUBH *bh; |
117 | int offset_in_cluster; | 141 | bh = g_new(QEMUBH, 1); |
118 | int ret; | 142 | @@ -XXX,XX +XXX,XX @@ QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque) |
119 | unsigned int cur_bytes; /* number of sectors in current iteration */ | 143 | .ctx = ctx, |
120 | uint64_t cluster_offset; | 144 | .cb = cb, |
121 | - QEMUIOVector hd_qiov; | 145 | .opaque = opaque, |
122 | + QEMUIOVector encrypted_qiov; | 146 | + .name = name, |
123 | uint64_t bytes_done = 0; | 147 | }; |
124 | uint8_t *cluster_data = NULL; | 148 | return bh; |
125 | QCowL2Meta *l2meta = NULL; | 149 | } |
126 | 150 | diff --git a/util/main-loop.c b/util/main-loop.c | |
127 | trace_qcow2_writev_start_req(qemu_coroutine_self(), offset, bytes); | 151 | index XXXXXXX..XXXXXXX 100644 |
128 | 152 | --- a/util/main-loop.c | |
129 | - qemu_iovec_init(&hd_qiov, qiov->niov); | 153 | +++ b/util/main-loop.c |
130 | - | 154 | @@ -XXX,XX +XXX,XX @@ void main_loop_wait(int nonblocking) |
131 | qemu_co_mutex_lock(&s->lock); | 155 | |
132 | 156 | /* Functions to operate on the main QEMU AioContext. */ | |
133 | while (bytes != 0) { | 157 | |
134 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | 158 | -QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) |
135 | 159 | +QEMUBH *qemu_bh_new_full(QEMUBHFunc *cb, void *opaque, const char *name) | |
136 | qemu_co_mutex_unlock(&s->lock); | ||
137 | |||
138 | - qemu_iovec_reset(&hd_qiov); | ||
139 | - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); | ||
140 | - | ||
141 | if (bs->encrypted) { | ||
142 | assert(s->crypto); | ||
143 | if (!cluster_data) { | ||
144 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | ||
145 | } | ||
146 | } | ||
147 | |||
148 | - assert(hd_qiov.size <= | ||
149 | - QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); | ||
150 | - qemu_iovec_to_buf(&hd_qiov, 0, cluster_data, hd_qiov.size); | ||
151 | + assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); | ||
152 | + qemu_iovec_to_buf(qiov, qiov_offset + bytes_done, | ||
153 | + cluster_data, cur_bytes); | ||
154 | |||
155 | if (qcow2_co_encrypt(bs, cluster_offset, offset, | ||
156 | cluster_data, cur_bytes) < 0) { | ||
157 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | ||
158 | goto out_unlocked; | ||
159 | } | ||
160 | |||
161 | - qemu_iovec_reset(&hd_qiov); | ||
162 | - qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); | ||
163 | + qemu_iovec_init_buf(&encrypted_qiov, cluster_data, cur_bytes); | ||
164 | } | ||
165 | |||
166 | /* Try to efficiently initialize the physical space with zeroes */ | ||
167 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_pwritev(BlockDriverState *bs, uint64_t offset, | ||
168 | * writing of the guest data together with that of the COW regions. | ||
169 | * If it's not possible (or not necessary) then write the | ||
170 | * guest data now. */ | ||
171 | - if (!merge_cow(offset, cur_bytes, &hd_qiov, l2meta)) { | ||
172 | + if (!merge_cow(offset, cur_bytes, | ||
173 | + bs->encrypted ? &encrypted_qiov : qiov, | ||
174 | + bs->encrypted ? 0 : qiov_offset + bytes_done, l2meta)) | ||
175 | + { | ||
176 | BLKDBG_EVENT(bs->file, BLKDBG_WRITE_AIO); | ||
177 | trace_qcow2_writev_data(qemu_coroutine_self(), | ||
178 | cluster_offset + offset_in_cluster); | ||
179 | - ret = bdrv_co_pwritev(s->data_file, | ||
180 | - cluster_offset + offset_in_cluster, | ||
181 | - cur_bytes, &hd_qiov, 0); | ||
182 | + ret = bdrv_co_pwritev_part( | ||
183 | + s->data_file, cluster_offset + offset_in_cluster, cur_bytes, | ||
184 | + bs->encrypted ? &encrypted_qiov : qiov, | ||
185 | + bs->encrypted ? 0 : qiov_offset + bytes_done, 0); | ||
186 | if (ret < 0) { | ||
187 | goto out_unlocked; | ||
188 | } | ||
189 | @@ -XXX,XX +XXX,XX @@ out_locked: | ||
190 | |||
191 | qemu_co_mutex_unlock(&s->lock); | ||
192 | |||
193 | - qemu_iovec_destroy(&hd_qiov); | ||
194 | qemu_vfree(cluster_data); | ||
195 | trace_qcow2_writev_done_req(qemu_coroutine_self(), ret); | ||
196 | |||
197 | @@ -XXX,XX +XXX,XX @@ fail: | ||
198 | /* XXX: put compressed sectors first, then all the cluster aligned | ||
199 | tables to avoid losing bytes in alignment */ | ||
200 | static coroutine_fn int | ||
201 | -qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
202 | - uint64_t bytes, QEMUIOVector *qiov) | ||
203 | +qcow2_co_pwritev_compressed_part(BlockDriverState *bs, | ||
204 | + uint64_t offset, uint64_t bytes, | ||
205 | + QEMUIOVector *qiov, size_t qiov_offset) | ||
206 | { | 160 | { |
207 | BDRVQcow2State *s = bs->opaque; | 161 | - return aio_bh_new(qemu_aio_context, cb, opaque); |
208 | int ret; | 162 | + return aio_bh_new_full(qemu_aio_context, cb, opaque, name); |
209 | @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
210 | /* Zero-pad last write if image size is not cluster aligned */ | ||
211 | memset(buf + bytes, 0, s->cluster_size - bytes); | ||
212 | } | ||
213 | - qemu_iovec_to_buf(qiov, 0, buf, bytes); | ||
214 | + qemu_iovec_to_buf(qiov, qiov_offset, buf, bytes); | ||
215 | |||
216 | out_buf = g_malloc(s->cluster_size); | ||
217 | |||
218 | @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
219 | buf, s->cluster_size); | ||
220 | if (out_len == -ENOMEM) { | ||
221 | /* could not compress: write normal cluster */ | ||
222 | - ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); | ||
223 | + ret = qcow2_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, 0); | ||
224 | if (ret < 0) { | ||
225 | goto fail; | ||
226 | } | ||
227 | @@ -XXX,XX +XXX,XX @@ static int qcow2_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, | ||
228 | BDRVQcow2State *s = bs->opaque; | ||
229 | |||
230 | BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_SAVE); | ||
231 | - return bs->drv->bdrv_co_pwritev(bs, qcow2_vm_state_offset(s) + pos, | ||
232 | - qiov->size, qiov, 0); | ||
233 | + return bs->drv->bdrv_co_pwritev_part(bs, qcow2_vm_state_offset(s) + pos, | ||
234 | + qiov->size, qiov, 0, 0); | ||
235 | } | 163 | } |
236 | 164 | ||
237 | static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, | ||
238 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = { | ||
239 | .bdrv_co_block_status = qcow2_co_block_status, | ||
240 | |||
241 | .bdrv_co_preadv_part = qcow2_co_preadv_part, | ||
242 | - .bdrv_co_pwritev = qcow2_co_pwritev, | ||
243 | + .bdrv_co_pwritev_part = qcow2_co_pwritev_part, | ||
244 | .bdrv_co_flush_to_os = qcow2_co_flush_to_os, | ||
245 | |||
246 | .bdrv_co_pwrite_zeroes = qcow2_co_pwrite_zeroes, | ||
247 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = { | ||
248 | .bdrv_co_copy_range_from = qcow2_co_copy_range_from, | ||
249 | .bdrv_co_copy_range_to = qcow2_co_copy_range_to, | ||
250 | .bdrv_co_truncate = qcow2_co_truncate, | ||
251 | - .bdrv_co_pwritev_compressed = qcow2_co_pwritev_compressed, | ||
252 | + .bdrv_co_pwritev_compressed_part = qcow2_co_pwritev_compressed_part, | ||
253 | .bdrv_make_empty = qcow2_make_empty, | ||
254 | |||
255 | .bdrv_snapshot_create = qcow2_snapshot_create, | ||
256 | diff --git a/util/iov.c b/util/iov.c | ||
257 | index XXXXXXX..XXXXXXX 100644 | ||
258 | --- a/util/iov.c | ||
259 | +++ b/util/iov.c | ||
260 | @@ -XXX,XX +XXX,XX @@ static struct iovec *qiov_slice(QEMUIOVector *qiov, | ||
261 | return iov; | ||
262 | } | ||
263 | |||
264 | +int qemu_iovec_subvec_niov(QEMUIOVector *qiov, size_t offset, size_t len) | ||
265 | +{ | ||
266 | + size_t head, tail; | ||
267 | + int niov; | ||
268 | + | ||
269 | + qiov_slice(qiov, offset, len, &head, &tail, &niov); | ||
270 | + | ||
271 | + return niov; | ||
272 | +} | ||
273 | + | ||
274 | /* | 165 | /* |
275 | * Compile new iovec, combining @head_buf buffer, sub-qiov of @mid_qiov, | ||
276 | * and @tail_buf buffer into new qiov. | ||
277 | -- | 166 | -- |
278 | 2.21.0 | 167 | 2.31.1 |
279 | 168 | ||
280 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | BHs must be deleted before the AioContext is finalized. If not, it's a |
---|---|---|---|
2 | bug and probably indicates that some part of the program still expects | ||
3 | the BH to run in the future. That can lead to memory leaks, inconsistent | ||
4 | state, or just hangs. | ||
2 | 5 | ||
3 | Implement and use new interface to get rid of hd_qiov. | 6 | Unfortunately the assert(flags & BH_DELETED) call in aio_ctx_finalize() |
7 | is difficult to debug because the assertion failure contains no | ||
8 | information about the BH! | ||
4 | 9 | ||
5 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 10 | Use the QEMUBH name field added in the previous patch to show a useful |
6 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | 11 | error when a leaked BH is detected. |
7 | Message-id: 20190604161514.262241-12-vsementsov@virtuozzo.com | 12 | |
8 | Message-Id: <20190604161514.262241-12-vsementsov@virtuozzo.com> | 13 | Suggested-by: Eric Ernst <eric.g.ernst@gmail.com> |
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
15 | Message-Id: <20210414200247.917496-3-stefanha@redhat.com> | ||
10 | --- | 16 | --- |
11 | block/qcow2-cluster.c | 5 +++-- | 17 | util/async.c | 16 ++++++++++++++-- |
12 | block/qcow2.c | 49 +++++++++++++++++++------------------------ | 18 | 1 file changed, 14 insertions(+), 2 deletions(-) |
13 | 2 files changed, 25 insertions(+), 29 deletions(-) | ||
14 | 19 | ||
15 | diff --git a/block/qcow2-cluster.c b/block/qcow2-cluster.c | 20 | diff --git a/util/async.c b/util/async.c |
16 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
17 | --- a/block/qcow2-cluster.c | 22 | --- a/util/async.c |
18 | +++ b/block/qcow2-cluster.c | 23 | +++ b/util/async.c |
19 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn do_perform_cow_read(BlockDriverState *bs, | 24 | @@ -XXX,XX +XXX,XX @@ aio_ctx_finalize(GSource *source) |
20 | * interface. This avoids double I/O throttling and request tracking, | 25 | assert(QSIMPLEQ_EMPTY(&ctx->bh_slice_list)); |
21 | * which can lead to deadlock when block layer copy-on-read is enabled. | 26 | |
22 | */ | 27 | while ((bh = aio_bh_dequeue(&ctx->bh_list, &flags))) { |
23 | - ret = bs->drv->bdrv_co_preadv(bs, src_cluster_offset + offset_in_cluster, | 28 | - /* qemu_bh_delete() must have been called on BHs in this AioContext */ |
24 | - qiov->size, qiov, 0); | 29 | - assert(flags & BH_DELETED); |
25 | + ret = bs->drv->bdrv_co_preadv_part(bs, | 30 | + /* |
26 | + src_cluster_offset + offset_in_cluster, | 31 | + * qemu_bh_delete() must have been called on BHs in this AioContext. In |
27 | + qiov->size, qiov, 0, 0); | 32 | + * many cases memory leaks, hangs, or inconsistent state occur when a |
28 | if (ret < 0) { | 33 | + * BH is leaked because something still expects it to run. |
29 | return ret; | 34 | + * |
35 | + * If you hit this, fix the lifecycle of the BH so that | ||
36 | + * qemu_bh_delete() and any associated cleanup is called before the | ||
37 | + * AioContext is finalized. | ||
38 | + */ | ||
39 | + if (unlikely(!(flags & BH_DELETED))) { | ||
40 | + fprintf(stderr, "%s: BH '%s' leaked, aborting...\n", | ||
41 | + __func__, bh->name); | ||
42 | + abort(); | ||
43 | + } | ||
44 | |||
45 | g_free(bh); | ||
30 | } | 46 | } |
31 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/block/qcow2.c | ||
34 | +++ b/block/qcow2.c | ||
35 | @@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs, | ||
36 | uint64_t file_cluster_offset, | ||
37 | uint64_t offset, | ||
38 | uint64_t bytes, | ||
39 | - QEMUIOVector *qiov); | ||
40 | + QEMUIOVector *qiov, | ||
41 | + size_t qiov_offset); | ||
42 | |||
43 | static int qcow2_probe(const uint8_t *buf, int buf_size, const char *filename) | ||
44 | { | ||
45 | @@ -XXX,XX +XXX,XX @@ out: | ||
46 | return ret; | ||
47 | } | ||
48 | |||
49 | -static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
50 | - uint64_t bytes, QEMUIOVector *qiov, | ||
51 | - int flags) | ||
52 | +static coroutine_fn int qcow2_co_preadv_part(BlockDriverState *bs, | ||
53 | + uint64_t offset, uint64_t bytes, | ||
54 | + QEMUIOVector *qiov, | ||
55 | + size_t qiov_offset, int flags) | ||
56 | { | ||
57 | BDRVQcow2State *s = bs->opaque; | ||
58 | int offset_in_cluster; | ||
59 | int ret; | ||
60 | unsigned int cur_bytes; /* number of bytes in current iteration */ | ||
61 | uint64_t cluster_offset = 0; | ||
62 | - uint64_t bytes_done = 0; | ||
63 | - QEMUIOVector hd_qiov; | ||
64 | uint8_t *cluster_data = NULL; | ||
65 | |||
66 | - qemu_iovec_init(&hd_qiov, qiov->niov); | ||
67 | - | ||
68 | while (bytes != 0) { | ||
69 | |||
70 | /* prepare next request */ | ||
71 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
72 | |||
73 | offset_in_cluster = offset_into_cluster(s, offset); | ||
74 | |||
75 | - qemu_iovec_reset(&hd_qiov); | ||
76 | - qemu_iovec_concat(&hd_qiov, qiov, bytes_done, cur_bytes); | ||
77 | - | ||
78 | switch (ret) { | ||
79 | case QCOW2_CLUSTER_UNALLOCATED: | ||
80 | |||
81 | if (bs->backing) { | ||
82 | BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); | ||
83 | - ret = bdrv_co_preadv(bs->backing, offset, cur_bytes, | ||
84 | - &hd_qiov, 0); | ||
85 | + ret = bdrv_co_preadv_part(bs->backing, offset, cur_bytes, | ||
86 | + qiov, qiov_offset, 0); | ||
87 | if (ret < 0) { | ||
88 | goto fail; | ||
89 | } | ||
90 | } else { | ||
91 | /* Note: in this case, no need to wait */ | ||
92 | - qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); | ||
93 | + qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); | ||
94 | } | ||
95 | break; | ||
96 | |||
97 | case QCOW2_CLUSTER_ZERO_PLAIN: | ||
98 | case QCOW2_CLUSTER_ZERO_ALLOC: | ||
99 | - qemu_iovec_memset(&hd_qiov, 0, 0, cur_bytes); | ||
100 | + qemu_iovec_memset(qiov, qiov_offset, 0, cur_bytes); | ||
101 | break; | ||
102 | |||
103 | case QCOW2_CLUSTER_COMPRESSED: | ||
104 | ret = qcow2_co_preadv_compressed(bs, cluster_offset, | ||
105 | offset, cur_bytes, | ||
106 | - &hd_qiov); | ||
107 | + qiov, qiov_offset); | ||
108 | if (ret < 0) { | ||
109 | goto fail; | ||
110 | } | ||
111 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
112 | ret = -EIO; | ||
113 | goto fail; | ||
114 | } | ||
115 | - qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes); | ||
116 | + qemu_iovec_from_buf(qiov, qiov_offset, cluster_data, cur_bytes); | ||
117 | } else { | ||
118 | BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); | ||
119 | - ret = bdrv_co_preadv(s->data_file, | ||
120 | - cluster_offset + offset_in_cluster, | ||
121 | - cur_bytes, &hd_qiov, 0); | ||
122 | + ret = bdrv_co_preadv_part(s->data_file, | ||
123 | + cluster_offset + offset_in_cluster, | ||
124 | + cur_bytes, qiov, qiov_offset, 0); | ||
125 | if (ret < 0) { | ||
126 | goto fail; | ||
127 | } | ||
128 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
129 | |||
130 | bytes -= cur_bytes; | ||
131 | offset += cur_bytes; | ||
132 | - bytes_done += cur_bytes; | ||
133 | + qiov_offset += cur_bytes; | ||
134 | } | ||
135 | ret = 0; | ||
136 | |||
137 | fail: | ||
138 | - qemu_iovec_destroy(&hd_qiov); | ||
139 | qemu_vfree(cluster_data); | ||
140 | |||
141 | return ret; | ||
142 | @@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs, | ||
143 | uint64_t file_cluster_offset, | ||
144 | uint64_t offset, | ||
145 | uint64_t bytes, | ||
146 | - QEMUIOVector *qiov) | ||
147 | + QEMUIOVector *qiov, | ||
148 | + size_t qiov_offset) | ||
149 | { | ||
150 | BDRVQcow2State *s = bs->opaque; | ||
151 | int ret = 0, csize, nb_csectors; | ||
152 | @@ -XXX,XX +XXX,XX @@ qcow2_co_preadv_compressed(BlockDriverState *bs, | ||
153 | goto fail; | ||
154 | } | ||
155 | |||
156 | - qemu_iovec_from_buf(qiov, 0, out_buf + offset_in_cluster, bytes); | ||
157 | + qemu_iovec_from_buf(qiov, qiov_offset, out_buf + offset_in_cluster, bytes); | ||
158 | |||
159 | fail: | ||
160 | qemu_vfree(out_buf); | ||
161 | @@ -XXX,XX +XXX,XX @@ static int qcow2_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, | ||
162 | BDRVQcow2State *s = bs->opaque; | ||
163 | |||
164 | BLKDBG_EVENT(bs->file, BLKDBG_VMSTATE_LOAD); | ||
165 | - return bs->drv->bdrv_co_preadv(bs, qcow2_vm_state_offset(s) + pos, | ||
166 | - qiov->size, qiov, 0); | ||
167 | + return bs->drv->bdrv_co_preadv_part(bs, qcow2_vm_state_offset(s) + pos, | ||
168 | + qiov->size, qiov, 0, 0); | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | @@ -XXX,XX +XXX,XX @@ BlockDriver bdrv_qcow2 = { | ||
173 | .bdrv_has_zero_init_truncate = bdrv_has_zero_init_1, | ||
174 | .bdrv_co_block_status = qcow2_co_block_status, | ||
175 | |||
176 | - .bdrv_co_preadv = qcow2_co_preadv, | ||
177 | + .bdrv_co_preadv_part = qcow2_co_preadv_part, | ||
178 | .bdrv_co_pwritev = qcow2_co_pwritev, | ||
179 | .bdrv_co_flush_to_os = qcow2_co_flush_to_os, | ||
180 | |||
181 | -- | 47 | -- |
182 | 2.21.0 | 48 | 2.31.1 |
183 | 49 | ||
184 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Introduce extended variants of bdrv_co_preadv and bdrv_co_pwritev | 3 | This commit introduces "punch hole" operation and optimizes transfer |
4 | with qiov_offset parameter. | 4 | block size for macOS. |
5 | 5 | ||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 6 | Thanks to Konstantin Nazarov for detailed analysis of a flaw in an |
7 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | 7 | old version of this change: |
8 | Message-id: 20190604161514.262241-10-vsementsov@virtuozzo.com | 8 | https://gist.github.com/akihikodaki/87df4149e7ca87f18dc56807ec5a1bc5#gistcomment-3654667 |
9 | Message-Id: <20190604161514.262241-10-vsementsov@virtuozzo.com> | 9 | |
10 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> | ||
11 | Message-id: 20210705130458.97642-1-akihiko.odaki@gmail.com | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
11 | --- | 13 | --- |
12 | include/block/block_int.h | 6 ++++++ | 14 | block/file-posix.c | 27 +++++++++++++++++++++++++-- |
13 | block/io.c | 29 +++++++++++++++++++++++------ | 15 | 1 file changed, 25 insertions(+), 2 deletions(-) |
14 | 2 files changed, 29 insertions(+), 6 deletions(-) | ||
15 | 16 | ||
16 | diff --git a/include/block/block_int.h b/include/block/block_int.h | 17 | diff --git a/block/file-posix.c b/block/file-posix.c |
17 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/include/block/block_int.h | 19 | --- a/block/file-posix.c |
19 | +++ b/include/block/block_int.h | 20 | +++ b/block/file-posix.c |
20 | @@ -XXX,XX +XXX,XX @@ extern BlockDriver bdrv_qcow2; | 21 | @@ -XXX,XX +XXX,XX @@ |
21 | int coroutine_fn bdrv_co_preadv(BdrvChild *child, | 22 | #if defined(HAVE_HOST_BLOCK_DEVICE) |
22 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | 23 | #include <paths.h> |
23 | BdrvRequestFlags flags); | 24 | #include <sys/param.h> |
24 | +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, | 25 | +#include <sys/mount.h> |
25 | + int64_t offset, unsigned int bytes, | 26 | #include <IOKit/IOKitLib.h> |
26 | + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); | 27 | #include <IOKit/IOBSD.h> |
27 | int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | 28 | #include <IOKit/storage/IOMediaBSDClient.h> |
28 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | 29 | @@ -XXX,XX +XXX,XX @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) |
29 | BdrvRequestFlags flags); | 30 | return; |
30 | +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, | 31 | } |
31 | + int64_t offset, unsigned int bytes, | 32 | |
32 | + QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags); | 33 | +#if defined(__APPLE__) && (__MACH__) |
33 | 34 | + struct statfs buf; | |
34 | static inline int coroutine_fn bdrv_co_pread(BdrvChild *child, | 35 | + |
35 | int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags) | 36 | + if (!fstatfs(s->fd, &buf)) { |
36 | diff --git a/block/io.c b/block/io.c | 37 | + bs->bl.opt_transfer = buf.f_iosize; |
37 | index XXXXXXX..XXXXXXX 100644 | 38 | + bs->bl.pdiscard_alignment = buf.f_bsize; |
38 | --- a/block/io.c | 39 | + } |
39 | +++ b/block/io.c | 40 | +#endif |
40 | @@ -XXX,XX +XXX,XX @@ static void bdrv_padding_destroy(BdrvRequestPadding *pad) | 41 | + |
41 | * | 42 | if (bs->sg || S_ISBLK(st.st_mode)) { |
42 | * Function always succeeds. | 43 | int ret = hdev_get_max_hw_transfer(s->fd, &st); |
43 | */ | 44 | |
44 | -static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov, | 45 | @@ -XXX,XX +XXX,XX @@ out: |
45 | +static bool bdrv_pad_request(BlockDriverState *bs, | 46 | } |
46 | + QEMUIOVector **qiov, size_t *qiov_offset, | 47 | } |
47 | int64_t *offset, unsigned int *bytes, | 48 | |
48 | BdrvRequestPadding *pad) | 49 | +#if defined(CONFIG_FALLOCATE) || defined(BLKZEROOUT) || defined(BLKDISCARD) |
50 | static int translate_err(int err) | ||
49 | { | 51 | { |
50 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov, | 52 | if (err == -ENODEV || err == -ENOSYS || err == -EOPNOTSUPP || |
53 | @@ -XXX,XX +XXX,XX @@ static int translate_err(int err) | ||
51 | } | 54 | } |
52 | 55 | return err; | |
53 | qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, | ||
54 | - *qiov, 0, *bytes, | ||
55 | + *qiov, *qiov_offset, *bytes, | ||
56 | pad->buf + pad->buf_len - pad->tail, pad->tail); | ||
57 | *bytes += pad->head + pad->tail; | ||
58 | *offset -= pad->head; | ||
59 | *qiov = &pad->local_qiov; | ||
60 | + *qiov_offset = 0; | ||
61 | |||
62 | return true; | ||
63 | } | 56 | } |
64 | @@ -XXX,XX +XXX,XX @@ static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov, | 57 | +#endif |
65 | int coroutine_fn bdrv_co_preadv(BdrvChild *child, | 58 | |
66 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | 59 | #ifdef CONFIG_FALLOCATE |
67 | BdrvRequestFlags flags) | 60 | static int do_fallocate(int fd, int mode, off_t offset, off_t len) |
68 | +{ | 61 | @@ -XXX,XX +XXX,XX @@ static int handle_aiocb_discard(void *opaque) |
69 | + return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags); | 62 | } |
70 | +} | 63 | } while (errno == EINTR); |
71 | + | 64 | |
72 | +int coroutine_fn bdrv_co_preadv_part(BdrvChild *child, | 65 | - ret = -errno; |
73 | + int64_t offset, unsigned int bytes, | 66 | + ret = translate_err(-errno); |
74 | + QEMUIOVector *qiov, size_t qiov_offset, | 67 | #endif |
75 | + BdrvRequestFlags flags) | 68 | } else { |
76 | { | 69 | #ifdef CONFIG_FALLOCATE_PUNCH_HOLE |
77 | BlockDriverState *bs = child->bs; | 70 | ret = do_fallocate(s->fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, |
78 | BdrvTrackedRequest req; | 71 | aiocb->aio_offset, aiocb->aio_nbytes); |
79 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, | 72 | + ret = translate_err(-errno); |
80 | flags |= BDRV_REQ_COPY_ON_READ; | 73 | +#elif defined(__APPLE__) && (__MACH__) |
74 | + fpunchhole_t fpunchhole; | ||
75 | + fpunchhole.fp_flags = 0; | ||
76 | + fpunchhole.reserved = 0; | ||
77 | + fpunchhole.fp_offset = aiocb->aio_offset; | ||
78 | + fpunchhole.fp_length = aiocb->aio_nbytes; | ||
79 | + if (fcntl(s->fd, F_PUNCHHOLE, &fpunchhole) == -1) { | ||
80 | + ret = errno == ENODEV ? -ENOTSUP : -errno; | ||
81 | + } else { | ||
82 | + ret = 0; | ||
83 | + } | ||
84 | #endif | ||
81 | } | 85 | } |
82 | 86 | ||
83 | - bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad); | 87 | - ret = translate_err(ret); |
84 | + bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad); | 88 | if (ret == -ENOTSUP) { |
85 | 89 | s->has_discard = false; | |
86 | tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); | ||
87 | ret = bdrv_aligned_preadv(child, &req, offset, bytes, | ||
88 | bs->bl.request_alignment, | ||
89 | - qiov, 0, flags); | ||
90 | + qiov, qiov_offset, flags); | ||
91 | tracked_request_end(&req); | ||
92 | bdrv_dec_in_flight(bs); | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ out: | ||
95 | int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | ||
96 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | ||
97 | BdrvRequestFlags flags) | ||
98 | +{ | ||
99 | + return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags); | ||
100 | +} | ||
101 | + | ||
102 | +int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child, | ||
103 | + int64_t offset, unsigned int bytes, QEMUIOVector *qiov, size_t qiov_offset, | ||
104 | + BdrvRequestFlags flags) | ||
105 | { | ||
106 | BlockDriverState *bs = child->bs; | ||
107 | BdrvTrackedRequest req; | ||
108 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | ||
109 | goto out; | ||
110 | } | 90 | } |
111 | |||
112 | - if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) { | ||
113 | + if (bdrv_pad_request(bs, &qiov, &qiov_offset, &offset, &bytes, &pad)) { | ||
114 | mark_request_serialising(&req, align); | ||
115 | wait_serialising_requests(&req); | ||
116 | bdrv_padding_rmw_read(child, &req, &pad, false); | ||
117 | } | ||
118 | |||
119 | ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, | ||
120 | - qiov, 0, flags); | ||
121 | + qiov, qiov_offset, flags); | ||
122 | |||
123 | bdrv_padding_destroy(&pad); | ||
124 | |||
125 | -- | 91 | -- |
126 | 2.21.0 | 92 | 2.31.1 |
127 | 93 | ||
128 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | We have similar padding code in bdrv_co_pwritev, | 3 | backend_defaults property allow users to control if default block |
4 | bdrv_co_do_pwrite_zeroes and bdrv_co_preadv. Let's combine and unify | 4 | properties should be decided with backend information. |
5 | it. | 5 | |
6 | 6 | If it is off, any backend information will be discarded, which is | |
7 | [Squashed in Vladimir's qemu-iotests 077 fix | 7 | suitable if you plan to perform live migration to a different disk backend. |
8 | --Stefan] | 8 | |
9 | 9 | If it is on, a block device may utilize backend information more | |
10 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 10 | aggressively. |
11 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | 11 | |
12 | Message-id: 20190604161514.262241-4-vsementsov@virtuozzo.com | 12 | By default, it is auto, which uses backend information for block |
13 | Message-Id: <20190604161514.262241-4-vsementsov@virtuozzo.com> | 13 | sizes and ignores the others, which is consistent with the older |
14 | versions. | ||
15 | |||
16 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> | ||
17 | Message-id: 20210705130458.97642-2-akihiko.odaki@gmail.com | ||
14 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 18 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
15 | --- | 19 | --- |
16 | block/io.c | 365 +++++++++++++++++++++++++++++------------------------ | 20 | include/hw/block/block.h | 3 +++ |
17 | 1 file changed, 200 insertions(+), 165 deletions(-) | 21 | hw/block/block.c | 42 ++++++++++++++++++++++++++++++++++---- |
18 | 22 | tests/qemu-iotests/172.out | 38 ++++++++++++++++++++++++++++++++++ | |
19 | diff --git a/block/io.c b/block/io.c | 23 | 3 files changed, 79 insertions(+), 4 deletions(-) |
24 | |||
25 | diff --git a/include/hw/block/block.h b/include/hw/block/block.h | ||
20 | index XXXXXXX..XXXXXXX 100644 | 26 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/block/io.c | 27 | --- a/include/hw/block/block.h |
22 | +++ b/block/io.c | 28 | +++ b/include/hw/block/block.h |
23 | @@ -XXX,XX +XXX,XX @@ out: | 29 | @@ -XXX,XX +XXX,XX @@ |
30 | |||
31 | typedef struct BlockConf { | ||
32 | BlockBackend *blk; | ||
33 | + OnOffAuto backend_defaults; | ||
34 | uint32_t physical_block_size; | ||
35 | uint32_t logical_block_size; | ||
36 | uint32_t min_io_size; | ||
37 | @@ -XXX,XX +XXX,XX @@ static inline unsigned int get_physical_block_exp(BlockConf *conf) | ||
24 | } | 38 | } |
25 | 39 | ||
26 | /* | 40 | #define DEFINE_BLOCK_PROPERTIES_BASE(_state, _conf) \ |
27 | - * Handle a read request in coroutine context | 41 | + DEFINE_PROP_ON_OFF_AUTO("backend_defaults", _state, \ |
28 | + * Request padding | 42 | + _conf.backend_defaults, ON_OFF_AUTO_AUTO), \ |
29 | + * | 43 | DEFINE_PROP_BLOCKSIZE("logical_block_size", _state, \ |
30 | + * |<---- align ----->| |<----- align ---->| | 44 | _conf.logical_block_size), \ |
31 | + * |<- head ->|<------------- bytes ------------->|<-- tail -->| | 45 | DEFINE_PROP_BLOCKSIZE("physical_block_size", _state, \ |
32 | + * | | | | | | | 46 | diff --git a/hw/block/block.c b/hw/block/block.c |
33 | + * -*----------$-------*-------- ... --------*-----$------------*--- | 47 | index XXXXXXX..XXXXXXX 100644 |
34 | + * | | | | | | | 48 | --- a/hw/block/block.c |
35 | + * | offset | | end | | 49 | +++ b/hw/block/block.c |
36 | + * ALIGN_DOWN(offset) ALIGN_UP(offset) ALIGN_DOWN(end) ALIGN_UP(end) | 50 | @@ -XXX,XX +XXX,XX @@ bool blkconf_blocksizes(BlockConf *conf, Error **errp) |
37 | + * [buf ... ) [tail_buf ) | 51 | { |
38 | + * | 52 | BlockBackend *blk = conf->blk; |
39 | + * @buf is an aligned allocation needed to store @head and @tail paddings. @head | 53 | BlockSizes blocksizes; |
40 | + * is placed at the beginning of @buf and @tail at the @end. | 54 | - int backend_ret; |
41 | + * | 55 | + BlockDriverState *bs; |
42 | + * @tail_buf is a pointer to sub-buffer, corresponding to align-sized chunk | 56 | + bool use_blocksizes; |
43 | + * around tail, if tail exists. | 57 | + bool use_bs; |
44 | + * | ||
45 | + * @merge_reads is true for small requests, | ||
46 | + * if @buf_len == @head + bytes + @tail. In this case it is possible that both | ||
47 | + * head and tail exist but @buf_len == align and @tail_buf == @buf. | ||
48 | */ | ||
49 | +typedef struct BdrvRequestPadding { | ||
50 | + uint8_t *buf; | ||
51 | + size_t buf_len; | ||
52 | + uint8_t *tail_buf; | ||
53 | + size_t head; | ||
54 | + size_t tail; | ||
55 | + bool merge_reads; | ||
56 | + QEMUIOVector local_qiov; | ||
57 | +} BdrvRequestPadding; | ||
58 | + | 58 | + |
59 | +static bool bdrv_init_padding(BlockDriverState *bs, | 59 | + switch (conf->backend_defaults) { |
60 | + int64_t offset, int64_t bytes, | 60 | + case ON_OFF_AUTO_AUTO: |
61 | + BdrvRequestPadding *pad) | 61 | + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); |
62 | +{ | 62 | + use_bs = false; |
63 | + uint64_t align = bs->bl.request_alignment; | 63 | + break; |
64 | + size_t sum; | ||
65 | + | 64 | + |
66 | + memset(pad, 0, sizeof(*pad)); | 65 | + case ON_OFF_AUTO_ON: |
66 | + use_blocksizes = !blk_probe_blocksizes(blk, &blocksizes); | ||
67 | + bs = blk_bs(blk); | ||
68 | + use_bs = bs; | ||
69 | + break; | ||
67 | + | 70 | + |
68 | + pad->head = offset & (align - 1); | 71 | + case ON_OFF_AUTO_OFF: |
69 | + pad->tail = ((offset + bytes) & (align - 1)); | 72 | + use_blocksizes = false; |
70 | + if (pad->tail) { | 73 | + use_bs = false; |
71 | + pad->tail = align - pad->tail; | 74 | + break; |
75 | + | ||
76 | + default: | ||
77 | + abort(); | ||
72 | + } | 78 | + } |
73 | + | 79 | |
74 | + if ((!pad->head && !pad->tail) || !bytes) { | 80 | - backend_ret = blk_probe_blocksizes(blk, &blocksizes); |
75 | + return false; | 81 | /* fill in detected values if they are not defined via qemu command line */ |
76 | + } | 82 | if (!conf->physical_block_size) { |
77 | + | 83 | - if (!backend_ret) { |
78 | + sum = pad->head + bytes + pad->tail; | 84 | + if (use_blocksizes) { |
79 | + pad->buf_len = (sum > align && pad->head && pad->tail) ? 2 * align : align; | 85 | conf->physical_block_size = blocksizes.phys; |
80 | + pad->buf = qemu_blockalign(bs, pad->buf_len); | 86 | } else { |
81 | + pad->merge_reads = sum == pad->buf_len; | 87 | conf->physical_block_size = BDRV_SECTOR_SIZE; |
82 | + if (pad->tail) { | 88 | } |
83 | + pad->tail_buf = pad->buf + pad->buf_len - align; | 89 | } |
84 | + } | 90 | if (!conf->logical_block_size) { |
85 | + | 91 | - if (!backend_ret) { |
86 | + return true; | 92 | + if (use_blocksizes) { |
87 | +} | 93 | conf->logical_block_size = blocksizes.log; |
88 | + | 94 | } else { |
89 | +static int bdrv_padding_rmw_read(BdrvChild *child, | 95 | conf->logical_block_size = BDRV_SECTOR_SIZE; |
90 | + BdrvTrackedRequest *req, | 96 | } |
91 | + BdrvRequestPadding *pad, | 97 | } |
92 | + bool zero_middle) | 98 | + if (use_bs) { |
93 | +{ | 99 | + if (!conf->opt_io_size) { |
94 | + QEMUIOVector local_qiov; | 100 | + conf->opt_io_size = bs->bl.opt_transfer; |
95 | + BlockDriverState *bs = child->bs; | ||
96 | + uint64_t align = bs->bl.request_alignment; | ||
97 | + int ret; | ||
98 | + | ||
99 | + assert(req->serialising && pad->buf); | ||
100 | + | ||
101 | + if (pad->head || pad->merge_reads) { | ||
102 | + uint64_t bytes = pad->merge_reads ? pad->buf_len : align; | ||
103 | + | ||
104 | + qemu_iovec_init_buf(&local_qiov, pad->buf, bytes); | ||
105 | + | ||
106 | + if (pad->head) { | ||
107 | + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); | ||
108 | + } | 101 | + } |
109 | + if (pad->merge_reads && pad->tail) { | 102 | + if (conf->discard_granularity == -1) { |
110 | + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); | 103 | + if (bs->bl.pdiscard_alignment) { |
111 | + } | 104 | + conf->discard_granularity = bs->bl.pdiscard_alignment; |
112 | + ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes, | 105 | + } else if (bs->bl.request_alignment != 1) { |
113 | + align, &local_qiov, 0); | 106 | + conf->discard_granularity = bs->bl.request_alignment; |
114 | + if (ret < 0) { | 107 | + } |
115 | + return ret; | ||
116 | + } | ||
117 | + if (pad->head) { | ||
118 | + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); | ||
119 | + } | ||
120 | + if (pad->merge_reads && pad->tail) { | ||
121 | + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); | ||
122 | + } | ||
123 | + | ||
124 | + if (pad->merge_reads) { | ||
125 | + goto zero_mem; | ||
126 | + } | 108 | + } |
127 | + } | 109 | + } |
128 | + | 110 | |
129 | + if (pad->tail) { | 111 | if (conf->logical_block_size > conf->physical_block_size) { |
130 | + qemu_iovec_init_buf(&local_qiov, pad->tail_buf, align); | 112 | error_setg(errp, |
131 | + | 113 | diff --git a/tests/qemu-iotests/172.out b/tests/qemu-iotests/172.out |
132 | + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); | 114 | index XXXXXXX..XXXXXXX 100644 |
133 | + ret = bdrv_aligned_preadv( | 115 | --- a/tests/qemu-iotests/172.out |
134 | + child, req, | 116 | +++ b/tests/qemu-iotests/172.out |
135 | + req->overlap_offset + req->overlap_bytes - align, | 117 | @@ -XXX,XX +XXX,XX @@ Testing: |
136 | + align, align, &local_qiov, 0); | 118 | dev: floppy, id "" |
137 | + if (ret < 0) { | 119 | unit = 0 (0x0) |
138 | + return ret; | 120 | drive = "floppy0" |
139 | + } | 121 | + backend_defaults = "auto" |
140 | + bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); | 122 | logical_block_size = 512 (512 B) |
141 | + } | 123 | physical_block_size = 512 (512 B) |
142 | + | 124 | min_io_size = 0 (0 B) |
143 | +zero_mem: | 125 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 |
144 | + if (zero_middle) { | 126 | dev: floppy, id "" |
145 | + memset(pad->buf + pad->head, 0, pad->buf_len - pad->head - pad->tail); | 127 | unit = 0 (0x0) |
146 | + } | 128 | drive = "floppy0" |
147 | + | 129 | + backend_defaults = "auto" |
148 | + return 0; | 130 | logical_block_size = 512 (512 B) |
149 | +} | 131 | physical_block_size = 512 (512 B) |
150 | + | 132 | min_io_size = 0 (0 B) |
151 | +static void bdrv_padding_destroy(BdrvRequestPadding *pad) | 133 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 |
152 | +{ | 134 | dev: floppy, id "" |
153 | + if (pad->buf) { | 135 | unit = 1 (0x1) |
154 | + qemu_vfree(pad->buf); | 136 | drive = "floppy1" |
155 | + qemu_iovec_destroy(&pad->local_qiov); | 137 | + backend_defaults = "auto" |
156 | + } | 138 | logical_block_size = 512 (512 B) |
157 | +} | 139 | physical_block_size = 512 (512 B) |
158 | + | 140 | min_io_size = 0 (0 B) |
159 | +/* | 141 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 |
160 | + * bdrv_pad_request | 142 | dev: floppy, id "" |
161 | + * | 143 | unit = 0 (0x0) |
162 | + * Exchange request parameters with padded request if needed. Don't include RMW | 144 | drive = "floppy0" |
163 | + * read of padding, bdrv_padding_rmw_read() should be called separately if | 145 | + backend_defaults = "auto" |
164 | + * needed. | 146 | logical_block_size = 512 (512 B) |
165 | + * | 147 | physical_block_size = 512 (512 B) |
166 | + * All parameters except @bs are in-out: they represent original request at | 148 | min_io_size = 0 (0 B) |
167 | + * function call and padded (if padding needed) at function finish. | 149 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 |
168 | + * | 150 | dev: floppy, id "" |
169 | + * Function always succeeds. | 151 | unit = 1 (0x1) |
170 | + */ | 152 | drive = "floppy1" |
171 | +static bool bdrv_pad_request(BlockDriverState *bs, QEMUIOVector **qiov, | 153 | + backend_defaults = "auto" |
172 | + int64_t *offset, unsigned int *bytes, | 154 | logical_block_size = 512 (512 B) |
173 | + BdrvRequestPadding *pad) | 155 | physical_block_size = 512 (512 B) |
174 | +{ | 156 | min_io_size = 0 (0 B) |
175 | + if (!bdrv_init_padding(bs, *offset, *bytes, pad)) { | 157 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -fdb TEST_DIR/t.qcow2.2 |
176 | + return false; | 158 | dev: floppy, id "" |
177 | + } | 159 | unit = 0 (0x0) |
178 | + | 160 | drive = "floppy0" |
179 | + qemu_iovec_init_extended(&pad->local_qiov, pad->buf, pad->head, | 161 | + backend_defaults = "auto" |
180 | + *qiov, 0, *bytes, | 162 | logical_block_size = 512 (512 B) |
181 | + pad->buf + pad->buf_len - pad->tail, pad->tail); | 163 | physical_block_size = 512 (512 B) |
182 | + *bytes += pad->head + pad->tail; | 164 | min_io_size = 0 (0 B) |
183 | + *offset -= pad->head; | 165 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb |
184 | + *qiov = &pad->local_qiov; | 166 | dev: floppy, id "" |
185 | + | 167 | unit = 1 (0x1) |
186 | + return true; | 168 | drive = "floppy1" |
187 | +} | 169 | + backend_defaults = "auto" |
188 | + | 170 | logical_block_size = 512 (512 B) |
189 | int coroutine_fn bdrv_co_preadv(BdrvChild *child, | 171 | physical_block_size = 512 (512 B) |
190 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | 172 | min_io_size = 0 (0 B) |
191 | BdrvRequestFlags flags) | 173 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb |
192 | { | 174 | dev: floppy, id "" |
193 | BlockDriverState *bs = child->bs; | 175 | unit = 0 (0x0) |
194 | - BlockDriver *drv = bs->drv; | 176 | drive = "floppy0" |
195 | BdrvTrackedRequest req; | 177 | + backend_defaults = "auto" |
196 | - | 178 | logical_block_size = 512 (512 B) |
197 | - uint64_t align = bs->bl.request_alignment; | 179 | physical_block_size = 512 (512 B) |
198 | - uint8_t *head_buf = NULL; | 180 | min_io_size = 0 (0 B) |
199 | - uint8_t *tail_buf = NULL; | 181 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 |
200 | - QEMUIOVector local_qiov; | 182 | dev: floppy, id "" |
201 | - bool use_local_qiov = false; | 183 | unit = 0 (0x0) |
202 | + BdrvRequestPadding pad; | 184 | drive = "floppy0" |
203 | int ret; | 185 | + backend_defaults = "auto" |
204 | 186 | logical_block_size = 512 (512 B) | |
205 | - trace_bdrv_co_preadv(child->bs, offset, bytes, flags); | 187 | physical_block_size = 512 (512 B) |
206 | - | 188 | min_io_size = 0 (0 B) |
207 | - if (!drv) { | 189 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 |
208 | - return -ENOMEDIUM; | 190 | dev: floppy, id "" |
209 | - } | 191 | unit = 1 (0x1) |
210 | + trace_bdrv_co_preadv(bs, offset, bytes, flags); | 192 | drive = "floppy1" |
211 | 193 | + backend_defaults = "auto" | |
212 | ret = bdrv_check_byte_request(bs, offset, bytes); | 194 | logical_block_size = 512 (512 B) |
213 | if (ret < 0) { | 195 | physical_block_size = 512 (512 B) |
214 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, | 196 | min_io_size = 0 (0 B) |
215 | flags |= BDRV_REQ_COPY_ON_READ; | 197 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2,index=1 |
216 | } | 198 | dev: floppy, id "" |
217 | 199 | unit = 0 (0x0) | |
218 | - /* Align read if necessary by padding qiov */ | 200 | drive = "floppy0" |
219 | - if (offset & (align - 1)) { | 201 | + backend_defaults = "auto" |
220 | - head_buf = qemu_blockalign(bs, align); | 202 | logical_block_size = 512 (512 B) |
221 | - qemu_iovec_init(&local_qiov, qiov->niov + 2); | 203 | physical_block_size = 512 (512 B) |
222 | - qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); | 204 | min_io_size = 0 (0 B) |
223 | - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); | 205 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t |
224 | - use_local_qiov = true; | 206 | dev: floppy, id "" |
225 | - | 207 | unit = 1 (0x1) |
226 | - bytes += offset & (align - 1); | 208 | drive = "floppy1" |
227 | - offset = offset & ~(align - 1); | 209 | + backend_defaults = "auto" |
228 | - } | 210 | logical_block_size = 512 (512 B) |
229 | - | 211 | physical_block_size = 512 (512 B) |
230 | - if ((offset + bytes) & (align - 1)) { | 212 | min_io_size = 0 (0 B) |
231 | - if (!use_local_qiov) { | 213 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=floppy,file=TEST_DIR/t |
232 | - qemu_iovec_init(&local_qiov, qiov->niov + 1); | 214 | dev: floppy, id "" |
233 | - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); | 215 | unit = 0 (0x0) |
234 | - use_local_qiov = true; | 216 | drive = "floppy0" |
235 | - } | 217 | + backend_defaults = "auto" |
236 | - tail_buf = qemu_blockalign(bs, align); | 218 | logical_block_size = 512 (512 B) |
237 | - qemu_iovec_add(&local_qiov, tail_buf, | 219 | physical_block_size = 512 (512 B) |
238 | - align - ((offset + bytes) & (align - 1))); | 220 | min_io_size = 0 (0 B) |
239 | - | 221 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0 |
240 | - bytes = ROUND_UP(bytes, align); | 222 | dev: floppy, id "" |
241 | - } | 223 | unit = 0 (0x0) |
242 | + bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad); | 224 | drive = "none0" |
243 | 225 | + backend_defaults = "auto" | |
244 | tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); | 226 | logical_block_size = 512 (512 B) |
245 | - ret = bdrv_aligned_preadv(child, &req, offset, bytes, align, | 227 | physical_block_size = 512 (512 B) |
246 | - use_local_qiov ? &local_qiov : qiov, | 228 | min_io_size = 0 (0 B) |
247 | - flags); | 229 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,unit=1 |
248 | + ret = bdrv_aligned_preadv(child, &req, offset, bytes, | 230 | dev: floppy, id "" |
249 | + bs->bl.request_alignment, | 231 | unit = 1 (0x1) |
250 | + qiov, flags); | 232 | drive = "none0" |
251 | tracked_request_end(&req); | 233 | + backend_defaults = "auto" |
252 | bdrv_dec_in_flight(bs); | 234 | logical_block_size = 512 (512 B) |
253 | 235 | physical_block_size = 512 (512 B) | |
254 | - if (use_local_qiov) { | 236 | min_io_size = 0 (0 B) |
255 | - qemu_iovec_destroy(&local_qiov); | 237 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco |
256 | - qemu_vfree(head_buf); | 238 | dev: floppy, id "" |
257 | - qemu_vfree(tail_buf); | 239 | unit = 1 (0x1) |
258 | - } | 240 | drive = "none1" |
259 | + bdrv_padding_destroy(&pad); | 241 | + backend_defaults = "auto" |
260 | 242 | logical_block_size = 512 (512 B) | |
261 | return ret; | 243 | physical_block_size = 512 (512 B) |
262 | } | 244 | min_io_size = 0 (0 B) |
263 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, | 245 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qco |
264 | BdrvTrackedRequest *req) | 246 | dev: floppy, id "" |
265 | { | 247 | unit = 0 (0x0) |
266 | BlockDriverState *bs = child->bs; | 248 | drive = "none0" |
267 | - uint8_t *buf = NULL; | 249 | + backend_defaults = "auto" |
268 | QEMUIOVector local_qiov; | 250 | logical_block_size = 512 (512 B) |
269 | uint64_t align = bs->bl.request_alignment; | 251 | physical_block_size = 512 (512 B) |
270 | - unsigned int head_padding_bytes, tail_padding_bytes; | 252 | min_io_size = 0 (0 B) |
271 | int ret = 0; | 253 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl |
272 | + bool padding; | 254 | dev: floppy, id "" |
273 | + BdrvRequestPadding pad; | 255 | unit = 1 (0x1) |
274 | 256 | drive = "none0" | |
275 | - head_padding_bytes = offset & (align - 1); | 257 | + backend_defaults = "auto" |
276 | - tail_padding_bytes = (align - (offset + bytes)) & (align - 1); | 258 | logical_block_size = 512 (512 B) |
277 | - | 259 | physical_block_size = 512 (512 B) |
278 | - | 260 | min_io_size = 0 (0 B) |
279 | - assert(flags & BDRV_REQ_ZERO_WRITE); | 261 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl |
280 | - if (head_padding_bytes || tail_padding_bytes) { | 262 | dev: floppy, id "" |
281 | - buf = qemu_blockalign(bs, align); | 263 | unit = 0 (0x0) |
282 | - qemu_iovec_init_buf(&local_qiov, buf, align); | 264 | drive = "floppy0" |
283 | - } | 265 | + backend_defaults = "auto" |
284 | - if (head_padding_bytes) { | 266 | logical_block_size = 512 (512 B) |
285 | - uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); | 267 | physical_block_size = 512 (512 B) |
286 | - | 268 | min_io_size = 0 (0 B) |
287 | - /* RMW the unaligned part before head. */ | 269 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl |
288 | + padding = bdrv_init_padding(bs, offset, bytes, &pad); | 270 | dev: floppy, id "" |
289 | + if (padding) { | 271 | unit = 1 (0x1) |
290 | mark_request_serialising(req, align); | 272 | drive = "none0" |
291 | wait_serialising_requests(req); | 273 | + backend_defaults = "auto" |
292 | - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); | 274 | logical_block_size = 512 (512 B) |
293 | - ret = bdrv_aligned_preadv(child, req, offset & ~(align - 1), align, | 275 | physical_block_size = 512 (512 B) |
294 | - align, &local_qiov, 0); | 276 | min_io_size = 0 (0 B) |
295 | - if (ret < 0) { | 277 | @@ -XXX,XX +XXX,XX @@ Testing: -fda TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl |
296 | - goto fail; | 278 | dev: floppy, id "" |
297 | - } | 279 | unit = 0 (0x0) |
298 | - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); | 280 | drive = "floppy0" |
299 | 281 | + backend_defaults = "auto" | |
300 | - memset(buf + head_padding_bytes, 0, zero_bytes); | 282 | logical_block_size = 512 (512 B) |
301 | - ret = bdrv_aligned_pwritev(child, req, offset & ~(align - 1), align, | 283 | physical_block_size = 512 (512 B) |
302 | - align, &local_qiov, | 284 | min_io_size = 0 (0 B) |
303 | - flags & ~BDRV_REQ_ZERO_WRITE); | 285 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl |
304 | - if (ret < 0) { | 286 | dev: floppy, id "" |
305 | - goto fail; | 287 | unit = 0 (0x0) |
306 | + bdrv_padding_rmw_read(child, req, &pad, true); | 288 | drive = "none0" |
307 | + | 289 | + backend_defaults = "auto" |
308 | + if (pad.head || pad.merge_reads) { | 290 | logical_block_size = 512 (512 B) |
309 | + int64_t aligned_offset = offset & ~(align - 1); | 291 | physical_block_size = 512 (512 B) |
310 | + int64_t write_bytes = pad.merge_reads ? pad.buf_len : align; | 292 | min_io_size = 0 (0 B) |
311 | + | 293 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl |
312 | + qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes); | 294 | dev: floppy, id "" |
313 | + ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes, | 295 | unit = 1 (0x1) |
314 | + align, &local_qiov, | 296 | drive = "floppy1" |
315 | + flags & ~BDRV_REQ_ZERO_WRITE); | 297 | + backend_defaults = "auto" |
316 | + if (ret < 0 || pad.merge_reads) { | 298 | logical_block_size = 512 (512 B) |
317 | + /* Error or all work is done */ | 299 | physical_block_size = 512 (512 B) |
318 | + goto out; | 300 | min_io_size = 0 (0 B) |
319 | + } | 301 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl |
320 | + offset += write_bytes - pad.head; | 302 | dev: floppy, id "" |
321 | + bytes -= write_bytes - pad.head; | 303 | unit = 0 (0x0) |
322 | } | 304 | drive = "none0" |
323 | - offset += zero_bytes; | 305 | + backend_defaults = "auto" |
324 | - bytes -= zero_bytes; | 306 | logical_block_size = 512 (512 B) |
325 | } | 307 | physical_block_size = 512 (512 B) |
326 | 308 | min_io_size = 0 (0 B) | |
327 | assert(!bytes || (offset & (align - 1)) == 0); | 309 | @@ -XXX,XX +XXX,XX @@ Testing: -fdb TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.qcow2.2 -device fl |
328 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, | 310 | dev: floppy, id "" |
329 | ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, | 311 | unit = 1 (0x1) |
330 | NULL, flags); | 312 | drive = "floppy1" |
331 | if (ret < 0) { | 313 | + backend_defaults = "auto" |
332 | - goto fail; | 314 | logical_block_size = 512 (512 B) |
333 | + goto out; | 315 | physical_block_size = 512 (512 B) |
334 | } | 316 | min_io_size = 0 (0 B) |
335 | bytes -= aligned_bytes; | 317 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q |
336 | offset += aligned_bytes; | 318 | dev: floppy, id "" |
337 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, | 319 | unit = 1 (0x1) |
338 | 320 | drive = "none0" | |
339 | assert(!bytes || (offset & (align - 1)) == 0); | 321 | + backend_defaults = "auto" |
340 | if (bytes) { | 322 | logical_block_size = 512 (512 B) |
341 | - assert(align == tail_padding_bytes + bytes); | 323 | physical_block_size = 512 (512 B) |
342 | - /* RMW the unaligned part after tail. */ | 324 | min_io_size = 0 (0 B) |
343 | - mark_request_serialising(req, align); | 325 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q |
344 | - wait_serialising_requests(req); | 326 | dev: floppy, id "" |
345 | - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); | 327 | unit = 0 (0x0) |
346 | - ret = bdrv_aligned_preadv(child, req, offset, align, | 328 | drive = "floppy0" |
347 | - align, &local_qiov, 0); | 329 | + backend_defaults = "auto" |
348 | - if (ret < 0) { | 330 | logical_block_size = 512 (512 B) |
349 | - goto fail; | 331 | physical_block_size = 512 (512 B) |
350 | - } | 332 | min_io_size = 0 (0 B) |
351 | - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); | 333 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q |
352 | + assert(align == pad.tail + bytes); | 334 | dev: floppy, id "" |
353 | 335 | unit = 1 (0x1) | |
354 | - memset(buf, 0, bytes); | 336 | drive = "none0" |
355 | + qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align); | 337 | + backend_defaults = "auto" |
356 | ret = bdrv_aligned_pwritev(child, req, offset, align, align, | 338 | logical_block_size = 512 (512 B) |
357 | &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); | 339 | physical_block_size = 512 (512 B) |
358 | } | 340 | min_io_size = 0 (0 B) |
359 | -fail: | 341 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=floppy,file=TEST_DIR/t.qcow2 -drive if=none,file=TEST_DIR/t.q |
360 | - qemu_vfree(buf); | 342 | dev: floppy, id "" |
361 | + | 343 | unit = 0 (0x0) |
362 | +out: | 344 | drive = "floppy0" |
363 | + bdrv_padding_destroy(&pad); | 345 | + backend_defaults = "auto" |
364 | + | 346 | logical_block_size = 512 (512 B) |
365 | return ret; | 347 | physical_block_size = 512 (512 B) |
366 | - | 348 | min_io_size = 0 (0 B) |
367 | } | 349 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -global floppy.drive=none0 -device |
368 | 350 | dev: floppy, id "" | |
369 | /* | 351 | unit = 0 (0x0) |
370 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | 352 | drive = "none0" |
371 | BlockDriverState *bs = child->bs; | 353 | + backend_defaults = "auto" |
372 | BdrvTrackedRequest req; | 354 | logical_block_size = 512 (512 B) |
373 | uint64_t align = bs->bl.request_alignment; | 355 | physical_block_size = 512 (512 B) |
374 | - uint8_t *head_buf = NULL; | 356 | min_io_size = 0 (0 B) |
375 | - uint8_t *tail_buf = NULL; | 357 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy |
376 | - QEMUIOVector local_qiov; | 358 | dev: floppy, id "" |
377 | - bool use_local_qiov = false; | 359 | unit = 0 (0x0) |
378 | + BdrvRequestPadding pad; | 360 | drive = "" |
379 | int ret; | 361 | + backend_defaults = "auto" |
380 | 362 | logical_block_size = 512 (512 B) | |
381 | trace_bdrv_co_pwritev(child->bs, offset, bytes, flags); | 363 | physical_block_size = 512 (512 B) |
382 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | 364 | min_io_size = 0 (0 B) |
383 | goto out; | 365 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=120 |
384 | } | 366 | dev: floppy, id "" |
385 | 367 | unit = 0 (0x0) | |
386 | - if (offset & (align - 1)) { | 368 | drive = "" |
387 | - QEMUIOVector head_qiov; | 369 | + backend_defaults = "auto" |
388 | - | 370 | logical_block_size = 512 (512 B) |
389 | + if (bdrv_pad_request(bs, &qiov, &offset, &bytes, &pad)) { | 371 | physical_block_size = 512 (512 B) |
390 | mark_request_serialising(&req, align); | 372 | min_io_size = 0 (0 B) |
391 | wait_serialising_requests(&req); | 373 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=144 |
392 | - | 374 | dev: floppy, id "" |
393 | - head_buf = qemu_blockalign(bs, align); | 375 | unit = 0 (0x0) |
394 | - qemu_iovec_init_buf(&head_qiov, head_buf, align); | 376 | drive = "" |
395 | - | 377 | + backend_defaults = "auto" |
396 | - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_HEAD); | 378 | logical_block_size = 512 (512 B) |
397 | - ret = bdrv_aligned_preadv(child, &req, offset & ~(align - 1), align, | 379 | physical_block_size = 512 (512 B) |
398 | - align, &head_qiov, 0); | 380 | min_io_size = 0 (0 B) |
399 | - if (ret < 0) { | 381 | @@ -XXX,XX +XXX,XX @@ Testing: -device floppy,drive-type=288 |
400 | - goto fail; | 382 | dev: floppy, id "" |
401 | - } | 383 | unit = 0 (0x0) |
402 | - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); | 384 | drive = "" |
403 | - | 385 | + backend_defaults = "auto" |
404 | - qemu_iovec_init(&local_qiov, qiov->niov + 2); | 386 | logical_block_size = 512 (512 B) |
405 | - qemu_iovec_add(&local_qiov, head_buf, offset & (align - 1)); | 387 | physical_block_size = 512 (512 B) |
406 | - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); | 388 | min_io_size = 0 (0 B) |
407 | - use_local_qiov = true; | 389 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t |
408 | - | 390 | dev: floppy, id "" |
409 | - bytes += offset & (align - 1); | 391 | unit = 0 (0x0) |
410 | - offset = offset & ~(align - 1); | 392 | drive = "none0" |
411 | - | 393 | + backend_defaults = "auto" |
412 | - /* We have read the tail already if the request is smaller | 394 | logical_block_size = 512 (512 B) |
413 | - * than one aligned block. | 395 | physical_block_size = 512 (512 B) |
414 | - */ | 396 | min_io_size = 0 (0 B) |
415 | - if (bytes < align) { | 397 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,drive-t |
416 | - qemu_iovec_add(&local_qiov, head_buf + bytes, align - bytes); | 398 | dev: floppy, id "" |
417 | - bytes = align; | 399 | unit = 0 (0x0) |
418 | - } | 400 | drive = "none0" |
419 | - } | 401 | + backend_defaults = "auto" |
420 | - | 402 | logical_block_size = 512 (512 B) |
421 | - if ((offset + bytes) & (align - 1)) { | 403 | physical_block_size = 512 (512 B) |
422 | - QEMUIOVector tail_qiov; | 404 | min_io_size = 0 (0 B) |
423 | - size_t tail_bytes; | 405 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,logical |
424 | - bool waited; | 406 | dev: floppy, id "" |
425 | - | 407 | unit = 0 (0x0) |
426 | - mark_request_serialising(&req, align); | 408 | drive = "none0" |
427 | - waited = wait_serialising_requests(&req); | 409 | + backend_defaults = "auto" |
428 | - assert(!waited || !use_local_qiov); | 410 | logical_block_size = 512 (512 B) |
429 | - | 411 | physical_block_size = 512 (512 B) |
430 | - tail_buf = qemu_blockalign(bs, align); | 412 | min_io_size = 0 (0 B) |
431 | - qemu_iovec_init_buf(&tail_qiov, tail_buf, align); | 413 | @@ -XXX,XX +XXX,XX @@ Testing: -drive if=none,file=TEST_DIR/t.qcow2 -device floppy,drive=none0,physica |
432 | - | 414 | dev: floppy, id "" |
433 | - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); | 415 | unit = 0 (0x0) |
434 | - ret = bdrv_aligned_preadv(child, &req, (offset + bytes) & ~(align - 1), | 416 | drive = "none0" |
435 | - align, align, &tail_qiov, 0); | 417 | + backend_defaults = "auto" |
436 | - if (ret < 0) { | 418 | logical_block_size = 512 (512 B) |
437 | - goto fail; | 419 | physical_block_size = 512 (512 B) |
438 | - } | 420 | min_io_size = 0 (0 B) |
439 | - bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); | ||
440 | - | ||
441 | - if (!use_local_qiov) { | ||
442 | - qemu_iovec_init(&local_qiov, qiov->niov + 1); | ||
443 | - qemu_iovec_concat(&local_qiov, qiov, 0, qiov->size); | ||
444 | - use_local_qiov = true; | ||
445 | - } | ||
446 | - | ||
447 | - tail_bytes = (offset + bytes) & (align - 1); | ||
448 | - qemu_iovec_add(&local_qiov, tail_buf + tail_bytes, align - tail_bytes); | ||
449 | - | ||
450 | - bytes = ROUND_UP(bytes, align); | ||
451 | + bdrv_padding_rmw_read(child, &req, &pad, false); | ||
452 | } | ||
453 | |||
454 | ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, | ||
455 | - use_local_qiov ? &local_qiov : qiov, | ||
456 | - flags); | ||
457 | + qiov, flags); | ||
458 | |||
459 | -fail: | ||
460 | + bdrv_padding_destroy(&pad); | ||
461 | |||
462 | - if (use_local_qiov) { | ||
463 | - qemu_iovec_destroy(&local_qiov); | ||
464 | - } | ||
465 | - qemu_vfree(head_buf); | ||
466 | - qemu_vfree(tail_buf); | ||
467 | out: | ||
468 | tracked_request_end(&req); | ||
469 | bdrv_dec_in_flight(bs); | ||
470 | + | ||
471 | return ret; | ||
472 | } | ||
473 | |||
474 | -- | 421 | -- |
475 | 2.21.0 | 422 | 2.31.1 |
476 | 423 | ||
477 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Add handlers supporting qiov_offset parameter: | ||
4 | bdrv_co_preadv_part | ||
5 | bdrv_co_pwritev_part | ||
6 | bdrv_co_pwritev_compressed_part | ||
7 | This reduces the need to define local_qiovs and hd_qiovs in all | |
8 | corners of the block layer code. The following patches will increase usage | |
9 | of this new API part by part. | |
10 | |||
11 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
12 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | Message-id: 20190604161514.262241-5-vsementsov@virtuozzo.com | ||
14 | Message-Id: <20190604161514.262241-5-vsementsov@virtuozzo.com> | ||
15 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
16 | --- | ||
17 | include/block/block_int.h | 15 ++++++ | ||
18 | block/backup.c | 2 +- | ||
19 | block/io.c | 96 +++++++++++++++++++++++++++++++-------- | ||
20 | qemu-img.c | 4 +- | ||
21 | 4 files changed, 95 insertions(+), 22 deletions(-) | ||
22 | |||
23 | diff --git a/include/block/block_int.h b/include/block/block_int.h | ||
24 | index XXXXXXX..XXXXXXX 100644 | ||
25 | --- a/include/block/block_int.h | ||
26 | +++ b/include/block/block_int.h | ||
27 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
28 | */ | ||
29 | int coroutine_fn (*bdrv_co_preadv)(BlockDriverState *bs, | ||
30 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); | ||
31 | + int coroutine_fn (*bdrv_co_preadv_part)(BlockDriverState *bs, | ||
32 | + uint64_t offset, uint64_t bytes, | ||
33 | + QEMUIOVector *qiov, size_t qiov_offset, int flags); | ||
34 | int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, | ||
35 | int64_t sector_num, int nb_sectors, QEMUIOVector *qiov, int flags); | ||
36 | /** | ||
37 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
38 | */ | ||
39 | int coroutine_fn (*bdrv_co_pwritev)(BlockDriverState *bs, | ||
40 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, int flags); | ||
41 | + int coroutine_fn (*bdrv_co_pwritev_part)(BlockDriverState *bs, | ||
42 | + uint64_t offset, uint64_t bytes, | ||
43 | + QEMUIOVector *qiov, size_t qiov_offset, int flags); | ||
44 | |||
45 | /* | ||
46 | * Efficiently zero a region of the disk image. Typically an image format | ||
47 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
48 | |||
49 | int coroutine_fn (*bdrv_co_pwritev_compressed)(BlockDriverState *bs, | ||
50 | uint64_t offset, uint64_t bytes, QEMUIOVector *qiov); | ||
51 | + int coroutine_fn (*bdrv_co_pwritev_compressed_part)(BlockDriverState *bs, | ||
52 | + uint64_t offset, uint64_t bytes, QEMUIOVector *qiov, | ||
53 | + size_t qiov_offset); | ||
54 | |||
55 | int (*bdrv_snapshot_create)(BlockDriverState *bs, | ||
56 | QEMUSnapshotInfo *sn_info); | ||
57 | @@ -XXX,XX +XXX,XX @@ struct BlockDriver { | ||
58 | const char *const *strong_runtime_opts; | ||
59 | }; | ||
60 | |||
61 | +static inline bool block_driver_can_compress(BlockDriver *drv) | ||
62 | +{ | ||
63 | + return drv->bdrv_co_pwritev_compressed || | ||
64 | + drv->bdrv_co_pwritev_compressed_part; | ||
65 | +} | ||
66 | + | ||
67 | typedef struct BlockLimits { | ||
68 | /* Alignment requirement, in bytes, for offset/length of I/O | ||
69 | * requests. Must be a power of 2 less than INT_MAX; defaults to | ||
70 | diff --git a/block/backup.c b/block/backup.c | ||
71 | index XXXXXXX..XXXXXXX 100644 | ||
72 | --- a/block/backup.c | ||
73 | +++ b/block/backup.c | ||
74 | @@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs, | ||
75 | return NULL; | ||
76 | } | ||
77 | |||
78 | - if (compress && target->drv->bdrv_co_pwritev_compressed == NULL) { | ||
79 | + if (compress && !block_driver_can_compress(target->drv)) { | ||
80 | error_setg(errp, "Compression is not supported for this drive %s", | ||
81 | bdrv_get_device_name(target)); | ||
82 | return NULL; | ||
83 | diff --git a/block/io.c b/block/io.c | ||
84 | index XXXXXXX..XXXXXXX 100644 | ||
85 | --- a/block/io.c | ||
86 | +++ b/block/io.c | ||
87 | @@ -XXX,XX +XXX,XX @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) | ||
88 | |||
89 | /* Default alignment based on whether driver has byte interface */ | ||
90 | bs->bl.request_alignment = (drv->bdrv_co_preadv || | ||
91 | - drv->bdrv_aio_preadv) ? 1 : 512; | ||
92 | + drv->bdrv_aio_preadv || | ||
93 | + drv->bdrv_co_preadv_part) ? 1 : 512; | ||
94 | |||
95 | /* Take some limits from the children as a default */ | ||
96 | if (bs->file) { | ||
97 | @@ -XXX,XX +XXX,XX @@ static void bdrv_co_io_em_complete(void *opaque, int ret) | ||
98 | |||
99 | static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, | ||
100 | uint64_t offset, uint64_t bytes, | ||
101 | - QEMUIOVector *qiov, int flags) | ||
102 | + QEMUIOVector *qiov, | ||
103 | + size_t qiov_offset, int flags) | ||
104 | { | ||
105 | BlockDriver *drv = bs->drv; | ||
106 | int64_t sector_num; | ||
107 | unsigned int nb_sectors; | ||
108 | + QEMUIOVector local_qiov; | ||
109 | + int ret; | ||
110 | |||
111 | assert(!(flags & ~BDRV_REQ_MASK)); | ||
112 | assert(!(flags & BDRV_REQ_NO_FALLBACK)); | ||
113 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, | ||
114 | return -ENOMEDIUM; | ||
115 | } | ||
116 | |||
117 | + if (drv->bdrv_co_preadv_part) { | ||
118 | + return drv->bdrv_co_preadv_part(bs, offset, bytes, qiov, qiov_offset, | ||
119 | + flags); | ||
120 | + } | ||
121 | + | ||
122 | + if (qiov_offset > 0 || bytes != qiov->size) { | ||
123 | + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); | ||
124 | + qiov = &local_qiov; | ||
125 | + } | ||
126 | + | ||
127 | if (drv->bdrv_co_preadv) { | ||
128 | - return drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); | ||
129 | + ret = drv->bdrv_co_preadv(bs, offset, bytes, qiov, flags); | ||
130 | + goto out; | ||
131 | } | ||
132 | |||
133 | if (drv->bdrv_aio_preadv) { | ||
134 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, | ||
135 | acb = drv->bdrv_aio_preadv(bs, offset, bytes, qiov, flags, | ||
136 | bdrv_co_io_em_complete, &co); | ||
137 | if (acb == NULL) { | ||
138 | - return -EIO; | ||
139 | + ret = -EIO; | ||
140 | + goto out; | ||
141 | } else { | ||
142 | qemu_coroutine_yield(); | ||
143 | - return co.ret; | ||
144 | + ret = co.ret; | ||
145 | + goto out; | ||
146 | } | ||
147 | } | ||
148 | |||
149 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, | ||
150 | assert(bytes <= BDRV_REQUEST_MAX_BYTES); | ||
151 | assert(drv->bdrv_co_readv); | ||
152 | |||
153 | - return drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); | ||
154 | + ret = drv->bdrv_co_readv(bs, sector_num, nb_sectors, qiov); | ||
155 | + | ||
156 | +out: | ||
157 | + if (qiov == &local_qiov) { | ||
158 | + qemu_iovec_destroy(&local_qiov); | ||
159 | + } | ||
160 | + | ||
161 | + return ret; | ||
162 | } | ||
163 | |||
164 | static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, | ||
165 | uint64_t offset, uint64_t bytes, | ||
166 | - QEMUIOVector *qiov, int flags) | ||
167 | + QEMUIOVector *qiov, | ||
168 | + size_t qiov_offset, int flags) | ||
169 | { | ||
170 | BlockDriver *drv = bs->drv; | ||
171 | int64_t sector_num; | ||
172 | unsigned int nb_sectors; | ||
173 | + QEMUIOVector local_qiov; | ||
174 | int ret; | ||
175 | |||
176 | assert(!(flags & ~BDRV_REQ_MASK)); | ||
177 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, | ||
178 | return -ENOMEDIUM; | ||
179 | } | ||
180 | |||
181 | + if (drv->bdrv_co_pwritev_part) { | ||
182 | + ret = drv->bdrv_co_pwritev_part(bs, offset, bytes, qiov, qiov_offset, | ||
183 | + flags & bs->supported_write_flags); | ||
184 | + flags &= ~bs->supported_write_flags; | ||
185 | + goto emulate_flags; | ||
186 | + } | ||
187 | + | ||
188 | + if (qiov_offset > 0 || bytes != qiov->size) { | ||
189 | + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); | ||
190 | + qiov = &local_qiov; | ||
191 | + } | ||
192 | + | ||
193 | if (drv->bdrv_co_pwritev) { | ||
194 | ret = drv->bdrv_co_pwritev(bs, offset, bytes, qiov, | ||
195 | flags & bs->supported_write_flags); | ||
196 | @@ -XXX,XX +XXX,XX @@ emulate_flags: | ||
197 | ret = bdrv_co_flush(bs); | ||
198 | } | ||
199 | |||
200 | + if (qiov == &local_qiov) { | ||
201 | + qemu_iovec_destroy(&local_qiov); | ||
202 | + } | ||
203 | + | ||
204 | return ret; | ||
205 | } | ||
206 | |||
207 | static int coroutine_fn | ||
208 | bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
209 | - uint64_t bytes, QEMUIOVector *qiov) | ||
210 | + uint64_t bytes, QEMUIOVector *qiov, | ||
211 | + size_t qiov_offset) | ||
212 | { | ||
213 | BlockDriver *drv = bs->drv; | ||
214 | + QEMUIOVector local_qiov; | ||
215 | + int ret; | ||
216 | |||
217 | if (!drv) { | ||
218 | return -ENOMEDIUM; | ||
219 | } | ||
220 | |||
221 | - if (!drv->bdrv_co_pwritev_compressed) { | ||
222 | + if (!block_driver_can_compress(drv)) { | ||
223 | return -ENOTSUP; | ||
224 | } | ||
225 | |||
226 | - return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); | ||
227 | + if (drv->bdrv_co_pwritev_compressed_part) { | ||
228 | + return drv->bdrv_co_pwritev_compressed_part(bs, offset, bytes, | ||
229 | + qiov, qiov_offset); | ||
230 | + } | ||
231 | + | ||
232 | + if (qiov_offset == 0) { | ||
233 | + return drv->bdrv_co_pwritev_compressed(bs, offset, bytes, qiov); | ||
234 | + } | ||
235 | + | ||
236 | + qemu_iovec_init_slice(&local_qiov, qiov, qiov_offset, bytes); | ||
237 | + ret = drv->bdrv_co_pwritev_compressed(bs, offset, bytes, &local_qiov); | ||
238 | + qemu_iovec_destroy(&local_qiov); | ||
239 | + | ||
240 | + return ret; | ||
241 | } | ||
242 | |||
243 | static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
244 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
245 | qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); | ||
246 | |||
247 | ret = bdrv_driver_preadv(bs, cluster_offset, pnum, | ||
248 | - &local_qiov, 0); | ||
249 | + &local_qiov, 0, 0); | ||
250 | if (ret < 0) { | ||
251 | goto err; | ||
252 | } | ||
253 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
254 | * necessary to flush even in cache=writethrough mode. | ||
255 | */ | ||
256 | ret = bdrv_driver_pwritev(bs, cluster_offset, pnum, | ||
257 | - &local_qiov, | ||
258 | + &local_qiov, 0, | ||
259 | BDRV_REQ_WRITE_UNCHANGED); | ||
260 | } | ||
261 | |||
262 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
263 | qemu_iovec_init(&local_qiov, qiov->niov); | ||
264 | qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes); | ||
265 | ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size, | ||
266 | - &local_qiov, 0); | ||
267 | + &local_qiov, 0, 0); | ||
268 | qemu_iovec_destroy(&local_qiov); | ||
269 | if (ret < 0) { | ||
270 | goto err; | ||
271 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
272 | |||
273 | max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); | ||
274 | if (bytes <= max_bytes && bytes <= max_transfer) { | ||
275 | - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0); | ||
276 | + ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0, 0); | ||
277 | goto out; | ||
278 | } | ||
279 | |||
280 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
281 | qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); | ||
282 | |||
283 | ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining, | ||
284 | - num, &local_qiov, 0); | ||
285 | + num, &local_qiov, 0, 0); | ||
286 | max_bytes -= num; | ||
287 | qemu_iovec_destroy(&local_qiov); | ||
288 | } else { | ||
289 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, | ||
290 | } | ||
291 | qemu_iovec_init_buf(&qiov, buf, num); | ||
292 | |||
293 | - ret = bdrv_driver_pwritev(bs, offset, num, &qiov, write_flags); | ||
294 | + ret = bdrv_driver_pwritev(bs, offset, num, &qiov, 0, write_flags); | ||
295 | |||
296 | /* Keep bounce buffer around if it is big enough for all | ||
297 | * all future requests. | ||
298 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, | ||
299 | bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); | ||
300 | ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); | ||
301 | } else if (flags & BDRV_REQ_WRITE_COMPRESSED) { | ||
302 | - ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov); | ||
303 | + ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov, 0); | ||
304 | } else if (bytes <= max_transfer) { | ||
305 | bdrv_debug_event(bs, BLKDBG_PWRITEV); | ||
306 | - ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, flags); | ||
307 | + ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, 0, flags); | ||
308 | } else { | ||
309 | bdrv_debug_event(bs, BLKDBG_PWRITEV); | ||
310 | while (bytes_remaining) { | ||
311 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, | ||
312 | qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); | ||
313 | |||
314 | ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining, | ||
315 | - num, &local_qiov, local_flags); | ||
316 | + num, &local_qiov, 0, local_flags); | ||
317 | qemu_iovec_destroy(&local_qiov); | ||
318 | if (ret < 0) { | ||
319 | break; | ||
320 | diff --git a/qemu-img.c b/qemu-img.c | ||
321 | index XXXXXXX..XXXXXXX 100644 | ||
322 | --- a/qemu-img.c | ||
323 | +++ b/qemu-img.c | ||
324 | @@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv) | ||
325 | const char *preallocation = | ||
326 | qemu_opt_get(opts, BLOCK_OPT_PREALLOC); | ||
327 | |||
328 | - if (drv && !drv->bdrv_co_pwritev_compressed) { | ||
329 | + if (drv && !block_driver_can_compress(drv)) { | ||
330 | error_report("Compression not supported for this file format"); | ||
331 | ret = -1; | ||
332 | goto out; | ||
333 | @@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv) | ||
334 | } | ||
335 | out_bs = blk_bs(s.target); | ||
336 | |||
337 | - if (s.compressed && !out_bs->drv->bdrv_co_pwritev_compressed) { | ||
338 | + if (s.compressed && !block_driver_can_compress(out_bs->drv)) { | ||
339 | error_report("Compression not supported for this file format"); | ||
340 | ret = -1; | ||
341 | goto out; | ||
342 | -- | ||
343 | 2.21.0 | ||
344 | |||
345 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Use and support new API in bdrv_co_do_copy_on_readv. Note that in case | ||
4 | of allocated-in-top we need to shrink read size to MIN(..) by hand, as | ||
5 | pre-patch this was actually done implicitly by qemu_iovec_concat (and | ||
6 | we used local_qiov.size). | ||
7 | |||
8 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
9 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | Message-id: 20190604161514.262241-6-vsementsov@virtuozzo.com | ||
11 | Message-Id: <20190604161514.262241-6-vsementsov@virtuozzo.com> | ||
12 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
13 | --- | ||
14 | block/io.c | 18 +++++++++--------- | ||
15 | 1 file changed, 9 insertions(+), 9 deletions(-) | ||
16 | |||
17 | diff --git a/block/io.c b/block/io.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/block/io.c | ||
20 | +++ b/block/io.c | ||
21 | @@ -XXX,XX +XXX,XX @@ bdrv_driver_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
22 | |||
23 | static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
24 | int64_t offset, unsigned int bytes, QEMUIOVector *qiov, | ||
25 | - int flags) | ||
26 | + size_t qiov_offset, int flags) | ||
27 | { | ||
28 | BlockDriverState *bs = child->bs; | ||
29 | |||
30 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
31 | void *bounce_buffer; | ||
32 | |||
33 | BlockDriver *drv = bs->drv; | ||
34 | - QEMUIOVector local_qiov; | ||
35 | int64_t cluster_offset; | ||
36 | int64_t cluster_bytes; | ||
37 | size_t skip_bytes; | ||
38 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
39 | assert(skip_bytes < pnum); | ||
40 | |||
41 | if (ret <= 0) { | ||
42 | + QEMUIOVector local_qiov; | ||
43 | + | ||
44 | /* Must copy-on-read; use the bounce buffer */ | ||
45 | pnum = MIN(pnum, MAX_BOUNCE_BUFFER); | ||
46 | qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); | ||
47 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
48 | } | ||
49 | |||
50 | if (!(flags & BDRV_REQ_PREFETCH)) { | ||
51 | - qemu_iovec_from_buf(qiov, progress, bounce_buffer + skip_bytes, | ||
52 | + qemu_iovec_from_buf(qiov, qiov_offset + progress, | ||
53 | + bounce_buffer + skip_bytes, | ||
54 | pnum - skip_bytes); | ||
55 | } | ||
56 | } else if (!(flags & BDRV_REQ_PREFETCH)) { | ||
57 | /* Read directly into the destination */ | ||
58 | - qemu_iovec_init(&local_qiov, qiov->niov); | ||
59 | - qemu_iovec_concat(&local_qiov, qiov, progress, pnum - skip_bytes); | ||
60 | - ret = bdrv_driver_preadv(bs, offset + progress, local_qiov.size, | ||
61 | - &local_qiov, 0, 0); | ||
62 | - qemu_iovec_destroy(&local_qiov); | ||
63 | + ret = bdrv_driver_preadv(bs, offset + progress, | ||
64 | + MIN(pnum - skip_bytes, bytes - progress), | ||
65 | + qiov, qiov_offset + progress, 0); | ||
66 | if (ret < 0) { | ||
67 | goto err; | ||
68 | } | ||
69 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
70 | } | ||
71 | |||
72 | if (!ret || pnum != bytes) { | ||
73 | - ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, flags); | ||
74 | + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, 0, flags); | ||
75 | goto out; | ||
76 | } else if (flags & BDRV_REQ_PREFETCH) { | ||
77 | goto out; | ||
78 | -- | ||
79 | 2.21.0 | ||
80 | |||
81 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Allocate bounce_buffer only if it is really needed. Also, slightly optimize | ||
4 | the allocation size (why not?). | ||
5 | |||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
7 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | Message-id: 20190604161514.262241-7-vsementsov@virtuozzo.com | ||
9 | Message-Id: <20190604161514.262241-7-vsementsov@virtuozzo.com> | ||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | --- | ||
12 | block/io.c | 21 ++++++++++++--------- | ||
13 | 1 file changed, 12 insertions(+), 9 deletions(-) | ||
14 | |||
15 | diff --git a/block/io.c b/block/io.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/block/io.c | ||
18 | +++ b/block/io.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
20 | * modifying the image file. This is critical for zero-copy guest I/O | ||
21 | * where anything might happen inside guest memory. | ||
22 | */ | ||
23 | - void *bounce_buffer; | ||
24 | + void *bounce_buffer = NULL; | ||
25 | |||
26 | BlockDriver *drv = bs->drv; | ||
27 | int64_t cluster_offset; | ||
28 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
29 | trace_bdrv_co_do_copy_on_readv(bs, offset, bytes, | ||
30 | cluster_offset, cluster_bytes); | ||
31 | |||
32 | - bounce_buffer = qemu_try_blockalign(bs, | ||
33 | - MIN(MIN(max_transfer, cluster_bytes), | ||
34 | - MAX_BOUNCE_BUFFER)); | ||
35 | - if (bounce_buffer == NULL) { | ||
36 | - ret = -ENOMEM; | ||
37 | - goto err; | ||
38 | - } | ||
39 | - | ||
40 | while (cluster_bytes) { | ||
41 | int64_t pnum; | ||
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_copy_on_readv(BdrvChild *child, | ||
44 | |||
45 | /* Must copy-on-read; use the bounce buffer */ | ||
46 | pnum = MIN(pnum, MAX_BOUNCE_BUFFER); | ||
47 | + if (!bounce_buffer) { | ||
48 | + int64_t max_we_need = MAX(pnum, cluster_bytes - pnum); | ||
49 | + int64_t max_allowed = MIN(max_transfer, MAX_BOUNCE_BUFFER); | ||
50 | + int64_t bounce_buffer_len = MIN(max_we_need, max_allowed); | ||
51 | + | ||
52 | + bounce_buffer = qemu_try_blockalign(bs, bounce_buffer_len); | ||
53 | + if (!bounce_buffer) { | ||
54 | + ret = -ENOMEM; | ||
55 | + goto err; | ||
56 | + } | ||
57 | + } | ||
58 | qemu_iovec_init_buf(&local_qiov, bounce_buffer, pnum); | ||
59 | |||
60 | ret = bdrv_driver_preadv(bs, cluster_offset, pnum, | ||
61 | -- | ||
62 | 2.21.0 | ||
63 | |||
64 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Use and support new API in bdrv_aligned_preadv. | ||
4 | |||
5 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
6 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20190604161514.262241-8-vsementsov@virtuozzo.com | ||
8 | Message-Id: <20190604161514.262241-8-vsementsov@virtuozzo.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | block/io.c | 21 ++++++++------------- | ||
12 | 1 file changed, 8 insertions(+), 13 deletions(-) | ||
13 | |||
14 | diff --git a/block/io.c b/block/io.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/block/io.c | ||
17 | +++ b/block/io.c | ||
18 | @@ -XXX,XX +XXX,XX @@ err: | ||
19 | */ | ||
20 | static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
21 | BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, | ||
22 | - int64_t align, QEMUIOVector *qiov, int flags) | ||
23 | + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) | ||
24 | { | ||
25 | BlockDriverState *bs = child->bs; | ||
26 | int64_t total_bytes, max_bytes; | ||
27 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
28 | assert(is_power_of_2(align)); | ||
29 | assert((offset & (align - 1)) == 0); | ||
30 | assert((bytes & (align - 1)) == 0); | ||
31 | - assert(!qiov || bytes == qiov->size); | ||
32 | assert((bs->open_flags & BDRV_O_NO_IO) == 0); | ||
33 | max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), | ||
34 | align); | ||
35 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
36 | } | ||
37 | |||
38 | if (!ret || pnum != bytes) { | ||
39 | - ret = bdrv_co_do_copy_on_readv(child, offset, bytes, qiov, 0, flags); | ||
40 | + ret = bdrv_co_do_copy_on_readv(child, offset, bytes, | ||
41 | + qiov, qiov_offset, flags); | ||
42 | goto out; | ||
43 | } else if (flags & BDRV_REQ_PREFETCH) { | ||
44 | goto out; | ||
45 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
46 | |||
47 | max_bytes = ROUND_UP(MAX(0, total_bytes - offset), align); | ||
48 | if (bytes <= max_bytes && bytes <= max_transfer) { | ||
49 | - ret = bdrv_driver_preadv(bs, offset, bytes, qiov, 0, 0); | ||
50 | + ret = bdrv_driver_preadv(bs, offset, bytes, qiov, qiov_offset, 0); | ||
51 | goto out; | ||
52 | } | ||
53 | |||
54 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_preadv(BdrvChild *child, | ||
55 | int num; | ||
56 | |||
57 | if (max_bytes) { | ||
58 | - QEMUIOVector local_qiov; | ||
59 | - | ||
60 | num = MIN(bytes_remaining, MIN(max_bytes, max_transfer)); | ||
61 | assert(num); | ||
62 | - qemu_iovec_init(&local_qiov, qiov->niov); | ||
63 | - qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); | ||
64 | |||
65 | ret = bdrv_driver_preadv(bs, offset + bytes - bytes_remaining, | ||
66 | - num, &local_qiov, 0, 0); | ||
67 | + num, qiov, bytes - bytes_remaining, 0); | ||
68 | max_bytes -= num; | ||
69 | - qemu_iovec_destroy(&local_qiov); | ||
70 | } else { | ||
71 | num = bytes_remaining; | ||
72 | ret = qemu_iovec_memset(qiov, bytes - bytes_remaining, 0, | ||
73 | @@ -XXX,XX +XXX,XX @@ static int bdrv_padding_rmw_read(BdrvChild *child, | ||
74 | bdrv_debug_event(bs, BLKDBG_PWRITEV_RMW_TAIL); | ||
75 | } | ||
76 | ret = bdrv_aligned_preadv(child, req, req->overlap_offset, bytes, | ||
77 | - align, &local_qiov, 0); | ||
78 | + align, &local_qiov, 0, 0); | ||
79 | if (ret < 0) { | ||
80 | return ret; | ||
81 | } | ||
82 | @@ -XXX,XX +XXX,XX @@ static int bdrv_padding_rmw_read(BdrvChild *child, | ||
83 | ret = bdrv_aligned_preadv( | ||
84 | child, req, | ||
85 | req->overlap_offset + req->overlap_bytes - align, | ||
86 | - align, align, &local_qiov, 0); | ||
87 | + align, align, &local_qiov, 0, 0); | ||
88 | if (ret < 0) { | ||
89 | return ret; | ||
90 | } | ||
91 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child, | ||
92 | tracked_request_begin(&req, bs, offset, bytes, BDRV_TRACKED_READ); | ||
93 | ret = bdrv_aligned_preadv(child, &req, offset, bytes, | ||
94 | bs->bl.request_alignment, | ||
95 | - qiov, flags); | ||
96 | + qiov, 0, flags); | ||
97 | tracked_request_end(&req); | ||
98 | bdrv_dec_in_flight(bs); | ||
99 | |||
100 | -- | ||
101 | 2.21.0 | ||
102 | |||
103 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Akihiko Odaki <akihiko.odaki@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Use and support new API in bdrv_aligned_pwritev. | 3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@gmail.com> |
4 | 4 | Message-id: 20210705130458.97642-3-akihiko.odaki@gmail.com | |
5 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
6 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20190604161514.262241-9-vsementsov@virtuozzo.com | ||
8 | Message-Id: <20190604161514.262241-9-vsementsov@virtuozzo.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | 5 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> |
10 | --- | 6 | --- |
11 | block/io.c | 27 +++++++++++++-------------- | 7 | block/io.c | 2 ++ |
12 | 1 file changed, 13 insertions(+), 14 deletions(-) | 8 | 1 file changed, 2 insertions(+) |
13 | 9 | ||
14 | diff --git a/block/io.c b/block/io.c | 10 | diff --git a/block/io.c b/block/io.c |
15 | index XXXXXXX..XXXXXXX 100644 | 11 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/io.c | 12 | --- a/block/io.c |
17 | +++ b/block/io.c | 13 | +++ b/block/io.c |
18 | @@ -XXX,XX +XXX,XX @@ bdrv_co_write_req_finish(BdrvChild *child, int64_t offset, uint64_t bytes, | 14 | @@ -XXX,XX +XXX,XX @@ void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll) |
19 | */ | 15 | |
20 | static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, | 16 | static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src) |
21 | BdrvTrackedRequest *req, int64_t offset, unsigned int bytes, | ||
22 | - int64_t align, QEMUIOVector *qiov, int flags) | ||
23 | + int64_t align, QEMUIOVector *qiov, size_t qiov_offset, int flags) | ||
24 | { | 17 | { |
25 | BlockDriverState *bs = child->bs; | 18 | + dst->pdiscard_alignment = MAX(dst->pdiscard_alignment, |
26 | BlockDriver *drv = bs->drv; | 19 | + src->pdiscard_alignment); |
27 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, | 20 | dst->opt_transfer = MAX(dst->opt_transfer, src->opt_transfer); |
28 | assert(is_power_of_2(align)); | 21 | dst->max_transfer = MIN_NON_ZERO(dst->max_transfer, src->max_transfer); |
29 | assert((offset & (align - 1)) == 0); | 22 | dst->max_hw_transfer = MIN_NON_ZERO(dst->max_hw_transfer, |
30 | assert((bytes & (align - 1)) == 0); | ||
31 | - assert(!qiov || bytes == qiov->size); | ||
32 | + assert(!qiov || qiov_offset + bytes <= qiov->size); | ||
33 | max_transfer = QEMU_ALIGN_DOWN(MIN_NON_ZERO(bs->bl.max_transfer, INT_MAX), | ||
34 | align); | ||
35 | |||
36 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, | ||
37 | |||
38 | if (!ret && bs->detect_zeroes != BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF && | ||
39 | !(flags & BDRV_REQ_ZERO_WRITE) && drv->bdrv_co_pwrite_zeroes && | ||
40 | - qemu_iovec_is_zero(qiov, 0, qiov->size)) { | ||
41 | + qemu_iovec_is_zero(qiov, qiov_offset, bytes)) { | ||
42 | flags |= BDRV_REQ_ZERO_WRITE; | ||
43 | if (bs->detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP) { | ||
44 | flags |= BDRV_REQ_MAY_UNMAP; | ||
45 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, | ||
46 | bdrv_debug_event(bs, BLKDBG_PWRITEV_ZERO); | ||
47 | ret = bdrv_co_do_pwrite_zeroes(bs, offset, bytes, flags); | ||
48 | } else if (flags & BDRV_REQ_WRITE_COMPRESSED) { | ||
49 | - ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, qiov, 0); | ||
50 | + ret = bdrv_driver_pwritev_compressed(bs, offset, bytes, | ||
51 | + qiov, qiov_offset); | ||
52 | } else if (bytes <= max_transfer) { | ||
53 | bdrv_debug_event(bs, BLKDBG_PWRITEV); | ||
54 | - ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, 0, flags); | ||
55 | + ret = bdrv_driver_pwritev(bs, offset, bytes, qiov, qiov_offset, flags); | ||
56 | } else { | ||
57 | bdrv_debug_event(bs, BLKDBG_PWRITEV); | ||
58 | while (bytes_remaining) { | ||
59 | int num = MIN(bytes_remaining, max_transfer); | ||
60 | - QEMUIOVector local_qiov; | ||
61 | int local_flags = flags; | ||
62 | |||
63 | assert(num); | ||
64 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_aligned_pwritev(BdrvChild *child, | ||
65 | * need to flush on the last iteration */ | ||
66 | local_flags &= ~BDRV_REQ_FUA; | ||
67 | } | ||
68 | - qemu_iovec_init(&local_qiov, qiov->niov); | ||
69 | - qemu_iovec_concat(&local_qiov, qiov, bytes - bytes_remaining, num); | ||
70 | |||
71 | ret = bdrv_driver_pwritev(bs, offset + bytes - bytes_remaining, | ||
72 | - num, &local_qiov, 0, local_flags); | ||
73 | - qemu_iovec_destroy(&local_qiov); | ||
74 | + num, qiov, bytes - bytes_remaining, | ||
75 | + local_flags); | ||
76 | if (ret < 0) { | ||
77 | break; | ||
78 | } | ||
79 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, | ||
80 | |||
81 | qemu_iovec_init_buf(&local_qiov, pad.buf, write_bytes); | ||
82 | ret = bdrv_aligned_pwritev(child, req, aligned_offset, write_bytes, | ||
83 | - align, &local_qiov, | ||
84 | + align, &local_qiov, 0, | ||
85 | flags & ~BDRV_REQ_ZERO_WRITE); | ||
86 | if (ret < 0 || pad.merge_reads) { | ||
87 | /* Error or all work is done */ | ||
88 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, | ||
89 | /* Write the aligned part in the middle. */ | ||
90 | uint64_t aligned_bytes = bytes & ~(align - 1); | ||
91 | ret = bdrv_aligned_pwritev(child, req, offset, aligned_bytes, align, | ||
92 | - NULL, flags); | ||
93 | + NULL, 0, flags); | ||
94 | if (ret < 0) { | ||
95 | goto out; | ||
96 | } | ||
97 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_zero_pwritev(BdrvChild *child, | ||
98 | |||
99 | qemu_iovec_init_buf(&local_qiov, pad.tail_buf, align); | ||
100 | ret = bdrv_aligned_pwritev(child, req, offset, align, align, | ||
101 | - &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); | ||
102 | + &local_qiov, 0, | ||
103 | + flags & ~BDRV_REQ_ZERO_WRITE); | ||
104 | } | ||
105 | |||
106 | out: | ||
107 | @@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child, | ||
108 | } | ||
109 | |||
110 | ret = bdrv_aligned_pwritev(child, &req, offset, bytes, align, | ||
111 | - qiov, flags); | ||
112 | + qiov, 0, flags); | ||
113 | |||
114 | bdrv_padding_destroy(&pad); | ||
115 | |||
116 | -- | 23 | -- |
117 | 2.21.0 | 24 | 2.31.1 |
118 | 25 | ||
119 | diff view generated by jsdifflib |
Deleted patch | |||
---|---|---|---|
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
2 | 1 | ||
3 | Use buffer-based I/O in the encrypted case. | ||
4 | |||
5 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | ||
6 | Acked-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Message-id: 20190604161514.262241-11-vsementsov@virtuozzo.com | ||
8 | Message-Id: <20190604161514.262241-11-vsementsov@virtuozzo.com> | ||
9 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
10 | --- | ||
11 | block/qcow2.c | 28 ++++++++++++++++------------ | ||
12 | 1 file changed, 16 insertions(+), 12 deletions(-) | ||
13 | |||
14 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/block/qcow2.c | ||
17 | +++ b/block/qcow2.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
19 | } | ||
20 | |||
21 | assert(cur_bytes <= QCOW_MAX_CRYPT_CLUSTERS * s->cluster_size); | ||
22 | - qemu_iovec_reset(&hd_qiov); | ||
23 | - qemu_iovec_add(&hd_qiov, cluster_data, cur_bytes); | ||
24 | - } | ||
25 | |||
26 | - BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); | ||
27 | - ret = bdrv_co_preadv(s->data_file, | ||
28 | - cluster_offset + offset_in_cluster, | ||
29 | - cur_bytes, &hd_qiov, 0); | ||
30 | - if (ret < 0) { | ||
31 | - goto fail; | ||
32 | - } | ||
33 | - if (bs->encrypted) { | ||
34 | - assert(s->crypto); | ||
35 | + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); | ||
36 | + ret = bdrv_co_pread(s->data_file, | ||
37 | + cluster_offset + offset_in_cluster, | ||
38 | + cur_bytes, cluster_data, 0); | ||
39 | + if (ret < 0) { | ||
40 | + goto fail; | ||
41 | + } | ||
42 | + | ||
43 | assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); | ||
44 | assert((cur_bytes & (BDRV_SECTOR_SIZE - 1)) == 0); | ||
45 | if (qcow2_co_decrypt(bs, cluster_offset, offset, | ||
46 | @@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_preadv(BlockDriverState *bs, uint64_t offset, | ||
47 | goto fail; | ||
48 | } | ||
49 | qemu_iovec_from_buf(qiov, bytes_done, cluster_data, cur_bytes); | ||
50 | + } else { | ||
51 | + BLKDBG_EVENT(bs->file, BLKDBG_READ_AIO); | ||
52 | + ret = bdrv_co_preadv(s->data_file, | ||
53 | + cluster_offset + offset_in_cluster, | ||
54 | + cur_bytes, &hd_qiov, 0); | ||
55 | + if (ret < 0) { | ||
56 | + goto fail; | ||
57 | + } | ||
58 | } | ||
59 | break; | ||
60 | |||
61 | -- | ||
62 | 2.21.0 | ||
63 | |||
64 | diff view generated by jsdifflib |