1 | The following changes since commit a395717cbd26e7593d3c3fe81faca121ec6d13e8: | 1 | The following changes since commit 64f0ad8ad8e13257e7c912df470d46784b55c3fd: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging (2018-07-03 11:49:51 +0100) | 3 | Merge remote-tracking branch 'remotes/armbru/tags/pull-error-2020-07-02' into staging (2020-07-02 15:54:09 +0100) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the Git repository at: |
6 | 6 | ||
7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream | 7 | git://repo.or.cz/qemu/kevin.git tags/for-upstream |
8 | 8 | ||
9 | for you to fetch changes up to 59738025a1674bb7e07713c3c93ff4fb9c5079f5: | 9 | for you to fetch changes up to 4f071a9460886667fde061c05b79dc786cc22e3c: |
10 | 10 | ||
11 | block: Add blklogwrites (2018-07-03 16:09:48 +0200) | 11 | iotests: Fix 051 output after qdev_init_nofail() removal (2020-07-03 10:06:29 +0200) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | Block layer patches: | 14 | Block layer patches: |
15 | 15 | ||
16 | - qcow2: Use worker threads for compression to improve performance of | 16 | - qemu-img convert: Don't pre-zero images (removes a nowadays |
17 | 'qemu-img convert -W' and compressed backup jobs | 17 | counterproductive optimisation) |
18 | - blklogwrites: New filter driver to log write requests to an image in | 18 | - qemu-storage-daemon: Fix object-del, cleaner shutdown |
19 | the dm-log-writes format | 19 | - vvfat: Check that the guest doesn't escape the given host directory |
20 | with read-write vvfat drives | ||
21 | - vvfat: Fix crash by out-of-bounds array writes for read-write drives | ||
22 | - iotests fixes | ||
20 | 23 | ||
21 | ---------------------------------------------------------------- | 24 | ---------------------------------------------------------------- |
22 | Aapo Vienamo (1): | 25 | Kevin Wolf (3): |
23 | block: Add blklogwrites | 26 | qemu-img convert: Don't pre-zero images |
27 | vvfat: Check that updated filenames are valid | ||
28 | vvfat: Fix array_remove_slice() | ||
24 | 29 | ||
25 | Ari Sundholm (1): | 30 | Max Reitz (1): |
26 | block: Move two block permission constants to the relevant enum | 31 | iotests.py: Do not wait() before communicate() |
27 | 32 | ||
28 | Vladimir Sementsov-Ogievskiy (3): | 33 | Philippe Mathieu-Daudé (1): |
29 | qemu-img: allow compressed not-in-order writes | 34 | iotests: Fix 051 output after qdev_init_nofail() removal |
30 | qcow2: refactor data compression | ||
31 | qcow2: add compress threads | ||
32 | 35 | ||
33 | qapi/block-core.json | 33 ++++- | 36 | Stefan Hajnoczi (2): |
34 | block/qcow2.h | 3 + | 37 | qemu-storage-daemon: remember to add qemu_object_opts |
35 | include/block/block.h | 7 + | 38 | qemu-storage-daemon: add missing cleanup calls |
36 | block.c | 6 - | ||
37 | block/blklogwrites.c | 392 ++++++++++++++++++++++++++++++++++++++++++++++++++ | ||
38 | block/qcow2.c | 136 ++++++++++++++---- | ||
39 | qemu-img.c | 5 - | ||
40 | MAINTAINERS | 6 + | ||
41 | block/Makefile.objs | 1 + | ||
42 | 9 files changed, 545 insertions(+), 44 deletions(-) | ||
43 | create mode 100644 block/blklogwrites.c | ||
44 | 39 | ||
40 | block/vvfat.c | 67 +++++++++++++++++++------------------------ | ||
41 | qemu-img.c | 9 ------ | ||
42 | qemu-storage-daemon.c | 5 ++++ | ||
43 | tests/qemu-iotests/iotests.py | 34 +++++++++++----------- | ||
44 | tests/qemu-iotests/051.pc.out | 4 +-- | ||
45 | 5 files changed, 53 insertions(+), 66 deletions(-) | ||
46 | |||
47 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | Since commit 5a37b60a61c, qemu-img create will pre-zero the target image |
---|---|---|---|
2 | if it isn't already zero-initialised (most importantly, for host block | ||
3 | devices, but also iscsi etc.), so that writing explicit zeros wouldn't | ||
4 | be necessary later. | ||
2 | 5 | ||
3 | No reason to forbid them, and they are needed to improve performance | 6 | This could speed up the operation significantly, in particular when the |
4 | with compress-threads in further patches. | 7 | source image file was only sparsely populated. However, it also means |
8 | that some blocks are written twice: Once when pre-zeroing them, and then | ||
9 | when they are overwritten with actual data. On a full image, the | ||
10 | pre-zeroing is wasted work because everything will be overwritten. | ||
5 | 11 | ||
6 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 12 | In practice, write_zeroes typically turns out faster than writing |
13 | explicit zero buffers, but slow enough that first zeroing everything and | ||
14 | then overwriting parts can be a significant net loss. | ||
15 | |||
16 | Meanwhile, qemu-img convert was rewritten in 690c7301600 and zero blocks | ||
17 | are now written to the target using bdrv_co_pwrite_zeroes() if the | ||
18 | target could be pre-zeroed. This way we already make use of the faster | ||
19 | write_zeroes operation, but avoid writing any blocks twice. | ||
20 | |||
21 | Remove the pre-zeroing because these days this former optimisation has | ||
22 | actually turned into a pessimisation in the common case. | ||
23 | |||
24 | Reported-by: Nir Soffer <nsoffer@redhat.com> | ||
25 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
26 | Message-Id: <20200622151203.35624-1-kwolf@redhat.com> | ||
27 | Tested-by: Nir Soffer <nsoffer@redhat.com> | ||
28 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 29 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | --- | 30 | --- |
9 | qemu-img.c | 5 ----- | 31 | qemu-img.c | 9 --------- |
10 | 1 file changed, 5 deletions(-) | 32 | 1 file changed, 9 deletions(-) |
11 | 33 | ||
12 | diff --git a/qemu-img.c b/qemu-img.c | 34 | diff --git a/qemu-img.c b/qemu-img.c |
13 | index XXXXXXX..XXXXXXX 100644 | 35 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/qemu-img.c | 36 | --- a/qemu-img.c |
15 | +++ b/qemu-img.c | 37 | +++ b/qemu-img.c |
16 | @@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv) | 38 | @@ -XXX,XX +XXX,XX @@ static int convert_do_copy(ImgConvertState *s) |
17 | goto fail_getopt; | 39 | s->has_zero_init = bdrv_has_zero_init(blk_bs(s->target)); |
18 | } | 40 | } |
19 | 41 | ||
20 | - if (!s.wr_in_order && s.compressed) { | 42 | - if (!s->has_zero_init && !s->target_has_backing && |
21 | - error_report("Out of order write and compress are mutually exclusive"); | 43 | - bdrv_can_write_zeroes_with_unmap(blk_bs(s->target))) |
22 | - goto fail_getopt; | 44 | - { |
45 | - ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK); | ||
46 | - if (ret == 0) { | ||
47 | - s->has_zero_init = true; | ||
48 | - } | ||
23 | - } | 49 | - } |
24 | - | 50 | - |
25 | if (tgt_image_opts && !skip_create) { | 51 | /* Allocate buffer for copied data. For compressed images, only one cluster |
26 | error_report("--target-image-opts requires use of -n flag"); | 52 | * can be copied at a time. */ |
27 | goto fail_getopt; | 53 | if (s->compressed) { |
28 | -- | 54 | -- |
29 | 2.13.6 | 55 | 2.25.4 |
30 | 56 | ||
31 | 57 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | ||
1 | 2 | ||
3 | The --object option is supported by qemu-storage-daemon but the | ||
4 | qemu_object_opts QemuOptsList wasn't being added. As a result calls to | ||
5 | qemu_find_opts("object") failed with "There is no option group | ||
6 | 'object'". | ||
7 | |||
8 | This patch fixes the object-del QMP command. | ||
9 | |||
10 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
11 | Message-Id: <20200619101132.2401756-2-stefanha@redhat.com> | ||
12 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
13 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
14 | --- | ||
15 | qemu-storage-daemon.c | 1 + | ||
16 | 1 file changed, 1 insertion(+) | ||
17 | |||
18 | diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/qemu-storage-daemon.c | ||
21 | +++ b/qemu-storage-daemon.c | ||
22 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char *argv[]) | ||
23 | |||
24 | module_call_init(MODULE_INIT_QOM); | ||
25 | module_call_init(MODULE_INIT_TRACE); | ||
26 | + qemu_add_opts(&qemu_object_opts); | ||
27 | qemu_add_opts(&qemu_trace_opts); | ||
28 | qcrypto_init(&error_fatal); | ||
29 | bdrv_init(); | ||
30 | -- | ||
31 | 2.25.4 | ||
32 | |||
33 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Stefan Hajnoczi <stefanha@redhat.com> | ||
1 | 2 | ||
3 | Several components used by qemu-storage-daemon have cleanup functions | ||
4 | that aren't called. Keep the "valgrind --leak-check=full" as clean as | ||
5 | possible by invoking the necessary cleanup functions. | ||
6 | |||
7 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
8 | Message-Id: <20200619101132.2401756-3-stefanha@redhat.com> | ||
9 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
10 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
11 | --- | ||
12 | qemu-storage-daemon.c | 4 ++++ | ||
13 | 1 file changed, 4 insertions(+) | ||
14 | |||
15 | diff --git a/qemu-storage-daemon.c b/qemu-storage-daemon.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/qemu-storage-daemon.c | ||
18 | +++ b/qemu-storage-daemon.c | ||
19 | @@ -XXX,XX +XXX,XX @@ int main(int argc, char *argv[]) | ||
20 | main_loop_wait(false); | ||
21 | } | ||
22 | |||
23 | + monitor_cleanup(); | ||
24 | + qemu_chr_cleanup(); | ||
25 | + user_creatable_cleanup(); | ||
26 | + | ||
27 | return EXIT_SUCCESS; | ||
28 | } | ||
29 | -- | ||
30 | 2.25.4 | ||
31 | |||
32 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | FAT allows only a restricted set of characters in file names, and for |
---|---|---|---|
2 | some of the illegal characters, it's actually important that we catch | ||
3 | them: If filenames can contain '/', the guest can construct filenames | ||
4 | containing "../" and escape from the assigned vvfat directory. The same | ||
5 | problem could arise if ".." was ever accepted as a literal filename. | ||
2 | 6 | ||
3 | Do data compression in separate threads. This significantly improves | 7 | Fix this by adding a check that all filenames are valid in |
4 | performance for qemu-img convert with -W (allow async writes) and -c | 8 | check_directory_consistency(). |
5 | (compressed) options. | ||
6 | 9 | ||
7 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 10 | Reported-by: Nathan Huckleberry <nhuck15@gmail.com> |
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
12 | Message-Id: <20200623175534.38286-2-kwolf@redhat.com> | ||
13 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
8 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 14 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
9 | --- | 15 | --- |
10 | block/qcow2.h | 3 +++ | 16 | block/vvfat.c | 25 ++++++++++++++++++++++++- |
11 | block/qcow2.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- | 17 | 1 file changed, 24 insertions(+), 1 deletion(-) |
12 | 2 files changed, 64 insertions(+), 1 deletion(-) | ||
13 | 18 | ||
14 | diff --git a/block/qcow2.h b/block/qcow2.h | 19 | diff --git a/block/vvfat.c b/block/vvfat.c |
15 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/block/qcow2.h | 21 | --- a/block/vvfat.c |
17 | +++ b/block/qcow2.h | 22 | +++ b/block/vvfat.c |
18 | @@ -XXX,XX +XXX,XX @@ typedef struct BDRVQcow2State { | 23 | @@ -XXX,XX +XXX,XX @@ static void set_begin_of_direntry(direntry_t* direntry, uint32_t begin) |
19 | * override) */ | 24 | direntry->begin_hi = cpu_to_le16((begin >> 16) & 0xffff); |
20 | char *image_backing_file; | ||
21 | char *image_backing_format; | ||
22 | + | ||
23 | + CoQueue compress_wait_queue; | ||
24 | + int nb_compress_threads; | ||
25 | } BDRVQcow2State; | ||
26 | |||
27 | typedef struct Qcow2COWRegion { | ||
28 | diff --git a/block/qcow2.c b/block/qcow2.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/block/qcow2.c | ||
31 | +++ b/block/qcow2.c | ||
32 | @@ -XXX,XX +XXX,XX @@ | ||
33 | #include "qapi/qobject-input-visitor.h" | ||
34 | #include "qapi/qapi-visit-block-core.h" | ||
35 | #include "crypto.h" | ||
36 | +#include "block/thread-pool.h" | ||
37 | |||
38 | /* | ||
39 | Differences with QCOW: | ||
40 | @@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options, | ||
41 | qcow2_check_refcounts(bs, &result, 0); | ||
42 | } | ||
43 | #endif | ||
44 | + | ||
45 | + qemu_co_queue_init(&s->compress_wait_queue); | ||
46 | + | ||
47 | return ret; | ||
48 | |||
49 | fail: | ||
50 | @@ -XXX,XX +XXX,XX @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size) | ||
51 | return ret; | ||
52 | } | 25 | } |
53 | 26 | ||
54 | +#define MAX_COMPRESS_THREADS 4 | 27 | +static bool valid_filename(const unsigned char *name) |
55 | + | ||
56 | +typedef struct Qcow2CompressData { | ||
57 | + void *dest; | ||
58 | + const void *src; | ||
59 | + size_t size; | ||
60 | + ssize_t ret; | ||
61 | +} Qcow2CompressData; | ||
62 | + | ||
63 | +static int qcow2_compress_pool_func(void *opaque) | ||
64 | +{ | 28 | +{ |
65 | + Qcow2CompressData *data = opaque; | 29 | + unsigned char c; |
66 | + | 30 | + if (!strcmp((const char*)name, ".") || !strcmp((const char*)name, "..")) { |
67 | + data->ret = qcow2_compress(data->dest, data->src, data->size); | 31 | + return false; |
68 | + | 32 | + } |
69 | + return 0; | 33 | + for (; (c = *name); name++) { |
34 | + if (!((c >= '0' && c <= '9') || | ||
35 | + (c >= 'A' && c <= 'Z') || | ||
36 | + (c >= 'a' && c <= 'z') || | ||
37 | + c > 127 || | ||
38 | + strchr("$%'-_@~`!(){}^#&.+,;=[]", c) != NULL)) | ||
39 | + { | ||
40 | + return false; | ||
41 | + } | ||
42 | + } | ||
43 | + return true; | ||
70 | +} | 44 | +} |
71 | + | 45 | + |
72 | +static void qcow2_compress_complete(void *opaque, int ret) | 46 | static uint8_t to_valid_short_char(gunichar c) |
73 | +{ | 47 | { |
74 | + qemu_coroutine_enter(opaque); | 48 | c = g_unichar_toupper(c); |
75 | +} | 49 | if ((c >= '0' && c <= '9') || |
76 | + | 50 | (c >= 'A' && c <= 'Z') || |
77 | +/* See qcow2_compress definition for parameters description */ | 51 | - strchr("$%'-_@~`!(){}^#&", c) != 0) { |
78 | +static ssize_t qcow2_co_compress(BlockDriverState *bs, | 52 | + strchr("$%'-_@~`!(){}^#&", c) != NULL) { |
79 | + void *dest, const void *src, size_t size) | 53 | return c; |
80 | +{ | 54 | } else { |
81 | + BDRVQcow2State *s = bs->opaque; | 55 | return 0; |
82 | + BlockAIOCB *acb; | 56 | @@ -XXX,XX +XXX,XX @@ DLOG(fprintf(stderr, "check direntry %d:\n", i); print_direntry(direntries + i)) |
83 | + ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); | 57 | } |
84 | + Qcow2CompressData arg = { | 58 | lfn.checksum = 0x100; /* cannot use long name twice */ |
85 | + .dest = dest, | 59 | |
86 | + .src = src, | 60 | + if (!valid_filename(lfn.name)) { |
87 | + .size = size, | 61 | + fprintf(stderr, "Invalid file name\n"); |
88 | + }; | 62 | + goto fail; |
89 | + | 63 | + } |
90 | + while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) { | 64 | if (path_len + 1 + lfn.len >= PATH_MAX) { |
91 | + qemu_co_queue_wait(&s->compress_wait_queue, NULL); | 65 | fprintf(stderr, "Name too long: %s/%s\n", path, lfn.name); |
92 | + } | 66 | goto fail; |
93 | + | ||
94 | + s->nb_compress_threads++; | ||
95 | + acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg, | ||
96 | + qcow2_compress_complete, | ||
97 | + qemu_coroutine_self()); | ||
98 | + | ||
99 | + if (!acb) { | ||
100 | + s->nb_compress_threads--; | ||
101 | + return -EINVAL; | ||
102 | + } | ||
103 | + qemu_coroutine_yield(); | ||
104 | + s->nb_compress_threads--; | ||
105 | + qemu_co_queue_next(&s->compress_wait_queue); | ||
106 | + | ||
107 | + return arg.ret; | ||
108 | +} | ||
109 | + | ||
110 | /* XXX: put compressed sectors first, then all the cluster aligned | ||
111 | tables to avoid losing bytes in alignment */ | ||
112 | static coroutine_fn int | ||
113 | @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
114 | |||
115 | out_buf = g_malloc(s->cluster_size); | ||
116 | |||
117 | - out_len = qcow2_compress(out_buf, buf, s->cluster_size); | ||
118 | + out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size); | ||
119 | if (out_len == -2) { | ||
120 | ret = -EINVAL; | ||
121 | goto fail; | ||
122 | -- | 67 | -- |
123 | 2.13.6 | 68 | 2.25.4 |
124 | 69 | ||
125 | 70 | diff view generated by jsdifflib |
1 | From: Ari Sundholm <ari@tuxera.com> | 1 | array_remove_slice() calls array_roll() with array->next - 1 as the |
---|---|---|---|
2 | destination index. This is only correct for count == 1, otherwise we're | ||
3 | writing past the end of the array. array->next - count would be correct. | ||
2 | 4 | ||
3 | This allows using the two constants outside of block.c, which will | 5 | However, this is the only place ever calling array_roll(), so this |
4 | happen in a subsequent patch. | 6 | rather complicated operation isn't even necessary. |
5 | 7 | ||
6 | Signed-off-by: Ari Sundholm <ari@tuxera.com> | 8 | Fix the problem and simplify the code by replacing it with a single |
9 | memmove() call. array_roll() can now be removed. | ||
10 | |||
11 | Reported-by: Nathan Huckleberry <nhuck15@gmail.com> | ||
12 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | ||
13 | Message-Id: <20200623175534.38286-3-kwolf@redhat.com> | ||
14 | Reviewed-by: Eric Blake <eblake@redhat.com> | ||
7 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
8 | --- | 16 | --- |
9 | include/block/block.h | 7 +++++++ | 17 | block/vvfat.c | 42 +++++------------------------------------- |
10 | block.c | 6 ------ | 18 | 1 file changed, 5 insertions(+), 37 deletions(-) |
11 | 2 files changed, 7 insertions(+), 6 deletions(-) | ||
12 | 19 | ||
13 | diff --git a/include/block/block.h b/include/block/block.h | 20 | diff --git a/block/vvfat.c b/block/vvfat.c |
14 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/include/block/block.h | 22 | --- a/block/vvfat.c |
16 | +++ b/include/block/block.h | 23 | +++ b/block/vvfat.c |
17 | @@ -XXX,XX +XXX,XX @@ enum { | 24 | @@ -XXX,XX +XXX,XX @@ static inline void* array_insert(array_t* array,unsigned int index,unsigned int |
18 | BLK_PERM_GRAPH_MOD = 0x10, | 25 | return array->pointer+index*array->item_size; |
19 | 26 | } | |
20 | BLK_PERM_ALL = 0x1f, | 27 | |
28 | -/* this performs a "roll", so that the element which was at index_from becomes | ||
29 | - * index_to, but the order of all other elements is preserved. */ | ||
30 | -static inline int array_roll(array_t* array,int index_to,int index_from,int count) | ||
31 | -{ | ||
32 | - char* buf; | ||
33 | - char* from; | ||
34 | - char* to; | ||
35 | - int is; | ||
36 | - | ||
37 | - if(!array || | ||
38 | - index_to<0 || index_to>=array->next || | ||
39 | - index_from<0 || index_from>=array->next) | ||
40 | - return -1; | ||
41 | - | ||
42 | - if(index_to==index_from) | ||
43 | - return 0; | ||
44 | - | ||
45 | - is=array->item_size; | ||
46 | - from=array->pointer+index_from*is; | ||
47 | - to=array->pointer+index_to*is; | ||
48 | - buf=g_malloc(is*count); | ||
49 | - memcpy(buf,from,is*count); | ||
50 | - | ||
51 | - if(index_to<index_from) | ||
52 | - memmove(to+is*count,to,from-to); | ||
53 | - else | ||
54 | - memmove(from,from+is*count,to-from); | ||
55 | - | ||
56 | - memcpy(to,buf,is*count); | ||
57 | - | ||
58 | - g_free(buf); | ||
59 | - | ||
60 | - return 0; | ||
61 | -} | ||
62 | - | ||
63 | static inline int array_remove_slice(array_t* array,int index, int count) | ||
64 | { | ||
65 | assert(index >=0); | ||
66 | assert(count > 0); | ||
67 | assert(index + count <= array->next); | ||
68 | - if(array_roll(array,array->next-1,index,count)) | ||
69 | - return -1; | ||
21 | + | 70 | + |
22 | + DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ | 71 | + memmove(array->pointer + index * array->item_size, |
23 | + | BLK_PERM_WRITE | 72 | + array->pointer + (index + count) * array->item_size, |
24 | + | BLK_PERM_WRITE_UNCHANGED | 73 | + (array->next - index - count) * array->item_size); |
25 | + | BLK_PERM_RESIZE, | ||
26 | + | 74 | + |
27 | + DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH, | 75 | array->next -= count; |
28 | }; | ||
29 | |||
30 | char *bdrv_perm_names(uint64_t perm); | ||
31 | diff --git a/block.c b/block.c | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/block.c | ||
34 | +++ b/block.c | ||
35 | @@ -XXX,XX +XXX,XX @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared, | ||
36 | return 0; | 76 | return 0; |
37 | } | 77 | } |
38 | |||
39 | -#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \ | ||
40 | - | BLK_PERM_WRITE \ | ||
41 | - | BLK_PERM_WRITE_UNCHANGED \ | ||
42 | - | BLK_PERM_RESIZE) | ||
43 | -#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH) | ||
44 | - | ||
45 | void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c, | ||
46 | const BdrvChildRole *role, | ||
47 | BlockReopenQueue *reopen_queue, | ||
48 | -- | 78 | -- |
49 | 2.13.6 | 79 | 2.25.4 |
50 | 80 | ||
51 | 81 | diff view generated by jsdifflib |
1 | From: Aapo Vienamo <aapo@tuxera.com> | 1 | From: Max Reitz <mreitz@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Implements a block device write logging system, similar to Linux kernel | 3 | Waiting on a process for which we have a pipe will stall if the process |
4 | device mapper dm-log-writes. The write operations that are performed | 4 | outputs more data than fits into the OS-provided buffer. We must use |
5 | on a block device are logged to a file or another block device. The | 5 | communicate() before wait(), and in fact, communicate() perfectly |
6 | write log format is identical to the dm-log-writes format. Currently, | 6 | replaces wait() already. |
7 | log markers are not supported. | ||
8 | 7 | ||
9 | This functionality can be used for crash consistency and fs consistency | 8 | We have to drop the stderr=subprocess.STDOUT parameter from |
10 | testing. By implementing it in qemu, tests utilizing write logs can be | 9 | subprocess.Popen() in qemu_nbd_early_pipe(), because stderr is passed on |
11 | used to test non-Linux drivers and older kernels. | 10 | to the child process, so if we do not drop this parameter, communicate() |
11 | will hang (because the pipe is not closed). | ||
12 | 12 | ||
13 | The driver accepts an optional parameter to set the sector size used | 13 | Signed-off-by: Max Reitz <mreitz@redhat.com> |
14 | for logging. This makes the driver require all requests to be aligned | 14 | Message-Id: <20200630083711.40567-1-mreitz@redhat.com> |
15 | to this sector size and also causes offsets and sizes of writes in the | ||
16 | log metadata to be expressed in terms of this value (the log format has | ||
17 | a granularity of one sector for offsets and sizes). This allows | ||
18 | accurate logging of writes to guest block devices that have unusual | ||
19 | sector sizes. | ||
20 | |||
21 | The implementation is based on the blkverify and blkdebug block | ||
22 | drivers. | ||
23 | |||
24 | Signed-off-by: Aapo Vienamo <aapo@tuxera.com> | ||
25 | Signed-off-by: Ari Sundholm <ari@tuxera.com> | ||
26 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 15 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
27 | --- | 16 | --- |
28 | qapi/block-core.json | 33 ++++- | 17 | tests/qemu-iotests/iotests.py | 34 +++++++++++++++++----------------- |
29 | block/blklogwrites.c | 392 +++++++++++++++++++++++++++++++++++++++++++++++++++ | 18 | 1 file changed, 17 insertions(+), 17 deletions(-) |
30 | MAINTAINERS | 6 + | ||
31 | block/Makefile.objs | 1 + | ||
32 | 4 files changed, 426 insertions(+), 6 deletions(-) | ||
33 | create mode 100644 block/blklogwrites.c | ||
34 | 19 | ||
35 | diff --git a/qapi/block-core.json b/qapi/block-core.json | 20 | diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py |
36 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
37 | --- a/qapi/block-core.json | 22 | --- a/tests/qemu-iotests/iotests.py |
38 | +++ b/qapi/block-core.json | 23 | +++ b/tests/qemu-iotests/iotests.py |
39 | @@ -XXX,XX +XXX,XX @@ | 24 | @@ -XXX,XX +XXX,XX @@ def qemu_img_pipe(*args): |
40 | # @throttle: Since 2.11 | 25 | stdout=subprocess.PIPE, |
41 | # @nvme: Since 2.12 | 26 | stderr=subprocess.STDOUT, |
42 | # @copy-on-read: Since 3.0 | 27 | universal_newlines=True) |
43 | +# @blklogwrites: Since 3.0 | 28 | - exitcode = subp.wait() |
44 | # | 29 | - if exitcode < 0: |
45 | # Since: 2.9 | 30 | + output = subp.communicate()[0] |
46 | ## | 31 | + if subp.returncode < 0: |
47 | { 'enum': 'BlockdevDriver', | 32 | sys.stderr.write('qemu-img received signal %i: %s\n' |
48 | - 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read', | 33 | - % (-exitcode, ' '.join(qemu_img_args + list(args)))) |
49 | - 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom', | 34 | - return subp.communicate()[0] |
50 | - 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs', | 35 | + % (-subp.returncode, |
51 | - 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed', | 36 | + ' '.join(qemu_img_args + list(args)))) |
52 | - 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh', | 37 | + return output |
53 | - 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } | 38 | |
54 | + 'data': [ 'blkdebug', 'blklogwrites', 'blkverify', 'bochs', 'cloop', | 39 | def qemu_img_log(*args): |
55 | + 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps', 'gluster', | 40 | result = qemu_img_pipe(*args) |
56 | + 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks', | 41 | @@ -XXX,XX +XXX,XX @@ def qemu_io(*args): |
57 | + 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', | 42 | subp = subprocess.Popen(args, stdout=subprocess.PIPE, |
58 | + 'qcow2', 'qed', 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', | 43 | stderr=subprocess.STDOUT, |
59 | + 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] } | 44 | universal_newlines=True) |
60 | 45 | - exitcode = subp.wait() | |
61 | ## | 46 | - if exitcode < 0: |
62 | # @BlockdevOptionsFile: | 47 | + output = subp.communicate()[0] |
63 | @@ -XXX,XX +XXX,XX @@ | 48 | + if subp.returncode < 0: |
64 | '*set-state': ['BlkdebugSetStateOptions'] } } | 49 | sys.stderr.write('qemu-io received signal %i: %s\n' |
65 | 50 | - % (-exitcode, ' '.join(args))) | |
66 | ## | 51 | - return subp.communicate()[0] |
67 | +# @BlockdevOptionsBlklogwrites: | 52 | + % (-subp.returncode, ' '.join(args))) |
68 | +# | 53 | + return output |
69 | +# Driver specific block device options for blklogwrites. | 54 | |
70 | +# | 55 | def qemu_io_log(*args): |
71 | +# @file: block device | 56 | result = qemu_io(*args) |
72 | +# | 57 | @@ -XXX,XX +XXX,XX @@ def qemu_nbd_early_pipe(*args): |
73 | +# @log: block device used to log writes to @file | 58 | and its output in case of an error''' |
74 | +# | 59 | subp = subprocess.Popen(qemu_nbd_args + ['--fork'] + list(args), |
75 | +# @log-sector-size: sector size used in logging writes to @file, determines | 60 | stdout=subprocess.PIPE, |
76 | +# granularity of offsets and sizes of writes (default: 512) | 61 | - stderr=subprocess.STDOUT, |
77 | +# | 62 | universal_newlines=True) |
78 | +# Since: 3.0 | 63 | - exitcode = subp.wait() |
79 | +## | 64 | - if exitcode < 0: |
80 | +{ 'struct': 'BlockdevOptionsBlklogwrites', | 65 | + output = subp.communicate()[0] |
81 | + 'data': { 'file': 'BlockdevRef', | 66 | + if subp.returncode < 0: |
82 | + 'log': 'BlockdevRef', | 67 | sys.stderr.write('qemu-nbd received signal %i: %s\n' % |
83 | + '*log-sector-size': 'uint32' } } | 68 | - (-exitcode, |
84 | + | 69 | + (-subp.returncode, |
85 | +## | 70 | ' '.join(qemu_nbd_args + ['--fork'] + list(args)))) |
86 | # @BlockdevOptionsBlkverify: | 71 | |
87 | # | 72 | - return exitcode, subp.communicate()[0] if exitcode else '' |
88 | # Driver specific block device options for blkverify. | 73 | + return subp.returncode, output if subp.returncode else '' |
89 | @@ -XXX,XX +XXX,XX @@ | 74 | |
90 | 'discriminator': 'driver', | 75 | def qemu_nbd_popen(*args): |
91 | 'data': { | 76 | '''Run qemu-nbd in daemon mode and return the parent's exit code''' |
92 | 'blkdebug': 'BlockdevOptionsBlkdebug', | 77 | @@ -XXX,XX +XXX,XX @@ def qemu_pipe(*args): |
93 | + 'blklogwrites':'BlockdevOptionsBlklogwrites', | 78 | subp = subprocess.Popen(args, stdout=subprocess.PIPE, |
94 | 'blkverify': 'BlockdevOptionsBlkverify', | 79 | stderr=subprocess.STDOUT, |
95 | 'bochs': 'BlockdevOptionsGenericFormat', | 80 | universal_newlines=True) |
96 | 'cloop': 'BlockdevOptionsGenericFormat', | 81 | - exitcode = subp.wait() |
97 | diff --git a/block/blklogwrites.c b/block/blklogwrites.c | 82 | - if exitcode < 0: |
98 | new file mode 100644 | 83 | + output = subp.communicate()[0] |
99 | index XXXXXXX..XXXXXXX | 84 | + if subp.returncode < 0: |
100 | --- /dev/null | 85 | sys.stderr.write('qemu received signal %i: %s\n' % |
101 | +++ b/block/blklogwrites.c | 86 | - (-exitcode, ' '.join(args))) |
102 | @@ -XXX,XX +XXX,XX @@ | 87 | - return subp.communicate()[0] |
103 | +/* | 88 | + (-subp.returncode, ' '.join(args))) |
104 | + * Write logging blk driver based on blkverify and blkdebug. | 89 | + return output |
105 | + * | 90 | |
106 | + * Copyright (c) 2017 Tuomas Tynkkynen <tuomas@tuxera.com> | 91 | def supported_formats(read_only=False): |
107 | + * Copyright (c) 2018 Aapo Vienamo <aapo@tuxera.com> | 92 | '''Set 'read_only' to True to check ro-whitelist |
108 | + * Copyright (c) 2018 Ari Sundholm <ari@tuxera.com> | ||
109 | + * | ||
110 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
111 | + * See the COPYING file in the top-level directory. | ||
112 | + */ | ||
113 | + | ||
114 | +#include "qemu/osdep.h" | ||
115 | +#include "qapi/error.h" | ||
116 | +#include "qemu/sockets.h" /* for EINPROGRESS on Windows */ | ||
117 | +#include "block/block_int.h" | ||
118 | +#include "qapi/qmp/qdict.h" | ||
119 | +#include "qapi/qmp/qstring.h" | ||
120 | +#include "qemu/cutils.h" | ||
121 | +#include "qemu/option.h" | ||
122 | + | ||
123 | +/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */ | ||
124 | + | ||
125 | +#define LOG_FLUSH_FLAG (1 << 0) | ||
126 | +#define LOG_FUA_FLAG (1 << 1) | ||
127 | +#define LOG_DISCARD_FLAG (1 << 2) | ||
128 | +#define LOG_MARK_FLAG (1 << 3) | ||
129 | + | ||
130 | +#define WRITE_LOG_VERSION 1ULL | ||
131 | +#define WRITE_LOG_MAGIC 0x6a736677736872ULL | ||
132 | + | ||
133 | +/* All fields are little-endian. */ | ||
134 | +struct log_write_super { | ||
135 | + uint64_t magic; | ||
136 | + uint64_t version; | ||
137 | + uint64_t nr_entries; | ||
138 | + uint32_t sectorsize; | ||
139 | +} QEMU_PACKED; | ||
140 | + | ||
141 | +struct log_write_entry { | ||
142 | + uint64_t sector; | ||
143 | + uint64_t nr_sectors; | ||
144 | + uint64_t flags; | ||
145 | + uint64_t data_len; | ||
146 | +} QEMU_PACKED; | ||
147 | + | ||
148 | +/* End of disk format structures. */ | ||
149 | + | ||
150 | +typedef struct { | ||
151 | + BdrvChild *log_file; | ||
152 | + uint32_t sectorsize; | ||
153 | + uint32_t sectorbits; | ||
154 | + uint64_t cur_log_sector; | ||
155 | + uint64_t nr_entries; | ||
156 | +} BDRVBlkLogWritesState; | ||
157 | + | ||
158 | +static inline uint32_t blk_log_writes_log2(uint32_t value) | ||
159 | +{ | ||
160 | + assert(value > 0); | ||
161 | + return 31 - clz32(value); | ||
162 | +} | ||
163 | + | ||
164 | +static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags, | ||
165 | + Error **errp) | ||
166 | +{ | ||
167 | + BDRVBlkLogWritesState *s = bs->opaque; | ||
168 | + Error *local_err = NULL; | ||
169 | + int ret; | ||
170 | + int64_t log_sector_size = BDRV_SECTOR_SIZE; | ||
171 | + | ||
172 | + /* Open the file */ | ||
173 | + bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false, | ||
174 | + &local_err); | ||
175 | + if (local_err) { | ||
176 | + ret = -EINVAL; | ||
177 | + error_propagate(errp, local_err); | ||
178 | + goto fail; | ||
179 | + } | ||
180 | + | ||
181 | + if (qdict_haskey(options, "log-sector-size")) { | ||
182 | + log_sector_size = qdict_get_int(options, "log-sector-size"); | ||
183 | + qdict_del(options, "log-sector-size"); | ||
184 | + } | ||
185 | + | ||
186 | + if (log_sector_size < 0 || log_sector_size >= (1ull << 32) || | ||
187 | + !is_power_of_2(log_sector_size)) | ||
188 | + { | ||
189 | + ret = -EINVAL; | ||
190 | + error_setg(errp, "Invalid log sector size %"PRId64, log_sector_size); | ||
191 | + goto fail; | ||
192 | + } | ||
193 | + | ||
194 | + s->sectorsize = log_sector_size; | ||
195 | + s->sectorbits = blk_log_writes_log2(log_sector_size); | ||
196 | + s->cur_log_sector = 1; | ||
197 | + s->nr_entries = 0; | ||
198 | + | ||
199 | + /* Open the log file */ | ||
200 | + s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, false, | ||
201 | + &local_err); | ||
202 | + if (local_err) { | ||
203 | + ret = -EINVAL; | ||
204 | + error_propagate(errp, local_err); | ||
205 | + goto fail; | ||
206 | + } | ||
207 | + | ||
208 | + ret = 0; | ||
209 | +fail: | ||
210 | + if (ret < 0) { | ||
211 | + bdrv_unref_child(bs, bs->file); | ||
212 | + bs->file = NULL; | ||
213 | + } | ||
214 | + return ret; | ||
215 | +} | ||
216 | + | ||
217 | +static void blk_log_writes_close(BlockDriverState *bs) | ||
218 | +{ | ||
219 | + BDRVBlkLogWritesState *s = bs->opaque; | ||
220 | + | ||
221 | + bdrv_unref_child(bs, s->log_file); | ||
222 | + s->log_file = NULL; | ||
223 | +} | ||
224 | + | ||
225 | +static int64_t blk_log_writes_getlength(BlockDriverState *bs) | ||
226 | +{ | ||
227 | + return bdrv_getlength(bs->file->bs); | ||
228 | +} | ||
229 | + | ||
230 | +static void blk_log_writes_refresh_filename(BlockDriverState *bs, | ||
231 | + QDict *options) | ||
232 | +{ | ||
233 | + BDRVBlkLogWritesState *s = bs->opaque; | ||
234 | + | ||
235 | + /* bs->file->bs has already been refreshed */ | ||
236 | + bdrv_refresh_filename(s->log_file->bs); | ||
237 | + | ||
238 | + if (bs->file->bs->full_open_options | ||
239 | + && s->log_file->bs->full_open_options) | ||
240 | + { | ||
241 | + QDict *opts = qdict_new(); | ||
242 | + qdict_put_str(opts, "driver", "blklogwrites"); | ||
243 | + | ||
244 | + qobject_ref(bs->file->bs->full_open_options); | ||
245 | + qdict_put_obj(opts, "file", QOBJECT(bs->file->bs->full_open_options)); | ||
246 | + qobject_ref(s->log_file->bs->full_open_options); | ||
247 | + qdict_put_obj(opts, "log", | ||
248 | + QOBJECT(s->log_file->bs->full_open_options)); | ||
249 | + | ||
250 | + bs->full_open_options = opts; | ||
251 | + } | ||
252 | +} | ||
253 | + | ||
254 | +static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c, | ||
255 | + const BdrvChildRole *role, | ||
256 | + BlockReopenQueue *ro_q, | ||
257 | + uint64_t perm, uint64_t shrd, | ||
258 | + uint64_t *nperm, uint64_t *nshrd) | ||
259 | +{ | ||
260 | + if (!c) { | ||
261 | + *nperm = perm & DEFAULT_PERM_PASSTHROUGH; | ||
262 | + *nshrd = (shrd & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED; | ||
263 | + return; | ||
264 | + } | ||
265 | + | ||
266 | + if (!strcmp(c->name, "log")) { | ||
267 | + bdrv_format_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd); | ||
268 | + } else { | ||
269 | + bdrv_filter_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd); | ||
270 | + } | ||
271 | +} | ||
272 | + | ||
273 | +static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp) | ||
274 | +{ | ||
275 | + BDRVBlkLogWritesState *s = bs->opaque; | ||
276 | + bs->bl.request_alignment = s->sectorsize; | ||
277 | +} | ||
278 | + | ||
279 | +static int coroutine_fn | ||
280 | +blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||
281 | + QEMUIOVector *qiov, int flags) | ||
282 | +{ | ||
283 | + return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags); | ||
284 | +} | ||
285 | + | ||
286 | +typedef struct BlkLogWritesFileReq { | ||
287 | + BlockDriverState *bs; | ||
288 | + uint64_t offset; | ||
289 | + uint64_t bytes; | ||
290 | + int file_flags; | ||
291 | + QEMUIOVector *qiov; | ||
292 | + int (*func)(struct BlkLogWritesFileReq *r); | ||
293 | + int file_ret; | ||
294 | +} BlkLogWritesFileReq; | ||
295 | + | ||
296 | +typedef struct { | ||
297 | + BlockDriverState *bs; | ||
298 | + QEMUIOVector *qiov; | ||
299 | + struct log_write_entry entry; | ||
300 | + uint64_t zero_size; | ||
301 | + int log_ret; | ||
302 | +} BlkLogWritesLogReq; | ||
303 | + | ||
304 | +static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr) | ||
305 | +{ | ||
306 | + BDRVBlkLogWritesState *s = lr->bs->opaque; | ||
307 | + uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits; | ||
308 | + | ||
309 | + s->nr_entries++; | ||
310 | + s->cur_log_sector += | ||
311 | + ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits; | ||
312 | + | ||
313 | + lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size, | ||
314 | + lr->qiov, 0); | ||
315 | + | ||
316 | + /* Logging for the "write zeroes" operation */ | ||
317 | + if (lr->log_ret == 0 && lr->zero_size) { | ||
318 | + cur_log_offset = s->cur_log_sector << s->sectorbits; | ||
319 | + s->cur_log_sector += | ||
320 | + ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits; | ||
321 | + | ||
322 | + lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset, | ||
323 | + lr->zero_size, 0); | ||
324 | + } | ||
325 | + | ||
326 | + /* Update super block on flush */ | ||
327 | + if (lr->log_ret == 0 && lr->entry.flags & LOG_FLUSH_FLAG) { | ||
328 | + struct log_write_super super = { | ||
329 | + .magic = cpu_to_le64(WRITE_LOG_MAGIC), | ||
330 | + .version = cpu_to_le64(WRITE_LOG_VERSION), | ||
331 | + .nr_entries = cpu_to_le64(s->nr_entries), | ||
332 | + .sectorsize = cpu_to_le32(s->sectorsize), | ||
333 | + }; | ||
334 | + void *zeroes = g_malloc0(s->sectorsize - sizeof(super)); | ||
335 | + QEMUIOVector qiov; | ||
336 | + | ||
337 | + qemu_iovec_init(&qiov, 2); | ||
338 | + qemu_iovec_add(&qiov, &super, sizeof(super)); | ||
339 | + qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super)); | ||
340 | + | ||
341 | + lr->log_ret = | ||
342 | + bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0); | ||
343 | + if (lr->log_ret == 0) { | ||
344 | + lr->log_ret = bdrv_co_flush(s->log_file->bs); | ||
345 | + } | ||
346 | + qemu_iovec_destroy(&qiov); | ||
347 | + g_free(zeroes); | ||
348 | + } | ||
349 | +} | ||
350 | + | ||
351 | +static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr) | ||
352 | +{ | ||
353 | + fr->file_ret = fr->func(fr); | ||
354 | +} | ||
355 | + | ||
356 | +static int coroutine_fn | ||
357 | +blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||
358 | + QEMUIOVector *qiov, int flags, | ||
359 | + int (*file_func)(BlkLogWritesFileReq *r), | ||
360 | + uint64_t entry_flags, bool is_zero_write) | ||
361 | +{ | ||
362 | + QEMUIOVector log_qiov; | ||
363 | + size_t niov = qiov ? qiov->niov : 0; | ||
364 | + BDRVBlkLogWritesState *s = bs->opaque; | ||
365 | + BlkLogWritesFileReq fr = { | ||
366 | + .bs = bs, | ||
367 | + .offset = offset, | ||
368 | + .bytes = bytes, | ||
369 | + .file_flags = flags, | ||
370 | + .qiov = qiov, | ||
371 | + .func = file_func, | ||
372 | + }; | ||
373 | + BlkLogWritesLogReq lr = { | ||
374 | + .bs = bs, | ||
375 | + .qiov = &log_qiov, | ||
376 | + .entry = { | ||
377 | + .sector = cpu_to_le64(offset >> s->sectorbits), | ||
378 | + .nr_sectors = cpu_to_le64(bytes >> s->sectorbits), | ||
379 | + .flags = cpu_to_le64(entry_flags), | ||
380 | + .data_len = 0, | ||
381 | + }, | ||
382 | + .zero_size = is_zero_write ? bytes : 0, | ||
383 | + }; | ||
384 | + void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry)); | ||
385 | + | ||
386 | + assert((1 << s->sectorbits) == s->sectorsize); | ||
387 | + assert(bs->bl.request_alignment == s->sectorsize); | ||
388 | + assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment)); | ||
389 | + assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment)); | ||
390 | + | ||
391 | + qemu_iovec_init(&log_qiov, niov + 2); | ||
392 | + qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry)); | ||
393 | + qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry)); | ||
394 | + if (qiov) { | ||
395 | + qemu_iovec_concat(&log_qiov, qiov, 0, qiov->size); | ||
396 | + } | ||
397 | + | ||
398 | + blk_log_writes_co_do_file(&fr); | ||
399 | + blk_log_writes_co_do_log(&lr); | ||
400 | + | ||
401 | + qemu_iovec_destroy(&log_qiov); | ||
402 | + g_free(zeroes); | ||
403 | + | ||
404 | + if (lr.log_ret < 0) { | ||
405 | + return lr.log_ret; | ||
406 | + } | ||
407 | + | ||
408 | + return fr.file_ret; | ||
409 | +} | ||
410 | + | ||
411 | +static int coroutine_fn | ||
412 | +blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr) | ||
413 | +{ | ||
414 | + return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes, | ||
415 | + fr->qiov, fr->file_flags); | ||
416 | +} | ||
417 | + | ||
418 | +static int coroutine_fn | ||
419 | +blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr) | ||
420 | +{ | ||
421 | + return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes, | ||
422 | + fr->file_flags); | ||
423 | +} | ||
424 | + | ||
425 | +static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr) | ||
426 | +{ | ||
427 | + return bdrv_co_flush(fr->bs->file->bs); | ||
428 | +} | ||
429 | + | ||
430 | +static int coroutine_fn | ||
431 | +blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr) | ||
432 | +{ | ||
433 | + return bdrv_co_pdiscard(fr->bs->file->bs, fr->offset, fr->bytes); | ||
434 | +} | ||
435 | + | ||
436 | +static int coroutine_fn | ||
437 | +blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes, | ||
438 | + QEMUIOVector *qiov, int flags) | ||
439 | +{ | ||
440 | + return blk_log_writes_co_log(bs, offset, bytes, qiov, flags, | ||
441 | + blk_log_writes_co_do_file_pwritev, 0, false); | ||
442 | +} | ||
443 | + | ||
444 | +static int coroutine_fn | ||
445 | +blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, | ||
446 | + BdrvRequestFlags flags) | ||
447 | +{ | ||
448 | + return blk_log_writes_co_log(bs, offset, bytes, NULL, flags, | ||
449 | + blk_log_writes_co_do_file_pwrite_zeroes, 0, | ||
450 | + true); | ||
451 | +} | ||
452 | + | ||
453 | +static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs) | ||
454 | +{ | ||
455 | + return blk_log_writes_co_log(bs, 0, 0, NULL, 0, | ||
456 | + blk_log_writes_co_do_file_flush, | ||
457 | + LOG_FLUSH_FLAG, false); | ||
458 | +} | ||
459 | + | ||
460 | +static int coroutine_fn | ||
461 | +blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count) | ||
462 | +{ | ||
463 | + return blk_log_writes_co_log(bs, offset, count, NULL, 0, | ||
464 | + blk_log_writes_co_do_file_pdiscard, | ||
465 | + LOG_DISCARD_FLAG, false); | ||
466 | +} | ||
467 | + | ||
468 | +static BlockDriver bdrv_blk_log_writes = { | ||
469 | + .format_name = "blklogwrites", | ||
470 | + .instance_size = sizeof(BDRVBlkLogWritesState), | ||
471 | + | ||
472 | + .bdrv_open = blk_log_writes_open, | ||
473 | + .bdrv_close = blk_log_writes_close, | ||
474 | + .bdrv_getlength = blk_log_writes_getlength, | ||
475 | + .bdrv_refresh_filename = blk_log_writes_refresh_filename, | ||
476 | + .bdrv_child_perm = blk_log_writes_child_perm, | ||
477 | + .bdrv_refresh_limits = blk_log_writes_refresh_limits, | ||
478 | + | ||
479 | + .bdrv_co_preadv = blk_log_writes_co_preadv, | ||
480 | + .bdrv_co_pwritev = blk_log_writes_co_pwritev, | ||
481 | + .bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes, | ||
482 | + .bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk, | ||
483 | + .bdrv_co_pdiscard = blk_log_writes_co_pdiscard, | ||
484 | + .bdrv_co_block_status = bdrv_co_block_status_from_file, | ||
485 | + | ||
486 | + .is_filter = true, | ||
487 | +}; | ||
488 | + | ||
489 | +static void bdrv_blk_log_writes_init(void) | ||
490 | +{ | ||
491 | + bdrv_register(&bdrv_blk_log_writes); | ||
492 | +} | ||
493 | + | ||
494 | +block_init(bdrv_blk_log_writes_init); | ||
495 | diff --git a/MAINTAINERS b/MAINTAINERS | ||
496 | index XXXXXXX..XXXXXXX 100644 | ||
497 | --- a/MAINTAINERS | ||
498 | +++ b/MAINTAINERS | ||
499 | @@ -XXX,XX +XXX,XX @@ S: Supported | ||
500 | F: block/quorum.c | ||
501 | L: qemu-block@nongnu.org | ||
502 | |||
503 | +blklogwrites | ||
504 | +M: Ari Sundholm <ari@tuxera.com> | ||
505 | +L: qemu-block@nongnu.org | ||
506 | +S: Supported | ||
507 | +F: block/blklogwrites.c | ||
508 | + | ||
509 | blkverify | ||
510 | M: Stefan Hajnoczi <stefanha@redhat.com> | ||
511 | L: qemu-block@nongnu.org | ||
512 | diff --git a/block/Makefile.objs b/block/Makefile.objs | ||
513 | index XXXXXXX..XXXXXXX 100644 | ||
514 | --- a/block/Makefile.objs | ||
515 | +++ b/block/Makefile.objs | ||
516 | @@ -XXX,XX +XXX,XX @@ block-obj-y += qed-check.o | ||
517 | block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o | ||
518 | block-obj-y += quorum.o | ||
519 | block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o | ||
520 | +block-obj-y += blklogwrites.o | ||
521 | block-obj-y += block-backend.o snapshot.o qapi.o | ||
522 | block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o | ||
523 | block-obj-$(CONFIG_POSIX) += file-posix.o | ||
524 | -- | 93 | -- |
525 | 2.13.6 | 94 | 2.25.4 |
526 | 95 | ||
527 | 96 | diff view generated by jsdifflib |
1 | From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 1 | From: Philippe Mathieu-Daudé <philmd@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Make a separate function for compression so that it can be parallelized later. | 3 | Commit 96927c744 replaced the qdev_init_nofail() call with |
4 | - use the .avail_out field instead of .next_out to calculate the size of | 4 | isa_realize_and_unref(), which has a different error |
5 | the compressed data. It looks more natural and it allows dest to remain | 5 | message. Update the test output accordingly. |
6 | a void pointer | ||
7 | - set avail_out to be at least one byte less than the input size, to be sure | ||
8 | inefficient compression is detected early | ||
9 | 6 | ||
10 | Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com> | 7 | Gitlab CI error after merging b77b5b3dc7: |
8 | https://gitlab.com/qemu-project/qemu/-/jobs/597414772#L4375 | ||
9 | |||
10 | Reported-by: Thomas Huth <thuth@redhat.com> | ||
11 | Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> | ||
12 | Signed-off-by: Alex Bennée <alex.bennee@linaro.org> | ||
13 | Reviewed-by: John Snow <jsnow@redhat.com> | ||
14 | Reviewed-by: Thomas Huth <thuth@redhat.com> | ||
15 | Message-Id: <20200616154949.6586-1-philmd@redhat.com> | ||
16 | Message-Id: <20200624140446.15380-2-alex.bennee@linaro.org> | ||
11 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> | 17 | Signed-off-by: Kevin Wolf <kwolf@redhat.com> |
12 | --- | 18 | --- |
13 | block/qcow2.c | 76 ++++++++++++++++++++++++++++++++++++++--------------------- | 19 | tests/qemu-iotests/051.pc.out | 4 ++-- |
14 | 1 file changed, 49 insertions(+), 27 deletions(-) | 20 | 1 file changed, 2 insertions(+), 2 deletions(-) |
15 | 21 | ||
16 | diff --git a/block/qcow2.c b/block/qcow2.c | 22 | diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out |
17 | index XXXXXXX..XXXXXXX 100644 | 23 | index XXXXXXX..XXXXXXX 100644 |
18 | --- a/block/qcow2.c | 24 | --- a/tests/qemu-iotests/051.pc.out |
19 | +++ b/block/qcow2.c | 25 | +++ b/tests/qemu-iotests/051.pc.out |
20 | @@ -XXX,XX +XXX,XX @@ | 26 | @@ -XXX,XX +XXX,XX @@ QEMU X.Y.Z monitor - type 'help' for more information |
21 | */ | 27 | |
22 | 28 | Testing: -drive if=ide | |
23 | #include "qemu/osdep.h" | 29 | QEMU X.Y.Z monitor - type 'help' for more information |
24 | + | 30 | -(qemu) QEMU_PROG: Initialization of device ide-hd failed: Device needs media, but drive is empty |
25 | +#define ZLIB_CONST | 31 | +(qemu) QEMU_PROG: Device needs media, but drive is empty |
26 | +#include <zlib.h> | 32 | |
27 | + | 33 | Testing: -drive if=virtio |
28 | #include "block/block_int.h" | 34 | QEMU X.Y.Z monitor - type 'help' for more information |
29 | #include "block/qdict.h" | 35 | @@ -XXX,XX +XXX,XX @@ QEMU X.Y.Z monitor - type 'help' for more information |
30 | #include "sysemu/block-backend.h" | 36 | |
31 | #include "qemu/module.h" | 37 | Testing: -drive file=TEST_DIR/t.qcow2,if=ide,readonly=on |
32 | -#include <zlib.h> | 38 | QEMU X.Y.Z monitor - type 'help' for more information |
33 | #include "qcow2.h" | 39 | -(qemu) QEMU_PROG: Initialization of device ide-hd failed: Block node is read-only |
34 | #include "qemu/error-report.h" | 40 | +(qemu) QEMU_PROG: Block node is read-only |
35 | #include "qapi/error.h" | 41 | |
36 | @@ -XXX,XX +XXX,XX @@ fail: | 42 | Testing: -drive file=TEST_DIR/t.qcow2,if=virtio,readonly=on |
37 | return ret; | 43 | QEMU X.Y.Z monitor - type 'help' for more information |
38 | } | ||
39 | |||
40 | +/* | ||
41 | + * qcow2_compress() | ||
42 | + * | ||
43 | + * @dest - destination buffer, at least of @size-1 bytes | ||
44 | + * @src - source buffer, @size bytes | ||
45 | + * | ||
46 | + * Returns: compressed size on success | ||
47 | + * -1 if compression is inefficient | ||
48 | + * -2 on any other error | ||
49 | + */ | ||
50 | +static ssize_t qcow2_compress(void *dest, const void *src, size_t size) | ||
51 | +{ | ||
52 | + ssize_t ret; | ||
53 | + z_stream strm; | ||
54 | + | ||
55 | + /* best compression, small window, no zlib header */ | ||
56 | + memset(&strm, 0, sizeof(strm)); | ||
57 | + ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED, | ||
58 | + -12, 9, Z_DEFAULT_STRATEGY); | ||
59 | + if (ret != 0) { | ||
60 | + return -2; | ||
61 | + } | ||
62 | + | ||
63 | + strm.avail_in = size; | ||
64 | + strm.next_in = src; | ||
65 | + strm.avail_out = size - 1; | ||
66 | + strm.next_out = dest; | ||
67 | + | ||
68 | + ret = deflate(&strm, Z_FINISH); | ||
69 | + if (ret == Z_STREAM_END) { | ||
70 | + ret = size - 1 - strm.avail_out; | ||
71 | + } else { | ||
72 | + ret = (ret == Z_OK ? -1 : -2); | ||
73 | + } | ||
74 | + | ||
75 | + deflateEnd(&strm); | ||
76 | + | ||
77 | + return ret; | ||
78 | +} | ||
79 | + | ||
80 | /* XXX: put compressed sectors first, then all the cluster aligned | ||
81 | tables to avoid losing bytes in alignment */ | ||
82 | static coroutine_fn int | ||
83 | @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
84 | BDRVQcow2State *s = bs->opaque; | ||
85 | QEMUIOVector hd_qiov; | ||
86 | struct iovec iov; | ||
87 | - z_stream strm; | ||
88 | - int ret, out_len; | ||
89 | + int ret; | ||
90 | + size_t out_len; | ||
91 | uint8_t *buf, *out_buf; | ||
92 | int64_t cluster_offset; | ||
93 | |||
94 | @@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset, | ||
95 | |||
96 | out_buf = g_malloc(s->cluster_size); | ||
97 | |||
98 | - /* best compression, small window, no zlib header */ | ||
99 | - memset(&strm, 0, sizeof(strm)); | ||
100 | - ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, | ||
101 | - Z_DEFLATED, -12, | ||
102 | - 9, Z_DEFAULT_STRATEGY); | ||
103 | - if (ret != 0) { | ||
104 | + out_len = qcow2_compress(out_buf, buf, s->cluster_size); | ||
105 | + if (out_len == -2) { | ||
106 | ret = -EINVAL; | ||
107 | goto fail; | ||
108 | - } | ||
109 | - | ||
110 | - strm.avail_in = s->cluster_size; | ||
111 | - strm.next_in = (uint8_t *)buf; | ||
112 | - strm.avail_out = s->cluster_size; | ||
113 | - strm.next_out = out_buf; | ||
114 | - | ||
115 | - ret = deflate(&strm, Z_FINISH); | ||
116 | - if (ret != Z_STREAM_END && ret != Z_OK) { | ||
117 | - deflateEnd(&strm); | ||
118 | - ret = -EINVAL; | ||
119 | - goto fail; | ||
120 | - } | ||
121 | - out_len = strm.next_out - out_buf; | ||
122 | - | ||
123 | - deflateEnd(&strm); | ||
124 | - | ||
125 | - if (ret != Z_STREAM_END || out_len >= s->cluster_size) { | ||
126 | + } else if (out_len == -1) { | ||
127 | /* could not compress: write normal cluster */ | ||
128 | ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0); | ||
129 | if (ret < 0) { | ||
130 | -- | 44 | -- |
131 | 2.13.6 | 45 | 2.25.4 |
132 | 46 | ||
133 | 47 | diff view generated by jsdifflib |