1
The following changes since commit 527266f324def9f7f392fe3b0dd940cb8dc699d9:
1
The following changes since commit fe8d2d5737ab20ed0118863f5eb888cae37122ab:
2
2
3
Merge remote-tracking branch 'remotes/armbru/tags/pull-pflash-2019-03-26' into staging (2019-03-26 09:57:07 +0000)
3
Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-3.0-pull-request' into staging (2018-07-04 22:38:10 +0100)
4
4
5
are available in the Git repository at:
5
are available in the git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to c6e3f520c802c5cb2de80576aba7f9f1fe985d8b:
9
for you to fetch changes up to 7c20c808a5cbf5d244735bc78fc3138c739c1946:
10
10
11
qemu-io: Add write -n for BDRV_REQ_NO_FALLBACK (2019-03-26 11:37:51 +0100)
11
file-posix: Unlock FD after creation (2018-07-05 11:07:58 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches:
14
Block layer patches:
15
15
16
- Fix slow pre-zeroing in qemu-img convert
16
- qcow2: Use worker threads for compression to improve performance of
17
- Test case for block job pausing on I/O errors
17
'qemu-img convert -W' and compressed backup jobs
18
- blklogwrites: New filter driver to log write requests to an image in
19
the dm-log-writes format
20
- file-posix: Fix image locking during image creation
21
- crypto: Fix memory leak in error path
22
- Error out instead of silently truncating node names
18
23
19
----------------------------------------------------------------
24
----------------------------------------------------------------
20
Kevin Wolf (6):
25
Aapo Vienamo (1):
21
block: Remove error messages in bdrv_make_zero()
26
block: Add blklogwrites
22
block: Add BDRV_REQ_NO_FALLBACK
23
block: Advertise BDRV_REQ_NO_FALLBACK in filter drivers
24
file-posix: Support BDRV_REQ_NO_FALLBACK for zero writes
25
qemu-img: Use BDRV_REQ_NO_FALLBACK for pre-zeroing
26
qemu-io: Add write -n for BDRV_REQ_NO_FALLBACK
27
27
28
Vladimir Sementsov-Ogievskiy (1):
28
Ari Sundholm (4):
29
iotests: add 248: test resume mirror after auto pause on ENOSPC
29
block: Move two block permission constants to the relevant enum
30
block/blklogwrites: Change log_sector_size from int64_t to uint64_t
31
block/blklogwrites: Add an option for appending to an old log
32
block/blklogwrites: Add an option for the update interval of the log superblock
30
33
31
include/block/block.h | 7 ++++-
34
Kevin Wolf (2):
32
include/block/raw-aio.h | 1 +
35
block: Don't silently truncate node names
33
block/blkdebug.c | 2 +-
36
block/crypto: Fix memory leak in create error path
34
block/copy-on-read.c | 7 ++---
35
block/file-posix.c | 24 ++++++++++------
36
block/io.c | 16 +++++++----
37
block/mirror.c | 3 +-
38
block/raw-format.c | 2 +-
39
qemu-img.c | 2 +-
40
qemu-io-cmds.c | 13 +++++++--
41
tests/qemu-iotests/248 | 71 ++++++++++++++++++++++++++++++++++++++++++++++
42
tests/qemu-iotests/248.out | 8 ++++++
43
tests/qemu-iotests/group | 1 +
44
13 files changed, 133 insertions(+), 24 deletions(-)
45
create mode 100755 tests/qemu-iotests/248
46
create mode 100644 tests/qemu-iotests/248.out
47
37
38
Max Reitz (2):
39
file-posix: Fix creation locking
40
file-posix: Unlock FD after creation
41
42
Vladimir Sementsov-Ogievskiy (3):
43
qemu-img: allow compressed not-in-order writes
44
qcow2: refactor data compression
45
qcow2: add compress threads
46
47
qapi/block-core.json | 38 ++-
48
block/qcow2.h | 3 +
49
include/block/block.h | 7 +
50
block.c | 12 +-
51
block/blklogwrites.c | 547 ++++++++++++++++++++++++++++++++++++++++++
52
block/crypto.c | 2 +-
53
block/file-posix.c | 21 +-
54
block/qcow2.c | 138 ++++++++---
55
qemu-img.c | 5 -
56
MAINTAINERS | 6 +
57
block/Makefile.objs | 1 +
58
tests/qemu-iotests/051 | 15 ++
59
tests/qemu-iotests/051.out | 23 ++
60
tests/qemu-iotests/051.pc.out | 23 ++
61
14 files changed, 791 insertions(+), 50 deletions(-)
62
create mode 100644 block/blklogwrites.c
63
diff view generated by jsdifflib
1
If qemu-img convert sees that the target image isn't zero-initialised
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
yet, it tries to do an efficient zero write for the whole image first
3
to save the overhead of repeated explicit zero writes during the
4
conversion. Obviously, this provides only an advantage if the
5
pre-zeroing is actually efficient. Otherwise, we can end up writing
6
zeroes slowly while zeroing out the whole image, and then overwrite the
7
same blocks again with real data, potentially doubling the written data.
8
2
9
Pass BDRV_REQ_NO_FALLBACK to blk_make_zero() to avoid this case. If we
3
No reason to forbid them, and they are needed to improve performance
10
can't efficiently zero out, we'll instead write explicit zeroes only if
4
with compress-threads in further patches.
11
there is no data to be written to a block.
12
5
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
13
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
14
Acked-by: Eric Blake <eblake@redhat.com>
15
---
8
---
16
qemu-img.c | 2 +-
9
qemu-img.c | 5 -----
17
1 file changed, 1 insertion(+), 1 deletion(-)
10
1 file changed, 5 deletions(-)
18
11
19
diff --git a/qemu-img.c b/qemu-img.c
12
diff --git a/qemu-img.c b/qemu-img.c
20
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
21
--- a/qemu-img.c
14
--- a/qemu-img.c
22
+++ b/qemu-img.c
15
+++ b/qemu-img.c
23
@@ -XXX,XX +XXX,XX @@ static int convert_do_copy(ImgConvertState *s)
16
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
24
if (!s->has_zero_init && !s->target_has_backing &&
17
goto fail_getopt;
25
bdrv_can_write_zeroes_with_unmap(blk_bs(s->target)))
18
}
26
{
19
27
- ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP);
20
- if (!s.wr_in_order && s.compressed) {
28
+ ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK);
21
- error_report("Out of order write and compress are mutually exclusive");
29
if (ret == 0) {
22
- goto fail_getopt;
30
s->has_zero_init = true;
23
- }
31
}
24
-
25
if (tgt_image_opts && !skip_create) {
26
error_report("--target-image-opts requires use of -n flag");
27
goto fail_getopt;
32
--
28
--
33
2.20.1
29
2.13.6
34
30
35
31
diff view generated by jsdifflib
New patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
2
3
Make a separate function for compression to be parallelized later.
4
- use .avail_out field instead of .next_out to calculate size of
5
compressed data. It looks more natural and it allows to keep dest to
6
be void pointer
7
- set avail_out to be at least one byte less than input, to be sure
8
avoid inefficient compression earlier
9
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
block/qcow2.c | 78 ++++++++++++++++++++++++++++++++++++++---------------------
14
1 file changed, 51 insertions(+), 27 deletions(-)
15
16
diff --git a/block/qcow2.c b/block/qcow2.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2.c
19
+++ b/block/qcow2.c
20
@@ -XXX,XX +XXX,XX @@
21
*/
22
23
#include "qemu/osdep.h"
24
+
25
+#define ZLIB_CONST
26
+#include <zlib.h>
27
+
28
#include "block/block_int.h"
29
#include "block/qdict.h"
30
#include "sysemu/block-backend.h"
31
#include "qemu/module.h"
32
-#include <zlib.h>
33
#include "qcow2.h"
34
#include "qemu/error-report.h"
35
#include "qapi/error.h"
36
@@ -XXX,XX +XXX,XX @@ fail:
37
return ret;
38
}
39
40
+/*
41
+ * qcow2_compress()
42
+ *
43
+ * @dest - destination buffer, at least of @size-1 bytes
44
+ * @src - source buffer, @size bytes
45
+ *
46
+ * Returns: compressed size on success
47
+ * -1 if compression is inefficient
48
+ * -2 on any other error
49
+ */
50
+static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
51
+{
52
+ ssize_t ret;
53
+ z_stream strm;
54
+
55
+ /* best compression, small window, no zlib header */
56
+ memset(&strm, 0, sizeof(strm));
57
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
58
+ -12, 9, Z_DEFAULT_STRATEGY);
59
+ if (ret != 0) {
60
+ return -2;
61
+ }
62
+
63
+ /* strm.next_in is not const in old zlib versions, such as those used on
64
+ * OpenBSD/NetBSD, so cast the const away */
65
+ strm.avail_in = size;
66
+ strm.next_in = (void *) src;
67
+ strm.avail_out = size - 1;
68
+ strm.next_out = dest;
69
+
70
+ ret = deflate(&strm, Z_FINISH);
71
+ if (ret == Z_STREAM_END) {
72
+ ret = size - 1 - strm.avail_out;
73
+ } else {
74
+ ret = (ret == Z_OK ? -1 : -2);
75
+ }
76
+
77
+ deflateEnd(&strm);
78
+
79
+ return ret;
80
+}
81
+
82
/* XXX: put compressed sectors first, then all the cluster aligned
83
tables to avoid losing bytes in alignment */
84
static coroutine_fn int
85
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
86
BDRVQcow2State *s = bs->opaque;
87
QEMUIOVector hd_qiov;
88
struct iovec iov;
89
- z_stream strm;
90
- int ret, out_len;
91
+ int ret;
92
+ size_t out_len;
93
uint8_t *buf, *out_buf;
94
int64_t cluster_offset;
95
96
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
97
98
out_buf = g_malloc(s->cluster_size);
99
100
- /* best compression, small window, no zlib header */
101
- memset(&strm, 0, sizeof(strm));
102
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
103
- Z_DEFLATED, -12,
104
- 9, Z_DEFAULT_STRATEGY);
105
- if (ret != 0) {
106
+ out_len = qcow2_compress(out_buf, buf, s->cluster_size);
107
+ if (out_len == -2) {
108
ret = -EINVAL;
109
goto fail;
110
- }
111
-
112
- strm.avail_in = s->cluster_size;
113
- strm.next_in = (uint8_t *)buf;
114
- strm.avail_out = s->cluster_size;
115
- strm.next_out = out_buf;
116
-
117
- ret = deflate(&strm, Z_FINISH);
118
- if (ret != Z_STREAM_END && ret != Z_OK) {
119
- deflateEnd(&strm);
120
- ret = -EINVAL;
121
- goto fail;
122
- }
123
- out_len = strm.next_out - out_buf;
124
-
125
- deflateEnd(&strm);
126
-
127
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
128
+ } else if (out_len == -1) {
129
/* could not compress: write normal cluster */
130
ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
131
if (ret < 0) {
132
--
133
2.13.6
134
135
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Test that mirror job actually resume on resume command after being
3
Do data compression in separate threads. This significantly improve
4
automatically paused on ENOSPC error.
4
performance for qemu-img convert with -W (allow async writes) and -c
5
5
(compressed) options.
6
It's a follow-up test for 8d9648cbf3e
7
"blockjob: fix user pause in block_job_error_action"
8
6
9
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Tested-by: John Snow <jsnow@redhat.com>
11
Reviewed-by: John Snow <jsnow@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
13
---
9
---
14
tests/qemu-iotests/248 | 71 ++++++++++++++++++++++++++++++++++++++
10
block/qcow2.h | 3 +++
15
tests/qemu-iotests/248.out | 8 +++++
11
block/qcow2.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
16
tests/qemu-iotests/group | 1 +
12
2 files changed, 64 insertions(+), 1 deletion(-)
17
3 files changed, 80 insertions(+)
18
create mode 100755 tests/qemu-iotests/248
19
create mode 100644 tests/qemu-iotests/248.out
20
13
21
diff --git a/tests/qemu-iotests/248 b/tests/qemu-iotests/248
14
diff --git a/block/qcow2.h b/block/qcow2.h
22
new file mode 100755
15
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX
16
--- a/block/qcow2.h
24
--- /dev/null
17
+++ b/block/qcow2.h
25
+++ b/tests/qemu-iotests/248
18
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVQcow2State {
19
* override) */
20
char *image_backing_file;
21
char *image_backing_format;
22
+
23
+ CoQueue compress_wait_queue;
24
+ int nb_compress_threads;
25
} BDRVQcow2State;
26
27
typedef struct Qcow2COWRegion {
28
diff --git a/block/qcow2.c b/block/qcow2.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/block/qcow2.c
31
+++ b/block/qcow2.c
26
@@ -XXX,XX +XXX,XX @@
32
@@ -XXX,XX +XXX,XX @@
27
+#!/usr/bin/env python
33
#include "qapi/qobject-input-visitor.h"
28
+#
34
#include "qapi/qapi-visit-block-core.h"
29
+# Test resume mirror after auto pause on ENOSPC
35
#include "crypto.h"
30
+#
36
+#include "block/thread-pool.h"
31
+# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved.
37
32
+#
38
/*
33
+# This program is free software; you can redistribute it and/or modify
39
Differences with QCOW:
34
+# it under the terms of the GNU General Public License as published by
40
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
35
+# the Free Software Foundation; either version 2 of the License, or
41
qcow2_check_refcounts(bs, &result, 0);
36
+# (at your option) any later version.
42
}
37
+#
43
#endif
38
+# This program is distributed in the hope that it will be useful,
39
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
40
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
41
+# GNU General Public License for more details.
42
+#
43
+# You should have received a copy of the GNU General Public License
44
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
45
+#
46
+
44
+
47
+import iotests
45
+ qemu_co_queue_init(&s->compress_wait_queue);
48
+from iotests import qemu_img_create, qemu_io, file_path, filter_qmp_testfiles
49
+
46
+
50
+iotests.verify_image_format(supported_fmts=['qcow2'])
47
return ret;
48
49
fail:
50
@@ -XXX,XX +XXX,XX @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
51
return ret;
52
}
53
54
+#define MAX_COMPRESS_THREADS 4
51
+
55
+
52
+source, target = file_path('source', 'target')
56
+typedef struct Qcow2CompressData {
53
+size = 5 * 1024 * 1024
57
+ void *dest;
54
+limit = 2 * 1024 * 1024
58
+ const void *src;
59
+ size_t size;
60
+ ssize_t ret;
61
+} Qcow2CompressData;
55
+
62
+
56
+qemu_img_create('-f', iotests.imgfmt, source, str(size))
63
+static int qcow2_compress_pool_func(void *opaque)
57
+qemu_img_create('-f', iotests.imgfmt, target, str(size))
64
+{
58
+qemu_io('-c', 'write 0 {}'.format(size), source)
65
+ Qcow2CompressData *data = opaque;
59
+
66
+
60
+# raw format don't like empty files
67
+ data->ret = qcow2_compress(data->dest, data->src, data->size);
61
+qemu_io('-c', 'write 0 {}'.format(size), target)
62
+
68
+
63
+vm = iotests.VM().add_drive(source)
69
+ return 0;
64
+vm.launch()
70
+}
65
+
71
+
66
+blockdev_opts = {
72
+static void qcow2_compress_complete(void *opaque, int ret)
67
+ 'driver': iotests.imgfmt,
73
+{
68
+ 'node-name': 'target',
74
+ qemu_coroutine_enter(opaque);
69
+ 'file': {
75
+}
70
+ 'driver': 'raw',
76
+
71
+ 'size': limit,
77
+/* See qcow2_compress definition for parameters description */
72
+ 'file': {
78
+static ssize_t qcow2_co_compress(BlockDriverState *bs,
73
+ 'driver': 'file',
79
+ void *dest, const void *src, size_t size)
74
+ 'filename': target
80
+{
75
+ }
81
+ BDRVQcow2State *s = bs->opaque;
82
+ BlockAIOCB *acb;
83
+ ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
84
+ Qcow2CompressData arg = {
85
+ .dest = dest,
86
+ .src = src,
87
+ .size = size,
88
+ };
89
+
90
+ while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
91
+ qemu_co_queue_wait(&s->compress_wait_queue, NULL);
76
+ }
92
+ }
93
+
94
+ s->nb_compress_threads++;
95
+ acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
96
+ qcow2_compress_complete,
97
+ qemu_coroutine_self());
98
+
99
+ if (!acb) {
100
+ s->nb_compress_threads--;
101
+ return -EINVAL;
102
+ }
103
+ qemu_coroutine_yield();
104
+ s->nb_compress_threads--;
105
+ qemu_co_queue_next(&s->compress_wait_queue);
106
+
107
+ return arg.ret;
77
+}
108
+}
78
+vm.qmp_log('blockdev-add', filters=[filter_qmp_testfiles], **blockdev_opts)
79
+
109
+
80
+vm.qmp_log('blockdev-mirror', device='drive0', sync='full', target='target',
110
/* XXX: put compressed sectors first, then all the cluster aligned
81
+ on_target_error='enospc')
111
tables to avoid losing bytes in alignment */
82
+
112
static coroutine_fn int
83
+vm.event_wait('JOB_STATUS_CHANGE', timeout=3.0,
113
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
84
+ match={'data': {'status': 'paused'}})
114
85
+
115
out_buf = g_malloc(s->cluster_size);
86
+# drop other cached events, to not interfere with further wait for 'running'
116
87
+vm.get_qmp_events()
117
- out_len = qcow2_compress(out_buf, buf, s->cluster_size);
88
+
118
+ out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size);
89
+del blockdev_opts['file']['size']
119
if (out_len == -2) {
90
+vm.qmp_log('x-blockdev-reopen', filters=[filter_qmp_testfiles],
120
ret = -EINVAL;
91
+ **blockdev_opts)
121
goto fail;
92
+
93
+vm.qmp_log('block-job-resume', device='drive0')
94
+vm.event_wait('JOB_STATUS_CHANGE', timeout=1.0,
95
+ match={'data': {'status': 'running'}})
96
+
97
+vm.shutdown()
98
diff --git a/tests/qemu-iotests/248.out b/tests/qemu-iotests/248.out
99
new file mode 100644
100
index XXXXXXX..XXXXXXX
101
--- /dev/null
102
+++ b/tests/qemu-iotests/248.out
103
@@ -XXX,XX +XXX,XX @@
104
+{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-target"}, "size": 2097152}, "node-name": "target"}}
105
+{"return": {}}
106
+{"execute": "blockdev-mirror", "arguments": {"device": "drive0", "on-target-error": "enospc", "sync": "full", "target": "target"}}
107
+{"return": {}}
108
+{"execute": "x-blockdev-reopen", "arguments": {"driver": "qcow2", "file": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-target"}}, "node-name": "target"}}
109
+{"return": {}}
110
+{"execute": "block-job-resume", "arguments": {"device": "drive0"}}
111
+{"return": {}}
112
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
113
index XXXXXXX..XXXXXXX 100644
114
--- a/tests/qemu-iotests/group
115
+++ b/tests/qemu-iotests/group
116
@@ -XXX,XX +XXX,XX @@
117
245 rw auto
118
246 rw auto quick
119
247 rw auto quick
120
+248 rw auto quick
121
--
122
--
122
2.20.1
123
2.13.6
123
124
124
125
diff view generated by jsdifflib
1
For qemu-img convert, we want an operation that zeroes out the whole
1
From: Ari Sundholm <ari@tuxera.com>
2
image if this can be done efficiently, but that returns an error
3
otherwise so we don't write explicit zeroes and immediately overwrite
4
them with the real data, potentially doubling the amount of data to be
5
written.
6
2
3
This allows using the two constants outside of block.c, which will
4
happen in a subsequent patch.
5
6
Signed-off-by: Ari Sundholm <ari@tuxera.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Acked-by: Eric Blake <eblake@redhat.com>
9
---
8
---
10
include/block/block.h | 7 ++++++-
9
include/block/block.h | 7 +++++++
11
block/io.c | 12 +++++++++++-
10
block.c | 6 ------
12
2 files changed, 17 insertions(+), 2 deletions(-)
11
2 files changed, 7 insertions(+), 6 deletions(-)
13
12
14
diff --git a/include/block/block.h b/include/block/block.h
13
diff --git a/include/block/block.h b/include/block/block.h
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/include/block/block.h
15
--- a/include/block/block.h
17
+++ b/include/block/block.h
16
+++ b/include/block/block.h
18
@@ -XXX,XX +XXX,XX @@ typedef enum {
17
@@ -XXX,XX +XXX,XX @@ enum {
19
*/
18
BLK_PERM_GRAPH_MOD = 0x10,
20
BDRV_REQ_SERIALISING = 0x80,
19
21
20
BLK_PERM_ALL = 0x1f,
22
+ /* Execute the request only if the operation can be offloaded or otherwise
23
+ * be executed efficiently, but return an error instead of using a slow
24
+ * fallback. */
25
+ BDRV_REQ_NO_FALLBACK = 0x100,
26
+
21
+
27
/* Mask of valid flags */
22
+ DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
28
- BDRV_REQ_MASK = 0xff,
23
+ | BLK_PERM_WRITE
29
+ BDRV_REQ_MASK = 0x1ff,
24
+ | BLK_PERM_WRITE_UNCHANGED
30
} BdrvRequestFlags;
25
+ | BLK_PERM_RESIZE,
31
26
+
32
typedef struct BlockSizes {
27
+ DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
33
diff --git a/block/io.c b/block/io.c
28
};
29
30
char *bdrv_perm_names(uint64_t perm);
31
diff --git a/block.c b/block.c
34
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
35
--- a/block/io.c
33
--- a/block.c
36
+++ b/block/io.c
34
+++ b/block.c
37
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs,
35
@@ -XXX,XX +XXX,XX @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
38
unsigned int nb_sectors;
36
return 0;
39
37
}
40
assert(!(flags & ~BDRV_REQ_MASK));
38
41
+ assert(!(flags & BDRV_REQ_NO_FALLBACK));
39
-#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
42
40
- | BLK_PERM_WRITE \
43
if (!drv) {
41
- | BLK_PERM_WRITE_UNCHANGED \
44
return -ENOMEDIUM;
42
- | BLK_PERM_RESIZE)
45
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs,
43
-#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
46
int ret;
44
-
47
45
void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
48
assert(!(flags & ~BDRV_REQ_MASK));
46
const BdrvChildRole *role,
49
+ assert(!(flags & BDRV_REQ_NO_FALLBACK));
47
BlockReopenQueue *reopen_queue,
50
51
if (!drv) {
52
return -ENOMEDIUM;
53
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
54
return -ENOMEDIUM;
55
}
56
57
+ if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) {
58
+ return -ENOTSUP;
59
+ }
60
+
61
assert(alignment % bs->bl.request_alignment == 0);
62
head = offset % alignment;
63
tail = (offset + bytes) % alignment;
64
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs,
65
assert(!bs->supported_zero_flags);
66
}
67
68
- if (ret == -ENOTSUP) {
69
+ if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) {
70
/* Fall back to bounce buffer if write zeroes is unsupported */
71
BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE;
72
73
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn bdrv_co_copy_range_internal(
74
BdrvTrackedRequest req;
75
int ret;
76
77
+ /* TODO We can support BDRV_REQ_NO_FALLBACK here */
78
+ assert(!(read_flags & BDRV_REQ_NO_FALLBACK));
79
+ assert(!(write_flags & BDRV_REQ_NO_FALLBACK));
80
+
81
if (!dst || !dst->bs) {
82
return -ENOMEDIUM;
83
}
84
--
48
--
85
2.20.1
49
2.13.6
86
50
87
51
diff view generated by jsdifflib
New patch
1
1
From: Aapo Vienamo <aapo@tuxera.com>
2
3
Implements a block device write logging system, similar to Linux kernel
4
device mapper dm-log-writes. The write operations that are performed
5
on a block device are logged to a file or another block device. The
6
write log format is identical to the dm-log-writes format. Currently,
7
log markers are not supported.
8
9
This functionality can be used for crash consistency and fs consistency
10
testing. By implementing it in qemu, tests utilizing write logs can be
11
be used to test non-Linux drivers and older kernels.
12
13
The driver accepts an optional parameter to set the sector size used
14
for logging. This makes the driver require all requests to be aligned
15
to this sector size and also makes offsets and sizes of writes in the
16
log metadata to be expressed in terms of this value (the log format has
17
a granularity of one sector for offsets and sizes). This allows
18
accurate logging of writes to guest block devices that have unusual
19
sector sizes.
20
21
The implementation is based on the blkverify and blkdebug block
22
drivers.
23
24
Signed-off-by: Aapo Vienamo <aapo@tuxera.com>
25
Signed-off-by: Ari Sundholm <ari@tuxera.com>
26
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
27
---
28
qapi/block-core.json | 33 +++-
29
block/blklogwrites.c | 414 +++++++++++++++++++++++++++++++++++++++++++++++++++
30
MAINTAINERS | 6 +
31
block/Makefile.objs | 1 +
32
4 files changed, 448 insertions(+), 6 deletions(-)
33
create mode 100644 block/blklogwrites.c
34
35
diff --git a/qapi/block-core.json b/qapi/block-core.json
36
index XXXXXXX..XXXXXXX 100644
37
--- a/qapi/block-core.json
38
+++ b/qapi/block-core.json
39
@@ -XXX,XX +XXX,XX @@
40
# @throttle: Since 2.11
41
# @nvme: Since 2.12
42
# @copy-on-read: Since 3.0
43
+# @blklogwrites: Since 3.0
44
#
45
# Since: 2.9
46
##
47
{ 'enum': 'BlockdevDriver',
48
- 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read',
49
- 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
50
- 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
51
- 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
52
- 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
53
- 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
54
+ 'data': [ 'blkdebug', 'blklogwrites', 'blkverify', 'bochs', 'cloop',
55
+ 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps', 'gluster',
56
+ 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks',
57
+ 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow',
58
+ 'qcow2', 'qed', 'quorum', 'raw', 'rbd', 'replication', 'sheepdog',
59
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
60
61
##
62
# @BlockdevOptionsFile:
63
@@ -XXX,XX +XXX,XX @@
64
'*set-state': ['BlkdebugSetStateOptions'] } }
65
66
##
67
+# @BlockdevOptionsBlklogwrites:
68
+#
69
+# Driver specific block device options for blklogwrites.
70
+#
71
+# @file: block device
72
+#
73
+# @log: block device used to log writes to @file
74
+#
75
+# @log-sector-size: sector size used in logging writes to @file, determines
76
+# granularity of offsets and sizes of writes (default: 512)
77
+#
78
+# Since: 3.0
79
+##
80
+{ 'struct': 'BlockdevOptionsBlklogwrites',
81
+ 'data': { 'file': 'BlockdevRef',
82
+ 'log': 'BlockdevRef',
83
+ '*log-sector-size': 'uint32' } }
84
+
85
+##
86
# @BlockdevOptionsBlkverify:
87
#
88
# Driver specific block device options for blkverify.
89
@@ -XXX,XX +XXX,XX @@
90
'discriminator': 'driver',
91
'data': {
92
'blkdebug': 'BlockdevOptionsBlkdebug',
93
+ 'blklogwrites':'BlockdevOptionsBlklogwrites',
94
'blkverify': 'BlockdevOptionsBlkverify',
95
'bochs': 'BlockdevOptionsGenericFormat',
96
'cloop': 'BlockdevOptionsGenericFormat',
97
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
98
new file mode 100644
99
index XXXXXXX..XXXXXXX
100
--- /dev/null
101
+++ b/block/blklogwrites.c
102
@@ -XXX,XX +XXX,XX @@
103
+/*
104
+ * Write logging blk driver based on blkverify and blkdebug.
105
+ *
106
+ * Copyright (c) 2017 Tuomas Tynkkynen <tuomas@tuxera.com>
107
+ * Copyright (c) 2018 Aapo Vienamo <aapo@tuxera.com>
108
+ * Copyright (c) 2018 Ari Sundholm <ari@tuxera.com>
109
+ *
110
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
111
+ * See the COPYING file in the top-level directory.
112
+ */
113
+
114
+#include "qemu/osdep.h"
115
+#include "qapi/error.h"
116
+#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
117
+#include "block/block_int.h"
118
+#include "qapi/qmp/qdict.h"
119
+#include "qapi/qmp/qstring.h"
120
+#include "qemu/cutils.h"
121
+#include "qemu/option.h"
122
+
123
+/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */
124
+
125
+#define LOG_FLUSH_FLAG (1 << 0)
126
+#define LOG_FUA_FLAG (1 << 1)
127
+#define LOG_DISCARD_FLAG (1 << 2)
128
+#define LOG_MARK_FLAG (1 << 3)
129
+
130
+#define WRITE_LOG_VERSION 1ULL
131
+#define WRITE_LOG_MAGIC 0x6a736677736872ULL
132
+
133
+/* All fields are little-endian. */
134
+struct log_write_super {
135
+ uint64_t magic;
136
+ uint64_t version;
137
+ uint64_t nr_entries;
138
+ uint32_t sectorsize;
139
+} QEMU_PACKED;
140
+
141
+struct log_write_entry {
142
+ uint64_t sector;
143
+ uint64_t nr_sectors;
144
+ uint64_t flags;
145
+ uint64_t data_len;
146
+} QEMU_PACKED;
147
+
148
+/* End of disk format structures. */
149
+
150
+typedef struct {
151
+ BdrvChild *log_file;
152
+ uint32_t sectorsize;
153
+ uint32_t sectorbits;
154
+ uint64_t cur_log_sector;
155
+ uint64_t nr_entries;
156
+} BDRVBlkLogWritesState;
157
+
158
+static QemuOptsList runtime_opts = {
159
+ .name = "blklogwrites",
160
+ .head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
161
+ .desc = {
162
+ {
163
+ .name = "log-sector-size",
164
+ .type = QEMU_OPT_SIZE,
165
+ .help = "Log sector size",
166
+ },
167
+ { /* end of list */ }
168
+ },
169
+};
170
+
171
+static inline uint32_t blk_log_writes_log2(uint32_t value)
172
+{
173
+ assert(value > 0);
174
+ return 31 - clz32(value);
175
+}
176
+
177
+static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
178
+ Error **errp)
179
+{
180
+ BDRVBlkLogWritesState *s = bs->opaque;
181
+ QemuOpts *opts;
182
+ Error *local_err = NULL;
183
+ int ret;
184
+ int64_t log_sector_size;
185
+
186
+ opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
187
+ qemu_opts_absorb_qdict(opts, options, &local_err);
188
+ if (local_err) {
189
+ ret = -EINVAL;
190
+ error_propagate(errp, local_err);
191
+ goto fail;
192
+ }
193
+
194
+ /* Open the file */
195
+ bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
196
+ &local_err);
197
+ if (local_err) {
198
+ ret = -EINVAL;
199
+ error_propagate(errp, local_err);
200
+ goto fail;
201
+ }
202
+
203
+ log_sector_size = qemu_opt_get_size(opts, "log-sector-size",
204
+ BDRV_SECTOR_SIZE);
205
+
206
+ if (log_sector_size < 0 || log_sector_size > (1ull << 23) ||
207
+ !is_power_of_2(log_sector_size))
208
+ {
209
+ ret = -EINVAL;
210
+ error_setg(errp, "Invalid log sector size %"PRId64, log_sector_size);
211
+ goto fail;
212
+ }
213
+
214
+ s->sectorsize = log_sector_size;
215
+ s->sectorbits = blk_log_writes_log2(log_sector_size);
216
+ s->cur_log_sector = 1;
217
+ s->nr_entries = 0;
218
+
219
+ /* Open the log file */
220
+ s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, false,
221
+ &local_err);
222
+ if (local_err) {
223
+ ret = -EINVAL;
224
+ error_propagate(errp, local_err);
225
+ goto fail;
226
+ }
227
+
228
+ ret = 0;
229
+fail:
230
+ if (ret < 0) {
231
+ bdrv_unref_child(bs, bs->file);
232
+ bs->file = NULL;
233
+ }
234
+ qemu_opts_del(opts);
235
+ return ret;
236
+}
237
+
238
+static void blk_log_writes_close(BlockDriverState *bs)
239
+{
240
+ BDRVBlkLogWritesState *s = bs->opaque;
241
+
242
+ bdrv_unref_child(bs, s->log_file);
243
+ s->log_file = NULL;
244
+}
245
+
246
+static int64_t blk_log_writes_getlength(BlockDriverState *bs)
247
+{
248
+ return bdrv_getlength(bs->file->bs);
249
+}
250
+
251
+static void blk_log_writes_refresh_filename(BlockDriverState *bs,
252
+ QDict *options)
253
+{
254
+ BDRVBlkLogWritesState *s = bs->opaque;
255
+
256
+ /* bs->file->bs has already been refreshed */
257
+ bdrv_refresh_filename(s->log_file->bs);
258
+
259
+ if (bs->file->bs->full_open_options
260
+ && s->log_file->bs->full_open_options)
261
+ {
262
+ QDict *opts = qdict_new();
263
+ qdict_put_str(opts, "driver", "blklogwrites");
264
+
265
+ qobject_ref(bs->file->bs->full_open_options);
266
+ qdict_put_obj(opts, "file", QOBJECT(bs->file->bs->full_open_options));
267
+ qobject_ref(s->log_file->bs->full_open_options);
268
+ qdict_put_obj(opts, "log",
269
+ QOBJECT(s->log_file->bs->full_open_options));
270
+ qdict_put_int(opts, "log-sector-size", s->sectorsize);
271
+
272
+ bs->full_open_options = opts;
273
+ }
274
+}
275
+
276
+static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c,
277
+ const BdrvChildRole *role,
278
+ BlockReopenQueue *ro_q,
279
+ uint64_t perm, uint64_t shrd,
280
+ uint64_t *nperm, uint64_t *nshrd)
281
+{
282
+ if (!c) {
283
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
284
+ *nshrd = (shrd & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
285
+ return;
286
+ }
287
+
288
+ if (!strcmp(c->name, "log")) {
289
+ bdrv_format_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd);
290
+ } else {
291
+ bdrv_filter_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd);
292
+ }
293
+}
294
+
295
+static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
296
+{
297
+ BDRVBlkLogWritesState *s = bs->opaque;
298
+ bs->bl.request_alignment = s->sectorsize;
299
+}
300
+
301
+static int coroutine_fn
302
+blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
303
+ QEMUIOVector *qiov, int flags)
304
+{
305
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
306
+}
307
+
308
+typedef struct BlkLogWritesFileReq {
309
+ BlockDriverState *bs;
310
+ uint64_t offset;
311
+ uint64_t bytes;
312
+ int file_flags;
313
+ QEMUIOVector *qiov;
314
+ int (*func)(struct BlkLogWritesFileReq *r);
315
+ int file_ret;
316
+} BlkLogWritesFileReq;
317
+
318
+typedef struct {
319
+ BlockDriverState *bs;
320
+ QEMUIOVector *qiov;
321
+ struct log_write_entry entry;
322
+ uint64_t zero_size;
323
+ int log_ret;
324
+} BlkLogWritesLogReq;
325
+
326
+static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
327
+{
328
+ BDRVBlkLogWritesState *s = lr->bs->opaque;
329
+ uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
330
+
331
+ s->nr_entries++;
332
+ s->cur_log_sector +=
333
+ ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits;
334
+
335
+ lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size,
336
+ lr->qiov, 0);
337
+
338
+ /* Logging for the "write zeroes" operation */
339
+ if (lr->log_ret == 0 && lr->zero_size) {
340
+ cur_log_offset = s->cur_log_sector << s->sectorbits;
341
+ s->cur_log_sector +=
342
+ ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits;
343
+
344
+ lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset,
345
+ lr->zero_size, 0);
346
+ }
347
+
348
+ /* Update super block on flush */
349
+ if (lr->log_ret == 0 && lr->entry.flags & LOG_FLUSH_FLAG) {
350
+ struct log_write_super super = {
351
+ .magic = cpu_to_le64(WRITE_LOG_MAGIC),
352
+ .version = cpu_to_le64(WRITE_LOG_VERSION),
353
+ .nr_entries = cpu_to_le64(s->nr_entries),
354
+ .sectorsize = cpu_to_le32(s->sectorsize),
355
+ };
356
+ void *zeroes = g_malloc0(s->sectorsize - sizeof(super));
357
+ QEMUIOVector qiov;
358
+
359
+ qemu_iovec_init(&qiov, 2);
360
+ qemu_iovec_add(&qiov, &super, sizeof(super));
361
+ qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super));
362
+
363
+ lr->log_ret =
364
+ bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0);
365
+ if (lr->log_ret == 0) {
366
+ lr->log_ret = bdrv_co_flush(s->log_file->bs);
367
+ }
368
+ qemu_iovec_destroy(&qiov);
369
+ g_free(zeroes);
370
+ }
371
+}
372
+
373
+static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
374
+{
375
+ fr->file_ret = fr->func(fr);
376
+}
377
+
378
+static int coroutine_fn
379
+blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
380
+ QEMUIOVector *qiov, int flags,
381
+ int (*file_func)(BlkLogWritesFileReq *r),
382
+ uint64_t entry_flags, bool is_zero_write)
383
+{
384
+ QEMUIOVector log_qiov;
385
+ size_t niov = qiov ? qiov->niov : 0;
386
+ BDRVBlkLogWritesState *s = bs->opaque;
387
+ BlkLogWritesFileReq fr = {
388
+ .bs = bs,
389
+ .offset = offset,
390
+ .bytes = bytes,
391
+ .file_flags = flags,
392
+ .qiov = qiov,
393
+ .func = file_func,
394
+ };
395
+ BlkLogWritesLogReq lr = {
396
+ .bs = bs,
397
+ .qiov = &log_qiov,
398
+ .entry = {
399
+ .sector = cpu_to_le64(offset >> s->sectorbits),
400
+ .nr_sectors = cpu_to_le64(bytes >> s->sectorbits),
401
+ .flags = cpu_to_le64(entry_flags),
402
+ .data_len = 0,
403
+ },
404
+ .zero_size = is_zero_write ? bytes : 0,
405
+ };
406
+ void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry));
407
+
408
+ assert((1 << s->sectorbits) == s->sectorsize);
409
+ assert(bs->bl.request_alignment == s->sectorsize);
410
+ assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
411
+ assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
412
+
413
+ qemu_iovec_init(&log_qiov, niov + 2);
414
+ qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry));
415
+ qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry));
416
+ if (qiov) {
417
+ qemu_iovec_concat(&log_qiov, qiov, 0, qiov->size);
418
+ }
419
+
420
+ blk_log_writes_co_do_file(&fr);
421
+ blk_log_writes_co_do_log(&lr);
422
+
423
+ qemu_iovec_destroy(&log_qiov);
424
+ g_free(zeroes);
425
+
426
+ if (lr.log_ret < 0) {
427
+ return lr.log_ret;
428
+ }
429
+
430
+ return fr.file_ret;
431
+}
432
+
433
+static int coroutine_fn
434
+blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr)
435
+{
436
+ return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes,
437
+ fr->qiov, fr->file_flags);
438
+}
439
+
440
+static int coroutine_fn
441
+blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr)
442
+{
443
+ return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes,
444
+ fr->file_flags);
445
+}
446
+
447
+static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
448
+{
449
+ return bdrv_co_flush(fr->bs->file->bs);
450
+}
451
+
452
+static int coroutine_fn
453
+blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr)
454
+{
455
+ return bdrv_co_pdiscard(fr->bs->file->bs, fr->offset, fr->bytes);
456
+}
457
+
458
+static int coroutine_fn
459
+blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
460
+ QEMUIOVector *qiov, int flags)
461
+{
462
+ return blk_log_writes_co_log(bs, offset, bytes, qiov, flags,
463
+ blk_log_writes_co_do_file_pwritev, 0, false);
464
+}
465
+
466
+static int coroutine_fn
467
+blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
468
+ BdrvRequestFlags flags)
469
+{
470
+ return blk_log_writes_co_log(bs, offset, bytes, NULL, flags,
471
+ blk_log_writes_co_do_file_pwrite_zeroes, 0,
472
+ true);
473
+}
474
+
475
+static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
476
+{
477
+ return blk_log_writes_co_log(bs, 0, 0, NULL, 0,
478
+ blk_log_writes_co_do_file_flush,
479
+ LOG_FLUSH_FLAG, false);
480
+}
481
+
482
+static int coroutine_fn
483
+blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
484
+{
485
+ return blk_log_writes_co_log(bs, offset, count, NULL, 0,
486
+ blk_log_writes_co_do_file_pdiscard,
487
+ LOG_DISCARD_FLAG, false);
488
+}
489
+
490
+static BlockDriver bdrv_blk_log_writes = {
491
+ .format_name = "blklogwrites",
492
+ .instance_size = sizeof(BDRVBlkLogWritesState),
493
+
494
+ .bdrv_open = blk_log_writes_open,
495
+ .bdrv_close = blk_log_writes_close,
496
+ .bdrv_getlength = blk_log_writes_getlength,
497
+ .bdrv_refresh_filename = blk_log_writes_refresh_filename,
498
+ .bdrv_child_perm = blk_log_writes_child_perm,
499
+ .bdrv_refresh_limits = blk_log_writes_refresh_limits,
500
+
501
+ .bdrv_co_preadv = blk_log_writes_co_preadv,
502
+ .bdrv_co_pwritev = blk_log_writes_co_pwritev,
503
+ .bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes,
504
+ .bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk,
505
+ .bdrv_co_pdiscard = blk_log_writes_co_pdiscard,
506
+ .bdrv_co_block_status = bdrv_co_block_status_from_file,
507
+
508
+ .is_filter = true,
509
+};
510
+
511
+static void bdrv_blk_log_writes_init(void)
512
+{
513
+ bdrv_register(&bdrv_blk_log_writes);
514
+}
515
+
516
+block_init(bdrv_blk_log_writes_init);
517
diff --git a/MAINTAINERS b/MAINTAINERS
518
index XXXXXXX..XXXXXXX 100644
519
--- a/MAINTAINERS
520
+++ b/MAINTAINERS
521
@@ -XXX,XX +XXX,XX @@ S: Supported
522
F: block/quorum.c
523
L: qemu-block@nongnu.org
524
525
+blklogwrites
526
+M: Ari Sundholm <ari@tuxera.com>
527
+L: qemu-block@nongnu.org
528
+S: Supported
529
+F: block/blklogwrites.c
530
+
531
blkverify
532
M: Stefan Hajnoczi <stefanha@redhat.com>
533
L: qemu-block@nongnu.org
534
diff --git a/block/Makefile.objs b/block/Makefile.objs
535
index XXXXXXX..XXXXXXX 100644
536
--- a/block/Makefile.objs
537
+++ b/block/Makefile.objs
538
@@ -XXX,XX +XXX,XX @@ block-obj-y += qed-check.o
539
block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
540
block-obj-y += quorum.o
541
block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
542
+block-obj-y += blklogwrites.o
543
block-obj-y += block-backend.o snapshot.o qapi.o
544
block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
545
block-obj-$(CONFIG_POSIX) += file-posix.o
546
--
547
2.13.6
548
549
diff view generated by jsdifflib
1
This makes the new BDRV_REQ_NO_FALLBACK flag available in the qemu-io
1
If the user passes a too long node name string, we silently truncate it
2
write command.
2
to fit into BlockDriverState.node_name, i.e. to 31 characters. Apart
3
from surprising the user when the node has a different name than
4
requested, this also bypasses the check for duplicate names, so that the
5
same name can be assigned to multiple nodes.
3
6
7
Fix this by just making too long node names an error.
8
9
Reported-by: Peter Krempa <pkrempa@redhat.com>
4
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
5
Acked-by: Eric Blake <eblake@redhat.com>
6
---
11
---
7
qemu-io-cmds.c | 13 +++++++++++--
12
block.c | 6 ++++++
8
1 file changed, 11 insertions(+), 2 deletions(-)
13
tests/qemu-iotests/051 | 15 +++++++++++++++
14
tests/qemu-iotests/051.out | 23 +++++++++++++++++++++++
15
tests/qemu-iotests/051.pc.out | 23 +++++++++++++++++++++++
16
4 files changed, 67 insertions(+)
9
17
10
diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c
18
diff --git a/block.c b/block.c
11
index XXXXXXX..XXXXXXX 100644
19
index XXXXXXX..XXXXXXX 100644
12
--- a/qemu-io-cmds.c
20
--- a/block.c
13
+++ b/qemu-io-cmds.c
21
+++ b/block.c
14
@@ -XXX,XX +XXX,XX @@ static void write_help(void)
22
@@ -XXX,XX +XXX,XX @@ static void bdrv_assign_node_name(BlockDriverState *bs,
15
" -b, -- write to the VM state rather than the virtual disk\n"
23
goto out;
16
" -c, -- write compressed data with blk_write_compressed\n"
17
" -f, -- use Force Unit Access semantics\n"
18
+" -n, -- with -z, don't allow slow fallback\n"
19
" -p, -- ignored for backwards compatibility\n"
20
" -P, -- use different pattern to fill file\n"
21
" -C, -- report statistics in a machine parsable format\n"
22
@@ -XXX,XX +XXX,XX @@ static const cmdinfo_t write_cmd = {
23
.perm = BLK_PERM_WRITE,
24
.argmin = 2,
25
.argmax = -1,
26
- .args = "[-bcCfquz] [-P pattern] off len",
27
+ .args = "[-bcCfnquz] [-P pattern] off len",
28
.oneline = "writes a number of bytes at a specified offset",
29
.help = write_help,
30
};
31
@@ -XXX,XX +XXX,XX @@ static int write_f(BlockBackend *blk, int argc, char **argv)
32
int64_t total = 0;
33
int pattern = 0xcd;
34
35
- while ((c = getopt(argc, argv, "bcCfpP:quz")) != -1) {
36
+ while ((c = getopt(argc, argv, "bcCfnpP:quz")) != -1) {
37
switch (c) {
38
case 'b':
39
bflag = true;
40
@@ -XXX,XX +XXX,XX @@ static int write_f(BlockBackend *blk, int argc, char **argv)
41
case 'f':
42
flags |= BDRV_REQ_FUA;
43
break;
44
+ case 'n':
45
+ flags |= BDRV_REQ_NO_FALLBACK;
46
+ break;
47
case 'p':
48
/* Ignored for backwards compatibility */
49
break;
50
@@ -XXX,XX +XXX,XX @@ static int write_f(BlockBackend *blk, int argc, char **argv)
51
return -EINVAL;
52
}
24
}
53
25
54
+ if ((flags & BDRV_REQ_NO_FALLBACK) && !zflag) {
26
+ /* Make sure that the node name isn't truncated */
55
+ printf("-n requires -z to be specified\n");
27
+ if (strlen(node_name) >= sizeof(bs->node_name)) {
56
+ return -EINVAL;
28
+ error_setg(errp, "Node name too long");
29
+ goto out;
57
+ }
30
+ }
58
+
31
+
59
if ((flags & BDRV_REQ_MAY_UNMAP) && !zflag) {
32
/* copy node name into the bs and insert it into the graph list */
60
printf("-u requires -z to be specified\n");
33
pstrcpy(bs->node_name, sizeof(bs->node_name), node_name);
61
return -EINVAL;
34
QTAILQ_INSERT_TAIL(&graph_bdrv_states, bs, node_list);
35
diff --git a/tests/qemu-iotests/051 b/tests/qemu-iotests/051
36
index XXXXXXX..XXXXXXX 100755
37
--- a/tests/qemu-iotests/051
38
+++ b/tests/qemu-iotests/051
39
@@ -XXX,XX +XXX,XX @@ run_qemu -drive file="$TEST_IMG",driver=raw,format=qcow2
40
run_qemu -drive file="$TEST_IMG",driver=qcow2,format=qcow2
41
42
echo
43
+echo === Node names ===
44
+echo
45
+
46
+# Maximum length: 31 characters
47
+run_qemu -drive file="$TEST_IMG",node-name=x123456789012345678901234567890
48
+run_qemu -drive file="$TEST_IMG",node-name=x1234567890123456789012345678901
49
+
50
+# First character must be alphabetic
51
+# Following characters alphanumeric or -._
52
+run_qemu -drive file="$TEST_IMG",node-name=All-Types.of_all0wed_chars
53
+run_qemu -drive file="$TEST_IMG",node-name=123foo
54
+run_qemu -drive file="$TEST_IMG",node-name=_foo
55
+run_qemu -drive file="$TEST_IMG",node-name=foo#12
56
+
57
+echo
58
echo === Device without drive ===
59
echo
60
61
diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out
62
index XXXXXXX..XXXXXXX 100644
63
--- a/tests/qemu-iotests/051.out
64
+++ b/tests/qemu-iotests/051.out
65
@@ -XXX,XX +XXX,XX @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2
66
QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format'
67
68
69
+=== Node names ===
70
+
71
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=x123456789012345678901234567890
72
+QEMU X.Y.Z monitor - type 'help' for more information
73
+(qemu) quit
74
+
75
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901
76
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901: Node name too long
77
+
78
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=All-Types.of_all0wed_chars
79
+QEMU X.Y.Z monitor - type 'help' for more information
80
+(qemu) quit
81
+
82
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo
83
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name
84
+
85
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo
86
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name
87
+
88
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12
89
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name
90
+
91
+
92
=== Device without drive ===
93
94
Testing: -device VIRTIO_SCSI -device scsi-hd
95
diff --git a/tests/qemu-iotests/051.pc.out b/tests/qemu-iotests/051.pc.out
96
index XXXXXXX..XXXXXXX 100644
97
--- a/tests/qemu-iotests/051.pc.out
98
+++ b/tests/qemu-iotests/051.pc.out
99
@@ -XXX,XX +XXX,XX @@ Testing: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2
100
QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specify both 'driver' and 'format'
101
102
103
+=== Node names ===
104
+
105
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=x123456789012345678901234567890
106
+QEMU X.Y.Z monitor - type 'help' for more information
107
+(qemu) quit
108
+
109
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901
110
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=x1234567890123456789012345678901: Node name too long
111
+
112
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=All-Types.of_all0wed_chars
113
+QEMU X.Y.Z monitor - type 'help' for more information
114
+(qemu) quit
115
+
116
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=123foo
117
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=123foo: Invalid node name
118
+
119
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=_foo
120
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=_foo: Invalid node name
121
+
122
+Testing: -drive file=TEST_DIR/t.qcow2,node-name=foo#12
123
+QEMU_PROG: -drive file=TEST_DIR/t.qcow2,node-name=foo#12: Invalid node name
124
+
125
+
126
=== Device without drive ===
127
128
Testing: -device VIRTIO_SCSI -device scsi-hd
62
--
129
--
63
2.20.1
130
2.13.6
64
131
65
132
diff view generated by jsdifflib
New patch
1
Fixes: Coverity CID 1393782
2
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
3
Reviewed-by: Daniel P. Berrangé <berrange@redhat.com>
4
---
5
block/crypto.c | 2 +-
6
1 file changed, 1 insertion(+), 1 deletion(-)
1
7
8
diff --git a/block/crypto.c b/block/crypto.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/block/crypto.c
11
+++ b/block/crypto.c
12
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_crypto_co_create_opts_luks(const char *filename,
13
/* Create protocol layer */
14
ret = bdrv_create_file(filename, opts, errp);
15
if (ret < 0) {
16
- return ret;
17
+ goto fail;
18
}
19
20
bs = bdrv_open(filename, NULL, NULL,
21
--
22
2.13.6
23
24
diff view generated by jsdifflib
New patch
1
From: Ari Sundholm <ari@tuxera.com>
1
2
3
This was a simple oversight when working on intermediate versions
4
of the original patch which introduced blklogwrites.
5
6
Signed-off-by: Ari Sundholm <ari@tuxera.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
9
block/blklogwrites.c | 8 +++-----
10
1 file changed, 3 insertions(+), 5 deletions(-)
11
12
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/block/blklogwrites.c
15
+++ b/block/blklogwrites.c
16
@@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
17
QemuOpts *opts;
18
Error *local_err = NULL;
19
int ret;
20
- int64_t log_sector_size;
21
+ uint64_t log_sector_size;
22
23
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
24
qemu_opts_absorb_qdict(opts, options, &local_err);
25
@@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
26
log_sector_size = qemu_opt_get_size(opts, "log-sector-size",
27
BDRV_SECTOR_SIZE);
28
29
- if (log_sector_size < 0 || log_sector_size > (1ull << 23) ||
30
- !is_power_of_2(log_sector_size))
31
- {
32
+ if (log_sector_size > (1ull << 23) || !is_power_of_2(log_sector_size)) {
33
ret = -EINVAL;
34
- error_setg(errp, "Invalid log sector size %"PRId64, log_sector_size);
35
+ error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size);
36
goto fail;
37
}
38
39
--
40
2.13.6
41
42
diff view generated by jsdifflib
New patch
1
1
From: Ari Sundholm <ari@tuxera.com>
2
3
Suggested by Kevin Wolf. May be useful when testing multiple batches
4
of writes or doing long-term testing involving restarts of the VM.
5
6
Signed-off-by: Ari Sundholm <ari@tuxera.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
9
qapi/block-core.json | 3 +-
10
block/blklogwrites.c | 147 ++++++++++++++++++++++++++++++++++++++++++++++-----
11
2 files changed, 135 insertions(+), 15 deletions(-)
12
13
diff --git a/qapi/block-core.json b/qapi/block-core.json
14
index XXXXXXX..XXXXXXX 100644
15
--- a/qapi/block-core.json
16
+++ b/qapi/block-core.json
17
@@ -XXX,XX +XXX,XX @@
18
{ 'struct': 'BlockdevOptionsBlklogwrites',
19
'data': { 'file': 'BlockdevRef',
20
'log': 'BlockdevRef',
21
- '*log-sector-size': 'uint32' } }
22
+ '*log-sector-size': 'uint32',
23
+ '*log-append': 'bool' } }
24
25
##
26
# @BlockdevOptionsBlkverify:
27
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
28
index XXXXXXX..XXXXXXX 100644
29
--- a/block/blklogwrites.c
30
+++ b/block/blklogwrites.c
31
@@ -XXX,XX +XXX,XX @@
32
#define LOG_FUA_FLAG (1 << 1)
33
#define LOG_DISCARD_FLAG (1 << 2)
34
#define LOG_MARK_FLAG (1 << 3)
35
+#define LOG_FLAG_MASK (LOG_FLUSH_FLAG \
36
+ | LOG_FUA_FLAG \
37
+ | LOG_DISCARD_FLAG \
38
+ | LOG_MARK_FLAG)
39
40
#define WRITE_LOG_VERSION 1ULL
41
#define WRITE_LOG_MAGIC 0x6a736677736872ULL
42
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
43
.head = QTAILQ_HEAD_INITIALIZER(runtime_opts.head),
44
.desc = {
45
{
46
+ .name = "log-append",
47
+ .type = QEMU_OPT_BOOL,
48
+ .help = "Append to an existing log",
49
+ },
50
+ {
51
.name = "log-sector-size",
52
.type = QEMU_OPT_SIZE,
53
.help = "Log sector size",
54
@@ -XXX,XX +XXX,XX @@ static inline uint32_t blk_log_writes_log2(uint32_t value)
55
return 31 - clz32(value);
56
}
57
58
+static inline bool blk_log_writes_sector_size_valid(uint32_t sector_size)
59
+{
60
+ return sector_size < (1ull << 24) && is_power_of_2(sector_size);
61
+}
62
+
63
+static uint64_t blk_log_writes_find_cur_log_sector(BdrvChild *log,
64
+ uint32_t sector_size,
65
+ uint64_t nr_entries,
66
+ Error **errp)
67
+{
68
+ uint64_t cur_sector = 1;
69
+ uint64_t cur_idx = 0;
70
+ uint32_t sector_bits = blk_log_writes_log2(sector_size);
71
+ struct log_write_entry cur_entry;
72
+
73
+ while (cur_idx < nr_entries) {
74
+ int read_ret = bdrv_pread(log, cur_sector << sector_bits, &cur_entry,
75
+ sizeof(cur_entry));
76
+ if (read_ret < 0) {
77
+ error_setg_errno(errp, -read_ret,
78
+ "Failed to read log entry %"PRIu64, cur_idx);
79
+ return (uint64_t)-1ull;
80
+ }
81
+
82
+ if (cur_entry.flags & ~cpu_to_le64(LOG_FLAG_MASK)) {
83
+ error_setg(errp, "Invalid flags 0x%"PRIx64" in log entry %"PRIu64,
84
+ le64_to_cpu(cur_entry.flags), cur_idx);
85
+ return (uint64_t)-1ull;
86
+ }
87
+
88
+ /* Account for the sector of the entry itself */
89
+ ++cur_sector;
90
+
91
+ /*
92
+ * Account for the data of the write.
93
+ * For discards, this data is not present.
94
+ */
95
+ if (!(cur_entry.flags & cpu_to_le64(LOG_DISCARD_FLAG))) {
96
+ cur_sector += le64_to_cpu(cur_entry.nr_sectors);
97
+ }
98
+
99
+ ++cur_idx;
100
+ }
101
+
102
+ return cur_sector;
103
+}
104
+
105
static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
106
Error **errp)
107
{
108
@@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
109
Error *local_err = NULL;
110
int ret;
111
uint64_t log_sector_size;
112
+ bool log_append;
113
114
opts = qemu_opts_create(&runtime_opts, NULL, 0, &error_abort);
115
qemu_opts_absorb_qdict(opts, options, &local_err);
116
@@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
117
goto fail;
118
}
119
120
- log_sector_size = qemu_opt_get_size(opts, "log-sector-size",
121
- BDRV_SECTOR_SIZE);
122
-
123
- if (log_sector_size > (1ull << 23) || !is_power_of_2(log_sector_size)) {
124
- ret = -EINVAL;
125
- error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size);
126
- goto fail;
127
- }
128
-
129
- s->sectorsize = log_sector_size;
130
- s->sectorbits = blk_log_writes_log2(log_sector_size);
131
- s->cur_log_sector = 1;
132
- s->nr_entries = 0;
133
-
134
/* Open the log file */
135
s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, false,
136
&local_err);
137
@@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
138
goto fail;
139
}
140
141
+ log_append = qemu_opt_get_bool(opts, "log-append", false);
142
+
143
+ if (log_append) {
144
+ struct log_write_super log_sb = { 0, 0, 0, 0 };
145
+
146
+ if (qemu_opt_find(opts, "log-sector-size")) {
147
+ ret = -EINVAL;
148
+ error_setg(errp, "log-append and log-sector-size are mutually "
149
+ "exclusive");
150
+ goto fail_log;
151
+ }
152
+
153
+ /* Read log superblock or fake one for an empty log */
154
+ if (!bdrv_getlength(s->log_file->bs)) {
155
+ log_sb.magic = cpu_to_le64(WRITE_LOG_MAGIC);
156
+ log_sb.version = cpu_to_le64(WRITE_LOG_VERSION);
157
+ log_sb.nr_entries = cpu_to_le64(0);
158
+ log_sb.sectorsize = cpu_to_le32(BDRV_SECTOR_SIZE);
159
+ } else {
160
+ ret = bdrv_pread(s->log_file, 0, &log_sb, sizeof(log_sb));
161
+ if (ret < 0) {
162
+ error_setg_errno(errp, -ret, "Could not read log superblock");
163
+ goto fail_log;
164
+ }
165
+ }
166
+
167
+ if (log_sb.magic != cpu_to_le64(WRITE_LOG_MAGIC)) {
168
+ ret = -EINVAL;
169
+ error_setg(errp, "Invalid log superblock magic");
170
+ goto fail_log;
171
+ }
172
+
173
+ if (log_sb.version != cpu_to_le64(WRITE_LOG_VERSION)) {
174
+ ret = -EINVAL;
175
+ error_setg(errp, "Unsupported log version %"PRIu64,
176
+ le64_to_cpu(log_sb.version));
177
+ goto fail_log;
178
+ }
179
+
180
+ log_sector_size = le32_to_cpu(log_sb.sectorsize);
181
+ s->cur_log_sector = 1;
182
+ s->nr_entries = 0;
183
+
184
+ if (blk_log_writes_sector_size_valid(log_sector_size)) {
185
+ s->cur_log_sector =
186
+ blk_log_writes_find_cur_log_sector(s->log_file, log_sector_size,
187
+ le64_to_cpu(log_sb.nr_entries), &local_err);
188
+ if (local_err) {
189
+ ret = -EINVAL;
190
+ error_propagate(errp, local_err);
191
+ goto fail_log;
192
+ }
193
+
194
+ s->nr_entries = le64_to_cpu(log_sb.nr_entries);
195
+ }
196
+ } else {
197
+ log_sector_size = qemu_opt_get_size(opts, "log-sector-size",
198
+ BDRV_SECTOR_SIZE);
199
+ s->cur_log_sector = 1;
200
+ s->nr_entries = 0;
201
+ }
202
+
203
+ if (!blk_log_writes_sector_size_valid(log_sector_size)) {
204
+ ret = -EINVAL;
205
+ error_setg(errp, "Invalid log sector size %"PRIu64, log_sector_size);
206
+ goto fail_log;
207
+ }
208
+
209
+ s->sectorsize = log_sector_size;
210
+ s->sectorbits = blk_log_writes_log2(log_sector_size);
211
+
212
ret = 0;
213
+fail_log:
214
+ if (ret < 0) {
215
+ bdrv_unref_child(bs, s->log_file);
216
+ s->log_file = NULL;
217
+ }
218
fail:
219
if (ret < 0) {
220
bdrv_unref_child(bs, bs->file);
221
--
222
2.13.6
223
224
diff view generated by jsdifflib
1
Filter drivers that support .bdrv_co_pwrite_zeroes can safely advertise
1
From: Ari Sundholm <ari@tuxera.com>
2
BDRV_REQ_NO_FALLBACK because they just forward the request flags to
3
their child node.
4
2
3
This is a way to ensure that the log superblock is periodically
4
updated. Before, this was only done on flush requests, which may
5
not be enough if the VM exits abnormally, omitting the final flush.
6
7
The default interval is 4096 write requests.
8
9
Signed-off-by: Ari Sundholm <ari@tuxera.com>
5
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
10
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
6
Acked-by: Eric Blake <eblake@redhat.com>
7
---
11
---
8
block/blkdebug.c | 2 +-
12
qapi/block-core.json | 6 +++++-
9
block/copy-on-read.c | 7 +++----
13
block/blklogwrites.c | 20 ++++++++++++++++++--
10
block/mirror.c | 3 ++-
14
2 files changed, 23 insertions(+), 3 deletions(-)
11
block/raw-format.c | 2 +-
12
4 files changed, 7 insertions(+), 7 deletions(-)
13
15
14
diff --git a/block/blkdebug.c b/block/blkdebug.c
16
diff --git a/qapi/block-core.json b/qapi/block-core.json
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/block/blkdebug.c
18
--- a/qapi/block-core.json
17
+++ b/block/blkdebug.c
19
+++ b/qapi/block-core.json
18
@@ -XXX,XX +XXX,XX @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags,
20
@@ -XXX,XX +XXX,XX @@
19
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
21
# @log-sector-size: sector size used in logging writes to @file, determines
20
(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
22
# granularity of offsets and sizes of writes (default: 512)
21
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
23
#
22
- ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
24
+# @log-super-update-interval: interval of write requests after which the log
23
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
25
+# super block is updated to disk (default: 4096)
24
bs->file->bs->supported_zero_flags);
26
+#
25
ret = -EINVAL;
27
# Since: 3.0
26
28
##
27
diff --git a/block/copy-on-read.c b/block/copy-on-read.c
29
{ 'struct': 'BlockdevOptionsBlklogwrites',
30
'data': { 'file': 'BlockdevRef',
31
'log': 'BlockdevRef',
32
'*log-sector-size': 'uint32',
33
- '*log-append': 'bool' } }
34
+ '*log-append': 'bool',
35
+ '*log-super-update-interval': 'uint64' } }
36
37
##
38
# @BlockdevOptionsBlkverify:
39
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
28
index XXXXXXX..XXXXXXX 100644
40
index XXXXXXX..XXXXXXX 100644
29
--- a/block/copy-on-read.c
41
--- a/block/blklogwrites.c
30
+++ b/block/copy-on-read.c
42
+++ b/block/blklogwrites.c
31
@@ -XXX,XX +XXX,XX @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags,
43
@@ -XXX,XX +XXX,XX @@ typedef struct {
44
uint32_t sectorbits;
45
uint64_t cur_log_sector;
46
uint64_t nr_entries;
47
+ uint64_t update_interval;
48
} BDRVBlkLogWritesState;
49
50
static QemuOptsList runtime_opts = {
51
@@ -XXX,XX +XXX,XX @@ static QemuOptsList runtime_opts = {
52
.type = QEMU_OPT_SIZE,
53
.help = "Log sector size",
54
},
55
+ {
56
+ .name = "log-super-update-interval",
57
+ .type = QEMU_OPT_NUMBER,
58
+ .help = "Log superblock update interval (# of write requests)",
59
+ },
60
{ /* end of list */ }
61
},
62
};
63
@@ -XXX,XX +XXX,XX @@ static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
64
65
s->sectorsize = log_sector_size;
66
s->sectorbits = blk_log_writes_log2(log_sector_size);
67
+ s->update_interval = qemu_opt_get_number(opts, "log-super-update-interval",
68
+ 4096);
69
+ if (!s->update_interval) {
70
+ ret = -EINVAL;
71
+ error_setg(errp, "Invalid log superblock update interval %"PRIu64,
72
+ s->update_interval);
73
+ goto fail_log;
74
+ }
75
76
ret = 0;
77
fail_log:
78
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
79
lr->zero_size, 0);
32
}
80
}
33
81
34
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
82
- /* Update super block on flush */
35
- (BDRV_REQ_FUA &
83
- if (lr->log_ret == 0 && lr->entry.flags & LOG_FLUSH_FLAG) {
36
- bs->file->bs->supported_write_flags);
84
+ /* Update super block on flush or every update interval */
37
+ (BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
85
+ if (lr->log_ret == 0 && ((lr->entry.flags & LOG_FLUSH_FLAG)
38
86
+ || (s->nr_entries % s->update_interval == 0)))
39
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
87
+ {
40
- ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
88
struct log_write_super super = {
41
- bs->file->bs->supported_zero_flags);
89
.magic = cpu_to_le64(WRITE_LOG_MAGIC),
42
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
90
.version = cpu_to_le64(WRITE_LOG_VERSION),
43
+ bs->file->bs->supported_zero_flags);
44
45
return 0;
46
}
47
diff --git a/block/mirror.c b/block/mirror.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/block/mirror.c
50
+++ b/block/mirror.c
51
@@ -XXX,XX +XXX,XX @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs,
52
}
53
mirror_top_bs->total_sectors = bs->total_sectors;
54
mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED;
55
- mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED;
56
+ mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
57
+ BDRV_REQ_NO_FALLBACK;
58
bs_opaque = g_new0(MirrorBDSOpaque, 1);
59
mirror_top_bs->opaque = bs_opaque;
60
bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs));
61
diff --git a/block/raw-format.c b/block/raw-format.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/block/raw-format.c
64
+++ b/block/raw-format.c
65
@@ -XXX,XX +XXX,XX @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
66
bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED |
67
(BDRV_REQ_FUA & bs->file->bs->supported_write_flags);
68
bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED |
69
- ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) &
70
+ ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) &
71
bs->file->bs->supported_zero_flags);
72
73
if (bs->probed && !bdrv_is_read_only(bs)) {
74
--
91
--
75
2.20.1
92
2.13.6
76
93
77
94
diff view generated by jsdifflib
1
There is only a single caller of bdrv_make_zero(), which is qemu-img
1
From: Max Reitz <mreitz@redhat.com>
2
convert. If the function fails, we just fall back to a different method
3
of zeroing out blocks on the target image. There is no good reason to
4
print error messages on stderr when the higher level operation will
5
actually succeed.
6
2
3
raw_apply_lock_bytes() takes a bit mask of "permissions that are NOT
4
shared".
5
6
Also, make the "perm" and "shared" variables uint64_t, because I do not
7
particularly like using ~ on signed integers (and other permission masks
8
are usually uint64_t, too).
9
10
Reported-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Acked-by: Eric Blake <eblake@redhat.com>
9
---
13
---
10
block/io.c | 4 ----
14
block/file-posix.c | 4 ++--
11
1 file changed, 4 deletions(-)
15
1 file changed, 2 insertions(+), 2 deletions(-)
12
16
13
diff --git a/block/io.c b/block/io.c
17
diff --git a/block/file-posix.c b/block/file-posix.c
14
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
15
--- a/block/io.c
19
--- a/block/file-posix.c
16
+++ b/block/io.c
20
+++ b/block/file-posix.c
17
@@ -XXX,XX +XXX,XX @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
21
@@ -XXX,XX +XXX,XX @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
18
}
22
{
19
ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL);
23
BlockdevCreateOptionsFile *file_opts;
20
if (ret < 0) {
24
int fd;
21
- error_report("error getting block status at offset %" PRId64 ": %s",
25
- int perm, shared;
22
- offset, strerror(-ret));
26
+ uint64_t perm, shared;
23
return ret;
27
int result = 0;
24
}
28
25
if (ret & BDRV_BLOCK_ZERO) {
29
/* Validate options and set default values */
26
@@ -XXX,XX +XXX,XX @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
30
@@ -XXX,XX +XXX,XX @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
27
}
31
shared = BLK_PERM_ALL & ~BLK_PERM_RESIZE;
28
ret = bdrv_pwrite_zeroes(child, offset, bytes, flags);
32
29
if (ret < 0) {
33
/* Step one: Take locks */
30
- error_report("error writing zeroes at offset %" PRId64 ": %s",
34
- result = raw_apply_lock_bytes(fd, perm, shared, false, errp);
31
- offset, strerror(-ret));
35
+ result = raw_apply_lock_bytes(fd, perm, ~shared, false, errp);
32
return ret;
36
if (result < 0) {
33
}
37
goto out_close;
34
offset += bytes;
38
}
35
--
39
--
36
2.20.1
40
2.13.6
37
41
38
42
diff view generated by jsdifflib
1
We know that the kernel implements a slow fallback code path for
1
From: Max Reitz <mreitz@redhat.com>
2
BLKZEROOUT, so if BDRV_REQ_NO_FALLBACK is given, we shouldn't call it.
3
The other operations we call in the context of .bdrv_co_pwrite_zeroes
4
should usually be quick, so no modification should be needed for them.
5
If we ever notice that there are additional problematic cases, we can
6
still make these conditional as well.
7
2
3
Closing the FD does not necessarily mean that it is unlocked. Fix this
4
by relinquishing all permission locks before qemu_close().
5
6
Reported-by: Kevin Wolf <kwolf@redhat.com>
7
Signed-off-by: Max Reitz <mreitz@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
Acked-by: Eric Blake <eblake@redhat.com>
10
---
9
---
11
include/block/raw-aio.h | 1 +
10
block/file-posix.c | 17 ++++++++++++++---
12
block/file-posix.c | 24 ++++++++++++++++--------
11
1 file changed, 14 insertions(+), 3 deletions(-)
13
2 files changed, 17 insertions(+), 8 deletions(-)
14
12
15
diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/block/raw-aio.h
18
+++ b/include/block/raw-aio.h
19
@@ -XXX,XX +XXX,XX @@
20
/* AIO flags */
21
#define QEMU_AIO_MISALIGNED 0x1000
22
#define QEMU_AIO_BLKDEV 0x2000
23
+#define QEMU_AIO_NO_FALLBACK 0x4000
24
25
26
/* linux-aio.c - Linux native implementation */
27
diff --git a/block/file-posix.c b/block/file-posix.c
13
diff --git a/block/file-posix.c b/block/file-posix.c
28
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
29
--- a/block/file-posix.c
15
--- a/block/file-posix.c
30
+++ b/block/file-posix.c
16
+++ b/block/file-posix.c
31
@@ -XXX,XX +XXX,XX @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
17
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn
18
raw_co_create(BlockdevCreateOptions *options, Error **errp)
19
{
20
BlockdevCreateOptionsFile *file_opts;
21
+ Error *local_err = NULL;
22
int fd;
23
uint64_t perm, shared;
24
int result = 0;
25
@@ -XXX,XX +XXX,XX @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
26
/* Step two: Check that nobody else has taken conflicting locks */
27
result = raw_check_lock_bytes(fd, perm, shared, errp);
28
if (result < 0) {
29
- goto out_close;
30
+ goto out_unlock;
32
}
31
}
33
#endif
32
34
33
/* Clear the file by truncating it to 0 */
35
- bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP;
34
result = raw_regular_truncate(NULL, fd, 0, PREALLOC_MODE_OFF, errp);
36
+ bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK;
35
if (result < 0) {
37
ret = 0;
36
- goto out_close;
38
fail:
37
+ goto out_unlock;
39
if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) {
40
@@ -XXX,XX +XXX,XX @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb)
41
}
38
}
42
39
43
#ifdef BLKZEROOUT
40
if (file_opts->nocow) {
44
- do {
41
@@ -XXX,XX +XXX,XX @@ raw_co_create(BlockdevCreateOptions *options, Error **errp)
45
- uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
42
result = raw_regular_truncate(NULL, fd, file_opts->size,
46
- if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
43
file_opts->preallocation, errp);
47
- return 0;
44
if (result < 0) {
48
- }
45
- goto out_close;
49
- } while (errno == EINTR);
46
+ goto out_unlock;
50
+ /* The BLKZEROOUT implementation in the kernel doesn't set
51
+ * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow
52
+ * fallbacks. */
53
+ if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) {
54
+ do {
55
+ uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes };
56
+ if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) {
57
+ return 0;
58
+ }
59
+ } while (errno == EINTR);
60
61
- ret = translate_err(-errno);
62
+ ret = translate_err(-errno);
63
+ }
47
+ }
64
#endif
48
+
65
49
+out_unlock:
66
if (ret == -ENOTSUP) {
50
+ raw_apply_lock_bytes(fd, 0, 0, true, &local_err);
67
@@ -XXX,XX +XXX,XX @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
51
+ if (local_err) {
68
if (blkdev) {
52
+ /* The above call should not fail, and if it does, that does
69
acb.aio_type |= QEMU_AIO_BLKDEV;
53
+ * not mean the whole creation operation has failed. So
54
+ * report it the user for their convenience, but do not report
55
+ * it to the caller. */
56
+ error_report_err(local_err);
70
}
57
}
71
+ if (flags & BDRV_REQ_NO_FALLBACK) {
58
72
+ acb.aio_type |= QEMU_AIO_NO_FALLBACK;
59
out_close:
73
+ }
74
75
if (flags & BDRV_REQ_MAY_UNMAP) {
76
acb.aio_type |= QEMU_AIO_DISCARD;
77
--
60
--
78
2.20.1
61
2.13.6
79
62
80
63
diff view generated by jsdifflib