1
The following changes since commit a395717cbd26e7593d3c3fe81faca121ec6d13e8:
1
The following changes since commit 825b96dbcee23d134b691fc75618b59c5f53da32:
2
2
3
Merge remote-tracking branch 'remotes/cody/tags/block-pull-request' into staging (2018-07-03 11:49:51 +0100)
3
Merge tag 'migration-20250310-pull-request' of https://gitlab.com/farosas/qemu into staging (2025-03-11 09:32:07 +0800)
4
4
5
are available in the git repository at:
5
are available in the Git repository at:
6
6
7
git://repo.or.cz/qemu/kevin.git tags/for-upstream
7
https://repo.or.cz/qemu/kevin.git tags/for-upstream
8
8
9
for you to fetch changes up to 59738025a1674bb7e07713c3c93ff4fb9c5079f5:
9
for you to fetch changes up to df957115c46845e2c0ccc29ac0a75eb9700a9a0d:
10
10
11
block: Add blklogwrites (2018-07-03 16:09:48 +0200)
11
scripts/qcow2-to-stdout.py: Add script to write qcow2 images to stdout (2025-03-13 17:57:23 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block layer patches:
14
Block layer patches
15
15
16
- qcow2: Use worker threads for compression to improve performance of
16
- virtio-scsi: add iothread-vq-mapping parameter
17
'qemu-img convert -W' and compressed backup jobs
17
- Improve writethrough performance
18
- blklogwrites: New filter driver to log write requests to an image in
18
- Fix missing zero init in bdrv_snapshot_goto()
19
the dm-log-writes format
19
- Added scripts/qcow2-to-stdout.py
20
- Code cleanup and iotests fixes
20
21
21
----------------------------------------------------------------
22
----------------------------------------------------------------
22
Aapo Vienamo (1):
23
Alberto Garcia (1):
23
block: Add blklogwrites
24
scripts/qcow2-to-stdout.py: Add script to write qcow2 images to stdout
24
25
25
Ari Sundholm (1):
26
Kevin Wolf (8):
26
block: Move two block permission constants to the relevant enum
27
block: Remove unused blk_op_is_blocked()
28
block: Zero block driver state before reopening
29
file-posix: Support FUA writes
30
block/io: Ignore FUA with cache.no-flush=on
31
aio: Create AioPolledEvent
32
aio-posix: Factor out adjust_polling_time()
33
aio-posix: Separate AioPolledEvent per AioHandler
34
aio-posix: Adjust polling time also for new handlers
27
35
28
Vladimir Sementsov-Ogievskiy (3):
36
Stefan Hajnoczi (13):
29
qemu-img: allow compressed not-in-order writes
37
scsi-disk: drop unused SCSIDiskState->bh field
30
qcow2: refactor data compression
38
dma: use current AioContext for dma_blk_io()
31
qcow2: add compress threads
39
scsi: track per-SCSIRequest AioContext
40
scsi: introduce requests_lock
41
virtio-scsi: introduce event and ctrl virtqueue locks
42
virtio-scsi: protect events_dropped field
43
virtio-scsi: perform TMFs in appropriate AioContexts
44
virtio-blk: extract cleanup_iothread_vq_mapping() function
45
virtio-blk: tidy up iothread_vq_mapping functions
46
virtio: extract iothread-vq-mapping.h API
47
virtio-scsi: add iothread-vq-mapping parameter
48
virtio-scsi: handle ctrl virtqueue in main loop
49
virtio-scsi: only expose cmd vqs via iothread-vq-mapping
32
50
33
qapi/block-core.json | 33 ++++-
51
Thomas Huth (1):
34
block/qcow2.h | 3 +
52
iotests: Limit qsd-migrate to working formats
35
include/block/block.h | 7 +
36
block.c | 6 -
37
block/blklogwrites.c | 392 ++++++++++++++++++++++++++++++++++++++++++++++++++
38
block/qcow2.c | 136 ++++++++++++++----
39
qemu-img.c | 5 -
40
MAINTAINERS | 6 +
41
block/Makefile.objs | 1 +
42
9 files changed, 545 insertions(+), 44 deletions(-)
43
create mode 100644 block/blklogwrites.c
44
53
54
include/block/aio.h | 5 +-
55
include/block/raw-aio.h | 19 +-
56
include/hw/scsi/scsi.h | 8 +-
57
include/hw/virtio/iothread-vq-mapping.h | 45 +++
58
include/hw/virtio/virtio-scsi.h | 15 +-
59
include/system/block-backend-global-state.h | 1 -
60
include/system/dma.h | 3 +-
61
util/aio-posix.h | 1 +
62
block/block-backend.c | 12 -
63
block/file-posix.c | 29 +-
64
block/io.c | 4 +
65
block/io_uring.c | 25 +-
66
block/linux-aio.c | 25 +-
67
block/snapshot.c | 1 +
68
hw/block/virtio-blk.c | 132 +-------
69
hw/ide/core.c | 3 +-
70
hw/ide/macio.c | 3 +-
71
hw/scsi/scsi-bus.c | 121 +++++--
72
hw/scsi/scsi-disk.c | 24 +-
73
hw/scsi/virtio-scsi-dataplane.c | 103 ++++--
74
hw/scsi/virtio-scsi.c | 502 ++++++++++++++++------------
75
hw/virtio/iothread-vq-mapping.c | 131 ++++++++
76
system/dma-helpers.c | 8 +-
77
util/aio-posix.c | 114 ++++---
78
util/async.c | 1 -
79
scripts/qcow2-to-stdout.py | 449 +++++++++++++++++++++++++
80
hw/virtio/meson.build | 1 +
81
meson.build | 8 +
82
tests/qemu-iotests/051.pc.out | 2 +-
83
tests/qemu-iotests/tests/qsd-migrate | 2 +-
84
30 files changed, 1286 insertions(+), 511 deletions(-)
85
create mode 100644 include/hw/virtio/iothread-vq-mapping.h
86
create mode 100644 hw/virtio/iothread-vq-mapping.c
87
create mode 100755 scripts/qcow2-to-stdout.py
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
No reason to forbid them, and they are needed to improve performance
4
with compress-threads in further patches.
5
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
9
qemu-img.c | 5 -----
10
1 file changed, 5 deletions(-)
11
12
diff --git a/qemu-img.c b/qemu-img.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/qemu-img.c
15
+++ b/qemu-img.c
16
@@ -XXX,XX +XXX,XX @@ static int img_convert(int argc, char **argv)
17
goto fail_getopt;
18
}
19
20
- if (!s.wr_in_order && s.compressed) {
21
- error_report("Out of order write and compress are mutually exclusive");
22
- goto fail_getopt;
23
- }
24
-
25
if (tgt_image_opts && !skip_create) {
26
error_report("--target-image-opts requires use of -n flag");
27
goto fail_getopt;
28
--
29
2.13.6
30
31
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Make a separate function for compression to be parallelized later.
4
- use .avail_out field instead of .next_out to calculate size of
5
compressed data. It looks more natural and it allows dest to remain
6
a void pointer
7
- set avail_out to be at least one byte less than input, to be sure to
8
detect inefficient compression earlier
9
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
12
---
13
block/qcow2.c | 76 ++++++++++++++++++++++++++++++++++++++---------------------
14
1 file changed, 49 insertions(+), 27 deletions(-)
15
16
diff --git a/block/qcow2.c b/block/qcow2.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/block/qcow2.c
19
+++ b/block/qcow2.c
20
@@ -XXX,XX +XXX,XX @@
21
*/
22
23
#include "qemu/osdep.h"
24
+
25
+#define ZLIB_CONST
26
+#include <zlib.h>
27
+
28
#include "block/block_int.h"
29
#include "block/qdict.h"
30
#include "sysemu/block-backend.h"
31
#include "qemu/module.h"
32
-#include <zlib.h>
33
#include "qcow2.h"
34
#include "qemu/error-report.h"
35
#include "qapi/error.h"
36
@@ -XXX,XX +XXX,XX @@ fail:
37
return ret;
38
}
39
40
+/*
41
+ * qcow2_compress()
42
+ *
43
+ * @dest - destination buffer, at least of @size-1 bytes
44
+ * @src - source buffer, @size bytes
45
+ *
46
+ * Returns: compressed size on success
47
+ * -1 if compression is inefficient
48
+ * -2 on any other error
49
+ */
50
+static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
51
+{
52
+ ssize_t ret;
53
+ z_stream strm;
54
+
55
+ /* best compression, small window, no zlib header */
56
+ memset(&strm, 0, sizeof(strm));
57
+ ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
58
+ -12, 9, Z_DEFAULT_STRATEGY);
59
+ if (ret != 0) {
60
+ return -2;
61
+ }
62
+
63
+ strm.avail_in = size;
64
+ strm.next_in = src;
65
+ strm.avail_out = size - 1;
66
+ strm.next_out = dest;
67
+
68
+ ret = deflate(&strm, Z_FINISH);
69
+ if (ret == Z_STREAM_END) {
70
+ ret = size - 1 - strm.avail_out;
71
+ } else {
72
+ ret = (ret == Z_OK ? -1 : -2);
73
+ }
74
+
75
+ deflateEnd(&strm);
76
+
77
+ return ret;
78
+}
79
+
80
/* XXX: put compressed sectors first, then all the cluster aligned
81
tables to avoid losing bytes in alignment */
82
static coroutine_fn int
83
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
84
BDRVQcow2State *s = bs->opaque;
85
QEMUIOVector hd_qiov;
86
struct iovec iov;
87
- z_stream strm;
88
- int ret, out_len;
89
+ int ret;
90
+ size_t out_len;
91
uint8_t *buf, *out_buf;
92
int64_t cluster_offset;
93
94
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
95
96
out_buf = g_malloc(s->cluster_size);
97
98
- /* best compression, small window, no zlib header */
99
- memset(&strm, 0, sizeof(strm));
100
- ret = deflateInit2(&strm, Z_DEFAULT_COMPRESSION,
101
- Z_DEFLATED, -12,
102
- 9, Z_DEFAULT_STRATEGY);
103
- if (ret != 0) {
104
+ out_len = qcow2_compress(out_buf, buf, s->cluster_size);
105
+ if (out_len == -2) {
106
ret = -EINVAL;
107
goto fail;
108
- }
109
-
110
- strm.avail_in = s->cluster_size;
111
- strm.next_in = (uint8_t *)buf;
112
- strm.avail_out = s->cluster_size;
113
- strm.next_out = out_buf;
114
-
115
- ret = deflate(&strm, Z_FINISH);
116
- if (ret != Z_STREAM_END && ret != Z_OK) {
117
- deflateEnd(&strm);
118
- ret = -EINVAL;
119
- goto fail;
120
- }
121
- out_len = strm.next_out - out_buf;
122
-
123
- deflateEnd(&strm);
124
-
125
- if (ret != Z_STREAM_END || out_len >= s->cluster_size) {
126
+ } else if (out_len == -1) {
127
/* could not compress: write normal cluster */
128
ret = qcow2_co_pwritev(bs, offset, bytes, qiov, 0);
129
if (ret < 0) {
130
--
131
2.13.6
132
133
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
Do data compression in separate threads. This significantly improves
4
performance for qemu-img convert with -W (allow async writes) and -c
5
(compressed) options.
6
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
9
---
10
block/qcow2.h | 3 +++
11
block/qcow2.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
12
2 files changed, 64 insertions(+), 1 deletion(-)
13
14
diff --git a/block/qcow2.h b/block/qcow2.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/qcow2.h
17
+++ b/block/qcow2.h
18
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVQcow2State {
19
* override) */
20
char *image_backing_file;
21
char *image_backing_format;
22
+
23
+ CoQueue compress_wait_queue;
24
+ int nb_compress_threads;
25
} BDRVQcow2State;
26
27
typedef struct Qcow2COWRegion {
28
diff --git a/block/qcow2.c b/block/qcow2.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/block/qcow2.c
31
+++ b/block/qcow2.c
32
@@ -XXX,XX +XXX,XX @@
33
#include "qapi/qobject-input-visitor.h"
34
#include "qapi/qapi-visit-block-core.h"
35
#include "crypto.h"
36
+#include "block/thread-pool.h"
37
38
/*
39
Differences with QCOW:
40
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn qcow2_do_open(BlockDriverState *bs, QDict *options,
41
qcow2_check_refcounts(bs, &result, 0);
42
}
43
#endif
44
+
45
+ qemu_co_queue_init(&s->compress_wait_queue);
46
+
47
return ret;
48
49
fail:
50
@@ -XXX,XX +XXX,XX @@ static ssize_t qcow2_compress(void *dest, const void *src, size_t size)
51
return ret;
52
}
53
54
+#define MAX_COMPRESS_THREADS 4
55
+
56
+typedef struct Qcow2CompressData {
57
+ void *dest;
58
+ const void *src;
59
+ size_t size;
60
+ ssize_t ret;
61
+} Qcow2CompressData;
62
+
63
+static int qcow2_compress_pool_func(void *opaque)
64
+{
65
+ Qcow2CompressData *data = opaque;
66
+
67
+ data->ret = qcow2_compress(data->dest, data->src, data->size);
68
+
69
+ return 0;
70
+}
71
+
72
+static void qcow2_compress_complete(void *opaque, int ret)
73
+{
74
+ qemu_coroutine_enter(opaque);
75
+}
76
+
77
+/* See qcow2_compress definition for parameters description */
78
+static ssize_t qcow2_co_compress(BlockDriverState *bs,
79
+ void *dest, const void *src, size_t size)
80
+{
81
+ BDRVQcow2State *s = bs->opaque;
82
+ BlockAIOCB *acb;
83
+ ThreadPool *pool = aio_get_thread_pool(bdrv_get_aio_context(bs));
84
+ Qcow2CompressData arg = {
85
+ .dest = dest,
86
+ .src = src,
87
+ .size = size,
88
+ };
89
+
90
+ while (s->nb_compress_threads >= MAX_COMPRESS_THREADS) {
91
+ qemu_co_queue_wait(&s->compress_wait_queue, NULL);
92
+ }
93
+
94
+ s->nb_compress_threads++;
95
+ acb = thread_pool_submit_aio(pool, qcow2_compress_pool_func, &arg,
96
+ qcow2_compress_complete,
97
+ qemu_coroutine_self());
98
+
99
+ if (!acb) {
100
+ s->nb_compress_threads--;
101
+ return -EINVAL;
102
+ }
103
+ qemu_coroutine_yield();
104
+ s->nb_compress_threads--;
105
+ qemu_co_queue_next(&s->compress_wait_queue);
106
+
107
+ return arg.ret;
108
+}
109
+
110
/* XXX: put compressed sectors first, then all the cluster aligned
111
tables to avoid losing bytes in alignment */
112
static coroutine_fn int
113
@@ -XXX,XX +XXX,XX @@ qcow2_co_pwritev_compressed(BlockDriverState *bs, uint64_t offset,
114
115
out_buf = g_malloc(s->cluster_size);
116
117
- out_len = qcow2_compress(out_buf, buf, s->cluster_size);
118
+ out_len = qcow2_co_compress(bs, out_buf, buf, s->cluster_size);
119
if (out_len == -2) {
120
ret = -EINVAL;
121
goto fail;
122
--
123
2.13.6
124
125
diff view generated by jsdifflib
Deleted patch
1
From: Ari Sundholm <ari@tuxera.com>
2
1
3
This allows using the two constants outside of block.c, which will
4
happen in a subsequent patch.
5
6
Signed-off-by: Ari Sundholm <ari@tuxera.com>
7
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
8
---
9
include/block/block.h | 7 +++++++
10
block.c | 6 ------
11
2 files changed, 7 insertions(+), 6 deletions(-)
12
13
diff --git a/include/block/block.h b/include/block/block.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/include/block/block.h
16
+++ b/include/block/block.h
17
@@ -XXX,XX +XXX,XX @@ enum {
18
BLK_PERM_GRAPH_MOD = 0x10,
19
20
BLK_PERM_ALL = 0x1f,
21
+
22
+ DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
23
+ | BLK_PERM_WRITE
24
+ | BLK_PERM_WRITE_UNCHANGED
25
+ | BLK_PERM_RESIZE,
26
+
27
+ DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
28
};
29
30
char *bdrv_perm_names(uint64_t perm);
31
diff --git a/block.c b/block.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/block.c
34
+++ b/block.c
35
@@ -XXX,XX +XXX,XX @@ int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
36
return 0;
37
}
38
39
-#define DEFAULT_PERM_PASSTHROUGH (BLK_PERM_CONSISTENT_READ \
40
- | BLK_PERM_WRITE \
41
- | BLK_PERM_WRITE_UNCHANGED \
42
- | BLK_PERM_RESIZE)
43
-#define DEFAULT_PERM_UNCHANGED (BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH)
44
-
45
void bdrv_filter_default_perms(BlockDriverState *bs, BdrvChild *c,
46
const BdrvChildRole *role,
47
BlockReopenQueue *reopen_queue,
48
--
49
2.13.6
50
51
diff view generated by jsdifflib
Deleted patch
1
From: Aapo Vienamo <aapo@tuxera.com>
2
1
3
Implements a block device write logging system, similar to Linux kernel
4
device mapper dm-log-writes. The write operations that are performed
5
on a block device are logged to a file or another block device. The
6
write log format is identical to the dm-log-writes format. Currently,
7
log markers are not supported.
8
9
This functionality can be used for crash consistency and fs consistency
10
testing. By implementing it in qemu, tests utilizing write logs can be
11
used to test non-Linux drivers and older kernels.
12
13
The driver accepts an optional parameter to set the sector size used
14
for logging. This makes the driver require all requests to be aligned
15
to this sector size and also causes offsets and sizes of writes in the
16
log metadata to be expressed in terms of this value (the log format has
17
a granularity of one sector for offsets and sizes). This allows
18
accurate logging of writes to guest block devices that have unusual
19
sector sizes.
20
21
The implementation is based on the blkverify and blkdebug block
22
drivers.
23
24
Signed-off-by: Aapo Vienamo <aapo@tuxera.com>
25
Signed-off-by: Ari Sundholm <ari@tuxera.com>
26
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
27
---
28
qapi/block-core.json | 33 ++++-
29
block/blklogwrites.c | 392 +++++++++++++++++++++++++++++++++++++++++++++++++++
30
MAINTAINERS | 6 +
31
block/Makefile.objs | 1 +
32
4 files changed, 426 insertions(+), 6 deletions(-)
33
create mode 100644 block/blklogwrites.c
34
35
diff --git a/qapi/block-core.json b/qapi/block-core.json
36
index XXXXXXX..XXXXXXX 100644
37
--- a/qapi/block-core.json
38
+++ b/qapi/block-core.json
39
@@ -XXX,XX +XXX,XX @@
40
# @throttle: Since 2.11
41
# @nvme: Since 2.12
42
# @copy-on-read: Since 3.0
43
+# @blklogwrites: Since 3.0
44
#
45
# Since: 2.9
46
##
47
{ 'enum': 'BlockdevDriver',
48
- 'data': [ 'blkdebug', 'blkverify', 'bochs', 'cloop', 'copy-on-read',
49
- 'dmg', 'file', 'ftp', 'ftps', 'gluster', 'host_cdrom',
50
- 'host_device', 'http', 'https', 'iscsi', 'luks', 'nbd', 'nfs',
51
- 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow', 'qcow2', 'qed',
52
- 'quorum', 'raw', 'rbd', 'replication', 'sheepdog', 'ssh',
53
- 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
54
+ 'data': [ 'blkdebug', 'blklogwrites', 'blkverify', 'bochs', 'cloop',
55
+ 'copy-on-read', 'dmg', 'file', 'ftp', 'ftps', 'gluster',
56
+ 'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks',
57
+ 'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow',
58
+ 'qcow2', 'qed', 'quorum', 'raw', 'rbd', 'replication', 'sheepdog',
59
+ 'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
60
61
##
62
# @BlockdevOptionsFile:
63
@@ -XXX,XX +XXX,XX @@
64
'*set-state': ['BlkdebugSetStateOptions'] } }
65
66
##
67
+# @BlockdevOptionsBlklogwrites:
68
+#
69
+# Driver specific block device options for blklogwrites.
70
+#
71
+# @file: block device
72
+#
73
+# @log: block device used to log writes to @file
74
+#
75
+# @log-sector-size: sector size used in logging writes to @file, determines
76
+# granularity of offsets and sizes of writes (default: 512)
77
+#
78
+# Since: 3.0
79
+##
80
+{ 'struct': 'BlockdevOptionsBlklogwrites',
81
+ 'data': { 'file': 'BlockdevRef',
82
+ 'log': 'BlockdevRef',
83
+ '*log-sector-size': 'uint32' } }
84
+
85
+##
86
# @BlockdevOptionsBlkverify:
87
#
88
# Driver specific block device options for blkverify.
89
@@ -XXX,XX +XXX,XX @@
90
'discriminator': 'driver',
91
'data': {
92
'blkdebug': 'BlockdevOptionsBlkdebug',
93
+ 'blklogwrites':'BlockdevOptionsBlklogwrites',
94
'blkverify': 'BlockdevOptionsBlkverify',
95
'bochs': 'BlockdevOptionsGenericFormat',
96
'cloop': 'BlockdevOptionsGenericFormat',
97
diff --git a/block/blklogwrites.c b/block/blklogwrites.c
98
new file mode 100644
99
index XXXXXXX..XXXXXXX
100
--- /dev/null
101
+++ b/block/blklogwrites.c
102
@@ -XXX,XX +XXX,XX @@
103
+/*
104
+ * Write logging blk driver based on blkverify and blkdebug.
105
+ *
106
+ * Copyright (c) 2017 Tuomas Tynkkynen <tuomas@tuxera.com>
107
+ * Copyright (c) 2018 Aapo Vienamo <aapo@tuxera.com>
108
+ * Copyright (c) 2018 Ari Sundholm <ari@tuxera.com>
109
+ *
110
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
111
+ * See the COPYING file in the top-level directory.
112
+ */
113
+
114
+#include "qemu/osdep.h"
115
+#include "qapi/error.h"
116
+#include "qemu/sockets.h" /* for EINPROGRESS on Windows */
117
+#include "block/block_int.h"
118
+#include "qapi/qmp/qdict.h"
119
+#include "qapi/qmp/qstring.h"
120
+#include "qemu/cutils.h"
121
+#include "qemu/option.h"
122
+
123
+/* Disk format stuff - taken from Linux drivers/md/dm-log-writes.c */
124
+
125
+#define LOG_FLUSH_FLAG (1 << 0)
126
+#define LOG_FUA_FLAG (1 << 1)
127
+#define LOG_DISCARD_FLAG (1 << 2)
128
+#define LOG_MARK_FLAG (1 << 3)
129
+
130
+#define WRITE_LOG_VERSION 1ULL
131
+#define WRITE_LOG_MAGIC 0x6a736677736872ULL
132
+
133
+/* All fields are little-endian. */
134
+struct log_write_super {
135
+ uint64_t magic;
136
+ uint64_t version;
137
+ uint64_t nr_entries;
138
+ uint32_t sectorsize;
139
+} QEMU_PACKED;
140
+
141
+struct log_write_entry {
142
+ uint64_t sector;
143
+ uint64_t nr_sectors;
144
+ uint64_t flags;
145
+ uint64_t data_len;
146
+} QEMU_PACKED;
147
+
148
+/* End of disk format structures. */
149
+
150
+typedef struct {
151
+ BdrvChild *log_file;
152
+ uint32_t sectorsize;
153
+ uint32_t sectorbits;
154
+ uint64_t cur_log_sector;
155
+ uint64_t nr_entries;
156
+} BDRVBlkLogWritesState;
157
+
158
+static inline uint32_t blk_log_writes_log2(uint32_t value)
159
+{
160
+ assert(value > 0);
161
+ return 31 - clz32(value);
162
+}
163
+
164
+static int blk_log_writes_open(BlockDriverState *bs, QDict *options, int flags,
165
+ Error **errp)
166
+{
167
+ BDRVBlkLogWritesState *s = bs->opaque;
168
+ Error *local_err = NULL;
169
+ int ret;
170
+ int64_t log_sector_size = BDRV_SECTOR_SIZE;
171
+
172
+ /* Open the file */
173
+ bs->file = bdrv_open_child(NULL, options, "file", bs, &child_file, false,
174
+ &local_err);
175
+ if (local_err) {
176
+ ret = -EINVAL;
177
+ error_propagate(errp, local_err);
178
+ goto fail;
179
+ }
180
+
181
+ if (qdict_haskey(options, "log-sector-size")) {
182
+ log_sector_size = qdict_get_int(options, "log-sector-size");
183
+ qdict_del(options, "log-sector-size");
184
+ }
185
+
186
+ if (log_sector_size < 0 || log_sector_size >= (1ull << 32) ||
187
+ !is_power_of_2(log_sector_size))
188
+ {
189
+ ret = -EINVAL;
190
+ error_setg(errp, "Invalid log sector size %"PRId64, log_sector_size);
191
+ goto fail;
192
+ }
193
+
194
+ s->sectorsize = log_sector_size;
195
+ s->sectorbits = blk_log_writes_log2(log_sector_size);
196
+ s->cur_log_sector = 1;
197
+ s->nr_entries = 0;
198
+
199
+ /* Open the log file */
200
+ s->log_file = bdrv_open_child(NULL, options, "log", bs, &child_file, false,
201
+ &local_err);
202
+ if (local_err) {
203
+ ret = -EINVAL;
204
+ error_propagate(errp, local_err);
205
+ goto fail;
206
+ }
207
+
208
+ ret = 0;
209
+fail:
210
+ if (ret < 0) {
211
+ bdrv_unref_child(bs, bs->file);
212
+ bs->file = NULL;
213
+ }
214
+ return ret;
215
+}
216
+
217
+static void blk_log_writes_close(BlockDriverState *bs)
218
+{
219
+ BDRVBlkLogWritesState *s = bs->opaque;
220
+
221
+ bdrv_unref_child(bs, s->log_file);
222
+ s->log_file = NULL;
223
+}
224
+
225
+static int64_t blk_log_writes_getlength(BlockDriverState *bs)
226
+{
227
+ return bdrv_getlength(bs->file->bs);
228
+}
229
+
230
+static void blk_log_writes_refresh_filename(BlockDriverState *bs,
231
+ QDict *options)
232
+{
233
+ BDRVBlkLogWritesState *s = bs->opaque;
234
+
235
+ /* bs->file->bs has already been refreshed */
236
+ bdrv_refresh_filename(s->log_file->bs);
237
+
238
+ if (bs->file->bs->full_open_options
239
+ && s->log_file->bs->full_open_options)
240
+ {
241
+ QDict *opts = qdict_new();
242
+ qdict_put_str(opts, "driver", "blklogwrites");
243
+
244
+ qobject_ref(bs->file->bs->full_open_options);
245
+ qdict_put_obj(opts, "file", QOBJECT(bs->file->bs->full_open_options));
246
+ qobject_ref(s->log_file->bs->full_open_options);
247
+ qdict_put_obj(opts, "log",
248
+ QOBJECT(s->log_file->bs->full_open_options));
249
+
250
+ bs->full_open_options = opts;
251
+ }
252
+}
253
+
254
+static void blk_log_writes_child_perm(BlockDriverState *bs, BdrvChild *c,
255
+ const BdrvChildRole *role,
256
+ BlockReopenQueue *ro_q,
257
+ uint64_t perm, uint64_t shrd,
258
+ uint64_t *nperm, uint64_t *nshrd)
259
+{
260
+ if (!c) {
261
+ *nperm = perm & DEFAULT_PERM_PASSTHROUGH;
262
+ *nshrd = (shrd & DEFAULT_PERM_PASSTHROUGH) | DEFAULT_PERM_UNCHANGED;
263
+ return;
264
+ }
265
+
266
+ if (!strcmp(c->name, "log")) {
267
+ bdrv_format_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd);
268
+ } else {
269
+ bdrv_filter_default_perms(bs, c, role, ro_q, perm, shrd, nperm, nshrd);
270
+ }
271
+}
272
+
273
+static void blk_log_writes_refresh_limits(BlockDriverState *bs, Error **errp)
274
+{
275
+ BDRVBlkLogWritesState *s = bs->opaque;
276
+ bs->bl.request_alignment = s->sectorsize;
277
+}
278
+
279
+static int coroutine_fn
280
+blk_log_writes_co_preadv(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
281
+ QEMUIOVector *qiov, int flags)
282
+{
283
+ return bdrv_co_preadv(bs->file, offset, bytes, qiov, flags);
284
+}
285
+
286
+typedef struct BlkLogWritesFileReq {
287
+ BlockDriverState *bs;
288
+ uint64_t offset;
289
+ uint64_t bytes;
290
+ int file_flags;
291
+ QEMUIOVector *qiov;
292
+ int (*func)(struct BlkLogWritesFileReq *r);
293
+ int file_ret;
294
+} BlkLogWritesFileReq;
295
+
296
+typedef struct {
297
+ BlockDriverState *bs;
298
+ QEMUIOVector *qiov;
299
+ struct log_write_entry entry;
300
+ uint64_t zero_size;
301
+ int log_ret;
302
+} BlkLogWritesLogReq;
303
+
304
+static void coroutine_fn blk_log_writes_co_do_log(BlkLogWritesLogReq *lr)
305
+{
306
+ BDRVBlkLogWritesState *s = lr->bs->opaque;
307
+ uint64_t cur_log_offset = s->cur_log_sector << s->sectorbits;
308
+
309
+ s->nr_entries++;
310
+ s->cur_log_sector +=
311
+ ROUND_UP(lr->qiov->size, s->sectorsize) >> s->sectorbits;
312
+
313
+ lr->log_ret = bdrv_co_pwritev(s->log_file, cur_log_offset, lr->qiov->size,
314
+ lr->qiov, 0);
315
+
316
+ /* Logging for the "write zeroes" operation */
317
+ if (lr->log_ret == 0 && lr->zero_size) {
318
+ cur_log_offset = s->cur_log_sector << s->sectorbits;
319
+ s->cur_log_sector +=
320
+ ROUND_UP(lr->zero_size, s->sectorsize) >> s->sectorbits;
321
+
322
+ lr->log_ret = bdrv_co_pwrite_zeroes(s->log_file, cur_log_offset,
323
+ lr->zero_size, 0);
324
+ }
325
+
326
+ /* Update super block on flush */
327
+ if (lr->log_ret == 0 && lr->entry.flags & LOG_FLUSH_FLAG) {
328
+ struct log_write_super super = {
329
+ .magic = cpu_to_le64(WRITE_LOG_MAGIC),
330
+ .version = cpu_to_le64(WRITE_LOG_VERSION),
331
+ .nr_entries = cpu_to_le64(s->nr_entries),
332
+ .sectorsize = cpu_to_le32(s->sectorsize),
333
+ };
334
+ void *zeroes = g_malloc0(s->sectorsize - sizeof(super));
335
+ QEMUIOVector qiov;
336
+
337
+ qemu_iovec_init(&qiov, 2);
338
+ qemu_iovec_add(&qiov, &super, sizeof(super));
339
+ qemu_iovec_add(&qiov, zeroes, s->sectorsize - sizeof(super));
340
+
341
+ lr->log_ret =
342
+ bdrv_co_pwritev(s->log_file, 0, s->sectorsize, &qiov, 0);
343
+ if (lr->log_ret == 0) {
344
+ lr->log_ret = bdrv_co_flush(s->log_file->bs);
345
+ }
346
+ qemu_iovec_destroy(&qiov);
347
+ g_free(zeroes);
348
+ }
349
+}
350
+
351
+static void coroutine_fn blk_log_writes_co_do_file(BlkLogWritesFileReq *fr)
352
+{
353
+ fr->file_ret = fr->func(fr);
354
+}
355
+
356
+static int coroutine_fn
357
+blk_log_writes_co_log(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
358
+ QEMUIOVector *qiov, int flags,
359
+ int (*file_func)(BlkLogWritesFileReq *r),
360
+ uint64_t entry_flags, bool is_zero_write)
361
+{
362
+ QEMUIOVector log_qiov;
363
+ size_t niov = qiov ? qiov->niov : 0;
364
+ BDRVBlkLogWritesState *s = bs->opaque;
365
+ BlkLogWritesFileReq fr = {
366
+ .bs = bs,
367
+ .offset = offset,
368
+ .bytes = bytes,
369
+ .file_flags = flags,
370
+ .qiov = qiov,
371
+ .func = file_func,
372
+ };
373
+ BlkLogWritesLogReq lr = {
374
+ .bs = bs,
375
+ .qiov = &log_qiov,
376
+ .entry = {
377
+ .sector = cpu_to_le64(offset >> s->sectorbits),
378
+ .nr_sectors = cpu_to_le64(bytes >> s->sectorbits),
379
+ .flags = cpu_to_le64(entry_flags),
380
+ .data_len = 0,
381
+ },
382
+ .zero_size = is_zero_write ? bytes : 0,
383
+ };
384
+ void *zeroes = g_malloc0(s->sectorsize - sizeof(lr.entry));
385
+
386
+ assert((1 << s->sectorbits) == s->sectorsize);
387
+ assert(bs->bl.request_alignment == s->sectorsize);
388
+ assert(QEMU_IS_ALIGNED(offset, bs->bl.request_alignment));
389
+ assert(QEMU_IS_ALIGNED(bytes, bs->bl.request_alignment));
390
+
391
+ qemu_iovec_init(&log_qiov, niov + 2);
392
+ qemu_iovec_add(&log_qiov, &lr.entry, sizeof(lr.entry));
393
+ qemu_iovec_add(&log_qiov, zeroes, s->sectorsize - sizeof(lr.entry));
394
+ if (qiov) {
395
+ qemu_iovec_concat(&log_qiov, qiov, 0, qiov->size);
396
+ }
397
+
398
+ blk_log_writes_co_do_file(&fr);
399
+ blk_log_writes_co_do_log(&lr);
400
+
401
+ qemu_iovec_destroy(&log_qiov);
402
+ g_free(zeroes);
403
+
404
+ if (lr.log_ret < 0) {
405
+ return lr.log_ret;
406
+ }
407
+
408
+ return fr.file_ret;
409
+}
410
+
411
+static int coroutine_fn
412
+blk_log_writes_co_do_file_pwritev(BlkLogWritesFileReq *fr)
413
+{
414
+ return bdrv_co_pwritev(fr->bs->file, fr->offset, fr->bytes,
415
+ fr->qiov, fr->file_flags);
416
+}
417
+
418
+static int coroutine_fn
419
+blk_log_writes_co_do_file_pwrite_zeroes(BlkLogWritesFileReq *fr)
420
+{
421
+ return bdrv_co_pwrite_zeroes(fr->bs->file, fr->offset, fr->bytes,
422
+ fr->file_flags);
423
+}
424
+
425
+static int coroutine_fn blk_log_writes_co_do_file_flush(BlkLogWritesFileReq *fr)
426
+{
427
+ return bdrv_co_flush(fr->bs->file->bs);
428
+}
429
+
430
+static int coroutine_fn
431
+blk_log_writes_co_do_file_pdiscard(BlkLogWritesFileReq *fr)
432
+{
433
+ return bdrv_co_pdiscard(fr->bs->file->bs, fr->offset, fr->bytes);
434
+}
435
+
436
+static int coroutine_fn
437
+blk_log_writes_co_pwritev(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
438
+ QEMUIOVector *qiov, int flags)
439
+{
440
+ return blk_log_writes_co_log(bs, offset, bytes, qiov, flags,
441
+ blk_log_writes_co_do_file_pwritev, 0, false);
442
+}
443
+
444
+static int coroutine_fn
445
+blk_log_writes_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes,
446
+ BdrvRequestFlags flags)
447
+{
448
+ return blk_log_writes_co_log(bs, offset, bytes, NULL, flags,
449
+ blk_log_writes_co_do_file_pwrite_zeroes, 0,
450
+ true);
451
+}
452
+
453
+static int coroutine_fn blk_log_writes_co_flush_to_disk(BlockDriverState *bs)
454
+{
455
+ return blk_log_writes_co_log(bs, 0, 0, NULL, 0,
456
+ blk_log_writes_co_do_file_flush,
457
+ LOG_FLUSH_FLAG, false);
458
+}
459
+
460
+static int coroutine_fn
461
+blk_log_writes_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
462
+{
463
+ return blk_log_writes_co_log(bs, offset, count, NULL, 0,
464
+ blk_log_writes_co_do_file_pdiscard,
465
+ LOG_DISCARD_FLAG, false);
466
+}
467
+
468
+static BlockDriver bdrv_blk_log_writes = {
469
+ .format_name = "blklogwrites",
470
+ .instance_size = sizeof(BDRVBlkLogWritesState),
471
+
472
+ .bdrv_open = blk_log_writes_open,
473
+ .bdrv_close = blk_log_writes_close,
474
+ .bdrv_getlength = blk_log_writes_getlength,
475
+ .bdrv_refresh_filename = blk_log_writes_refresh_filename,
476
+ .bdrv_child_perm = blk_log_writes_child_perm,
477
+ .bdrv_refresh_limits = blk_log_writes_refresh_limits,
478
+
479
+ .bdrv_co_preadv = blk_log_writes_co_preadv,
480
+ .bdrv_co_pwritev = blk_log_writes_co_pwritev,
481
+ .bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes,
482
+ .bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk,
483
+ .bdrv_co_pdiscard = blk_log_writes_co_pdiscard,
484
+ .bdrv_co_block_status = bdrv_co_block_status_from_file,
485
+
486
+ .is_filter = true,
487
+};
488
+
489
+static void bdrv_blk_log_writes_init(void)
490
+{
491
+ bdrv_register(&bdrv_blk_log_writes);
492
+}
493
+
494
+block_init(bdrv_blk_log_writes_init);
495
diff --git a/MAINTAINERS b/MAINTAINERS
496
index XXXXXXX..XXXXXXX 100644
497
--- a/MAINTAINERS
498
+++ b/MAINTAINERS
499
@@ -XXX,XX +XXX,XX @@ S: Supported
500
F: block/quorum.c
501
L: qemu-block@nongnu.org
502
503
+blklogwrites
504
+M: Ari Sundholm <ari@tuxera.com>
505
+L: qemu-block@nongnu.org
506
+S: Supported
507
+F: block/blklogwrites.c
508
+
509
blkverify
510
M: Stefan Hajnoczi <stefanha@redhat.com>
511
L: qemu-block@nongnu.org
512
diff --git a/block/Makefile.objs b/block/Makefile.objs
513
index XXXXXXX..XXXXXXX 100644
514
--- a/block/Makefile.objs
515
+++ b/block/Makefile.objs
516
@@ -XXX,XX +XXX,XX @@ block-obj-y += qed-check.o
517
block-obj-y += vhdx.o vhdx-endian.o vhdx-log.o
518
block-obj-y += quorum.o
519
block-obj-y += parallels.o blkdebug.o blkverify.o blkreplay.o
520
+block-obj-y += blklogwrites.o
521
block-obj-y += block-backend.o snapshot.o qapi.o
522
block-obj-$(CONFIG_WIN32) += file-win32.o win32-aio.o
523
block-obj-$(CONFIG_POSIX) += file-posix.o
524
--
525
2.13.6
526
527
diff view generated by jsdifflib