The following changes since commit ba29883206d92a29ad5a466e679ccfc2ee6132ef:

  Merge remote-tracking branch 'remotes/borntraeger/tags/s390x-20200310' into staging (2020-03-10 16:50:28 +0000)

are available in the Git repository at:

  https://github.com/XanClic/qemu.git tags/pull-block-2020-03-11

for you to fetch changes up to 397f4e9d83e9c0000905f0a988ba1aeda162571c:

  block/block-copy: hide structure definitions (2020-03-11 12:42:30 +0100)

----------------------------------------------------------------
Block patches for the 5.0 softfreeze:
- qemu-img measure for LUKS
- Improve block-copy's performance by reducing inter-request
  dependencies
- Make curl's detection of accept-ranges more robust
- Memleak fixes
- iotest fix

----------------------------------------------------------------
David Edmondson (2):
      block/curl: HTTP header fields allow whitespace around values
      block/curl: HTTP header field names are case insensitive

Eric Blake (1):
      iotests: Fix nonportable use of od --endian

Pan Nengyuan (2):
      block/qcow2: do free crypto_opts in qcow2_close()
      qemu-img: free memory before re-assign

Stefan Hajnoczi (4):
      luks: extract qcrypto_block_calculate_payload_offset()
      luks: implement .bdrv_measure()
      qemu-img: allow qemu-img measure --object without a filename
      iotests: add 288 luks qemu-img measure test

Vladimir Sementsov-Ogievskiy (10):
      block/qcow2-threads: fix qcow2_decompress
      job: refactor progress to separate object
      block/block-copy: fix progress calculation
      block/block-copy: specialcase first copy_range request
      block/block-copy: use block_status
      block/block-copy: factor out find_conflicting_inflight_req
      block/block-copy: refactor interfaces to use bytes instead of end
      block/block-copy: rename start to offset in interfaces
      block/block-copy: reduce intersecting request lock
      block/block-copy: hide structure definitions

 block/backup-top.c | 6 +-
 block/backup.c | 38 ++-
 block/block-copy.c | 405 ++++++++++++++++++++++++-------
 block/crypto.c | 62 +++++
 block/curl.c | 32 ++-
 block/qcow2-threads.c | 12 +-
 block/qcow2.c | 75 ++----
 block/trace-events | 1 +
 blockjob.c | 16 +-
 crypto/block.c | 36 +++
 include/block/block-copy.h | 65 +----
 include/crypto/block.h | 22 ++
 include/qemu/job.h | 11 +-
 include/qemu/progress_meter.h | 58 +++++
 job-qmp.c | 4 +-
 job.c | 6 +-
 qemu-img.c | 14 +-
 tests/qemu-iotests/178 | 2 +-
 tests/qemu-iotests/178.out.qcow2 | 8 +-
 tests/qemu-iotests/178.out.raw | 8 +-
 tests/qemu-iotests/288 | 93 +++++++
 tests/qemu-iotests/288.out | 30 +++
 tests/qemu-iotests/common.rc | 22 +-
 tests/qemu-iotests/group | 1 +
 24 files changed, 749 insertions(+), 278 deletions(-)
 create mode 100644 include/qemu/progress_meter.h
 create mode 100755 tests/qemu-iotests/288
 create mode 100644 tests/qemu-iotests/288.out

--
2.24.1

The following changes since commit ac793156f650ae2d77834932d72224175ee69086:

  Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201020-1' into staging (2020-10-20 21:11:35 +0100)

are available in the Git repository at:

  https://gitlab.com/stefanha/qemu.git tags/block-pull-request

for you to fetch changes up to 32a3fd65e7e3551337fd26bfc0e2f899d70c028c:

  iotests: add commit top->base cases to 274 (2020-10-22 09:55:39 +0100)

----------------------------------------------------------------
Pull request

v2:
 * Fix format string issues on 32-bit hosts [Peter]
 * Fix qemu-nbd.c CONFIG_POSIX ifdef issue [Eric]
 * Fix missing eventfd.h header on macOS [Peter]
 * Drop unreliable vhost-user-blk test (will send a new patch when ready) [Peter]

This pull request contains the vhost-user-blk server by Coiby Xu along with my
additions, block/nvme.c alignment and hardware error statistics by Philippe
Mathieu-Daudé, and bdrv_co_block_status_above() fixes by Vladimir
Sementsov-Ogievskiy.

----------------------------------------------------------------
Coiby Xu (6):
      libvhost-user: Allow vu_message_read to be replaced
      libvhost-user: remove watch for kick_fd when de-initialize vu-dev
      util/vhost-user-server: generic vhost user server
      block: move logical block size check function to a common utility
            function
      block/export: vhost-user block device backend server
      MAINTAINERS: Add vhost-user block device backend server maintainer

Philippe Mathieu-Daudé (1):
      block/nvme: Add driver statistics for access alignment and hw errors

Stefan Hajnoczi (16):
      util/vhost-user-server: s/fileds/fields/ typo fix
      util/vhost-user-server: drop unnecessary QOM cast
      util/vhost-user-server: drop unnecessary watch deletion
      block/export: consolidate request structs into VuBlockReq
      util/vhost-user-server: drop unused DevicePanicNotifier
      util/vhost-user-server: fix memory leak in vu_message_read()
      util/vhost-user-server: check EOF when reading payload
      util/vhost-user-server: rework vu_client_trip() coroutine lifecycle
      block/export: report flush errors
      block/export: convert vhost-user-blk server to block export API
      util/vhost-user-server: move header to include/
      util/vhost-user-server: use static library in meson.build
      qemu-storage-daemon: avoid compiling blockdev_ss twice
      block: move block exports to libblockdev
      block/export: add iothread and fixed-iothread options
      block/export: add vhost-user-blk multi-queue support

Vladimir Sementsov-Ogievskiy (5):
      block/io: fix bdrv_co_block_status_above
      block/io: bdrv_common_block_status_above: support include_base
      block/io: bdrv_common_block_status_above: support bs == base
      block/io: fix bdrv_is_allocated_above
      iotests: add commit top->base cases to 274

 MAINTAINERS | 9 +
 qapi/block-core.json | 24 +-
 qapi/block-export.json | 36 +-
 block/coroutines.h | 2 +
 block/export/vhost-user-blk-server.h | 19 +
 contrib/libvhost-user/libvhost-user.h | 21 +
 include/qemu/vhost-user-server.h | 65 +++
 util/block-helpers.h | 19 +
 block/export/export.c | 37 +-
 block/export/vhost-user-blk-server.c | 431 ++++++++++++++++++++
 block/io.c | 132 +++---
 block/nvme.c | 27 ++
 block/qcow2.c | 16 +-
 contrib/libvhost-user/libvhost-user-glib.c | 2 +-
 contrib/libvhost-user/libvhost-user.c | 15 +-
 hw/core/qdev-properties-system.c | 31 +-
 nbd/server.c | 2 -
 qemu-nbd.c | 21 +-
 softmmu/vl.c | 4 +
 stubs/blk-exp-close-all.c | 7 +
 tests/vhost-user-bridge.c | 2 +
 tools/virtiofsd/fuse_virtio.c | 4 +-
 util/block-helpers.c | 46 +++
 util/vhost-user-server.c | 446 +++++++++++++++++++++
 block/export/meson.build | 3 +-
 contrib/libvhost-user/meson.build | 1 +
 meson.build | 22 +-
 nbd/meson.build | 2 +
 storage-daemon/meson.build | 3 +-
 stubs/meson.build | 1 +
 tests/qemu-iotests/274 | 20 +
 tests/qemu-iotests/274.out | 68 ++++
 util/meson.build | 4 +
 33 files changed, 1420 insertions(+), 122 deletions(-)
 create mode 100644 block/export/vhost-user-blk-server.h
 create mode 100644 include/qemu/vhost-user-server.h
 create mode 100644 util/block-helpers.h
 create mode 100644 block/export/vhost-user-blk-server.c
 create mode 100644 stubs/blk-exp-close-all.c
 create mode 100644 util/block-helpers.c
 create mode 100644 util/vhost-user-server.c

--
2.26.2

diff view generated by jsdifflib
From: Philippe Mathieu-Daudé <philmd@redhat.com>

Keep statistics of some hardware errors, and number of
aligned/unaligned I/O accesses.

QMP example booting a full RHEL 8.3 aarch64 guest:

{ "execute": "query-blockstats" }
{
    "return": [
        {
            "device": "",
            "node-name": "drive0",
            "stats": {
                "flush_total_time_ns": 6026948,
                "wr_highest_offset": 3383991230464,
                "wr_total_time_ns": 807450995,
                "failed_wr_operations": 0,
                "failed_rd_operations": 0,
                "wr_merged": 3,
                "wr_bytes": 50133504,
                "failed_unmap_operations": 0,
                "failed_flush_operations": 0,
                "account_invalid": false,
                "rd_total_time_ns": 1846979900,
                "flush_operations": 130,
                "wr_operations": 659,
                "rd_merged": 1192,
                "rd_bytes": 218244096,
                "account_failed": false,
                "idle_time_ns": 2678641497,
                "rd_operations": 7406,
            },
            "driver-specific": {
                "driver": "nvme",
                "completion-errors": 0,
                "unaligned-accesses": 2959,
                "aligned-accesses": 4477
            },
            "qdev": "/machine/peripheral-anon/device[0]/virtio-backend"
        }
    ]
}

Suggested-by: Stefan Hajnoczi <stefanha@gmail.com>
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
Acked-by: Markus Armbruster <armbru@redhat.com>
Message-id: 20201001162939.1567915-1-philmd@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 qapi/block-core.json | 24 +++++++++++++++++++++++-
 block/nvme.c | 27 +++++++++++++++++++++++++++
 2 files changed, 50 insertions(+), 1 deletion(-)

From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

In block_copy_do_copy we fall back to read+write if copy_range fails.
In this case copy_size is larger than defined for buffered I/O, and
there is a corresponding commit. Still, backup copies data cluster by
cluster, and most requests are limited to one cluster anyway, so the
only source of this one badly-limited request is the copy-before-write
operation.

A further patch will move backup to use block_copy directly; then, for
cases where copy_range is not supported, the first request would be
oversized in each backup. That is not good, so let's change it now.

The fix is simple: just limit the first copy_range request like a
buffer-based request. If it succeeds, set the larger copy_range limit.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200311103004.7649-4-vsementsov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/block-copy.c | 41 +++++++++++++++++++++++++++++++----------
 1 file changed, 31 insertions(+), 10 deletions(-)
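
The sizing policy described in the block-copy commit message above can be hard to follow from prose alone. The following is a minimal standalone C sketch of the idea, not QEMU code: the 1 MiB limit stands in for BLOCK_COPY_MAX_BUFFER, the 16 MiB limit is an assumed value for the larger copy_range limit, and do_copy_range() is a stand-in for the real fast path. The first fast-path request is limited like a buffered request, and the larger limit is adopted only after that first request succeeds.

    #include <inttypes.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define SMALL_LIMIT (1 << 20)    /* buffered-request-sized limit (1 MiB) */
    #define LARGE_LIMIT (16 << 20)   /* assumed larger copy_range limit (16 MiB) */

    /* stand-in for the fast copy path (bdrv_co_copy_range() in QEMU) */
    static bool do_copy_range(int64_t bytes)
    {
        (void)bytes;
        return true;
    }

    int main(void)
    {
        int64_t copy_size = SMALL_LIMIT;  /* first request stays small */
        bool use_copy_range = true;

        for (int i = 0; i < 3; i++) {
            printf("request %d limited to %" PRId64 " bytes\n", i, copy_size);
            if (use_copy_range && do_copy_range(copy_size)) {
                copy_size = LARGE_LIMIT;  /* grow only after the first success */
            } else {
                use_copy_range = false;   /* fall back to read+write, stay small */
                copy_size = SMALL_LIMIT;
            }
        }
        return 0;
    }
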
diff --git a/block/block-copy.c b/block/block-copy.c
55
diff --git a/qapi/block-core.json b/qapi/block-core.json
27
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
28
--- a/block/block-copy.c
57
--- a/qapi/block-core.json
29
+++ b/block/block-copy.c
58
+++ b/qapi/block-core.json
30
@@ -XXX,XX +XXX,XX @@ void block_copy_state_free(BlockCopyState *s)
59
@@ -XXX,XX +XXX,XX @@
31
g_free(s);
60
'discard-nb-failed': 'uint64',
61
'discard-bytes-ok': 'uint64' } }
62
63
+##
64
+# @BlockStatsSpecificNvme:
65
+#
66
+# NVMe driver statistics
67
+#
68
+# @completion-errors: The number of completion errors.
69
+#
70
+# @aligned-accesses: The number of aligned accesses performed by
71
+# the driver.
72
+#
73
+# @unaligned-accesses: The number of unaligned accesses performed by
74
+# the driver.
75
+#
76
+# Since: 5.2
77
+##
78
+{ 'struct': 'BlockStatsSpecificNvme',
79
+ 'data': {
80
+ 'completion-errors': 'uint64',
81
+ 'aligned-accesses': 'uint64',
82
+ 'unaligned-accesses': 'uint64' } }
83
+
84
##
85
# @BlockStatsSpecific:
86
#
87
@@ -XXX,XX +XXX,XX @@
88
'discriminator': 'driver',
89
'data': {
90
'file': 'BlockStatsSpecificFile',
91
- 'host_device': 'BlockStatsSpecificFile' } }
92
+ 'host_device': 'BlockStatsSpecificFile',
93
+ 'nvme': 'BlockStatsSpecificNvme' } }
94
95
##
96
# @BlockStats:
97
diff --git a/block/nvme.c b/block/nvme.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/block/nvme.c
100
+++ b/block/nvme.c
101
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
102
103
/* PCI address (required for nvme_refresh_filename()) */
104
char *device;
105
+
106
+ struct {
107
+ uint64_t completion_errors;
108
+ uint64_t aligned_accesses;
109
+ uint64_t unaligned_accesses;
110
+ } stats;
111
};
112
113
#define NVME_BLOCK_OPT_DEVICE "device"
114
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
115
break;
116
}
117
ret = nvme_translate_error(c);
118
+ if (ret) {
119
+ s->stats.completion_errors++;
120
+ }
121
q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE;
122
if (!q->cq.head) {
123
q->cq_phase = !q->cq_phase;
124
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
125
assert(QEMU_IS_ALIGNED(bytes, s->page_size));
126
assert(bytes <= s->max_transfer);
127
if (nvme_qiov_aligned(bs, qiov)) {
128
+ s->stats.aligned_accesses++;
129
return nvme_co_prw_aligned(bs, offset, bytes, qiov, is_write, flags);
130
}
131
+ s->stats.unaligned_accesses++;
132
trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
133
buf = qemu_try_memalign(s->page_size, bytes);
134
135
@@ -XXX,XX +XXX,XX @@ static void nvme_unregister_buf(BlockDriverState *bs, void *host)
136
qemu_vfio_dma_unmap(s->vfio, host);
32
}
137
}
33
138
34
+static uint32_t block_copy_max_transfer(BdrvChild *source, BdrvChild *target)
139
+static BlockStatsSpecific *nvme_get_specific_stats(BlockDriverState *bs)
35
+{
140
+{
36
+ return MIN_NON_ZERO(INT_MAX,
141
+ BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1);
37
+ MIN_NON_ZERO(source->bs->bl.max_transfer,
142
+ BDRVNVMeState *s = bs->opaque;
38
+ target->bs->bl.max_transfer));
143
+
144
+ stats->driver = BLOCKDEV_DRIVER_NVME;
145
+ stats->u.nvme = (BlockStatsSpecificNvme) {
146
+ .completion_errors = s->stats.completion_errors,
147
+ .aligned_accesses = s->stats.aligned_accesses,
148
+ .unaligned_accesses = s->stats.unaligned_accesses,
149
+ };
150
+
151
+ return stats;
39
+}
152
+}
40
+
153
+
41
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
154
static const char *const nvme_strong_runtime_opts[] = {
42
int64_t cluster_size,
155
NVME_BLOCK_OPT_DEVICE,
43
BdrvRequestFlags write_flags, Error **errp)
156
NVME_BLOCK_OPT_NAMESPACE,
44
{
157
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nvme = {
45
BlockCopyState *s;
158
.bdrv_refresh_filename = nvme_refresh_filename,
46
BdrvDirtyBitmap *copy_bitmap;
159
.bdrv_refresh_limits = nvme_refresh_limits,
47
- uint32_t max_transfer =
160
.strong_runtime_opts = nvme_strong_runtime_opts,
48
- MIN_NON_ZERO(INT_MAX,
161
+ .bdrv_get_specific_stats = nvme_get_specific_stats,
49
- MIN_NON_ZERO(source->bs->bl.max_transfer,
162
50
- target->bs->bl.max_transfer));
163
.bdrv_detach_aio_context = nvme_detach_aio_context,
51
164
.bdrv_attach_aio_context = nvme_attach_aio_context,
52
copy_bitmap = bdrv_create_dirty_bitmap(source->bs, cluster_size, NULL,
53
errp);
54
@@ -XXX,XX +XXX,XX @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
55
.mem = shres_create(BLOCK_COPY_MAX_MEM),
56
};
57
58
- if (max_transfer < cluster_size) {
59
+ if (block_copy_max_transfer(source, target) < cluster_size) {
60
/*
61
* copy_range does not respect max_transfer. We don't want to bother
62
* with requests smaller than block-copy cluster size, so fallback to
63
@@ -XXX,XX +XXX,XX @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
64
s->copy_size = cluster_size;
65
} else {
66
/*
67
- * copy_range does not respect max_transfer (it's a TODO), so we factor
68
- * that in here.
69
+ * We enable copy-range, but keep small copy_size, until first
70
+ * successful copy_range (look at block_copy_do_copy).
71
*/
72
s->use_copy_range = true;
73
- s->copy_size = MIN(MAX(cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
74
- QEMU_ALIGN_DOWN(max_transfer, cluster_size));
75
+ s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
76
}
77
78
QLIST_INIT(&s->inflight_reqs);
79
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
80
s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
81
/* Fallback to read+write with allocated buffer */
82
} else {
83
+ if (s->use_copy_range) {
84
+ /*
85
+ * Successful copy-range. Now increase copy_size. copy_range
86
+ * does not respect max_transfer (it's a TODO), so we factor
87
+ * that in here.
88
+ *
89
+ * Note: we double-check s->use_copy_range for the case when
90
+ * parallel block-copy request unsets it during previous
91
+ * bdrv_co_copy_range call.
92
+ */
93
+ s->copy_size =
94
+ MIN(MAX(s->cluster_size, BLOCK_COPY_MAX_COPY_RANGE),
95
+ QEMU_ALIGN_DOWN(block_copy_max_transfer(s->source,
96
+ s->target),
97
+ s->cluster_size));
98
+ }
99
goto out;
100
}
101
}
102
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
103
/*
104
* In case of failed copy_range request above, we may proceed with buffered
105
* request larger than BLOCK_COPY_MAX_BUFFER. Still, further requests will
106
- * be properly limited, so don't care too much.
107
+ * be properly limited, so don't care too much. Moreover the most likely
108
+ * case (copy_range is unsupported for the configuration, so the very first
109
+ * copy_range request fails) is handled by setting large copy_size only
110
+ * after first successful copy_range.
111
*/
112
113
bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
114
--
165
--
115
2.24.1
166
2.26.2
116
167
117
diff view generated by jsdifflib
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

Hide structure definitions and add explicit API instead, to keep an
eye on the scope of the shared fields.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200311103004.7649-10-vsementsov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 block/backup-top.c | 6 ++--
 block/backup.c | 25 ++++++++--------
 block/block-copy.c | 59 ++++++++++++++++++++++++++++++++++++++
 include/block/block-copy.h | 52 +++------------------------------
 4 files changed, 80 insertions(+), 62 deletions(-)

From: Coiby Xu <coiby.xu@gmail.com>

Allow vu_message_read to be replaced by one which will make use of the
QIOChannel functions. Thus reading vhost-user message won't stall the
guest. For slave channel, we still use the default vu_message_read.

Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 20200918080912.321299-2-coiby.xu@gmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 contrib/libvhost-user/libvhost-user.h | 21 +++++++++++++++++++++
 contrib/libvhost-user/libvhost-user-glib.c | 2 +-
 contrib/libvhost-user/libvhost-user.c | 14 +++++++-------
 tests/vhost-user-bridge.c | 2 ++
 tools/virtiofsd/fuse_virtio.c | 4 ++--
 5 files changed, 33 insertions(+), 10 deletions(-)
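
For orientation only, and not part of the series, a caller adopting the extended vu_init() signature added by the libvhost-user patch above might wire in a custom reader roughly as follows. The callback signatures are taken from contrib/libvhost-user/libvhost-user.h as of this series, and every body here is a placeholder; passing NULL as the read_msg argument keeps the default vu_message_read_default() behaviour.

    #include "contrib/libvhost-user/libvhost-user.h"

    /* placeholder callbacks with the signatures vu_init() expects */
    static void my_panic(VuDev *dev, const char *err) { (void)dev; (void)err; }
    static void my_set_watch(VuDev *dev, int fd, int condition,
                             vu_watch_cb cb, void *data)
    { (void)dev; (void)fd; (void)condition; (void)cb; (void)data; }
    static void my_remove_watch(VuDev *dev, int fd) { (void)dev; (void)fd; }

    /* custom reader, e.g. one built on QIOChannel so the guest is not stalled */
    static bool my_read_msg(VuDev *dev, int sock, VhostUserMsg *vmsg)
    {
        (void)dev; (void)sock; (void)vmsg;
        return false; /* placeholder: fill *vmsg and return true on success */
    }

    static bool start_device(VuDev *dev, int sock, const VuDevIface *iface)
    {
        /* the fifth argument is the new vu_read_msg_cb hook */
        return vu_init(dev, 1, sock, my_panic, my_read_msg,
                       my_set_watch, my_remove_watch, iface);
    }
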
diff --git a/block/backup-top.c b/block/backup-top.c
20
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
19
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
20
--- a/block/backup-top.c
22
--- a/contrib/libvhost-user/libvhost-user.h
21
+++ b/block/backup-top.c
23
+++ b/contrib/libvhost-user/libvhost-user.h
22
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVBackupTopState {
24
@@ -XXX,XX +XXX,XX @@
23
BlockCopyState *bcs;
25
*/
24
BdrvChild *target;
26
#define VHOST_USER_MAX_RAM_SLOTS 32
25
bool active;
27
26
+ int64_t cluster_size;
28
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
27
} BDRVBackupTopState;
29
+
28
30
typedef enum VhostSetConfigType {
29
static coroutine_fn int backup_top_co_preadv(
31
VHOST_SET_CONFIG_TYPE_MASTER = 0,
30
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int backup_top_cbw(BlockDriverState *bs, uint64_t offset,
32
VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
31
return 0;
33
@@ -XXX,XX +XXX,XX @@ typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
34
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
35
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
36
int *do_reply);
37
+typedef bool (*vu_read_msg_cb) (VuDev *dev, int sock, VhostUserMsg *vmsg);
38
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
39
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
40
typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
41
@@ -XXX,XX +XXX,XX @@ struct VuDev {
42
bool broken;
43
uint16_t max_queues;
44
45
+ /* @read_msg: custom method to read vhost-user message
46
+ *
47
+ * Read data from vhost_user socket fd and fill up
48
+ * the passed VhostUserMsg *vmsg struct.
49
+ *
50
+ * If reading fails, it should close the received set of file
51
+ * descriptors as socket message's auxiliary data.
52
+ *
53
+ * For the details, please refer to vu_message_read in libvhost-user.c
54
+ * which will be used by default if not custom method is provided when
55
+ * calling vu_init
56
+ *
57
+ * Returns: true if vhost-user message successfully received,
58
+ * otherwise return false.
59
+ *
60
+ */
61
+ vu_read_msg_cb read_msg;
62
/* @set_watch: add or update the given fd to the watch set,
63
* call cb when condition is met */
64
vu_set_watch_cb set_watch;
65
@@ -XXX,XX +XXX,XX @@ bool vu_init(VuDev *dev,
66
uint16_t max_queues,
67
int socket,
68
vu_panic_cb panic,
69
+ vu_read_msg_cb read_msg,
70
vu_set_watch_cb set_watch,
71
vu_remove_watch_cb remove_watch,
72
const VuDevIface *iface);
73
diff --git a/contrib/libvhost-user/libvhost-user-glib.c b/contrib/libvhost-user/libvhost-user-glib.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/contrib/libvhost-user/libvhost-user-glib.c
76
+++ b/contrib/libvhost-user/libvhost-user-glib.c
77
@@ -XXX,XX +XXX,XX @@ vug_init(VugDev *dev, uint16_t max_queues, int socket,
78
g_assert(dev);
79
g_assert(iface);
80
81
- if (!vu_init(&dev->parent, max_queues, socket, panic, set_watch,
82
+ if (!vu_init(&dev->parent, max_queues, socket, panic, NULL, set_watch,
83
remove_watch, iface)) {
84
return false;
32
}
85
}
33
86
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
34
- off = QEMU_ALIGN_DOWN(offset, s->bcs->cluster_size);
87
index XXXXXXX..XXXXXXX 100644
35
- end = QEMU_ALIGN_UP(offset + bytes, s->bcs->cluster_size);
88
--- a/contrib/libvhost-user/libvhost-user.c
36
+ off = QEMU_ALIGN_DOWN(offset, s->cluster_size);
89
+++ b/contrib/libvhost-user/libvhost-user.c
37
+ end = QEMU_ALIGN_UP(offset + bytes, s->cluster_size);
90
@@ -XXX,XX +XXX,XX @@
38
91
/* The version of inflight buffer */
39
return block_copy(s->bcs, off, end - off, NULL);
92
#define INFLIGHT_VERSION 1
93
94
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
95
-
96
/* The version of the protocol we support */
97
#define VHOST_USER_VERSION 1
98
#define LIBVHOST_USER_DEBUG 0
99
@@ -XXX,XX +XXX,XX @@ have_userfault(void)
40
}
100
}
41
@@ -XXX,XX +XXX,XX @@ BlockDriverState *bdrv_backup_top_append(BlockDriverState *source,
101
42
goto fail;
102
static bool
103
-vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
104
+vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
105
{
106
char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
107
struct iovec iov = {
108
@@ -XXX,XX +XXX,XX @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
109
goto out;
43
}
110
}
44
111
45
+ state->cluster_size = cluster_size;
112
- if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) {
46
state->bcs = block_copy_state_new(top->backing, state->target,
113
+ if (!vu_message_read_default(dev, dev->slave_fd, &msg_reply)) {
47
cluster_size, write_flags, &local_err);
114
goto out;
48
if (local_err) {
49
diff --git a/block/backup.c b/block/backup.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/block/backup.c
52
+++ b/block/backup.c
53
@@ -XXX,XX +XXX,XX @@ static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
54
55
if (ret < 0 && job->bitmap_mode == BITMAP_SYNC_MODE_ALWAYS) {
56
/* If we failed and synced, merge in the bits we didn't copy: */
57
- bdrv_dirty_bitmap_merge_internal(bm, job->bcs->copy_bitmap,
58
+ bdrv_dirty_bitmap_merge_internal(bm, block_copy_dirty_bitmap(job->bcs),
59
NULL, true);
60
}
115
}
61
}
116
62
@@ -XXX,XX +XXX,XX @@ void backup_do_checkpoint(BlockJob *job, Error **errp)
117
@@ -XXX,XX +XXX,XX @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
118
/* Wait for QEMU to confirm that it's registered the handler for the
119
* faults.
120
*/
121
- if (!vu_message_read(dev, dev->sock, vmsg) ||
122
+ if (!dev->read_msg(dev, dev->sock, vmsg) ||
123
vmsg->size != sizeof(vmsg->payload.u64) ||
124
vmsg->payload.u64 != 0) {
125
vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
126
@@ -XXX,XX +XXX,XX @@ vu_dispatch(VuDev *dev)
127
int reply_requested;
128
bool need_reply, success = false;
129
130
- if (!vu_message_read(dev, dev->sock, &vmsg)) {
131
+ if (!dev->read_msg(dev, dev->sock, &vmsg)) {
132
goto end;
133
}
134
135
@@ -XXX,XX +XXX,XX @@ vu_init(VuDev *dev,
136
uint16_t max_queues,
137
int socket,
138
vu_panic_cb panic,
139
+ vu_read_msg_cb read_msg,
140
vu_set_watch_cb set_watch,
141
vu_remove_watch_cb remove_watch,
142
const VuDevIface *iface)
143
@@ -XXX,XX +XXX,XX @@ vu_init(VuDev *dev,
144
145
dev->sock = socket;
146
dev->panic = panic;
147
+ dev->read_msg = read_msg ? read_msg : vu_message_read_default;
148
dev->set_watch = set_watch;
149
dev->remove_watch = remove_watch;
150
dev->iface = iface;
151
@@ -XXX,XX +XXX,XX @@ static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
152
153
vu_message_write(dev, dev->slave_fd, &vmsg);
154
if (ack) {
155
- vu_message_read(dev, dev->slave_fd, &vmsg);
156
+ vu_message_read_default(dev, dev->slave_fd, &vmsg);
157
}
63
return;
158
return;
64
}
159
}
65
160
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
66
- bdrv_set_dirty_bitmap(backup_job->bcs->copy_bitmap, 0, backup_job->len);
161
index XXXXXXX..XXXXXXX 100644
67
+ bdrv_set_dirty_bitmap(block_copy_dirty_bitmap(backup_job->bcs), 0,
162
--- a/tests/vhost-user-bridge.c
68
+ backup_job->len);
163
+++ b/tests/vhost-user-bridge.c
164
@@ -XXX,XX +XXX,XX @@ vubr_accept_cb(int sock, void *ctx)
165
VHOST_USER_BRIDGE_MAX_QUEUES,
166
conn_fd,
167
vubr_panic,
168
+ NULL,
169
vubr_set_watch,
170
vubr_remove_watch,
171
&vuiface)) {
172
@@ -XXX,XX +XXX,XX @@ vubr_new(const char *path, bool client)
173
VHOST_USER_BRIDGE_MAX_QUEUES,
174
dev->sock,
175
vubr_panic,
176
+ NULL,
177
vubr_set_watch,
178
vubr_remove_watch,
179
&vuiface)) {
180
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
181
index XXXXXXX..XXXXXXX 100644
182
--- a/tools/virtiofsd/fuse_virtio.c
183
+++ b/tools/virtiofsd/fuse_virtio.c
184
@@ -XXX,XX +XXX,XX @@ int virtio_session_mount(struct fuse_session *se)
185
se->vu_socketfd = data_sock;
186
se->virtio_dev->se = se;
187
pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
188
- vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
189
- fv_remove_watch, &fv_iface);
190
+ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, NULL,
191
+ fv_set_watch, fv_remove_watch, &fv_iface);
192
193
return 0;
69
}
194
}
70
71
static BlockErrorAction backup_error_action(BackupBlockJob *job,
72
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_loop(BackupBlockJob *job)
73
BdrvDirtyBitmapIter *bdbi;
74
int ret = 0;
75
76
- bdbi = bdrv_dirty_iter_new(job->bcs->copy_bitmap);
77
+ bdbi = bdrv_dirty_iter_new(block_copy_dirty_bitmap(job->bcs));
78
while ((offset = bdrv_dirty_iter_next(bdbi)) != -1) {
79
do {
80
if (yield_and_check(job)) {
81
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_loop(BackupBlockJob *job)
82
return ret;
83
}
84
85
-static void backup_init_copy_bitmap(BackupBlockJob *job)
86
+static void backup_init_bcs_bitmap(BackupBlockJob *job)
87
{
88
bool ret;
89
uint64_t estimate;
90
+ BdrvDirtyBitmap *bcs_bitmap = block_copy_dirty_bitmap(job->bcs);
91
92
if (job->sync_mode == MIRROR_SYNC_MODE_BITMAP) {
93
- ret = bdrv_dirty_bitmap_merge_internal(job->bcs->copy_bitmap,
94
- job->sync_bitmap,
95
+ ret = bdrv_dirty_bitmap_merge_internal(bcs_bitmap, job->sync_bitmap,
96
NULL, true);
97
assert(ret);
98
} else {
99
@@ -XXX,XX +XXX,XX @@ static void backup_init_copy_bitmap(BackupBlockJob *job)
100
* We can't hog the coroutine to initialize this thoroughly.
101
* Set a flag and resume work when we are able to yield safely.
102
*/
103
- job->bcs->skip_unallocated = true;
104
+ block_copy_set_skip_unallocated(job->bcs, true);
105
}
106
- bdrv_set_dirty_bitmap(job->bcs->copy_bitmap, 0, job->len);
107
+ bdrv_set_dirty_bitmap(bcs_bitmap, 0, job->len);
108
}
109
110
- estimate = bdrv_get_dirty_count(job->bcs->copy_bitmap);
111
+ estimate = bdrv_get_dirty_count(bcs_bitmap);
112
job_progress_set_remaining(&job->common.job, estimate);
113
}
114
115
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_run(Job *job, Error **errp)
116
BackupBlockJob *s = container_of(job, BackupBlockJob, common.job);
117
int ret = 0;
118
119
- backup_init_copy_bitmap(s);
120
+ backup_init_bcs_bitmap(s);
121
122
if (s->sync_mode == MIRROR_SYNC_MODE_TOP) {
123
int64_t offset = 0;
124
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn backup_run(Job *job, Error **errp)
125
126
offset += count;
127
}
128
- s->bcs->skip_unallocated = false;
129
+ block_copy_set_skip_unallocated(s->bcs, false);
130
}
131
132
if (s->sync_mode == MIRROR_SYNC_MODE_NONE) {
133
/*
134
- * All bits are set in copy_bitmap to allow any cluster to be copied.
135
+ * All bits are set in bcs bitmap to allow any cluster to be copied.
136
* This does not actually require them to be copied.
137
*/
138
while (!job_is_cancelled(job)) {
139
diff --git a/block/block-copy.c b/block/block-copy.c
140
index XXXXXXX..XXXXXXX 100644
141
--- a/block/block-copy.c
142
+++ b/block/block-copy.c
143
@@ -XXX,XX +XXX,XX @@
144
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
145
#define BLOCK_COPY_MAX_MEM (128 * MiB)
146
147
+typedef struct BlockCopyInFlightReq {
148
+ int64_t offset;
149
+ int64_t bytes;
150
+ QLIST_ENTRY(BlockCopyInFlightReq) list;
151
+ CoQueue wait_queue; /* coroutines blocked on this request */
152
+} BlockCopyInFlightReq;
153
+
154
+typedef struct BlockCopyState {
155
+ /*
156
+ * BdrvChild objects are not owned or managed by block-copy. They are
157
+ * provided by block-copy user and user is responsible for appropriate
158
+ * permissions on these children.
159
+ */
160
+ BdrvChild *source;
161
+ BdrvChild *target;
162
+ BdrvDirtyBitmap *copy_bitmap;
163
+ int64_t in_flight_bytes;
164
+ int64_t cluster_size;
165
+ bool use_copy_range;
166
+ int64_t copy_size;
167
+ uint64_t len;
168
+ QLIST_HEAD(, BlockCopyInFlightReq) inflight_reqs;
169
+
170
+ BdrvRequestFlags write_flags;
171
+
172
+ /*
173
+ * skip_unallocated:
174
+ *
175
+ * Used by sync=top jobs, which first scan the source node for unallocated
176
+ * areas and clear them in the copy_bitmap. During this process, the bitmap
177
+ * is thus not fully initialized: It may still have bits set for areas that
178
+ * are unallocated and should actually not be copied.
179
+ *
180
+ * This is indicated by skip_unallocated.
181
+ *
182
+ * In this case, block_copy() will query the source’s allocation status,
183
+ * skip unallocated regions, clear them in the copy_bitmap, and invoke
184
+ * block_copy_reset_unallocated() every time it does.
185
+ */
186
+ bool skip_unallocated;
187
+
188
+ ProgressMeter *progress;
189
+ /* progress_bytes_callback: called when some copying progress is done. */
190
+ ProgressBytesCallbackFunc progress_bytes_callback;
191
+ void *progress_opaque;
192
+
193
+ SharedResource *mem;
194
+} BlockCopyState;
195
+
196
static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
197
int64_t offset,
198
int64_t bytes)
199
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
200
201
return ret;
202
}
203
+
204
+BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s)
205
+{
206
+ return s->copy_bitmap;
207
+}
208
+
209
+void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip)
210
+{
211
+ s->skip_unallocated = skip;
212
+}
213
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
214
index XXXXXXX..XXXXXXX 100644
215
--- a/include/block/block-copy.h
216
+++ b/include/block/block-copy.h
217
@@ -XXX,XX +XXX,XX @@
218
#include "block/block.h"
219
#include "qemu/co-shared-resource.h"
220
221
-typedef struct BlockCopyInFlightReq {
222
- int64_t offset;
223
- int64_t bytes;
224
- QLIST_ENTRY(BlockCopyInFlightReq) list;
225
- CoQueue wait_queue; /* coroutines blocked on this request */
226
-} BlockCopyInFlightReq;
227
-
228
typedef void (*ProgressBytesCallbackFunc)(int64_t bytes, void *opaque);
229
-typedef struct BlockCopyState {
230
- /*
231
- * BdrvChild objects are not owned or managed by block-copy. They are
232
- * provided by block-copy user and user is responsible for appropriate
233
- * permissions on these children.
234
- */
235
- BdrvChild *source;
236
- BdrvChild *target;
237
- BdrvDirtyBitmap *copy_bitmap;
238
- int64_t in_flight_bytes;
239
- int64_t cluster_size;
240
- bool use_copy_range;
241
- int64_t copy_size;
242
- uint64_t len;
243
- QLIST_HEAD(, BlockCopyInFlightReq) inflight_reqs;
244
-
245
- BdrvRequestFlags write_flags;
246
-
247
- /*
248
- * skip_unallocated:
249
- *
250
- * Used by sync=top jobs, which first scan the source node for unallocated
251
- * areas and clear them in the copy_bitmap. During this process, the bitmap
252
- * is thus not fully initialized: It may still have bits set for areas that
253
- * are unallocated and should actually not be copied.
254
- *
255
- * This is indicated by skip_unallocated.
256
- *
257
- * In this case, block_copy() will query the source’s allocation status,
258
- * skip unallocated regions, clear them in the copy_bitmap, and invoke
259
- * block_copy_reset_unallocated() every time it does.
260
- */
261
- bool skip_unallocated;
262
-
263
- ProgressMeter *progress;
264
- /* progress_bytes_callback: called when some copying progress is done. */
265
- ProgressBytesCallbackFunc progress_bytes_callback;
266
- void *progress_opaque;
267
-
268
- SharedResource *mem;
269
-} BlockCopyState;
270
+typedef struct BlockCopyState BlockCopyState;
271
272
BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
273
int64_t cluster_size,
274
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
275
int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
276
bool *error_is_read);
277
278
+BdrvDirtyBitmap *block_copy_dirty_bitmap(BlockCopyState *s);
279
+void block_copy_set_skip_unallocated(BlockCopyState *s, bool skip);
280
+
281
#endif /* BLOCK_COPY_H */
282
--
195
--
283
2.24.1
196
2.26.2
284
197
285
diff view generated by jsdifflib
From: Coiby Xu <coiby.xu@gmail.com>

When the client is running in gdb and the quit command is run in gdb,
QEMU will still dispatch the event, which will cause a segmentation
fault in the callback function.

Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20200918080912.321299-3-coiby.xu@gmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 contrib/libvhost-user/libvhost-user.c | 1 +
 1 file changed, 1 insertion(+)

From: Pan Nengyuan <pannengyuan@huawei.com>

collect_image_check() is called twice in img_check(); the filename/format is allocated again without freeing the original memory.
It is not a big deal since the process will exit anyway, but freeing it makes for cleaner code and removes the warning spotted by ASan.

Reported-by: Euler Robot <euler.robot@huawei.com>
Signed-off-by: Pan Nengyuan <pannengyuan@huawei.com>
Message-Id: <20200227012950.12256-3-pannengyuan@huawei.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 qemu-img.c | 2 ++
 1 file changed, 2 insertions(+)
diff --git a/qemu-img.c b/qemu-img.c
16
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-img.c
18
--- a/contrib/libvhost-user/libvhost-user.c
17
+++ b/qemu-img.c
19
+++ b/contrib/libvhost-user/libvhost-user.c
18
@@ -XXX,XX +XXX,XX @@ static int img_check(int argc, char **argv)
20
@@ -XXX,XX +XXX,XX @@ vu_deinit(VuDev *dev)
19
check->corruptions_fixed);
20
}
21
}
21
22
22
+ qapi_free_ImageCheck(check);
23
if (vq->kick_fd != -1) {
23
+ check = g_new0(ImageCheck, 1);
24
+ dev->remove_watch(dev, vq->kick_fd);
24
ret = collect_image_check(bs, check, filename, fmt, 0);
25
close(vq->kick_fd);
25
26
vq->kick_fd = -1;
26
check->leaks_fixed = leaks_fixed;
27
}
27
--
28
--
28
2.24.1
29
2.26.2
29
30
30
diff view generated by jsdifflib
New patch
From: Coiby Xu <coiby.xu@gmail.com>

Sharing QEMU devices via vhost-user protocol.

Only one vhost-user client can connect to the server at a time.

Suggested-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Message-id: 20200918080912.321299-4-coiby.xu@gmail.com
[Fixed size_t %lu -> %zu format string compiler error.
--Stefan]
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/vhost-user-server.h | 65 ++++++
 util/vhost-user-server.c | 428 +++++++++++++++++++++++++++++++++++++++
 util/meson.build | 1 +
 3 files changed, 494 insertions(+)
 create mode 100644 util/vhost-user-server.h
 create mode 100644 util/vhost-user-server.c
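
As a rough sketch of how a device backend might drive the new API declared in util/vhost-user-server.h below (illustrative only, not code from the series): the VuDevIface contents and the SocketAddress are assumed to be supplied by the caller, and one request queue on the main AioContext is used here for simplicity.

    #include "qemu/osdep.h"
    #include "util/vhost-user-server.h"

    static const VuDevIface my_iface;  /* device callbacks would be filled in here */

    static bool start_export(VuServer *server, SocketAddress *addr, Error **errp)
    {
        /* one request queue, main AioContext, no device panic notifier */
        return vhost_user_server_start(server, addr, qemu_get_aio_context(),
                                       1, NULL, &my_iface, errp);
    }

    static void stop_export(VuServer *server)
    {
        vhost_user_server_stop(server);
    }
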
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
25
new file mode 100644
26
index XXXXXXX..XXXXXXX
27
--- /dev/null
28
+++ b/util/vhost-user-server.h
29
@@ -XXX,XX +XXX,XX @@
30
+/*
31
+ * Sharing QEMU devices via vhost-user protocol
32
+ *
33
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
34
+ * Copyright (c) 2020 Red Hat, Inc.
35
+ *
36
+ * This work is licensed under the terms of the GNU GPL, version 2 or
37
+ * later. See the COPYING file in the top-level directory.
38
+ */
39
+
40
+#ifndef VHOST_USER_SERVER_H
41
+#define VHOST_USER_SERVER_H
42
+
43
+#include "contrib/libvhost-user/libvhost-user.h"
44
+#include "io/channel-socket.h"
45
+#include "io/channel-file.h"
46
+#include "io/net-listener.h"
47
+#include "qemu/error-report.h"
48
+#include "qapi/error.h"
49
+#include "standard-headers/linux/virtio_blk.h"
50
+
51
+typedef struct VuFdWatch {
52
+ VuDev *vu_dev;
53
+ int fd; /*kick fd*/
54
+ void *pvt;
55
+ vu_watch_cb cb;
56
+ bool processing;
57
+ QTAILQ_ENTRY(VuFdWatch) next;
58
+} VuFdWatch;
59
+
60
+typedef struct VuServer VuServer;
61
+typedef void DevicePanicNotifierFn(VuServer *server);
62
+
63
+struct VuServer {
64
+ QIONetListener *listener;
65
+ AioContext *ctx;
66
+ DevicePanicNotifierFn *device_panic_notifier;
67
+ int max_queues;
68
+ const VuDevIface *vu_iface;
69
+ VuDev vu_dev;
70
+ QIOChannel *ioc; /* The I/O channel with the client */
71
+ QIOChannelSocket *sioc; /* The underlying data channel with the client */
72
+ /* IOChannel for fd provided via VHOST_USER_SET_SLAVE_REQ_FD */
73
+ QIOChannel *ioc_slave;
74
+ QIOChannelSocket *sioc_slave;
75
+ Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
76
+ QTAILQ_HEAD(, VuFdWatch) vu_fd_watches;
77
+ /* restart coroutine co_trip if AIOContext is changed */
78
+ bool aio_context_changed;
79
+ bool processing_msg;
80
+};
81
+
82
+bool vhost_user_server_start(VuServer *server,
83
+ SocketAddress *unix_socket,
84
+ AioContext *ctx,
85
+ uint16_t max_queues,
86
+ DevicePanicNotifierFn *device_panic_notifier,
87
+ const VuDevIface *vu_iface,
88
+ Error **errp);
89
+
90
+void vhost_user_server_stop(VuServer *server);
91
+
92
+void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx);
93
+
94
+#endif /* VHOST_USER_SERVER_H */
95
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
96
new file mode 100644
97
index XXXXXXX..XXXXXXX
98
--- /dev/null
99
+++ b/util/vhost-user-server.c
100
@@ -XXX,XX +XXX,XX @@
101
+/*
102
+ * Sharing QEMU devices via vhost-user protocol
103
+ *
104
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
105
+ * Copyright (c) 2020 Red Hat, Inc.
106
+ *
107
+ * This work is licensed under the terms of the GNU GPL, version 2 or
108
+ * later. See the COPYING file in the top-level directory.
109
+ */
110
+#include "qemu/osdep.h"
111
+#include "qemu/main-loop.h"
112
+#include "vhost-user-server.h"
113
+
114
+static void vmsg_close_fds(VhostUserMsg *vmsg)
115
+{
116
+ int i;
117
+ for (i = 0; i < vmsg->fd_num; i++) {
118
+ close(vmsg->fds[i]);
119
+ }
120
+}
121
+
122
+static void vmsg_unblock_fds(VhostUserMsg *vmsg)
123
+{
124
+ int i;
125
+ for (i = 0; i < vmsg->fd_num; i++) {
126
+ qemu_set_nonblock(vmsg->fds[i]);
127
+ }
128
+}
129
+
130
+static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
131
+ gpointer opaque);
132
+
133
+static void close_client(VuServer *server)
134
+{
135
+ /*
136
+ * Before closing the client
137
+ *
138
+ * 1. Let vu_client_trip stop processing new vhost-user msg
139
+ *
140
+ * 2. remove kick_handler
141
+ *
142
+ * 3. wait for the kick handler to be finished
143
+ *
144
+ * 4. wait for the current vhost-user msg to be finished processing
145
+ */
146
+
147
+ QIOChannelSocket *sioc = server->sioc;
148
+ /* When this is set vu_client_trip will stop new processing vhost-user message */
149
+ server->sioc = NULL;
150
+
151
+ VuFdWatch *vu_fd_watch, *next;
152
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
153
+ aio_set_fd_handler(server->ioc->ctx, vu_fd_watch->fd, true, NULL,
154
+ NULL, NULL, NULL);
155
+ }
156
+
157
+ while (!QTAILQ_EMPTY(&server->vu_fd_watches)) {
158
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
159
+ if (!vu_fd_watch->processing) {
160
+ QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
161
+ g_free(vu_fd_watch);
162
+ }
163
+ }
164
+ }
165
+
166
+ while (server->processing_msg) {
167
+ if (server->ioc->read_coroutine) {
168
+ server->ioc->read_coroutine = NULL;
169
+ qio_channel_set_aio_fd_handler(server->ioc, server->ioc->ctx, NULL,
170
+ NULL, server->ioc);
171
+ server->processing_msg = false;
172
+ }
173
+ }
174
+
175
+ vu_deinit(&server->vu_dev);
176
+ object_unref(OBJECT(sioc));
177
+ object_unref(OBJECT(server->ioc));
178
+}
179
+
180
+static void panic_cb(VuDev *vu_dev, const char *buf)
181
+{
182
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
183
+
184
+ /* avoid while loop in close_client */
185
+ server->processing_msg = false;
186
+
187
+ if (buf) {
188
+ error_report("vu_panic: %s", buf);
189
+ }
190
+
191
+ if (server->sioc) {
192
+ close_client(server);
193
+ }
194
+
195
+ if (server->device_panic_notifier) {
196
+ server->device_panic_notifier(server);
197
+ }
198
+
199
+ /*
200
+ * Set the callback function for network listener so another
201
+ * vhost-user client can connect to this server
202
+ */
203
+ qio_net_listener_set_client_func(server->listener,
204
+ vu_accept,
205
+ server,
206
+ NULL);
207
+}
208
+
209
+static bool coroutine_fn
210
+vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
211
+{
212
+ struct iovec iov = {
213
+ .iov_base = (char *)vmsg,
214
+ .iov_len = VHOST_USER_HDR_SIZE,
215
+ };
216
+ int rc, read_bytes = 0;
217
+ Error *local_err = NULL;
218
+ /*
219
+ * Store fds/nfds returned from qio_channel_readv_full into
220
+ * temporary variables.
221
+ *
222
+ * VhostUserMsg is a packed structure, gcc will complain about passing
223
+ * pointer to a packed structure member if we pass &VhostUserMsg.fd_num
224
+ * and &VhostUserMsg.fds directly when calling qio_channel_readv_full,
225
+ * thus two temporary variables nfds and fds are used here.
226
+ */
227
+ size_t nfds = 0, nfds_t = 0;
228
+ const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
229
+ int *fds_t = NULL;
230
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
231
+ QIOChannel *ioc = server->ioc;
232
+
233
+ if (!ioc) {
234
+ error_report_err(local_err);
235
+ goto fail;
236
+ }
237
+
238
+ assert(qemu_in_coroutine());
239
+ do {
240
+ /*
241
+ * qio_channel_readv_full may have short reads, keeping calling it
242
+ * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
243
+ */
244
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds_t, &nfds_t, &local_err);
245
+ if (rc < 0) {
246
+ if (rc == QIO_CHANNEL_ERR_BLOCK) {
247
+ qio_channel_yield(ioc, G_IO_IN);
248
+ continue;
249
+ } else {
250
+ error_report_err(local_err);
251
+ return false;
252
+ }
253
+ }
254
+ read_bytes += rc;
255
+ if (nfds_t > 0) {
256
+ if (nfds + nfds_t > max_fds) {
257
+ error_report("A maximum of %zu fds are allowed, "
258
+ "however got %zu fds now",
259
+ max_fds, nfds + nfds_t);
260
+ goto fail;
261
+ }
262
+ memcpy(vmsg->fds + nfds, fds_t,
263
+ nfds_t *sizeof(vmsg->fds[0]));
264
+ nfds += nfds_t;
265
+ g_free(fds_t);
266
+ }
267
+ if (read_bytes == VHOST_USER_HDR_SIZE || rc == 0) {
268
+ break;
269
+ }
270
+ iov.iov_base = (char *)vmsg + read_bytes;
271
+ iov.iov_len = VHOST_USER_HDR_SIZE - read_bytes;
272
+ } while (true);
273
+
274
+ vmsg->fd_num = nfds;
275
+ /* qio_channel_readv_full will make socket fds blocking, unblock them */
276
+ vmsg_unblock_fds(vmsg);
277
+ if (vmsg->size > sizeof(vmsg->payload)) {
278
+ error_report("Error: too big message request: %d, "
279
+ "size: vmsg->size: %u, "
280
+ "while sizeof(vmsg->payload) = %zu",
281
+ vmsg->request, vmsg->size, sizeof(vmsg->payload));
282
+ goto fail;
283
+ }
284
+
285
+ struct iovec iov_payload = {
286
+ .iov_base = (char *)&vmsg->payload,
287
+ .iov_len = vmsg->size,
288
+ };
289
+ if (vmsg->size) {
290
+ rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
291
+ if (rc == -1) {
292
+ error_report_err(local_err);
293
+ goto fail;
294
+ }
295
+ }
296
+
297
+ return true;
298
+
299
+fail:
300
+ vmsg_close_fds(vmsg);
301
+
302
+ return false;
303
+}
304
+
305
+
306
+static void vu_client_start(VuServer *server);
307
+static coroutine_fn void vu_client_trip(void *opaque)
308
+{
309
+ VuServer *server = opaque;
310
+
311
+ while (!server->aio_context_changed && server->sioc) {
312
+ server->processing_msg = true;
313
+ vu_dispatch(&server->vu_dev);
314
+ server->processing_msg = false;
315
+ }
316
+
317
+ if (server->aio_context_changed && server->sioc) {
318
+ server->aio_context_changed = false;
319
+ vu_client_start(server);
320
+ }
321
+}
322
+
323
+static void vu_client_start(VuServer *server)
324
+{
325
+ server->co_trip = qemu_coroutine_create(vu_client_trip, server);
326
+ aio_co_enter(server->ctx, server->co_trip);
327
+}
328
+
329
+/*
330
+ * a wrapper for vu_kick_cb
331
+ *
332
+ * since aio_dispatch can only pass one user data pointer to the
333
+ * callback function, pack VuDev and pvt into a struct. Then unpack it
334
+ * and pass them to vu_kick_cb
335
+ */
336
+static void kick_handler(void *opaque)
337
+{
338
+ VuFdWatch *vu_fd_watch = opaque;
339
+ vu_fd_watch->processing = true;
340
+ vu_fd_watch->cb(vu_fd_watch->vu_dev, 0, vu_fd_watch->pvt);
341
+ vu_fd_watch->processing = false;
342
+}
343
+
344
+
345
+static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
346
+{
347
+
348
+ VuFdWatch *vu_fd_watch, *next;
349
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
350
+ if (vu_fd_watch->fd == fd) {
351
+ return vu_fd_watch;
352
+ }
353
+ }
354
+ return NULL;
355
+}
356
+
357
+static void
358
+set_watch(VuDev *vu_dev, int fd, int vu_evt,
359
+ vu_watch_cb cb, void *pvt)
360
+{
361
+
362
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
363
+ g_assert(vu_dev);
364
+ g_assert(fd >= 0);
365
+ g_assert(cb);
366
+
367
+ VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
368
+
369
+ if (!vu_fd_watch) {
370
+ VuFdWatch *vu_fd_watch = g_new0(VuFdWatch, 1);
371
+
372
+ QTAILQ_INSERT_TAIL(&server->vu_fd_watches, vu_fd_watch, next);
373
+
374
+ vu_fd_watch->fd = fd;
375
+ vu_fd_watch->cb = cb;
376
+ qemu_set_nonblock(fd);
377
+ aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler,
378
+ NULL, NULL, vu_fd_watch);
379
+ vu_fd_watch->vu_dev = vu_dev;
380
+ vu_fd_watch->pvt = pvt;
381
+ }
382
+}
383
+
384
+
385
+static void remove_watch(VuDev *vu_dev, int fd)
386
+{
387
+ VuServer *server;
388
+ g_assert(vu_dev);
389
+ g_assert(fd >= 0);
390
+
391
+ server = container_of(vu_dev, VuServer, vu_dev);
392
+
393
+ VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
394
+
395
+ if (!vu_fd_watch) {
396
+ return;
397
+ }
398
+ aio_set_fd_handler(server->ioc->ctx, fd, true, NULL, NULL, NULL, NULL);
399
+
400
+ QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
401
+ g_free(vu_fd_watch);
402
+}
403
+
404
+
405
+static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
406
+ gpointer opaque)
407
+{
408
+ VuServer *server = opaque;
409
+
410
+ if (server->sioc) {
411
+ warn_report("Only one vhost-user client is allowed to "
412
+ "connect the server one time");
413
+ return;
414
+ }
415
+
416
+ if (!vu_init(&server->vu_dev, server->max_queues, sioc->fd, panic_cb,
417
+ vu_message_read, set_watch, remove_watch, server->vu_iface)) {
418
+ error_report("Failed to initialize libvhost-user");
419
+ return;
420
+ }
421
+
422
+ /*
423
+ * Unset the callback function for network listener to make another
424
+ * vhost-user client keeping waiting until this client disconnects
425
+ */
426
+ qio_net_listener_set_client_func(server->listener,
427
+ NULL,
428
+ NULL,
429
+ NULL);
430
+ server->sioc = sioc;
431
+ /*
432
+ * Increase the object reference, so sioc will not freed by
433
+ * qio_net_listener_channel_func which will call object_unref(OBJECT(sioc))
434
+ */
435
+ object_ref(OBJECT(server->sioc));
436
+ qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");
437
+ server->ioc = QIO_CHANNEL(sioc);
438
+ object_ref(OBJECT(server->ioc));
439
+ qio_channel_attach_aio_context(server->ioc, server->ctx);
440
+ qio_channel_set_blocking(QIO_CHANNEL(server->sioc), false, NULL);
441
+ vu_client_start(server);
442
+}
443
+
444
+
445
+void vhost_user_server_stop(VuServer *server)
446
+{
447
+ if (server->sioc) {
448
+ close_client(server);
449
+ }
450
+
451
+ if (server->listener) {
452
+ qio_net_listener_disconnect(server->listener);
453
+ object_unref(OBJECT(server->listener));
454
+ }
455
+
456
+}
457
+
458
+void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx)
459
+{
460
+ VuFdWatch *vu_fd_watch, *next;
461
+ void *opaque = NULL;
462
+ IOHandler *io_read = NULL;
463
+ bool attach;
464
+
465
+ server->ctx = ctx ? ctx : qemu_get_aio_context();
466
+
467
+ if (!server->sioc) {
468
+ /* not yet serving any client*/
469
+ return;
470
+ }
471
+
472
+ if (ctx) {
473
+ qio_channel_attach_aio_context(server->ioc, ctx);
474
+ server->aio_context_changed = true;
475
+ io_read = kick_handler;
476
+ attach = true;
477
+ } else {
478
+ qio_channel_detach_aio_context(server->ioc);
479
+ /* server->ioc->ctx keeps the old AioConext */
480
+ ctx = server->ioc->ctx;
481
+ attach = false;
482
+ }
483
+
484
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
485
+ if (vu_fd_watch->cb) {
486
+ opaque = attach ? vu_fd_watch : NULL;
487
+ aio_set_fd_handler(ctx, vu_fd_watch->fd, true,
488
+ io_read, NULL, NULL,
489
+ opaque);
490
+ }
491
+ }
492
+}
493
+
494
+
495
+bool vhost_user_server_start(VuServer *server,
496
+ SocketAddress *socket_addr,
497
+ AioContext *ctx,
498
+ uint16_t max_queues,
499
+ DevicePanicNotifierFn *device_panic_notifier,
500
+ const VuDevIface *vu_iface,
501
+ Error **errp)
502
+{
503
+ QIONetListener *listener = qio_net_listener_new();
504
+ if (qio_net_listener_open_sync(listener, socket_addr, 1,
505
+ errp) < 0) {
506
+ object_unref(OBJECT(listener));
507
+ return false;
508
+ }
509
+
510
+ /* zero out unspecified fileds */
511
+ *server = (VuServer) {
512
+ .listener = listener,
513
+ .vu_iface = vu_iface,
514
+ .max_queues = max_queues,
515
+ .ctx = ctx,
516
+ .device_panic_notifier = device_panic_notifier,
517
+ };
518
+
519
+ qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
520
+
521
+ qio_net_listener_set_client_func(server->listener,
522
+ vu_accept,
523
+ server,
524
+ NULL);
525
+
526
+ QTAILQ_INIT(&server->vu_fd_watches);
527
+ return true;
528
+}
529
diff --git a/util/meson.build b/util/meson.build
530
index XXXXXXX..XXXXXXX 100644
531
--- a/util/meson.build
532
+++ b/util/meson.build
533
@@ -XXX,XX +XXX,XX @@ if have_block
534
util_ss.add(files('main-loop.c'))
535
util_ss.add(files('nvdimm-utils.c'))
536
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
537
+ util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
538
util_ss.add(files('qemu-coroutine-sleep.c'))
539
util_ss.add(files('qemu-co-shared-resource.c'))
540
util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
541
--
542
2.26.2
543
diff view generated by jsdifflib
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>

We need it as a separate object so that it can be passed to the
block-copy object in the next commit.

Cc: qemu-stable@nongnu.org
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
Reviewed-by: Max Reitz <mreitz@redhat.com>
Message-Id: <20200311103004.7649-2-vsementsov@virtuozzo.com>
Signed-off-by: Max Reitz <mreitz@redhat.com>
---
 blockjob.c | 16 +++++-----
 include/qemu/job.h | 11 ++-----
 include/qemu/progress_meter.h | 58 +++++++++++++++++++++++++++++++++++
 job-qmp.c | 4 +--
 job.c | 6 ++--
 qemu-img.c | 6 ++--
 6 files changed, 76 insertions(+), 25 deletions(-)
 create mode 100644 include/qemu/progress_meter.h

From: Coiby Xu <coiby.xu@gmail.com>

Move the constants from hw/core/qdev-properties.c to
util/block-helpers.h so that knowledge of the min/max values is

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Acked-by: Eduardo Habkost <ehabkost@redhat.com>
Message-id: 20200918080912.321299-5-coiby.xu@gmail.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 util/block-helpers.h | 19 +++++++++++++
 hw/core/qdev-properties-system.c | 31 ++++-----------------
 util/block-helpers.c | 46 ++++++++++++++++++++++++++++++++
 util/meson.build | 1 +
 4 files changed, 71 insertions(+), 26 deletions(-)
 create mode 100644 util/block-helpers.h
 create mode 100644 util/block-helpers.c
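
For illustration only, and not code from this patch, a caller of the relocated helper could look roughly like this, using the check_block_size() prototype and the 512 B to 2 MiB limits declared in util/block-helpers.h below; the property name and wrapper function are made-up examples.

    #include "qemu/osdep.h"
    #include "qapi/error.h"
    #include "util/block-helpers.h"

    /* hypothetical wrapper: validate a block size before storing it */
    static bool set_logical_block_size(const char *dev_id, int64_t value,
                                       Error **errp)
    {
        Error *local_err = NULL;

        /* rejects values outside MIN_BLOCK_SIZE..MAX_BLOCK_SIZE */
        check_block_size(dev_id ?: "", "logical_block_size", value, &local_err);
        if (local_err) {
            error_propagate(errp, local_err);
            return false;
        }
        return true;
    }
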
diff --git a/blockjob.c b/blockjob.c
22
diff --git a/util/block-helpers.h b/util/block-helpers.h
23
index XXXXXXX..XXXXXXX 100644
24
--- a/blockjob.c
25
+++ b/blockjob.c
26
@@ -XXX,XX +XXX,XX @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
27
info->device = g_strdup(job->job.id);
28
info->busy = atomic_read(&job->job.busy);
29
info->paused = job->job.pause_count > 0;
30
- info->offset = job->job.progress_current;
31
- info->len = job->job.progress_total;
32
+ info->offset = job->job.progress.current;
33
+ info->len = job->job.progress.total;
34
info->speed = job->speed;
35
info->io_status = job->iostatus;
36
info->ready = job_is_ready(&job->job),
37
@@ -XXX,XX +XXX,XX @@ static void block_job_event_cancelled(Notifier *n, void *opaque)
38
39
qapi_event_send_block_job_cancelled(job_type(&job->job),
40
job->job.id,
41
- job->job.progress_total,
42
- job->job.progress_current,
43
+ job->job.progress.total,
44
+ job->job.progress.current,
45
job->speed);
46
}
47
48
@@ -XXX,XX +XXX,XX @@ static void block_job_event_completed(Notifier *n, void *opaque)
49
50
qapi_event_send_block_job_completed(job_type(&job->job),
51
job->job.id,
52
- job->job.progress_total,
53
- job->job.progress_current,
54
+ job->job.progress.total,
55
+ job->job.progress.current,
56
job->speed,
57
!!msg,
58
msg);
59
@@ -XXX,XX +XXX,XX @@ static void block_job_event_ready(Notifier *n, void *opaque)
60
61
qapi_event_send_block_job_ready(job_type(&job->job),
62
job->job.id,
63
- job->job.progress_total,
64
- job->job.progress_current,
65
+ job->job.progress.total,
66
+ job->job.progress.current,
67
job->speed);
68
}
69
70
diff --git a/include/qemu/job.h b/include/qemu/job.h
71
index XXXXXXX..XXXXXXX 100644
72
--- a/include/qemu/job.h
73
+++ b/include/qemu/job.h
74
@@ -XXX,XX +XXX,XX @@
75
76
#include "qapi/qapi-types-job.h"
77
#include "qemu/queue.h"
78
+#include "qemu/progress_meter.h"
79
#include "qemu/coroutine.h"
80
#include "block/aio.h"
81
82
@@ -XXX,XX +XXX,XX @@ typedef struct Job {
83
/** True if this job should automatically dismiss itself */
84
bool auto_dismiss;
85
86
- /**
87
- * Current progress. The unit is arbitrary as long as the ratio between
88
- * progress_current and progress_total represents the estimated percentage
89
- * of work already done.
90
- */
91
- int64_t progress_current;
92
-
93
- /** Estimated progress_current value at the completion of the job */
94
- int64_t progress_total;
95
+ ProgressMeter progress;
96
97
/**
98
* Return code from @run and/or @prepare callback(s).
99
diff --git a/include/qemu/progress_meter.h b/include/qemu/progress_meter.h
100
new file mode 100644
23
new file mode 100644
101
index XXXXXXX..XXXXXXX
24
index XXXXXXX..XXXXXXX
102
--- /dev/null
25
--- /dev/null
103
+++ b/include/qemu/progress_meter.h
26
+++ b/util/block-helpers.h
27
@@ -XXX,XX +XXX,XX @@
28
+#ifndef BLOCK_HELPERS_H
29
+#define BLOCK_HELPERS_H
30
+
31
+#include "qemu/units.h"
32
+
33
+/* lower limit is sector size */
34
+#define MIN_BLOCK_SIZE INT64_C(512)
35
+#define MIN_BLOCK_SIZE_STR "512 B"
36
+/*
37
+ * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
38
+ * matches qcow2 cluster size limit
39
+ */
40
+#define MAX_BLOCK_SIZE (2 * MiB)
41
+#define MAX_BLOCK_SIZE_STR "2 MiB"
42
+
43
+void check_block_size(const char *id, const char *name, int64_t value,
44
+ Error **errp);
45
+
46
+#endif /* BLOCK_HELPERS_H */
47
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/hw/core/qdev-properties-system.c
50
+++ b/hw/core/qdev-properties-system.c
51
@@ -XXX,XX +XXX,XX @@
52
#include "sysemu/blockdev.h"
53
#include "net/net.h"
54
#include "hw/pci/pci.h"
55
+#include "util/block-helpers.h"
56
57
static bool check_prop_still_unset(DeviceState *dev, const char *name,
58
const void *old_val, const char *new_val,
59
@@ -XXX,XX +XXX,XX @@ const PropertyInfo qdev_prop_losttickpolicy = {
60
61
/* --- blocksize --- */
62
63
-/* lower limit is sector size */
64
-#define MIN_BLOCK_SIZE 512
65
-#define MIN_BLOCK_SIZE_STR "512 B"
66
-/*
67
- * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
68
- * matches qcow2 cluster size limit
69
- */
70
-#define MAX_BLOCK_SIZE (2 * MiB)
71
-#define MAX_BLOCK_SIZE_STR "2 MiB"
72
-
73
static void set_blocksize(Object *obj, Visitor *v, const char *name,
74
void *opaque, Error **errp)
75
{
76
@@ -XXX,XX +XXX,XX @@ static void set_blocksize(Object *obj, Visitor *v, const char *name,
77
Property *prop = opaque;
78
uint32_t *ptr = qdev_get_prop_ptr(dev, prop);
79
uint64_t value;
80
+ Error *local_err = NULL;
81
82
if (dev->realized) {
83
qdev_prop_set_after_realize(dev, name, errp);
84
@@ -XXX,XX +XXX,XX @@ static void set_blocksize(Object *obj, Visitor *v, const char *name,
85
if (!visit_type_size(v, name, &value, errp)) {
86
return;
87
}
88
- /* value of 0 means "unset" */
89
- if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
90
- error_setg(errp,
91
- "Property %s.%s doesn't take value %" PRIu64
92
- " (minimum: " MIN_BLOCK_SIZE_STR
93
- ", maximum: " MAX_BLOCK_SIZE_STR ")",
94
- dev->id ? : "", name, value);
95
+ check_block_size(dev->id ? : "", name, value, &local_err);
96
+ if (local_err) {
97
+ error_propagate(errp, local_err);
98
return;
99
}
100
-
101
- /* We rely on power-of-2 blocksizes for bitmasks */
102
- if ((value & (value - 1)) != 0) {
103
- error_setg(errp,
104
- "Property %s.%s doesn't take value '%" PRId64 "', "
105
- "it's not a power of 2", dev->id ?: "", name, (int64_t)value);
106
- return;
107
- }
108
-
109
*ptr = value;
110
}
111
112
diff --git a/util/block-helpers.c b/util/block-helpers.c
113
new file mode 100644
114
index XXXXXXX..XXXXXXX
115
--- /dev/null
116
+++ b/util/block-helpers.c
104
@@ -XXX,XX +XXX,XX @@
117
@@ -XXX,XX +XXX,XX @@
105
+/*
118
+/*
106
+ * Helper functionality for some process progress tracking.
119
+ * Block utility functions
107
+ *
120
+ *
108
+ * Copyright (c) 2011 IBM Corp.
121
+ * Copyright IBM, Corp. 2011
109
+ * Copyright (c) 2012, 2018 Red Hat, Inc.
122
+ * Copyright (c) 2020 Coiby Xu <coiby.xu@gmail.com>
110
+ * Copyright (c) 2020 Virtuozzo International GmbH
111
+ *
123
+ *
112
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
124
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
113
+ * of this software and associated documentation files (the "Software"), to deal
125
+ * See the COPYING file in the top-level directory.
114
+ * in the Software without restriction, including without limitation the rights
115
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
116
+ * copies of the Software, and to permit persons to whom the Software is
117
+ * furnished to do so, subject to the following conditions:
118
+ *
119
+ * The above copyright notice and this permission notice shall be included in
120
+ * all copies or substantial portions of the Software.
121
+ *
122
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
123
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
124
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
125
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
126
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
127
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
128
+ * THE SOFTWARE.
129
+ */
126
+ */
130
+
127
+
131
+#ifndef QEMU_PROGRESS_METER_H
128
+#include "qemu/osdep.h"
132
+#define QEMU_PROGRESS_METER_H
129
+#include "qapi/error.h"
130
+#include "qapi/qmp/qerror.h"
131
+#include "block-helpers.h"
133
+
132
+
134
+typedef struct ProgressMeter {
133
+/**
135
+ /**
134
+ * check_block_size:
136
+ * Current progress. The unit is arbitrary as long as the ratio between
135
+ * @id: The unique ID of the object
137
+ * current and total represents the estimated percentage
136
+ * @name: The name of the property being validated
138
+ * of work already done.
137
+ * @value: The block size in bytes
139
+ */
138
+ * @errp: A pointer to an area to store an error
140
+ uint64_t current;
139
+ *
140
+ * This function checks that the block size meets the following conditions:
141
+ * 1. At least MIN_BLOCK_SIZE
142
+ * 2. No larger than MAX_BLOCK_SIZE
143
+ * 3. A power of 2
144
+ */
145
+void check_block_size(const char *id, const char *name, int64_t value,
146
+ Error **errp)
147
+{
148
+ /* value of 0 means "unset" */
149
+ if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
150
+ error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
151
+ id, name, value, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
152
+ return;
153
+ }
141
+
154
+
142
+ /** Estimated current value at the completion of the process */
155
+ /* We rely on power-of-2 blocksizes for bitmasks */
143
+ uint64_t total;
156
+ if ((value & (value - 1)) != 0) {
144
+} ProgressMeter;
157
+ error_setg(errp,
145
+
158
+ "Property %s.%s doesn't take value '%" PRId64
146
+static inline void progress_work_done(ProgressMeter *pm, uint64_t done)
159
+ "', it's not a power of 2",
147
+{
160
+ id, name, value);
148
+ pm->current += done;
161
+ return;
162
+ }
149
+}
163
+}
150
+
164
diff --git a/util/meson.build b/util/meson.build
151
+static inline void progress_set_remaining(ProgressMeter *pm, uint64_t remaining)
152
+{
153
+ pm->total = pm->current + remaining;
154
+}
155
+
156
+static inline void progress_increase_remaining(ProgressMeter *pm,
157
+ uint64_t delta)
158
+{
159
+ pm->total += delta;
160
+}
161
+
162
+#endif /* QEMU_PROGRESS_METER_H */
163
diff --git a/job-qmp.c b/job-qmp.c
164
index XXXXXXX..XXXXXXX 100644
165
index XXXXXXX..XXXXXXX 100644
165
--- a/job-qmp.c
166
--- a/util/meson.build
166
+++ b/job-qmp.c
167
+++ b/util/meson.build
167
@@ -XXX,XX +XXX,XX @@ static JobInfo *job_query_single(Job *job, Error **errp)
168
@@ -XXX,XX +XXX,XX @@ if have_block
168
.id = g_strdup(job->id),
169
util_ss.add(files('nvdimm-utils.c'))
169
.type = job_type(job),
170
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
170
.status = job->status,
171
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
171
- .current_progress = job->progress_current,
172
+ util_ss.add(files('block-helpers.c'))
172
- .total_progress = job->progress_total,
173
util_ss.add(files('qemu-coroutine-sleep.c'))
173
+ .current_progress = job->progress.current,
174
util_ss.add(files('qemu-co-shared-resource.c'))
174
+ .total_progress = job->progress.total,
175
util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
175
.has_error = !!job->err,
176
.error = job->err ? \
177
g_strdup(error_get_pretty(job->err)) : NULL,
178
diff --git a/job.c b/job.c
179
index XXXXXXX..XXXXXXX 100644
180
--- a/job.c
181
+++ b/job.c
182
@@ -XXX,XX +XXX,XX @@ void job_unref(Job *job)
183
184
void job_progress_update(Job *job, uint64_t done)
185
{
186
- job->progress_current += done;
187
+ progress_work_done(&job->progress, done);
188
}
189
190
void job_progress_set_remaining(Job *job, uint64_t remaining)
191
{
192
- job->progress_total = job->progress_current + remaining;
193
+ progress_set_remaining(&job->progress, remaining);
194
}
195
196
void job_progress_increase_remaining(Job *job, uint64_t delta)
197
{
198
- job->progress_total += delta;
199
+ progress_increase_remaining(&job->progress, delta);
200
}
201
202
void job_event_cancelled(Job *job)
203
diff --git a/qemu-img.c b/qemu-img.c
204
index XXXXXXX..XXXXXXX 100644
205
--- a/qemu-img.c
206
+++ b/qemu-img.c
207
@@ -XXX,XX +XXX,XX @@ static void run_block_job(BlockJob *job, Error **errp)
208
do {
209
float progress = 0.0f;
210
aio_poll(aio_context, true);
211
- if (job->job.progress_total) {
212
- progress = (float)job->job.progress_current /
213
- job->job.progress_total * 100.f;
214
+ if (job->job.progress.total) {
215
+ progress = (float)job->job.progress.current /
216
+ job->job.progress.total * 100.f;
217
}
218
qemu_progress_print(progress, 0);
219
} while (!job_is_ready(&job->job) && !job_is_completed(&job->job));
220
--
176
--
221
2.24.1
177
2.26.2
222
178
223
New patch
1
From: Coiby Xu <coiby.xu@gmail.com>
1
2
3
By making use of libvhost-user, a block device can be shared with
4
the connected vhost-user client. Only one client can connect to the
5
server at a time.
6
7
Since vhost-user-server needs a block drive to be created first, delay
8
the creation of this object.
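
For illustration only (an editorial sketch, not part of the patch), the
properties defined below suggest an invocation along these lines, with
"disk0", disk.qcow2, "vub0" and /tmp/vub.sock as assumed example names:

  qemu-system-x86_64 \
      -blockdev driver=qcow2,node-name=disk0,file.driver=file,file.filename=disk.qcow2 \
      -object vhost-user-blk-server,id=vub0,node-name=disk0,unix-socket=/tmp/vub.sock,writable=on,logical-block-size=512

The softmmu/vl.c hunk below delays creation of the object until after
-blockdev has run so that node-name can be resolved.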
9
10
Suggested-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
15
Message-id: 20200918080912.321299-6-coiby.xu@gmail.com
16
[Shorten "vhost_user_blk_server" string to "vhost_user_blk" to avoid the
17
following compiler warning:
18
../block/export/vhost-user-blk-server.c:178:50: error: ‘%s’ directive output truncated writing 21 bytes into a region of size 20 [-Werror=format-truncation=]
19
and fix "Invalid size %ld ..." ssize_t format string arguments for
20
32-bit hosts.
21
--Stefan]
22
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
23
---
24
block/export/vhost-user-blk-server.h | 36 ++
25
block/export/vhost-user-blk-server.c | 661 +++++++++++++++++++++++++++
26
softmmu/vl.c | 4 +
27
block/meson.build | 1 +
28
4 files changed, 702 insertions(+)
29
create mode 100644 block/export/vhost-user-blk-server.h
30
create mode 100644 block/export/vhost-user-blk-server.c
31
32
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
33
new file mode 100644
34
index XXXXXXX..XXXXXXX
35
--- /dev/null
36
+++ b/block/export/vhost-user-blk-server.h
37
@@ -XXX,XX +XXX,XX @@
38
+/*
39
+ * Sharing QEMU block devices via vhost-user protocol
40
+ *
41
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
42
+ * Copyright (c) 2020 Red Hat, Inc.
43
+ *
44
+ * This work is licensed under the terms of the GNU GPL, version 2 or
45
+ * later. See the COPYING file in the top-level directory.
46
+ */
47
+
48
+#ifndef VHOST_USER_BLK_SERVER_H
49
+#define VHOST_USER_BLK_SERVER_H
50
+#include "util/vhost-user-server.h"
51
+
52
+typedef struct VuBlockDev VuBlockDev;
53
+#define TYPE_VHOST_USER_BLK_SERVER "vhost-user-blk-server"
54
+#define VHOST_USER_BLK_SERVER(obj) \
55
+ OBJECT_CHECK(VuBlockDev, obj, TYPE_VHOST_USER_BLK_SERVER)
56
+
57
+/* vhost user block device */
58
+struct VuBlockDev {
59
+ Object parent_obj;
60
+ char *node_name;
61
+ SocketAddress *addr;
62
+ AioContext *ctx;
63
+ VuServer vu_server;
64
+ bool running;
65
+ uint32_t blk_size;
66
+ BlockBackend *backend;
67
+ QIOChannelSocket *sioc;
68
+ QTAILQ_ENTRY(VuBlockDev) next;
69
+ struct virtio_blk_config blkcfg;
70
+ bool writable;
71
+};
72
+
73
+#endif /* VHOST_USER_BLK_SERVER_H */
74
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
75
new file mode 100644
76
index XXXXXXX..XXXXXXX
77
--- /dev/null
78
+++ b/block/export/vhost-user-blk-server.c
79
@@ -XXX,XX +XXX,XX @@
80
+/*
81
+ * Sharing QEMU block devices via vhost-user protocol
82
+ *
83
+ * Parts of the code based on nbd/server.c.
84
+ *
85
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
86
+ * Copyright (c) 2020 Red Hat, Inc.
87
+ *
88
+ * This work is licensed under the terms of the GNU GPL, version 2 or
89
+ * later. See the COPYING file in the top-level directory.
90
+ */
91
+#include "qemu/osdep.h"
92
+#include "block/block.h"
93
+#include "vhost-user-blk-server.h"
94
+#include "qapi/error.h"
95
+#include "qom/object_interfaces.h"
96
+#include "sysemu/block-backend.h"
97
+#include "util/block-helpers.h"
98
+
99
+enum {
100
+ VHOST_USER_BLK_MAX_QUEUES = 1,
101
+};
102
+struct virtio_blk_inhdr {
103
+ unsigned char status;
104
+};
105
+
106
+typedef struct VuBlockReq {
107
+ VuVirtqElement *elem;
108
+ int64_t sector_num;
109
+ size_t size;
110
+ struct virtio_blk_inhdr *in;
111
+ struct virtio_blk_outhdr out;
112
+ VuServer *server;
113
+ struct VuVirtq *vq;
114
+} VuBlockReq;
115
+
116
+static void vu_block_req_complete(VuBlockReq *req)
117
+{
118
+ VuDev *vu_dev = &req->server->vu_dev;
119
+
120
+ /* IO size with 1 extra status byte */
121
+ vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
122
+ vu_queue_notify(vu_dev, req->vq);
123
+
124
+ if (req->elem) {
125
+ free(req->elem);
126
+ }
127
+
128
+ g_free(req);
129
+}
130
+
131
+static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
132
+{
133
+ return container_of(server, VuBlockDev, vu_server);
134
+}
135
+
136
+static int coroutine_fn
137
+vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
138
+ uint32_t iovcnt, uint32_t type)
139
+{
140
+ struct virtio_blk_discard_write_zeroes desc;
141
+ ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
142
+ if (unlikely(size != sizeof(desc))) {
143
+ error_report("Invalid size %zd, expect %zu", size, sizeof(desc));
144
+ return -EINVAL;
145
+ }
146
+
147
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
148
+ uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
149
+ le32_to_cpu(desc.num_sectors) << 9 };
150
+ if (type == VIRTIO_BLK_T_DISCARD) {
151
+ if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
152
+ return 0;
153
+ }
154
+ } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
155
+ if (blk_co_pwrite_zeroes(vdev_blk->backend,
156
+ range[0], range[1], 0) == 0) {
157
+ return 0;
158
+ }
159
+ }
160
+
161
+ return -EINVAL;
162
+}
163
+
164
+static void coroutine_fn vu_block_flush(VuBlockReq *req)
165
+{
166
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
167
+ BlockBackend *backend = vdev_blk->backend;
168
+ blk_co_flush(backend);
169
+}
170
+
171
+struct req_data {
172
+ VuServer *server;
173
+ VuVirtq *vq;
174
+ VuVirtqElement *elem;
175
+};
176
+
177
+static void coroutine_fn vu_block_virtio_process_req(void *opaque)
178
+{
179
+ struct req_data *data = opaque;
180
+ VuServer *server = data->server;
181
+ VuVirtq *vq = data->vq;
182
+ VuVirtqElement *elem = data->elem;
183
+ uint32_t type;
184
+ VuBlockReq *req;
185
+
186
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
187
+ BlockBackend *backend = vdev_blk->backend;
188
+
189
+ struct iovec *in_iov = elem->in_sg;
190
+ struct iovec *out_iov = elem->out_sg;
191
+ unsigned in_num = elem->in_num;
192
+ unsigned out_num = elem->out_num;
193
+ /* refer to hw/block/virtio_blk.c */
194
+ if (elem->out_num < 1 || elem->in_num < 1) {
195
+ error_report("virtio-blk request missing headers");
196
+ free(elem);
197
+ return;
198
+ }
199
+
200
+ req = g_new0(VuBlockReq, 1);
201
+ req->server = server;
202
+ req->vq = vq;
203
+ req->elem = elem;
204
+
205
+ if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
206
+ sizeof(req->out)) != sizeof(req->out))) {
207
+ error_report("virtio-blk request outhdr too short");
208
+ goto err;
209
+ }
210
+
211
+ iov_discard_front(&out_iov, &out_num, sizeof(req->out));
212
+
213
+ if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
214
+ error_report("virtio-blk request inhdr too short");
215
+ goto err;
216
+ }
217
+
218
+ /* We always touch the last byte, so just see how big in_iov is. */
219
+ req->in = (void *)in_iov[in_num - 1].iov_base
220
+ + in_iov[in_num - 1].iov_len
221
+ - sizeof(struct virtio_blk_inhdr);
222
+ iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
223
+
224
+ type = le32_to_cpu(req->out.type);
225
+ switch (type & ~VIRTIO_BLK_T_BARRIER) {
226
+ case VIRTIO_BLK_T_IN:
227
+ case VIRTIO_BLK_T_OUT: {
228
+ ssize_t ret = 0;
229
+ bool is_write = type & VIRTIO_BLK_T_OUT;
230
+ req->sector_num = le64_to_cpu(req->out.sector);
231
+
232
+ int64_t offset = req->sector_num * vdev_blk->blk_size;
233
+ QEMUIOVector qiov;
234
+ if (is_write) {
235
+ qemu_iovec_init_external(&qiov, out_iov, out_num);
236
+ ret = blk_co_pwritev(backend, offset, qiov.size,
237
+ &qiov, 0);
238
+ } else {
239
+ qemu_iovec_init_external(&qiov, in_iov, in_num);
240
+ ret = blk_co_preadv(backend, offset, qiov.size,
241
+ &qiov, 0);
242
+ }
243
+ if (ret >= 0) {
244
+ req->in->status = VIRTIO_BLK_S_OK;
245
+ } else {
246
+ req->in->status = VIRTIO_BLK_S_IOERR;
247
+ }
248
+ break;
249
+ }
250
+ case VIRTIO_BLK_T_FLUSH:
251
+ vu_block_flush(req);
252
+ req->in->status = VIRTIO_BLK_S_OK;
253
+ break;
254
+ case VIRTIO_BLK_T_GET_ID: {
255
+ size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
256
+ VIRTIO_BLK_ID_BYTES);
257
+ snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
258
+ req->in->status = VIRTIO_BLK_S_OK;
259
+ req->size = elem->in_sg[0].iov_len;
260
+ break;
261
+ }
262
+ case VIRTIO_BLK_T_DISCARD:
263
+ case VIRTIO_BLK_T_WRITE_ZEROES: {
264
+ int rc;
265
+ rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1],
266
+ out_num, type);
267
+ if (rc == 0) {
268
+ req->in->status = VIRTIO_BLK_S_OK;
269
+ } else {
270
+ req->in->status = VIRTIO_BLK_S_IOERR;
271
+ }
272
+ break;
273
+ }
274
+ default:
275
+ req->in->status = VIRTIO_BLK_S_UNSUPP;
276
+ break;
277
+ }
278
+
279
+ vu_block_req_complete(req);
280
+ return;
281
+
282
+err:
283
+ free(elem);
284
+ g_free(req);
285
+ return;
286
+}
287
+
288
+static void vu_block_process_vq(VuDev *vu_dev, int idx)
289
+{
290
+ VuServer *server;
291
+ VuVirtq *vq;
292
+ struct req_data *req_data;
293
+
294
+ server = container_of(vu_dev, VuServer, vu_dev);
295
+ assert(server);
296
+
297
+ vq = vu_get_queue(vu_dev, idx);
298
+ assert(vq);
299
+ VuVirtqElement *elem;
300
+ while (1) {
301
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) +
302
+ sizeof(VuBlockReq));
303
+ if (elem) {
304
+ req_data = g_new0(struct req_data, 1);
305
+ req_data->server = server;
306
+ req_data->vq = vq;
307
+ req_data->elem = elem;
308
+ Coroutine *co = qemu_coroutine_create(vu_block_virtio_process_req,
309
+ req_data);
310
+ aio_co_enter(server->ioc->ctx, co);
311
+ } else {
312
+ break;
313
+ }
314
+ }
315
+}
316
+
317
+static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started)
318
+{
319
+ VuVirtq *vq;
320
+
321
+ assert(vu_dev);
322
+
323
+ vq = vu_get_queue(vu_dev, idx);
324
+ vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL);
325
+}
326
+
327
+static uint64_t vu_block_get_features(VuDev *dev)
328
+{
329
+ uint64_t features;
330
+ VuServer *server = container_of(dev, VuServer, vu_dev);
331
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
332
+ features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
333
+ 1ull << VIRTIO_BLK_F_SEG_MAX |
334
+ 1ull << VIRTIO_BLK_F_TOPOLOGY |
335
+ 1ull << VIRTIO_BLK_F_BLK_SIZE |
336
+ 1ull << VIRTIO_BLK_F_FLUSH |
337
+ 1ull << VIRTIO_BLK_F_DISCARD |
338
+ 1ull << VIRTIO_BLK_F_WRITE_ZEROES |
339
+ 1ull << VIRTIO_BLK_F_CONFIG_WCE |
340
+ 1ull << VIRTIO_F_VERSION_1 |
341
+ 1ull << VIRTIO_RING_F_INDIRECT_DESC |
342
+ 1ull << VIRTIO_RING_F_EVENT_IDX |
343
+ 1ull << VHOST_USER_F_PROTOCOL_FEATURES;
344
+
345
+ if (!vdev_blk->writable) {
346
+ features |= 1ull << VIRTIO_BLK_F_RO;
347
+ }
348
+
349
+ return features;
350
+}
351
+
352
+static uint64_t vu_block_get_protocol_features(VuDev *dev)
353
+{
354
+ return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
355
+ 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
356
+}
357
+
358
+static int
359
+vu_block_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
360
+{
361
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
362
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
363
+ memcpy(config, &vdev_blk->blkcfg, len);
364
+
365
+ return 0;
366
+}
367
+
368
+static int
369
+vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
370
+ uint32_t offset, uint32_t size, uint32_t flags)
371
+{
372
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
373
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
374
+ uint8_t wce;
375
+
376
+ /* don't support live migration */
377
+ if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
378
+ return -EINVAL;
379
+ }
380
+
381
+ if (offset != offsetof(struct virtio_blk_config, wce) ||
382
+ size != 1) {
383
+ return -EINVAL;
384
+ }
385
+
386
+ wce = *data;
387
+ vdev_blk->blkcfg.wce = wce;
388
+ blk_set_enable_write_cache(vdev_blk->backend, wce);
389
+ return 0;
390
+}
391
+
392
+/*
393
+ * When the client disconnects, it sends a VHOST_USER_NONE request
394
+ * and vu_process_message will simply call exit, which causes the VM
395
+ * to exit abruptly.
396
+ * To avoid this issue, process the VHOST_USER_NONE request ahead
397
+ * of vu_process_message.
398
+ *
399
+ */
400
+static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
401
+{
402
+ if (vmsg->request == VHOST_USER_NONE) {
403
+ dev->panic(dev, "disconnect");
404
+ return true;
405
+ }
406
+ return false;
407
+}
408
+
409
+static const VuDevIface vu_block_iface = {
410
+ .get_features = vu_block_get_features,
411
+ .queue_set_started = vu_block_queue_set_started,
412
+ .get_protocol_features = vu_block_get_protocol_features,
413
+ .get_config = vu_block_get_config,
414
+ .set_config = vu_block_set_config,
415
+ .process_msg = vu_block_process_msg,
416
+};
417
+
418
+static void blk_aio_attached(AioContext *ctx, void *opaque)
419
+{
420
+ VuBlockDev *vub_dev = opaque;
421
+ aio_context_acquire(ctx);
422
+ vhost_user_server_set_aio_context(&vub_dev->vu_server, ctx);
423
+ aio_context_release(ctx);
424
+}
425
+
426
+static void blk_aio_detach(void *opaque)
427
+{
428
+ VuBlockDev *vub_dev = opaque;
429
+ AioContext *ctx = vub_dev->vu_server.ctx;
430
+ aio_context_acquire(ctx);
431
+ vhost_user_server_set_aio_context(&vub_dev->vu_server, NULL);
432
+ aio_context_release(ctx);
433
+}
434
+
435
+static void
436
+vu_block_initialize_config(BlockDriverState *bs,
437
+ struct virtio_blk_config *config, uint32_t blk_size)
438
+{
439
+ config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
440
+ config->blk_size = blk_size;
441
+ config->size_max = 0;
442
+ config->seg_max = 128 - 2;
443
+ config->min_io_size = 1;
444
+ config->opt_io_size = 1;
445
+ config->num_queues = VHOST_USER_BLK_MAX_QUEUES;
446
+ config->max_discard_sectors = 32768;
447
+ config->max_discard_seg = 1;
448
+ config->discard_sector_alignment = config->blk_size >> 9;
449
+ config->max_write_zeroes_sectors = 32768;
450
+ config->max_write_zeroes_seg = 1;
451
+}
452
+
453
+static VuBlockDev *vu_block_init(VuBlockDev *vu_block_device, Error **errp)
454
+{
455
+
456
+ BlockBackend *blk;
457
+ Error *local_error = NULL;
458
+ const char *node_name = vu_block_device->node_name;
459
+ bool writable = vu_block_device->writable;
460
+ uint64_t perm = BLK_PERM_CONSISTENT_READ;
461
+ int ret;
462
+
463
+ AioContext *ctx;
464
+
465
+ BlockDriverState *bs = bdrv_lookup_bs(node_name, node_name, &local_error);
466
+
467
+ if (!bs) {
468
+ error_propagate(errp, local_error);
469
+ return NULL;
470
+ }
471
+
472
+ if (bdrv_is_read_only(bs)) {
473
+ writable = false;
474
+ }
475
+
476
+ if (writable) {
477
+ perm |= BLK_PERM_WRITE;
478
+ }
479
+
480
+ ctx = bdrv_get_aio_context(bs);
481
+ aio_context_acquire(ctx);
482
+ bdrv_invalidate_cache(bs, NULL);
483
+ aio_context_release(ctx);
484
+
485
+ /*
486
+ * Don't allow resize while the vhost user server is running,
487
+ * otherwise we don't care what happens with the node.
488
+ */
489
+ blk = blk_new(bdrv_get_aio_context(bs), perm,
490
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
491
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
492
+ ret = blk_insert_bs(blk, bs, errp);
493
+
494
+ if (ret < 0) {
495
+ goto fail;
496
+ }
497
+
498
+ blk_set_enable_write_cache(blk, false);
499
+
500
+ blk_set_allow_aio_context_change(blk, true);
501
+
502
+ vu_block_device->blkcfg.wce = 0;
503
+ vu_block_device->backend = blk;
504
+ if (!vu_block_device->blk_size) {
505
+ vu_block_device->blk_size = BDRV_SECTOR_SIZE;
506
+ }
507
+ vu_block_device->blkcfg.blk_size = vu_block_device->blk_size;
508
+ blk_set_guest_block_size(blk, vu_block_device->blk_size);
509
+ vu_block_initialize_config(bs, &vu_block_device->blkcfg,
510
+ vu_block_device->blk_size);
511
+ return vu_block_device;
512
+
513
+fail:
514
+ blk_unref(blk);
515
+ return NULL;
516
+}
517
+
518
+static void vu_block_deinit(VuBlockDev *vu_block_device)
519
+{
520
+ if (vu_block_device->backend) {
521
+ blk_remove_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
522
+ blk_aio_detach, vu_block_device);
523
+ }
524
+
525
+ blk_unref(vu_block_device->backend);
526
+}
527
+
528
+static void vhost_user_blk_server_stop(VuBlockDev *vu_block_device)
529
+{
530
+ vhost_user_server_stop(&vu_block_device->vu_server);
531
+ vu_block_deinit(vu_block_device);
532
+}
533
+
534
+static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
535
+ Error **errp)
536
+{
537
+ AioContext *ctx;
538
+ SocketAddress *addr = vu_block_device->addr;
539
+
540
+ if (!vu_block_init(vu_block_device, errp)) {
541
+ return;
542
+ }
543
+
544
+ ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
545
+
546
+ if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
547
+ VHOST_USER_BLK_MAX_QUEUES,
548
+ NULL, &vu_block_iface,
549
+ errp)) {
550
+ goto error;
551
+ }
552
+
553
+ blk_add_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
554
+ blk_aio_detach, vu_block_device);
555
+ vu_block_device->running = true;
556
+ return;
557
+
558
+ error:
559
+ vu_block_deinit(vu_block_device);
560
+}
561
+
562
+static bool vu_prop_modifiable(VuBlockDev *vus, Error **errp)
563
+{
564
+ if (vus->running) {
565
+ error_setg(errp, "The property can't be modified "
566
+ "while the server is running");
567
+ return false;
568
+ }
569
+ return true;
570
+}
571
+
572
+static void vu_set_node_name(Object *obj, const char *value, Error **errp)
573
+{
574
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
575
+
576
+ if (!vu_prop_modifiable(vus, errp)) {
577
+ return;
578
+ }
579
+
580
+ if (vus->node_name) {
581
+ g_free(vus->node_name);
582
+ }
583
+
584
+ vus->node_name = g_strdup(value);
585
+}
586
+
587
+static char *vu_get_node_name(Object *obj, Error **errp)
588
+{
589
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
590
+ return g_strdup(vus->node_name);
591
+}
592
+
593
+static void free_socket_addr(SocketAddress *addr)
594
+{
595
+ g_free(addr->u.q_unix.path);
596
+ g_free(addr);
597
+}
598
+
599
+static void vu_set_unix_socket(Object *obj, const char *value,
600
+ Error **errp)
601
+{
602
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
603
+
604
+ if (!vu_prop_modifiable(vus, errp)) {
605
+ return;
606
+ }
607
+
608
+ if (vus->addr) {
609
+ free_socket_addr(vus->addr);
610
+ }
611
+
612
+ SocketAddress *addr = g_new0(SocketAddress, 1);
613
+ addr->type = SOCKET_ADDRESS_TYPE_UNIX;
614
+ addr->u.q_unix.path = g_strdup(value);
615
+ vus->addr = addr;
616
+}
617
+
618
+static char *vu_get_unix_socket(Object *obj, Error **errp)
619
+{
620
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
621
+ return g_strdup(vus->addr->u.q_unix.path);
622
+}
623
+
624
+static bool vu_get_block_writable(Object *obj, Error **errp)
625
+{
626
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
627
+ return vus->writable;
628
+}
629
+
630
+static void vu_set_block_writable(Object *obj, bool value, Error **errp)
631
+{
632
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
633
+
634
+ if (!vu_prop_modifiable(vus, errp)) {
635
+ return;
636
+ }
637
+
638
+ vus->writable = value;
639
+}
640
+
641
+static void vu_get_blk_size(Object *obj, Visitor *v, const char *name,
642
+ void *opaque, Error **errp)
643
+{
644
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
645
+ uint32_t value = vus->blk_size;
646
+
647
+ visit_type_uint32(v, name, &value, errp);
648
+}
649
+
650
+static void vu_set_blk_size(Object *obj, Visitor *v, const char *name,
651
+ void *opaque, Error **errp)
652
+{
653
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
654
+
655
+ Error *local_err = NULL;
656
+ uint32_t value;
657
+
658
+ if (!vu_prop_modifiable(vus, errp)) {
659
+ return;
660
+ }
661
+
662
+ visit_type_uint32(v, name, &value, &local_err);
663
+ if (local_err) {
664
+ goto out;
665
+ }
666
+
667
+ check_block_size(object_get_typename(obj), name, value, &local_err);
668
+ if (local_err) {
669
+ goto out;
670
+ }
671
+
672
+ vus->blk_size = value;
673
+
674
+out:
675
+ error_propagate(errp, local_err);
676
+}
677
+
678
+static void vhost_user_blk_server_instance_finalize(Object *obj)
679
+{
680
+ VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
681
+
682
+ vhost_user_blk_server_stop(vub);
683
+
684
+ /*
685
+ * Unlike object_property_add_str, object_class_property_add_str
686
+ * doesn't have a release method. Thus manual memory freeing is
687
+ * needed.
688
+ */
689
+ free_socket_addr(vub->addr);
690
+ g_free(vub->node_name);
691
+}
692
+
693
+static void vhost_user_blk_server_complete(UserCreatable *obj, Error **errp)
694
+{
695
+ VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
696
+
697
+ vhost_user_blk_server_start(vub, errp);
698
+}
699
+
700
+static void vhost_user_blk_server_class_init(ObjectClass *klass,
701
+ void *class_data)
702
+{
703
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
704
+ ucc->complete = vhost_user_blk_server_complete;
705
+
706
+ object_class_property_add_bool(klass, "writable",
707
+ vu_get_block_writable,
708
+ vu_set_block_writable);
709
+
710
+ object_class_property_add_str(klass, "node-name",
711
+ vu_get_node_name,
712
+ vu_set_node_name);
713
+
714
+ object_class_property_add_str(klass, "unix-socket",
715
+ vu_get_unix_socket,
716
+ vu_set_unix_socket);
717
+
718
+ object_class_property_add(klass, "logical-block-size", "uint32",
719
+ vu_get_blk_size, vu_set_blk_size,
720
+ NULL, NULL);
721
+}
722
+
723
+static const TypeInfo vhost_user_blk_server_info = {
724
+ .name = TYPE_VHOST_USER_BLK_SERVER,
725
+ .parent = TYPE_OBJECT,
726
+ .instance_size = sizeof(VuBlockDev),
727
+ .instance_finalize = vhost_user_blk_server_instance_finalize,
728
+ .class_init = vhost_user_blk_server_class_init,
729
+ .interfaces = (InterfaceInfo[]) {
730
+ {TYPE_USER_CREATABLE},
731
+ {}
732
+ },
733
+};
734
+
735
+static void vhost_user_blk_server_register_types(void)
736
+{
737
+ type_register_static(&vhost_user_blk_server_info);
738
+}
739
+
740
+type_init(vhost_user_blk_server_register_types)
741
diff --git a/softmmu/vl.c b/softmmu/vl.c
742
index XXXXXXX..XXXXXXX 100644
743
--- a/softmmu/vl.c
744
+++ b/softmmu/vl.c
745
@@ -XXX,XX +XXX,XX @@ static bool object_create_initial(const char *type, QemuOpts *opts)
746
}
747
#endif
748
749
+ /* Reason: vhost-user-blk-server property "node-name" */
750
+ if (g_str_equal(type, "vhost-user-blk-server")) {
751
+ return false;
752
+ }
753
/*
754
* Reason: filter-* property "netdev" etc.
755
*/
756
diff --git a/block/meson.build b/block/meson.build
757
index XXXXXXX..XXXXXXX 100644
758
--- a/block/meson.build
759
+++ b/block/meson.build
760
@@ -XXX,XX +XXX,XX @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')
761
block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
762
block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c'))
763
block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
764
+block_ss.add(when: 'CONFIG_LINUX', if_true: files('export/vhost-user-blk-server.c', '../contrib/libvhost-user/libvhost-user.c'))
765
block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c'))
766
block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c'))
767
block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c'))
768
--
769
2.26.2
770
New patch
1
From: Coiby Xu <coiby.xu@gmail.com>
1
2
3
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
4
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
7
Message-id: 20200918080912.321299-8-coiby.xu@gmail.com
8
[Removed reference to vhost-user-blk-test.c; it will be sent in a
9
separate pull request.
10
--Stefan]
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
MAINTAINERS | 7 +++++++
14
1 file changed, 7 insertions(+)
15
16
diff --git a/MAINTAINERS b/MAINTAINERS
17
index XXXXXXX..XXXXXXX 100644
18
--- a/MAINTAINERS
19
+++ b/MAINTAINERS
20
@@ -XXX,XX +XXX,XX @@ L: qemu-block@nongnu.org
21
S: Supported
22
F: tests/image-fuzzer/
23
24
+Vhost-user block device backend server
25
+M: Coiby Xu <Coiby.Xu@gmail.com>
26
+S: Maintained
27
+F: block/export/vhost-user-blk-server.c
28
+F: util/vhost-user-server.c
29
+F: tests/qtest/libqos/vhost-user-blk.c
30
+
31
Replication
32
M: Wen Congyang <wencongyang2@huawei.com>
33
M: Xie Changlong <xiechanglong.d@gmail.com>
34
--
35
2.26.2
36
New patch
1
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2
Message-id: 20200924151549.913737-3-stefanha@redhat.com
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
---
5
util/vhost-user-server.c | 2 +-
6
1 file changed, 1 insertion(+), 1 deletion(-)
1
7
8
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/util/vhost-user-server.c
11
+++ b/util/vhost-user-server.c
12
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
13
return false;
14
}
15
16
- /* zero out unspecified fileds */
17
+ /* zero out unspecified fields */
18
*server = (VuServer) {
19
.listener = listener,
20
.vu_iface = vu_iface,
21
--
22
2.26.2
23
New patch
1
We already have access to the value with the correct type (ioc and sioc
2
are the same QIOChannel).
1
3
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Message-id: 20200924151549.913737-4-stefanha@redhat.com
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
---
8
util/vhost-user-server.c | 2 +-
9
1 file changed, 1 insertion(+), 1 deletion(-)
10
11
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/util/vhost-user-server.c
14
+++ b/util/vhost-user-server.c
15
@@ -XXX,XX +XXX,XX @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
16
server->ioc = QIO_CHANNEL(sioc);
17
object_ref(OBJECT(server->ioc));
18
qio_channel_attach_aio_context(server->ioc, server->ctx);
19
- qio_channel_set_blocking(QIO_CHANNEL(server->sioc), false, NULL);
20
+ qio_channel_set_blocking(server->ioc, false, NULL);
21
vu_client_start(server);
22
}
23
24
--
25
2.26.2
26
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
Explicitly deleting watches is not necessary since libvhost-user calls
2
2
remove_watch() during vu_deinit(). Add an assertion to check this
3
The qcow2 .bdrv_measure() code calculates the crypto payload offset.
3
though.
4
This logic really belongs in crypto/block.c where it can be reused by
5
other image formats.
6
7
The "luks" block driver will need this same logic in order to implement
8
.bdrv_measure(), so extract the qcrypto_block_calculate_payload_offset()
9
function now.
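
As an illustrative sketch only (not code from either series), a future
luks .bdrv_measure() implementation could call the new helper roughly
like this, assuming it has already built a QCryptoBlockCreateOptions and
needs no option name prefix:

    size_t headerlen;

    if (!qcrypto_block_calculate_payload_offset(create_opts, NULL,
                                                &headerlen, errp)) {
        return NULL;
    }
    /* headerlen bytes are reserved for LUKS metadata before the payload */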
10
4
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Reviewed-by: Max Reitz <mreitz@redhat.com>
6
Message-id: 20200924151549.913737-5-stefanha@redhat.com
13
Message-Id: <20200221112522.1497712-2-stefanha@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Signed-off-by: Max Reitz <mreitz@redhat.com>
15
---
8
---
16
block/qcow2.c | 74 +++++++++++-------------------------------
9
util/vhost-user-server.c | 19 ++++---------------
17
crypto/block.c | 36 ++++++++++++++++++++
10
1 file changed, 4 insertions(+), 15 deletions(-)
18
include/crypto/block.h | 22 +++++++++++++
19
3 files changed, 77 insertions(+), 55 deletions(-)
20
11
21
diff --git a/block/qcow2.c b/block/qcow2.c
12
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
22
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
23
--- a/block/qcow2.c
14
--- a/util/vhost-user-server.c
24
+++ b/block/qcow2.c
15
+++ b/util/vhost-user-server.c
25
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int qcow2_co_flush_to_os(BlockDriverState *bs)
16
@@ -XXX,XX +XXX,XX @@ static void close_client(VuServer *server)
26
return ret;
17
/* When this is set vu_client_trip will stop new processing vhost-user message */
27
}
18
server->sioc = NULL;
28
19
29
-static ssize_t qcow2_measure_crypto_hdr_init_func(QCryptoBlock *block,
20
- VuFdWatch *vu_fd_watch, *next;
30
- size_t headerlen, void *opaque, Error **errp)
21
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
31
-{
22
- aio_set_fd_handler(server->ioc->ctx, vu_fd_watch->fd, true, NULL,
32
- size_t *headerlenp = opaque;
23
- NULL, NULL, NULL);
33
-
34
- /* Stash away the payload size */
35
- *headerlenp = headerlen;
36
- return 0;
37
-}
38
-
39
-static ssize_t qcow2_measure_crypto_hdr_write_func(QCryptoBlock *block,
40
- size_t offset, const uint8_t *buf, size_t buflen,
41
- void *opaque, Error **errp)
42
-{
43
- /* Discard the bytes, we're not actually writing to an image */
44
- return buflen;
45
-}
46
-
47
-/* Determine the number of bytes for the LUKS payload */
48
-static bool qcow2_measure_luks_headerlen(QemuOpts *opts, size_t *len,
49
- Error **errp)
50
-{
51
- QDict *opts_qdict;
52
- QDict *cryptoopts_qdict;
53
- QCryptoBlockCreateOptions *cryptoopts;
54
- QCryptoBlock *crypto;
55
-
56
- /* Extract "encrypt." options into a qdict */
57
- opts_qdict = qemu_opts_to_qdict(opts, NULL);
58
- qdict_extract_subqdict(opts_qdict, &cryptoopts_qdict, "encrypt.");
59
- qobject_unref(opts_qdict);
60
-
61
- /* Build QCryptoBlockCreateOptions object from qdict */
62
- qdict_put_str(cryptoopts_qdict, "format", "luks");
63
- cryptoopts = block_crypto_create_opts_init(cryptoopts_qdict, errp);
64
- qobject_unref(cryptoopts_qdict);
65
- if (!cryptoopts) {
66
- return false;
67
- }
24
- }
68
-
25
-
69
- /* Fake LUKS creation in order to determine the payload size */
26
- while (!QTAILQ_EMPTY(&server->vu_fd_watches)) {
70
- crypto = qcrypto_block_create(cryptoopts, "encrypt.",
27
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
71
- qcow2_measure_crypto_hdr_init_func,
28
- if (!vu_fd_watch->processing) {
72
- qcow2_measure_crypto_hdr_write_func,
29
- QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
73
- len, errp);
30
- g_free(vu_fd_watch);
74
- qapi_free_QCryptoBlockCreateOptions(cryptoopts);
31
- }
75
- if (!crypto) {
32
- }
76
- return false;
77
- }
33
- }
78
-
34
-
79
- qcrypto_block_free(crypto);
35
while (server->processing_msg) {
80
- return true;
36
if (server->ioc->read_coroutine) {
81
-}
37
server->ioc->read_coroutine = NULL;
82
-
38
@@ -XXX,XX +XXX,XX @@ static void close_client(VuServer *server)
83
static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
39
}
84
Error **errp)
40
85
{
41
vu_deinit(&server->vu_dev);
86
@@ -XXX,XX +XXX,XX @@ static BlockMeasureInfo *qcow2_measure(QemuOpts *opts, BlockDriverState *in_bs,
87
g_free(optstr);
88
89
if (has_luks) {
90
+ g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL;
91
+ QDict *opts_qdict;
92
+ QDict *cryptoopts;
93
size_t headerlen;
94
95
- if (!qcow2_measure_luks_headerlen(opts, &headerlen, &local_err)) {
96
+ opts_qdict = qemu_opts_to_qdict(opts, NULL);
97
+ qdict_extract_subqdict(opts_qdict, &cryptoopts, "encrypt.");
98
+ qobject_unref(opts_qdict);
99
+
42
+
100
+ qdict_put_str(cryptoopts, "format", "luks");
43
+ /* vu_deinit() should have called remove_watch() */
44
+ assert(QTAILQ_EMPTY(&server->vu_fd_watches));
101
+
45
+
102
+ create_opts = block_crypto_create_opts_init(cryptoopts, errp);
46
object_unref(OBJECT(sioc));
103
+ qobject_unref(cryptoopts);
47
object_unref(OBJECT(server->ioc));
104
+ if (!create_opts) {
105
+ goto err;
106
+ }
107
+
108
+ if (!qcrypto_block_calculate_payload_offset(create_opts,
109
+ "encrypt.",
110
+ &headerlen,
111
+ &local_err)) {
112
goto err;
113
}
114
115
diff --git a/crypto/block.c b/crypto/block.c
116
index XXXXXXX..XXXXXXX 100644
117
--- a/crypto/block.c
118
+++ b/crypto/block.c
119
@@ -XXX,XX +XXX,XX @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
120
}
48
}
121
122
123
+static ssize_t qcrypto_block_headerlen_hdr_init_func(QCryptoBlock *block,
124
+ size_t headerlen, void *opaque, Error **errp)
125
+{
126
+ size_t *headerlenp = opaque;
127
+
128
+ /* Stash away the payload size */
129
+ *headerlenp = headerlen;
130
+ return 0;
131
+}
132
+
133
+
134
+static ssize_t qcrypto_block_headerlen_hdr_write_func(QCryptoBlock *block,
135
+ size_t offset, const uint8_t *buf, size_t buflen,
136
+ void *opaque, Error **errp)
137
+{
138
+ /* Discard the bytes, we're not actually writing to an image */
139
+ return buflen;
140
+}
141
+
142
+
143
+bool
144
+qcrypto_block_calculate_payload_offset(QCryptoBlockCreateOptions *create_opts,
145
+ const char *optprefix,
146
+ size_t *len,
147
+ Error **errp)
148
+{
149
+ /* Fake LUKS creation in order to determine the payload size */
150
+ g_autoptr(QCryptoBlock) crypto =
151
+ qcrypto_block_create(create_opts, optprefix,
152
+ qcrypto_block_headerlen_hdr_init_func,
153
+ qcrypto_block_headerlen_hdr_write_func,
154
+ len, errp);
155
+ return crypto != NULL;
156
+}
157
+
158
+
159
QCryptoBlockInfo *qcrypto_block_get_info(QCryptoBlock *block,
160
Error **errp)
161
{
162
diff --git a/include/crypto/block.h b/include/crypto/block.h
163
index XXXXXXX..XXXXXXX 100644
164
--- a/include/crypto/block.h
165
+++ b/include/crypto/block.h
166
@@ -XXX,XX +XXX,XX @@ QCryptoBlock *qcrypto_block_create(QCryptoBlockCreateOptions *options,
167
Error **errp);
168
169
170
+/**
171
+ * qcrypto_block_calculate_payload_offset:
172
+ * @create_opts: the encryption options
173
+ * @optprefix: name prefix for options
174
+ * @len: output for number of header bytes before payload
175
+ * @errp: pointer to a NULL-initialized error object
176
+ *
177
+ * Calculate the number of header bytes before the payload in an encrypted
178
+ * storage volume. The header is an area before the payload that is reserved
179
+ * for encryption metadata.
180
+ *
181
+ * Returns: true on success, false on error
182
+ */
183
+bool
184
+qcrypto_block_calculate_payload_offset(QCryptoBlockCreateOptions *create_opts,
185
+ const char *optprefix,
186
+ size_t *len,
187
+ Error **errp);
188
+
189
+
190
/**
191
* qcrypto_block_get_info:
192
* @block: the block encryption object
193
@@ -XXX,XX +XXX,XX @@ uint64_t qcrypto_block_get_sector_size(QCryptoBlock *block);
194
void qcrypto_block_free(QCryptoBlock *block);
195
196
G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlock, qcrypto_block_free)
197
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(QCryptoBlockCreateOptions,
198
+ qapi_free_QCryptoBlockCreateOptions)
199
200
#endif /* QCRYPTO_BLOCK_H */
201
--
49
--
202
2.24.1
50
2.26.2
203
51
204
1
From: Eric Blake <eblake@redhat.com>
1
Only one struct is needed per request. Drop req_data and the separate
2
VuBlockReq instance. Instead let vu_queue_pop() allocate everything at
3
once.
2
4
3
Tests 261 and 272 fail on RHEL 7 with coreutils 8.22, since od
5
This fixes the req_data memory leak in vu_block_virtio_process_req().
4
--endian was not added until coreutils 8.23. Fix this by manually
5
constructing the final value one byte at a time.
6
6
7
Fixes: fc8ba423
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Reported-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
8
Message-id: 20200924151549.913737-6-stefanha@redhat.com
9
Signed-off-by: Eric Blake <eblake@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
Message-Id: <20200226125424.481840-1-eblake@redhat.com>
12
Signed-off-by: Max Reitz <mreitz@redhat.com>
13
---
10
---
14
tests/qemu-iotests/common.rc | 22 +++++++++++++++++-----
11
block/export/vhost-user-blk-server.c | 68 +++++++++-------------------
15
1 file changed, 17 insertions(+), 5 deletions(-)
12
1 file changed, 21 insertions(+), 47 deletions(-)
16
13
17
diff --git a/tests/qemu-iotests/common.rc b/tests/qemu-iotests/common.rc
14
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/tests/qemu-iotests/common.rc
16
--- a/block/export/vhost-user-blk-server.c
20
+++ b/tests/qemu-iotests/common.rc
17
+++ b/block/export/vhost-user-blk-server.c
21
@@ -XXX,XX +XXX,XX @@ poke_file()
18
@@ -XXX,XX +XXX,XX @@ struct virtio_blk_inhdr {
22
# peek_file_le 'test.img' 512 2 => 65534
19
};
23
peek_file_le()
20
21
typedef struct VuBlockReq {
22
- VuVirtqElement *elem;
23
+ VuVirtqElement elem;
24
int64_t sector_num;
25
size_t size;
26
struct virtio_blk_inhdr *in;
27
@@ -XXX,XX +XXX,XX @@ static void vu_block_req_complete(VuBlockReq *req)
28
VuDev *vu_dev = &req->server->vu_dev;
29
30
/* IO size with 1 extra status byte */
31
- vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
32
+ vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
33
vu_queue_notify(vu_dev, req->vq);
34
35
- if (req->elem) {
36
- free(req->elem);
37
- }
38
-
39
- g_free(req);
40
+ free(req);
41
}
42
43
static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
44
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_flush(VuBlockReq *req)
45
blk_co_flush(backend);
46
}
47
48
-struct req_data {
49
- VuServer *server;
50
- VuVirtq *vq;
51
- VuVirtqElement *elem;
52
-};
53
-
54
static void coroutine_fn vu_block_virtio_process_req(void *opaque)
24
{
55
{
25
- # Wrap in echo $() to strip spaces
56
- struct req_data *data = opaque;
26
- echo $(od -j"$2" -N"$3" --endian=little -An -vtu"$3" "$1")
57
- VuServer *server = data->server;
27
+ local val=0 shift=0 byte
58
- VuVirtq *vq = data->vq;
59
- VuVirtqElement *elem = data->elem;
60
+ VuBlockReq *req = opaque;
61
+ VuServer *server = req->server;
62
+ VuVirtqElement *elem = &req->elem;
63
uint32_t type;
64
- VuBlockReq *req;
65
66
VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
67
BlockBackend *backend = vdev_blk->backend;
68
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
69
struct iovec *out_iov = elem->out_sg;
70
unsigned in_num = elem->in_num;
71
unsigned out_num = elem->out_num;
28
+
72
+
29
+ # coreutils' od --endian is not portable, so manually assemble bytes.
73
/* refer to hw/block/virtio_blk.c */
30
+ for byte in $(od -j"$2" -N"$3" -An -v -tu1 "$1"); do
74
if (elem->out_num < 1 || elem->in_num < 1) {
31
+ val=$(( val | (byte << shift) ))
75
error_report("virtio-blk request missing headers");
32
+ shift=$((shift + 8))
76
- free(elem);
33
+ done
77
- return;
34
+ printf %llu $val
78
+ goto err;
79
}
80
81
- req = g_new0(VuBlockReq, 1);
82
- req->server = server;
83
- req->vq = vq;
84
- req->elem = elem;
85
-
86
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
87
sizeof(req->out)) != sizeof(req->out))) {
88
error_report("virtio-blk request outhdr too short");
89
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
90
91
err:
92
free(elem);
93
- g_free(req);
94
- return;
35
}
95
}
36
96
37
# peek_file_be 'test.img' 512 2 => 65279
97
static void vu_block_process_vq(VuDev *vu_dev, int idx)
38
peek_file_be()
39
{
98
{
40
- # Wrap in echo $() to strip spaces
99
- VuServer *server;
41
- echo $(od -j"$2" -N"$3" --endian=big -An -vtu"$3" "$1")
100
- VuVirtq *vq;
42
+ local val=0 byte
101
- struct req_data *req_data;
102
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
103
+ VuVirtq *vq = vu_get_queue(vu_dev, idx);
104
105
- server = container_of(vu_dev, VuServer, vu_dev);
106
- assert(server);
107
-
108
- vq = vu_get_queue(vu_dev, idx);
109
- assert(vq);
110
- VuVirtqElement *elem;
111
while (1) {
112
- elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) +
113
- sizeof(VuBlockReq));
114
- if (elem) {
115
- req_data = g_new0(struct req_data, 1);
116
- req_data->server = server;
117
- req_data->vq = vq;
118
- req_data->elem = elem;
119
- Coroutine *co = qemu_coroutine_create(vu_block_virtio_process_req,
120
- req_data);
121
- aio_co_enter(server->ioc->ctx, co);
122
- } else {
123
+ VuBlockReq *req;
43
+
124
+
44
+ # coreutils' od --endian is not portable, so manually assemble bytes.
125
+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq));
45
+ for byte in $(od -j"$2" -N"$3" -An -v -tu1 "$1"); do
126
+ if (!req) {
46
+ val=$(( (val << 8) | byte ))
127
break;
47
+ done
128
}
48
+ printf %llu $val
129
+
130
+ req->server = server;
131
+ req->vq = vq;
132
+
133
+ Coroutine *co =
134
+ qemu_coroutine_create(vu_block_virtio_process_req, req);
135
+ qemu_coroutine_enter(co);
136
}
49
}
137
}
50
138
51
-# peek_file_raw 'test.img' 512 2 => '\xff\xfe'
52
+# peek_file_raw 'test.img' 512 2 => '\xff\xfe'. Do not use if the raw data
53
+# is likely to contain \0 or trailing \n.
54
peek_file_raw()
55
{
56
dd if="$1" bs=1 skip="$2" count="$3" status=none
57
--
139
--
58
2.24.1
140
2.26.2
59
141
60
New patch
1
The device panic notifier callback is not used. Drop it.
1
2
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Message-id: 20200924151549.913737-7-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
util/vhost-user-server.h | 3 ---
8
block/export/vhost-user-blk-server.c | 3 +--
9
util/vhost-user-server.c | 6 ------
10
3 files changed, 1 insertion(+), 11 deletions(-)
11
12
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/util/vhost-user-server.h
15
+++ b/util/vhost-user-server.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct VuFdWatch {
17
} VuFdWatch;
18
19
typedef struct VuServer VuServer;
20
-typedef void DevicePanicNotifierFn(VuServer *server);
21
22
struct VuServer {
23
QIONetListener *listener;
24
AioContext *ctx;
25
- DevicePanicNotifierFn *device_panic_notifier;
26
int max_queues;
27
const VuDevIface *vu_iface;
28
VuDev vu_dev;
29
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
30
SocketAddress *unix_socket,
31
AioContext *ctx,
32
uint16_t max_queues,
33
- DevicePanicNotifierFn *device_panic_notifier,
34
const VuDevIface *vu_iface,
35
Error **errp);
36
37
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/block/export/vhost-user-blk-server.c
40
+++ b/block/export/vhost-user-blk-server.c
41
@@ -XXX,XX +XXX,XX @@ static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
42
ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
43
44
if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
45
- VHOST_USER_BLK_MAX_QUEUES,
46
- NULL, &vu_block_iface,
47
+ VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface,
48
errp)) {
49
goto error;
50
}
51
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/util/vhost-user-server.c
54
+++ b/util/vhost-user-server.c
55
@@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf)
56
close_client(server);
57
}
58
59
- if (server->device_panic_notifier) {
60
- server->device_panic_notifier(server);
61
- }
62
-
63
/*
64
* Set the callback function for network listener so another
65
* vhost-user client can connect to this server
66
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
67
SocketAddress *socket_addr,
68
AioContext *ctx,
69
uint16_t max_queues,
70
- DevicePanicNotifierFn *device_panic_notifier,
71
const VuDevIface *vu_iface,
72
Error **errp)
73
{
74
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
75
.vu_iface = vu_iface,
76
.max_queues = max_queues,
77
.ctx = ctx,
78
- .device_panic_notifier = device_panic_notifier,
79
};
80
81
qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
82
--
83
2.26.2
84
New patch
1
fds[] is leaked when qio_channel_readv_full() fails.
1
2
3
Use vmsg->fds[] instead of keeping a local fds[] array. Then we can
4
reuse goto fail to clean up fds. vmsg->fd_num must be zeroed before the
5
loop to make this safe.
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-8-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
util/vhost-user-server.c | 50 ++++++++++++++++++----------------------
12
1 file changed, 23 insertions(+), 27 deletions(-)
13
14
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/util/vhost-user-server.c
17
+++ b/util/vhost-user-server.c
18
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
19
};
20
int rc, read_bytes = 0;
21
Error *local_err = NULL;
22
- /*
23
- * Store fds/nfds returned from qio_channel_readv_full into
24
- * temporary variables.
25
- *
26
- * VhostUserMsg is a packed structure, gcc will complain about passing
27
- * pointer to a packed structure member if we pass &VhostUserMsg.fd_num
28
- * and &VhostUserMsg.fds directly when calling qio_channel_readv_full,
29
- * thus two temporary variables nfds and fds are used here.
30
- */
31
- size_t nfds = 0, nfds_t = 0;
32
const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
33
- int *fds_t = NULL;
34
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
35
QIOChannel *ioc = server->ioc;
36
37
+ vmsg->fd_num = 0;
38
if (!ioc) {
39
error_report_err(local_err);
40
goto fail;
41
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
42
43
assert(qemu_in_coroutine());
44
do {
45
+ size_t nfds = 0;
46
+ int *fds = NULL;
47
+
48
/*
49
* qio_channel_readv_full may have short reads, keeping calling it
50
* until getting VHOST_USER_HDR_SIZE or 0 bytes in total
51
*/
52
- rc = qio_channel_readv_full(ioc, &iov, 1, &fds_t, &nfds_t, &local_err);
53
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
54
if (rc < 0) {
55
if (rc == QIO_CHANNEL_ERR_BLOCK) {
56
+ assert(local_err == NULL);
57
qio_channel_yield(ioc, G_IO_IN);
58
continue;
59
} else {
60
error_report_err(local_err);
61
- return false;
62
+ goto fail;
63
}
64
}
65
- read_bytes += rc;
66
- if (nfds_t > 0) {
67
- if (nfds + nfds_t > max_fds) {
68
+
69
+ if (nfds > 0) {
70
+ if (vmsg->fd_num + nfds > max_fds) {
71
error_report("A maximum of %zu fds are allowed, "
72
"however got %zu fds now",
73
- max_fds, nfds + nfds_t);
74
+ max_fds, vmsg->fd_num + nfds);
75
+ g_free(fds);
76
goto fail;
77
}
78
- memcpy(vmsg->fds + nfds, fds_t,
79
- nfds_t *sizeof(vmsg->fds[0]));
80
- nfds += nfds_t;
81
- g_free(fds_t);
82
+ memcpy(vmsg->fds + vmsg->fd_num, fds, nfds * sizeof(vmsg->fds[0]));
83
+ vmsg->fd_num += nfds;
84
+ g_free(fds);
85
}
86
- if (read_bytes == VHOST_USER_HDR_SIZE || rc == 0) {
87
- break;
88
+
89
+ if (rc == 0) { /* socket closed */
90
+ goto fail;
91
}
92
- iov.iov_base = (char *)vmsg + read_bytes;
93
- iov.iov_len = VHOST_USER_HDR_SIZE - read_bytes;
94
- } while (true);
95
96
- vmsg->fd_num = nfds;
97
+ iov.iov_base += rc;
98
+ iov.iov_len -= rc;
99
+ read_bytes += rc;
100
+ } while (read_bytes != VHOST_USER_HDR_SIZE);
101
+
102
/* qio_channel_readv_full will make socket fds blocking, unblock them */
103
vmsg_unblock_fds(vmsg);
104
if (vmsg->size > sizeof(vmsg->payload)) {
105
--
106
2.26.2
107
New patch
1
Unexpected EOF is an error that must be reported.
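
Because the end-of-file case carries no Error object, it has to be treated as
a failure explicitly rather than only checking for -1. As a rough analogy, the
standalone sketch below (plain read(2), not the QIOChannel API) shows the same
tri-state handling: 1 means the payload is complete, 0 means EOF, negative
means an error with detail attached, and only the complete case may proceed.

/* Standalone sketch, not QEMU code: treat EOF and error both as failures. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int read_full(int fd, void *buf, size_t len)
{
    size_t done = 0;

    while (done < len) {
        ssize_t rc = read(fd, (char *)buf + done, len - done);
        if (rc < 0) {
            return -1;              /* error, errno is set */
        }
        if (rc == 0) {
            return 0;               /* EOF before the payload was complete */
        }
        done += rc;
    }
    return 1;                       /* got everything */
}

int main(void)
{
    int fds[2];
    char payload[8];

    if (pipe(fds) < 0) {
        return 1;
    }
    (void)write(fds[1], "abc", 3);  /* short payload... */
    close(fds[1]);                  /* ...then EOF */

    int rc = read_full(fds[0], payload, sizeof(payload));
    if (rc != 1) {                  /* EOF and error are both failures */
        if (rc < 0) {
            fprintf(stderr, "read failed: %s\n", strerror(errno));
        } else {
            fprintf(stderr, "unexpected EOF\n");
        }
        return 1;
    }
    return 0;
}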
1
2
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Message-id: 20200924151549.913737-9-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
util/vhost-user-server.c | 6 ++++--
8
1 file changed, 4 insertions(+), 2 deletions(-)
9
10
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/util/vhost-user-server.c
13
+++ b/util/vhost-user-server.c
14
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
15
};
16
if (vmsg->size) {
17
rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
18
- if (rc == -1) {
19
- error_report_err(local_err);
20
+ if (rc != 1) {
21
+ if (local_err) {
22
+ error_report_err(local_err);
23
+ }
24
goto fail;
25
}
26
}
27
--
28
2.26.2
29
1
From: David Edmondson <david.edmondson@oracle.com>
1
The vu_client_trip() coroutine is leaked during AioContext switching. It
2
is also unsafe to destroy the vu_dev in panic_cb() since its callers
3
still access it in some cases.
2
4
3
RFC 7230 section 3.2 indicates that whitespace is permitted between
5
Rework the lifecycle to solve these safety issues.
4
the field name and field value and after the field value.
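
To make the vu_client_trip() rework concrete: instead of tearing the client
down from arbitrary callers, the connection is shut down and the coroutine
exits on its own once vu_message_read() stops returning data. The standalone
sketch below (plain sockets, not libvhost-user or QEMU code) shows the
underlying mechanism: after shutdown(2) a reader gets EOF instead of blocking,
which is what lets a dispatch loop wind down cleanly.

/* Standalone sketch: shutdown(2) turns further reads into EOF. */
#include <stdio.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int sv[2];
    char buf[16];

    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
        return 1;
    }

    /* normal traffic is still delivered... */
    (void)write(sv[1], "ping", 4);
    ssize_t n = read(sv[0], buf, sizeof(buf));
    printf("read %zd bytes\n", n);

    /* ...but after shutdown the reader sees EOF instead of blocking */
    shutdown(sv[0], SHUT_RDWR);
    n = read(sv[0], buf, sizeof(buf));
    printf("after shutdown: read returned %zd\n", n);   /* 0 == EOF */

    close(sv[0]);
    close(sv[1]);
    return 0;
}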
5
6
6
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Message-Id: <20200224101310.101169-2-david.edmondson@oracle.com>
8
Message-id: 20200924151549.913737-10-stefanha@redhat.com
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
---
10
---
11
block/curl.c | 31 +++++++++++++++++++++++++++----
11
util/vhost-user-server.h | 29 ++--
12
1 file changed, 27 insertions(+), 4 deletions(-)
12
block/export/vhost-user-blk-server.c | 9 +-
13
util/vhost-user-server.c | 245 +++++++++++++++------------
14
3 files changed, 155 insertions(+), 128 deletions(-)
13
15
14
diff --git a/block/curl.c b/block/curl.c
16
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
15
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
16
--- a/block/curl.c
18
--- a/util/vhost-user-server.h
17
+++ b/block/curl.c
19
+++ b/util/vhost-user-server.h
18
@@ -XXX,XX +XXX,XX @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
20
@@ -XXX,XX +XXX,XX @@
19
{
21
#include "qapi/error.h"
20
BDRVCURLState *s = opaque;
22
#include "standard-headers/linux/virtio_blk.h"
21
size_t realsize = size * nmemb;
23
22
- const char *accept_line = "Accept-Ranges: bytes";
24
+/* A kick fd that we monitor on behalf of libvhost-user */
23
+ const char *header = (char *)ptr;
25
typedef struct VuFdWatch {
24
+ const char *end = header + realsize;
26
VuDev *vu_dev;
25
+ const char *accept_ranges = "Accept-Ranges:";
27
int fd; /*kick fd*/
26
+ const char *bytes = "bytes";
28
void *pvt;
27
29
vu_watch_cb cb;
28
- if (realsize >= strlen(accept_line)
30
- bool processing;
29
- && strncmp((char *)ptr, accept_line, strlen(accept_line)) == 0) {
31
QTAILQ_ENTRY(VuFdWatch) next;
30
- s->accept_range = true;
32
} VuFdWatch;
31
+ if (realsize >= strlen(accept_ranges)
33
32
+ && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) {
34
-typedef struct VuServer VuServer;
33
+
35
-
34
+ char *p = strchr(header, ':') + 1;
36
-struct VuServer {
35
+
37
+/**
36
+ /* Skip whitespace between the header name and value. */
38
+ * VuServer:
37
+ while (p < end && *p && g_ascii_isspace(*p)) {
39
+ * A vhost-user server instance with user-defined VuDevIface callbacks.
38
+ p++;
40
+ * Vhost-user device backends can be implemented using VuServer. VuDevIface
41
+ * callbacks and virtqueue kicks run in the given AioContext.
42
+ */
43
+typedef struct {
44
QIONetListener *listener;
45
+ QEMUBH *restart_listener_bh;
46
AioContext *ctx;
47
int max_queues;
48
const VuDevIface *vu_iface;
49
+
50
+ /* Protected by ctx lock */
51
VuDev vu_dev;
52
QIOChannel *ioc; /* The I/O channel with the client */
53
QIOChannelSocket *sioc; /* The underlying data channel with the client */
54
- /* IOChannel for fd provided via VHOST_USER_SET_SLAVE_REQ_FD */
55
- QIOChannel *ioc_slave;
56
- QIOChannelSocket *sioc_slave;
57
- Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
58
QTAILQ_HEAD(, VuFdWatch) vu_fd_watches;
59
- /* restart coroutine co_trip if AIOContext is changed */
60
- bool aio_context_changed;
61
- bool processing_msg;
62
-};
63
+
64
+ Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
65
+} VuServer;
66
67
bool vhost_user_server_start(VuServer *server,
68
SocketAddress *unix_socket,
69
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
70
71
void vhost_user_server_stop(VuServer *server);
72
73
-void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx);
74
+void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
75
+void vhost_user_server_detach_aio_context(VuServer *server);
76
77
#endif /* VHOST_USER_SERVER_H */
78
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/block/export/vhost-user-blk-server.c
81
+++ b/block/export/vhost-user-blk-server.c
82
@@ -XXX,XX +XXX,XX @@ static const VuDevIface vu_block_iface = {
83
static void blk_aio_attached(AioContext *ctx, void *opaque)
84
{
85
VuBlockDev *vub_dev = opaque;
86
- aio_context_acquire(ctx);
87
- vhost_user_server_set_aio_context(&vub_dev->vu_server, ctx);
88
- aio_context_release(ctx);
89
+ vhost_user_server_attach_aio_context(&vub_dev->vu_server, ctx);
90
}
91
92
static void blk_aio_detach(void *opaque)
93
{
94
VuBlockDev *vub_dev = opaque;
95
- AioContext *ctx = vub_dev->vu_server.ctx;
96
- aio_context_acquire(ctx);
97
- vhost_user_server_set_aio_context(&vub_dev->vu_server, NULL);
98
- aio_context_release(ctx);
99
+ vhost_user_server_detach_aio_context(&vub_dev->vu_server);
100
}
101
102
static void
103
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/util/vhost-user-server.c
106
+++ b/util/vhost-user-server.c
107
@@ -XXX,XX +XXX,XX @@
108
*/
109
#include "qemu/osdep.h"
110
#include "qemu/main-loop.h"
111
+#include "block/aio-wait.h"
112
#include "vhost-user-server.h"
113
114
+/*
115
+ * Theory of operation:
116
+ *
117
+ * VuServer is started and stopped by vhost_user_server_start() and
118
+ * vhost_user_server_stop() from the main loop thread. Starting the server
119
+ * opens a vhost-user UNIX domain socket and listens for incoming connections.
120
+ * Only one connection is allowed at a time.
121
+ *
122
+ * The connection is handled by the vu_client_trip() coroutine in the
123
+ * VuServer->ctx AioContext. The coroutine consists of a vu_dispatch() loop
124
+ * where libvhost-user calls vu_message_read() to receive the next vhost-user
125
+ * protocol messages over the UNIX domain socket.
126
+ *
127
+ * When virtqueues are set up libvhost-user calls set_watch() to monitor kick
128
+ * fds. These fds are also handled in the VuServer->ctx AioContext.
129
+ *
130
+ * Both vu_client_trip() and kick fd monitoring can be stopped by shutting down
131
+ * the socket connection. Shutting down the socket connection causes
132
+ * vu_message_read() to fail since no more data can be received from the socket.
133
+ * After vu_dispatch() fails, vu_client_trip() calls vu_deinit() to stop
134
+ * libvhost-user before terminating the coroutine. vu_deinit() calls
135
+ * remove_watch() to stop monitoring kick fds and this stops virtqueue
136
+ * processing.
137
+ *
138
+ * When vu_client_trip() has finished cleaning up it schedules a BH in the main
139
+ * loop thread to accept the next client connection.
140
+ *
141
+ * When libvhost-user detects an error it calls panic_cb() and sets the
142
+ * dev->broken flag. Both vu_client_trip() and kick fd processing stop when
143
+ * the dev->broken flag is set.
144
+ *
145
+ * It is possible to switch AioContexts using
146
+ * vhost_user_server_detach_aio_context() and
147
+ * vhost_user_server_attach_aio_context(). They stop monitoring fds in the old
148
+ * AioContext and resume monitoring in the new AioContext. The vu_client_trip()
149
+ * coroutine remains in a yielded state during the switch. This is made
150
+ * possible by QIOChannel's support for spurious coroutine re-entry in
151
+ * qio_channel_yield(). The coroutine will restart I/O when re-entered from the
152
+ * new AioContext.
153
+ */
154
+
155
static void vmsg_close_fds(VhostUserMsg *vmsg)
156
{
157
int i;
158
@@ -XXX,XX +XXX,XX @@ static void vmsg_unblock_fds(VhostUserMsg *vmsg)
159
}
160
}
161
162
-static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
163
- gpointer opaque);
164
-
165
-static void close_client(VuServer *server)
166
-{
167
- /*
168
- * Before closing the client
169
- *
170
- * 1. Let vu_client_trip stop processing new vhost-user msg
171
- *
172
- * 2. remove kick_handler
173
- *
174
- * 3. wait for the kick handler to be finished
175
- *
176
- * 4. wait for the current vhost-user msg to be finished processing
177
- */
178
-
179
- QIOChannelSocket *sioc = server->sioc;
180
- /* When this is set vu_client_trip will stop new processing vhost-user message */
181
- server->sioc = NULL;
182
-
183
- while (server->processing_msg) {
184
- if (server->ioc->read_coroutine) {
185
- server->ioc->read_coroutine = NULL;
186
- qio_channel_set_aio_fd_handler(server->ioc, server->ioc->ctx, NULL,
187
- NULL, server->ioc);
188
- server->processing_msg = false;
189
- }
190
- }
191
-
192
- vu_deinit(&server->vu_dev);
193
-
194
- /* vu_deinit() should have called remove_watch() */
195
- assert(QTAILQ_EMPTY(&server->vu_fd_watches));
196
-
197
- object_unref(OBJECT(sioc));
198
- object_unref(OBJECT(server->ioc));
199
-}
200
-
201
static void panic_cb(VuDev *vu_dev, const char *buf)
202
{
203
- VuServer *server = container_of(vu_dev, VuServer, vu_dev);
204
-
205
- /* avoid while loop in close_client */
206
- server->processing_msg = false;
207
-
208
- if (buf) {
209
- error_report("vu_panic: %s", buf);
210
- }
211
-
212
- if (server->sioc) {
213
- close_client(server);
214
- }
215
-
216
- /*
217
- * Set the callback function for network listener so another
218
- * vhost-user client can connect to this server
219
- */
220
- qio_net_listener_set_client_func(server->listener,
221
- vu_accept,
222
- server,
223
- NULL);
224
+ error_report("vu_panic: %s", buf);
225
}
226
227
static bool coroutine_fn
228
@@ -XXX,XX +XXX,XX @@ fail:
229
return false;
230
}
231
232
-
233
-static void vu_client_start(VuServer *server);
234
static coroutine_fn void vu_client_trip(void *opaque)
235
{
236
VuServer *server = opaque;
237
+ VuDev *vu_dev = &server->vu_dev;
238
239
- while (!server->aio_context_changed && server->sioc) {
240
- server->processing_msg = true;
241
- vu_dispatch(&server->vu_dev);
242
- server->processing_msg = false;
243
+ while (!vu_dev->broken && vu_dispatch(vu_dev)) {
244
+ /* Keep running */
245
}
246
247
- if (server->aio_context_changed && server->sioc) {
248
- server->aio_context_changed = false;
249
- vu_client_start(server);
250
- }
251
-}
252
+ vu_deinit(vu_dev);
253
+
254
+ /* vu_deinit() should have called remove_watch() */
255
+ assert(QTAILQ_EMPTY(&server->vu_fd_watches));
256
+
257
+ object_unref(OBJECT(server->sioc));
258
+ server->sioc = NULL;
259
260
-static void vu_client_start(VuServer *server)
261
-{
262
- server->co_trip = qemu_coroutine_create(vu_client_trip, server);
263
- aio_co_enter(server->ctx, server->co_trip);
264
+ object_unref(OBJECT(server->ioc));
265
+ server->ioc = NULL;
266
+
267
+ server->co_trip = NULL;
268
+ if (server->restart_listener_bh) {
269
+ qemu_bh_schedule(server->restart_listener_bh);
270
+ }
271
+ aio_wait_kick();
272
}
273
274
/*
275
@@ -XXX,XX +XXX,XX @@ static void vu_client_start(VuServer *server)
276
static void kick_handler(void *opaque)
277
{
278
VuFdWatch *vu_fd_watch = opaque;
279
- vu_fd_watch->processing = true;
280
- vu_fd_watch->cb(vu_fd_watch->vu_dev, 0, vu_fd_watch->pvt);
281
- vu_fd_watch->processing = false;
282
+ VuDev *vu_dev = vu_fd_watch->vu_dev;
283
+
284
+ vu_fd_watch->cb(vu_dev, 0, vu_fd_watch->pvt);
285
+
286
+ /* Stop vu_client_trip() if an error occurred in vu_fd_watch->cb() */
287
+ if (vu_dev->broken) {
288
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
289
+
290
+ qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
291
+ }
292
}
293
294
-
295
static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
296
{
297
298
@@ -XXX,XX +XXX,XX @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
299
qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");
300
server->ioc = QIO_CHANNEL(sioc);
301
object_ref(OBJECT(server->ioc));
302
- qio_channel_attach_aio_context(server->ioc, server->ctx);
303
+
304
+ /* TODO vu_message_write() spins if non-blocking! */
305
qio_channel_set_blocking(server->ioc, false, NULL);
306
- vu_client_start(server);
307
+
308
+ server->co_trip = qemu_coroutine_create(vu_client_trip, server);
309
+
310
+ aio_context_acquire(server->ctx);
311
+ vhost_user_server_attach_aio_context(server, server->ctx);
312
+ aio_context_release(server->ctx);
313
}
314
315
-
316
void vhost_user_server_stop(VuServer *server)
317
{
318
+ aio_context_acquire(server->ctx);
319
+
320
+ qemu_bh_delete(server->restart_listener_bh);
321
+ server->restart_listener_bh = NULL;
322
+
323
if (server->sioc) {
324
- close_client(server);
325
+ VuFdWatch *vu_fd_watch;
326
+
327
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
328
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
329
+ NULL, NULL, NULL, vu_fd_watch);
39
+ }
330
+ }
40
+
331
+
41
+ if (end - p >= strlen(bytes)
332
+ qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
42
+ && strncmp(p, bytes, strlen(bytes)) == 0) {
333
+
43
+
334
+ AIO_WAIT_WHILE(server->ctx, server->co_trip);
44
+ /* Check that there is nothing but whitespace after the value. */
335
}
45
+ p += strlen(bytes);
336
46
+ while (p < end && *p && g_ascii_isspace(*p)) {
337
+ aio_context_release(server->ctx);
47
+ p++;
338
+
48
+ }
339
if (server->listener) {
49
+
340
qio_net_listener_disconnect(server->listener);
50
+ if (p == end || !*p) {
341
object_unref(OBJECT(server->listener));
51
+ s->accept_range = true;
342
}
52
+ }
343
+}
344
+
345
+/*
346
+ * Allow the next client to connect to the server. Called from a BH in the main
347
+ * loop.
348
+ */
349
+static void restart_listener_bh(void *opaque)
350
+{
351
+ VuServer *server = opaque;
352
353
+ qio_net_listener_set_client_func(server->listener, vu_accept, server,
354
+ NULL);
355
}
356
357
-void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx)
358
+/* Called with ctx acquired */
359
+void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
360
{
361
- VuFdWatch *vu_fd_watch, *next;
362
- void *opaque = NULL;
363
- IOHandler *io_read = NULL;
364
- bool attach;
365
+ VuFdWatch *vu_fd_watch;
366
367
- server->ctx = ctx ? ctx : qemu_get_aio_context();
368
+ server->ctx = ctx;
369
370
if (!server->sioc) {
371
- /* not yet serving any client*/
372
return;
373
}
374
375
- if (ctx) {
376
- qio_channel_attach_aio_context(server->ioc, ctx);
377
- server->aio_context_changed = true;
378
- io_read = kick_handler;
379
- attach = true;
380
- } else {
381
+ qio_channel_attach_aio_context(server->ioc, ctx);
382
+
383
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
384
+ aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL,
385
+ NULL, vu_fd_watch);
386
+ }
387
+
388
+ aio_co_schedule(ctx, server->co_trip);
389
+}
390
+
391
+/* Called with server->ctx acquired */
392
+void vhost_user_server_detach_aio_context(VuServer *server)
393
+{
394
+ if (server->sioc) {
395
+ VuFdWatch *vu_fd_watch;
396
+
397
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
398
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
399
+ NULL, NULL, NULL, vu_fd_watch);
53
+ }
400
+ }
54
}
401
+
55
402
qio_channel_detach_aio_context(server->ioc);
56
return realsize;
403
- /* server->ioc->ctx keeps the old AioConext */
404
- ctx = server->ioc->ctx;
405
- attach = false;
406
}
407
408
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
409
- if (vu_fd_watch->cb) {
410
- opaque = attach ? vu_fd_watch : NULL;
411
- aio_set_fd_handler(ctx, vu_fd_watch->fd, true,
412
- io_read, NULL, NULL,
413
- opaque);
414
- }
415
- }
416
+ server->ctx = NULL;
417
}
418
419
-
420
bool vhost_user_server_start(VuServer *server,
421
SocketAddress *socket_addr,
422
AioContext *ctx,
423
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
424
const VuDevIface *vu_iface,
425
Error **errp)
426
{
427
+ QEMUBH *bh;
428
QIONetListener *listener = qio_net_listener_new();
429
if (qio_net_listener_open_sync(listener, socket_addr, 1,
430
errp) < 0) {
431
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
432
return false;
433
}
434
435
+ bh = qemu_bh_new(restart_listener_bh, server);
436
+
437
/* zero out unspecified fields */
438
*server = (VuServer) {
439
.listener = listener,
440
+ .restart_listener_bh = bh,
441
.vu_iface = vu_iface,
442
.max_queues = max_queues,
443
.ctx = ctx,
57
--
444
--
58
2.24.1
445
2.26.2
59
446
60
New patch
1
Propagate the flush return value since errors are possible.
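
A minimal sketch of the resulting behaviour (standalone C, not the QEMU code;
demo_flush() stands in for blk_co_flush()): only a zero return is reported to
the guest as VIRTIO_BLK_S_OK, anything else becomes VIRTIO_BLK_S_IOERR. The
status values are the ones defined by the VIRTIO specification for virtio-blk
requests.

#include <stdint.h>
#include <stdio.h>

#define VIRTIO_BLK_S_OK     0
#define VIRTIO_BLK_S_IOERR  1

/* stand-in for blk_co_flush(); returns 0 or a negative errno */
static int demo_flush(int simulate_error)
{
    return simulate_error ? -5 /* -EIO */ : 0;
}

static uint8_t complete_flush(int simulate_error)
{
    return demo_flush(simulate_error) == 0 ? VIRTIO_BLK_S_OK
                                           : VIRTIO_BLK_S_IOERR;
}

int main(void)
{
    printf("flush ok    -> status %d\n", complete_flush(0));
    printf("flush fails -> status %d\n", complete_flush(1));
    return 0;
}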
1
2
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Message-id: 20200924151549.913737-11-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
block/export/vhost-user-blk-server.c | 11 +++++++----
8
1 file changed, 7 insertions(+), 4 deletions(-)
9
10
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/block/export/vhost-user-blk-server.c
13
+++ b/block/export/vhost-user-blk-server.c
14
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
15
return -EINVAL;
16
}
17
18
-static void coroutine_fn vu_block_flush(VuBlockReq *req)
19
+static int coroutine_fn vu_block_flush(VuBlockReq *req)
20
{
21
VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
22
BlockBackend *backend = vdev_blk->backend;
23
- blk_co_flush(backend);
24
+ return blk_co_flush(backend);
25
}
26
27
static void coroutine_fn vu_block_virtio_process_req(void *opaque)
28
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
29
break;
30
}
31
case VIRTIO_BLK_T_FLUSH:
32
- vu_block_flush(req);
33
- req->in->status = VIRTIO_BLK_S_OK;
34
+ if (vu_block_flush(req) == 0) {
35
+ req->in->status = VIRTIO_BLK_S_OK;
36
+ } else {
37
+ req->in->status = VIRTIO_BLK_S_IOERR;
38
+ }
39
break;
40
case VIRTIO_BLK_T_GET_ID: {
41
size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
42
--
43
2.26.2
44
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
Use the new QAPI block exports API instead of defining our own QOM
2
2
objects.
3
An offset/bytes pair is the more usual naming in the block layer, so let's use it.
3
4
4
This is a large change because the lifecycle of VuBlockDev needs to
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
5
follow BlockExportDriver. QOM properties are replaced by QAPI options
6
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
6
objects.
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
7
8
Message-Id: <20200311103004.7649-8-vsementsov@virtuozzo.com>
8
VuBlockDev is renamed VuBlkExport and contains a BlockExport field.
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
Several fields can be dropped since BlockExport already has equivalents.
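
The structural pattern behind this conversion is ordinary struct embedding:
the generic export state is a member of the backend-specific struct, and
callbacks that only receive the generic pointer recover the containing object
with container_of(). A standalone sketch (illustrative names, and a simplified
container_of macro rather than QEMU's type-checked version):

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
    ((type *)((char *)(ptr) - offsetof(type, member)))

typedef struct {
    const char *id;          /* generic export state */
} Export;

typedef struct {
    Export export;           /* embedded base, like BlockExport in VuBlkExport */
    unsigned int blk_size;   /* backend-specific state */
} VhostUserExport;

/* a callback that only gets the generic pointer */
static void show(Export *exp)
{
    VhostUserExport *vexp = container_of(exp, VhostUserExport, export);

    printf("export %s, blk_size %u\n", exp->id, vexp->blk_size);
}

int main(void)
{
    VhostUserExport vexp = {
        .export = { .id = "export0" },
        .blk_size = 512,
    };

    show(&vexp.export);
    return 0;
}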
10
11
The file names and meson build integration will be adjusted in a future
12
patch. libvhost-user should probably be built as a static library that
13
is linked into QEMU instead of as a .c file that results in duplicate
14
compilation.
15
16
The new command-line syntax is:
17
18
$ qemu-storage-daemon \
19
--blockdev file,node-name=drive0,filename=test.img \
20
--export vhost-user-blk,node-name=drive0,id=export0,unix-socket=/tmp/vhost-user-blk.sock
21
22
Note that unix-socket is optional because we may wish to accept chardevs
23
too in the future.
24
25
Markus noted that supported address families are not explicit in the
26
QAPI schema. It is unlikely that support for more address families will
27
be added since file descriptor passing is required and few address
28
families support it. If a new address family needs to be added, then the
29
QAPI 'features' syntax can be used to advertise them.
30
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
32
Acked-by: Markus Armbruster <armbru@redhat.com>
33
Message-id: 20200924151549.913737-12-stefanha@redhat.com
34
[Skip test on big-endian host architectures because this device doesn't
35
support them yet (as already mentioned in a code comment).
36
--Stefan]
37
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
38
---
11
block/block-copy.c | 82 +++++++++++++++++++-------------------
39
qapi/block-export.json | 21 +-
12
include/block/block-copy.h | 4 +-
40
block/export/vhost-user-blk-server.h | 23 +-
13
2 files changed, 43 insertions(+), 43 deletions(-)
41
block/export/export.c | 6 +
14
42
block/export/vhost-user-blk-server.c | 452 +++++++--------------------
15
diff --git a/block/block-copy.c b/block/block-copy.c
43
util/vhost-user-server.c | 10 +-
44
block/export/meson.build | 1 +
45
block/meson.build | 1 -
46
7 files changed, 156 insertions(+), 358 deletions(-)
47
48
diff --git a/qapi/block-export.json b/qapi/block-export.json
16
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
17
--- a/block/block-copy.c
50
--- a/qapi/block-export.json
18
+++ b/block/block-copy.c
51
+++ b/qapi/block-export.json
19
@@ -XXX,XX +XXX,XX @@
52
@@ -XXX,XX +XXX,XX @@
20
#define BLOCK_COPY_MAX_MEM (128 * MiB)
53
'data': { '*name': 'str', '*description': 'str',
21
54
'*bitmap': 'str' } }
22
static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
55
23
- int64_t start,
56
+##
24
+ int64_t offset,
57
+# @BlockExportOptionsVhostUserBlk:
25
int64_t bytes)
58
+#
26
{
59
+# A vhost-user-blk block export.
27
BlockCopyInFlightReq *req;
60
+#
28
61
+# @addr: The vhost-user socket on which to listen. Both 'unix' and 'fd'
29
QLIST_FOREACH(req, &s->inflight_reqs, list) {
62
+# SocketAddress types are supported. Passed fds must be UNIX domain
30
- if (start + bytes > req->start && start < req->start + req->bytes) {
63
+# sockets.
31
+ if (offset + bytes > req->offset && offset < req->offset + req->bytes) {
64
+# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes.
32
return req;
65
+#
66
+# Since: 5.2
67
+##
68
+{ 'struct': 'BlockExportOptionsVhostUserBlk',
69
+ 'data': { 'addr': 'SocketAddress', '*logical-block-size': 'size' } }
70
+
71
##
72
# @NbdServerAddOptions:
73
#
74
@@ -XXX,XX +XXX,XX @@
75
# An enumeration of block export types
76
#
77
# @nbd: NBD export
78
+# @vhost-user-blk: vhost-user-blk export (since 5.2)
79
#
80
# Since: 4.2
81
##
82
{ 'enum': 'BlockExportType',
83
- 'data': [ 'nbd' ] }
84
+ 'data': [ 'nbd', 'vhost-user-blk' ] }
85
86
##
87
# @BlockExportOptions:
88
@@ -XXX,XX +XXX,XX @@
89
'*writethrough': 'bool' },
90
'discriminator': 'type',
91
'data': {
92
- 'nbd': 'BlockExportOptionsNbd'
93
+ 'nbd': 'BlockExportOptionsNbd',
94
+ 'vhost-user-blk': 'BlockExportOptionsVhostUserBlk'
95
} }
96
97
##
98
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
99
index XXXXXXX..XXXXXXX 100644
100
--- a/block/export/vhost-user-blk-server.h
101
+++ b/block/export/vhost-user-blk-server.h
102
@@ -XXX,XX +XXX,XX @@
103
104
#ifndef VHOST_USER_BLK_SERVER_H
105
#define VHOST_USER_BLK_SERVER_H
106
-#include "util/vhost-user-server.h"
107
108
-typedef struct VuBlockDev VuBlockDev;
109
-#define TYPE_VHOST_USER_BLK_SERVER "vhost-user-blk-server"
110
-#define VHOST_USER_BLK_SERVER(obj) \
111
- OBJECT_CHECK(VuBlockDev, obj, TYPE_VHOST_USER_BLK_SERVER)
112
+#include "block/export.h"
113
114
-/* vhost user block device */
115
-struct VuBlockDev {
116
- Object parent_obj;
117
- char *node_name;
118
- SocketAddress *addr;
119
- AioContext *ctx;
120
- VuServer vu_server;
121
- bool running;
122
- uint32_t blk_size;
123
- BlockBackend *backend;
124
- QIOChannelSocket *sioc;
125
- QTAILQ_ENTRY(VuBlockDev) next;
126
- struct virtio_blk_config blkcfg;
127
- bool writable;
128
-};
129
+/* For block/export/export.c */
130
+extern const BlockExportDriver blk_exp_vhost_user_blk;
131
132
#endif /* VHOST_USER_BLK_SERVER_H */
133
diff --git a/block/export/export.c b/block/export/export.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/block/export/export.c
136
+++ b/block/export/export.c
137
@@ -XXX,XX +XXX,XX @@
138
#include "sysemu/block-backend.h"
139
#include "block/export.h"
140
#include "block/nbd.h"
141
+#if CONFIG_LINUX
142
+#include "block/export/vhost-user-blk-server.h"
143
+#endif
144
#include "qapi/error.h"
145
#include "qapi/qapi-commands-block-export.h"
146
#include "qapi/qapi-events-block-export.h"
147
@@ -XXX,XX +XXX,XX @@
148
149
static const BlockExportDriver *blk_exp_drivers[] = {
150
&blk_exp_nbd,
151
+#if CONFIG_LINUX
152
+ &blk_exp_vhost_user_blk,
153
+#endif
154
};
155
156
/* Only accessed from the main thread */
157
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
158
index XXXXXXX..XXXXXXX 100644
159
--- a/block/export/vhost-user-blk-server.c
160
+++ b/block/export/vhost-user-blk-server.c
161
@@ -XXX,XX +XXX,XX @@
162
*/
163
#include "qemu/osdep.h"
164
#include "block/block.h"
165
+#include "contrib/libvhost-user/libvhost-user.h"
166
+#include "standard-headers/linux/virtio_blk.h"
167
+#include "util/vhost-user-server.h"
168
#include "vhost-user-blk-server.h"
169
#include "qapi/error.h"
170
#include "qom/object_interfaces.h"
171
@@ -XXX,XX +XXX,XX @@ struct virtio_blk_inhdr {
172
unsigned char status;
173
};
174
175
-typedef struct VuBlockReq {
176
+typedef struct VuBlkReq {
177
VuVirtqElement elem;
178
int64_t sector_num;
179
size_t size;
180
@@ -XXX,XX +XXX,XX @@ typedef struct VuBlockReq {
181
struct virtio_blk_outhdr out;
182
VuServer *server;
183
struct VuVirtq *vq;
184
-} VuBlockReq;
185
+} VuBlkReq;
186
187
-static void vu_block_req_complete(VuBlockReq *req)
188
+/* vhost user block device */
189
+typedef struct {
190
+ BlockExport export;
191
+ VuServer vu_server;
192
+ uint32_t blk_size;
193
+ QIOChannelSocket *sioc;
194
+ struct virtio_blk_config blkcfg;
195
+ bool writable;
196
+} VuBlkExport;
197
+
198
+static void vu_blk_req_complete(VuBlkReq *req)
199
{
200
VuDev *vu_dev = &req->server->vu_dev;
201
202
@@ -XXX,XX +XXX,XX @@ static void vu_block_req_complete(VuBlockReq *req)
203
free(req);
204
}
205
206
-static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
207
-{
208
- return container_of(server, VuBlockDev, vu_server);
209
-}
210
-
211
static int coroutine_fn
212
-vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
213
- uint32_t iovcnt, uint32_t type)
214
+vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov,
215
+ uint32_t iovcnt, uint32_t type)
216
{
217
struct virtio_blk_discard_write_zeroes desc;
218
ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
219
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
220
return -EINVAL;
221
}
222
223
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
224
uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
225
le32_to_cpu(desc.num_sectors) << 9 };
226
if (type == VIRTIO_BLK_T_DISCARD) {
227
- if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
228
+ if (blk_co_pdiscard(blk, range[0], range[1]) == 0) {
229
return 0;
230
}
231
} else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
232
- if (blk_co_pwrite_zeroes(vdev_blk->backend,
233
- range[0], range[1], 0) == 0) {
234
+ if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) {
235
return 0;
33
}
236
}
34
}
237
}
35
@@ -XXX,XX +XXX,XX @@ static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
238
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
36
}
239
return -EINVAL;
37
240
}
38
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
241
39
- int64_t start,
242
-static int coroutine_fn vu_block_flush(VuBlockReq *req)
40
+ int64_t offset,
243
+static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
41
int64_t bytes)
244
{
42
{
245
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
43
BlockCopyInFlightReq *req;
246
- BlockBackend *backend = vdev_blk->backend;
44
247
- return blk_co_flush(backend);
45
- while ((req = find_conflicting_inflight_req(s, start, bytes))) {
248
-}
46
+ while ((req = find_conflicting_inflight_req(s, offset, bytes))) {
249
-
47
qemu_co_queue_wait(&req->wait_queue, NULL);
250
-static void coroutine_fn vu_block_virtio_process_req(void *opaque)
251
-{
252
- VuBlockReq *req = opaque;
253
+ VuBlkReq *req = opaque;
254
VuServer *server = req->server;
255
VuVirtqElement *elem = &req->elem;
256
uint32_t type;
257
258
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
259
- BlockBackend *backend = vdev_blk->backend;
260
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
261
+ BlockBackend *blk = vexp->export.blk;
262
263
struct iovec *in_iov = elem->in_sg;
264
struct iovec *out_iov = elem->out_sg;
265
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
266
bool is_write = type & VIRTIO_BLK_T_OUT;
267
req->sector_num = le64_to_cpu(req->out.sector);
268
269
- int64_t offset = req->sector_num * vdev_blk->blk_size;
270
+ if (is_write && !vexp->writable) {
271
+ req->in->status = VIRTIO_BLK_S_IOERR;
272
+ break;
273
+ }
274
+
275
+ int64_t offset = req->sector_num * vexp->blk_size;
276
QEMUIOVector qiov;
277
if (is_write) {
278
qemu_iovec_init_external(&qiov, out_iov, out_num);
279
- ret = blk_co_pwritev(backend, offset, qiov.size,
280
- &qiov, 0);
281
+ ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
282
} else {
283
qemu_iovec_init_external(&qiov, in_iov, in_num);
284
- ret = blk_co_preadv(backend, offset, qiov.size,
285
- &qiov, 0);
286
+ ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
287
}
288
if (ret >= 0) {
289
req->in->status = VIRTIO_BLK_S_OK;
290
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
291
break;
48
}
292
}
49
}
293
case VIRTIO_BLK_T_FLUSH:
50
294
- if (vu_block_flush(req) == 0) {
51
static void block_copy_inflight_req_begin(BlockCopyState *s,
295
+ if (blk_co_flush(blk) == 0) {
52
BlockCopyInFlightReq *req,
296
req->in->status = VIRTIO_BLK_S_OK;
53
- int64_t start, int64_t bytes)
297
} else {
54
+ int64_t offset, int64_t bytes)
298
req->in->status = VIRTIO_BLK_S_IOERR;
55
{
299
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
56
- req->start = start;
300
case VIRTIO_BLK_T_DISCARD:
57
+ req->offset = offset;
301
case VIRTIO_BLK_T_WRITE_ZEROES: {
58
req->bytes = bytes;
302
int rc;
59
qemu_co_queue_init(&req->wait_queue);
303
- rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1],
60
QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
304
- out_num, type);
61
@@ -XXX,XX +XXX,XX @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
305
+
62
* Returns 0 on success.
306
+ if (!vexp->writable) {
63
*/
307
+ req->in->status = VIRTIO_BLK_S_IOERR;
64
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
308
+ break;
65
- int64_t start, int64_t bytes,
309
+ }
66
+ int64_t offset, int64_t bytes,
310
+
67
bool zeroes, bool *error_is_read)
311
+ rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type);
68
{
312
if (rc == 0) {
69
int ret;
313
req->in->status = VIRTIO_BLK_S_OK;
70
- int64_t nbytes = MIN(start + bytes, s->len) - start;
314
} else {
71
+ int64_t nbytes = MIN(offset + bytes, s->len) - offset;
315
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
72
void *bounce_buffer = NULL;
316
break;
73
74
- assert(start >= 0 && bytes > 0 && INT64_MAX - start >= bytes);
75
- assert(QEMU_IS_ALIGNED(start, s->cluster_size));
76
+ assert(offset >= 0 && bytes > 0 && INT64_MAX - offset >= bytes);
77
+ assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
78
assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
79
- assert(start < s->len);
80
- assert(start + bytes <= s->len ||
81
- start + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
82
+ assert(offset < s->len);
83
+ assert(offset + bytes <= s->len ||
84
+ offset + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
85
assert(nbytes < INT_MAX);
86
87
if (zeroes) {
88
- ret = bdrv_co_pwrite_zeroes(s->target, start, nbytes, s->write_flags &
89
+ ret = bdrv_co_pwrite_zeroes(s->target, offset, nbytes, s->write_flags &
90
~BDRV_REQ_WRITE_COMPRESSED);
91
if (ret < 0) {
92
- trace_block_copy_write_zeroes_fail(s, start, ret);
93
+ trace_block_copy_write_zeroes_fail(s, offset, ret);
94
if (error_is_read) {
95
*error_is_read = false;
96
}
97
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
98
}
317
}
99
318
100
if (s->use_copy_range) {
319
- vu_block_req_complete(req);
101
- ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
320
+ vu_blk_req_complete(req);
102
+ ret = bdrv_co_copy_range(s->source, offset, s->target, offset, nbytes,
321
return;
103
0, s->write_flags);
322
104
if (ret < 0) {
323
err:
105
- trace_block_copy_copy_range_fail(s, start, ret);
324
- free(elem);
106
+ trace_block_copy_copy_range_fail(s, offset, ret);
325
+ free(req);
107
s->use_copy_range = false;
326
}
108
s->copy_size = MAX(s->cluster_size, BLOCK_COPY_MAX_BUFFER);
327
109
/* Fallback to read+write with allocated buffer */
328
-static void vu_block_process_vq(VuDev *vu_dev, int idx)
110
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
329
+static void vu_blk_process_vq(VuDev *vu_dev, int idx)
111
330
{
112
bounce_buffer = qemu_blockalign(s->source->bs, nbytes);
331
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
113
332
VuVirtq *vq = vu_get_queue(vu_dev, idx);
114
- ret = bdrv_co_pread(s->source, start, nbytes, bounce_buffer, 0);
333
115
+ ret = bdrv_co_pread(s->source, offset, nbytes, bounce_buffer, 0);
334
while (1) {
116
if (ret < 0) {
335
- VuBlockReq *req;
117
- trace_block_copy_read_fail(s, start, ret);
336
+ VuBlkReq *req;
118
+ trace_block_copy_read_fail(s, offset, ret);
337
119
if (error_is_read) {
338
- req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq));
120
*error_is_read = true;
339
+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
121
}
340
if (!req) {
122
goto out;
123
}
124
125
- ret = bdrv_co_pwrite(s->target, start, nbytes, bounce_buffer,
126
+ ret = bdrv_co_pwrite(s->target, offset, nbytes, bounce_buffer,
127
s->write_flags);
128
if (ret < 0) {
129
- trace_block_copy_write_fail(s, start, ret);
130
+ trace_block_copy_write_fail(s, offset, ret);
131
if (error_is_read) {
132
*error_is_read = false;
133
}
134
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
135
}
136
137
int coroutine_fn block_copy(BlockCopyState *s,
138
- int64_t start, int64_t bytes,
139
+ int64_t offset, int64_t bytes,
140
bool *error_is_read)
141
{
142
int ret = 0;
143
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
144
assert(bdrv_get_aio_context(s->source->bs) ==
145
bdrv_get_aio_context(s->target->bs));
146
147
- assert(QEMU_IS_ALIGNED(start, s->cluster_size));
148
+ assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
149
assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
150
151
- block_copy_wait_inflight_reqs(s, start, bytes);
152
- block_copy_inflight_req_begin(s, &req, start, bytes);
153
+ block_copy_wait_inflight_reqs(s, offset, bytes);
154
+ block_copy_inflight_req_begin(s, &req, offset, bytes);
155
156
while (bytes) {
157
int64_t next_zero, cur_bytes, status_bytes;
158
159
- if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
160
- trace_block_copy_skip(s, start);
161
- start += s->cluster_size;
162
+ if (!bdrv_dirty_bitmap_get(s->copy_bitmap, offset)) {
163
+ trace_block_copy_skip(s, offset);
164
+ offset += s->cluster_size;
165
bytes -= s->cluster_size;
166
continue; /* already copied */
167
}
168
169
cur_bytes = MIN(bytes, s->copy_size);
170
171
- next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
172
+ next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, offset,
173
cur_bytes);
174
if (next_zero >= 0) {
175
- assert(next_zero > start); /* start is dirty */
176
- assert(next_zero < start + cur_bytes); /* no need to do MIN() */
177
- cur_bytes = next_zero - start;
178
+ assert(next_zero > offset); /* offset is dirty */
179
+ assert(next_zero < offset + cur_bytes); /* no need to do MIN() */
180
+ cur_bytes = next_zero - offset;
181
}
182
183
- ret = block_copy_block_status(s, start, cur_bytes, &status_bytes);
184
+ ret = block_copy_block_status(s, offset, cur_bytes, &status_bytes);
185
if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
186
- bdrv_reset_dirty_bitmap(s->copy_bitmap, start, status_bytes);
187
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, status_bytes);
188
progress_set_remaining(s->progress,
189
bdrv_get_dirty_count(s->copy_bitmap) +
190
s->in_flight_bytes);
191
- trace_block_copy_skip_range(s, start, status_bytes);
192
- start += status_bytes;
193
+ trace_block_copy_skip_range(s, offset, status_bytes);
194
+ offset += status_bytes;
195
bytes -= status_bytes;
196
continue;
197
}
198
199
cur_bytes = MIN(cur_bytes, status_bytes);
200
201
- trace_block_copy_process(s, start);
202
+ trace_block_copy_process(s, offset);
203
204
- bdrv_reset_dirty_bitmap(s->copy_bitmap, start, cur_bytes);
205
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
206
s->in_flight_bytes += cur_bytes;
207
208
co_get_from_shres(s->mem, cur_bytes);
209
- ret = block_copy_do_copy(s, start, cur_bytes, ret & BDRV_BLOCK_ZERO,
210
+ ret = block_copy_do_copy(s, offset, cur_bytes, ret & BDRV_BLOCK_ZERO,
211
error_is_read);
212
co_put_to_shres(s->mem, cur_bytes);
213
s->in_flight_bytes -= cur_bytes;
214
if (ret < 0) {
215
- bdrv_set_dirty_bitmap(s->copy_bitmap, start, cur_bytes);
216
+ bdrv_set_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
217
break;
341
break;
218
}
342
}
219
343
@@ -XXX,XX +XXX,XX @@ static void vu_block_process_vq(VuDev *vu_dev, int idx)
220
progress_work_done(s->progress, cur_bytes);
344
req->vq = vq;
221
s->progress_bytes_callback(cur_bytes, s->progress_opaque);
345
222
- start += cur_bytes;
346
Coroutine *co =
223
+ offset += cur_bytes;
347
- qemu_coroutine_create(vu_block_virtio_process_req, req);
224
bytes -= cur_bytes;
348
+ qemu_coroutine_create(vu_blk_virtio_process_req, req);
349
qemu_coroutine_enter(co);
225
}
350
}
226
351
}
227
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
352
353
-static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started)
354
+static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started)
355
{
356
VuVirtq *vq;
357
358
assert(vu_dev);
359
360
vq = vu_get_queue(vu_dev, idx);
361
- vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL);
362
+ vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL);
363
}
364
365
-static uint64_t vu_block_get_features(VuDev *dev)
366
+static uint64_t vu_blk_get_features(VuDev *dev)
367
{
368
uint64_t features;
369
VuServer *server = container_of(dev, VuServer, vu_dev);
370
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
371
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
372
features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
373
1ull << VIRTIO_BLK_F_SEG_MAX |
374
1ull << VIRTIO_BLK_F_TOPOLOGY |
375
@@ -XXX,XX +XXX,XX @@ static uint64_t vu_block_get_features(VuDev *dev)
376
1ull << VIRTIO_RING_F_EVENT_IDX |
377
1ull << VHOST_USER_F_PROTOCOL_FEATURES;
378
379
- if (!vdev_blk->writable) {
380
+ if (!vexp->writable) {
381
features |= 1ull << VIRTIO_BLK_F_RO;
382
}
383
384
return features;
385
}
386
387
-static uint64_t vu_block_get_protocol_features(VuDev *dev)
388
+static uint64_t vu_blk_get_protocol_features(VuDev *dev)
389
{
390
return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
391
1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
392
}
393
394
static int
395
-vu_block_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
396
+vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
397
{
398
+ /* TODO blkcfg must be little-endian for VIRTIO 1.0 */
399
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
400
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
401
- memcpy(config, &vdev_blk->blkcfg, len);
402
-
403
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
404
+ memcpy(config, &vexp->blkcfg, len);
405
return 0;
406
}
407
408
static int
409
-vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
410
+vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
411
uint32_t offset, uint32_t size, uint32_t flags)
412
{
413
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
414
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
415
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
416
uint8_t wce;
417
418
/* don't support live migration */
419
@@ -XXX,XX +XXX,XX @@ vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
420
}
421
422
wce = *data;
423
- vdev_blk->blkcfg.wce = wce;
424
- blk_set_enable_write_cache(vdev_blk->backend, wce);
425
+ vexp->blkcfg.wce = wce;
426
+ blk_set_enable_write_cache(vexp->export.blk, wce);
427
return 0;
428
}
429
430
@@ -XXX,XX +XXX,XX @@ vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
431
* of vu_process_message.
432
*
433
*/
434
-static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
435
+static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
436
{
437
if (vmsg->request == VHOST_USER_NONE) {
438
dev->panic(dev, "disconnect");
439
@@ -XXX,XX +XXX,XX @@ static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
440
return false;
441
}
442
443
-static const VuDevIface vu_block_iface = {
444
- .get_features = vu_block_get_features,
445
- .queue_set_started = vu_block_queue_set_started,
446
- .get_protocol_features = vu_block_get_protocol_features,
447
- .get_config = vu_block_get_config,
448
- .set_config = vu_block_set_config,
449
- .process_msg = vu_block_process_msg,
450
+static const VuDevIface vu_blk_iface = {
451
+ .get_features = vu_blk_get_features,
452
+ .queue_set_started = vu_blk_queue_set_started,
453
+ .get_protocol_features = vu_blk_get_protocol_features,
454
+ .get_config = vu_blk_get_config,
455
+ .set_config = vu_blk_set_config,
456
+ .process_msg = vu_blk_process_msg,
457
};
458
459
static void blk_aio_attached(AioContext *ctx, void *opaque)
460
{
461
- VuBlockDev *vub_dev = opaque;
462
- vhost_user_server_attach_aio_context(&vub_dev->vu_server, ctx);
463
+ VuBlkExport *vexp = opaque;
464
+ vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
465
}
466
467
static void blk_aio_detach(void *opaque)
468
{
469
- VuBlockDev *vub_dev = opaque;
470
- vhost_user_server_detach_aio_context(&vub_dev->vu_server);
471
+ VuBlkExport *vexp = opaque;
472
+ vhost_user_server_detach_aio_context(&vexp->vu_server);
473
}
474
475
static void
476
-vu_block_initialize_config(BlockDriverState *bs,
477
+vu_blk_initialize_config(BlockDriverState *bs,
478
struct virtio_blk_config *config, uint32_t blk_size)
479
{
480
config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
481
@@ -XXX,XX +XXX,XX @@ vu_block_initialize_config(BlockDriverState *bs,
482
config->max_write_zeroes_seg = 1;
483
}
484
485
-static VuBlockDev *vu_block_init(VuBlockDev *vu_block_device, Error **errp)
486
+static void vu_blk_exp_request_shutdown(BlockExport *exp)
487
{
488
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
489
490
- BlockBackend *blk;
491
- Error *local_error = NULL;
492
- const char *node_name = vu_block_device->node_name;
493
- bool writable = vu_block_device->writable;
494
- uint64_t perm = BLK_PERM_CONSISTENT_READ;
495
- int ret;
496
-
497
- AioContext *ctx;
498
-
499
- BlockDriverState *bs = bdrv_lookup_bs(node_name, node_name, &local_error);
500
-
501
- if (!bs) {
502
- error_propagate(errp, local_error);
503
- return NULL;
504
- }
505
-
506
- if (bdrv_is_read_only(bs)) {
507
- writable = false;
508
- }
509
-
510
- if (writable) {
511
- perm |= BLK_PERM_WRITE;
512
- }
513
-
514
- ctx = bdrv_get_aio_context(bs);
515
- aio_context_acquire(ctx);
516
- bdrv_invalidate_cache(bs, NULL);
517
- aio_context_release(ctx);
518
-
519
- /*
520
- * Don't allow resize while the vhost user server is running,
521
- * otherwise we don't care what happens with the node.
522
- */
523
- blk = blk_new(bdrv_get_aio_context(bs), perm,
524
- BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
525
- BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
526
- ret = blk_insert_bs(blk, bs, errp);
527
-
528
- if (ret < 0) {
529
- goto fail;
530
- }
531
-
532
- blk_set_enable_write_cache(blk, false);
533
-
534
- blk_set_allow_aio_context_change(blk, true);
535
-
536
- vu_block_device->blkcfg.wce = 0;
537
- vu_block_device->backend = blk;
538
- if (!vu_block_device->blk_size) {
539
- vu_block_device->blk_size = BDRV_SECTOR_SIZE;
540
- }
541
- vu_block_device->blkcfg.blk_size = vu_block_device->blk_size;
542
- blk_set_guest_block_size(blk, vu_block_device->blk_size);
543
- vu_block_initialize_config(bs, &vu_block_device->blkcfg,
544
- vu_block_device->blk_size);
545
- return vu_block_device;
546
-
547
-fail:
548
- blk_unref(blk);
549
- return NULL;
550
-}
551
-
552
-static void vu_block_deinit(VuBlockDev *vu_block_device)
553
-{
554
- if (vu_block_device->backend) {
555
- blk_remove_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
556
- blk_aio_detach, vu_block_device);
557
- }
558
-
559
- blk_unref(vu_block_device->backend);
560
-}
561
-
562
-static void vhost_user_blk_server_stop(VuBlockDev *vu_block_device)
563
-{
564
- vhost_user_server_stop(&vu_block_device->vu_server);
565
- vu_block_deinit(vu_block_device);
566
-}
567
-
568
-static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
569
- Error **errp)
570
-{
571
- AioContext *ctx;
572
- SocketAddress *addr = vu_block_device->addr;
573
-
574
- if (!vu_block_init(vu_block_device, errp)) {
575
- return;
576
- }
577
-
578
- ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
579
-
580
- if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
581
- VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface,
582
- errp)) {
583
- goto error;
584
- }
585
-
586
- blk_add_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
587
- blk_aio_detach, vu_block_device);
588
- vu_block_device->running = true;
589
- return;
590
-
591
- error:
592
- vu_block_deinit(vu_block_device);
593
-}
594
-
595
-static bool vu_prop_modifiable(VuBlockDev *vus, Error **errp)
596
-{
597
- if (vus->running) {
598
- error_setg(errp, "The property can't be modified "
599
- "while the server is running");
600
- return false;
601
- }
602
- return true;
603
-}
604
-
605
-static void vu_set_node_name(Object *obj, const char *value, Error **errp)
606
-{
607
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
608
-
609
- if (!vu_prop_modifiable(vus, errp)) {
610
- return;
611
- }
612
-
613
- if (vus->node_name) {
614
- g_free(vus->node_name);
615
- }
616
-
617
- vus->node_name = g_strdup(value);
618
-}
619
-
620
-static char *vu_get_node_name(Object *obj, Error **errp)
621
-{
622
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
623
- return g_strdup(vus->node_name);
624
-}
625
-
626
-static void free_socket_addr(SocketAddress *addr)
627
-{
628
- g_free(addr->u.q_unix.path);
629
- g_free(addr);
630
-}
631
-
632
-static void vu_set_unix_socket(Object *obj, const char *value,
633
- Error **errp)
634
-{
635
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
636
-
637
- if (!vu_prop_modifiable(vus, errp)) {
638
- return;
639
- }
640
-
641
- if (vus->addr) {
642
- free_socket_addr(vus->addr);
643
- }
644
-
645
- SocketAddress *addr = g_new0(SocketAddress, 1);
646
- addr->type = SOCKET_ADDRESS_TYPE_UNIX;
647
- addr->u.q_unix.path = g_strdup(value);
648
- vus->addr = addr;
649
+ vhost_user_server_stop(&vexp->vu_server);
650
}
651
652
-static char *vu_get_unix_socket(Object *obj, Error **errp)
653
+static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
654
+ Error **errp)
655
{
656
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
657
- return g_strdup(vus->addr->u.q_unix.path);
658
-}
659
-
660
-static bool vu_get_block_writable(Object *obj, Error **errp)
661
-{
662
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
663
- return vus->writable;
664
-}
665
-
666
-static void vu_set_block_writable(Object *obj, bool value, Error **errp)
667
-{
668
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
669
-
670
- if (!vu_prop_modifiable(vus, errp)) {
671
- return;
672
- }
673
-
674
- vus->writable = value;
675
-}
676
-
677
-static void vu_get_blk_size(Object *obj, Visitor *v, const char *name,
678
- void *opaque, Error **errp)
679
-{
680
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
681
- uint32_t value = vus->blk_size;
682
-
683
- visit_type_uint32(v, name, &value, errp);
684
-}
685
-
686
-static void vu_set_blk_size(Object *obj, Visitor *v, const char *name,
687
- void *opaque, Error **errp)
688
-{
689
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
690
-
691
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
692
+ BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
693
Error *local_err = NULL;
694
- uint32_t value;
695
+ uint64_t logical_block_size;
696
697
- if (!vu_prop_modifiable(vus, errp)) {
698
- return;
699
- }
700
+ vexp->writable = opts->writable;
701
+ vexp->blkcfg.wce = 0;
702
703
- visit_type_uint32(v, name, &value, &local_err);
704
- if (local_err) {
705
- goto out;
706
+ if (vu_opts->has_logical_block_size) {
707
+ logical_block_size = vu_opts->logical_block_size;
708
+ } else {
709
+ logical_block_size = BDRV_SECTOR_SIZE;
710
}
711
-
712
- check_block_size(object_get_typename(obj), name, value, &local_err);
713
+ check_block_size(exp->id, "logical-block-size", logical_block_size,
714
+ &local_err);
715
if (local_err) {
716
- goto out;
717
+ error_propagate(errp, local_err);
718
+ return -EINVAL;
719
+ }
720
+ vexp->blk_size = logical_block_size;
721
+ blk_set_guest_block_size(exp->blk, logical_block_size);
722
+ vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
723
+ logical_block_size);
724
+
725
+ blk_set_allow_aio_context_change(exp->blk, true);
726
+ blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
727
+ vexp);
728
+
729
+ if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
730
+ VHOST_USER_BLK_MAX_QUEUES, &vu_blk_iface,
731
+ errp)) {
732
+ blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
733
+ blk_aio_detach, vexp);
734
+ return -EADDRNOTAVAIL;
735
}
736
737
- vus->blk_size = value;
738
-
739
-out:
740
- error_propagate(errp, local_err);
741
-}
742
-
743
-static void vhost_user_blk_server_instance_finalize(Object *obj)
744
-{
745
- VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
746
-
747
- vhost_user_blk_server_stop(vub);
748
-
749
- /*
750
- * Unlike object_property_add_str, object_class_property_add_str
751
- * doesn't have a release method. Thus manual memory freeing is
752
- * needed.
753
- */
754
- free_socket_addr(vub->addr);
755
- g_free(vub->node_name);
756
-}
757
-
758
-static void vhost_user_blk_server_complete(UserCreatable *obj, Error **errp)
759
-{
760
- VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
761
-
762
- vhost_user_blk_server_start(vub, errp);
763
+ return 0;
764
}
765
766
-static void vhost_user_blk_server_class_init(ObjectClass *klass,
767
- void *class_data)
768
+static void vu_blk_exp_delete(BlockExport *exp)
769
{
770
- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
771
- ucc->complete = vhost_user_blk_server_complete;
772
-
773
- object_class_property_add_bool(klass, "writable",
774
- vu_get_block_writable,
775
- vu_set_block_writable);
776
-
777
- object_class_property_add_str(klass, "node-name",
778
- vu_get_node_name,
779
- vu_set_node_name);
780
-
781
- object_class_property_add_str(klass, "unix-socket",
782
- vu_get_unix_socket,
783
- vu_set_unix_socket);
784
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
785
786
- object_class_property_add(klass, "logical-block-size", "uint32",
787
- vu_get_blk_size, vu_set_blk_size,
788
- NULL, NULL);
789
+ blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
790
+ vexp);
791
}
792
793
-static const TypeInfo vhost_user_blk_server_info = {
794
- .name = TYPE_VHOST_USER_BLK_SERVER,
795
- .parent = TYPE_OBJECT,
796
- .instance_size = sizeof(VuBlockDev),
797
- .instance_finalize = vhost_user_blk_server_instance_finalize,
798
- .class_init = vhost_user_blk_server_class_init,
799
- .interfaces = (InterfaceInfo[]) {
800
- {TYPE_USER_CREATABLE},
801
- {}
802
- },
803
+const BlockExportDriver blk_exp_vhost_user_blk = {
804
+ .type = BLOCK_EXPORT_TYPE_VHOST_USER_BLK,
805
+ .instance_size = sizeof(VuBlkExport),
806
+ .create = vu_blk_exp_create,
807
+ .delete = vu_blk_exp_delete,
808
+ .request_shutdown = vu_blk_exp_request_shutdown,
809
};
810
-
811
-static void vhost_user_blk_server_register_types(void)
812
-{
813
- type_register_static(&vhost_user_blk_server_info);
814
-}
815
-
816
-type_init(vhost_user_blk_server_register_types)
817
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
228
index XXXXXXX..XXXXXXX 100644
818
index XXXXXXX..XXXXXXX 100644
229
--- a/include/block/block-copy.h
819
--- a/util/vhost-user-server.c
230
+++ b/include/block/block-copy.h
820
+++ b/util/vhost-user-server.c
231
@@ -XXX,XX +XXX,XX @@
821
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
232
#include "qemu/co-shared-resource.h"
822
Error **errp)
233
823
{
234
typedef struct BlockCopyInFlightReq {
824
QEMUBH *bh;
235
- int64_t start;
825
- QIONetListener *listener = qio_net_listener_new();
236
+ int64_t offset;
826
+ QIONetListener *listener;
237
int64_t bytes;
827
+
238
QLIST_ENTRY(BlockCopyInFlightReq) list;
828
+ if (socket_addr->type != SOCKET_ADDRESS_TYPE_UNIX &&
239
CoQueue wait_queue; /* coroutines blocked on this request */
829
+ socket_addr->type != SOCKET_ADDRESS_TYPE_FD) {
240
@@ -XXX,XX +XXX,XX @@ void block_copy_state_free(BlockCopyState *s);
830
+ error_setg(errp, "Only socket address types 'unix' and 'fd' are supported");
241
int64_t block_copy_reset_unallocated(BlockCopyState *s,
831
+ return false;
242
int64_t offset, int64_t *count);
832
+ }
243
833
+
244
-int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
834
+ listener = qio_net_listener_new();
245
+int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
835
if (qio_net_listener_open_sync(listener, socket_addr, 1,
246
bool *error_is_read);
836
errp) < 0) {
247
837
object_unref(OBJECT(listener));
248
#endif /* BLOCK_COPY_H */
838
diff --git a/block/export/meson.build b/block/export/meson.build
839
index XXXXXXX..XXXXXXX 100644
840
--- a/block/export/meson.build
841
+++ b/block/export/meson.build
842
@@ -1 +1,2 @@
843
block_ss.add(files('export.c'))
844
+block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', '../../contrib/libvhost-user/libvhost-user.c'))
845
diff --git a/block/meson.build b/block/meson.build
846
index XXXXXXX..XXXXXXX 100644
847
--- a/block/meson.build
848
+++ b/block/meson.build
849
@@ -XXX,XX +XXX,XX @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')
850
block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
851
block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c'))
852
block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
853
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('export/vhost-user-blk-server.c', '../contrib/libvhost-user/libvhost-user.c'))
854
block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c'))
855
block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c'))
856
block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c'))
249
--
857
--
250
2.24.1
858
2.26.2
251
859
252
1
From: Pan Nengyuan <pannengyuan@huawei.com>
1
Headers used by other subsystems are located in include/. Also add the
2
vhost-user-server and vhost-user-blk-server headers to MAINTAINERS.
2
3
3
'crypto_opts' is not freed in qcow2_close(); this patch fixes the leak shown in the stack below:
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Message-id: 20200924151549.913737-13-stefanha@redhat.com
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
---
8
MAINTAINERS | 4 +++-
9
{util => include/qemu}/vhost-user-server.h | 0
10
block/export/vhost-user-blk-server.c | 2 +-
11
util/vhost-user-server.c | 2 +-
12
4 files changed, 5 insertions(+), 3 deletions(-)
13
rename {util => include/qemu}/vhost-user-server.h (100%)
4
14
5
Direct leak of 24 byte(s) in 1 object(s) allocated from:
15
diff --git a/MAINTAINERS b/MAINTAINERS
6
#0 0x7f0edd81f970 in __interceptor_calloc (/lib64/libasan.so.5+0xef970)
16
index XXXXXXX..XXXXXXX 100644
7
#1 0x7f0edc6d149d in g_malloc0 (/lib64/libglib-2.0.so.0+0x5249d)
17
--- a/MAINTAINERS
8
#2 0x55d7eaede63d in qobject_input_start_struct /mnt/sdb/qemu-new/qemu_test/qemu/qapi/qobject-input-visitor.c:295
18
+++ b/MAINTAINERS
9
#3 0x55d7eaed78b8 in visit_start_struct /mnt/sdb/qemu-new/qemu_test/qemu/qapi/qapi-visit-core.c:49
19
@@ -XXX,XX +XXX,XX @@ Vhost-user block device backend server
10
#4 0x55d7eaf5140b in visit_type_QCryptoBlockOpenOptions qapi/qapi-visit-crypto.c:290
20
M: Coiby Xu <Coiby.Xu@gmail.com>
11
#5 0x55d7eae43af3 in block_crypto_open_opts_init /mnt/sdb/qemu-new/qemu_test/qemu/block/crypto.c:163
21
S: Maintained
12
#6 0x55d7eacd2924 in qcow2_update_options_prepare /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1148
22
F: block/export/vhost-user-blk-server.c
13
#7 0x55d7eacd33f7 in qcow2_update_options /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1232
23
-F: util/vhost-user-server.c
14
#8 0x55d7eacd9680 in qcow2_do_open /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1512
24
+F: block/export/vhost-user-blk-server.h
15
#9 0x55d7eacdc55e in qcow2_open_entry /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1792
25
+F: include/qemu/vhost-user-server.h
16
#10 0x55d7eacdc8fe in qcow2_open /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:1819
26
F: tests/qtest/libqos/vhost-user-blk.c
17
#11 0x55d7eac3742d in bdrv_open_driver /mnt/sdb/qemu-new/qemu_test/qemu/block.c:1317
27
+F: util/vhost-user-server.c
18
#12 0x55d7eac3e990 in bdrv_open_common /mnt/sdb/qemu-new/qemu_test/qemu/block.c:1575
28
19
#13 0x55d7eac4442c in bdrv_open_inherit /mnt/sdb/qemu-new/qemu_test/qemu/block.c:3126
29
Replication
20
#14 0x55d7eac45c3f in bdrv_open /mnt/sdb/qemu-new/qemu_test/qemu/block.c:3219
30
M: Wen Congyang <wencongyang2@huawei.com>
21
#15 0x55d7ead8e8a4 in blk_new_open /mnt/sdb/qemu-new/qemu_test/qemu/block/block-backend.c:397
31
diff --git a/util/vhost-user-server.h b/include/qemu/vhost-user-server.h
22
#16 0x55d7eacde74c in qcow2_co_create /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:3534
32
similarity index 100%
23
#17 0x55d7eacdfa6d in qcow2_co_create_opts /mnt/sdb/qemu-new/qemu_test/qemu/block/qcow2.c:3668
33
rename from util/vhost-user-server.h
24
#18 0x55d7eac1c678 in bdrv_create_co_entry /mnt/sdb/qemu-new/qemu_test/qemu/block.c:485
34
rename to include/qemu/vhost-user-server.h
25
#19 0x55d7eb0024d2 in coroutine_trampoline /mnt/sdb/qemu-new/qemu_test/qemu/util/coroutine-ucontext.c:115
35
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/block/export/vhost-user-blk-server.c
38
+++ b/block/export/vhost-user-blk-server.c
39
@@ -XXX,XX +XXX,XX @@
40
#include "block/block.h"
41
#include "contrib/libvhost-user/libvhost-user.h"
42
#include "standard-headers/linux/virtio_blk.h"
43
-#include "util/vhost-user-server.h"
44
+#include "qemu/vhost-user-server.h"
45
#include "vhost-user-blk-server.h"
46
#include "qapi/error.h"
47
#include "qom/object_interfaces.h"
48
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/util/vhost-user-server.c
51
+++ b/util/vhost-user-server.c
52
@@ -XXX,XX +XXX,XX @@
53
*/
54
#include "qemu/osdep.h"
55
#include "qemu/main-loop.h"
56
+#include "qemu/vhost-user-server.h"
57
#include "block/aio-wait.h"
58
-#include "vhost-user-server.h"
59
60
/*
61
* Theory of operation:
62
--
63
2.26.2
26
64
27
Reported-by: Euler Robot <euler.robot@huawei.com>
28
Signed-off-by: Pan Nengyuan <pannengyuan@huawei.com>
29
Reviewed-by: Max Reitz <mreitz@redhat.com>
30
Message-Id: <20200227012950.12256-2-pannengyuan@huawei.com>
31
Signed-off-by: Max Reitz <mreitz@redhat.com>
32
---
33
block/qcow2.c | 1 +
34
1 file changed, 1 insertion(+)
35
36
diff --git a/block/qcow2.c b/block/qcow2.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/block/qcow2.c
39
+++ b/block/qcow2.c
40
@@ -XXX,XX +XXX,XX @@ static void qcow2_close(BlockDriverState *bs)
41
42
qcrypto_block_free(s->crypto);
43
s->crypto = NULL;
44
+ qapi_free_QCryptoBlockOpenOptions(s->crypto_opts);
45
46
g_free(s->unknown_header_fields);
47
cleanup_unknown_header_ext(bs);
48
--
49
2.24.1
50
51
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
Don't compile contrib/libvhost-user/libvhost-user.c again. Instead build
2
the static library once and then reuse it throughout QEMU.
2
3
3
In most qemu-img sub-commands the --object option only makes sense when
4
Also switch from CONFIG_LINUX to CONFIG_VHOST_USER, which is what the
4
there is a filename. qemu-img measure is an exception because objects
5
vhost-user tools (vhost-user-gpu, etc) do.
5
may be referenced from the image creation options instead of an existing
6
image file. Allow --object without a filename.
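For illustration, an invocation along these lines (the secret value and
size are examples only) is now accepted even though no image file is
named:

  # measure a LUKS image that does not exist yet, from creation options alone
  qemu-img measure --object secret,id=sec0,data=passphrase \
      -O luks -o key-secret=sec0 --size 1G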
7
6
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-id: 20200924151549.913737-14-stefanha@redhat.com
10
Message-Id: <20200221112522.1497712-4-stefanha@redhat.com>
9
[Added CONFIG_LINUX again because libvhost-user doesn't build on macOS.
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
--Stefan]
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
12
---
13
qemu-img.c | 6 ++----
13
block/export/export.c | 8 ++++----
14
tests/qemu-iotests/178 | 2 +-
14
block/export/meson.build | 2 +-
15
tests/qemu-iotests/178.out.qcow2 | 8 ++++----
15
contrib/libvhost-user/meson.build | 1 +
16
tests/qemu-iotests/178.out.raw | 8 ++++----
16
meson.build | 6 +++++-
17
4 files changed, 11 insertions(+), 13 deletions(-)
17
util/meson.build | 4 +++-
18
5 files changed, 14 insertions(+), 7 deletions(-)
18
19
19
diff --git a/qemu-img.c b/qemu-img.c
20
diff --git a/block/export/export.c b/block/export/export.c
20
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
21
--- a/qemu-img.c
22
--- a/block/export/export.c
22
+++ b/qemu-img.c
23
+++ b/block/export/export.c
23
@@ -XXX,XX +XXX,XX @@ static int img_measure(int argc, char **argv)
24
@@ -XXX,XX +XXX,XX @@
24
filename = argv[optind];
25
#include "sysemu/block-backend.h"
25
}
26
#include "block/export.h"
26
27
#include "block/nbd.h"
27
- if (!filename &&
28
-#if CONFIG_LINUX
28
- (object_opts || image_opts || fmt || snapshot_name || sn_opts)) {
29
-#include "block/export/vhost-user-blk-server.h"
29
- error_report("--object, --image-opts, -f, and -l "
30
-#endif
30
- "require a filename argument.");
31
#include "qapi/error.h"
31
+ if (!filename && (image_opts || fmt || snapshot_name || sn_opts)) {
32
#include "qapi/qapi-commands-block-export.h"
32
+ error_report("--image-opts, -f, and -l require a filename argument.");
33
#include "qapi/qapi-events-block-export.h"
33
goto out;
34
#include "qemu/id.h"
34
}
35
+#ifdef CONFIG_VHOST_USER
35
if (filename && img_size != UINT64_MAX) {
36
+#include "vhost-user-blk-server.h"
36
diff --git a/tests/qemu-iotests/178 b/tests/qemu-iotests/178
37
+#endif
37
index XXXXXXX..XXXXXXX 100755
38
38
--- a/tests/qemu-iotests/178
39
static const BlockExportDriver *blk_exp_drivers[] = {
39
+++ b/tests/qemu-iotests/178
40
&blk_exp_nbd,
40
@@ -XXX,XX +XXX,XX @@ _make_test_img 1G
41
-#if CONFIG_LINUX
41
$QEMU_IMG measure # missing arguments
42
+#ifdef CONFIG_VHOST_USER
42
$QEMU_IMG measure --size 2G "$TEST_IMG" # only one allowed
43
&blk_exp_vhost_user_blk,
43
$QEMU_IMG measure "$TEST_IMG" a # only one filename allowed
44
#endif
44
-$QEMU_IMG measure --object secret,id=sec0,data=MTIzNDU2,format=base64 # missing filename
45
};
45
+$QEMU_IMG measure --object secret,id=sec0,data=MTIzNDU2,format=base64 # size or filename needed
46
diff --git a/block/export/meson.build b/block/export/meson.build
46
$QEMU_IMG measure --image-opts # missing filename
47
$QEMU_IMG measure -f qcow2 # missing filename
48
$QEMU_IMG measure -l snap1 # missing filename
49
diff --git a/tests/qemu-iotests/178.out.qcow2 b/tests/qemu-iotests/178.out.qcow2
50
index XXXXXXX..XXXXXXX 100644
47
index XXXXXXX..XXXXXXX 100644
51
--- a/tests/qemu-iotests/178.out.qcow2
48
--- a/block/export/meson.build
52
+++ b/tests/qemu-iotests/178.out.qcow2
49
+++ b/block/export/meson.build
53
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
50
@@ -XXX,XX +XXX,XX @@
54
qemu-img: Either --size N or one filename must be specified.
51
block_ss.add(files('export.c'))
55
qemu-img: --size N cannot be used together with a filename.
52
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', '../../contrib/libvhost-user/libvhost-user.c'))
56
qemu-img: At most one filename argument is allowed.
53
+block_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: files('vhost-user-blk-server.c'))
57
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
54
diff --git a/contrib/libvhost-user/meson.build b/contrib/libvhost-user/meson.build
58
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
59
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
60
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
61
+qemu-img: Either --size N or one filename must be specified.
62
+qemu-img: --image-opts, -f, and -l require a filename argument.
63
+qemu-img: --image-opts, -f, and -l require a filename argument.
64
+qemu-img: --image-opts, -f, and -l require a filename argument.
65
qemu-img: Invalid option list: ,
66
qemu-img: Invalid parameter 'snapshot.foo'
67
qemu-img: Failed in parsing snapshot param 'snapshot.foo'
68
diff --git a/tests/qemu-iotests/178.out.raw b/tests/qemu-iotests/178.out.raw
69
index XXXXXXX..XXXXXXX 100644
55
index XXXXXXX..XXXXXXX 100644
70
--- a/tests/qemu-iotests/178.out.raw
56
--- a/contrib/libvhost-user/meson.build
71
+++ b/tests/qemu-iotests/178.out.raw
57
+++ b/contrib/libvhost-user/meson.build
72
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
58
@@ -XXX,XX +XXX,XX @@
73
qemu-img: Either --size N or one filename must be specified.
59
libvhost_user = static_library('vhost-user',
74
qemu-img: --size N cannot be used together with a filename.
60
files('libvhost-user.c', 'libvhost-user-glib.c'),
75
qemu-img: At most one filename argument is allowed.
61
build_by_default: false)
76
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
62
+vhost_user = declare_dependency(link_with: libvhost_user)
77
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
63
diff --git a/meson.build b/meson.build
78
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
64
index XXXXXXX..XXXXXXX 100644
79
-qemu-img: --object, --image-opts, -f, and -l require a filename argument.
65
--- a/meson.build
80
+qemu-img: Either --size N or one filename must be specified.
66
+++ b/meson.build
81
+qemu-img: --image-opts, -f, and -l require a filename argument.
67
@@ -XXX,XX +XXX,XX @@ trace_events_subdirs += [
82
+qemu-img: --image-opts, -f, and -l require a filename argument.
68
'util',
83
+qemu-img: --image-opts, -f, and -l require a filename argument.
69
]
84
qemu-img: Invalid option list: ,
70
85
qemu-img: Invalid parameter 'snapshot.foo'
71
+vhost_user = not_found
86
qemu-img: Failed in parsing snapshot param 'snapshot.foo'
72
+if 'CONFIG_VHOST_USER' in config_host
73
+ subdir('contrib/libvhost-user')
74
+endif
75
+
76
subdir('qapi')
77
subdir('qobject')
78
subdir('stubs')
79
@@ -XXX,XX +XXX,XX @@ if have_tools
80
install: true)
81
82
if 'CONFIG_VHOST_USER' in config_host
83
- subdir('contrib/libvhost-user')
84
subdir('contrib/vhost-user-blk')
85
subdir('contrib/vhost-user-gpu')
86
subdir('contrib/vhost-user-input')
87
diff --git a/util/meson.build b/util/meson.build
88
index XXXXXXX..XXXXXXX 100644
89
--- a/util/meson.build
90
+++ b/util/meson.build
91
@@ -XXX,XX +XXX,XX @@ if have_block
92
util_ss.add(files('main-loop.c'))
93
util_ss.add(files('nvdimm-utils.c'))
94
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
95
- util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
96
+ util_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: [
97
+ files('vhost-user-server.c'), vhost_user
98
+ ])
99
util_ss.add(files('block-helpers.c'))
100
util_ss.add(files('qemu-coroutine-sleep.c'))
101
util_ss.add(files('qemu-co-shared-resource.c'))
87
--
102
--
88
2.24.1
103
2.26.2
89
104
90
1
From: David Edmondson <david.edmondson@oracle.com>
1
Introduce libblkdev.fa to avoid recompiling blockdev_ss twice.
2
2
3
RFC 7230 section 3.2 indicates that HTTP header field names are case
3
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
4
insensitive.
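As a concrete illustration (the URL is an example only), servers may
legitimately return the header name in lower case, which the previous
exact-case strncmp() match rejected:

  # HTTP/2 responses in particular carry header names in lower case
  curl -sI https://example.com/disk.img | grep -i accept-ranges
  # accept-ranges: bytes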
4
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200929125516.186715-3-stefanha@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
meson.build | 12 ++++++++++--
10
storage-daemon/meson.build | 3 +--
11
2 files changed, 11 insertions(+), 4 deletions(-)
5
12
6
Signed-off-by: David Edmondson <david.edmondson@oracle.com>
13
diff --git a/meson.build b/meson.build
7
Message-Id: <20200224101310.101169-3-david.edmondson@oracle.com>
14
index XXXXXXX..XXXXXXX 100644
8
Reviewed-by: Max Reitz <mreitz@redhat.com>
15
--- a/meson.build
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
16
+++ b/meson.build
10
---
17
@@ -XXX,XX +XXX,XX @@ blockdev_ss.add(files(
11
block/curl.c | 5 +++--
18
# os-win32.c does not
12
1 file changed, 3 insertions(+), 2 deletions(-)
19
blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
20
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
21
-softmmu_ss.add_all(blockdev_ss)
22
23
common_ss.add(files('cpus-common.c'))
24
25
@@ -XXX,XX +XXX,XX @@ block = declare_dependency(link_whole: [libblock],
26
link_args: '@block.syms',
27
dependencies: [crypto, io])
28
29
+blockdev_ss = blockdev_ss.apply(config_host, strict: false)
30
+libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
31
+ dependencies: blockdev_ss.dependencies(),
32
+ name_suffix: 'fa',
33
+ build_by_default: false)
34
+
35
+blockdev = declare_dependency(link_whole: [libblockdev],
36
+ dependencies: [block])
37
+
38
qmp_ss = qmp_ss.apply(config_host, strict: false)
39
libqmp = static_library('qmp', qmp_ss.sources() + genh,
40
dependencies: qmp_ss.dependencies(),
41
@@ -XXX,XX +XXX,XX @@ foreach m : block_mods + softmmu_mods
42
install_dir: config_host['qemu_moddir'])
43
endforeach
44
45
-softmmu_ss.add(authz, block, chardev, crypto, io, qmp)
46
+softmmu_ss.add(authz, blockdev, chardev, crypto, io, qmp)
47
common_ss.add(qom, qemuutil)
48
49
common_ss.add_all(when: 'CONFIG_SOFTMMU', if_true: [softmmu_ss])
50
diff --git a/storage-daemon/meson.build b/storage-daemon/meson.build
51
index XXXXXXX..XXXXXXX 100644
52
--- a/storage-daemon/meson.build
53
+++ b/storage-daemon/meson.build
54
@@ -XXX,XX +XXX,XX @@
55
qsd_ss = ss.source_set()
56
qsd_ss.add(files('qemu-storage-daemon.c'))
57
-qsd_ss.add(block, chardev, qmp, qom, qemuutil)
58
-qsd_ss.add_all(blockdev_ss)
59
+qsd_ss.add(blockdev, chardev, qmp, qom, qemuutil)
60
61
subdir('qapi')
62
63
--
64
2.26.2
13
65
14
diff --git a/block/curl.c b/block/curl.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/curl.c
17
+++ b/block/curl.c
18
@@ -XXX,XX +XXX,XX @@ static size_t curl_header_cb(void *ptr, size_t size, size_t nmemb, void *opaque)
19
size_t realsize = size * nmemb;
20
const char *header = (char *)ptr;
21
const char *end = header + realsize;
22
- const char *accept_ranges = "Accept-Ranges:";
23
+ const char *accept_ranges = "accept-ranges:";
24
const char *bytes = "bytes";
25
26
if (realsize >= strlen(accept_ranges)
27
- && strncmp(header, accept_ranges, strlen(accept_ranges)) == 0) {
28
+ && g_ascii_strncasecmp(header, accept_ranges,
29
+ strlen(accept_ranges)) == 0) {
30
31
char *p = strchr(header, ':') + 1;
32
33
--
34
2.24.1
35
36
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
Block exports are used by softmmu, qemu-storage-daemon, and qemu-nbd.
2
They are not used by other programs and are not otherwise needed in
3
libblock.
2
4
3
This test exercises the block/crypto.c "luks" block driver
5
Undo the recent move of blockdev-nbd.c from blockdev_ss into block_ss.
4
.bdrv_measure() code.
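The test runs like any other iotest; assuming a built source tree,
something along these lines:

  cd tests/qemu-iotests
  ./check -luks 288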
6
Since bdrv_close_all() (libblock) calls blk_exp_close_all()
7
(libblockdev), a stub function is required.
8
9
Make qemu-nbd.c use signal handling utility functions instead of
10
duplicating the code. This helps because os-posix.c is in libblockdev
11
and it depends on a qemu_system_killed() symbol that qemu-nbd.c lacks.
12
Once we use the signal handling utility functions we also end up
13
providing the necessary symbol.
5
14
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
8
Message-Id: <20200221112522.1497712-5-stefanha@redhat.com>
17
Reviewed-by: Eric Blake <eblake@redhat.com>
9
[mreitz: Renamed test from 282 to 288]
18
Message-id: 20200929125516.186715-4-stefanha@redhat.com
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
19
[Fixed s/ndb/nbd/ typo in commit description as suggested by Eric Blake
20
--Stefan]
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
22
---
12
tests/qemu-iotests/288 | 93 ++++++++++++++++++++++++++++++++++++++
23
qemu-nbd.c | 21 ++++++++-------------
13
tests/qemu-iotests/288.out | 30 ++++++++++++
24
stubs/blk-exp-close-all.c | 7 +++++++
14
tests/qemu-iotests/group | 1 +
25
block/export/meson.build | 4 ++--
15
3 files changed, 124 insertions(+)
26
meson.build | 4 ++--
16
create mode 100755 tests/qemu-iotests/288
27
nbd/meson.build | 2 ++
17
create mode 100644 tests/qemu-iotests/288.out
28
stubs/meson.build | 1 +
29
6 files changed, 22 insertions(+), 17 deletions(-)
30
create mode 100644 stubs/blk-exp-close-all.c
18
31
19
diff --git a/tests/qemu-iotests/288 b/tests/qemu-iotests/288
32
diff --git a/qemu-nbd.c b/qemu-nbd.c
20
new file mode 100755
33
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX
34
--- a/qemu-nbd.c
22
--- /dev/null
35
+++ b/qemu-nbd.c
23
+++ b/tests/qemu-iotests/288
24
@@ -XXX,XX +XXX,XX @@
36
@@ -XXX,XX +XXX,XX @@
25
+#!/usr/bin/env bash
37
#include "qapi/error.h"
26
+#
38
#include "qemu/cutils.h"
27
+# qemu-img measure tests for LUKS images
39
#include "sysemu/block-backend.h"
28
+#
40
+#include "sysemu/runstate.h" /* for qemu_system_killed() prototype */
29
+# Copyright (C) 2020 Red Hat, Inc.
41
#include "block/block_int.h"
30
+#
42
#include "block/nbd.h"
31
+# This program is free software; you can redistribute it and/or modify
43
#include "qemu/main-loop.h"
32
+# it under the terms of the GNU General Public License as published by
44
@@ -XXX,XX +XXX,XX @@ QEMU_COPYRIGHT "\n"
33
+# the Free Software Foundation; either version 2 of the License, or
45
}
34
+# (at your option) any later version.
46
35
+#
47
#ifdef CONFIG_POSIX
36
+# This program is distributed in the hope that it will be useful,
48
-static void termsig_handler(int signum)
37
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
49
+/*
38
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
50
+ * The client thread uses SIGTERM to interrupt the server. A signal
39
+# GNU General Public License for more details.
51
+ * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
40
+#
52
+ */
41
+# You should have received a copy of the GNU General Public License
53
+void qemu_system_killed(int signum, pid_t pid)
42
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
54
{
43
+#
55
qatomic_cmpxchg(&state, RUNNING, TERMINATE);
44
+
56
qemu_notify_event();
45
+# creator
57
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
46
+owner=stefanha@redhat.com
58
BlockExportOptions *export_opts;
47
+
59
48
+seq=`basename $0`
60
#ifdef CONFIG_POSIX
49
+echo "QA output created by $seq"
61
- /*
50
+
62
- * Exit gracefully on various signals, which includes SIGTERM used
51
+status=1 # failure is the default!
63
- * by 'qemu-nbd -v -c'.
52
+
64
- */
53
+_cleanup()
65
- struct sigaction sa_sigterm;
54
+{
66
- memset(&sa_sigterm, 0, sizeof(sa_sigterm));
55
+ _cleanup_test_img
67
- sa_sigterm.sa_handler = termsig_handler;
56
+ rm -f "$TEST_IMG.converted"
68
- sigaction(SIGTERM, &sa_sigterm, NULL);
57
+}
69
- sigaction(SIGINT, &sa_sigterm, NULL);
58
+trap "_cleanup; exit \$status" 0 1 2 3 15
70
- sigaction(SIGHUP, &sa_sigterm, NULL);
59
+
71
-
60
+# get standard environment, filters and checks
72
- signal(SIGPIPE, SIG_IGN);
61
+. ./common.rc
73
+ os_setup_early_signal_handling();
62
+. ./common.filter
74
+ os_setup_signal_handling();
63
+. ./common.pattern
75
#endif
64
+
76
65
+_supported_fmt luks
77
socket_init();
66
+_supported_proto file
78
diff --git a/stubs/blk-exp-close-all.c b/stubs/blk-exp-close-all.c
67
+_supported_os Linux
68
+
69
+SECRET=secret,id=sec0,data=passphrase
70
+
71
+echo "== measure 1G image file =="
72
+echo
73
+
74
+$QEMU_IMG measure --object "$SECRET" \
75
+     -O "$IMGFMT" \
76
+         -o key-secret=sec0,iter-time=10 \
77
+         --size 1G
78
+
79
+echo
80
+echo "== create 1G image file (size should be no greater than measured) =="
81
+echo
82
+
83
+_make_test_img 1G
84
+stat -c "image file size in bytes: %s" "$TEST_IMG_FILE"
85
+
86
+echo
87
+echo "== modified 1G image file (size should be no greater than measured) =="
88
+echo
89
+
90
+$QEMU_IO --object "$SECRET" --image-opts "$TEST_IMG" -c "write -P 0x51 0x10000 0x400" | _filter_qemu_io | _filter_testdir
91
+stat -c "image file size in bytes: %s" "$TEST_IMG_FILE"
92
+
93
+echo
94
+echo "== measure preallocation=falloc 1G image file =="
95
+echo
96
+
97
+$QEMU_IMG measure --object "$SECRET" \
98
+     -O "$IMGFMT" \
99
+         -o key-secret=sec0,iter-time=10,preallocation=falloc \
100
+         --size 1G
101
+
102
+echo
103
+echo "== measure with input image file =="
104
+echo
105
+
106
+IMGFMT=raw IMGKEYSECRET= IMGOPTS= _make_test_img 1G | _filter_imgfmt
107
+QEMU_IO_OPTIONS= IMGOPTSSYNTAX= $QEMU_IO -f raw -c "write -P 0x51 0x10000 0x400" "$TEST_IMG_FILE" | _filter_qemu_io | _filter_testdir
108
+$QEMU_IMG measure --object "$SECRET" \
109
+     -O "$IMGFMT" \
110
+         -o key-secret=sec0,iter-time=10 \
111
+         -f raw \
112
+         "$TEST_IMG_FILE"
113
+
114
+# success, all done
115
+echo "*** done"
116
+rm -f $seq.full
117
+status=0
118
diff --git a/tests/qemu-iotests/288.out b/tests/qemu-iotests/288.out
119
new file mode 100644
79
new file mode 100644
120
index XXXXXXX..XXXXXXX
80
index XXXXXXX..XXXXXXX
121
--- /dev/null
81
--- /dev/null
122
+++ b/tests/qemu-iotests/288.out
82
+++ b/stubs/blk-exp-close-all.c
123
@@ -XXX,XX +XXX,XX @@
83
@@ -XXX,XX +XXX,XX @@
124
+QA output created by 288
84
+#include "qemu/osdep.h"
125
+== measure 1G image file ==
85
+#include "block/export.h"
126
+
86
+
127
+required size: 1075810304
87
+/* Only used in programs that support block exports (libblockdev.fa) */
128
+fully allocated size: 1075810304
88
+void blk_exp_close_all(void)
129
+
89
+{
130
+== create 1G image file (size should be no greater than measured) ==
90
+}
131
+
91
diff --git a/block/export/meson.build b/block/export/meson.build
132
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
133
+image file size in bytes: 1075810304
134
+
135
+== modified 1G image file (size should be no greater than measured) ==
136
+
137
+wrote 1024/1024 bytes at offset 65536
138
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
139
+image file size in bytes: 1075810304
140
+
141
+== measure preallocation=falloc 1G image file ==
142
+
143
+required size: 1075810304
144
+fully allocated size: 1075810304
145
+
146
+== measure with input image file ==
147
+
148
+Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1073741824
149
+wrote 1024/1024 bytes at offset 65536
150
+1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
151
+required size: 1075810304
152
+fully allocated size: 1075810304
153
+*** done
154
diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group
155
index XXXXXXX..XXXXXXX 100644
92
index XXXXXXX..XXXXXXX 100644
156
--- a/tests/qemu-iotests/group
93
--- a/block/export/meson.build
157
+++ b/tests/qemu-iotests/group
94
+++ b/block/export/meson.build
158
@@ -XXX,XX +XXX,XX @@
95
@@ -XXX,XX +XXX,XX @@
159
283 auto quick
96
-block_ss.add(files('export.c'))
160
284 rw
97
-block_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: files('vhost-user-blk-server.c'))
161
286 rw quick
98
+blockdev_ss.add(files('export.c'))
162
+288 quick
99
+blockdev_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: files('vhost-user-blk-server.c'))
100
diff --git a/meson.build b/meson.build
101
index XXXXXXX..XXXXXXX 100644
102
--- a/meson.build
103
+++ b/meson.build
104
@@ -XXX,XX +XXX,XX @@ subdir('dump')
105
106
block_ss.add(files(
107
'block.c',
108
- 'blockdev-nbd.c',
109
'blockjob.c',
110
'job.c',
111
'qemu-io-cmds.c',
112
@@ -XXX,XX +XXX,XX @@ subdir('block')
113
114
blockdev_ss.add(files(
115
'blockdev.c',
116
+ 'blockdev-nbd.c',
117
'iothread.c',
118
'job-qmp.c',
119
))
120
@@ -XXX,XX +XXX,XX @@ if have_tools
121
qemu_io = executable('qemu-io', files('qemu-io.c'),
122
dependencies: [block, qemuutil], install: true)
123
qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
124
- dependencies: [block, qemuutil], install: true)
125
+ dependencies: [blockdev, qemuutil], install: true)
126
127
subdir('storage-daemon')
128
subdir('contrib/rdmacm-mux')
129
diff --git a/nbd/meson.build b/nbd/meson.build
130
index XXXXXXX..XXXXXXX 100644
131
--- a/nbd/meson.build
132
+++ b/nbd/meson.build
133
@@ -XXX,XX +XXX,XX @@
134
block_ss.add(files(
135
'client.c',
136
'common.c',
137
+))
138
+blockdev_ss.add(files(
139
'server.c',
140
))
141
diff --git a/stubs/meson.build b/stubs/meson.build
142
index XXXXXXX..XXXXXXX 100644
143
--- a/stubs/meson.build
144
+++ b/stubs/meson.build
145
@@ -XXX,XX +XXX,XX @@
146
stub_ss.add(files('arch_type.c'))
147
stub_ss.add(files('bdrv-next-monitor-owned.c'))
148
stub_ss.add(files('blk-commit-all.c'))
149
+stub_ss.add(files('blk-exp-close-all.c'))
150
stub_ss.add(files('blockdev-close-all-bdrv-states.c'))
151
stub_ss.add(files('change-state-handler.c'))
152
stub_ss.add(files('cmos.c'))
163
--
153
--
164
2.24.1
154
2.26.2
165
155
166
1
From: Stefan Hajnoczi <stefanha@redhat.com>
1
Make it possible to specify the iothread where the export will run. By
2
2
default the block node can be moved to other AioContexts later and the
3
Add qemu-img measure support in the "luks" block driver.
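Measuring works both from a requested size and from an existing input
image; for example (file name and secret are illustrative):

  # space needed to convert an existing raw image to LUKS
  qemu-img measure --object secret,id=sec0,data=passphrase \
      -O luks -o key-secret=sec0 -f raw input.img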
3
export will follow. The fixed-iothread option forces strict behavior
4
that prevents changing AioContext while the export is active. See the
5
QAPI docs for details.
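A rough qemu-storage-daemon sketch of the new options (IDs, paths and
file names are illustrative):

  qemu-storage-daemon \
      --object iothread,id=iothread0 \
      --blockdev file,filename=disk.img,node-name=disk0 \
      --export vhost-user-blk,id=exp0,node-name=disk0,addr.type=unix,addr.path=/tmp/vubd.sock,iothread=iothread0,fixed-iothread=on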
4
6
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Message-id: 20200929125516.186715-5-stefanha@redhat.com
7
Message-Id: <20200221112522.1497712-3-stefanha@redhat.com>
9
[Fix stray '#' character in block-export.json and add missing "(since:
8
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
5.2)" as suggested by Eric Blake.
11
--Stefan]
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
13
---
10
block/crypto.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++
14
qapi/block-export.json | 11 ++++++++++
11
1 file changed, 62 insertions(+)
15
block/export/export.c | 31 +++++++++++++++++++++++++++-
16
block/export/vhost-user-blk-server.c | 5 ++++-
17
nbd/server.c | 2 --
18
4 files changed, 45 insertions(+), 4 deletions(-)
12
19
13
diff --git a/block/crypto.c b/block/crypto.c
20
diff --git a/qapi/block-export.json b/qapi/block-export.json
14
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
15
--- a/block/crypto.c
22
--- a/qapi/block-export.json
16
+++ b/block/crypto.c
23
+++ b/qapi/block-export.json
17
@@ -XXX,XX +XXX,XX @@ static int64_t block_crypto_getlength(BlockDriverState *bs)
24
@@ -XXX,XX +XXX,XX @@
18
}
25
# export before completion is signalled. (since: 5.2;
19
26
# default: false)
20
27
#
21
+static BlockMeasureInfo *block_crypto_measure(QemuOpts *opts,
28
+# @iothread: The name of the iothread object where the export will run. The
22
+ BlockDriverState *in_bs,
29
+# default is to use the thread currently associated with the
23
+ Error **errp)
30
+# block node. (since: 5.2)
24
+{
31
+#
25
+ g_autoptr(QCryptoBlockCreateOptions) create_opts = NULL;
32
+# @fixed-iothread: True prevents the block node from being moved to another
26
+ Error *local_err = NULL;
33
+# thread while the export is active. If true and @iothread is
27
+ BlockMeasureInfo *info;
34
+# given, export creation fails if the block node cannot be
28
+ uint64_t size;
35
+# moved to the iothread. The default is false. (since: 5.2)
29
+ size_t luks_payload_size;
36
+#
30
+ QDict *cryptoopts;
37
# Since: 4.2
38
##
39
{ 'union': 'BlockExportOptions',
40
'base': { 'type': 'BlockExportType',
41
'id': 'str',
42
+     '*fixed-iothread': 'bool',
43
+     '*iothread': 'str',
44
'node-name': 'str',
45
'*writable': 'bool',
46
'*writethrough': 'bool' },
47
diff --git a/block/export/export.c b/block/export/export.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/block/export/export.c
50
+++ b/block/export/export.c
51
@@ -XXX,XX +XXX,XX @@
52
53
#include "block/block.h"
54
#include "sysemu/block-backend.h"
55
+#include "sysemu/iothread.h"
56
#include "block/export.h"
57
#include "block/nbd.h"
58
#include "qapi/error.h"
59
@@ -XXX,XX +XXX,XX @@ static const BlockExportDriver *blk_exp_find_driver(BlockExportType type)
60
61
BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
62
{
63
+ bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread;
64
const BlockExportDriver *drv;
65
BlockExport *exp = NULL;
66
BlockDriverState *bs;
67
- BlockBackend *blk;
68
+ BlockBackend *blk = NULL;
69
AioContext *ctx;
70
uint64_t perm;
71
int ret;
72
@@ -XXX,XX +XXX,XX @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
73
ctx = bdrv_get_aio_context(bs);
74
aio_context_acquire(ctx);
75
76
+ if (export->has_iothread) {
77
+ IOThread *iothread;
78
+ AioContext *new_ctx;
31
+
79
+
32
+ /*
80
+ iothread = iothread_by_id(export->iothread);
33
+ * Preallocation mode doesn't affect size requirements but we must consume
81
+ if (!iothread) {
34
+ * the option.
82
+ error_setg(errp, "iothread \"%s\" not found", export->iothread);
35
+ */
83
+ goto fail;
36
+ g_free(qemu_opt_get_del(opts, BLOCK_OPT_PREALLOC));
37
+
38
+ size = qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0);
39
+
40
+ if (in_bs) {
41
+ int64_t ssize = bdrv_getlength(in_bs);
42
+
43
+ if (ssize < 0) {
44
+ error_setg_errno(&local_err, -ssize,
45
+ "Unable to get image virtual_size");
46
+ goto err;
47
+ }
84
+ }
48
+
85
+
49
+ size = ssize;
86
+ new_ctx = iothread_get_aio_context(iothread);
87
+
88
+ ret = bdrv_try_set_aio_context(bs, new_ctx, errp);
89
+ if (ret == 0) {
90
+ aio_context_release(ctx);
91
+ aio_context_acquire(new_ctx);
92
+ ctx = new_ctx;
93
+ } else if (fixed_iothread) {
94
+ goto fail;
95
+ }
50
+ }
96
+ }
51
+
97
+
52
+ cryptoopts = qemu_opts_to_qdict_filtered(opts, NULL,
98
/*
53
+ &block_crypto_create_opts_luks, true);
99
* Block exports are used for non-shared storage migration. Make sure
54
+ qdict_put_str(cryptoopts, "format", "luks");
100
* that BDRV_O_INACTIVE is cleared and the image is ready for write
55
+ create_opts = block_crypto_create_opts_init(cryptoopts, &local_err);
101
@@ -XXX,XX +XXX,XX @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
56
+ qobject_unref(cryptoopts);
102
}
57
+ if (!create_opts) {
103
58
+ goto err;
104
blk = blk_new(ctx, perm, BLK_PERM_ALL);
105
+
106
+ if (!fixed_iothread) {
107
+ blk_set_allow_aio_context_change(blk, true);
59
+ }
108
+ }
60
+
109
+
61
+ if (!qcrypto_block_calculate_payload_offset(create_opts, NULL,
110
ret = blk_insert_bs(blk, bs, errp);
62
+ &luks_payload_size,
111
if (ret < 0) {
63
+ &local_err)) {
112
goto fail;
64
+ goto err;
113
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
65
+ }
114
index XXXXXXX..XXXXXXX 100644
115
--- a/block/export/vhost-user-blk-server.c
116
+++ b/block/export/vhost-user-blk-server.c
117
@@ -XXX,XX +XXX,XX @@ static const VuDevIface vu_blk_iface = {
118
static void blk_aio_attached(AioContext *ctx, void *opaque)
119
{
120
VuBlkExport *vexp = opaque;
66
+
121
+
67
+ /*
122
+ vexp->export.ctx = ctx;
68
+ * Unallocated blocks are still encrypted so allocation status makes no
123
vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
69
+ * difference to the file size.
124
}
70
+ */
125
71
+ info = g_new(BlockMeasureInfo, 1);
126
static void blk_aio_detach(void *opaque)
72
+ info->fully_allocated = luks_payload_size + size;
127
{
73
+ info->required = luks_payload_size + size;
128
VuBlkExport *vexp = opaque;
74
+ return info;
75
+
129
+
76
+err:
130
vhost_user_server_detach_aio_context(&vexp->vu_server);
77
+ error_propagate(errp, local_err);
131
+ vexp->export.ctx = NULL;
78
+ return NULL;
132
}
79
+}
133
80
+
134
static void
81
+
135
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
82
static int block_crypto_probe_luks(const uint8_t *buf,
136
vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
83
int buf_size,
137
logical_block_size);
84
const char *filename) {
138
85
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_crypto_luks = {
139
- blk_set_allow_aio_context_change(exp->blk, true);
86
.bdrv_co_preadv = block_crypto_co_preadv,
140
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
87
.bdrv_co_pwritev = block_crypto_co_pwritev,
141
vexp);
88
.bdrv_getlength = block_crypto_getlength,
142
89
+ .bdrv_measure = block_crypto_measure,
143
diff --git a/nbd/server.c b/nbd/server.c
90
.bdrv_get_info = block_crypto_get_info_luks,
144
index XXXXXXX..XXXXXXX 100644
91
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
145
--- a/nbd/server.c
92
146
+++ b/nbd/server.c
147
@@ -XXX,XX +XXX,XX @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
148
return ret;
149
}
150
151
- blk_set_allow_aio_context_change(blk, true);
152
-
153
QTAILQ_INIT(&exp->clients);
154
exp->name = g_strdup(arg->name);
155
exp->description = g_strdup(arg->description);
93
--
156
--
94
2.24.1
157
2.26.2
95
158
96
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
Allow the number of queues to be configured using --export
2
vhost-user-blk,num-queues=N. This setting should match the QEMU --device
3
vhost-user-blk-pci,num-queues=N setting but QEMU vhost-user-blk.c lowers
4
its own value if the vhost-user-blk backend offers fewer queues than
5
QEMU.
2
6
3
Use bdrv_block_status_above to choose an effective chunk size and to handle
7
The vhost-user-blk-server.c code is already capable of multi-queue. All
4
zeroes effectively.
8
virtqueue processing runs in the same AioContext. No new locking is
9
needed.
5
10
6
This substitutes checking for just being allocated or not, and drops
11
Add the num-queues=N option and set the VIRTIO_BLK_F_MQ feature bit.
7
the old code path for it. Assistance by the backup job is dropped too, as
12
Note that the feature bit only announces the presence of the num_queues
8
caching block-status information is more difficult than just caching
13
configuration space field. It does not promise that there is more than 1
9
is-allocated information in our dirty bitmap, and backup job is not
14
virtqueue, so we can set it unconditionally.
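As an illustration, the option is matched on both sides roughly like
this (IDs and paths are examples only):

  # storage daemon: expose 4 request virtqueues
  qemu-storage-daemon \
      --blockdev file,filename=disk.img,node-name=disk0 \
      --export vhost-user-blk,id=exp0,node-name=disk0,addr.type=unix,addr.path=/tmp/vubd.sock,num-queues=4

  # QEMU: ask for the same number of queues on the device
  #   -chardev socket,id=char0,path=/tmp/vubd.sock
  #   -device vhost-user-blk-pci,chardev=char0,num-queues=4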
10
a good place for this caching anyway.
11
15
12
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
16
I tested multi-queue by running a random read fio test with numjobs=4 on
13
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
17
an -smp 4 guest. After the benchmark finished the guest /proc/interrupts
14
Reviewed-by: Max Reitz <mreitz@redhat.com>
18
file showed activity on all 4 virtio-blk MSI-X. The /sys/block/vda/mq/
15
Message-Id: <20200311103004.7649-5-vsementsov@virtuozzo.com>
19
directory shows that Linux blk-mq has 4 queues configured.
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
20
21
An automated test is included in the next commit.
22
23
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
24
Acked-by: Markus Armbruster <armbru@redhat.com>
25
Message-id: 20201001144604.559733-2-stefanha@redhat.com
26
[Fixed accidental tab characters as suggested by Markus Armbruster
27
--Stefan]
28
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
17
---
29
---
18
block/block-copy.c | 73 +++++++++++++++++++++++++++++++++++++---------
30
qapi/block-export.json | 10 +++++++---
19
block/trace-events | 1 +
31
block/export/vhost-user-blk-server.c | 24 ++++++++++++++++++------
20
2 files changed, 61 insertions(+), 13 deletions(-)
32
2 files changed, 25 insertions(+), 9 deletions(-)
21
33
22
diff --git a/block/block-copy.c b/block/block-copy.c
34
diff --git a/qapi/block-export.json b/qapi/block-export.json
23
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
24
--- a/block/block-copy.c
36
--- a/qapi/block-export.json
25
+++ b/block/block-copy.c
37
+++ b/qapi/block-export.json
26
@@ -XXX,XX +XXX,XX @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
38
@@ -XXX,XX +XXX,XX @@
27
*/
39
# SocketAddress types are supported. Passed fds must be UNIX domain
28
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
40
# sockets.
29
int64_t start, int64_t end,
41
# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes.
30
- bool *error_is_read)
42
+# @num-queues: Number of request virtqueues. Must be greater than 0. Defaults
31
+ bool zeroes, bool *error_is_read)
43
+# to 1.
44
#
45
# Since: 5.2
46
##
47
{ 'struct': 'BlockExportOptionsVhostUserBlk',
48
- 'data': { 'addr': 'SocketAddress', '*logical-block-size': 'size' } }
49
+ 'data': { 'addr': 'SocketAddress',
50
+     '*logical-block-size': 'size',
51
+ '*num-queues': 'uint16'} }
52
53
##
54
# @NbdServerAddOptions:
55
@@ -XXX,XX +XXX,XX @@
56
{ 'union': 'BlockExportOptions',
57
'base': { 'type': 'BlockExportType',
58
'id': 'str',
59
-     '*fixed-iothread': 'bool',
60
-     '*iothread': 'str',
61
+ '*fixed-iothread': 'bool',
62
+ '*iothread': 'str',
63
'node-name': 'str',
64
'*writable': 'bool',
65
'*writethrough': 'bool' },
66
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/block/export/vhost-user-blk-server.c
69
+++ b/block/export/vhost-user-blk-server.c
70
@@ -XXX,XX +XXX,XX @@
71
#include "util/block-helpers.h"
72
73
enum {
74
- VHOST_USER_BLK_MAX_QUEUES = 1,
75
+ VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
76
};
77
struct virtio_blk_inhdr {
78
unsigned char status;
79
@@ -XXX,XX +XXX,XX @@ static uint64_t vu_blk_get_features(VuDev *dev)
80
1ull << VIRTIO_BLK_F_DISCARD |
81
1ull << VIRTIO_BLK_F_WRITE_ZEROES |
82
1ull << VIRTIO_BLK_F_CONFIG_WCE |
83
+ 1ull << VIRTIO_BLK_F_MQ |
84
1ull << VIRTIO_F_VERSION_1 |
85
1ull << VIRTIO_RING_F_INDIRECT_DESC |
86
1ull << VIRTIO_RING_F_EVENT_IDX |
87
@@ -XXX,XX +XXX,XX @@ static void blk_aio_detach(void *opaque)
88
89
static void
90
vu_blk_initialize_config(BlockDriverState *bs,
91
- struct virtio_blk_config *config, uint32_t blk_size)
92
+ struct virtio_blk_config *config,
93
+ uint32_t blk_size,
94
+ uint16_t num_queues)
32
{
95
{
33
int ret;
96
config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
34
int nbytes = MIN(end, s->len) - start;
97
config->blk_size = blk_size;
35
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
98
@@ -XXX,XX +XXX,XX @@ vu_blk_initialize_config(BlockDriverState *bs,
36
assert(QEMU_IS_ALIGNED(end, s->cluster_size));
99
config->seg_max = 128 - 2;
37
assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
100
config->min_io_size = 1;
38
101
config->opt_io_size = 1;
39
+ if (zeroes) {
102
- config->num_queues = VHOST_USER_BLK_MAX_QUEUES;
40
+ ret = bdrv_co_pwrite_zeroes(s->target, start, nbytes, s->write_flags &
103
+ config->num_queues = num_queues;
41
+ ~BDRV_REQ_WRITE_COMPRESSED);
104
config->max_discard_sectors = 32768;
42
+ if (ret < 0) {
105
config->max_discard_seg = 1;
43
+ trace_block_copy_write_zeroes_fail(s, start, ret);
106
config->discard_sector_alignment = config->blk_size >> 9;
44
+ if (error_is_read) {
107
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
45
+ *error_is_read = false;
108
BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
46
+ }
109
Error *local_err = NULL;
47
+ }
110
uint64_t logical_block_size;
48
+ return ret;
111
+ uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;
112
113
vexp->writable = opts->writable;
114
vexp->blkcfg.wce = 0;
115
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
116
}
117
vexp->blk_size = logical_block_size;
118
blk_set_guest_block_size(exp->blk, logical_block_size);
119
+
120
+ if (vu_opts->has_num_queues) {
121
+ num_queues = vu_opts->num_queues;
122
+ }
123
+ if (num_queues == 0) {
124
+ error_setg(errp, "num-queues must be greater than 0");
125
+ return -EINVAL;
49
+ }
126
+ }
50
+
127
+
51
if (s->use_copy_range) {
128
vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
52
ret = bdrv_co_copy_range(s->source, start, s->target, start, nbytes,
129
- logical_block_size);
53
0, s->write_flags);
130
+ logical_block_size, num_queues);
54
@@ -XXX,XX +XXX,XX @@ out:
131
55
return ret;
132
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
56
}
133
vexp);
57
134
58
+static int block_copy_block_status(BlockCopyState *s, int64_t offset,
135
if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
59
+ int64_t bytes, int64_t *pnum)
136
- VHOST_USER_BLK_MAX_QUEUES, &vu_blk_iface,
60
+{
137
- errp)) {
61
+ int64_t num;
138
+ num_queues, &vu_blk_iface, errp)) {
62
+ BlockDriverState *base;
139
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
63
+ int ret;
140
blk_aio_detach, vexp);
64
+
141
return -EADDRNOTAVAIL;
65
+ if (s->skip_unallocated && s->source->bs->backing) {
66
+ base = s->source->bs->backing->bs;
67
+ } else {
68
+ base = NULL;
69
+ }
70
+
71
+ ret = bdrv_block_status_above(s->source->bs, base, offset, bytes, &num,
72
+ NULL, NULL);
73
+ if (ret < 0 || num < s->cluster_size) {
74
+ /*
75
+ * On error or if failed to obtain large enough chunk just fallback to
76
+ * copy one cluster.
77
+ */
78
+ num = s->cluster_size;
79
+ ret = BDRV_BLOCK_ALLOCATED | BDRV_BLOCK_DATA;
80
+ } else if (offset + num == s->len) {
81
+ num = QEMU_ALIGN_UP(num, s->cluster_size);
82
+ } else {
83
+ num = QEMU_ALIGN_DOWN(num, s->cluster_size);
84
+ }
85
+
86
+ *pnum = num;
87
+ return ret;
88
+}
89
+
90
/*
91
* Check if the cluster starting at offset is allocated or not.
92
* return via pnum the number of contiguous clusters sharing this allocation.
93
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
94
{
95
int ret = 0;
96
int64_t end = bytes + start; /* bytes */
97
- int64_t status_bytes;
98
BlockCopyInFlightReq req;
99
100
/*
101
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
102
block_copy_inflight_req_begin(s, &req, start, end);
103
104
while (start < end) {
105
- int64_t next_zero, chunk_end;
106
+ int64_t next_zero, chunk_end, status_bytes;
107
108
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
109
trace_block_copy_skip(s, start);
110
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
111
chunk_end = next_zero;
112
}
113
114
- if (s->skip_unallocated) {
115
- ret = block_copy_reset_unallocated(s, start, &status_bytes);
116
- if (ret == 0) {
117
- trace_block_copy_skip_range(s, start, status_bytes);
118
- start += status_bytes;
119
- continue;
120
- }
121
- /* Clamp to known allocated region */
122
- chunk_end = MIN(chunk_end, start + status_bytes);
123
+ ret = block_copy_block_status(s, start, chunk_end - start,
124
+ &status_bytes);
125
+ if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
126
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, start, status_bytes);
127
+ progress_set_remaining(s->progress,
128
+ bdrv_get_dirty_count(s->copy_bitmap) +
129
+ s->in_flight_bytes);
130
+ trace_block_copy_skip_range(s, start, status_bytes);
131
+ start += status_bytes;
132
+ continue;
133
}
134
135
+ chunk_end = MIN(chunk_end, start + status_bytes);
136
+
137
trace_block_copy_process(s, start);
138
139
bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
140
s->in_flight_bytes += chunk_end - start;
141
142
co_get_from_shres(s->mem, chunk_end - start);
143
- ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
144
+ ret = block_copy_do_copy(s, start, chunk_end, ret & BDRV_BLOCK_ZERO,
145
+ error_is_read);
146
co_put_to_shres(s->mem, chunk_end - start);
147
s->in_flight_bytes -= chunk_end - start;
148
if (ret < 0) {
149
diff --git a/block/trace-events b/block/trace-events
150
index XXXXXXX..XXXXXXX 100644
151
--- a/block/trace-events
152
+++ b/block/trace-events
153
@@ -XXX,XX +XXX,XX @@ block_copy_process(void *bcs, int64_t start) "bcs %p start %"PRId64
154
block_copy_copy_range_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
155
block_copy_read_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
156
block_copy_write_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
157
+block_copy_write_zeroes_fail(void *bcs, int64_t start, int ret) "bcs %p start %"PRId64" ret %d"
158
159
# ../blockdev.c
160
qmp_block_job_cancel(void *job) "job %p"
161
--
142
--
162
2.24.1
143
2.26.2
163
144
164
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Currently, the block_copy operation locks the whole requested region. But
3
bdrv_co_block_status_above has several design problems with handling
4
there is no reason to lock clusters that are already copied; it will
4
short backing files:
5
needlessly disturb other parallel block_copy requests.
6
5
7
Let's instead do the following:
6
1. With want_zeros=true, it may return ret with BDRV_BLOCK_ZERO but
7
without BDRV_BLOCK_ALLOCATED flag, when actually short backing file
8
which produces these after-EOF zeros is inside requested backing
9
sequence.
8
10
9
Lock only sub-region, which we are going to operate on. Then, after
11
2. With want_zero=false, it may return pnum=0 prior to actual EOF,
10
copying all dirty sub-regions, we should wait for intersecting
12
because of EOF of short backing file.
11
requests block-copy, if they failed, we should retry these new dirty
13
12
clusters.
14
Fix these things, making logic about short backing files clearer.
15
16
With fixed bdrv_block_status_above we also have to improve is_zero in
17
qcow2 code, otherwise iotest 154 will fail, because with this patch we
18
stop to merge zeros of different types (produced by fully unallocated
19
in the whole backing chain regions vs produced by short backing files).
20
21
Note also, that this patch leaves for another day the general problem
22
around block-status: misuse of BDRV_BLOCK_ALLOCATED as is-fs-allocated
23
vs go-to-backing.
13
24
14
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
25
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
15
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
26
Reviewed-by: Alberto Garcia <berto@igalia.com>
16
Message-Id: <20200311103004.7649-9-vsementsov@virtuozzo.com>
27
Reviewed-by: Eric Blake <eblake@redhat.com>
17
Signed-off-by: Max Reitz <mreitz@redhat.com>
28
Message-id: 20200924194003.22080-2-vsementsov@virtuozzo.com
29
[Fix s/comes/come/ as suggested by Eric Blake
30
--Stefan]
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
18
---
32
---
19
block/block-copy.c | 129 ++++++++++++++++++++++++++++++++++++---------
33
block/io.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------
20
1 file changed, 105 insertions(+), 24 deletions(-)
34
block/qcow2.c | 16 ++++++++++--
35
2 files changed, 68 insertions(+), 16 deletions(-)
21
36
22
diff --git a/block/block-copy.c b/block/block-copy.c
37
diff --git a/block/io.c b/block/io.c
23
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
24
--- a/block/block-copy.c
39
--- a/block/io.c
25
+++ b/block/block-copy.c
40
+++ b/block/io.c
26
@@ -XXX,XX +XXX,XX @@ static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
41
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
27
return NULL;
42
int64_t *map,
28
}
43
BlockDriverState **file)
29
30
-static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
31
- int64_t offset,
32
- int64_t bytes)
33
+/*
34
+ * If there are no intersecting requests return false. Otherwise, wait for the
35
+ * first found intersecting request to finish and return true.
36
+ */
37
+static bool coroutine_fn block_copy_wait_one(BlockCopyState *s, int64_t offset,
38
+ int64_t bytes)
39
{
44
{
40
- BlockCopyInFlightReq *req;
45
+ int ret;
41
+ BlockCopyInFlightReq *req = find_conflicting_inflight_req(s, offset, bytes);
46
BlockDriverState *p;
42
47
- int ret = 0;
43
- while ((req = find_conflicting_inflight_req(s, offset, bytes))) {
48
- bool first = true;
44
- qemu_co_queue_wait(&req->wait_queue, NULL);
49
+ int64_t eof = 0;
45
+ if (!req) {
50
46
+ return false;
51
assert(bs != base);
47
}
52
- for (p = bs; p != base; p = bdrv_filter_or_cow_bs(p)) {
48
+
53
+
49
+ qemu_co_queue_wait(&req->wait_queue, NULL);
54
+ ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
50
+
55
+ if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED) {
51
+ return true;
56
+ return ret;
52
}
53
54
+/* Called only on full-dirty region */
55
static void block_copy_inflight_req_begin(BlockCopyState *s,
56
BlockCopyInFlightReq *req,
57
int64_t offset, int64_t bytes)
58
{
59
+ assert(!find_conflicting_inflight_req(s, offset, bytes));
60
+
61
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
62
+ s->in_flight_bytes += bytes;
63
+
64
req->offset = offset;
65
req->bytes = bytes;
66
qemu_co_queue_init(&req->wait_queue);
67
QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
68
}
69
70
-static void coroutine_fn block_copy_inflight_req_end(BlockCopyInFlightReq *req)
71
+/*
72
+ * block_copy_inflight_req_shrink
73
+ *
74
+ * Drop the tail of the request to be handled later. Set dirty bits back and
75
+ * wake up all requests waiting for us (may be some of them are not intersecting
76
+ * with shrunk request)
77
+ */
78
+static void coroutine_fn block_copy_inflight_req_shrink(BlockCopyState *s,
79
+ BlockCopyInFlightReq *req, int64_t new_bytes)
80
{
81
+ if (new_bytes == req->bytes) {
82
+ return;
83
+ }
57
+ }
84
+
58
+
85
+ assert(new_bytes > 0 && new_bytes < req->bytes);
59
+ if (ret & BDRV_BLOCK_EOF) {
60
+ eof = offset + *pnum;
61
+ }
86
+
62
+
87
+ s->in_flight_bytes -= req->bytes - new_bytes;
63
+ assert(*pnum <= bytes);
88
+ bdrv_set_dirty_bitmap(s->copy_bitmap,
64
+ bytes = *pnum;
89
+ req->offset + new_bytes, req->bytes - new_bytes);
90
+
65
+
91
+ req->bytes = new_bytes;
66
+ for (p = bdrv_filter_or_cow_bs(bs); p != base;
92
+ qemu_co_queue_restart_all(&req->wait_queue);
67
+ p = bdrv_filter_or_cow_bs(p))
93
+}
68
+ {
94
+
69
ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
95
+static void coroutine_fn block_copy_inflight_req_end(BlockCopyState *s,
70
file);
96
+ BlockCopyInFlightReq *req,
97
+ int ret)
98
+{
99
+ s->in_flight_bytes -= req->bytes;
100
+ if (ret < 0) {
101
+ bdrv_set_dirty_bitmap(s->copy_bitmap, req->offset, req->bytes);
102
+ }
103
QLIST_REMOVE(req, list);
104
qemu_co_queue_restart_all(&req->wait_queue);
105
}
106
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
107
return ret;
108
}
109
110
-int coroutine_fn block_copy(BlockCopyState *s,
111
- int64_t offset, int64_t bytes,
112
- bool *error_is_read)
113
+/*
114
+ * block_copy_dirty_clusters
115
+ *
116
+ * Copy dirty clusters in @offset/@bytes range.
117
+ * Returns 1 if dirty clusters found and successfully copied, 0 if no dirty
118
+ * clusters found and -errno on failure.
119
+ */
120
+static int coroutine_fn block_copy_dirty_clusters(BlockCopyState *s,
121
+ int64_t offset, int64_t bytes,
122
+ bool *error_is_read)
123
{
124
int ret = 0;
125
- BlockCopyInFlightReq req;
126
+ bool found_dirty = false;
127
128
/*
129
* block_copy() user is responsible for keeping source and target in same
130
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
131
assert(QEMU_IS_ALIGNED(offset, s->cluster_size));
132
assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
133
134
- block_copy_wait_inflight_reqs(s, offset, bytes);
135
- block_copy_inflight_req_begin(s, &req, offset, bytes);
136
-
137
while (bytes) {
138
+ BlockCopyInFlightReq req;
139
int64_t next_zero, cur_bytes, status_bytes;
140
141
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, offset)) {
142
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
143
continue; /* already copied */
144
}
145
146
+ found_dirty = true;
147
+
148
cur_bytes = MIN(bytes, s->copy_size);
149
150
next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, offset,
151
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
152
assert(next_zero < offset + cur_bytes); /* no need to do MIN() */
153
cur_bytes = next_zero - offset;
154
}
155
+ block_copy_inflight_req_begin(s, &req, offset, cur_bytes);
156
157
ret = block_copy_block_status(s, offset, cur_bytes, &status_bytes);
158
+ assert(ret >= 0); /* never fail */
159
+ cur_bytes = MIN(cur_bytes, status_bytes);
160
+ block_copy_inflight_req_shrink(s, &req, cur_bytes);
161
if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
162
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, status_bytes);
163
+ block_copy_inflight_req_end(s, &req, 0);
164
progress_set_remaining(s->progress,
165
bdrv_get_dirty_count(s->copy_bitmap) +
166
s->in_flight_bytes);
167
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
168
continue;
169
}
170
171
- cur_bytes = MIN(cur_bytes, status_bytes);
172
-
173
trace_block_copy_process(s, offset);
174
175
- bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
176
- s->in_flight_bytes += cur_bytes;
177
-
178
co_get_from_shres(s->mem, cur_bytes);
179
ret = block_copy_do_copy(s, offset, cur_bytes, ret & BDRV_BLOCK_ZERO,
180
error_is_read);
181
co_put_to_shres(s->mem, cur_bytes);
182
- s->in_flight_bytes -= cur_bytes;
183
+ block_copy_inflight_req_end(s, &req, ret);
184
if (ret < 0) {
71
if (ret < 0) {
185
- bdrv_set_dirty_bitmap(s->copy_bitmap, offset, cur_bytes);
186
- break;
72
- break;
187
+ return ret;
73
+ return ret;
188
}
74
}
189
75
- if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
190
progress_work_done(s->progress, cur_bytes);
76
+ if (*pnum == 0) {
191
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
77
/*
192
bytes -= cur_bytes;
78
- * Reading beyond the end of the file continues to read
193
}
79
- * zeroes, but we can only widen the result to the
194
80
- * unallocated length we learned from an earlier
195
- block_copy_inflight_req_end(&req);
81
- * iteration.
196
+ return found_dirty;
82
+ * The top layer deferred to this layer, and because this layer is
197
+}
83
+ * short, any zeroes that we synthesize beyond EOF behave as if they
198
+
84
+ * were allocated at this layer.
199
+/*
85
+ *
200
+ * block_copy
86
+ * We don't include BDRV_BLOCK_EOF into ret, as upper layer may be
201
+ *
87
+ * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
202
+ * Copy requested region, accordingly to dirty bitmap.
88
+ * below.
203
+ * Collaborate with parallel block_copy requests: if they succeed it will help
89
*/
204
+ * us. If they fail, we will retry not-copied regions. So, if we return error,
90
+ assert(ret & BDRV_BLOCK_EOF);
205
+ * it means that some I/O operation failed in context of _this_ block_copy call,
91
*pnum = bytes;
206
+ * not some parallel operation.
92
+ if (file) {
207
+ */
93
+ *file = p;
208
+int coroutine_fn block_copy(BlockCopyState *s, int64_t offset, int64_t bytes,
94
+ }
209
+ bool *error_is_read)
95
+ ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
210
+{
96
+ break;
211
+ int ret;
97
}
212
+
98
- if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
213
+ do {
99
+ if (ret & BDRV_BLOCK_ALLOCATED) {
214
+ ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read);
100
+ /*
215
+
101
+ * We've found the node and the status, we must break.
216
+ if (ret == 0) {
102
+ *
217
+ ret = block_copy_wait_one(s, offset, bytes);
103
+ * Drop BDRV_BLOCK_EOF, as it's not for upper layer, which may be
218
+ }
104
+ * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
105
+ * below.
106
+ */
107
+ ret &= ~BDRV_BLOCK_EOF;
108
break;
109
}
110
- /* [offset, pnum] unallocated on this layer, which could be only
111
- * the first part of [offset, bytes]. */
112
- bytes = MIN(bytes, *pnum);
113
- first = false;
219
+
114
+
220
+ /*
115
+ /*
221
+ * We retry in two cases:
116
+ * OK, [offset, offset + *pnum) region is unallocated on this layer,
222
+ * 1. Some progress done
117
+ * let's continue the diving.
223
+ * Something was copied, which means that there were yield points
224
+ * and some new dirty bits may have appeared (due to failed parallel
225
+ * block-copy requests).
226
+ * 2. We have waited for some intersecting block-copy request
227
+ * It may have failed and produced new dirty bits.
228
+ */
118
+ */
229
+ } while (ret > 0);
119
+ assert(*pnum <= bytes);
230
120
+ bytes = *pnum;
121
+ }
122
+
123
+ if (offset + *pnum == eof) {
124
+ ret |= BDRV_BLOCK_EOF;
125
}
126
+
231
return ret;
127
return ret;
232
}
128
}
129
130
diff --git a/block/qcow2.c b/block/qcow2.c
131
index XXXXXXX..XXXXXXX 100644
132
--- a/block/qcow2.c
133
+++ b/block/qcow2.c
134
@@ -XXX,XX +XXX,XX @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
135
if (!bytes) {
136
return true;
137
}
138
- res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
139
- return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
140
+
141
+ /*
142
+ * bdrv_block_status_above doesn't merge different types of zeros, for
143
+ * example, zeros which come from the region which is unallocated in
144
+ * the whole backing chain, and zeros which come because of a short
145
+ * backing file. So, we need a loop.
146
+ */
147
+ do {
148
+ res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
149
+ offset += nr;
150
+ bytes -= nr;
151
+ } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes);
152
+
153
+ return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
154
}
155
156
static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
233
--
157
--
234
2.24.1
158
2.26.2
235
159
236
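
As an illustration of the block_copy() rework above (a minimal sketch only, not the patch itself; it assumes the block_copy_dirty_clusters() and block_copy_wait_one() helpers introduced in the hunks above, and the sketch function name is hypothetical):

    /*
     * Minimal sketch of the retry loop: keep copying dirty clusters; when no
     * progress was made, wait for an intersecting in-flight request, which
     * may fail and re-dirty part of this range, then re-check the bitmap.
     */
    static int coroutine_fn block_copy_sketch(BlockCopyState *s, int64_t offset,
                                              int64_t bytes, bool *error_is_read)
    {
        int ret;

        do {
            ret = block_copy_dirty_clusters(s, offset, bytes, error_is_read);
            if (ret == 0) {
                /* No dirty clusters seen: wait for a conflicting request. */
                ret = block_copy_wait_one(s, offset, bytes);
            }
            /* ret > 0 means progress was made or a wait happened. */
        } while (ret > 0);

        return ret;
    }
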
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Assume we have two regions, A and B: region B is in flight now,
3
In order to reuse bdrv_common_block_status_above in
4
while region A is not yet touched, but it is unallocated and should be
4
bdrv_is_allocated_above, let's support an include_base parameter.
5
skipped.
6
5
7
Correspondingly, for the progress accounting we have
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Reviewed-by: Alberto Garcia <berto@igalia.com>
8
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Message-id: 20200924194003.22080-3-vsementsov@virtuozzo.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
block/coroutines.h | 2 ++
13
block/io.c | 21 ++++++++++++++-------
14
2 files changed, 16 insertions(+), 7 deletions(-)
8
15
9
total = A + B
16
diff --git a/block/coroutines.h b/block/coroutines.h
10
current = 0
11
12
If we reset unallocated region A and call progress_reset_callback,
13
it will calculate 0 bytes dirty in the bitmap and call
14
job_progress_set_remaining, which will set
15
16
total = current + 0 = 0 + 0 = 0
17
18
So, B bytes are actually removed from the total accounting. When the job
19
finishes we'll have
20
21
total = 0
22
current = B
23
24
, which doesn't sound good.
25
26
This is because we didn't consider in-flight bytes; actually, when
27
calculating remaining, we should have set (in_flight + dirty_bytes)
28
as remaining, not only dirty_bytes.
29
30
To fix it, let's refactor progress calculation, moving it to block-copy
31
itself instead of fixing the callback. And, of course, track the in_flight
32
bytes count.
33
34
We still have to keep one callback, to maintain backup job bytes_read
35
calculation, but it will go away soon, when we turn the whole backup
36
process into one block_copy call.
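
To make the accounting concrete (an illustrative sketch only, using the names from this patch; update_remaining() is a hypothetical helper, not part of the series): whenever bytes are reset from the bitmap or a request finishes, the remaining work is the sum of still-dirty bytes and bytes currently in flight:

    /* Hypothetical helper, illustrative only: remaining work must count both
     * the still-dirty bytes and the bytes currently being copied. */
    static void update_remaining(BlockCopyState *s)
    {
        progress_set_remaining(s->progress,
                               bdrv_get_dirty_count(s->copy_bitmap) +
                               s->in_flight_bytes);
    }

With region B (say 1 MiB) in flight and region A reset, this reports 1 MiB remaining instead of 0.
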
37
38
Cc: qemu-stable@nongnu.org
39
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
40
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
41
Message-Id: <20200311103004.7649-3-vsementsov@virtuozzo.com>
42
Signed-off-by: Max Reitz <mreitz@redhat.com>
43
---
44
block/backup.c | 13 ++-----------
45
block/block-copy.c | 16 ++++++++++++----
46
include/block/block-copy.h | 15 +++++----------
47
3 files changed, 19 insertions(+), 25 deletions(-)
48
49
diff --git a/block/backup.c b/block/backup.c
50
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
51
--- a/block/backup.c
18
--- a/block/coroutines.h
52
+++ b/block/backup.c
19
+++ b/block/coroutines.h
53
@@ -XXX,XX +XXX,XX @@ static void backup_progress_bytes_callback(int64_t bytes, void *opaque)
20
@@ -XXX,XX +XXX,XX @@ bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
54
BackupBlockJob *s = opaque;
21
int coroutine_fn
55
22
bdrv_co_common_block_status_above(BlockDriverState *bs,
56
s->bytes_read += bytes;
23
BlockDriverState *base,
57
- job_progress_update(&s->common.job, bytes);
24
+ bool include_base,
58
-}
25
bool want_zero,
59
-
26
int64_t offset,
60
-static void backup_progress_reset_callback(void *opaque)
27
int64_t bytes,
61
-{
28
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
62
- BackupBlockJob *s = opaque;
29
int generated_co_wrapper
63
- uint64_t estimate = bdrv_get_dirty_count(s->bcs->copy_bitmap);
30
bdrv_common_block_status_above(BlockDriverState *bs,
64
-
31
BlockDriverState *base,
65
- job_progress_set_remaining(&s->common.job, estimate);
32
+ bool include_base,
66
}
33
bool want_zero,
67
34
int64_t offset,
68
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
35
int64_t bytes,
69
@@ -XXX,XX +XXX,XX @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
36
diff --git a/block/io.c b/block/io.c
70
job->cluster_size = cluster_size;
71
job->len = len;
72
73
- block_copy_set_callbacks(bcs, backup_progress_bytes_callback,
74
- backup_progress_reset_callback, job);
75
+ block_copy_set_progress_callback(bcs, backup_progress_bytes_callback, job);
76
+ block_copy_set_progress_meter(bcs, &job->common.job.progress);
77
78
/* Required permissions are already taken by backup-top target */
79
block_job_add_bdrv(&job->common, "target", target, 0, BLK_PERM_ALL,
80
diff --git a/block/block-copy.c b/block/block-copy.c
81
index XXXXXXX..XXXXXXX 100644
37
index XXXXXXX..XXXXXXX 100644
82
--- a/block/block-copy.c
38
--- a/block/io.c
83
+++ b/block/block-copy.c
39
+++ b/block/io.c
84
@@ -XXX,XX +XXX,XX @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
40
@@ -XXX,XX +XXX,XX @@ early_out:
85
return s;
41
int coroutine_fn
86
}
42
bdrv_co_common_block_status_above(BlockDriverState *bs,
87
43
BlockDriverState *base,
88
-void block_copy_set_callbacks(
44
+ bool include_base,
89
+void block_copy_set_progress_callback(
45
bool want_zero,
90
BlockCopyState *s,
46
int64_t offset,
91
ProgressBytesCallbackFunc progress_bytes_callback,
47
int64_t bytes,
92
- ProgressResetCallbackFunc progress_reset_callback,
48
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
93
void *progress_opaque)
49
BlockDriverState *p;
94
{
50
int64_t eof = 0;
95
s->progress_bytes_callback = progress_bytes_callback;
51
96
- s->progress_reset_callback = progress_reset_callback;
52
- assert(bs != base);
97
s->progress_opaque = progress_opaque;
53
+ assert(include_base || bs != base);
98
}
54
+ assert(!include_base || base); /* Can't include NULL base */
99
55
100
+void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
56
ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
101
+{
57
- if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED) {
102
+ s->progress = pm;
58
+ if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
103
+}
59
return ret;
104
+
105
/*
106
* block_copy_do_copy
107
*
108
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
109
110
if (!ret) {
111
bdrv_reset_dirty_bitmap(s->copy_bitmap, offset, bytes);
112
- s->progress_reset_callback(s->progress_opaque);
113
+ progress_set_remaining(s->progress,
114
+ bdrv_get_dirty_count(s->copy_bitmap) +
115
+ s->in_flight_bytes);
116
}
60
}
117
61
118
*count = bytes;
62
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
119
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
63
assert(*pnum <= bytes);
120
trace_block_copy_process(s, start);
64
bytes = *pnum;
121
65
122
bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
66
- for (p = bdrv_filter_or_cow_bs(bs); p != base;
123
+ s->in_flight_bytes += chunk_end - start;
67
+ for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
124
68
p = bdrv_filter_or_cow_bs(p))
125
co_get_from_shres(s->mem, chunk_end - start);
69
{
126
ret = block_copy_do_copy(s, start, chunk_end, error_is_read);
70
ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
127
co_put_to_shres(s->mem, chunk_end - start);
71
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
128
+ s->in_flight_bytes -= chunk_end - start;
129
if (ret < 0) {
130
bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
131
break;
72
break;
132
}
73
}
133
74
134
+ progress_work_done(s->progress, chunk_end - start);
75
+ if (p == base) {
135
s->progress_bytes_callback(chunk_end - start, s->progress_opaque);
76
+ assert(include_base);
136
start = chunk_end;
77
+ break;
137
ret = 0;
78
+ }
138
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
139
index XXXXXXX..XXXXXXX 100644
140
--- a/include/block/block-copy.h
141
+++ b/include/block/block-copy.h
142
@@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyInFlightReq {
143
} BlockCopyInFlightReq;
144
145
typedef void (*ProgressBytesCallbackFunc)(int64_t bytes, void *opaque);
146
-typedef void (*ProgressResetCallbackFunc)(void *opaque);
147
typedef struct BlockCopyState {
148
/*
149
* BdrvChild objects are not owned or managed by block-copy. They are
150
@@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyState {
151
BdrvChild *source;
152
BdrvChild *target;
153
BdrvDirtyBitmap *copy_bitmap;
154
+ int64_t in_flight_bytes;
155
int64_t cluster_size;
156
bool use_copy_range;
157
int64_t copy_size;
158
@@ -XXX,XX +XXX,XX @@ typedef struct BlockCopyState {
159
*/
160
bool skip_unallocated;
161
162
+ ProgressMeter *progress;
163
/* progress_bytes_callback: called when some copying progress is done. */
164
ProgressBytesCallbackFunc progress_bytes_callback;
165
-
166
- /*
167
- * progress_reset_callback: called when some bytes reset from copy_bitmap
168
- * (see @skip_unallocated above). The callee is assumed to recalculate how
169
- * many bytes remain based on the dirty bit count of copy_bitmap.
170
- */
171
- ProgressResetCallbackFunc progress_reset_callback;
172
void *progress_opaque;
173
174
SharedResource *mem;
175
@@ -XXX,XX +XXX,XX @@ BlockCopyState *block_copy_state_new(BdrvChild *source, BdrvChild *target,
176
BdrvRequestFlags write_flags,
177
Error **errp);
178
179
-void block_copy_set_callbacks(
180
+void block_copy_set_progress_callback(
181
BlockCopyState *s,
182
ProgressBytesCallbackFunc progress_bytes_callback,
183
- ProgressResetCallbackFunc progress_reset_callback,
184
void *progress_opaque);
185
186
+void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm);
187
+
79
+
188
void block_copy_state_free(BlockCopyState *s);
80
/*
189
81
* OK, [offset, offset + *pnum) region is unallocated on this layer,
190
int64_t block_copy_reset_unallocated(BlockCopyState *s,
82
* let's continue the diving.
83
@@ -XXX,XX +XXX,XX @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
84
int64_t offset, int64_t bytes, int64_t *pnum,
85
int64_t *map, BlockDriverState **file)
86
{
87
- return bdrv_common_block_status_above(bs, base, true, offset, bytes,
88
+ return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
89
pnum, map, file);
90
}
91
92
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
93
int ret;
94
int64_t dummy;
95
96
- ret = bdrv_common_block_status_above(bs, bdrv_filter_or_cow_bs(bs), false,
97
- offset, bytes, pnum ? pnum : &dummy,
98
- NULL, NULL);
99
+ ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
100
+ bytes, pnum ? pnum : &dummy, NULL,
101
+ NULL);
102
if (ret < 0) {
103
return ret;
104
}
191
--
105
--
192
2.24.1
106
2.26.2
193
107
194
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Split find_conflicting_inflight_req to be used separately.
3
We are going to reuse bdrv_common_block_status_above in
4
bdrv_is_allocated_above. bdrv_is_allocated_above may be called with
5
include_base == false and still bs == base (e.g. from img_rebase()).
6
7
So, support this corner case.
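
A minimal sketch of that corner case (illustrative only, mirroring the early return added at the top of bdrv_co_common_block_status_above() in the hunk below): when the caller did not ask to include the base and bs itself is the base, there is nothing left to query, so the whole range is reported as unallocated:

    /* Illustrative only: early return for the bs == base corner case. */
    if (!include_base && bs == base) {
        *pnum = bytes;
        return 0;
    }
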
4
8
5
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
9
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
10
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
11
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Message-Id: <20200311103004.7649-6-vsementsov@virtuozzo.com>
12
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Signed-off-by: Max Reitz <mreitz@redhat.com>
13
Message-id: 20200924194003.22080-4-vsementsov@virtuozzo.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
15
---
11
block/block-copy.c | 31 +++++++++++++++++++------------
16
block/io.c | 6 +++++-
12
1 file changed, 19 insertions(+), 12 deletions(-)
17
1 file changed, 5 insertions(+), 1 deletion(-)
13
18
14
diff --git a/block/block-copy.c b/block/block-copy.c
19
diff --git a/block/io.c b/block/io.c
15
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
16
--- a/block/block-copy.c
21
--- a/block/io.c
17
+++ b/block/block-copy.c
22
+++ b/block/io.c
18
@@ -XXX,XX +XXX,XX @@
23
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
19
#define BLOCK_COPY_MAX_BUFFER (1 * MiB)
24
BlockDriverState *p;
20
#define BLOCK_COPY_MAX_MEM (128 * MiB)
25
int64_t eof = 0;
21
26
22
+static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
27
- assert(include_base || bs != base);
23
+ int64_t start,
28
assert(!include_base || base); /* Can't include NULL base */
24
+ int64_t end)
29
25
+{
30
+ if (!include_base && bs == base) {
26
+ BlockCopyInFlightReq *req;
31
+ *pnum = bytes;
27
+
32
+ return 0;
28
+ QLIST_FOREACH(req, &s->inflight_reqs, list) {
29
+ if (end > req->start_byte && start < req->end_byte) {
30
+ return req;
31
+ }
32
+ }
33
+ }
33
+
34
+
34
+ return NULL;
35
ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
35
+}
36
if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
36
+
37
return ret;
37
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
38
int64_t start,
39
int64_t end)
40
{
41
BlockCopyInFlightReq *req;
42
- bool waited;
43
-
44
- do {
45
- waited = false;
46
- QLIST_FOREACH(req, &s->inflight_reqs, list) {
47
- if (end > req->start_byte && start < req->end_byte) {
48
- qemu_co_queue_wait(&req->wait_queue, NULL);
49
- waited = true;
50
- break;
51
- }
52
- }
53
- } while (waited);
54
+
55
+ while ((req = find_conflicting_inflight_req(s, start, end))) {
56
+ qemu_co_queue_wait(&req->wait_queue, NULL);
57
+ }
58
}
59
60
static void block_copy_inflight_req_begin(BlockCopyState *s,
61
--
38
--
62
2.24.1
39
2.26.2
63
40
64
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
We have a lot of "chunk_end - start" invocations, let's switch to
3
bdrv_is_allocated_above wrongly handles short backing files: it reports
4
bytes/cur_bytes scheme instead.
4
after-EOF space as UNALLOCATED which is wrong, as on read the data is
5
generated at the level of the short backing file (if all overlays have
6
unallocated areas at that place).
5
7
6
While being here, improve check on block_copy_do_copy parameters to not
8
Reusing bdrv_common_block_status_above fixes the issue and unifies code
7
overflow when calculating nbytes and use int64_t for bytes in
9
path.
8
block_copy for consistency.
9
10
10
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
11
Reviewed-by: Andrey Shinkevich <andrey.shinkevich@virtuozzo.com>
12
Reviewed-by: Eric Blake <eblake@redhat.com>
12
Reviewed-by: Max Reitz <mreitz@redhat.com>
13
Reviewed-by: Alberto Garcia <berto@igalia.com>
13
Message-Id: <20200311103004.7649-7-vsementsov@virtuozzo.com>
14
Message-id: 20200924194003.22080-5-vsementsov@virtuozzo.com
14
Signed-off-by: Max Reitz <mreitz@redhat.com>
15
[Fix s/has/have/ as suggested by Eric Blake. Fix s/area/areas/.
16
--Stefan]
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
18
---
16
block/block-copy.c | 78 ++++++++++++++++++++------------------
19
block/io.c | 43 +++++--------------------------------------
17
include/block/block-copy.h | 6 +--
20
1 file changed, 5 insertions(+), 38 deletions(-)
18
2 files changed, 44 insertions(+), 40 deletions(-)
19
21
20
diff --git a/block/block-copy.c b/block/block-copy.c
22
diff --git a/block/io.c b/block/io.c
21
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
22
--- a/block/block-copy.c
24
--- a/block/io.c
23
+++ b/block/block-copy.c
25
+++ b/block/io.c
24
@@ -XXX,XX +XXX,XX @@
26
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
25
27
* at 'offset + *pnum' may return the same allocation status (in other
26
static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
28
* words, the result is not necessarily the maximum possible range);
27
int64_t start,
29
* but 'pnum' will only be 0 when end of file is reached.
28
- int64_t end)
30
- *
29
+ int64_t bytes)
31
*/
32
int bdrv_is_allocated_above(BlockDriverState *top,
33
BlockDriverState *base,
34
bool include_base, int64_t offset,
35
int64_t bytes, int64_t *pnum)
30
{
36
{
31
BlockCopyInFlightReq *req;
37
- BlockDriverState *intermediate;
32
38
- int ret;
33
QLIST_FOREACH(req, &s->inflight_reqs, list) {
39
- int64_t n = bytes;
34
- if (end > req->start_byte && start < req->end_byte) {
40
-
35
+ if (start + bytes > req->start && start < req->start + req->bytes) {
41
- assert(base || !include_base);
36
return req;
42
-
37
}
43
- intermediate = top;
44
- while (include_base || intermediate != base) {
45
- int64_t pnum_inter;
46
- int64_t size_inter;
47
-
48
- assert(intermediate);
49
- ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
50
- if (ret < 0) {
51
- return ret;
52
- }
53
- if (ret) {
54
- *pnum = pnum_inter;
55
- return 1;
56
- }
57
-
58
- size_inter = bdrv_getlength(intermediate);
59
- if (size_inter < 0) {
60
- return size_inter;
61
- }
62
- if (n > pnum_inter &&
63
- (intermediate == top || offset + pnum_inter < size_inter)) {
64
- n = pnum_inter;
65
- }
66
-
67
- if (intermediate == base) {
68
- break;
69
- }
70
-
71
- intermediate = bdrv_filter_or_cow_bs(intermediate);
72
+ int ret = bdrv_common_block_status_above(top, base, include_base, false,
73
+ offset, bytes, pnum, NULL, NULL);
74
+ if (ret < 0) {
75
+ return ret;
38
}
76
}
39
@@ -XXX,XX +XXX,XX @@ static BlockCopyInFlightReq *find_conflicting_inflight_req(BlockCopyState *s,
77
40
78
- *pnum = n;
41
static void coroutine_fn block_copy_wait_inflight_reqs(BlockCopyState *s,
79
- return 0;
42
int64_t start,
80
+ return !!(ret & BDRV_BLOCK_ALLOCATED);
43
- int64_t end)
44
+ int64_t bytes)
45
{
46
BlockCopyInFlightReq *req;
47
48
- while ((req = find_conflicting_inflight_req(s, start, end))) {
49
+ while ((req = find_conflicting_inflight_req(s, start, bytes))) {
50
qemu_co_queue_wait(&req->wait_queue, NULL);
51
}
52
}
81
}
53
82
54
static void block_copy_inflight_req_begin(BlockCopyState *s,
83
int coroutine_fn
55
BlockCopyInFlightReq *req,
56
- int64_t start, int64_t end)
57
+ int64_t start, int64_t bytes)
58
{
59
- req->start_byte = start;
60
- req->end_byte = end;
61
+ req->start = start;
62
+ req->bytes = bytes;
63
qemu_co_queue_init(&req->wait_queue);
64
QLIST_INSERT_HEAD(&s->inflight_reqs, req, list);
65
}
66
@@ -XXX,XX +XXX,XX @@ void block_copy_set_progress_meter(BlockCopyState *s, ProgressMeter *pm)
67
/*
68
* block_copy_do_copy
69
*
70
- * Do copy of cluser-aligned chunk. @end is allowed to exceed s->len only to
71
- * cover last cluster when s->len is not aligned to clusters.
72
+ * Do copy of cluster-aligned chunk. Requested region is allowed to exceed
73
+ * s->len only to cover last cluster when s->len is not aligned to clusters.
74
*
75
* No sync here: nor bitmap neighter intersecting requests handling, only copy.
76
*
77
* Returns 0 on success.
78
*/
79
static int coroutine_fn block_copy_do_copy(BlockCopyState *s,
80
- int64_t start, int64_t end,
81
+ int64_t start, int64_t bytes,
82
bool zeroes, bool *error_is_read)
83
{
84
int ret;
85
- int nbytes = MIN(end, s->len) - start;
86
+ int64_t nbytes = MIN(start + bytes, s->len) - start;
87
void *bounce_buffer = NULL;
88
89
+ assert(start >= 0 && bytes > 0 && INT64_MAX - start >= bytes);
90
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
91
- assert(QEMU_IS_ALIGNED(end, s->cluster_size));
92
- assert(end < s->len || end == QEMU_ALIGN_UP(s->len, s->cluster_size));
93
+ assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
94
+ assert(start < s->len);
95
+ assert(start + bytes <= s->len ||
96
+ start + bytes == QEMU_ALIGN_UP(s->len, s->cluster_size));
97
+ assert(nbytes < INT_MAX);
98
99
if (zeroes) {
100
ret = bdrv_co_pwrite_zeroes(s->target, start, nbytes, s->write_flags &
101
@@ -XXX,XX +XXX,XX @@ int64_t block_copy_reset_unallocated(BlockCopyState *s,
102
}
103
104
int coroutine_fn block_copy(BlockCopyState *s,
105
- int64_t start, uint64_t bytes,
106
+ int64_t start, int64_t bytes,
107
bool *error_is_read)
108
{
109
int ret = 0;
110
- int64_t end = bytes + start; /* bytes */
111
BlockCopyInFlightReq req;
112
113
/*
114
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
115
bdrv_get_aio_context(s->target->bs));
116
117
assert(QEMU_IS_ALIGNED(start, s->cluster_size));
118
- assert(QEMU_IS_ALIGNED(end, s->cluster_size));
119
+ assert(QEMU_IS_ALIGNED(bytes, s->cluster_size));
120
121
block_copy_wait_inflight_reqs(s, start, bytes);
122
- block_copy_inflight_req_begin(s, &req, start, end);
123
+ block_copy_inflight_req_begin(s, &req, start, bytes);
124
125
- while (start < end) {
126
- int64_t next_zero, chunk_end, status_bytes;
127
+ while (bytes) {
128
+ int64_t next_zero, cur_bytes, status_bytes;
129
130
if (!bdrv_dirty_bitmap_get(s->copy_bitmap, start)) {
131
trace_block_copy_skip(s, start);
132
start += s->cluster_size;
133
+ bytes -= s->cluster_size;
134
continue; /* already copied */
135
}
136
137
- chunk_end = MIN(end, start + s->copy_size);
138
+ cur_bytes = MIN(bytes, s->copy_size);
139
140
next_zero = bdrv_dirty_bitmap_next_zero(s->copy_bitmap, start,
141
- chunk_end - start);
142
+ cur_bytes);
143
if (next_zero >= 0) {
144
assert(next_zero > start); /* start is dirty */
145
- assert(next_zero < chunk_end); /* no need to do MIN() */
146
- chunk_end = next_zero;
147
+ assert(next_zero < start + cur_bytes); /* no need to do MIN() */
148
+ cur_bytes = next_zero - start;
149
}
150
151
- ret = block_copy_block_status(s, start, chunk_end - start,
152
- &status_bytes);
153
+ ret = block_copy_block_status(s, start, cur_bytes, &status_bytes);
154
if (s->skip_unallocated && !(ret & BDRV_BLOCK_ALLOCATED)) {
155
bdrv_reset_dirty_bitmap(s->copy_bitmap, start, status_bytes);
156
progress_set_remaining(s->progress,
157
@@ -XXX,XX +XXX,XX @@ int coroutine_fn block_copy(BlockCopyState *s,
158
s->in_flight_bytes);
159
trace_block_copy_skip_range(s, start, status_bytes);
160
start += status_bytes;
161
+ bytes -= status_bytes;
162
continue;
163
}
164
165
- chunk_end = MIN(chunk_end, start + status_bytes);
166
+ cur_bytes = MIN(cur_bytes, status_bytes);
167
168
trace_block_copy_process(s, start);
169
170
- bdrv_reset_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
171
- s->in_flight_bytes += chunk_end - start;
172
+ bdrv_reset_dirty_bitmap(s->copy_bitmap, start, cur_bytes);
173
+ s->in_flight_bytes += cur_bytes;
174
175
- co_get_from_shres(s->mem, chunk_end - start);
176
- ret = block_copy_do_copy(s, start, chunk_end, ret & BDRV_BLOCK_ZERO,
177
+ co_get_from_shres(s->mem, cur_bytes);
178
+ ret = block_copy_do_copy(s, start, cur_bytes, ret & BDRV_BLOCK_ZERO,
179
error_is_read);
180
- co_put_to_shres(s->mem, chunk_end - start);
181
- s->in_flight_bytes -= chunk_end - start;
182
+ co_put_to_shres(s->mem, cur_bytes);
183
+ s->in_flight_bytes -= cur_bytes;
184
if (ret < 0) {
185
- bdrv_set_dirty_bitmap(s->copy_bitmap, start, chunk_end - start);
186
+ bdrv_set_dirty_bitmap(s->copy_bitmap, start, cur_bytes);
187
break;
188
}
189
190
- progress_work_done(s->progress, chunk_end - start);
191
- s->progress_bytes_callback(chunk_end - start, s->progress_opaque);
192
- start = chunk_end;
193
- ret = 0;
194
+ progress_work_done(s->progress, cur_bytes);
195
+ s->progress_bytes_callback(cur_bytes, s->progress_opaque);
196
+ start += cur_bytes;
197
+ bytes -= cur_bytes;
198
}
199
200
block_copy_inflight_req_end(&req);
201
diff --git a/include/block/block-copy.h b/include/block/block-copy.h
202
index XXXXXXX..XXXXXXX 100644
203
--- a/include/block/block-copy.h
204
+++ b/include/block/block-copy.h
205
@@ -XXX,XX +XXX,XX @@
206
#include "qemu/co-shared-resource.h"
207
208
typedef struct BlockCopyInFlightReq {
209
- int64_t start_byte;
210
- int64_t end_byte;
211
+ int64_t start;
212
+ int64_t bytes;
213
QLIST_ENTRY(BlockCopyInFlightReq) list;
214
CoQueue wait_queue; /* coroutines blocked on this request */
215
} BlockCopyInFlightReq;
216
@@ -XXX,XX +XXX,XX @@ void block_copy_state_free(BlockCopyState *s);
217
int64_t block_copy_reset_unallocated(BlockCopyState *s,
218
int64_t offset, int64_t *count);
219
220
-int coroutine_fn block_copy(BlockCopyState *s, int64_t start, uint64_t bytes,
221
+int coroutine_fn block_copy(BlockCopyState *s, int64_t start, int64_t bytes,
222
bool *error_is_read);
223
224
#endif /* BLOCK_COPY_H */
225
--
84
--
226
2.24.1
85
2.26.2
227
86
228
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
On the success path we return what inflate() returns instead of 0. It
3
These cases are fixed by previous patches around block_status and
4
most probably works for Z_STREAM_END, as it is positive, but it is
4
is_allocated.
5
definitely broken for Z_BUF_ERROR.
6
5
7
While here, switch to an errno return code, to be closer to the
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
qcow2_compress API (and usual expectations).
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Message-id: 20200924194003.22080-6-vsementsov@virtuozzo.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
tests/qemu-iotests/274 | 20 +++++++++++
13
tests/qemu-iotests/274.out | 68 ++++++++++++++++++++++++++++++++++++++
14
2 files changed, 88 insertions(+)
9
15
10
Invert the if condition to be more positive. Drop the dead initialization of
16
diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274
11
ret.
17
index XXXXXXX..XXXXXXX 100755
18
--- a/tests/qemu-iotests/274
19
+++ b/tests/qemu-iotests/274
20
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('base') as base, \
21
iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
22
iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
23
24
+ iotests.log('=== Testing qemu-img commit (top -> base) ===')
25
+
26
+ create_chain()
27
+ iotests.qemu_img_log('commit', '-b', base, top)
28
+ iotests.img_info_log(base)
29
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, base)
30
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), base)
31
+
32
+ iotests.log('=== Testing QMP active commit (top -> base) ===')
33
+
34
+ create_chain()
35
+ with create_vm() as vm:
36
+ vm.launch()
37
+ vm.qmp_log('block-commit', device='top', base_node='base',
38
+ job_id='job0', auto_dismiss=False)
39
+ vm.run_job('job0', wait=5)
40
+
41
+ iotests.img_info_log(mid)
42
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, base)
43
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), base)
44
45
iotests.log('== Resize tests ==')
46
47
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
48
index XXXXXXX..XXXXXXX 100644
49
--- a/tests/qemu-iotests/274.out
50
+++ b/tests/qemu-iotests/274.out
51
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0
52
read 1048576/1048576 bytes at offset 1048576
53
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
54
55
+=== Testing qemu-img commit (top -> base) ===
56
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16
57
+
58
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
59
+
60
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
61
+
62
+wrote 2097152/2097152 bytes at offset 0
63
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
64
+
65
+Image committed.
66
+
67
+image: TEST_IMG
68
+file format: IMGFMT
69
+virtual size: 2 MiB (2097152 bytes)
70
+cluster_size: 65536
71
+Format specific information:
72
+ compat: 1.1
73
+ compression type: zlib
74
+ lazy refcounts: false
75
+ refcount bits: 16
76
+ corrupt: false
77
+ extended l2: false
78
+
79
+read 1048576/1048576 bytes at offset 0
80
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
81
+
82
+read 1048576/1048576 bytes at offset 1048576
83
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
84
+
85
+=== Testing QMP active commit (top -> base) ===
86
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16
87
+
88
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
89
+
90
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
91
+
92
+wrote 2097152/2097152 bytes at offset 0
93
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
94
+
95
+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "base", "device": "top", "job-id": "job0"}}
96
+{"return": {}}
97
+{"execute": "job-complete", "arguments": {"id": "job0"}}
98
+{"return": {}}
99
+{"data": {"device": "job0", "len": 1048576, "offset": 1048576, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
100
+{"data": {"device": "job0", "len": 1048576, "offset": 1048576, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
101
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
102
+{"return": {}}
103
+image: TEST_IMG
104
+file format: IMGFMT
105
+virtual size: 1 MiB (1048576 bytes)
106
+cluster_size: 65536
107
+backing file: TEST_DIR/PID-base
108
+backing file format: IMGFMT
109
+Format specific information:
110
+ compat: 1.1
111
+ compression type: zlib
112
+ lazy refcounts: false
113
+ refcount bits: 16
114
+ corrupt: false
115
+ extended l2: false
116
+
117
+read 1048576/1048576 bytes at offset 0
118
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
119
+
120
+read 1048576/1048576 bytes at offset 1048576
121
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
122
+
123
== Resize tests ==
124
=== preallocation=off ===
125
Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=6442450944 lazy_refcounts=off refcount_bits=16
126
--
127
2.26.2
12
128
13
Cc: qemu-stable@nongnu.org # v4.0
14
Fixes: 341926ab83e2b
15
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
16
Message-Id: <20200302150930.16218-1-vsementsov@virtuozzo.com>
17
Reviewed-by: Alberto Garcia <berto@igalia.com>
18
Reviewed-by: Ján Tomko <jtomko@redhat.com>
19
Signed-off-by: Max Reitz <mreitz@redhat.com>
20
---
21
block/qcow2-threads.c | 12 +++++++-----
22
1 file changed, 7 insertions(+), 5 deletions(-)
23
24
diff --git a/block/qcow2-threads.c b/block/qcow2-threads.c
25
index XXXXXXX..XXXXXXX 100644
26
--- a/block/qcow2-threads.c
27
+++ b/block/qcow2-threads.c
28
@@ -XXX,XX +XXX,XX @@ static ssize_t qcow2_compress(void *dest, size_t dest_size,
29
* @src - source buffer, @src_size bytes
30
*
31
* Returns: 0 on success
32
- * -1 on fail
33
+ * -EIO on fail
34
*/
35
static ssize_t qcow2_decompress(void *dest, size_t dest_size,
36
const void *src, size_t src_size)
37
{
38
- int ret = 0;
39
+ int ret;
40
z_stream strm;
41
42
memset(&strm, 0, sizeof(strm));
43
@@ -XXX,XX +XXX,XX @@ static ssize_t qcow2_decompress(void *dest, size_t dest_size,
44
45
ret = inflateInit2(&strm, -12);
46
if (ret != Z_OK) {
47
- return -1;
48
+ return -EIO;
49
}
50
51
ret = inflate(&strm, Z_FINISH);
52
- if ((ret != Z_STREAM_END && ret != Z_BUF_ERROR) || strm.avail_out != 0) {
53
+ if ((ret == Z_STREAM_END || ret == Z_BUF_ERROR) && strm.avail_out == 0) {
54
/*
55
* We approve Z_BUF_ERROR because we need @dest buffer to be filled, but
56
* @src buffer may be processed partly (because in qcow2 we know size of
57
* compressed data with precision of one sector)
58
*/
59
- ret = -1;
60
+ ret = 0;
61
+ } else {
62
+ ret = -EIO;
63
}
64
65
inflateEnd(&strm);
66
--
67
2.24.1
68
69
diff view generated by jsdifflib
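
To summarize the corrected zlib error handling from the qcow2_decompress() change above, a standalone sketch (illustrative only; the function name is hypothetical and the buffer setup is abbreviated compared to the real qcow2 code):

    #include <errno.h>
    #include <sys/types.h>
    #include <zlib.h>

    /* Illustrative only: accept Z_STREAM_END, or Z_BUF_ERROR with the output
     * buffer completely filled, and map everything else to -EIO. */
    static ssize_t decompress_sketch(void *dest, size_t dest_size,
                                     const void *src, size_t src_size)
    {
        int ret;
        z_stream strm = {
            .next_in = (void *)src,
            .avail_in = src_size,
            .next_out = dest,
            .avail_out = dest_size,
        };

        if (inflateInit2(&strm, -12) != Z_OK) {
            return -EIO;
        }

        ret = inflate(&strm, Z_FINISH);
        if ((ret == Z_STREAM_END || ret == Z_BUF_ERROR) && strm.avail_out == 0) {
            /* Output fully produced; partial input consumption is fine. */
            ret = 0;
        } else {
            ret = -EIO;
        }

        inflateEnd(&strm);
        return ret;
    }
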