1
The following changes since commit 497d415d76b9f59fcae27f22df1ca2c3fa4df64e:
1
The following changes since commit 474f3938d79ab36b9231c9ad3b5a9314c2aeacde:
2
2
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201008-1' into staging (2020-10-08 21:41:20 +0100)
3
Merge remote-tracking branch 'remotes/amarkovic/tags/mips-queue-jun-21-2019' into staging (2019-06-21 15:40:50 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
7
https://github.com/XanClic/qemu.git tags/pull-block-2019-06-24
8
8
9
for you to fetch changes up to e969c7b045c90368bc3a5db3479e70b6f0ecb828:
9
for you to fetch changes up to ab5d4a30f7f3803ca5106b370969c1b7b54136f8:
10
10
11
iotests: add commit top->base cases to 274 (2020-10-09 14:32:24 +0100)
11
iotests: Fix 205 for concurrent runs (2019-06-24 16:01:40 +0200)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Pull request
14
Block patches:
15
- The SSH block driver now uses libssh instead of libssh2
16
- The VMDK block driver gets read-only support for the seSparse
17
subformat
18
- Various fixes
15
19
16
This pull request includes the vhost-user-blk server by Coiby Xu, the block
20
---
17
coroutine code generator by Vladimir Sementsov-Ogievskiy, nvme block driver
21
18
statistics by Philippe Mathieu-Daudé, and cleanups/fixes/additions to the
22
v2:
19
vhost-user-blk server by me.
23
- Squashed Pino's fix for pre-0.8 libssh into the libssh patch
20
24
21
----------------------------------------------------------------
25
----------------------------------------------------------------
26
Anton Nefedov (1):
27
iotest 134: test cluster-misaligned encrypted write
22
28
23
Coiby Xu (7):
29
Klaus Birkelund Jensen (1):
24
libvhost-user: Allow vu_message_read to be replaced
30
nvme: do not advertise support for unsupported arbitration mechanism
25
libvhost-user: remove watch for kick_fd when de-initialize vu-dev
26
util/vhost-user-server: generic vhost user server
27
block: move logical block size check function to a common utility
28
function
29
block/export: vhost-user block device backend server
30
test: new qTest case to test the vhost-user-blk-server
31
MAINTAINERS: Add vhost-user block device backend server maintainer
32
31
33
Philippe Mathieu-Daudé (1):
32
Max Reitz (1):
34
block/nvme: Add driver statistics for access alignment and hw errors
33
iotests: Fix 205 for concurrent runs
35
34
36
Stefan Hajnoczi (17):
35
Pino Toscano (1):
37
util/vhost-user-server: s/fileds/fields/ typo fix
36
ssh: switch from libssh2 to libssh
38
util/vhost-user-server: drop unnecessary QOM cast
39
util/vhost-user-server: drop unnecessary watch deletion
40
block/export: consolidate request structs into VuBlockReq
41
util/vhost-user-server: drop unused DevicePanicNotifier
42
util/vhost-user-server: fix memory leak in vu_message_read()
43
util/vhost-user-server: check EOF when reading payload
44
util/vhost-user-server: rework vu_client_trip() coroutine lifecycle
45
block/export: report flush errors
46
block/export: convert vhost-user-blk server to block export API
47
util/vhost-user-server: move header to include/
48
util/vhost-user-server: use static library in meson.build
49
qemu-storage-daemon: avoid compiling blockdev_ss twice
50
block: move block exports to libblockdev
51
block/export: add iothread and fixed-iothread options
52
block/export: add vhost-user-blk multi-queue support
53
tests/qtest: add multi-queue test case to vhost-user-blk-test
54
37
55
Vladimir Sementsov-Ogievskiy (5):
38
Sam Eiderman (3):
56
block/io: fix bdrv_co_block_status_above
39
vmdk: Fix comment regarding max l1_size coverage
57
block/io: bdrv_common_block_status_above: support include_base
40
vmdk: Reduce the max bound for L1 table size
58
block/io: bdrv_common_block_status_above: support bs == base
41
vmdk: Add read-only support for seSparse snapshots
59
block/io: fix bdrv_is_allocated_above
60
iotests: add commit top->base cases to 274
61
42
62
MAINTAINERS | 10 +
43
Vladimir Sementsov-Ogievskiy (1):
63
qapi/block-core.json | 24 +-
44
blockdev: enable non-root nodes for transaction drive-backup source
64
qapi/block-export.json | 36 +-
45
65
block/coroutines.h | 2 +
46
configure | 65 +-
66
block/export/vhost-user-blk-server.h | 19 +
47
block/Makefile.objs | 6 +-
67
contrib/libvhost-user/libvhost-user.h | 21 +
48
block/ssh.c | 652 ++++++++++--------
68
include/qemu/vhost-user-server.h | 65 ++
49
block/vmdk.c | 372 +++++++++-
69
tests/qtest/libqos/libqtest.h | 17 +
50
blockdev.c | 2 +-
70
tests/qtest/libqos/vhost-user-blk.h | 48 ++
51
hw/block/nvme.c | 1 -
71
util/block-helpers.h | 19 +
52
.travis.yml | 4 +-
72
block/export/export.c | 37 +-
53
block/trace-events | 14 +-
73
block/export/vhost-user-blk-server.c | 431 +++++++++++
54
docs/qemu-block-drivers.texi | 2 +-
74
block/io.c | 132 ++--
55
.../dockerfiles/debian-win32-cross.docker | 1 -
75
block/nvme.c | 27 +
56
.../dockerfiles/debian-win64-cross.docker | 1 -
76
block/qcow2.c | 16 +-
57
tests/docker/dockerfiles/fedora.docker | 4 +-
77
contrib/libvhost-user/libvhost-user-glib.c | 2 +-
58
tests/docker/dockerfiles/ubuntu.docker | 2 +-
78
contrib/libvhost-user/libvhost-user.c | 15 +-
59
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
79
hw/core/qdev-properties-system.c | 31 +-
60
tests/qemu-iotests/059.out | 2 +-
80
nbd/server.c | 2 -
61
tests/qemu-iotests/134 | 9 +
81
qemu-nbd.c | 21 +-
62
tests/qemu-iotests/134.out | 10 +
82
softmmu/vl.c | 4 +
63
tests/qemu-iotests/205 | 2 +-
83
stubs/blk-exp-close-all.c | 7 +
64
tests/qemu-iotests/207 | 54 +-
84
tests/qtest/libqos/vhost-user-blk.c | 129 ++++
65
tests/qemu-iotests/207.out | 2 +-
85
tests/qtest/libqtest.c | 36 +-
66
20 files changed, 823 insertions(+), 384 deletions(-)
86
tests/qtest/vhost-user-blk-test.c | 822 +++++++++++++++++++++
87
tests/vhost-user-bridge.c | 2 +
88
tools/virtiofsd/fuse_virtio.c | 4 +-
89
util/block-helpers.c | 46 ++
90
util/vhost-user-server.c | 446 +++++++++++
91
block/export/meson.build | 3 +-
92
contrib/libvhost-user/meson.build | 1 +
93
meson.build | 22 +-
94
nbd/meson.build | 2 +
95
storage-daemon/meson.build | 3 +-
96
stubs/meson.build | 1 +
97
tests/qemu-iotests/274 | 20 +
98
tests/qemu-iotests/274.out | 68 ++
99
tests/qtest/libqos/meson.build | 1 +
100
tests/qtest/meson.build | 4 +-
101
util/meson.build | 4 +
102
40 files changed, 2476 insertions(+), 124 deletions(-)
103
create mode 100644 block/export/vhost-user-blk-server.h
104
create mode 100644 include/qemu/vhost-user-server.h
105
create mode 100644 tests/qtest/libqos/vhost-user-blk.h
106
create mode 100644 util/block-helpers.h
107
create mode 100644 block/export/vhost-user-blk-server.c
108
create mode 100644 stubs/blk-exp-close-all.c
109
create mode 100644 tests/qtest/libqos/vhost-user-blk.c
110
create mode 100644 tests/qtest/vhost-user-blk-test.c
111
create mode 100644 util/block-helpers.c
112
create mode 100644 util/vhost-user-server.c
113
67
114
--
68
--
115
2.26.2
69
2.21.0
116
70
71
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
2
2
3
These cases are fixed by previous patches around block_status and
3
The device mistakenly reports that the Weighted Round Robin with Urgent
4
is_allocated.
4
Priority Class arbitration mechanism is supported.
5
5
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
It is not.
7
Reviewed-by: Eric Blake <eblake@redhat.com>
7
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
8
Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
9
Message-id: 20200924194003.22080-6-vsementsov@virtuozzo.com
9
Message-id: 20190606092530.14206-1-klaus@birkelund.eu
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Acked-by: Maxim Levitsky <mlevitsk@redhat.com>
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
12
---
12
tests/qemu-iotests/274 | 20 +++++++++++
13
hw/block/nvme.c | 1 -
13
tests/qemu-iotests/274.out | 68 ++++++++++++++++++++++++++++++++++++++
14
1 file changed, 1 deletion(-)
14
2 files changed, 88 insertions(+)
15
15
16
diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274
16
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
17
index XXXXXXX..XXXXXXX 100755
18
--- a/tests/qemu-iotests/274
19
+++ b/tests/qemu-iotests/274
20
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('base') as base, \
21
iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
22
iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
23
24
+ iotests.log('=== Testing qemu-img commit (top -> base) ===')
25
+
26
+ create_chain()
27
+ iotests.qemu_img_log('commit', '-b', base, top)
28
+ iotests.img_info_log(base)
29
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, base)
30
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), base)
31
+
32
+ iotests.log('=== Testing QMP active commit (top -> base) ===')
33
+
34
+ create_chain()
35
+ with create_vm() as vm:
36
+ vm.launch()
37
+ vm.qmp_log('block-commit', device='top', base_node='base',
38
+ job_id='job0', auto_dismiss=False)
39
+ vm.run_job('job0', wait=5)
40
+
41
+ iotests.img_info_log(mid)
42
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, base)
43
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), base)
44
45
iotests.log('== Resize tests ==')
46
47
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
48
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
49
--- a/tests/qemu-iotests/274.out
18
--- a/hw/block/nvme.c
50
+++ b/tests/qemu-iotests/274.out
19
+++ b/hw/block/nvme.c
51
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0
20
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
52
read 1048576/1048576 bytes at offset 1048576
21
n->bar.cap = 0;
53
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
22
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
54
23
NVME_CAP_SET_CQR(n->bar.cap, 1);
55
+=== Testing qemu-img commit (top -> base) ===
24
- NVME_CAP_SET_AMS(n->bar.cap, 1);
56
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16
25
NVME_CAP_SET_TO(n->bar.cap, 0xf);
57
+
26
NVME_CAP_SET_CSS(n->bar.cap, 1);
58
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
27
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
59
+
60
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
61
+
62
+wrote 2097152/2097152 bytes at offset 0
63
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
64
+
65
+Image committed.
66
+
67
+image: TEST_IMG
68
+file format: IMGFMT
69
+virtual size: 2 MiB (2097152 bytes)
70
+cluster_size: 65536
71
+Format specific information:
72
+ compat: 1.1
73
+ compression type: zlib
74
+ lazy refcounts: false
75
+ refcount bits: 16
76
+ corrupt: false
77
+ extended l2: false
78
+
79
+read 1048576/1048576 bytes at offset 0
80
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
81
+
82
+read 1048576/1048576 bytes at offset 1048576
83
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
84
+
85
+=== Testing QMP active commit (top -> base) ===
86
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16
87
+
88
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
89
+
90
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
91
+
92
+wrote 2097152/2097152 bytes at offset 0
93
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
94
+
95
+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "base", "device": "top", "job-id": "job0"}}
96
+{"return": {}}
97
+{"execute": "job-complete", "arguments": {"id": "job0"}}
98
+{"return": {}}
99
+{"data": {"device": "job0", "len": 1048576, "offset": 1048576, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
100
+{"data": {"device": "job0", "len": 1048576, "offset": 1048576, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
101
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
102
+{"return": {}}
103
+image: TEST_IMG
104
+file format: IMGFMT
105
+virtual size: 1 MiB (1048576 bytes)
106
+cluster_size: 65536
107
+backing file: TEST_DIR/PID-base
108
+backing file format: IMGFMT
109
+Format specific information:
110
+ compat: 1.1
111
+ compression type: zlib
112
+ lazy refcounts: false
113
+ refcount bits: 16
114
+ corrupt: false
115
+ extended l2: false
116
+
117
+read 1048576/1048576 bytes at offset 0
118
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
119
+
120
+read 1048576/1048576 bytes at offset 1048576
121
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
122
+
123
== Resize tests ==
124
=== preallocation=off ===
125
Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=6442450944 lazy_refcounts=off refcount_bits=16
126
--
28
--
127
2.26.2
29
2.21.0
128
30
31
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
In order to reuse bdrv_common_block_status_above in
3
We forgot to enable it for transaction .prepare, while it is already
4
bdrv_is_allocated_above, let's support include_base parameter.
4
enabled in do_drive_backup since commit a2d665c1bc362
5
"blockdev: loosen restrictions on drive-backup source node"
5
6
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Reviewed-by: Alberto Garcia <berto@igalia.com>
8
Message-id: 20190618140804.59214-1-vsementsov@virtuozzo.com
8
Reviewed-by: Eric Blake <eblake@redhat.com>
9
Reviewed-by: John Snow <jsnow@redhat.com>
9
Message-id: 20200924194003.22080-3-vsementsov@virtuozzo.com
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
11
---
12
block/coroutines.h | 2 ++
12
blockdev.c | 2 +-
13
block/io.c | 21 ++++++++++++++-------
13
1 file changed, 1 insertion(+), 1 deletion(-)
14
2 files changed, 16 insertions(+), 7 deletions(-)
15
14
16
diff --git a/block/coroutines.h b/block/coroutines.h
15
diff --git a/blockdev.c b/blockdev.c
17
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
18
--- a/block/coroutines.h
17
--- a/blockdev.c
19
+++ b/block/coroutines.h
18
+++ b/blockdev.c
20
@@ -XXX,XX +XXX,XX @@ bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
19
@@ -XXX,XX +XXX,XX @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
21
int coroutine_fn
20
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
22
bdrv_co_common_block_status_above(BlockDriverState *bs,
21
backup = common->action->u.drive_backup.data;
23
BlockDriverState *base,
22
24
+ bool include_base,
23
- bs = qmp_get_root_bs(backup->device, errp);
25
bool want_zero,
24
+ bs = bdrv_lookup_bs(backup->device, backup->device, errp);
26
int64_t offset,
25
if (!bs) {
27
int64_t bytes,
26
return;
28
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
29
int generated_co_wrapper
30
bdrv_common_block_status_above(BlockDriverState *bs,
31
BlockDriverState *base,
32
+ bool include_base,
33
bool want_zero,
34
int64_t offset,
35
int64_t bytes,
36
diff --git a/block/io.c b/block/io.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/block/io.c
39
+++ b/block/io.c
40
@@ -XXX,XX +XXX,XX @@ early_out:
41
int coroutine_fn
42
bdrv_co_common_block_status_above(BlockDriverState *bs,
43
BlockDriverState *base,
44
+ bool include_base,
45
bool want_zero,
46
int64_t offset,
47
int64_t bytes,
48
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
49
BlockDriverState *p;
50
int64_t eof = 0;
51
52
- assert(bs != base);
53
+ assert(include_base || bs != base);
54
+ assert(!include_base || base); /* Can't include NULL base */
55
56
ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
57
- if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED) {
58
+ if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
59
return ret;
60
}
61
62
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
63
assert(*pnum <= bytes);
64
bytes = *pnum;
65
66
- for (p = bdrv_filter_or_cow_bs(bs); p != base;
67
+ for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
68
p = bdrv_filter_or_cow_bs(p))
69
{
70
ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
71
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
72
break;
73
}
74
75
+ if (p == base) {
76
+ assert(include_base);
77
+ break;
78
+ }
79
+
80
/*
81
* OK, [offset, offset + *pnum) region is unallocated on this layer,
82
* let's continue the diving.
83
@@ -XXX,XX +XXX,XX @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
84
int64_t offset, int64_t bytes, int64_t *pnum,
85
int64_t *map, BlockDriverState **file)
86
{
87
- return bdrv_common_block_status_above(bs, base, true, offset, bytes,
88
+ return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
89
pnum, map, file);
90
}
91
92
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
93
int ret;
94
int64_t dummy;
95
96
- ret = bdrv_common_block_status_above(bs, bdrv_filter_or_cow_bs(bs), false,
97
- offset, bytes, pnum ? pnum : &dummy,
98
- NULL, NULL);
99
+ ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
100
+ bytes, pnum ? pnum : &dummy, NULL,
101
+ NULL);
102
if (ret < 0) {
103
return ret;
104
}
27
}
105
--
28
--
106
2.26.2
29
2.21.0
107
30
31
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
2
2
3
bdrv_is_allocated_above wrongly handles short backing files: it reports
3
COW (even empty/zero) areas require encryption too
4
after-EOF space as UNALLOCATED which is wrong, as on read the data is
5
generated on the level of short backing file (if all overlays have
6
unallocated areas at that place).
7
4
8
Reusing bdrv_common_block_status_above fixes the issue and unifies code
5
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
9
path.
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Message-id: 20190516143028.81155-1-anton.nefedov@virtuozzo.com
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
11
---
12
tests/qemu-iotests/134 | 9 +++++++++
13
tests/qemu-iotests/134.out | 10 ++++++++++
14
2 files changed, 19 insertions(+)
10
15
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
16
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
12
Reviewed-by: Eric Blake <eblake@redhat.com>
17
index XXXXXXX..XXXXXXX 100755
13
Reviewed-by: Alberto Garcia <berto@igalia.com>
18
--- a/tests/qemu-iotests/134
14
Message-id: 20200924194003.22080-5-vsementsov@virtuozzo.com
19
+++ b/tests/qemu-iotests/134
15
[Fix s/has/have/ as suggested by Eric Blake. Fix s/area/areas/.
20
@@ -XXX,XX +XXX,XX @@ echo
16
--Stefan]
21
echo "== reading whole image =="
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
$QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
18
---
23
19
block/io.c | 43 +++++--------------------------------------
24
+echo
20
1 file changed, 5 insertions(+), 38 deletions(-)
25
+echo "== rewriting cluster part =="
26
+$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
27
+
28
+echo
29
+echo "== verify pattern =="
30
+$QEMU_IO --object $SECRET -c "read -P 0 0 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
31
+$QEMU_IO --object $SECRET -c "read -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
32
+
33
echo
34
echo "== rewriting whole image =="
35
$QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
36
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
37
index XXXXXXX..XXXXXXX 100644
38
--- a/tests/qemu-iotests/134.out
39
+++ b/tests/qemu-iotests/134.out
40
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt.
41
read 134217728/134217728 bytes at offset 0
42
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
43
44
+== rewriting cluster part ==
45
+wrote 512/512 bytes at offset 512
46
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
47
+
48
+== verify pattern ==
49
+read 512/512 bytes at offset 0
50
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
51
+read 512/512 bytes at offset 512
52
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
53
+
54
== rewriting whole image ==
55
wrote 134217728/134217728 bytes at offset 0
56
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
57
--
58
2.21.0
21
59
22
diff --git a/block/io.c b/block/io.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/block/io.c
25
+++ b/block/io.c
26
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
27
* at 'offset + *pnum' may return the same allocation status (in other
28
* words, the result is not necessarily the maximum possible range);
29
* but 'pnum' will only be 0 when end of file is reached.
30
- *
31
*/
32
int bdrv_is_allocated_above(BlockDriverState *top,
33
BlockDriverState *base,
34
bool include_base, int64_t offset,
35
int64_t bytes, int64_t *pnum)
36
{
37
- BlockDriverState *intermediate;
38
- int ret;
39
- int64_t n = bytes;
40
-
41
- assert(base || !include_base);
42
-
43
- intermediate = top;
44
- while (include_base || intermediate != base) {
45
- int64_t pnum_inter;
46
- int64_t size_inter;
47
-
48
- assert(intermediate);
49
- ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
50
- if (ret < 0) {
51
- return ret;
52
- }
53
- if (ret) {
54
- *pnum = pnum_inter;
55
- return 1;
56
- }
57
-
58
- size_inter = bdrv_getlength(intermediate);
59
- if (size_inter < 0) {
60
- return size_inter;
61
- }
62
- if (n > pnum_inter &&
63
- (intermediate == top || offset + pnum_inter < size_inter)) {
64
- n = pnum_inter;
65
- }
66
-
67
- if (intermediate == base) {
68
- break;
69
- }
70
-
71
- intermediate = bdrv_filter_or_cow_bs(intermediate);
72
+ int ret = bdrv_common_block_status_above(top, base, include_base, false,
73
+ offset, bytes, pnum, NULL, NULL);
74
+ if (ret < 0) {
75
+ return ret;
76
}
77
78
- *pnum = n;
79
- return 0;
80
+ return !!(ret & BDRV_BLOCK_ALLOCATED);
81
}
82
83
int coroutine_fn
84
--
85
2.26.2
86
60
diff view generated by jsdifflib
1
Unexpected EOF is an error that must be reported.
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
3
Commit b0651b8c246d ("vmdk: Move l1_size check into vmdk_add_extent")
4
Message-id: 20200924151549.913737-9-stefanha@redhat.com
4
extended the l1_size check from VMDK4 to VMDK3 but did not update the
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
default coverage in the moved comment.
6
7
The previous vmdk4 calculation:
8
9
(512 * 1024 * 1024) * 512(l2 entries) * 65536(grain) = 16PB
10
11
The added vmdk3 calculation:
12
13
(512 * 1024 * 1024) * 4096(l2 entries) * 512(grain) = 1PB
14
15
Adding the calculation of vmdk3 to the comment.
16
17
In any case, VMware does not offer virtual disks more than 2TB for
18
vmdk4/vmdk3 or 64TB for the new undocumented seSparse format which is
19
not implemented yet in qemu.
20
21
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
22
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
23
Reviewed-by: Liran Alon <liran.alon@oracle.com>
24
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
25
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
26
Message-id: 20190620091057.47441-2-shmuel.eiderman@oracle.com
27
Reviewed-by: yuchenlin <yuchenlin@synology.com>
28
Reviewed-by: Max Reitz <mreitz@redhat.com>
29
Signed-off-by: Max Reitz <mreitz@redhat.com>
6
---
30
---
7
util/vhost-user-server.c | 6 ++++--
31
block/vmdk.c | 11 ++++++++---
8
1 file changed, 4 insertions(+), 2 deletions(-)
32
1 file changed, 8 insertions(+), 3 deletions(-)
9
33
10
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
34
diff --git a/block/vmdk.c b/block/vmdk.c
11
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
12
--- a/util/vhost-user-server.c
36
--- a/block/vmdk.c
13
+++ b/util/vhost-user-server.c
37
+++ b/block/vmdk.c
14
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
38
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
15
};
39
return -EFBIG;
16
if (vmsg->size) {
40
}
17
rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
41
if (l1_size > 512 * 1024 * 1024) {
18
- if (rc == -1) {
42
- /* Although with big capacity and small l1_entry_sectors, we can get a
19
- error_report_err(local_err);
43
+ /*
20
+ if (rc != 1) {
44
+ * Although with big capacity and small l1_entry_sectors, we can get a
21
+ if (local_err) {
45
* big l1_size, we don't want unbounded value to allocate the table.
22
+ error_report_err(local_err);
46
- * Limit it to 512M, which is 16PB for default cluster and L2 table
23
+ }
47
- * size */
24
goto fail;
48
+ * Limit it to 512M, which is:
25
}
49
+ * 16PB - for default "Hosted Sparse Extent" (VMDK4)
50
+ * cluster size: 64KB, L2 table size: 512 entries
51
+ * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
52
+ * cluster size: 512B, L2 table size: 4096 entries
53
+ */
54
error_setg(errp, "L1 size too big");
55
return -EFBIG;
26
}
56
}
27
--
57
--
28
2.26.2
58
2.21.0
29
59
60
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
Keep statistics of some hardware errors, and number of
3
512M of L1 entries is a very loose bound, only 32M are required to store
4
aligned/unaligned I/O accesses.
4
the maximal supported VMDK file size of 2TB.
5
5
6
QMP example booting a full RHEL 8.3 aarch64 guest:
6
Fixed qemu-iotest 59# - now the failure occurs earlier, on an impossible L1
7
table size.
7
8
8
{ "execute": "query-blockstats" }
9
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
9
{
10
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
10
"return": [
11
Reviewed-by: Liran Alon <liran.alon@oracle.com>
11
{
12
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
12
"device": "",
13
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
13
"node-name": "drive0",
14
Message-id: 20190620091057.47441-3-shmuel.eiderman@oracle.com
14
"stats": {
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
15
"flush_total_time_ns": 6026948,
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
16
"wr_highest_offset": 3383991230464,
17
---
17
"wr_total_time_ns": 807450995,
18
block/vmdk.c | 13 +++++++------
18
"failed_wr_operations": 0,
19
tests/qemu-iotests/059.out | 2 +-
19
"failed_rd_operations": 0,
20
2 files changed, 8 insertions(+), 7 deletions(-)
20
"wr_merged": 3,
21
"wr_bytes": 50133504,
22
"failed_unmap_operations": 0,
23
"failed_flush_operations": 0,
24
"account_invalid": false,
25
"rd_total_time_ns": 1846979900,
26
"flush_operations": 130,
27
"wr_operations": 659,
28
"rd_merged": 1192,
29
"rd_bytes": 218244096,
30
"account_failed": false,
31
"idle_time_ns": 2678641497,
32
"rd_operations": 7406,
33
},
34
"driver-specific": {
35
"driver": "nvme",
36
"completion-errors": 0,
37
"unaligned-accesses": 2959,
38
"aligned-accesses": 4477
39
},
40
"qdev": "/machine/peripheral-anon/device[0]/virtio-backend"
41
}
42
]
43
}
44
21
45
Suggested-by: Stefan Hajnoczi <stefanha@gmail.com>
22
diff --git a/block/vmdk.c b/block/vmdk.c
46
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
23
index XXXXXXX..XXXXXXX 100644
47
Acked-by: Markus Armbruster <armbru@redhat.com>
24
--- a/block/vmdk.c
48
Message-id: 20201001162939.1567915-1-philmd@redhat.com
25
+++ b/block/vmdk.c
49
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
26
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
50
---
27
error_setg(errp, "Invalid granularity, image may be corrupt");
51
qapi/block-core.json | 24 +++++++++++++++++++++++-
28
return -EFBIG;
52
block/nvme.c | 27 +++++++++++++++++++++++++++
29
}
53
2 files changed, 50 insertions(+), 1 deletion(-)
30
- if (l1_size > 512 * 1024 * 1024) {
31
+ if (l1_size > 32 * 1024 * 1024) {
32
/*
33
* Although with big capacity and small l1_entry_sectors, we can get a
34
* big l1_size, we don't want unbounded value to allocate the table.
35
- * Limit it to 512M, which is:
36
- * 16PB - for default "Hosted Sparse Extent" (VMDK4)
37
- * cluster size: 64KB, L2 table size: 512 entries
38
- * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
39
- * cluster size: 512B, L2 table size: 4096 entries
40
+ * Limit it to 32M, which is enough to store:
41
+ * 8TB - for both VMDK3 & VMDK4 with
42
+ * minimal cluster size: 512B
43
+ * minimal L2 table size: 512 entries
44
+ * 8 TB is still more than the maximal value supported for
45
+ * VMDK3 & VMDK4 which is 2TB.
46
*/
47
error_setg(errp, "L1 size too big");
48
return -EFBIG;
49
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
50
index XXXXXXX..XXXXXXX 100644
51
--- a/tests/qemu-iotests/059.out
52
+++ b/tests/qemu-iotests/059.out
53
@@ -XXX,XX +XXX,XX @@ Offset Length Mapped to File
54
0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk
55
56
=== Testing afl image with a very large capacity ===
57
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
58
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
59
*** done
60
--
61
2.21.0
54
62
55
diff --git a/qapi/block-core.json b/qapi/block-core.json
56
index XXXXXXX..XXXXXXX 100644
57
--- a/qapi/block-core.json
58
+++ b/qapi/block-core.json
59
@@ -XXX,XX +XXX,XX @@
60
'discard-nb-failed': 'uint64',
61
'discard-bytes-ok': 'uint64' } }
62
63
+##
64
+# @BlockStatsSpecificNvme:
65
+#
66
+# NVMe driver statistics
67
+#
68
+# @completion-errors: The number of completion errors.
69
+#
70
+# @aligned-accesses: The number of aligned accesses performed by
71
+# the driver.
72
+#
73
+# @unaligned-accesses: The number of unaligned accesses performed by
74
+# the driver.
75
+#
76
+# Since: 5.2
77
+##
78
+{ 'struct': 'BlockStatsSpecificNvme',
79
+ 'data': {
80
+ 'completion-errors': 'uint64',
81
+ 'aligned-accesses': 'uint64',
82
+ 'unaligned-accesses': 'uint64' } }
83
+
84
##
85
# @BlockStatsSpecific:
86
#
87
@@ -XXX,XX +XXX,XX @@
88
'discriminator': 'driver',
89
'data': {
90
'file': 'BlockStatsSpecificFile',
91
- 'host_device': 'BlockStatsSpecificFile' } }
92
+ 'host_device': 'BlockStatsSpecificFile',
93
+ 'nvme': 'BlockStatsSpecificNvme' } }
94
95
##
96
# @BlockStats:
97
diff --git a/block/nvme.c b/block/nvme.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/block/nvme.c
100
+++ b/block/nvme.c
101
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
102
103
/* PCI address (required for nvme_refresh_filename()) */
104
char *device;
105
+
106
+ struct {
107
+ uint64_t completion_errors;
108
+ uint64_t aligned_accesses;
109
+ uint64_t unaligned_accesses;
110
+ } stats;
111
};
112
113
#define NVME_BLOCK_OPT_DEVICE "device"
114
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
115
break;
116
}
117
ret = nvme_translate_error(c);
118
+ if (ret) {
119
+ s->stats.completion_errors++;
120
+ }
121
q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE;
122
if (!q->cq.head) {
123
q->cq_phase = !q->cq_phase;
124
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
125
assert(QEMU_IS_ALIGNED(bytes, s->page_size));
126
assert(bytes <= s->max_transfer);
127
if (nvme_qiov_aligned(bs, qiov)) {
128
+ s->stats.aligned_accesses++;
129
return nvme_co_prw_aligned(bs, offset, bytes, qiov, is_write, flags);
130
}
131
+ s->stats.unaligned_accesses++;
132
trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
133
buf = qemu_try_memalign(s->page_size, bytes);
134
135
@@ -XXX,XX +XXX,XX @@ static void nvme_unregister_buf(BlockDriverState *bs, void *host)
136
qemu_vfio_dma_unmap(s->vfio, host);
137
}
138
139
+static BlockStatsSpecific *nvme_get_specific_stats(BlockDriverState *bs)
140
+{
141
+ BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1);
142
+ BDRVNVMeState *s = bs->opaque;
143
+
144
+ stats->driver = BLOCKDEV_DRIVER_NVME;
145
+ stats->u.nvme = (BlockStatsSpecificNvme) {
146
+ .completion_errors = s->stats.completion_errors,
147
+ .aligned_accesses = s->stats.aligned_accesses,
148
+ .unaligned_accesses = s->stats.unaligned_accesses,
149
+ };
150
+
151
+ return stats;
152
+}
153
+
154
static const char *const nvme_strong_runtime_opts[] = {
155
NVME_BLOCK_OPT_DEVICE,
156
NVME_BLOCK_OPT_NAMESPACE,
157
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nvme = {
158
.bdrv_refresh_filename = nvme_refresh_filename,
159
.bdrv_refresh_limits = nvme_refresh_limits,
160
.strong_runtime_opts = nvme_strong_runtime_opts,
161
+ .bdrv_get_specific_stats = nvme_get_specific_stats,
162
163
.bdrv_detach_aio_context = nvme_detach_aio_context,
164
.bdrv_attach_aio_context = nvme_attach_aio_context,
165
--
166
2.26.2
167
63
diff view generated by jsdifflib
Deleted patch
1
From: Coiby Xu <coiby.xu@gmail.com>
2
1
3
Allow vu_message_read to be replaced by one which will make use of the
4
QIOChannel functions. Thus reading a vhost-user message won't stall the
5
guest. For slave channel, we still use the default vu_message_read.
6
7
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
8
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-id: 20200918080912.321299-2-coiby.xu@gmail.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
contrib/libvhost-user/libvhost-user.h | 21 +++++++++++++++++++++
14
contrib/libvhost-user/libvhost-user-glib.c | 2 +-
15
contrib/libvhost-user/libvhost-user.c | 14 +++++++-------
16
tests/vhost-user-bridge.c | 2 ++
17
tools/virtiofsd/fuse_virtio.c | 4 ++--
18
5 files changed, 33 insertions(+), 10 deletions(-)
19
20
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/contrib/libvhost-user/libvhost-user.h
23
+++ b/contrib/libvhost-user/libvhost-user.h
24
@@ -XXX,XX +XXX,XX @@
25
*/
26
#define VHOST_USER_MAX_RAM_SLOTS 32
27
28
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
29
+
30
typedef enum VhostSetConfigType {
31
VHOST_SET_CONFIG_TYPE_MASTER = 0,
32
VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
33
@@ -XXX,XX +XXX,XX @@ typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
34
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
35
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
36
int *do_reply);
37
+typedef bool (*vu_read_msg_cb) (VuDev *dev, int sock, VhostUserMsg *vmsg);
38
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
39
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
40
typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
41
@@ -XXX,XX +XXX,XX @@ struct VuDev {
42
bool broken;
43
uint16_t max_queues;
44
45
+ /* @read_msg: custom method to read vhost-user message
46
+ *
47
+ * Read data from vhost_user socket fd and fill up
48
+ * the passed VhostUserMsg *vmsg struct.
49
+ *
50
+ * If reading fails, it should close the received set of file
51
+ * descriptors as socket message's auxiliary data.
52
+ *
53
+ * For the details, please refer to vu_message_read in libvhost-user.c
54
+ * which will be used by default if no custom method is provided when
55
+ * calling vu_init
56
+ *
57
+ * Returns: true if vhost-user message successfully received,
58
+ * otherwise return false.
59
+ *
60
+ */
61
+ vu_read_msg_cb read_msg;
62
/* @set_watch: add or update the given fd to the watch set,
63
* call cb when condition is met */
64
vu_set_watch_cb set_watch;
65
@@ -XXX,XX +XXX,XX @@ bool vu_init(VuDev *dev,
66
uint16_t max_queues,
67
int socket,
68
vu_panic_cb panic,
69
+ vu_read_msg_cb read_msg,
70
vu_set_watch_cb set_watch,
71
vu_remove_watch_cb remove_watch,
72
const VuDevIface *iface);
73
diff --git a/contrib/libvhost-user/libvhost-user-glib.c b/contrib/libvhost-user/libvhost-user-glib.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/contrib/libvhost-user/libvhost-user-glib.c
76
+++ b/contrib/libvhost-user/libvhost-user-glib.c
77
@@ -XXX,XX +XXX,XX @@ vug_init(VugDev *dev, uint16_t max_queues, int socket,
78
g_assert(dev);
79
g_assert(iface);
80
81
- if (!vu_init(&dev->parent, max_queues, socket, panic, set_watch,
82
+ if (!vu_init(&dev->parent, max_queues, socket, panic, NULL, set_watch,
83
remove_watch, iface)) {
84
return false;
85
}
86
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/contrib/libvhost-user/libvhost-user.c
89
+++ b/contrib/libvhost-user/libvhost-user.c
90
@@ -XXX,XX +XXX,XX @@
91
/* The version of inflight buffer */
92
#define INFLIGHT_VERSION 1
93
94
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
95
-
96
/* The version of the protocol we support */
97
#define VHOST_USER_VERSION 1
98
#define LIBVHOST_USER_DEBUG 0
99
@@ -XXX,XX +XXX,XX @@ have_userfault(void)
100
}
101
102
static bool
103
-vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
104
+vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
105
{
106
char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
107
struct iovec iov = {
108
@@ -XXX,XX +XXX,XX @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
109
goto out;
110
}
111
112
- if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) {
113
+ if (!vu_message_read_default(dev, dev->slave_fd, &msg_reply)) {
114
goto out;
115
}
116
117
@@ -XXX,XX +XXX,XX @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
118
/* Wait for QEMU to confirm that it's registered the handler for the
119
* faults.
120
*/
121
- if (!vu_message_read(dev, dev->sock, vmsg) ||
122
+ if (!dev->read_msg(dev, dev->sock, vmsg) ||
123
vmsg->size != sizeof(vmsg->payload.u64) ||
124
vmsg->payload.u64 != 0) {
125
vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
126
@@ -XXX,XX +XXX,XX @@ vu_dispatch(VuDev *dev)
127
int reply_requested;
128
bool need_reply, success = false;
129
130
- if (!vu_message_read(dev, dev->sock, &vmsg)) {
131
+ if (!dev->read_msg(dev, dev->sock, &vmsg)) {
132
goto end;
133
}
134
135
@@ -XXX,XX +XXX,XX @@ vu_init(VuDev *dev,
136
uint16_t max_queues,
137
int socket,
138
vu_panic_cb panic,
139
+ vu_read_msg_cb read_msg,
140
vu_set_watch_cb set_watch,
141
vu_remove_watch_cb remove_watch,
142
const VuDevIface *iface)
143
@@ -XXX,XX +XXX,XX @@ vu_init(VuDev *dev,
144
145
dev->sock = socket;
146
dev->panic = panic;
147
+ dev->read_msg = read_msg ? read_msg : vu_message_read_default;
148
dev->set_watch = set_watch;
149
dev->remove_watch = remove_watch;
150
dev->iface = iface;
151
@@ -XXX,XX +XXX,XX @@ static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
152
153
vu_message_write(dev, dev->slave_fd, &vmsg);
154
if (ack) {
155
- vu_message_read(dev, dev->slave_fd, &vmsg);
156
+ vu_message_read_default(dev, dev->slave_fd, &vmsg);
157
}
158
return;
159
}
160
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/tests/vhost-user-bridge.c
163
+++ b/tests/vhost-user-bridge.c
164
@@ -XXX,XX +XXX,XX @@ vubr_accept_cb(int sock, void *ctx)
165
VHOST_USER_BRIDGE_MAX_QUEUES,
166
conn_fd,
167
vubr_panic,
168
+ NULL,
169
vubr_set_watch,
170
vubr_remove_watch,
171
&vuiface)) {
172
@@ -XXX,XX +XXX,XX @@ vubr_new(const char *path, bool client)
173
VHOST_USER_BRIDGE_MAX_QUEUES,
174
dev->sock,
175
vubr_panic,
176
+ NULL,
177
vubr_set_watch,
178
vubr_remove_watch,
179
&vuiface)) {
180
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
181
index XXXXXXX..XXXXXXX 100644
182
--- a/tools/virtiofsd/fuse_virtio.c
183
+++ b/tools/virtiofsd/fuse_virtio.c
184
@@ -XXX,XX +XXX,XX @@ int virtio_session_mount(struct fuse_session *se)
185
se->vu_socketfd = data_sock;
186
se->virtio_dev->se = se;
187
pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
188
- vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
189
- fv_remove_watch, &fv_iface);
190
+ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, NULL,
191
+ fv_set_watch, fv_remove_watch, &fv_iface);
192
193
return 0;
194
}
195
--
196
2.26.2
197
diff view generated by jsdifflib
Deleted patch
1
From: Coiby Xu <coiby.xu@gmail.com>
2
1
3
When the client is running in gdb and quit command is run in gdb,
4
QEMU will still dispatch the event, which will cause a segmentation fault in
5
the callback function.
6
7
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
10
Message-id: 20200918080912.321299-3-coiby.xu@gmail.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
contrib/libvhost-user/libvhost-user.c | 1 +
14
1 file changed, 1 insertion(+)
15
16
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/contrib/libvhost-user/libvhost-user.c
19
+++ b/contrib/libvhost-user/libvhost-user.c
20
@@ -XXX,XX +XXX,XX @@ vu_deinit(VuDev *dev)
21
}
22
23
if (vq->kick_fd != -1) {
24
+ dev->remove_watch(dev, vq->kick_fd);
25
close(vq->kick_fd);
26
vq->kick_fd = -1;
27
}
28
--
29
2.26.2
30
diff view generated by jsdifflib
Deleted patch
1
From: Coiby Xu <coiby.xu@gmail.com>
2
1
3
Sharing QEMU devices via vhost-user protocol.
4
5
Only one vhost-user client can connect to the server at a time.
6
7
Suggested-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
12
Message-id: 20200918080912.321299-4-coiby.xu@gmail.com
13
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
14
---
15
util/vhost-user-server.h | 65 ++++++
16
util/vhost-user-server.c | 428 +++++++++++++++++++++++++++++++++++++++
17
util/meson.build | 1 +
18
3 files changed, 494 insertions(+)
19
create mode 100644 util/vhost-user-server.h
20
create mode 100644 util/vhost-user-server.c
21
22
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
23
new file mode 100644
24
index XXXXXXX..XXXXXXX
25
--- /dev/null
26
+++ b/util/vhost-user-server.h
27
@@ -XXX,XX +XXX,XX @@
28
+/*
29
+ * Sharing QEMU devices via vhost-user protocol
30
+ *
31
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
32
+ * Copyright (c) 2020 Red Hat, Inc.
33
+ *
34
+ * This work is licensed under the terms of the GNU GPL, version 2 or
35
+ * later. See the COPYING file in the top-level directory.
36
+ */
37
+
38
+#ifndef VHOST_USER_SERVER_H
39
+#define VHOST_USER_SERVER_H
40
+
41
+#include "contrib/libvhost-user/libvhost-user.h"
42
+#include "io/channel-socket.h"
43
+#include "io/channel-file.h"
44
+#include "io/net-listener.h"
45
+#include "qemu/error-report.h"
46
+#include "qapi/error.h"
47
+#include "standard-headers/linux/virtio_blk.h"
48
+
49
+typedef struct VuFdWatch {
50
+ VuDev *vu_dev;
51
+ int fd; /*kick fd*/
52
+ void *pvt;
53
+ vu_watch_cb cb;
54
+ bool processing;
55
+ QTAILQ_ENTRY(VuFdWatch) next;
56
+} VuFdWatch;
57
+
58
+typedef struct VuServer VuServer;
59
+typedef void DevicePanicNotifierFn(VuServer *server);
60
+
61
+struct VuServer {
62
+ QIONetListener *listener;
63
+ AioContext *ctx;
64
+ DevicePanicNotifierFn *device_panic_notifier;
65
+ int max_queues;
66
+ const VuDevIface *vu_iface;
67
+ VuDev vu_dev;
68
+ QIOChannel *ioc; /* The I/O channel with the client */
69
+ QIOChannelSocket *sioc; /* The underlying data channel with the client */
70
+ /* IOChannel for fd provided via VHOST_USER_SET_SLAVE_REQ_FD */
71
+ QIOChannel *ioc_slave;
72
+ QIOChannelSocket *sioc_slave;
73
+ Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
74
+ QTAILQ_HEAD(, VuFdWatch) vu_fd_watches;
75
+ /* restart coroutine co_trip if AIOContext is changed */
76
+ bool aio_context_changed;
77
+ bool processing_msg;
78
+};
79
+
80
+bool vhost_user_server_start(VuServer *server,
81
+ SocketAddress *unix_socket,
82
+ AioContext *ctx,
83
+ uint16_t max_queues,
84
+ DevicePanicNotifierFn *device_panic_notifier,
85
+ const VuDevIface *vu_iface,
86
+ Error **errp);
87
+
88
+void vhost_user_server_stop(VuServer *server);
89
+
90
+void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx);
91
+
92
+#endif /* VHOST_USER_SERVER_H */
93
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
94
new file mode 100644
95
index XXXXXXX..XXXXXXX
96
--- /dev/null
97
+++ b/util/vhost-user-server.c
98
@@ -XXX,XX +XXX,XX @@
99
+/*
100
+ * Sharing QEMU devices via vhost-user protocol
101
+ *
102
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
103
+ * Copyright (c) 2020 Red Hat, Inc.
104
+ *
105
+ * This work is licensed under the terms of the GNU GPL, version 2 or
106
+ * later. See the COPYING file in the top-level directory.
107
+ */
108
+#include "qemu/osdep.h"
109
+#include "qemu/main-loop.h"
110
+#include "vhost-user-server.h"
111
+
112
+static void vmsg_close_fds(VhostUserMsg *vmsg)
113
+{
114
+ int i;
115
+ for (i = 0; i < vmsg->fd_num; i++) {
116
+ close(vmsg->fds[i]);
117
+ }
118
+}
119
+
120
+static void vmsg_unblock_fds(VhostUserMsg *vmsg)
121
+{
122
+ int i;
123
+ for (i = 0; i < vmsg->fd_num; i++) {
124
+ qemu_set_nonblock(vmsg->fds[i]);
125
+ }
126
+}
127
+
128
+static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
129
+ gpointer opaque);
130
+
131
+static void close_client(VuServer *server)
132
+{
133
+ /*
134
+ * Before closing the client
135
+ *
136
+ * 1. Let vu_client_trip stop processing new vhost-user msg
137
+ *
138
+ * 2. remove kick_handler
139
+ *
140
+ * 3. wait for the kick handler to be finished
141
+ *
142
+ * 4. wait for the current vhost-user msg to be finished processing
143
+ */
144
+
145
+ QIOChannelSocket *sioc = server->sioc;
146
+ /* When this is set vu_client_trip will stop processing new vhost-user messages */
147
+ server->sioc = NULL;
148
+
149
+ VuFdWatch *vu_fd_watch, *next;
150
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
151
+ aio_set_fd_handler(server->ioc->ctx, vu_fd_watch->fd, true, NULL,
152
+ NULL, NULL, NULL);
153
+ }
154
+
155
+ while (!QTAILQ_EMPTY(&server->vu_fd_watches)) {
156
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
157
+ if (!vu_fd_watch->processing) {
158
+ QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
159
+ g_free(vu_fd_watch);
160
+ }
161
+ }
162
+ }
163
+
164
+ while (server->processing_msg) {
165
+ if (server->ioc->read_coroutine) {
166
+ server->ioc->read_coroutine = NULL;
167
+ qio_channel_set_aio_fd_handler(server->ioc, server->ioc->ctx, NULL,
168
+ NULL, server->ioc);
169
+ server->processing_msg = false;
170
+ }
171
+ }
172
+
173
+ vu_deinit(&server->vu_dev);
174
+ object_unref(OBJECT(sioc));
175
+ object_unref(OBJECT(server->ioc));
176
+}
177
+
178
+static void panic_cb(VuDev *vu_dev, const char *buf)
179
+{
180
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
181
+
182
+ /* avoid while loop in close_client */
183
+ server->processing_msg = false;
184
+
185
+ if (buf) {
186
+ error_report("vu_panic: %s", buf);
187
+ }
188
+
189
+ if (server->sioc) {
190
+ close_client(server);
191
+ }
192
+
193
+ if (server->device_panic_notifier) {
194
+ server->device_panic_notifier(server);
195
+ }
196
+
197
+ /*
198
+ * Set the callback function for network listener so another
199
+ * vhost-user client can connect to this server
200
+ */
201
+ qio_net_listener_set_client_func(server->listener,
202
+ vu_accept,
203
+ server,
204
+ NULL);
205
+}
206
+
207
+static bool coroutine_fn
208
+vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
209
+{
210
+ struct iovec iov = {
211
+ .iov_base = (char *)vmsg,
212
+ .iov_len = VHOST_USER_HDR_SIZE,
213
+ };
214
+ int rc, read_bytes = 0;
215
+ Error *local_err = NULL;
216
+ /*
217
+ * Store fds/nfds returned from qio_channel_readv_full into
218
+ * temporary variables.
219
+ *
220
+ * VhostUserMsg is a packed structure, gcc will complain about passing
221
+ * pointer to a packed structure member if we pass &VhostUserMsg.fd_num
222
+ * and &VhostUserMsg.fds directly when calling qio_channel_readv_full,
223
+ * thus two temporary variables nfds and fds are used here.
224
+ */
225
+ size_t nfds = 0, nfds_t = 0;
226
+ const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
227
+ int *fds_t = NULL;
228
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
229
+ QIOChannel *ioc = server->ioc;
230
+
231
+ if (!ioc) {
232
+ error_report_err(local_err);
233
+ goto fail;
234
+ }
235
+
236
+ assert(qemu_in_coroutine());
237
+ do {
238
+ /*
239
+ * qio_channel_readv_full may have short reads, keep calling it
240
+ * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
241
+ */
242
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds_t, &nfds_t, &local_err);
243
+ if (rc < 0) {
244
+ if (rc == QIO_CHANNEL_ERR_BLOCK) {
245
+ qio_channel_yield(ioc, G_IO_IN);
246
+ continue;
247
+ } else {
248
+ error_report_err(local_err);
249
+ return false;
250
+ }
251
+ }
252
+ read_bytes += rc;
253
+ if (nfds_t > 0) {
254
+ if (nfds + nfds_t > max_fds) {
255
+ error_report("A maximum of %zu fds are allowed, "
256
+ "however got %lu fds now",
257
+ max_fds, nfds + nfds_t);
258
+ goto fail;
259
+ }
260
+ memcpy(vmsg->fds + nfds, fds_t,
261
+ nfds_t *sizeof(vmsg->fds[0]));
262
+ nfds += nfds_t;
263
+ g_free(fds_t);
264
+ }
265
+ if (read_bytes == VHOST_USER_HDR_SIZE || rc == 0) {
266
+ break;
267
+ }
268
+ iov.iov_base = (char *)vmsg + read_bytes;
269
+ iov.iov_len = VHOST_USER_HDR_SIZE - read_bytes;
270
+ } while (true);
271
+
272
+ vmsg->fd_num = nfds;
273
+ /* qio_channel_readv_full will make socket fds blocking, unblock them */
274
+ vmsg_unblock_fds(vmsg);
275
+ if (vmsg->size > sizeof(vmsg->payload)) {
276
+ error_report("Error: too big message request: %d, "
277
+ "size: vmsg->size: %u, "
278
+ "while sizeof(vmsg->payload) = %zu",
279
+ vmsg->request, vmsg->size, sizeof(vmsg->payload));
280
+ goto fail;
281
+ }
282
+
283
+ struct iovec iov_payload = {
284
+ .iov_base = (char *)&vmsg->payload,
285
+ .iov_len = vmsg->size,
286
+ };
287
+ if (vmsg->size) {
288
+ rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
289
+ if (rc == -1) {
290
+ error_report_err(local_err);
291
+ goto fail;
292
+ }
293
+ }
294
+
295
+ return true;
296
+
297
+fail:
298
+ vmsg_close_fds(vmsg);
299
+
300
+ return false;
301
+}
302
+
303
+
304
+static void vu_client_start(VuServer *server);
305
+static coroutine_fn void vu_client_trip(void *opaque)
306
+{
307
+ VuServer *server = opaque;
308
+
309
+ while (!server->aio_context_changed && server->sioc) {
310
+ server->processing_msg = true;
311
+ vu_dispatch(&server->vu_dev);
312
+ server->processing_msg = false;
313
+ }
314
+
315
+ if (server->aio_context_changed && server->sioc) {
316
+ server->aio_context_changed = false;
317
+ vu_client_start(server);
318
+ }
319
+}
320
+
321
+static void vu_client_start(VuServer *server)
322
+{
323
+ server->co_trip = qemu_coroutine_create(vu_client_trip, server);
324
+ aio_co_enter(server->ctx, server->co_trip);
325
+}
326
+
327
+/*
328
+ * a wrapper for vu_kick_cb
329
+ *
330
+ * since aio_dispatch can only pass one user data pointer to the
331
+ * callback function, pack VuDev and pvt into a struct. Then unpack it
332
+ * and pass them to vu_kick_cb
333
+ */
334
+static void kick_handler(void *opaque)
335
+{
336
+ VuFdWatch *vu_fd_watch = opaque;
337
+ vu_fd_watch->processing = true;
338
+ vu_fd_watch->cb(vu_fd_watch->vu_dev, 0, vu_fd_watch->pvt);
339
+ vu_fd_watch->processing = false;
340
+}
341
+
342
+
343
+static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
344
+{
345
+
346
+ VuFdWatch *vu_fd_watch, *next;
347
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
348
+ if (vu_fd_watch->fd == fd) {
349
+ return vu_fd_watch;
350
+ }
351
+ }
352
+ return NULL;
353
+}
354
+
355
+static void
356
+set_watch(VuDev *vu_dev, int fd, int vu_evt,
357
+ vu_watch_cb cb, void *pvt)
358
+{
359
+
360
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
361
+ g_assert(vu_dev);
362
+ g_assert(fd >= 0);
363
+ g_assert(cb);
364
+
365
+ VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
366
+
367
+ if (!vu_fd_watch) {
368
+ VuFdWatch *vu_fd_watch = g_new0(VuFdWatch, 1);
369
+
370
+ QTAILQ_INSERT_TAIL(&server->vu_fd_watches, vu_fd_watch, next);
371
+
372
+ vu_fd_watch->fd = fd;
373
+ vu_fd_watch->cb = cb;
374
+ qemu_set_nonblock(fd);
375
+ aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler,
376
+ NULL, NULL, vu_fd_watch);
377
+ vu_fd_watch->vu_dev = vu_dev;
378
+ vu_fd_watch->pvt = pvt;
379
+ }
380
+}
381
+
382
+
383
+static void remove_watch(VuDev *vu_dev, int fd)
384
+{
385
+ VuServer *server;
386
+ g_assert(vu_dev);
387
+ g_assert(fd >= 0);
388
+
389
+ server = container_of(vu_dev, VuServer, vu_dev);
390
+
391
+ VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
392
+
393
+ if (!vu_fd_watch) {
394
+ return;
395
+ }
396
+ aio_set_fd_handler(server->ioc->ctx, fd, true, NULL, NULL, NULL, NULL);
397
+
398
+ QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
399
+ g_free(vu_fd_watch);
400
+}
401
+
402
+
403
+static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
404
+ gpointer opaque)
405
+{
406
+ VuServer *server = opaque;
407
+
408
+ if (server->sioc) {
409
+ warn_report("Only one vhost-user client is allowed to "
410
+ "connect the server one time");
411
+ return;
412
+ }
413
+
414
+ if (!vu_init(&server->vu_dev, server->max_queues, sioc->fd, panic_cb,
415
+ vu_message_read, set_watch, remove_watch, server->vu_iface)) {
416
+ error_report("Failed to initialize libvhost-user");
417
+ return;
418
+ }
419
+
420
+ /*
421
+ * Unset the callback function for network listener to make another
422
+ * vhost-user client keep waiting until this client disconnects
423
+ */
424
+ qio_net_listener_set_client_func(server->listener,
425
+ NULL,
426
+ NULL,
427
+ NULL);
428
+ server->sioc = sioc;
429
+ /*
430
+ * Increase the object reference, so sioc will not be freed by
431
+ * qio_net_listener_channel_func which will call object_unref(OBJECT(sioc))
432
+ */
433
+ object_ref(OBJECT(server->sioc));
434
+ qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");
435
+ server->ioc = QIO_CHANNEL(sioc);
436
+ object_ref(OBJECT(server->ioc));
437
+ qio_channel_attach_aio_context(server->ioc, server->ctx);
438
+ qio_channel_set_blocking(QIO_CHANNEL(server->sioc), false, NULL);
439
+ vu_client_start(server);
440
+}
441
+
442
+
443
+void vhost_user_server_stop(VuServer *server)
444
+{
445
+ if (server->sioc) {
446
+ close_client(server);
447
+ }
448
+
449
+ if (server->listener) {
450
+ qio_net_listener_disconnect(server->listener);
451
+ object_unref(OBJECT(server->listener));
452
+ }
453
+
454
+}
455
+
456
+void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx)
457
+{
458
+ VuFdWatch *vu_fd_watch, *next;
459
+ void *opaque = NULL;
460
+ IOHandler *io_read = NULL;
461
+ bool attach;
462
+
463
+ server->ctx = ctx ? ctx : qemu_get_aio_context();
464
+
465
+ if (!server->sioc) {
466
+ /* not yet serving any client*/
467
+ return;
468
+ }
469
+
470
+ if (ctx) {
471
+ qio_channel_attach_aio_context(server->ioc, ctx);
472
+ server->aio_context_changed = true;
473
+ io_read = kick_handler;
474
+ attach = true;
475
+ } else {
476
+ qio_channel_detach_aio_context(server->ioc);
477
+ /* server->ioc->ctx keeps the old AioContext */
478
+ ctx = server->ioc->ctx;
479
+ attach = false;
480
+ }
481
+
482
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
483
+ if (vu_fd_watch->cb) {
484
+ opaque = attach ? vu_fd_watch : NULL;
485
+ aio_set_fd_handler(ctx, vu_fd_watch->fd, true,
486
+ io_read, NULL, NULL,
487
+ opaque);
488
+ }
489
+ }
490
+}
491
+
492
+
493
+bool vhost_user_server_start(VuServer *server,
494
+ SocketAddress *socket_addr,
495
+ AioContext *ctx,
496
+ uint16_t max_queues,
497
+ DevicePanicNotifierFn *device_panic_notifier,
498
+ const VuDevIface *vu_iface,
499
+ Error **errp)
500
+{
501
+ QIONetListener *listener = qio_net_listener_new();
502
+ if (qio_net_listener_open_sync(listener, socket_addr, 1,
503
+ errp) < 0) {
504
+ object_unref(OBJECT(listener));
505
+ return false;
506
+ }
507
+
508
+ /* zero out unspecified fields */
509
+ *server = (VuServer) {
510
+ .listener = listener,
511
+ .vu_iface = vu_iface,
512
+ .max_queues = max_queues,
513
+ .ctx = ctx,
514
+ .device_panic_notifier = device_panic_notifier,
515
+ };
516
+
517
+ qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
518
+
519
+ qio_net_listener_set_client_func(server->listener,
520
+ vu_accept,
521
+ server,
522
+ NULL);
523
+
524
+ QTAILQ_INIT(&server->vu_fd_watches);
525
+ return true;
526
+}
527
diff --git a/util/meson.build b/util/meson.build
528
index XXXXXXX..XXXXXXX 100644
529
--- a/util/meson.build
530
+++ b/util/meson.build
531
@@ -XXX,XX +XXX,XX @@ if have_block
532
util_ss.add(files('main-loop.c'))
533
util_ss.add(files('nvdimm-utils.c'))
534
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
535
+ util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
536
util_ss.add(files('qemu-coroutine-sleep.c'))
537
util_ss.add(files('qemu-co-shared-resource.c'))
538
util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
539
--
540
2.26.2
541
diff view generated by jsdifflib
Deleted patch
1
From: Coiby Xu <coiby.xu@gmail.com>
2
1
3
Move the constants from hw/core/qdev-properties.c to
4
util/block-helpers.h so that knowledge of the min/max values is shared.
5
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
10
Acked-by: Eduardo Habkost <ehabkost@redhat.com>
11
Message-id: 20200918080912.321299-5-coiby.xu@gmail.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
util/block-helpers.h | 19 +++++++++++++
15
hw/core/qdev-properties-system.c | 31 ++++-----------------
16
util/block-helpers.c | 46 ++++++++++++++++++++++++++++++++
17
util/meson.build | 1 +
18
4 files changed, 71 insertions(+), 26 deletions(-)
19
create mode 100644 util/block-helpers.h
20
create mode 100644 util/block-helpers.c
21
22
diff --git a/util/block-helpers.h b/util/block-helpers.h
23
new file mode 100644
24
index XXXXXXX..XXXXXXX
25
--- /dev/null
26
+++ b/util/block-helpers.h
27
@@ -XXX,XX +XXX,XX @@
28
+#ifndef BLOCK_HELPERS_H
29
+#define BLOCK_HELPERS_H
30
+
31
+#include "qemu/units.h"
32
+
33
+/* lower limit is sector size */
34
+#define MIN_BLOCK_SIZE INT64_C(512)
35
+#define MIN_BLOCK_SIZE_STR "512 B"
36
+/*
37
+ * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
38
+ * matches qcow2 cluster size limit
39
+ */
40
+#define MAX_BLOCK_SIZE (2 * MiB)
41
+#define MAX_BLOCK_SIZE_STR "2 MiB"
42
+
43
+void check_block_size(const char *id, const char *name, int64_t value,
44
+ Error **errp);
45
+
46
+#endif /* BLOCK_HELPERS_H */
47
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/hw/core/qdev-properties-system.c
50
+++ b/hw/core/qdev-properties-system.c
51
@@ -XXX,XX +XXX,XX @@
52
#include "sysemu/blockdev.h"
53
#include "net/net.h"
54
#include "hw/pci/pci.h"
55
+#include "util/block-helpers.h"
56
57
static bool check_prop_still_unset(DeviceState *dev, const char *name,
58
const void *old_val, const char *new_val,
59
@@ -XXX,XX +XXX,XX @@ const PropertyInfo qdev_prop_losttickpolicy = {
60
61
/* --- blocksize --- */
62
63
-/* lower limit is sector size */
64
-#define MIN_BLOCK_SIZE 512
65
-#define MIN_BLOCK_SIZE_STR "512 B"
66
-/*
67
- * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
68
- * matches qcow2 cluster size limit
69
- */
70
-#define MAX_BLOCK_SIZE (2 * MiB)
71
-#define MAX_BLOCK_SIZE_STR "2 MiB"
72
-
73
static void set_blocksize(Object *obj, Visitor *v, const char *name,
74
void *opaque, Error **errp)
75
{
76
@@ -XXX,XX +XXX,XX @@ static void set_blocksize(Object *obj, Visitor *v, const char *name,
77
Property *prop = opaque;
78
uint32_t *ptr = qdev_get_prop_ptr(dev, prop);
79
uint64_t value;
80
+ Error *local_err = NULL;
81
82
if (dev->realized) {
83
qdev_prop_set_after_realize(dev, name, errp);
84
@@ -XXX,XX +XXX,XX @@ static void set_blocksize(Object *obj, Visitor *v, const char *name,
85
if (!visit_type_size(v, name, &value, errp)) {
86
return;
87
}
88
- /* value of 0 means "unset" */
89
- if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
90
- error_setg(errp,
91
- "Property %s.%s doesn't take value %" PRIu64
92
- " (minimum: " MIN_BLOCK_SIZE_STR
93
- ", maximum: " MAX_BLOCK_SIZE_STR ")",
94
- dev->id ? : "", name, value);
95
+ check_block_size(dev->id ? : "", name, value, &local_err);
96
+ if (local_err) {
97
+ error_propagate(errp, local_err);
98
return;
99
}
100
-
101
- /* We rely on power-of-2 blocksizes for bitmasks */
102
- if ((value & (value - 1)) != 0) {
103
- error_setg(errp,
104
- "Property %s.%s doesn't take value '%" PRId64 "', "
105
- "it's not a power of 2", dev->id ?: "", name, (int64_t)value);
106
- return;
107
- }
108
-
109
*ptr = value;
110
}
111
112
diff --git a/util/block-helpers.c b/util/block-helpers.c
113
new file mode 100644
114
index XXXXXXX..XXXXXXX
115
--- /dev/null
116
+++ b/util/block-helpers.c
117
@@ -XXX,XX +XXX,XX @@
118
+/*
119
+ * Block utility functions
120
+ *
121
+ * Copyright IBM, Corp. 2011
122
+ * Copyright (c) 2020 Coiby Xu <coiby.xu@gmail.com>
123
+ *
124
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
125
+ * See the COPYING file in the top-level directory.
126
+ */
127
+
128
+#include "qemu/osdep.h"
129
+#include "qapi/error.h"
130
+#include "qapi/qmp/qerror.h"
131
+#include "block-helpers.h"
132
+
133
+/**
134
+ * check_block_size:
135
+ * @id: The unique ID of the object
136
+ * @name: The name of the property being validated
137
+ * @value: The block size in bytes
138
+ * @errp: A pointer to an area to store an error
139
+ *
140
+ * This function checks that the block size meets the following conditions:
141
+ * 1. At least MIN_BLOCK_SIZE
142
+ * 2. No larger than MAX_BLOCK_SIZE
143
+ * 3. A power of 2
144
+ */
145
+void check_block_size(const char *id, const char *name, int64_t value,
146
+ Error **errp)
147
+{
148
+ /* value of 0 means "unset" */
149
+ if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
150
+ error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
151
+ id, name, value, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
152
+ return;
153
+ }
154
+
155
+ /* We rely on power-of-2 blocksizes for bitmasks */
156
+ if ((value & (value - 1)) != 0) {
157
+ error_setg(errp,
158
+ "Property %s.%s doesn't take value '%" PRId64
159
+ "', it's not a power of 2",
160
+ id, name, value);
161
+ return;
162
+ }
163
+}
164
diff --git a/util/meson.build b/util/meson.build
165
index XXXXXXX..XXXXXXX 100644
166
--- a/util/meson.build
167
+++ b/util/meson.build
168
@@ -XXX,XX +XXX,XX @@ if have_block
169
util_ss.add(files('nvdimm-utils.c'))
170
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
171
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
172
+ util_ss.add(files('block-helpers.c'))
173
util_ss.add(files('qemu-coroutine-sleep.c'))
174
util_ss.add(files('qemu-co-shared-resource.c'))
175
util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
176
--
177
2.26.2
178
diff view generated by jsdifflib
1
From: Coiby Xu <coiby.xu@gmail.com>
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
2
2
3
By making use of libvhost-user, a block device drive can be shared with
3
Until ESXi 6.5 VMware used the vmfsSparse format for snapshots (VMDK3 in
4
the connected vhost-user client. Only one client can connect to the
4
QEMU).
5
server at a time.
5
6
6
This format was lacking in the following:
7
Since vhost-user-server needs a block drive to be created first, delay
7
8
the creation of this object.
8
* Grain directory (L1) and grain table (L2) entries were 32-bit,
9
9
allowing access to only 2TB (slightly less) of data.
10
Suggested-by: Kevin Wolf <kwolf@redhat.com>
10
* The grain size (default) was 512 bytes - leading to data
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
fragmentation and many grain tables.
12
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
12
* For space reclamation purposes, it was necessary to find all the
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
13
grains which are not pointed to by any grain table - so a reverse
14
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
14
mapping of "offset of grain in vmdk" to "grain table" must be
15
Message-id: 20200918080912.321299-6-coiby.xu@gmail.com
15
constructed - which takes large amounts of CPU/RAM.
16
[Shorten "vhost_user_blk_server" string to "vhost_user_blk" to avoid the
16
17
following compiler warning:
17
The format specification can be found in VMware's documentation:
18
../block/export/vhost-user-blk-server.c:178:50: error: ‘%s’ directive output truncated writing 21 bytes into a region of size 20 [-Werror=format-truncation=]
18
https://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf
19
--Stefan]
19
20
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
20
In ESXi 6.5, to support snapshot files larger than 2TB, a new format was
21
introduced: SESparse (Space Efficient).
22
23
This format fixes the above issues:
24
25
* All entries are now 64-bit.
26
* The grain size (default) is 4KB.
27
* Grain directory and grain tables are now located at the beginning
28
of the file.
29
+ seSparse format reserves space for all grain tables.
30
+ Grain tables can be addressed using an index.
31
+ Grains are located at the end of the file and can also be
32
addressed with an index.
33
- seSparse vmdks of large disks (64TB) have huge preallocated
34
headers - mainly due to L2 tables, even for empty snapshots.
35
* The header contains a reverse mapping ("backmap") of "offset of
36
grain in vmdk" to "grain table" and a bitmap ("free bitmap") which
37
specifies for each grain - whether it is allocated or not.
38
Using these data structures we can implement space reclamation
39
efficiently.
40
* Due to the fact that the header now maintains two mappings:
41
* The regular one (grain directory & grain tables)
42
* A reverse one (backmap and free bitmap)
43
These data structures can lose consistency upon crash and result
44
in a corrupted VMDK.
45
Therefore, a journal is also added to the VMDK and is replayed
46
when VMware reopens the file after a crash.
47
48
Since ESXi 6.7 - SESparse is the only snapshot format available.
49
50
Unfortunately, VMware does not provide documentation regarding the new
51
seSparse format.
52
53
This commit is based on black-box research of the seSparse format.
54
Various in-guest block operations and their effect on the snapshot file
55
were tested.
56
57
The only VMware provided source of information (regarding the underlying
58
implementation) was a log file on the ESXi:
59
60
/var/log/hostd.log
61
62
Whenever an seSparse snapshot is created - the log is being populated
63
with seSparse records.
64
65
Relevant log records are of the form:
66
67
[...] Const Header:
68
[...] constMagic = 0xcafebabe
69
[...] version = 2.1
70
[...] capacity = 204800
71
[...] grainSize = 8
72
[...] grainTableSize = 64
73
[...] flags = 0
74
[...] Extents:
75
[...] Header : <1 : 1>
76
[...] JournalHdr : <2 : 2>
77
[...] Journal : <2048 : 2048>
78
[...] GrainDirectory : <4096 : 2048>
79
[...] GrainTables : <6144 : 2048>
80
[...] FreeBitmap : <8192 : 2048>
81
[...] BackMap : <10240 : 2048>
82
[...] Grain : <12288 : 204800>
83
[...] Volatile Header:
84
[...] volatileMagic = 0xcafecafe
85
[...] FreeGTNumber = 0
86
[...] nextTxnSeqNumber = 0
87
[...] replayJournal = 0
88
89
The sizes that are seen in the log file are in sectors.
90
Extents are of the following format: <offset : size>
91
92
This commit is a strict implementation which enforces:
93
* magics
94
* version number 2.1
95
* grain size of 8 sectors (4KB)
96
* grain table size of 64 sectors
97
* zero flags
98
* extent locations
99
100
Additionally, this commit provides only a subset of the functionality
101
offered by seSparse's format:
102
* Read-only
103
* No journal replay
104
* No space reclamation
105
* No unmap support
106
107
Hence, journal header, journal, free bitmap and backmap extents are
108
unused, only the "classic" (L1 -> L2 -> data) grain access is
109
implemented.
110
111
However there are several differences in the grain access itself.
112
Grain directory (L1):
113
* Grain directory entries are indexes (not offsets) to grain
114
tables.
115
* Valid grain directory entries have their highest nibble set to
116
0x1.
117
* Since grain tables are always located at the beginning of the
118
file - the index can fit into 32 bits - so we can use its low
119
part if it's valid.
120
Grain table (L2):
121
* Grain table entries are indexes (not offsets) to grains.
122
* If the highest nibble of the entry is:
123
0x0:
124
The grain is not allocated.
125
The rest of the bytes are 0.
126
0x1:
127
The grain is unmapped - guest sees a zero grain.
128
The rest of the bits point to the previously mapped grain,
129
see 0x3 case.
130
0x2:
131
The grain is zero.
132
0x3:
133
The grain is allocated - to get the index calculate:
134
((entry & 0x0fff000000000000) >> 48) |
135
((entry & 0x0000ffffffffffff) << 12)
136
* The difference between 0x1 and 0x2 is that 0x1 is an unallocated
137
grain which results from the guest using sg_unmap to unmap the
138
grain - but the grain itself still exists in the grain extent - a
139
space reclamation procedure should delete it.
140
Unmapping a zero grain has no effect (0x2 will not change to 0x1)
141
but unmapping an unallocated grain will (0x0 to 0x1) - naturally.
142
143
In order to implement seSparse some fields had to be changed to support
144
both 32-bit and 64-bit entry sizes.
145
146
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
147
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
148
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
149
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
150
Message-id: 20190620091057.47441-4-shmuel.eiderman@oracle.com
151
Signed-off-by: Max Reitz <mreitz@redhat.com>
21
---
152
---
22
block/export/vhost-user-blk-server.h | 36 ++
153
block/vmdk.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++---
23
block/export/vhost-user-blk-server.c | 661 +++++++++++++++++++++++++++
154
1 file changed, 342 insertions(+), 16 deletions(-)
24
softmmu/vl.c | 4 +
155
25
block/meson.build | 1 +
156
diff --git a/block/vmdk.c b/block/vmdk.c
26
4 files changed, 702 insertions(+)
157
index XXXXXXX..XXXXXXX 100644
27
create mode 100644 block/export/vhost-user-blk-server.h
158
--- a/block/vmdk.c
28
create mode 100644 block/export/vhost-user-blk-server.c
159
+++ b/block/vmdk.c
29
160
@@ -XXX,XX +XXX,XX @@ typedef struct {
30
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
161
uint16_t compressAlgorithm;
31
new file mode 100644
162
} QEMU_PACKED VMDK4Header;
32
index XXXXXXX..XXXXXXX
163
33
--- /dev/null
164
+typedef struct VMDKSESparseConstHeader {
34
+++ b/block/export/vhost-user-blk-server.h
165
+ uint64_t magic;
35
@@ -XXX,XX +XXX,XX @@
166
+ uint64_t version;
36
+/*
167
+ uint64_t capacity;
37
+ * Sharing QEMU block devices via vhost-user protocol
168
+ uint64_t grain_size;
38
+ *
169
+ uint64_t grain_table_size;
39
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
170
+ uint64_t flags;
40
+ * Copyright (c) 2020 Red Hat, Inc.
171
+ uint64_t reserved1;
41
+ *
172
+ uint64_t reserved2;
42
+ * This work is licensed under the terms of the GNU GPL, version 2 or
173
+ uint64_t reserved3;
43
+ * later. See the COPYING file in the top-level directory.
174
+ uint64_t reserved4;
44
+ */
175
+ uint64_t volatile_header_offset;
45
+
176
+ uint64_t volatile_header_size;
46
+#ifndef VHOST_USER_BLK_SERVER_H
177
+ uint64_t journal_header_offset;
47
+#define VHOST_USER_BLK_SERVER_H
178
+ uint64_t journal_header_size;
48
+#include "util/vhost-user-server.h"
179
+ uint64_t journal_offset;
49
+
180
+ uint64_t journal_size;
50
+typedef struct VuBlockDev VuBlockDev;
181
+ uint64_t grain_dir_offset;
51
+#define TYPE_VHOST_USER_BLK_SERVER "vhost-user-blk-server"
182
+ uint64_t grain_dir_size;
52
+#define VHOST_USER_BLK_SERVER(obj) \
183
+ uint64_t grain_tables_offset;
53
+ OBJECT_CHECK(VuBlockDev, obj, TYPE_VHOST_USER_BLK_SERVER)
184
+ uint64_t grain_tables_size;
54
+
185
+ uint64_t free_bitmap_offset;
55
+/* vhost user block device */
186
+ uint64_t free_bitmap_size;
56
+struct VuBlockDev {
187
+ uint64_t backmap_offset;
57
+ Object parent_obj;
188
+ uint64_t backmap_size;
58
+ char *node_name;
189
+ uint64_t grains_offset;
59
+ SocketAddress *addr;
190
+ uint64_t grains_size;
60
+ AioContext *ctx;
191
+ uint8_t pad[304];
61
+ VuServer vu_server;
192
+} QEMU_PACKED VMDKSESparseConstHeader;
62
+ bool running;
193
+
63
+ uint32_t blk_size;
194
+typedef struct VMDKSESparseVolatileHeader {
64
+ BlockBackend *backend;
195
+ uint64_t magic;
65
+ QIOChannelSocket *sioc;
196
+ uint64_t free_gt_number;
66
+ QTAILQ_ENTRY(VuBlockDev) next;
197
+ uint64_t next_txn_seq_number;
67
+ struct virtio_blk_config blkcfg;
198
+ uint64_t replay_journal;
68
+ bool writable;
199
+ uint8_t pad[480];
69
+};
200
+} QEMU_PACKED VMDKSESparseVolatileHeader;
70
+
201
+
71
+#endif /* VHOST_USER_BLK_SERVER_H */
202
#define L2_CACHE_SIZE 16
72
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
203
73
new file mode 100644
204
typedef struct VmdkExtent {
74
index XXXXXXX..XXXXXXX
205
@@ -XXX,XX +XXX,XX @@ typedef struct VmdkExtent {
75
--- /dev/null
206
bool compressed;
76
+++ b/block/export/vhost-user-blk-server.c
207
bool has_marker;
77
@@ -XXX,XX +XXX,XX @@
208
bool has_zero_grain;
78
+/*
209
+ bool sesparse;
79
+ * Sharing QEMU block devices via vhost-user protocol
210
+ uint64_t sesparse_l2_tables_offset;
80
+ *
211
+ uint64_t sesparse_clusters_offset;
81
+ * Parts of the code based on nbd/server.c.
212
+ int32_t entry_size;
82
+ *
213
int version;
83
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
214
int64_t sectors;
84
+ * Copyright (c) 2020 Red Hat, Inc.
215
int64_t end_sector;
85
+ *
216
int64_t flat_start_offset;
86
+ * This work is licensed under the terms of the GNU GPL, version 2 or
217
int64_t l1_table_offset;
87
+ * later. See the COPYING file in the top-level directory.
218
int64_t l1_backup_table_offset;
88
+ */
219
- uint32_t *l1_table;
89
+#include "qemu/osdep.h"
220
+ void *l1_table;
90
+#include "block/block.h"
221
uint32_t *l1_backup_table;
91
+#include "vhost-user-blk-server.h"
222
unsigned int l1_size;
92
+#include "qapi/error.h"
223
uint32_t l1_entry_sectors;
93
+#include "qom/object_interfaces.h"
224
94
+#include "sysemu/block-backend.h"
225
unsigned int l2_size;
95
+#include "util/block-helpers.h"
226
- uint32_t *l2_cache;
96
+
227
+ void *l2_cache;
97
+enum {
228
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
98
+ VHOST_USER_BLK_MAX_QUEUES = 1,
229
uint32_t l2_cache_counts[L2_CACHE_SIZE];
99
+};
230
100
+struct virtio_blk_inhdr {
231
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
101
+ unsigned char status;
232
* minimal L2 table size: 512 entries
102
+};
233
* 8 TB is still more than the maximal value supported for
103
+
234
* VMDK3 & VMDK4 which is 2TB.
104
+typedef struct VuBlockReq {
235
+ * 64TB - for "ESXi seSparse Extent"
105
+ VuVirtqElement *elem;
236
+ * minimal cluster size: 512B (default is 4KB)
106
+ int64_t sector_num;
237
+ * L2 table size: 4096 entries (const).
107
+ size_t size;
238
+ * 64TB is more than the maximal value supported for
108
+ struct virtio_blk_inhdr *in;
239
+ * seSparse VMDKs (which is slightly less than 64TB)
109
+ struct virtio_blk_outhdr out;
240
*/
110
+ VuServer *server;
241
error_setg(errp, "L1 size too big");
111
+ struct VuVirtq *vq;
242
return -EFBIG;
112
+} VuBlockReq;
243
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
113
+
244
extent->l2_size = l2_size;
114
+static void vu_block_req_complete(VuBlockReq *req)
245
extent->cluster_sectors = flat ? sectors : cluster_sectors;
246
extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
247
+ extent->entry_size = sizeof(uint32_t);
248
249
if (s->num_extents > 1) {
250
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
251
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
252
int i;
253
254
/* read the L1 table */
255
- l1_size = extent->l1_size * sizeof(uint32_t);
256
+ l1_size = extent->l1_size * extent->entry_size;
257
extent->l1_table = g_try_malloc(l1_size);
258
if (l1_size && extent->l1_table == NULL) {
259
return -ENOMEM;
260
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
261
goto fail_l1;
262
}
263
for (i = 0; i < extent->l1_size; i++) {
264
- le32_to_cpus(&extent->l1_table[i]);
265
+ if (extent->entry_size == sizeof(uint64_t)) {
266
+ le64_to_cpus((uint64_t *)extent->l1_table + i);
267
+ } else {
268
+ assert(extent->entry_size == sizeof(uint32_t));
269
+ le32_to_cpus((uint32_t *)extent->l1_table + i);
270
+ }
271
}
272
273
if (extent->l1_backup_table_offset) {
274
+ assert(!extent->sesparse);
275
extent->l1_backup_table = g_try_malloc(l1_size);
276
if (l1_size && extent->l1_backup_table == NULL) {
277
ret = -ENOMEM;
278
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
279
}
280
281
extent->l2_cache =
282
- g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
283
+ g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
284
return 0;
285
fail_l1b:
286
g_free(extent->l1_backup_table);
287
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
288
return ret;
289
}
290
291
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
292
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
293
+
294
+/* Strict checks - format not officially documented */
295
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
296
+ Error **errp)
115
+{
297
+{
116
+ VuDev *vu_dev = &req->server->vu_dev;
298
+ header->magic = le64_to_cpu(header->magic);
117
+
299
+ header->version = le64_to_cpu(header->version);
118
+ /* IO size with 1 extra status byte */
300
+ header->grain_size = le64_to_cpu(header->grain_size);
119
+ vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
301
+ header->grain_table_size = le64_to_cpu(header->grain_table_size);
120
+ vu_queue_notify(vu_dev, req->vq);
302
+ header->flags = le64_to_cpu(header->flags);
121
+
303
+ header->reserved1 = le64_to_cpu(header->reserved1);
122
+ if (req->elem) {
304
+ header->reserved2 = le64_to_cpu(header->reserved2);
123
+ free(req->elem);
305
+ header->reserved3 = le64_to_cpu(header->reserved3);
124
+ }
306
+ header->reserved4 = le64_to_cpu(header->reserved4);
125
+
307
+
126
+ g_free(req);
308
+ header->volatile_header_offset =
127
+}
309
+ le64_to_cpu(header->volatile_header_offset);
128
+
310
+ header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
129
+static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
311
+
130
+{
312
+ header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
131
+ return container_of(server, VuBlockDev, vu_server);
313
+ header->journal_header_size = le64_to_cpu(header->journal_header_size);
132
+}
314
+
133
+
315
+ header->journal_offset = le64_to_cpu(header->journal_offset);
134
+static int coroutine_fn
316
+ header->journal_size = le64_to_cpu(header->journal_size);
135
+vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
317
+
136
+ uint32_t iovcnt, uint32_t type)
318
+ header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
137
+{
319
+ header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
138
+ struct virtio_blk_discard_write_zeroes desc;
320
+
139
+ ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
321
+ header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
140
+ if (unlikely(size != sizeof(desc))) {
322
+ header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
141
+ error_report("Invalid size %ld, expect %ld", size, sizeof(desc));
323
+
324
+ header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
325
+ header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
326
+
327
+ header->backmap_offset = le64_to_cpu(header->backmap_offset);
328
+ header->backmap_size = le64_to_cpu(header->backmap_size);
329
+
330
+ header->grains_offset = le64_to_cpu(header->grains_offset);
331
+ header->grains_size = le64_to_cpu(header->grains_size);
332
+
333
+ if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
334
+ error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
335
+ header->magic);
142
+ return -EINVAL;
336
+ return -EINVAL;
143
+ }
337
+ }
144
+
338
+
145
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
339
+ if (header->version != 0x0000000200000001) {
146
+ uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
340
+ error_setg(errp, "Unsupported version: 0x%016" PRIx64,
147
+ le32_to_cpu(desc.num_sectors) << 9 };
341
+ header->version);
148
+ if (type == VIRTIO_BLK_T_DISCARD) {
342
+ return -ENOTSUP;
149
+ if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
343
+ }
150
+ return 0;
344
+
151
+ }
345
+ if (header->grain_size != 8) {
152
+ } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
346
+ error_setg(errp, "Unsupported grain size: %" PRIu64,
153
+ if (blk_co_pwrite_zeroes(vdev_blk->backend,
347
+ header->grain_size);
154
+ range[0], range[1], 0) == 0) {
348
+ return -ENOTSUP;
155
+ return 0;
349
+ }
156
+ }
350
+
157
+ }
351
+ if (header->grain_table_size != 64) {
158
+
352
+ error_setg(errp, "Unsupported grain table size: %" PRIu64,
159
+ return -EINVAL;
353
+ header->grain_table_size);
160
+}
354
+ return -ENOTSUP;
161
+
355
+ }
162
+static void coroutine_fn vu_block_flush(VuBlockReq *req)
356
+
163
+{
357
+ if (header->flags != 0) {
164
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
358
+ error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
165
+ BlockBackend *backend = vdev_blk->backend;
359
+ header->flags);
166
+ blk_co_flush(backend);
360
+ return -ENOTSUP;
167
+}
361
+ }
168
+
362
+
169
+struct req_data {
363
+ if (header->reserved1 != 0 || header->reserved2 != 0 ||
170
+ VuServer *server;
364
+ header->reserved3 != 0 || header->reserved4 != 0) {
171
+ VuVirtq *vq;
365
+ error_setg(errp, "Unsupported reserved bits:"
172
+ VuVirtqElement *elem;
366
+ " 0x%016" PRIx64 " 0x%016" PRIx64
173
+};
367
+ " 0x%016" PRIx64 " 0x%016" PRIx64,
174
+
368
+ header->reserved1, header->reserved2,
175
+static void coroutine_fn vu_block_virtio_process_req(void *opaque)
369
+ header->reserved3, header->reserved4);
176
+{
370
+ return -ENOTSUP;
177
+ struct req_data *data = opaque;
371
+ }
178
+ VuServer *server = data->server;
372
+
179
+ VuVirtq *vq = data->vq;
373
+ /* check that padding is 0 */
180
+ VuVirtqElement *elem = data->elem;
374
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
181
+ uint32_t type;
375
+ error_setg(errp, "Unsupported non-zero const header padding");
182
+ VuBlockReq *req;
376
+ return -ENOTSUP;
183
+
377
+ }
184
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
185
+ BlockBackend *backend = vdev_blk->backend;
186
+
187
+ struct iovec *in_iov = elem->in_sg;
188
+ struct iovec *out_iov = elem->out_sg;
189
+ unsigned in_num = elem->in_num;
190
+ unsigned out_num = elem->out_num;
191
+ /* refer to hw/block/virtio_blk.c */
192
+ if (elem->out_num < 1 || elem->in_num < 1) {
193
+ error_report("virtio-blk request missing headers");
194
+ free(elem);
195
+ return;
196
+ }
197
+
198
+ req = g_new0(VuBlockReq, 1);
199
+ req->server = server;
200
+ req->vq = vq;
201
+ req->elem = elem;
202
+
203
+ if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
204
+ sizeof(req->out)) != sizeof(req->out))) {
205
+ error_report("virtio-blk request outhdr too short");
206
+ goto err;
207
+ }
208
+
209
+ iov_discard_front(&out_iov, &out_num, sizeof(req->out));
210
+
211
+ if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
212
+ error_report("virtio-blk request inhdr too short");
213
+ goto err;
214
+ }
215
+
216
+ /* We always touch the last byte, so just see how big in_iov is. */
217
+ req->in = (void *)in_iov[in_num - 1].iov_base
218
+ + in_iov[in_num - 1].iov_len
219
+ - sizeof(struct virtio_blk_inhdr);
220
+ iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
221
+
222
+ type = le32_to_cpu(req->out.type);
223
+ switch (type & ~VIRTIO_BLK_T_BARRIER) {
224
+ case VIRTIO_BLK_T_IN:
225
+ case VIRTIO_BLK_T_OUT: {
226
+ ssize_t ret = 0;
227
+ bool is_write = type & VIRTIO_BLK_T_OUT;
228
+ req->sector_num = le64_to_cpu(req->out.sector);
229
+
230
+ int64_t offset = req->sector_num * vdev_blk->blk_size;
231
+ QEMUIOVector qiov;
232
+ if (is_write) {
233
+ qemu_iovec_init_external(&qiov, out_iov, out_num);
234
+ ret = blk_co_pwritev(backend, offset, qiov.size,
235
+ &qiov, 0);
236
+ } else {
237
+ qemu_iovec_init_external(&qiov, in_iov, in_num);
238
+ ret = blk_co_preadv(backend, offset, qiov.size,
239
+ &qiov, 0);
240
+ }
241
+ if (ret >= 0) {
242
+ req->in->status = VIRTIO_BLK_S_OK;
243
+ } else {
244
+ req->in->status = VIRTIO_BLK_S_IOERR;
245
+ }
246
+ break;
247
+ }
248
+ case VIRTIO_BLK_T_FLUSH:
249
+ vu_block_flush(req);
250
+ req->in->status = VIRTIO_BLK_S_OK;
251
+ break;
252
+ case VIRTIO_BLK_T_GET_ID: {
253
+ size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
254
+ VIRTIO_BLK_ID_BYTES);
255
+ snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
256
+ req->in->status = VIRTIO_BLK_S_OK;
257
+ req->size = elem->in_sg[0].iov_len;
258
+ break;
259
+ }
260
+ case VIRTIO_BLK_T_DISCARD:
261
+ case VIRTIO_BLK_T_WRITE_ZEROES: {
262
+ int rc;
263
+ rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1],
264
+ out_num, type);
265
+ if (rc == 0) {
266
+ req->in->status = VIRTIO_BLK_S_OK;
267
+ } else {
268
+ req->in->status = VIRTIO_BLK_S_IOERR;
269
+ }
270
+ break;
271
+ }
272
+ default:
273
+ req->in->status = VIRTIO_BLK_S_UNSUPP;
274
+ break;
275
+ }
276
+
277
+ vu_block_req_complete(req);
278
+ return;
279
+
280
+err:
281
+ free(elem);
282
+ g_free(req);
283
+ return;
284
+}
285
+
286
+static void vu_block_process_vq(VuDev *vu_dev, int idx)
287
+{
288
+ VuServer *server;
289
+ VuVirtq *vq;
290
+ struct req_data *req_data;
291
+
292
+ server = container_of(vu_dev, VuServer, vu_dev);
293
+ assert(server);
294
+
295
+ vq = vu_get_queue(vu_dev, idx);
296
+ assert(vq);
297
+ VuVirtqElement *elem;
298
+ while (1) {
299
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) +
300
+ sizeof(VuBlockReq));
301
+ if (elem) {
302
+ req_data = g_new0(struct req_data, 1);
303
+ req_data->server = server;
304
+ req_data->vq = vq;
305
+ req_data->elem = elem;
306
+ Coroutine *co = qemu_coroutine_create(vu_block_virtio_process_req,
307
+ req_data);
308
+ aio_co_enter(server->ioc->ctx, co);
309
+ } else {
310
+ break;
311
+ }
312
+ }
313
+}
314
+
315
+static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started)
316
+{
317
+ VuVirtq *vq;
318
+
319
+ assert(vu_dev);
320
+
321
+ vq = vu_get_queue(vu_dev, idx);
322
+ vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL);
323
+}
324
+
325
+static uint64_t vu_block_get_features(VuDev *dev)
326
+{
327
+ uint64_t features;
328
+ VuServer *server = container_of(dev, VuServer, vu_dev);
329
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
330
+ features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
331
+ 1ull << VIRTIO_BLK_F_SEG_MAX |
332
+ 1ull << VIRTIO_BLK_F_TOPOLOGY |
333
+ 1ull << VIRTIO_BLK_F_BLK_SIZE |
334
+ 1ull << VIRTIO_BLK_F_FLUSH |
335
+ 1ull << VIRTIO_BLK_F_DISCARD |
336
+ 1ull << VIRTIO_BLK_F_WRITE_ZEROES |
337
+ 1ull << VIRTIO_BLK_F_CONFIG_WCE |
338
+ 1ull << VIRTIO_F_VERSION_1 |
339
+ 1ull << VIRTIO_RING_F_INDIRECT_DESC |
340
+ 1ull << VIRTIO_RING_F_EVENT_IDX |
341
+ 1ull << VHOST_USER_F_PROTOCOL_FEATURES;
342
+
343
+ if (!vdev_blk->writable) {
344
+ features |= 1ull << VIRTIO_BLK_F_RO;
345
+ }
346
+
347
+ return features;
348
+}
349
+
350
+static uint64_t vu_block_get_protocol_features(VuDev *dev)
351
+{
352
+ return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
353
+ 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
354
+}
355
+
356
+static int
357
+vu_block_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
358
+{
359
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
360
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
361
+ memcpy(config, &vdev_blk->blkcfg, len);
362
+
378
+
363
+ return 0;
379
+ return 0;
364
+}
380
+}
365
+
381
+
366
+static int
382
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
367
+vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
383
+ Error **errp)
368
+ uint32_t offset, uint32_t size, uint32_t flags)
369
+{
384
+{
370
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
385
+ header->magic = le64_to_cpu(header->magic);
371
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
386
+ header->free_gt_number = le64_to_cpu(header->free_gt_number);
372
+ uint8_t wce;
387
+ header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
373
+
388
+ header->replay_journal = le64_to_cpu(header->replay_journal);
374
+ /* don't support live migration */
389
+
375
+ if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
390
+ if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
391
+ error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
392
+ header->magic);
376
+ return -EINVAL;
393
+ return -EINVAL;
377
+ }
394
+ }
378
+
395
+
379
+ if (offset != offsetof(struct virtio_blk_config, wce) ||
396
+ if (header->replay_journal) {
380
+ size != 1) {
397
+ error_setg(errp, "Image is dirty, Replaying journal not supported");
381
+ return -EINVAL;
398
+ return -ENOTSUP;
382
+ }
399
+ }
383
+
400
+
384
+ wce = *data;
401
+ /* check that padding is 0 */
385
+ vdev_blk->blkcfg.wce = wce;
402
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
386
+ blk_set_enable_write_cache(vdev_blk->backend, wce);
403
+ error_setg(errp, "Unsupported non-zero volatile header padding");
404
+ return -ENOTSUP;
405
+ }
406
+
387
+ return 0;
407
+ return 0;
388
+}
408
+}
389
+
409
+
390
+/*
410
+static int vmdk_open_se_sparse(BlockDriverState *bs,
391
+ * When the client disconnects, it sends a VHOST_USER_NONE request
411
+ BdrvChild *file,
392
+ * and vu_process_message will simple call exit which cause the VM
412
+ int flags, Error **errp)
393
+ * to exit abruptly.
394
+ * To avoid this issue, process VHOST_USER_NONE request ahead
395
+ * of vu_process_message.
396
+ *
397
+ */
398
+static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
399
+{
413
+{
400
+ if (vmsg->request == VHOST_USER_NONE) {
414
+ int ret;
401
+ dev->panic(dev, "disconnect");
415
+ VMDKSESparseConstHeader const_header;
402
+ return true;
416
+ VMDKSESparseVolatileHeader volatile_header;
403
+ }
417
+ VmdkExtent *extent;
404
+ return false;
418
+
419
+ ret = bdrv_apply_auto_read_only(bs,
420
+ "No write support for seSparse images available", errp);
421
+ if (ret < 0) {
422
+ return ret;
423
+ }
424
+
425
+ assert(sizeof(const_header) == SECTOR_SIZE);
426
+
427
+ ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
428
+ if (ret < 0) {
429
+ bdrv_refresh_filename(file->bs);
430
+ error_setg_errno(errp, -ret,
431
+ "Could not read const header from file '%s'",
432
+ file->bs->filename);
433
+ return ret;
434
+ }
435
+
436
+ /* check const header */
437
+ ret = check_se_sparse_const_header(&const_header, errp);
438
+ if (ret < 0) {
439
+ return ret;
440
+ }
441
+
442
+ assert(sizeof(volatile_header) == SECTOR_SIZE);
443
+
444
+ ret = bdrv_pread(file,
445
+ const_header.volatile_header_offset * SECTOR_SIZE,
446
+ &volatile_header, sizeof(volatile_header));
447
+ if (ret < 0) {
448
+ bdrv_refresh_filename(file->bs);
449
+ error_setg_errno(errp, -ret,
450
+ "Could not read volatile header from file '%s'",
451
+ file->bs->filename);
452
+ return ret;
453
+ }
454
+
455
+ /* check volatile header */
456
+ ret = check_se_sparse_volatile_header(&volatile_header, errp);
457
+ if (ret < 0) {
458
+ return ret;
459
+ }
460
+
461
+ ret = vmdk_add_extent(bs, file, false,
462
+ const_header.capacity,
463
+ const_header.grain_dir_offset * SECTOR_SIZE,
464
+ 0,
465
+ const_header.grain_dir_size *
466
+ SECTOR_SIZE / sizeof(uint64_t),
467
+ const_header.grain_table_size *
468
+ SECTOR_SIZE / sizeof(uint64_t),
469
+ const_header.grain_size,
470
+ &extent,
471
+ errp);
472
+ if (ret < 0) {
473
+ return ret;
474
+ }
475
+
476
+ extent->sesparse = true;
477
+ extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
478
+ extent->sesparse_clusters_offset = const_header.grains_offset;
479
+ extent->entry_size = sizeof(uint64_t);
480
+
481
+ ret = vmdk_init_tables(bs, extent, errp);
482
+ if (ret) {
483
+ /* free extent allocated by vmdk_add_extent */
484
+ vmdk_free_last_extent(bs);
485
+ }
486
+
487
+ return ret;
405
+}
488
+}
406
+
489
+
407
+static const VuDevIface vu_block_iface = {
490
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
408
+ .get_features = vu_block_get_features,
491
QDict *options, Error **errp);
409
+ .queue_set_started = vu_block_queue_set_started,
492
410
+ .get_protocol_features = vu_block_get_protocol_features,
493
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
411
+ .get_config = vu_block_get_config,
494
* RW [size in sectors] SPARSE "file-name.vmdk"
412
+ .set_config = vu_block_set_config,
495
* RW [size in sectors] VMFS "file-name.vmdk"
413
+ .process_msg = vu_block_process_msg,
496
* RW [size in sectors] VMFSSPARSE "file-name.vmdk"
414
+};
497
+ * RW [size in sectors] SESPARSE "file-name.vmdk"
415
+
498
*/
416
+static void blk_aio_attached(AioContext *ctx, void *opaque)
499
flat_offset = -1;
417
+{
500
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
418
+ VuBlockDev *vub_dev = opaque;
501
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
419
+ aio_context_acquire(ctx);
502
420
+ vhost_user_server_set_aio_context(&vub_dev->vu_server, ctx);
503
if (sectors <= 0 ||
421
+ aio_context_release(ctx);
504
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
422
+}
505
- strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
423
+
506
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
424
+static void blk_aio_detach(void *opaque)
507
+ strcmp(type, "SESPARSE")) ||
425
+{
508
(strcmp(access, "RW"))) {
426
+ VuBlockDev *vub_dev = opaque;
509
continue;
427
+ AioContext *ctx = vub_dev->vu_server.ctx;
510
}
428
+ aio_context_acquire(ctx);
511
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
429
+ vhost_user_server_set_aio_context(&vub_dev->vu_server, NULL);
512
return ret;
430
+ aio_context_release(ctx);
513
}
431
+}
514
extent = &s->extents[s->num_extents - 1];
432
+
515
+ } else if (!strcmp(type, "SESPARSE")) {
433
+static void
516
+ ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
434
+vu_block_initialize_config(BlockDriverState *bs,
517
+ if (ret) {
435
+ struct virtio_blk_config *config, uint32_t blk_size)
518
+ bdrv_unref_child(bs, extent_file);
436
+{
519
+ return ret;
437
+ config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
520
+ }
438
+ config->blk_size = blk_size;
521
+ extent = &s->extents[s->num_extents - 1];
439
+ config->size_max = 0;
522
} else {
440
+ config->seg_max = 128 - 2;
523
error_setg(errp, "Unsupported extent type '%s'", type);
441
+ config->min_io_size = 1;
524
bdrv_unref_child(bs, extent_file);
442
+ config->opt_io_size = 1;
525
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
443
+ config->num_queues = VHOST_USER_BLK_MAX_QUEUES;
526
if (strcmp(ct, "monolithicFlat") &&
444
+ config->max_discard_sectors = 32768;
527
strcmp(ct, "vmfs") &&
445
+ config->max_discard_seg = 1;
528
strcmp(ct, "vmfsSparse") &&
446
+ config->discard_sector_alignment = config->blk_size >> 9;
529
+ strcmp(ct, "seSparse") &&
447
+ config->max_write_zeroes_sectors = 32768;
530
strcmp(ct, "twoGbMaxExtentSparse") &&
448
+ config->max_write_zeroes_seg = 1;
531
strcmp(ct, "twoGbMaxExtentFlat")) {
449
+}
532
error_setg(errp, "Unsupported image type '%s'", ct);
450
+
533
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
451
+static VuBlockDev *vu_block_init(VuBlockDev *vu_block_device, Error **errp)
534
{
452
+{
535
unsigned int l1_index, l2_offset, l2_index;
453
+
536
int min_index, i, j;
454
+ BlockBackend *blk;
537
- uint32_t min_count, *l2_table;
455
+ Error *local_error = NULL;
538
+ uint32_t min_count;
456
+ const char *node_name = vu_block_device->node_name;
539
+ void *l2_table;
457
+ bool writable = vu_block_device->writable;
540
bool zeroed = false;
458
+ uint64_t perm = BLK_PERM_CONSISTENT_READ;
541
int64_t ret;
459
+ int ret;
542
int64_t cluster_sector;
460
+
543
+ unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
461
+ AioContext *ctx;
544
462
+
545
if (m_data) {
463
+ BlockDriverState *bs = bdrv_lookup_bs(node_name, node_name, &local_error);
546
m_data->valid = 0;
464
+
547
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
465
+ if (!bs) {
548
if (l1_index >= extent->l1_size) {
466
+ error_propagate(errp, local_error);
549
return VMDK_ERROR;
467
+ return NULL;
550
}
468
+ }
551
- l2_offset = extent->l1_table[l1_index];
469
+
552
+ if (extent->sesparse) {
470
+ if (bdrv_is_read_only(bs)) {
553
+ uint64_t l2_offset_u64;
471
+ writable = false;
554
+
472
+ }
555
+ assert(extent->entry_size == sizeof(uint64_t));
473
+
556
+
474
+ if (writable) {
557
+ l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
475
+ perm |= BLK_PERM_WRITE;
558
+ if (l2_offset_u64 == 0) {
476
+ }
559
+ l2_offset = 0;
477
+
560
+ } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
478
+ ctx = bdrv_get_aio_context(bs);
561
+ /*
479
+ aio_context_acquire(ctx);
562
+ * Top most nibble is 0x1 if grain table is allocated.
480
+ bdrv_invalidate_cache(bs, NULL);
563
+ * strict check - top most 4 bytes must be 0x10000000 since max
481
+ aio_context_release(ctx);
564
+ * supported size is 64TB for disk - so no more than 64TB / 16MB
482
+
565
+ * grain directories which is smaller than uint32,
483
+ /*
566
+ * where 16MB is the only supported default grain table coverage.
484
+ * Don't allow resize while the vhost user server is running,
567
+ */
485
+ * otherwise we don't care what happens with the node.
568
+ return VMDK_ERROR;
486
+ */
569
+ } else {
487
+ blk = blk_new(bdrv_get_aio_context(bs), perm,
570
+ l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
488
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
571
+ l2_offset_u64 = extent->sesparse_l2_tables_offset +
489
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
572
+ l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
490
+ ret = blk_insert_bs(blk, bs, errp);
573
+ if (l2_offset_u64 > 0x00000000ffffffff) {
491
+
574
+ return VMDK_ERROR;
492
+ if (ret < 0) {
575
+ }
493
+ goto fail;
576
+ l2_offset = (unsigned int)(l2_offset_u64);
494
+ }
577
+ }
495
+
578
+ } else {
496
+ blk_set_enable_write_cache(blk, false);
579
+ assert(extent->entry_size == sizeof(uint32_t));
497
+
580
+ l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
498
+ blk_set_allow_aio_context_change(blk, true);
581
+ }
499
+
582
if (!l2_offset) {
500
+ vu_block_device->blkcfg.wce = 0;
583
return VMDK_UNALLOC;
501
+ vu_block_device->backend = blk;
584
}
502
+ if (!vu_block_device->blk_size) {
585
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
503
+ vu_block_device->blk_size = BDRV_SECTOR_SIZE;
586
extent->l2_cache_counts[j] >>= 1;
504
+ }
587
}
505
+ vu_block_device->blkcfg.blk_size = vu_block_device->blk_size;
588
}
506
+ blk_set_guest_block_size(blk, vu_block_device->blk_size);
589
- l2_table = extent->l2_cache + (i * extent->l2_size);
507
+ vu_block_initialize_config(bs, &vu_block_device->blkcfg,
590
+ l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
508
+ vu_block_device->blk_size);
591
goto found;
509
+ return vu_block_device;
592
}
510
+
593
}
511
+fail:
594
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
512
+ blk_unref(blk);
595
min_index = i;
513
+ return NULL;
596
}
514
+}
597
}
515
+
598
- l2_table = extent->l2_cache + (min_index * extent->l2_size);
516
+static void vu_block_deinit(VuBlockDev *vu_block_device)
599
+ l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
517
+{
600
BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
518
+ if (vu_block_device->backend) {
601
if (bdrv_pread(extent->file,
519
+ blk_remove_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
602
(int64_t)l2_offset * 512,
520
+ blk_aio_detach, vu_block_device);
603
l2_table,
521
+ }
604
- extent->l2_size * sizeof(uint32_t)
522
+
605
- ) != extent->l2_size * sizeof(uint32_t)) {
523
+ blk_unref(vu_block_device->backend);
606
+ l2_size_bytes
524
+}
607
+ ) != l2_size_bytes) {
525
+
608
return VMDK_ERROR;
526
+static void vhost_user_blk_server_stop(VuBlockDev *vu_block_device)
609
}
527
+{
610
528
+ vhost_user_server_stop(&vu_block_device->vu_server);
611
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
529
+ vu_block_deinit(vu_block_device);
612
extent->l2_cache_counts[min_index] = 1;
530
+}
613
found:
531
+
614
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
532
+static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
615
- cluster_sector = le32_to_cpu(l2_table[l2_index]);
533
+ Error **errp)
616
534
+{
617
- if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
535
+ AioContext *ctx;
618
- zeroed = true;
536
+ SocketAddress *addr = vu_block_device->addr;
619
+ if (extent->sesparse) {
537
+
620
+ cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
538
+ if (!vu_block_init(vu_block_device, errp)) {
621
+ switch (cluster_sector & 0xf000000000000000) {
539
+ return;
622
+ case 0x0000000000000000:
540
+ }
623
+ /* unallocated grain */
541
+
624
+ if (cluster_sector != 0) {
542
+ ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
625
+ return VMDK_ERROR;
543
+
626
+ }
544
+ if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
627
+ break;
545
+ VHOST_USER_BLK_MAX_QUEUES,
628
+ case 0x1000000000000000:
546
+ NULL, &vu_block_iface,
629
+ /* scsi-unmapped grain - fallthrough */
547
+ errp)) {
630
+ case 0x2000000000000000:
548
+ goto error;
631
+ /* zero grain */
549
+ }
632
+ zeroed = true;
550
+
633
+ break;
551
+ blk_add_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
634
+ case 0x3000000000000000:
552
+ blk_aio_detach, vu_block_device);
635
+ /* allocated grain */
553
+ vu_block_device->running = true;
636
+ cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
554
+ return;
637
+ ((cluster_sector & 0x0000ffffffffffff) << 12));
555
+
638
+ cluster_sector = extent->sesparse_clusters_offset +
556
+ error:
639
+ cluster_sector * extent->cluster_sectors;
557
+ vu_block_deinit(vu_block_device);
640
+ break;
558
+}
641
+ default:
559
+
642
+ return VMDK_ERROR;
560
+static bool vu_prop_modifiable(VuBlockDev *vus, Error **errp)
643
+ }
561
+{
644
+ } else {
562
+ if (vus->running) {
645
+ cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
563
+ error_setg(errp, "The property can't be modified "
646
+
564
+ "while the server is running");
647
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
565
+ return false;
648
+ zeroed = true;
566
+ }
649
+ }
567
+ return true;
650
}
568
+}
651
569
+
652
if (!cluster_sector || zeroed) {
570
+static void vu_set_node_name(Object *obj, const char *value, Error **errp)
653
if (!allocate) {
571
+{
654
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
572
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
655
}
573
+
656
+ assert(!extent->sesparse);
574
+ if (!vu_prop_modifiable(vus, errp)) {
657
575
+ return;
658
if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
576
+ }
659
return VMDK_ERROR;
577
+
660
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
578
+ if (vus->node_name) {
661
m_data->l1_index = l1_index;
579
+ g_free(vus->node_name);
662
m_data->l2_index = l2_index;
580
+ }
663
m_data->l2_offset = l2_offset;
581
+
664
- m_data->l2_cache_entry = &l2_table[l2_index];
582
+ vus->node_name = g_strdup(value);
665
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
583
+}
666
}
584
+
667
}
585
+static char *vu_get_node_name(Object *obj, Error **errp)
668
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
586
+{
669
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
587
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
670
if (!extent) {
588
+ return g_strdup(vus->node_name);
671
return -EIO;
589
+}
672
}
590
+
673
+ if (extent->sesparse) {
591
+static void free_socket_addr(SocketAddress *addr)
674
+ return -ENOTSUP;
592
+{
675
+ }
593
+ g_free(addr->u.q_unix.path);
676
offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
594
+ g_free(addr);
677
n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
595
+}
678
- offset_in_cluster);
596
+
597
+static void vu_set_unix_socket(Object *obj, const char *value,
598
+ Error **errp)
599
+{
600
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
601
+
602
+ if (!vu_prop_modifiable(vus, errp)) {
603
+ return;
604
+ }
605
+
606
+ if (vus->addr) {
607
+ free_socket_addr(vus->addr);
608
+ }
609
+
610
+ SocketAddress *addr = g_new0(SocketAddress, 1);
611
+ addr->type = SOCKET_ADDRESS_TYPE_UNIX;
612
+ addr->u.q_unix.path = g_strdup(value);
613
+ vus->addr = addr;
614
+}
615
+
616
+static char *vu_get_unix_socket(Object *obj, Error **errp)
617
+{
618
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
619
+ return g_strdup(vus->addr->u.q_unix.path);
620
+}
621
+
622
+static bool vu_get_block_writable(Object *obj, Error **errp)
623
+{
624
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
625
+ return vus->writable;
626
+}
627
+
628
+static void vu_set_block_writable(Object *obj, bool value, Error **errp)
629
+{
630
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
631
+
632
+ if (!vu_prop_modifiable(vus, errp)) {
633
+ return;
634
+ }
635
+
636
+ vus->writable = value;
637
+}
638
+
639
+static void vu_get_blk_size(Object *obj, Visitor *v, const char *name,
640
+ void *opaque, Error **errp)
641
+{
642
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
643
+ uint32_t value = vus->blk_size;
644
+
645
+ visit_type_uint32(v, name, &value, errp);
646
+}
647
+
648
+static void vu_set_blk_size(Object *obj, Visitor *v, const char *name,
649
+ void *opaque, Error **errp)
650
+{
651
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
652
+
653
+ Error *local_err = NULL;
654
+ uint32_t value;
655
+
656
+ if (!vu_prop_modifiable(vus, errp)) {
657
+ return;
658
+ }
659
+
660
+ visit_type_uint32(v, name, &value, &local_err);
661
+ if (local_err) {
662
+ goto out;
663
+ }
664
+
665
+ check_block_size(object_get_typename(obj), name, value, &local_err);
666
+ if (local_err) {
667
+ goto out;
668
+ }
669
+
670
+ vus->blk_size = value;
671
+
672
+out:
673
+ error_propagate(errp, local_err);
674
+}
675
+
676
+static void vhost_user_blk_server_instance_finalize(Object *obj)
677
+{
678
+ VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
679
+
680
+ vhost_user_blk_server_stop(vub);
681
+
682
+ /*
683
+ * Unlike object_property_add_str, object_class_property_add_str
684
+ * doesn't have a release method. Thus manual memory freeing is
685
+ * needed.
686
+ */
687
+ free_socket_addr(vub->addr);
688
+ g_free(vub->node_name);
689
+}
690
+
691
+static void vhost_user_blk_server_complete(UserCreatable *obj, Error **errp)
692
+{
693
+ VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
694
+
695
+ vhost_user_blk_server_start(vub, errp);
696
+}
697
+
698
+static void vhost_user_blk_server_class_init(ObjectClass *klass,
699
+ void *class_data)
700
+{
701
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
702
+ ucc->complete = vhost_user_blk_server_complete;
703
+
704
+ object_class_property_add_bool(klass, "writable",
705
+ vu_get_block_writable,
706
+ vu_set_block_writable);
707
+
708
+ object_class_property_add_str(klass, "node-name",
709
+ vu_get_node_name,
710
+ vu_set_node_name);
711
+
712
+ object_class_property_add_str(klass, "unix-socket",
713
+ vu_get_unix_socket,
714
+ vu_set_unix_socket);
715
+
716
+ object_class_property_add(klass, "logical-block-size", "uint32",
717
+ vu_get_blk_size, vu_set_blk_size,
718
+ NULL, NULL);
719
+}
720
+
721
+static const TypeInfo vhost_user_blk_server_info = {
722
+ .name = TYPE_VHOST_USER_BLK_SERVER,
723
+ .parent = TYPE_OBJECT,
724
+ .instance_size = sizeof(VuBlockDev),
725
+ .instance_finalize = vhost_user_blk_server_instance_finalize,
726
+ .class_init = vhost_user_blk_server_class_init,
727
+ .interfaces = (InterfaceInfo[]) {
728
+ {TYPE_USER_CREATABLE},
729
+ {}
730
+ },
731
+};
732
+
733
+static void vhost_user_blk_server_register_types(void)
734
+{
735
+ type_register_static(&vhost_user_blk_server_info);
736
+}
737
+
738
+type_init(vhost_user_blk_server_register_types)
739
diff --git a/softmmu/vl.c b/softmmu/vl.c
740
index XXXXXXX..XXXXXXX 100644
741
--- a/softmmu/vl.c
742
+++ b/softmmu/vl.c
743
@@ -XXX,XX +XXX,XX @@ static bool object_create_initial(const char *type, QemuOpts *opts)
744
}
745
#endif
746
747
+ /* Reason: vhost-user-blk-server property "node-name" */
748
+ if (g_str_equal(type, "vhost-user-blk-server")) {
749
+ return false;
750
+ }
751
/*
752
* Reason: filter-* property "netdev" etc.
753
*/
754
diff --git a/block/meson.build b/block/meson.build
755
index XXXXXXX..XXXXXXX 100644
756
--- a/block/meson.build
757
+++ b/block/meson.build
758
@@ -XXX,XX +XXX,XX @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')
759
block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
760
block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c'))
761
block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
762
+block_ss.add(when: 'CONFIG_LINUX', if_true: files('export/vhost-user-blk-server.c', '../contrib/libvhost-user/libvhost-user.c'))
763
block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c'))
764
block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c'))
765
block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c'))
766
--
679
--
767
2.26.2
680
2.21.0
768
681
682
diff view generated by jsdifflib
Deleted patch
1
From: Coiby Xu <coiby.xu@gmail.com>
2
1
3
This test case has the same tests as tests/virtio-blk-test.c except for
4
tests that use block_resize. Since the vhost-user server can only serve one
5
client at a time, two instances of vhost-user-blk-server are started by
6
qemu-storage-daemon for the hotplug test.
7
8
In order to not block scripts/tap-driver.pl, vhost-user-blk-server will
9
send "quit" command to qemu-storage-daemon's QMP monitor. So a function
10
is added to libqtest.c to establish socket connection with socket
11
server.
12
13
Suggested-by: Thomas Huth <thuth@redhat.com>
14
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
15
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
17
Message-id: 20200918080912.321299-7-coiby.xu@gmail.com
18
[Update meson.build to only test when CONFIG_TOOLS has built
19
qemu-storage-daemon. This prevents CI failures with --disable-tools.
20
--Stefan]
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
---
23
tests/qtest/libqos/libqtest.h | 17 +
24
tests/qtest/libqos/vhost-user-blk.h | 48 ++
25
tests/qtest/libqos/vhost-user-blk.c | 129 +++++
26
tests/qtest/libqtest.c | 36 +-
27
tests/qtest/vhost-user-blk-test.c | 751 ++++++++++++++++++++++++++++
28
tests/qtest/libqos/meson.build | 1 +
29
tests/qtest/meson.build | 4 +-
30
7 files changed, 983 insertions(+), 3 deletions(-)
31
create mode 100644 tests/qtest/libqos/vhost-user-blk.h
32
create mode 100644 tests/qtest/libqos/vhost-user-blk.c
33
create mode 100644 tests/qtest/vhost-user-blk-test.c
34
35
diff --git a/tests/qtest/libqos/libqtest.h b/tests/qtest/libqos/libqtest.h
36
index XXXXXXX..XXXXXXX 100644
37
--- a/tests/qtest/libqos/libqtest.h
38
+++ b/tests/qtest/libqos/libqtest.h
39
@@ -XXX,XX +XXX,XX @@ void qtest_qmp_send(QTestState *s, const char *fmt, ...)
40
void qtest_qmp_send_raw(QTestState *s, const char *fmt, ...)
41
GCC_FMT_ATTR(2, 3);
42
43
+/**
44
+ * qtest_socket_client:
45
+ * @server_socket_path: the socket server's path
46
+ *
47
+ * Connect to a socket server.
48
+ */
49
+int qtest_socket_client(char *server_socket_path);
50
+
51
+/**
52
+ * qtest_create_state_with_qmp_fd:
53
+ * @fd: socket fd
54
+ *
55
+ * Wrap socket fd in QTestState to make use of qtest_qmp*
56
+ * functions
57
+ */
58
+QTestState *qtest_create_state_with_qmp_fd(int fd);
59
+
60
/**
61
* qtest_vqmp_fds:
62
* @s: #QTestState instance to operate on.
63
diff --git a/tests/qtest/libqos/vhost-user-blk.h b/tests/qtest/libqos/vhost-user-blk.h
64
new file mode 100644
65
index XXXXXXX..XXXXXXX
66
--- /dev/null
67
+++ b/tests/qtest/libqos/vhost-user-blk.h
68
@@ -XXX,XX +XXX,XX @@
69
+/*
70
+ * libqos driver framework
71
+ *
72
+ * Based on tests/qtest/libqos/virtio-blk.c
73
+ *
74
+ * Copyright (c) 2020 Coiby Xu <coiby.xu@gmail.com>
75
+ *
76
+ * Copyright (c) 2018 Emanuele Giuseppe Esposito <e.emanuelegiuseppe@gmail.com>
77
+ *
78
+ * This library is free software; you can redistribute it and/or
79
+ * modify it under the terms of the GNU Lesser General Public
80
+ * License version 2 as published by the Free Software Foundation.
81
+ *
82
+ * This library is distributed in the hope that it will be useful,
83
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
84
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
85
+ * Lesser General Public License for more details.
86
+ *
87
+ * You should have received a copy of the GNU Lesser General Public
88
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
89
+ */
90
+
91
+#ifndef TESTS_LIBQOS_VHOST_USER_BLK_H
92
+#define TESTS_LIBQOS_VHOST_USER_BLK_H
93
+
94
+#include "qgraph.h"
95
+#include "virtio.h"
96
+#include "virtio-pci.h"
97
+
98
+typedef struct QVhostUserBlk QVhostUserBlk;
99
+typedef struct QVhostUserBlkPCI QVhostUserBlkPCI;
100
+typedef struct QVhostUserBlkDevice QVhostUserBlkDevice;
101
+
102
+struct QVhostUserBlk {
103
+ QVirtioDevice *vdev;
104
+};
105
+
106
+struct QVhostUserBlkPCI {
107
+ QVirtioPCIDevice pci_vdev;
108
+ QVhostUserBlk blk;
109
+};
110
+
111
+struct QVhostUserBlkDevice {
112
+ QOSGraphObject obj;
113
+ QVhostUserBlk blk;
114
+};
115
+
116
+#endif
117
diff --git a/tests/qtest/libqos/vhost-user-blk.c b/tests/qtest/libqos/vhost-user-blk.c
118
new file mode 100644
119
index XXXXXXX..XXXXXXX
120
--- /dev/null
121
+++ b/tests/qtest/libqos/vhost-user-blk.c
122
@@ -XXX,XX +XXX,XX @@
123
+/*
124
+ * libqos driver framework
125
+ *
126
+ * Based on tests/qtest/libqos/virtio-blk.c
127
+ *
128
+ * Copyright (c) 2020 Coiby Xu <coiby.xu@gmail.com>
129
+ *
130
+ * Copyright (c) 2018 Emanuele Giuseppe Esposito <e.emanuelegiuseppe@gmail.com>
131
+ *
132
+ * This library is free software; you can redistribute it and/or
133
+ * modify it under the terms of the GNU Lesser General Public
134
+ * License version 2.1 as published by the Free Software Foundation.
135
+ *
136
+ * This library is distributed in the hope that it will be useful,
137
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
138
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
139
+ * Lesser General Public License for more details.
140
+ *
141
+ * You should have received a copy of the GNU Lesser General Public
142
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>
143
+ */
144
+
145
+#include "qemu/osdep.h"
146
+#include "libqtest.h"
147
+#include "qemu/module.h"
148
+#include "standard-headers/linux/virtio_blk.h"
149
+#include "vhost-user-blk.h"
150
+
151
+#define PCI_SLOT 0x04
152
+#define PCI_FN 0x00
153
+
154
+/* vhost-user-blk-device */
155
+static void *qvhost_user_blk_get_driver(QVhostUserBlk *v_blk,
156
+ const char *interface)
157
+{
158
+ if (!g_strcmp0(interface, "vhost-user-blk")) {
159
+ return v_blk;
160
+ }
161
+ if (!g_strcmp0(interface, "virtio")) {
162
+ return v_blk->vdev;
163
+ }
164
+
165
+ fprintf(stderr, "%s not present in vhost-user-blk-device\n", interface);
166
+ g_assert_not_reached();
167
+}
168
+
169
+static void *qvhost_user_blk_device_get_driver(void *object,
170
+ const char *interface)
171
+{
172
+ QVhostUserBlkDevice *v_blk = object;
173
+ return qvhost_user_blk_get_driver(&v_blk->blk, interface);
174
+}
175
+
176
+static void *vhost_user_blk_device_create(void *virtio_dev,
177
+ QGuestAllocator *t_alloc,
178
+ void *addr)
179
+{
180
+ QVhostUserBlkDevice *vhost_user_blk = g_new0(QVhostUserBlkDevice, 1);
181
+ QVhostUserBlk *interface = &vhost_user_blk->blk;
182
+
183
+ interface->vdev = virtio_dev;
184
+
185
+ vhost_user_blk->obj.get_driver = qvhost_user_blk_device_get_driver;
186
+
187
+ return &vhost_user_blk->obj;
188
+}
189
+
190
+/* vhost-user-blk-pci */
191
+static void *qvhost_user_blk_pci_get_driver(void *object, const char *interface)
192
+{
193
+ QVhostUserBlkPCI *v_blk = object;
194
+ if (!g_strcmp0(interface, "pci-device")) {
195
+ return v_blk->pci_vdev.pdev;
196
+ }
197
+ return qvhost_user_blk_get_driver(&v_blk->blk, interface);
198
+}
199
+
200
+static void *vhost_user_blk_pci_create(void *pci_bus, QGuestAllocator *t_alloc,
201
+ void *addr)
202
+{
203
+ QVhostUserBlkPCI *vhost_user_blk = g_new0(QVhostUserBlkPCI, 1);
204
+ QVhostUserBlk *interface = &vhost_user_blk->blk;
205
+ QOSGraphObject *obj = &vhost_user_blk->pci_vdev.obj;
206
+
207
+ virtio_pci_init(&vhost_user_blk->pci_vdev, pci_bus, addr);
208
+ interface->vdev = &vhost_user_blk->pci_vdev.vdev;
209
+
210
+ g_assert_cmphex(interface->vdev->device_type, ==, VIRTIO_ID_BLOCK);
211
+
212
+ obj->get_driver = qvhost_user_blk_pci_get_driver;
213
+
214
+ return obj;
215
+}
216
+
217
+static void vhost_user_blk_register_nodes(void)
218
+{
219
+ /*
220
+ * FIXME: every test using these two nodes needs to setup a
221
+ * -drive,id=drive0 otherwise QEMU is not going to start.
222
+ * Therefore, we do not include "produces" edge for virtio
223
+ * and pci-device yet.
224
+ */
225
+
226
+ char *arg = g_strdup_printf("id=drv0,chardev=char1,addr=%x.%x",
227
+ PCI_SLOT, PCI_FN);
228
+
229
+ QPCIAddress addr = {
230
+ .devfn = QPCI_DEVFN(PCI_SLOT, PCI_FN),
231
+ };
232
+
233
+ QOSGraphEdgeOptions opts = { };
234
+
235
+ /* virtio-blk-device */
236
+ /** opts.extra_device_opts = "drive=drive0"; */
237
+ qos_node_create_driver("vhost-user-blk-device", vhost_user_blk_device_create);
238
+ qos_node_consumes("vhost-user-blk-device", "virtio-bus", &opts);
239
+ qos_node_produces("vhost-user-blk-device", "vhost-user-blk");
240
+
241
+ /* virtio-blk-pci */
242
+ opts.extra_device_opts = arg;
243
+ add_qpci_address(&opts, &addr);
244
+ qos_node_create_driver("vhost-user-blk-pci", vhost_user_blk_pci_create);
245
+ qos_node_consumes("vhost-user-blk-pci", "pci-bus", &opts);
246
+ qos_node_produces("vhost-user-blk-pci", "vhost-user-blk");
247
+
248
+ g_free(arg);
249
+}
250
+
251
+libqos_init(vhost_user_blk_register_nodes);
252
diff --git a/tests/qtest/libqtest.c b/tests/qtest/libqtest.c
253
index XXXXXXX..XXXXXXX 100644
254
--- a/tests/qtest/libqtest.c
255
+++ b/tests/qtest/libqtest.c
256
@@ -XXX,XX +XXX,XX @@
257
* Copyright IBM, Corp. 2012
258
* Copyright Red Hat, Inc. 2012
259
* Copyright SUSE LINUX Products GmbH 2013
260
+ * Copyright (c) Coiby Xu
261
*
262
* Authors:
263
* Anthony Liguori <aliguori@us.ibm.com>
264
* Paolo Bonzini <pbonzini@redhat.com>
265
* Andreas Färber <afaerber@suse.de>
266
+ * Coiby Xu <coiby.xu@gmail.com>
267
*
268
* This work is licensed under the terms of the GNU GPL, version 2 or later.
269
* See the COPYING file in the top-level directory.
270
@@ -XXX,XX +XXX,XX @@ typedef struct QTestClientTransportOps {
271
QTestRecvFn recv_line; /* for receiving qtest command responses */
272
} QTestTransportOps;
273
274
-struct QTestState
275
-{
276
+struct QTestState {
277
int fd;
278
int qmp_fd;
279
pid_t qemu_pid; /* our child QEMU process */
280
@@ -XXX,XX +XXX,XX @@ QDict *qtest_qmp_receive(QTestState *s)
281
return qmp_fd_receive(s->qmp_fd);
282
}
283
284
+QTestState *qtest_create_state_with_qmp_fd(int fd)
285
+{
286
+ QTestState *qmp_test_state = g_new0(QTestState, 1);
287
+ qmp_test_state->qmp_fd = fd;
288
+ return qmp_test_state;
289
+}
290
+
291
+int qtest_socket_client(char *server_socket_path)
292
+{
293
+ struct sockaddr_un serv_addr;
294
+ int sock;
295
+ int ret;
296
+ int retries = 0;
297
+ sock = socket(PF_UNIX, SOCK_STREAM, 0);
298
+ g_assert_cmpint(sock, !=, -1);
299
+ serv_addr.sun_family = AF_UNIX;
300
+ snprintf(serv_addr.sun_path, sizeof(serv_addr.sun_path), "%s",
301
+ server_socket_path);
302
+
303
+ for (retries = 0; retries < 3; retries++) {
304
+ ret = connect(sock, (struct sockaddr *)&serv_addr, sizeof(serv_addr));
305
+ if (ret == 0) {
306
+ break;
307
+ }
308
+ g_usleep(G_USEC_PER_SEC);
309
+ }
310
+
311
+ g_assert_cmpint(ret, ==, 0);
312
+ return sock;
313
+}
314
+
315
/**
316
* Allow users to send a message without waiting for the reply,
317
* in the case that they choose to discard all replies up until
318
diff --git a/tests/qtest/vhost-user-blk-test.c b/tests/qtest/vhost-user-blk-test.c
319
new file mode 100644
320
index XXXXXXX..XXXXXXX
321
--- /dev/null
322
+++ b/tests/qtest/vhost-user-blk-test.c
323
@@ -XXX,XX +XXX,XX @@
324
+/*
325
+ * QTest testcase for Vhost-user Block Device
326
+ *
327
+ * Based on tests/qtest/virtio-blk-test.c
328
+ *
329
+ * Copyright (c) 2014 SUSE LINUX Products GmbH
330
+ * Copyright (c) 2014 Marc Marí
331
+ * Copyright (c) 2020 Coiby Xu
332
+ *
333
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
334
+ * See the COPYING file in the top-level directory.
335
+ */
336
+
337
+#include "qemu/osdep.h"
338
+#include "libqtest-single.h"
339
+#include "qemu/bswap.h"
340
+#include "qemu/module.h"
341
+#include "standard-headers/linux/virtio_blk.h"
342
+#include "standard-headers/linux/virtio_pci.h"
343
+#include "libqos/qgraph.h"
344
+#include "libqos/vhost-user-blk.h"
345
+#include "libqos/libqos-pc.h"
346
+
347
+#define TEST_IMAGE_SIZE (64 * 1024 * 1024)
348
+#define QVIRTIO_BLK_TIMEOUT_US (30 * 1000 * 1000)
349
+#define PCI_SLOT_HP 0x06
350
+
351
+typedef struct QVirtioBlkReq {
352
+ uint32_t type;
353
+ uint32_t ioprio;
354
+ uint64_t sector;
355
+ char *data;
356
+ uint8_t status;
357
+} QVirtioBlkReq;
358
+
359
+#ifdef HOST_WORDS_BIGENDIAN
360
+static const bool host_is_big_endian = true;
361
+#else
362
+static const bool host_is_big_endian; /* false */
363
+#endif
364
+
365
+static inline void virtio_blk_fix_request(QVirtioDevice *d, QVirtioBlkReq *req)
366
+{
367
+ if (qvirtio_is_big_endian(d) != host_is_big_endian) {
368
+ req->type = bswap32(req->type);
369
+ req->ioprio = bswap32(req->ioprio);
370
+ req->sector = bswap64(req->sector);
371
+ }
372
+}
373
+
374
+static inline void virtio_blk_fix_dwz_hdr(QVirtioDevice *d,
375
+ struct virtio_blk_discard_write_zeroes *dwz_hdr)
376
+{
377
+ if (qvirtio_is_big_endian(d) != host_is_big_endian) {
378
+ dwz_hdr->sector = bswap64(dwz_hdr->sector);
379
+ dwz_hdr->num_sectors = bswap32(dwz_hdr->num_sectors);
380
+ dwz_hdr->flags = bswap32(dwz_hdr->flags);
381
+ }
382
+}
383
+
384
+static uint64_t virtio_blk_request(QGuestAllocator *alloc, QVirtioDevice *d,
385
+ QVirtioBlkReq *req, uint64_t data_size)
386
+{
387
+ uint64_t addr;
388
+ uint8_t status = 0xFF;
389
+ QTestState *qts = global_qtest;
390
+
391
+ switch (req->type) {
392
+ case VIRTIO_BLK_T_IN:
393
+ case VIRTIO_BLK_T_OUT:
394
+ g_assert_cmpuint(data_size % 512, ==, 0);
395
+ break;
396
+ case VIRTIO_BLK_T_DISCARD:
397
+ case VIRTIO_BLK_T_WRITE_ZEROES:
398
+ g_assert_cmpuint(data_size %
399
+ sizeof(struct virtio_blk_discard_write_zeroes), ==, 0);
400
+ break;
401
+ default:
402
+ g_assert_cmpuint(data_size, ==, 0);
403
+ }
404
+
405
+ addr = guest_alloc(alloc, sizeof(*req) + data_size);
406
+
407
+ virtio_blk_fix_request(d, req);
408
+
409
+ qtest_memwrite(qts, addr, req, 16);
410
+ qtest_memwrite(qts, addr + 16, req->data, data_size);
411
+ qtest_memwrite(qts, addr + 16 + data_size, &status, sizeof(status));
412
+
413
+ return addr;
414
+}
415
+
416
+/* Returns the request virtqueue so the caller can perform further tests */
417
+static QVirtQueue *test_basic(QVirtioDevice *dev, QGuestAllocator *alloc)
418
+{
419
+ QVirtioBlkReq req;
420
+ uint64_t req_addr;
421
+ uint64_t capacity;
422
+ uint64_t features;
423
+ uint32_t free_head;
424
+ uint8_t status;
425
+ char *data;
426
+ QTestState *qts = global_qtest;
427
+ QVirtQueue *vq;
428
+
429
+ features = qvirtio_get_features(dev);
430
+ features = features & ~(QVIRTIO_F_BAD_FEATURE |
431
+ (1u << VIRTIO_RING_F_INDIRECT_DESC) |
432
+ (1u << VIRTIO_RING_F_EVENT_IDX) |
433
+ (1u << VIRTIO_BLK_F_SCSI));
434
+ qvirtio_set_features(dev, features);
435
+
436
+ capacity = qvirtio_config_readq(dev, 0);
437
+ g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
438
+
439
+ vq = qvirtqueue_setup(dev, alloc, 0);
440
+
441
+ qvirtio_set_driver_ok(dev);
442
+
443
+ /* Write and read with 3 descriptor layout */
444
+ /* Write request */
445
+ req.type = VIRTIO_BLK_T_OUT;
446
+ req.ioprio = 1;
447
+ req.sector = 0;
448
+ req.data = g_malloc0(512);
449
+ strcpy(req.data, "TEST");
450
+
451
+ req_addr = virtio_blk_request(alloc, dev, &req, 512);
452
+
453
+ g_free(req.data);
454
+
455
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
456
+ qvirtqueue_add(qts, vq, req_addr + 16, 512, false, true);
457
+ qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
458
+
459
+ qvirtqueue_kick(qts, dev, vq, free_head);
460
+
461
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
462
+ QVIRTIO_BLK_TIMEOUT_US);
463
+ status = readb(req_addr + 528);
464
+ g_assert_cmpint(status, ==, 0);
465
+
466
+ guest_free(alloc, req_addr);
467
+
468
+ /* Read request */
469
+ req.type = VIRTIO_BLK_T_IN;
470
+ req.ioprio = 1;
471
+ req.sector = 0;
472
+ req.data = g_malloc0(512);
473
+
474
+ req_addr = virtio_blk_request(alloc, dev, &req, 512);
475
+
476
+ g_free(req.data);
477
+
478
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
479
+ qvirtqueue_add(qts, vq, req_addr + 16, 512, true, true);
480
+ qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
481
+
482
+ qvirtqueue_kick(qts, dev, vq, free_head);
483
+
484
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
485
+ QVIRTIO_BLK_TIMEOUT_US);
486
+ status = readb(req_addr + 528);
487
+ g_assert_cmpint(status, ==, 0);
488
+
489
+ data = g_malloc0(512);
490
+ qtest_memread(qts, req_addr + 16, data, 512);
491
+ g_assert_cmpstr(data, ==, "TEST");
492
+ g_free(data);
493
+
494
+ guest_free(alloc, req_addr);
495
+
496
+ if (features & (1u << VIRTIO_BLK_F_WRITE_ZEROES)) {
497
+ struct virtio_blk_discard_write_zeroes dwz_hdr;
498
+ void *expected;
499
+
500
+ /*
501
+ * WRITE_ZEROES request on the same sector of previous test where
502
+ * we wrote "TEST".
503
+ */
504
+ req.type = VIRTIO_BLK_T_WRITE_ZEROES;
505
+ req.data = (char *) &dwz_hdr;
506
+ dwz_hdr.sector = 0;
507
+ dwz_hdr.num_sectors = 1;
508
+ dwz_hdr.flags = 0;
509
+
510
+ virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
511
+
512
+ req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
513
+
514
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
515
+ qvirtqueue_add(qts, vq, req_addr + 16, sizeof(dwz_hdr), false, true);
516
+ qvirtqueue_add(qts, vq, req_addr + 16 + sizeof(dwz_hdr), 1, true,
517
+ false);
518
+
519
+ qvirtqueue_kick(qts, dev, vq, free_head);
520
+
521
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
522
+ QVIRTIO_BLK_TIMEOUT_US);
523
+ status = readb(req_addr + 16 + sizeof(dwz_hdr));
524
+ g_assert_cmpint(status, ==, 0);
525
+
526
+ guest_free(alloc, req_addr);
527
+
528
+ /* Read request to check if the sector contains all zeroes */
529
+ req.type = VIRTIO_BLK_T_IN;
530
+ req.ioprio = 1;
531
+ req.sector = 0;
532
+ req.data = g_malloc0(512);
533
+
534
+ req_addr = virtio_blk_request(alloc, dev, &req, 512);
535
+
536
+ g_free(req.data);
537
+
538
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
539
+ qvirtqueue_add(qts, vq, req_addr + 16, 512, true, true);
540
+ qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
541
+
542
+ qvirtqueue_kick(qts, dev, vq, free_head);
543
+
544
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
545
+ QVIRTIO_BLK_TIMEOUT_US);
546
+ status = readb(req_addr + 528);
547
+ g_assert_cmpint(status, ==, 0);
548
+
549
+ data = g_malloc(512);
550
+ expected = g_malloc0(512);
551
+ qtest_memread(qts, req_addr + 16, data, 512);
552
+ g_assert_cmpmem(data, 512, expected, 512);
553
+ g_free(expected);
554
+ g_free(data);
555
+
556
+ guest_free(alloc, req_addr);
557
+ }
558
+
559
+ if (features & (1u << VIRTIO_BLK_F_DISCARD)) {
560
+ struct virtio_blk_discard_write_zeroes dwz_hdr;
561
+
562
+ req.type = VIRTIO_BLK_T_DISCARD;
563
+ req.data = (char *) &dwz_hdr;
564
+ dwz_hdr.sector = 0;
565
+ dwz_hdr.num_sectors = 1;
566
+ dwz_hdr.flags = 0;
567
+
568
+ virtio_blk_fix_dwz_hdr(dev, &dwz_hdr);
569
+
570
+ req_addr = virtio_blk_request(alloc, dev, &req, sizeof(dwz_hdr));
571
+
572
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
573
+ qvirtqueue_add(qts, vq, req_addr + 16, sizeof(dwz_hdr), false, true);
574
+ qvirtqueue_add(qts, vq, req_addr + 16 + sizeof(dwz_hdr),
575
+ 1, true, false);
576
+
577
+ qvirtqueue_kick(qts, dev, vq, free_head);
578
+
579
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
580
+ QVIRTIO_BLK_TIMEOUT_US);
581
+ status = readb(req_addr + 16 + sizeof(dwz_hdr));
582
+ g_assert_cmpint(status, ==, 0);
583
+
584
+ guest_free(alloc, req_addr);
585
+ }
586
+
587
+ if (features & (1u << VIRTIO_F_ANY_LAYOUT)) {
588
+ /* Write and read with 2 descriptor layout */
589
+ /* Write request */
590
+ req.type = VIRTIO_BLK_T_OUT;
591
+ req.ioprio = 1;
592
+ req.sector = 1;
593
+ req.data = g_malloc0(512);
594
+ strcpy(req.data, "TEST");
595
+
596
+ req_addr = virtio_blk_request(alloc, dev, &req, 512);
597
+
598
+ g_free(req.data);
599
+
600
+ free_head = qvirtqueue_add(qts, vq, req_addr, 528, false, true);
601
+ qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
602
+ qvirtqueue_kick(qts, dev, vq, free_head);
603
+
604
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
605
+ QVIRTIO_BLK_TIMEOUT_US);
606
+ status = readb(req_addr + 528);
607
+ g_assert_cmpint(status, ==, 0);
608
+
609
+ guest_free(alloc, req_addr);
610
+
611
+ /* Read request */
612
+ req.type = VIRTIO_BLK_T_IN;
613
+ req.ioprio = 1;
614
+ req.sector = 1;
615
+ req.data = g_malloc0(512);
616
+
617
+ req_addr = virtio_blk_request(alloc, dev, &req, 512);
618
+
619
+ g_free(req.data);
620
+
621
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
622
+ qvirtqueue_add(qts, vq, req_addr + 16, 513, true, false);
623
+
624
+ qvirtqueue_kick(qts, dev, vq, free_head);
625
+
626
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
627
+ QVIRTIO_BLK_TIMEOUT_US);
628
+ status = readb(req_addr + 528);
629
+ g_assert_cmpint(status, ==, 0);
630
+
631
+ data = g_malloc0(512);
632
+ qtest_memread(qts, req_addr + 16, data, 512);
633
+ g_assert_cmpstr(data, ==, "TEST");
634
+ g_free(data);
635
+
636
+ guest_free(alloc, req_addr);
637
+ }
638
+
639
+ return vq;
640
+}
641
+
642
+static void basic(void *obj, void *data, QGuestAllocator *t_alloc)
643
+{
644
+ QVhostUserBlk *blk_if = obj;
645
+ QVirtQueue *vq;
646
+
647
+ vq = test_basic(blk_if->vdev, t_alloc);
648
+ qvirtqueue_cleanup(blk_if->vdev->bus, vq, t_alloc);
649
+
650
+}
651
+
652
+static void indirect(void *obj, void *u_data, QGuestAllocator *t_alloc)
653
+{
654
+ QVirtQueue *vq;
655
+ QVhostUserBlk *blk_if = obj;
656
+ QVirtioDevice *dev = blk_if->vdev;
657
+ QVirtioBlkReq req;
658
+ QVRingIndirectDesc *indirect;
659
+ uint64_t req_addr;
660
+ uint64_t capacity;
661
+ uint64_t features;
662
+ uint32_t free_head;
663
+ uint8_t status;
664
+ char *data;
665
+ QTestState *qts = global_qtest;
666
+
667
+ features = qvirtio_get_features(dev);
668
+ g_assert_cmphex(features & (1u << VIRTIO_RING_F_INDIRECT_DESC), !=, 0);
669
+ features = features & ~(QVIRTIO_F_BAD_FEATURE |
670
+ (1u << VIRTIO_RING_F_EVENT_IDX) |
671
+ (1u << VIRTIO_BLK_F_SCSI));
672
+ qvirtio_set_features(dev, features);
673
+
674
+ capacity = qvirtio_config_readq(dev, 0);
675
+ g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
676
+
677
+ vq = qvirtqueue_setup(dev, t_alloc, 0);
678
+ qvirtio_set_driver_ok(dev);
679
+
680
+ /* Write request */
681
+ req.type = VIRTIO_BLK_T_OUT;
682
+ req.ioprio = 1;
683
+ req.sector = 0;
684
+ req.data = g_malloc0(512);
685
+ strcpy(req.data, "TEST");
686
+
687
+ req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
688
+
689
+ g_free(req.data);
690
+
691
+ indirect = qvring_indirect_desc_setup(qts, dev, t_alloc, 2);
692
+ qvring_indirect_desc_add(dev, qts, indirect, req_addr, 528, false);
693
+ qvring_indirect_desc_add(dev, qts, indirect, req_addr + 528, 1, true);
694
+ free_head = qvirtqueue_add_indirect(qts, vq, indirect);
695
+ qvirtqueue_kick(qts, dev, vq, free_head);
696
+
697
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
698
+ QVIRTIO_BLK_TIMEOUT_US);
699
+ status = readb(req_addr + 528);
700
+ g_assert_cmpint(status, ==, 0);
701
+
702
+ g_free(indirect);
703
+ guest_free(t_alloc, req_addr);
704
+
705
+ /* Read request */
706
+ req.type = VIRTIO_BLK_T_IN;
707
+ req.ioprio = 1;
708
+ req.sector = 0;
709
+ req.data = g_malloc0(512);
710
+ strcpy(req.data, "TEST");
711
+
712
+ req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
713
+
714
+ g_free(req.data);
715
+
716
+ indirect = qvring_indirect_desc_setup(qts, dev, t_alloc, 2);
717
+ qvring_indirect_desc_add(dev, qts, indirect, req_addr, 16, false);
718
+ qvring_indirect_desc_add(dev, qts, indirect, req_addr + 16, 513, true);
719
+ free_head = qvirtqueue_add_indirect(qts, vq, indirect);
720
+ qvirtqueue_kick(qts, dev, vq, free_head);
721
+
722
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
723
+ QVIRTIO_BLK_TIMEOUT_US);
724
+ status = readb(req_addr + 528);
725
+ g_assert_cmpint(status, ==, 0);
726
+
727
+ data = g_malloc0(512);
728
+ qtest_memread(qts, req_addr + 16, data, 512);
729
+ g_assert_cmpstr(data, ==, "TEST");
730
+ g_free(data);
731
+
732
+ g_free(indirect);
733
+ guest_free(t_alloc, req_addr);
734
+ qvirtqueue_cleanup(dev->bus, vq, t_alloc);
735
+}
736
+
737
+static void idx(void *obj, void *u_data, QGuestAllocator *t_alloc)
738
+{
739
+ QVirtQueue *vq;
740
+ QVhostUserBlkPCI *blk = obj;
741
+ QVirtioPCIDevice *pdev = &blk->pci_vdev;
742
+ QVirtioDevice *dev = &pdev->vdev;
743
+ QVirtioBlkReq req;
744
+ uint64_t req_addr;
745
+ uint64_t capacity;
746
+ uint64_t features;
747
+ uint32_t free_head;
748
+ uint32_t write_head;
749
+ uint32_t desc_idx;
750
+ uint8_t status;
751
+ char *data;
752
+ QOSGraphObject *blk_object = obj;
753
+ QPCIDevice *pci_dev = blk_object->get_driver(blk_object, "pci-device");
754
+ QTestState *qts = global_qtest;
755
+
756
+ if (qpci_check_buggy_msi(pci_dev)) {
757
+ return;
758
+ }
759
+
760
+ qpci_msix_enable(pdev->pdev);
761
+ qvirtio_pci_set_msix_configuration_vector(pdev, t_alloc, 0);
762
+
763
+ features = qvirtio_get_features(dev);
764
+ features = features & ~(QVIRTIO_F_BAD_FEATURE |
765
+ (1u << VIRTIO_RING_F_INDIRECT_DESC) |
766
+ (1u << VIRTIO_F_NOTIFY_ON_EMPTY) |
767
+ (1u << VIRTIO_BLK_F_SCSI));
768
+ qvirtio_set_features(dev, features);
769
+
770
+ capacity = qvirtio_config_readq(dev, 0);
771
+ g_assert_cmpint(capacity, ==, TEST_IMAGE_SIZE / 512);
772
+
773
+ vq = qvirtqueue_setup(dev, t_alloc, 0);
774
+ qvirtqueue_pci_msix_setup(pdev, (QVirtQueuePCI *)vq, t_alloc, 1);
775
+
776
+ qvirtio_set_driver_ok(dev);
777
+
778
+ /* Write request */
779
+ req.type = VIRTIO_BLK_T_OUT;
780
+ req.ioprio = 1;
781
+ req.sector = 0;
782
+ req.data = g_malloc0(512);
783
+ strcpy(req.data, "TEST");
784
+
785
+ req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
786
+
787
+ g_free(req.data);
788
+
789
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
790
+ qvirtqueue_add(qts, vq, req_addr + 16, 512, false, true);
791
+ qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
792
+ qvirtqueue_kick(qts, dev, vq, free_head);
793
+
794
+ qvirtio_wait_used_elem(qts, dev, vq, free_head, NULL,
795
+ QVIRTIO_BLK_TIMEOUT_US);
796
+
797
+ /* Write request */
798
+ req.type = VIRTIO_BLK_T_OUT;
799
+ req.ioprio = 1;
800
+ req.sector = 1;
801
+ req.data = g_malloc0(512);
802
+ strcpy(req.data, "TEST");
803
+
804
+ req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
805
+
806
+ g_free(req.data);
807
+
808
+ /* Notify after processing the third request */
809
+ qvirtqueue_set_used_event(qts, vq, 2);
810
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
811
+ qvirtqueue_add(qts, vq, req_addr + 16, 512, false, true);
812
+ qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
813
+ qvirtqueue_kick(qts, dev, vq, free_head);
814
+ write_head = free_head;
815
+
816
+ /* No notification expected */
817
+ status = qvirtio_wait_status_byte_no_isr(qts, dev,
818
+ vq, req_addr + 528,
819
+ QVIRTIO_BLK_TIMEOUT_US);
820
+ g_assert_cmpint(status, ==, 0);
821
+
822
+ guest_free(t_alloc, req_addr);
823
+
824
+ /* Read request */
825
+ req.type = VIRTIO_BLK_T_IN;
826
+ req.ioprio = 1;
827
+ req.sector = 1;
828
+ req.data = g_malloc0(512);
829
+
830
+ req_addr = virtio_blk_request(t_alloc, dev, &req, 512);
831
+
832
+ g_free(req.data);
833
+
834
+ free_head = qvirtqueue_add(qts, vq, req_addr, 16, false, true);
835
+ qvirtqueue_add(qts, vq, req_addr + 16, 512, true, true);
836
+ qvirtqueue_add(qts, vq, req_addr + 528, 1, true, false);
837
+
838
+ qvirtqueue_kick(qts, dev, vq, free_head);
839
+
840
+ /* We get just one notification for both requests */
841
+ qvirtio_wait_used_elem(qts, dev, vq, write_head, NULL,
842
+ QVIRTIO_BLK_TIMEOUT_US);
843
+ g_assert(qvirtqueue_get_buf(qts, vq, &desc_idx, NULL));
844
+ g_assert_cmpint(desc_idx, ==, free_head);
845
+
846
+ status = readb(req_addr + 528);
847
+ g_assert_cmpint(status, ==, 0);
848
+
849
+ data = g_malloc0(512);
850
+ qtest_memread(qts, req_addr + 16, data, 512);
851
+ g_assert_cmpstr(data, ==, "TEST");
852
+ g_free(data);
853
+
854
+ guest_free(t_alloc, req_addr);
855
+
856
+ /* End test */
857
+ qpci_msix_disable(pdev->pdev);
858
+
859
+ qvirtqueue_cleanup(dev->bus, vq, t_alloc);
860
+}
861
+
862
+static void pci_hotplug(void *obj, void *data, QGuestAllocator *t_alloc)
863
+{
864
+ QVirtioPCIDevice *dev1 = obj;
865
+ QVirtioPCIDevice *dev;
866
+ QTestState *qts = dev1->pdev->bus->qts;
867
+
868
+ /* plug secondary disk */
869
+ qtest_qmp_device_add(qts, "vhost-user-blk-pci", "drv1",
870
+ "{'addr': %s, 'chardev': 'char2'}",
871
+ stringify(PCI_SLOT_HP) ".0");
872
+
873
+ dev = virtio_pci_new(dev1->pdev->bus,
874
+ &(QPCIAddress) { .devfn = QPCI_DEVFN(PCI_SLOT_HP, 0)
875
+ });
876
+ g_assert_nonnull(dev);
877
+ g_assert_cmpint(dev->vdev.device_type, ==, VIRTIO_ID_BLOCK);
878
+ qvirtio_pci_device_disable(dev);
879
+ qos_object_destroy((QOSGraphObject *)dev);
880
+
881
+ /* unplug secondary disk */
882
+ qpci_unplug_acpi_device_test(qts, "drv1", PCI_SLOT_HP);
883
+}
884
+
885
+/*
886
+ * Check that setting the vring addr on a non-existent virtqueue does
887
+ * not crash.
888
+ */
889
+static void test_nonexistent_virtqueue(void *obj, void *data,
890
+ QGuestAllocator *t_alloc)
891
+{
892
+ QVhostUserBlkPCI *blk = obj;
893
+ QVirtioPCIDevice *pdev = &blk->pci_vdev;
894
+ QPCIBar bar0;
895
+ QPCIDevice *dev;
896
+
897
+ dev = qpci_device_find(pdev->pdev->bus, QPCI_DEVFN(4, 0));
898
+ g_assert(dev != NULL);
899
+ qpci_device_enable(dev);
900
+
901
+ bar0 = qpci_iomap(dev, 0, NULL);
902
+
903
+ qpci_io_writeb(dev, bar0, VIRTIO_PCI_QUEUE_SEL, 2);
904
+ qpci_io_writel(dev, bar0, VIRTIO_PCI_QUEUE_PFN, 1);
905
+
906
+ g_free(dev);
907
+}
908
+
909
+static const char *qtest_qemu_storage_daemon_binary(void)
910
+{
911
+ const char *qemu_storage_daemon_bin;
912
+
913
+ qemu_storage_daemon_bin = getenv("QTEST_QEMU_STORAGE_DAEMON_BINARY");
914
+ if (!qemu_storage_daemon_bin) {
915
+ fprintf(stderr, "Environment variable "
916
+ "QTEST_QEMU_STORAGE_DAEMON_BINARY required\n");
917
+ exit(0);
918
+ }
919
+
920
+ return qemu_storage_daemon_bin;
921
+}
922
+
923
+static void drive_destroy(void *path)
924
+{
925
+ unlink(path);
926
+ g_free(path);
927
+ qos_invalidate_command_line();
928
+}
929
+
930
+static char *drive_create(void)
931
+{
932
+ int fd, ret;
933
+ /** vhost-user-blk won't recognize drive located in /tmp */
934
+ char *t_path = g_strdup("qtest.XXXXXX");
935
+
936
+ /** Create a temporary raw image */
937
+ fd = mkstemp(t_path);
938
+ g_assert_cmpint(fd, >=, 0);
939
+ ret = ftruncate(fd, TEST_IMAGE_SIZE);
940
+ g_assert_cmpint(ret, ==, 0);
941
+ close(fd);
942
+
943
+ g_test_queue_destroy(drive_destroy, t_path);
944
+ return t_path;
945
+}
946
+
947
+static char sock_path_tempate[] = "/tmp/qtest.vhost_user_blk.XXXXXX";
948
+static char qmp_sock_path_tempate[] = "/tmp/qtest.vhost_user_blk.qmp.XXXXXX";
949
+
950
+static void quit_storage_daemon(void *qmp_test_state)
951
+{
952
+ const char quit_str[] = "{ 'execute': 'quit' }";
953
+
954
+ /* Before quitting storage-daemon, quit qemu to avoid dubious messages */
955
+ qobject_unref(qtest_qmp(global_qtest, quit_str));
956
+
957
+ /*
958
+ * Give storage-daemon enough time to wake up and terminate the
959
+ * vu_client_trip coroutine so the Coroutine object could
960
+ * be cleaned up. Otherwise LeakSanitizer would complain
961
+ * about memory leaks.
962
+ */
963
+ g_usleep(1000);
964
+
965
+ qobject_unref(qtest_qmp((QTestState *)qmp_test_state, quit_str));
966
+ g_free(qmp_test_state);
967
+}
968
+
969
+static char *start_vhost_user_blk(GString *cmd_line, int vus_instances)
970
+{
971
+ const char *vhost_user_blk_bin = qtest_qemu_storage_daemon_binary();
972
+ int fd, qmp_fd, i;
973
+ QTestState *qmp_test_state;
974
+ gchar *img_path;
975
+ char *sock_path = NULL;
976
+ char *qmp_sock_path = g_strdup(qmp_sock_path_tempate);
977
+ GString *storage_daemon_command = g_string_new(NULL);
978
+
979
+ qmp_fd = mkstemp(qmp_sock_path);
980
+ g_assert_cmpint(qmp_fd, >=, 0);
981
+ g_test_queue_destroy(drive_destroy, qmp_sock_path);
982
+
983
+ g_string_append_printf(storage_daemon_command,
984
+ "exec %s "
985
+ "--chardev socket,id=qmp,path=%s,server,nowait --monitor chardev=qmp ",
986
+ vhost_user_blk_bin, qmp_sock_path);
987
+
988
+ g_string_append_printf(cmd_line,
989
+ " -object memory-backend-memfd,id=mem,size=128M,share=on -numa node,memdev=mem ");
990
+
991
+ for (i = 0; i < vus_instances; i++) {
992
+ sock_path = g_strdup(sock_path_tempate);
993
+ fd = mkstemp(sock_path);
994
+ g_assert_cmpint(fd, >=, 0);
995
+ g_test_queue_destroy(drive_destroy, sock_path);
996
+ /* create image file */
997
+ img_path = drive_create();
998
+ g_string_append_printf(storage_daemon_command,
999
+ "--blockdev driver=file,node-name=disk%d,filename=%s "
1000
+ "--object vhost-user-blk-server,id=disk%d,unix-socket=%s,"
1001
+ "node-name=disk%i,writable=on ",
1002
+ i, img_path, i, sock_path, i);
1003
+
1004
+ g_string_append_printf(cmd_line, "-chardev socket,id=char%d,path=%s ",
1005
+ i + 1, sock_path);
1006
+ }
1007
+
1008
+ g_test_message("starting vhost-user backend: %s",
1009
+ storage_daemon_command->str);
1010
+ pid_t pid = fork();
1011
+ if (pid == 0) {
1012
+ execlp("/bin/sh", "sh", "-c", storage_daemon_command->str, NULL);
1013
+ exit(1);
1014
+ }
1015
+ g_string_free(storage_daemon_command, true);
1016
+
1017
+ qmp_test_state = qtest_create_state_with_qmp_fd(
1018
+ qtest_socket_client(qmp_sock_path));
1019
+ /*
1020
+ * Ask qemu-storage-daemon to quit so it
1021
+ * will not block scripts/tap-driver.pl.
1022
+ */
1023
+ g_test_queue_destroy(quit_storage_daemon, qmp_test_state);
1024
+
1025
+ qobject_unref(qtest_qmp(qmp_test_state, "{'execute': 'qmp_capabilities'}"));
1026
+ return sock_path;
1027
+}
1028
+
1029
+static void *vhost_user_blk_test_setup(GString *cmd_line, void *arg)
1030
+{
1031
+ start_vhost_user_blk(cmd_line, 1);
1032
+ return arg;
1033
+}
1034
+
1035
+/*
1036
+ * Setup for hotplug.
1037
+ *
1038
+ * Since vhost-user server only serves one vhost-user client one time,
1039
+ * another export is started for the hotplugged device.
1040
+ *
1041
+ */
1042
+static void *vhost_user_blk_hotplug_test_setup(GString *cmd_line, void *arg)
1043
+{
1044
+ /* "-chardev socket,id=char2" is used for pci_hotplug */
1045
+ start_vhost_user_blk(cmd_line, 2);
1046
+ return arg;
1047
+}
1048
+
1049
+static void register_vhost_user_blk_test(void)
1050
+{
1051
+ QOSGraphTestOptions opts = {
1052
+ .before = vhost_user_blk_test_setup,
1053
+ };
1054
+
1055
+ /*
1056
+ * tests for vhost-user-blk and vhost-user-blk-pci
1057
+ * The tests are borrowed from tests/virtio-blk-test.c. But some tests
1058
+ * regarding block_resize don't work for vhost-user-blk.
1059
+ * vhost-user-blk device doesn't have -drive, so tests containing
1060
+ * block_resize are also abandoned:
1061
+ * - config
1062
+ * - resize
1063
+ */
1064
+ qos_add_test("basic", "vhost-user-blk", basic, &opts);
1065
+ qos_add_test("indirect", "vhost-user-blk", indirect, &opts);
1066
+ qos_add_test("idx", "vhost-user-blk-pci", idx, &opts);
1067
+ qos_add_test("nxvirtq", "vhost-user-blk-pci",
1068
+ test_nonexistent_virtqueue, &opts);
1069
+
1070
+ opts.before = vhost_user_blk_hotplug_test_setup;
1071
+ qos_add_test("hotplug", "vhost-user-blk-pci", pci_hotplug, &opts);
1072
+}
1073
+
1074
+libqos_init(register_vhost_user_blk_test);
1075
diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build
1076
index XXXXXXX..XXXXXXX 100644
1077
--- a/tests/qtest/libqos/meson.build
1078
+++ b/tests/qtest/libqos/meson.build
1079
@@ -XXX,XX +XXX,XX @@ libqos_srcs = files('../libqtest.c',
1080
'virtio-9p.c',
1081
'virtio-balloon.c',
1082
'virtio-blk.c',
1083
+ 'vhost-user-blk.c',
1084
'virtio-mmio.c',
1085
'virtio-net.c',
1086
'virtio-pci.c',
1087
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
1088
index XXXXXXX..XXXXXXX 100644
1089
--- a/tests/qtest/meson.build
1090
+++ b/tests/qtest/meson.build
1091
@@ -XXX,XX +XXX,XX @@ qos_test_ss.add(
1092
)
1093
qos_test_ss.add(when: 'CONFIG_VIRTFS', if_true: files('virtio-9p-test.c'))
1094
qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c'))
1095
+qos_test_ss.add(when: ['CONFIG_LINUX', 'CONFIG_TOOLS'], if_true: files('vhost-user-blk-test.c'))
1096
1097
extra_qtest_deps = {
1098
'bios-tables-test': [io],
1099
@@ -XXX,XX +XXX,XX @@ foreach dir : target_dirs
1100
endif
1101
qtest_env.set('G_TEST_DBUS_DAEMON', meson.source_root() / 'tests/dbus-vmstate-daemon.sh')
1102
qtest_env.set('QTEST_QEMU_BINARY', './qemu-system-' + target_base)
1103
-
1104
+ qtest_env.set('QTEST_QEMU_STORAGE_DAEMON_BINARY', './storage-daemon/qemu-storage-daemon')
1105
+
1106
foreach test : qtests
1107
# Executables are shared across targets, declare them only the first time we
1108
# encounter them
1109
--
1110
2.26.2
1111
diff view generated by jsdifflib
Deleted patch
1
From: Coiby Xu <coiby.xu@gmail.com>
2
1
3
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
4
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
7
Message-id: 20200918080912.321299-8-coiby.xu@gmail.com
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
---
10
MAINTAINERS | 8 ++++++++
11
1 file changed, 8 insertions(+)
12
13
diff --git a/MAINTAINERS b/MAINTAINERS
14
index XXXXXXX..XXXXXXX 100644
15
--- a/MAINTAINERS
16
+++ b/MAINTAINERS
17
@@ -XXX,XX +XXX,XX @@ L: qemu-block@nongnu.org
18
S: Supported
19
F: tests/image-fuzzer/
20
21
+Vhost-user block device backend server
22
+M: Coiby Xu <Coiby.Xu@gmail.com>
23
+S: Maintained
24
+F: block/export/vhost-user-blk-server.c
25
+F: util/vhost-user-server.c
26
+F: tests/qtest/vhost-user-blk-test.c
27
+F: tests/qtest/libqos/vhost-user-blk.c
28
+
29
Replication
30
M: Wen Congyang <wencongyang2@huawei.com>
31
M: Xie Changlong <xiechanglong.d@gmail.com>
32
--
33
2.26.2
34
diff view generated by jsdifflib
Deleted patch
1
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2
Message-id: 20200924151549.913737-3-stefanha@redhat.com
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
---
5
util/vhost-user-server.c | 2 +-
6
1 file changed, 1 insertion(+), 1 deletion(-)
7
1
8
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/util/vhost-user-server.c
11
+++ b/util/vhost-user-server.c
12
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
13
return false;
14
}
15
16
- /* zero out unspecified fileds */
17
+ /* zero out unspecified fields */
18
*server = (VuServer) {
19
.listener = listener,
20
.vu_iface = vu_iface,
21
--
22
2.26.2
23
diff view generated by jsdifflib
1
Use the new QAPI block exports API instead of defining our own QOM
1
From: Pino Toscano <ptoscano@redhat.com>
2
objects.
3
2
4
This is a large change because the lifecycle of VuBlockDev needs to
3
Rewrite the implementation of the ssh block driver to use libssh instead
5
follow BlockExportDriver. QOM properties are replaced by QAPI options
4
of libssh2. The libssh library has various advantages over libssh2:
6
objects.
5
- easier API for authentication (for example for using ssh-agent)
6
- easier API for known_hosts handling
7
- supports newer types of keys in known_hosts
7
8
8
VuBlockDev is renamed VuBlkExport and contains a BlockExport field.
9
Use APIs/features available in libssh 0.8 conditionally, to support
9
Several fields can be dropped since BlockExport already has equivalents.
10
older versions (which are not recommended though).
10
11
11
The file names and meson build integration will be adjusted in a future
12
Adjust the iotest 207 according to the different error message, and to
12
patch. libvhost-user should probably be built as a static library that
13
find the default key type for localhost (to properly compare the
13
is linked into QEMU instead of as a .c file that results in duplicate
14
fingerprint with).
14
compilation.
15
Contributed-by: Max Reitz <mreitz@redhat.com>
15
16
16
The new command-line syntax is:
17
Adjust the various Docker/Travis scripts to use libssh when available
18
instead of libssh2. The mingw/mxe testing is dropped for now, as there
19
are no packages for it.
17
20
18
$ qemu-storage-daemon \
21
Signed-off-by: Pino Toscano <ptoscano@redhat.com>
19
--blockdev file,node-name=drive0,filename=test.img \
22
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
20
--export vhost-user-blk,node-name=drive0,id=export0,unix-socket=/tmp/vhost-user-blk.sock
23
Acked-by: Alex Bennée <alex.bennee@linaro.org>
24
Message-id: 20190620200840.17655-1-ptoscano@redhat.com
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
26
Message-id: 5873173.t2JhDm7DL7@lindworm.usersys.redhat.com
27
Signed-off-by: Max Reitz <mreitz@redhat.com>
28
---
29
configure | 65 +-
30
block/Makefile.objs | 6 +-
31
block/ssh.c | 652 ++++++++++--------
32
.travis.yml | 4 +-
33
block/trace-events | 14 +-
34
docs/qemu-block-drivers.texi | 2 +-
35
.../dockerfiles/debian-win32-cross.docker | 1 -
36
.../dockerfiles/debian-win64-cross.docker | 1 -
37
tests/docker/dockerfiles/fedora.docker | 4 +-
38
tests/docker/dockerfiles/ubuntu.docker | 2 +-
39
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
40
tests/qemu-iotests/207 | 54 +-
41
tests/qemu-iotests/207.out | 2 +-
42
13 files changed, 449 insertions(+), 360 deletions(-)
21
43
22
Note that unix-socket is optional because we may wish to accept chardevs
44
diff --git a/configure b/configure
23
too in the future.
45
index XXXXXXX..XXXXXXX 100755
24
46
--- a/configure
25
Markus noted that supported address families are not explicit in the
47
+++ b/configure
26
QAPI schema. It is unlikely that support for more address families will
48
@@ -XXX,XX +XXX,XX @@ auth_pam=""
27
be added since file descriptor passing is required and few address
49
vte=""
28
families support it. If a new address family needs to be added, then the
50
virglrenderer=""
29
QAPI 'features' syntax can be used to advertise them.
51
tpm=""
30
52
-libssh2=""
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
53
+libssh=""
32
Acked-by: Markus Armbruster <armbru@redhat.com>
54
live_block_migration="yes"
33
Message-id: 20200924151549.913737-12-stefanha@redhat.com
55
numa=""
34
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
56
tcmalloc="no"
35
---
57
@@ -XXX,XX +XXX,XX @@ for opt do
36
qapi/block-export.json | 21 +-
58
;;
37
block/export/vhost-user-blk-server.h | 23 +-
59
--enable-tpm) tpm="yes"
38
block/export/export.c | 6 +
60
;;
39
block/export/vhost-user-blk-server.c | 452 +++++++--------------------
61
- --disable-libssh2) libssh2="no"
40
tests/qtest/vhost-user-blk-test.c | 2 +-
62
+ --disable-libssh) libssh="no"
41
util/vhost-user-server.c | 10 +-
63
;;
42
block/export/meson.build | 1 +
64
- --enable-libssh2) libssh2="yes"
43
block/meson.build | 1 -
65
+ --enable-libssh) libssh="yes"
44
8 files changed, 157 insertions(+), 359 deletions(-)
66
;;
45
67
--disable-live-block-migration) live_block_migration="no"
46
diff --git a/qapi/block-export.json b/qapi/block-export.json
68
;;
69
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
70
coroutine-pool coroutine freelist (better performance)
71
glusterfs GlusterFS backend
72
tpm TPM support
73
- libssh2 ssh block device support
74
+ libssh ssh block device support
75
numa libnuma support
76
libxml2 for Parallels image format
77
tcmalloc tcmalloc support
78
@@ -XXX,XX +XXX,XX @@ EOF
79
fi
80
81
##########################################
82
-# libssh2 probe
83
-min_libssh2_version=1.2.8
84
-if test "$libssh2" != "no" ; then
85
- if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
86
- libssh2_cflags=$($pkg_config libssh2 --cflags)
87
- libssh2_libs=$($pkg_config libssh2 --libs)
88
- libssh2=yes
89
+# libssh probe
90
+if test "$libssh" != "no" ; then
91
+ if $pkg_config --exists libssh; then
92
+ libssh_cflags=$($pkg_config libssh --cflags)
93
+ libssh_libs=$($pkg_config libssh --libs)
94
+ libssh=yes
95
else
96
- if test "$libssh2" = "yes" ; then
97
- error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2"
98
+ if test "$libssh" = "yes" ; then
99
+ error_exit "libssh required for --enable-libssh"
100
fi
101
- libssh2=no
102
+ libssh=no
103
fi
104
fi
105
106
##########################################
107
-# libssh2_sftp_fsync probe
108
+# Check for libssh 0.8
109
+# This is done like this instead of using the LIBSSH_VERSION_* and
110
+# SSH_VERSION_* macros because some distributions in the past shipped
111
+# snapshots of the future 0.8 from Git, and those snapshots did not
112
+# have updated version numbers (still referring to 0.7.0).
113
114
-if test "$libssh2" = "yes"; then
115
+if test "$libssh" = "yes"; then
116
cat > $TMPC <<EOF
117
-#include <stdio.h>
118
-#include <libssh2.h>
119
-#include <libssh2_sftp.h>
120
-int main(void) {
121
- LIBSSH2_SESSION *session;
122
- LIBSSH2_SFTP *sftp;
123
- LIBSSH2_SFTP_HANDLE *sftp_handle;
124
- session = libssh2_session_init ();
125
- sftp = libssh2_sftp_init (session);
126
- sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0);
127
- libssh2_sftp_fsync (sftp_handle);
128
- return 0;
129
-}
130
+#include <libssh/libssh.h>
131
+int main(void) { return ssh_get_server_publickey(NULL, NULL); }
132
EOF
133
- # libssh2_cflags/libssh2_libs defined in previous test.
134
- if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then
135
- QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS"
136
+ if compile_prog "$libssh_cflags" "$libssh_libs"; then
137
+ libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
138
fi
139
fi
140
141
@@ -XXX,XX +XXX,XX @@ echo "GlusterFS support $glusterfs"
142
echo "gcov $gcov_tool"
143
echo "gcov enabled $gcov"
144
echo "TPM support $tpm"
145
-echo "libssh2 support $libssh2"
146
+echo "libssh support $libssh"
147
echo "QOM debugging $qom_cast_debug"
148
echo "Live block migration $live_block_migration"
149
echo "lzo support $lzo"
150
@@ -XXX,XX +XXX,XX @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then
151
echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
152
fi
153
154
-if test "$libssh2" = "yes" ; then
155
- echo "CONFIG_LIBSSH2=m" >> $config_host_mak
156
- echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
157
- echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
158
+if test "$libssh" = "yes" ; then
159
+ echo "CONFIG_LIBSSH=m" >> $config_host_mak
160
+ echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
161
+ echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
162
fi
163
164
if test "$live_block_migration" = "yes" ; then
165
diff --git a/block/Makefile.objs b/block/Makefile.objs
47
index XXXXXXX..XXXXXXX 100644
166
index XXXXXXX..XXXXXXX 100644
48
--- a/qapi/block-export.json
167
--- a/block/Makefile.objs
49
+++ b/qapi/block-export.json
168
+++ b/block/Makefile.objs
169
@@ -XXX,XX +XXX,XX @@ block-obj-$(CONFIG_CURL) += curl.o
170
block-obj-$(CONFIG_RBD) += rbd.o
171
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
172
block-obj-$(CONFIG_VXHS) += vxhs.o
173
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
174
+block-obj-$(CONFIG_LIBSSH) += ssh.o
175
block-obj-y += accounting.o dirty-bitmap.o
176
block-obj-y += write-threshold.o
177
block-obj-y += backup.o
178
@@ -XXX,XX +XXX,XX @@ rbd.o-libs := $(RBD_LIBS)
179
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
180
gluster.o-libs := $(GLUSTERFS_LIBS)
181
vxhs.o-libs := $(VXHS_LIBS)
182
-ssh.o-cflags := $(LIBSSH2_CFLAGS)
183
-ssh.o-libs := $(LIBSSH2_LIBS)
184
+ssh.o-cflags := $(LIBSSH_CFLAGS)
185
+ssh.o-libs := $(LIBSSH_LIBS)
186
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
187
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
188
dmg-bz2.o-libs := $(BZIP2_LIBS)
189
diff --git a/block/ssh.c b/block/ssh.c
190
index XXXXXXX..XXXXXXX 100644
191
--- a/block/ssh.c
192
+++ b/block/ssh.c
50
@@ -XXX,XX +XXX,XX @@
193
@@ -XXX,XX +XXX,XX @@
51
'data': { '*name': 'str', '*description': 'str',
194
52
'*bitmap': 'str' } }
195
#include "qemu/osdep.h"
53
196
54
+##
197
-#include <libssh2.h>
55
+# @BlockExportOptionsVhostUserBlk:
198
-#include <libssh2_sftp.h>
56
+#
199
+#include <libssh/libssh.h>
57
+# A vhost-user-blk block export.
200
+#include <libssh/sftp.h>
58
+#
201
59
+# @addr: The vhost-user socket on which to listen. Both 'unix' and 'fd'
202
#include "block/block_int.h"
60
+# SocketAddress types are supported. Passed fds must be UNIX domain
203
#include "block/qdict.h"
61
+# sockets.
62
+# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes.
63
+#
64
+# Since: 5.2
65
+##
66
+{ 'struct': 'BlockExportOptionsVhostUserBlk',
67
+ 'data': { 'addr': 'SocketAddress', '*logical-block-size': 'size' } }
68
+
69
##
70
# @NbdServerAddOptions:
71
#
72
@@ -XXX,XX +XXX,XX @@
204
@@ -XXX,XX +XXX,XX @@
73
# An enumeration of block export types
205
#include "trace.h"
74
#
206
75
# @nbd: NBD export
207
/*
76
+# @vhost-user-blk: vhost-user-blk export (since 5.2)
208
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note
77
#
209
- * that this requires that libssh2 was specially compiled with the
78
# Since: 4.2
210
- * `./configure --enable-debug' option, so most likely you will have
79
##
211
- * to compile it yourself. The meaning of <bitmask> is described
80
{ 'enum': 'BlockExportType',
212
- * here: http://www.libssh2.org/libssh2_trace.html
81
- 'data': [ 'nbd' ] }
213
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
82
+ 'data': [ 'nbd', 'vhost-user-blk' ] }
214
+ * The meaning of <level> is described here:
83
215
+ * http://api.libssh.org/master/group__libssh__log.html
84
##
216
*/
85
# @BlockExportOptions:
217
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
86
@@ -XXX,XX +XXX,XX @@
218
+#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */
87
'*writethrough': 'bool' },
219
88
'discriminator': 'type',
220
typedef struct BDRVSSHState {
89
'data': {
221
/* Coroutine. */
90
- 'nbd': 'BlockExportOptionsNbd'
222
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVSSHState {
91
+ 'nbd': 'BlockExportOptionsNbd',
223
92
+ 'vhost-user-blk': 'BlockExportOptionsVhostUserBlk'
224
/* SSH connection. */
93
} }
225
int sock; /* socket */
94
226
- LIBSSH2_SESSION *session; /* ssh session */
95
##
227
- LIBSSH2_SFTP *sftp; /* sftp session */
96
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
228
- LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
97
index XXXXXXX..XXXXXXX 100644
229
+ ssh_session session; /* ssh session */
98
--- a/block/export/vhost-user-blk-server.h
230
+ sftp_session sftp; /* sftp session */
99
+++ b/block/export/vhost-user-blk-server.h
231
+ sftp_file sftp_handle; /* sftp remote file handle */
100
@@ -XXX,XX +XXX,XX @@
232
101
233
- /* See ssh_seek() function below. */
102
#ifndef VHOST_USER_BLK_SERVER_H
234
- int64_t offset;
103
#define VHOST_USER_BLK_SERVER_H
235
- bool offset_op_read;
104
-#include "util/vhost-user-server.h"
236
-
105
237
- /* File attributes at open. We try to keep the .filesize field
106
-typedef struct VuBlockDev VuBlockDev;
238
+ /*
107
-#define TYPE_VHOST_USER_BLK_SERVER "vhost-user-blk-server"
239
+ * File attributes at open. We try to keep the .size field
108
-#define VHOST_USER_BLK_SERVER(obj) \
240
* updated if it changes (eg by writing at the end of the file).
109
- OBJECT_CHECK(VuBlockDev, obj, TYPE_VHOST_USER_BLK_SERVER)
241
*/
110
+#include "block/export.h"
242
- LIBSSH2_SFTP_ATTRIBUTES attrs;
111
243
+ sftp_attributes attrs;
112
-/* vhost user block device */
244
113
-struct VuBlockDev {
245
InetSocketAddress *inet;
114
- Object parent_obj;
246
115
- char *node_name;
247
@@ -XXX,XX +XXX,XX @@ static void ssh_state_init(BDRVSSHState *s)
116
- SocketAddress *addr;
248
{
117
- AioContext *ctx;
249
memset(s, 0, sizeof *s);
118
- VuServer vu_server;
250
s->sock = -1;
119
- bool running;
251
- s->offset = -1;
120
- uint32_t blk_size;
252
qemu_co_mutex_init(&s->lock);
121
- BlockBackend *backend;
253
}
122
- QIOChannelSocket *sioc;
254
123
- QTAILQ_ENTRY(VuBlockDev) next;
255
@@ -XXX,XX +XXX,XX @@ static void ssh_state_free(BDRVSSHState *s)
124
- struct virtio_blk_config blkcfg;
256
{
125
- bool writable;
257
g_free(s->user);
126
-};
258
127
+/* For block/export/export.c */
259
+ if (s->attrs) {
128
+extern const BlockExportDriver blk_exp_vhost_user_blk;
260
+ sftp_attributes_free(s->attrs);
129
261
+ }
130
#endif /* VHOST_USER_BLK_SERVER_H */
262
if (s->sftp_handle) {
131
diff --git a/block/export/export.c b/block/export/export.c
263
- libssh2_sftp_close(s->sftp_handle);
132
index XXXXXXX..XXXXXXX 100644
264
+ sftp_close(s->sftp_handle);
133
--- a/block/export/export.c
265
}
134
+++ b/block/export/export.c
266
if (s->sftp) {
135
@@ -XXX,XX +XXX,XX @@
267
- libssh2_sftp_shutdown(s->sftp);
136
#include "sysemu/block-backend.h"
268
+ sftp_free(s->sftp);
137
#include "block/export.h"
269
}
138
#include "block/nbd.h"
270
if (s->session) {
139
+#if CONFIG_LINUX
271
- libssh2_session_disconnect(s->session,
140
+#include "block/export/vhost-user-blk-server.h"
272
- "from qemu ssh client: "
273
- "user closed the connection");
274
- libssh2_session_free(s->session);
275
- }
276
- if (s->sock >= 0) {
277
- close(s->sock);
278
+ ssh_disconnect(s->session);
279
+ ssh_free(s->session); /* This frees s->sock */
280
}
281
}
282
283
@@ -XXX,XX +XXX,XX @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
284
va_end(args);
285
286
if (s->session) {
287
- char *ssh_err;
288
+ const char *ssh_err;
289
int ssh_err_code;
290
291
- /* This is not an errno. See <libssh2.h>. */
292
- ssh_err_code = libssh2_session_last_error(s->session,
293
- &ssh_err, NULL, 0);
294
- error_setg(errp, "%s: %s (libssh2 error code: %d)",
295
+ /* This is not an errno. See <libssh/libssh.h>. */
296
+ ssh_err = ssh_get_error(s->session);
297
+ ssh_err_code = ssh_get_error_code(s->session);
298
+ error_setg(errp, "%s: %s (libssh error code: %d)",
299
msg, ssh_err, ssh_err_code);
300
} else {
301
error_setg(errp, "%s", msg);
302
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
303
va_end(args);
304
305
if (s->sftp) {
306
- char *ssh_err;
307
+ const char *ssh_err;
308
int ssh_err_code;
309
- unsigned long sftp_err_code;
310
+ int sftp_err_code;
311
312
- /* This is not an errno. See <libssh2.h>. */
313
- ssh_err_code = libssh2_session_last_error(s->session,
314
- &ssh_err, NULL, 0);
315
- /* See <libssh2_sftp.h>. */
316
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
317
+ /* This is not an errno. See <libssh/libssh.h>. */
318
+ ssh_err = ssh_get_error(s->session);
319
+ ssh_err_code = ssh_get_error_code(s->session);
320
+ /* See <libssh/sftp.h>. */
321
+ sftp_err_code = sftp_get_error(s->sftp);
322
323
error_setg(errp,
324
- "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
325
+ "%s: %s (libssh error code: %d, sftp error code: %d)",
326
msg, ssh_err, ssh_err_code, sftp_err_code);
327
} else {
328
error_setg(errp, "%s", msg);
329
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
330
331
static void sftp_error_trace(BDRVSSHState *s, const char *op)
332
{
333
- char *ssh_err;
334
+ const char *ssh_err;
335
int ssh_err_code;
336
- unsigned long sftp_err_code;
337
+ int sftp_err_code;
338
339
- /* This is not an errno. See <libssh2.h>. */
340
- ssh_err_code = libssh2_session_last_error(s->session,
341
- &ssh_err, NULL, 0);
342
- /* See <libssh2_sftp.h>. */
343
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
344
+ /* This is not an errno. See <libssh/libssh.h>. */
345
+ ssh_err = ssh_get_error(s->session);
346
+ ssh_err_code = ssh_get_error_code(s->session);
347
+ /* See <libssh/sftp.h>. */
348
+ sftp_err_code = sftp_get_error(s->sftp);
349
350
trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
351
}
352
@@ -XXX,XX +XXX,XX @@ static void ssh_parse_filename(const char *filename, QDict *options,
353
parse_uri(filename, options, errp);
354
}
355
356
-static int check_host_key_knownhosts(BDRVSSHState *s,
357
- const char *host, int port, Error **errp)
358
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
359
{
360
- const char *home;
361
- char *knh_file = NULL;
362
- LIBSSH2_KNOWNHOSTS *knh = NULL;
363
- struct libssh2_knownhost *found;
364
- int ret, r;
365
- const char *hostkey;
366
- size_t len;
367
- int type;
368
-
369
- hostkey = libssh2_session_hostkey(s->session, &len, &type);
370
- if (!hostkey) {
371
+ int ret;
372
+#ifdef HAVE_LIBSSH_0_8
373
+ enum ssh_known_hosts_e state;
374
+ int r;
375
+ ssh_key pubkey;
376
+ enum ssh_keytypes_e pubkey_type;
377
+ unsigned char *server_hash = NULL;
378
+ size_t server_hash_len;
379
+ char *fingerprint = NULL;
380
+
381
+ state = ssh_session_is_known_server(s->session);
382
+ trace_ssh_server_status(state);
383
+
384
+ switch (state) {
385
+ case SSH_KNOWN_HOSTS_OK:
386
+ /* OK */
387
+ trace_ssh_check_host_key_knownhosts();
388
+ break;
389
+ case SSH_KNOWN_HOSTS_CHANGED:
390
ret = -EINVAL;
391
- session_error_setg(errp, s, "failed to read remote host key");
392
+ r = ssh_get_server_publickey(s->session, &pubkey);
393
+ if (r == 0) {
394
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
395
+ &server_hash, &server_hash_len);
396
+ pubkey_type = ssh_key_type(pubkey);
397
+ ssh_key_free(pubkey);
398
+ }
399
+ if (r == 0) {
400
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
401
+ server_hash,
402
+ server_hash_len);
403
+ ssh_clean_pubkey_hash(&server_hash);
404
+ }
405
+ if (fingerprint) {
406
+ error_setg(errp,
407
+ "host key (%s key with fingerprint %s) does not match "
408
+ "the one in known_hosts; this may be a possible attack",
409
+ ssh_key_type_to_char(pubkey_type), fingerprint);
410
+ ssh_string_free_char(fingerprint);
411
+ } else {
412
+ error_setg(errp,
413
+ "host key does not match the one in known_hosts; this "
414
+ "may be a possible attack");
415
+ }
416
goto out;
417
- }
418
-
419
- knh = libssh2_knownhost_init(s->session);
420
- if (!knh) {
421
+ case SSH_KNOWN_HOSTS_OTHER:
422
ret = -EINVAL;
423
- session_error_setg(errp, s,
424
- "failed to initialize known hosts support");
425
+ error_setg(errp,
426
+ "host key for this server not found, another type exists");
427
+ goto out;
428
+ case SSH_KNOWN_HOSTS_UNKNOWN:
429
+ ret = -EINVAL;
430
+ error_setg(errp, "no host key was found in known_hosts");
431
+ goto out;
432
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
433
+ ret = -ENOENT;
434
+ error_setg(errp, "known_hosts file not found");
435
+ goto out;
436
+ case SSH_KNOWN_HOSTS_ERROR:
437
+ ret = -EINVAL;
438
+ error_setg(errp, "error while checking the host");
439
+ goto out;
440
+ default:
441
+ ret = -EINVAL;
442
+ error_setg(errp, "error while checking for known server (%d)", state);
443
goto out;
444
}
445
+#else /* !HAVE_LIBSSH_0_8 */
446
+ int state;
447
448
- home = getenv("HOME");
449
- if (home) {
450
- knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
451
- } else {
452
- knh_file = g_strdup_printf("/root/.ssh/known_hosts");
453
- }
454
-
455
- /* Read all known hosts from OpenSSH-style known_hosts file. */
456
- libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
457
+ state = ssh_is_server_known(s->session);
458
+ trace_ssh_server_status(state);
459
460
- r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
461
- LIBSSH2_KNOWNHOST_TYPE_PLAIN|
462
- LIBSSH2_KNOWNHOST_KEYENC_RAW,
463
- &found);
464
- switch (r) {
465
- case LIBSSH2_KNOWNHOST_CHECK_MATCH:
466
+ switch (state) {
467
+ case SSH_SERVER_KNOWN_OK:
468
/* OK */
469
- trace_ssh_check_host_key_knownhosts(found->key);
470
+ trace_ssh_check_host_key_knownhosts();
471
break;
472
- case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
473
+ case SSH_SERVER_KNOWN_CHANGED:
474
ret = -EINVAL;
475
- session_error_setg(errp, s,
476
- "host key does not match the one in known_hosts"
477
- " (found key %s)", found->key);
478
+ error_setg(errp,
479
+ "host key does not match the one in known_hosts; this "
480
+ "may be a possible attack");
481
goto out;
482
- case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
483
+ case SSH_SERVER_FOUND_OTHER:
484
ret = -EINVAL;
485
- session_error_setg(errp, s, "no host key was found in known_hosts");
486
+ error_setg(errp,
487
+ "host key for this server not found, another type exists");
488
+ goto out;
489
+ case SSH_SERVER_FILE_NOT_FOUND:
490
+ ret = -ENOENT;
491
+ error_setg(errp, "known_hosts file not found");
492
goto out;
493
- case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
494
+ case SSH_SERVER_NOT_KNOWN:
495
ret = -EINVAL;
496
- session_error_setg(errp, s,
497
- "failure matching the host key with known_hosts");
498
+ error_setg(errp, "no host key was found in known_hosts");
499
+ goto out;
500
+ case SSH_SERVER_ERROR:
501
+ ret = -EINVAL;
502
+ error_setg(errp, "server error");
503
goto out;
504
default:
505
ret = -EINVAL;
506
- session_error_setg(errp, s, "unknown error matching the host key"
507
- " with known_hosts (%d)", r);
508
+ error_setg(errp, "error while checking for known server (%d)", state);
509
goto out;
510
}
511
+#endif /* !HAVE_LIBSSH_0_8 */
512
513
/* known_hosts checking successful. */
514
ret = 0;
515
516
out:
517
- if (knh != NULL) {
518
- libssh2_knownhost_free(knh);
519
- }
520
- g_free(knh_file);
521
return ret;
522
}
523
524
@@ -XXX,XX +XXX,XX @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
525
526
static int
527
check_host_key_hash(BDRVSSHState *s, const char *hash,
528
- int hash_type, size_t fingerprint_len, Error **errp)
529
+ enum ssh_publickey_hash_type type, Error **errp)
530
{
531
- const char *fingerprint;
532
-
533
- fingerprint = libssh2_hostkey_hash(s->session, hash_type);
534
- if (!fingerprint) {
535
+ int r;
536
+ ssh_key pubkey;
537
+ unsigned char *server_hash;
538
+ size_t server_hash_len;
539
+
540
+#ifdef HAVE_LIBSSH_0_8
541
+ r = ssh_get_server_publickey(s->session, &pubkey);
542
+#else
543
+ r = ssh_get_publickey(s->session, &pubkey);
141
+#endif
544
+#endif
142
#include "qapi/error.h"
545
+ if (r != SSH_OK) {
143
#include "qapi/qapi-commands-block-export.h"
546
session_error_setg(errp, s, "failed to read remote host key");
144
#include "qapi/qapi-events-block-export.h"
547
return -EINVAL;
145
@@ -XXX,XX +XXX,XX @@
548
}
146
549
147
static const BlockExportDriver *blk_exp_drivers[] = {
550
- if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
148
&blk_exp_nbd,
551
- hash) != 0) {
149
+#if CONFIG_LINUX
552
+ r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
150
+ &blk_exp_vhost_user_blk,
553
+ ssh_key_free(pubkey);
151
+#endif
554
+ if (r != 0) {
152
};
555
+ session_error_setg(errp, s,
153
556
+ "failed reading the hash of the server SSH key");
154
/* Only accessed from the main thread */
557
+ return -EINVAL;
155
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
558
+ }
156
index XXXXXXX..XXXXXXX 100644
559
+
157
--- a/block/export/vhost-user-blk-server.c
560
+ r = compare_fingerprint(server_hash, server_hash_len, hash);
158
+++ b/block/export/vhost-user-blk-server.c
561
+ ssh_clean_pubkey_hash(&server_hash);
159
@@ -XXX,XX +XXX,XX @@
562
+ if (r != 0) {
160
*/
563
error_setg(errp, "remote host key does not match host_key_check '%s'",
161
#include "qemu/osdep.h"
564
hash);
162
#include "block/block.h"
565
return -EPERM;
163
+#include "contrib/libvhost-user/libvhost-user.h"
566
@@ -XXX,XX +XXX,XX @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
164
+#include "standard-headers/linux/virtio_blk.h"
567
return 0;
165
+#include "util/vhost-user-server.h"
568
}
166
#include "vhost-user-blk-server.h"
569
167
#include "qapi/error.h"
570
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
168
#include "qom/object_interfaces.h"
571
- SshHostKeyCheck *hkc, Error **errp)
169
@@ -XXX,XX +XXX,XX @@ struct virtio_blk_inhdr {
572
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
170
unsigned char status;
171
};
172
173
-typedef struct VuBlockReq {
174
+typedef struct VuBlkReq {
175
VuVirtqElement elem;
176
int64_t sector_num;
177
size_t size;
178
@@ -XXX,XX +XXX,XX @@ typedef struct VuBlockReq {
179
struct virtio_blk_outhdr out;
180
VuServer *server;
181
struct VuVirtq *vq;
182
-} VuBlockReq;
183
+} VuBlkReq;
184
185
-static void vu_block_req_complete(VuBlockReq *req)
186
+/* vhost user block device */
187
+typedef struct {
188
+ BlockExport export;
189
+ VuServer vu_server;
190
+ uint32_t blk_size;
191
+ QIOChannelSocket *sioc;
192
+ struct virtio_blk_config blkcfg;
193
+ bool writable;
194
+} VuBlkExport;
195
+
196
+static void vu_blk_req_complete(VuBlkReq *req)
197
{
573
{
198
VuDev *vu_dev = &req->server->vu_dev;
574
SshHostKeyCheckMode mode;
199
575
200
@@ -XXX,XX +XXX,XX @@ static void vu_block_req_complete(VuBlockReq *req)
576
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
201
free(req);
577
case SSH_HOST_KEY_CHECK_MODE_HASH:
578
if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
579
return check_host_key_hash(s, hkc->u.hash.hash,
580
- LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
581
+ SSH_PUBLICKEY_HASH_MD5, errp);
582
} else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
583
return check_host_key_hash(s, hkc->u.hash.hash,
584
- LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
585
+ SSH_PUBLICKEY_HASH_SHA1, errp);
586
}
587
g_assert_not_reached();
588
break;
589
case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
590
- return check_host_key_knownhosts(s, host, port, errp);
591
+ return check_host_key_knownhosts(s, errp);
592
default:
593
g_assert_not_reached();
594
}
595
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
596
return -EINVAL;
202
}
597
}
203
598
204
-static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
599
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
600
+static int authenticate(BDRVSSHState *s, Error **errp)
601
{
602
int r, ret;
603
- const char *userauthlist;
604
- LIBSSH2_AGENT *agent = NULL;
605
- struct libssh2_agent_publickey *identity;
606
- struct libssh2_agent_publickey *prev_identity = NULL;
607
+ int method;
608
609
- userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
610
- if (strstr(userauthlist, "publickey") == NULL) {
611
+ /* Try to authenticate with the "none" method. */
612
+ r = ssh_userauth_none(s->session, NULL);
613
+ if (r == SSH_AUTH_ERROR) {
614
ret = -EPERM;
615
- error_setg(errp,
616
- "remote server does not support \"publickey\" authentication");
617
+ session_error_setg(errp, s, "failed to authenticate using none "
618
+ "authentication");
619
goto out;
620
- }
621
-
622
- /* Connect to ssh-agent and try each identity in turn. */
623
- agent = libssh2_agent_init(s->session);
624
- if (!agent) {
625
- ret = -EINVAL;
626
- session_error_setg(errp, s, "failed to initialize ssh-agent support");
627
- goto out;
628
- }
629
- if (libssh2_agent_connect(agent)) {
630
- ret = -ECONNREFUSED;
631
- session_error_setg(errp, s, "failed to connect to ssh-agent");
632
- goto out;
633
- }
634
- if (libssh2_agent_list_identities(agent)) {
635
- ret = -EINVAL;
636
- session_error_setg(errp, s,
637
- "failed requesting identities from ssh-agent");
638
+ } else if (r == SSH_AUTH_SUCCESS) {
639
+ /* Authenticated! */
640
+ ret = 0;
641
goto out;
642
}
643
644
- for(;;) {
645
- r = libssh2_agent_get_identity(agent, &identity, prev_identity);
646
- if (r == 1) { /* end of list */
647
- break;
648
- }
649
- if (r < 0) {
650
+ method = ssh_userauth_list(s->session, NULL);
651
+ trace_ssh_auth_methods(method);
652
+
653
+ /*
654
+ * Try to authenticate with publickey, using the ssh-agent
655
+ * if available.
656
+ */
657
+ if (method & SSH_AUTH_METHOD_PUBLICKEY) {
658
+ r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
659
+ if (r == SSH_AUTH_ERROR) {
660
ret = -EINVAL;
661
- session_error_setg(errp, s,
662
- "failed to obtain identity from ssh-agent");
663
+ session_error_setg(errp, s, "failed to authenticate using "
664
+ "publickey authentication");
665
goto out;
666
- }
667
- r = libssh2_agent_userauth(agent, user, identity);
668
- if (r == 0) {
669
+ } else if (r == SSH_AUTH_SUCCESS) {
670
/* Authenticated! */
671
ret = 0;
672
goto out;
673
}
674
- /* Failed to authenticate with this identity, try the next one. */
675
- prev_identity = identity;
676
}
677
678
ret = -EPERM;
679
@@ -XXX,XX +XXX,XX @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
680
"and the identities held by your ssh-agent");
681
682
out:
683
- if (agent != NULL) {
684
- /* Note: libssh2 implementation implicitly calls
685
- * libssh2_agent_disconnect if necessary.
686
- */
687
- libssh2_agent_free(agent);
688
- }
689
-
690
return ret;
691
}
692
693
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
694
int ssh_flags, int creat_mode, Error **errp)
695
{
696
int r, ret;
697
- long port = 0;
698
+ unsigned int port = 0;
699
+ int new_sock = -1;
700
701
if (opts->has_user) {
702
s->user = g_strdup(opts->user);
703
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
704
s->inet = opts->server;
705
opts->server = NULL;
706
707
- if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
708
+ if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
709
error_setg(errp, "Use only numeric port value");
710
ret = -EINVAL;
711
goto err;
712
}
713
714
/* Open the socket and connect. */
715
- s->sock = inet_connect_saddr(s->inet, errp);
716
- if (s->sock < 0) {
717
+ new_sock = inet_connect_saddr(s->inet, errp);
718
+ if (new_sock < 0) {
719
ret = -EIO;
720
goto err;
721
}
722
723
+ /*
724
+ * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
725
+ * but do not fail if it cannot be disabled.
726
+ */
727
+ r = socket_set_nodelay(new_sock);
728
+ if (r < 0) {
729
+ warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
730
+ s->inet->host, strerror(errno));
731
+ }
732
+
733
/* Create SSH session. */
734
- s->session = libssh2_session_init();
735
+ s->session = ssh_new();
736
if (!s->session) {
737
ret = -EINVAL;
738
- session_error_setg(errp, s, "failed to initialize libssh2 session");
739
+ session_error_setg(errp, s, "failed to initialize libssh session");
740
goto err;
741
}
742
743
-#if TRACE_LIBSSH2 != 0
744
- libssh2_trace(s->session, TRACE_LIBSSH2);
745
-#endif
746
+ /*
747
+ * Make sure we are in blocking mode during the connection and
748
+ * authentication phases.
749
+ */
750
+ ssh_set_blocking(s->session, 1);
751
752
- r = libssh2_session_handshake(s->session, s->sock);
753
- if (r != 0) {
754
+ r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
755
+ if (r < 0) {
756
+ ret = -EINVAL;
757
+ session_error_setg(errp, s,
758
+ "failed to set the user in the libssh session");
759
+ goto err;
760
+ }
761
+
762
+ r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
763
+ if (r < 0) {
764
+ ret = -EINVAL;
765
+ session_error_setg(errp, s,
766
+ "failed to set the host in the libssh session");
767
+ goto err;
768
+ }
769
+
770
+ if (port > 0) {
771
+ r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
772
+ if (r < 0) {
773
+ ret = -EINVAL;
774
+ session_error_setg(errp, s,
775
+ "failed to set the port in the libssh session");
776
+ goto err;
777
+ }
778
+ }
779
+
780
+ r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
781
+ if (r < 0) {
782
+ ret = -EINVAL;
783
+ session_error_setg(errp, s,
784
+ "failed to disable the compression in the libssh "
785
+ "session");
786
+ goto err;
787
+ }
788
+
789
+ /* Read ~/.ssh/config. */
790
+ r = ssh_options_parse_config(s->session, NULL);
791
+ if (r < 0) {
792
+ ret = -EINVAL;
793
+ session_error_setg(errp, s, "failed to parse ~/.ssh/config");
794
+ goto err;
795
+ }
796
+
797
+ r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
798
+ if (r < 0) {
799
+ ret = -EINVAL;
800
+ session_error_setg(errp, s,
801
+ "failed to set the socket in the libssh session");
802
+ goto err;
803
+ }
804
+ /* libssh took ownership of the socket. */
805
+ s->sock = new_sock;
806
+ new_sock = -1;
807
+
808
+ /* Connect. */
809
+ r = ssh_connect(s->session);
810
+ if (r != SSH_OK) {
811
ret = -EINVAL;
812
session_error_setg(errp, s, "failed to establish SSH session");
813
goto err;
814
}
815
816
/* Check the remote host's key against known_hosts. */
817
- ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
818
+ ret = check_host_key(s, opts->host_key_check, errp);
819
if (ret < 0) {
820
goto err;
821
}
822
823
/* Authenticate. */
824
- ret = authenticate(s, s->user, errp);
825
+ ret = authenticate(s, errp);
826
if (ret < 0) {
827
goto err;
828
}
829
830
/* Start SFTP. */
831
- s->sftp = libssh2_sftp_init(s->session);
832
+ s->sftp = sftp_new(s->session);
833
if (!s->sftp) {
834
- session_error_setg(errp, s, "failed to initialize sftp handle");
835
+ session_error_setg(errp, s, "failed to create sftp handle");
836
+ ret = -EINVAL;
837
+ goto err;
838
+ }
839
+
840
+ r = sftp_init(s->sftp);
841
+ if (r < 0) {
842
+ sftp_error_setg(errp, s, "failed to initialize sftp handle");
843
ret = -EINVAL;
844
goto err;
845
}
846
847
/* Open the remote file. */
848
trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
849
- s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
850
- creat_mode);
851
+ s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
852
if (!s->sftp_handle) {
853
- session_error_setg(errp, s, "failed to open remote file '%s'",
854
- opts->path);
855
+ sftp_error_setg(errp, s, "failed to open remote file '%s'",
856
+ opts->path);
857
ret = -EINVAL;
858
goto err;
859
}
860
861
- r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
862
- if (r < 0) {
863
+ /* Make sure the SFTP file is handled in blocking mode. */
864
+ sftp_file_set_blocking(s->sftp_handle);
865
+
866
+ s->attrs = sftp_fstat(s->sftp_handle);
867
+ if (!s->attrs) {
868
sftp_error_setg(errp, s, "failed to read file attributes");
869
return -EINVAL;
870
}
871
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
872
return 0;
873
874
err:
875
+ if (s->attrs) {
876
+ sftp_attributes_free(s->attrs);
877
+ }
878
+ s->attrs = NULL;
879
if (s->sftp_handle) {
880
- libssh2_sftp_close(s->sftp_handle);
881
+ sftp_close(s->sftp_handle);
882
}
883
s->sftp_handle = NULL;
884
if (s->sftp) {
885
- libssh2_sftp_shutdown(s->sftp);
886
+ sftp_free(s->sftp);
887
}
888
s->sftp = NULL;
889
if (s->session) {
890
- libssh2_session_disconnect(s->session,
891
- "from qemu ssh client: "
892
- "error opening connection");
893
- libssh2_session_free(s->session);
894
+ ssh_disconnect(s->session);
895
+ ssh_free(s->session);
896
}
897
s->session = NULL;
898
+ s->sock = -1;
899
+ if (new_sock >= 0) {
900
+ close(new_sock);
901
+ }
902
903
return ret;
904
}
905
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
906
907
ssh_state_init(s);
908
909
- ssh_flags = LIBSSH2_FXF_READ;
910
+ ssh_flags = 0;
911
if (bdrv_flags & BDRV_O_RDWR) {
912
- ssh_flags |= LIBSSH2_FXF_WRITE;
913
+ ssh_flags |= O_RDWR;
914
+ } else {
915
+ ssh_flags |= O_RDONLY;
916
}
917
918
opts = ssh_parse_options(options, errp);
919
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
920
}
921
922
/* Go non-blocking. */
923
- libssh2_session_set_blocking(s->session, 0);
924
+ ssh_set_blocking(s->session, 0);
925
926
qapi_free_BlockdevOptionsSsh(opts);
927
928
return 0;
929
930
err:
931
- if (s->sock >= 0) {
932
- close(s->sock);
933
- }
934
- s->sock = -1;
935
-
936
qapi_free_BlockdevOptionsSsh(opts);
937
938
return ret;
939
@@ -XXX,XX +XXX,XX @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
940
{
941
ssize_t ret;
942
char c[1] = { '\0' };
943
- int was_blocking = libssh2_session_get_blocking(s->session);
944
+ int was_blocking = ssh_is_blocking(s->session);
945
946
/* offset must be strictly greater than the current size so we do
947
* not overwrite anything */
948
- assert(offset > 0 && offset > s->attrs.filesize);
949
+ assert(offset > 0 && offset > s->attrs->size);
950
951
- libssh2_session_set_blocking(s->session, 1);
952
+ ssh_set_blocking(s->session, 1);
953
954
- libssh2_sftp_seek64(s->sftp_handle, offset - 1);
955
- ret = libssh2_sftp_write(s->sftp_handle, c, 1);
956
+ sftp_seek64(s->sftp_handle, offset - 1);
957
+ ret = sftp_write(s->sftp_handle, c, 1);
958
959
- libssh2_session_set_blocking(s->session, was_blocking);
960
+ ssh_set_blocking(s->session, was_blocking);
961
962
if (ret < 0) {
963
sftp_error_setg(errp, s, "Failed to grow file");
964
return -EIO;
965
}
966
967
- s->attrs.filesize = offset;
968
+ s->attrs->size = offset;
969
return 0;
970
}
971
972
@@ -XXX,XX +XXX,XX @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
973
ssh_state_init(&s);
974
975
ret = connect_to_ssh(&s, opts->location,
976
- LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
977
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
978
+ O_RDWR | O_CREAT | O_TRUNC,
979
0644, errp);
980
if (ret < 0) {
981
goto fail;
982
@@ -XXX,XX +XXX,XX @@ static int ssh_has_zero_init(BlockDriverState *bs)
983
/* Assume false, unless we can positively prove it's true. */
984
int has_zero_init = 0;
985
986
- if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
987
- if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
988
- has_zero_init = 1;
989
- }
990
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
991
+ has_zero_init = 1;
992
}
993
994
return has_zero_init;
995
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
996
.co = qemu_coroutine_self()
997
};
998
999
- r = libssh2_session_block_directions(s->session);
1000
+ r = ssh_get_poll_flags(s->session);
1001
1002
- if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
1003
+ if (r & SSH_READ_PENDING) {
1004
rd_handler = restart_coroutine;
1005
}
1006
- if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
1007
+ if (r & SSH_WRITE_PENDING) {
1008
wr_handler = restart_coroutine;
1009
}
1010
1011
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
1012
trace_ssh_co_yield_back(s->sock);
1013
}
1014
1015
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
1016
- * in the remote file. Notice that it just updates a field in the
1017
- * sftp_handle structure, so there is no network traffic and it cannot
1018
- * fail.
1019
- *
1020
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
1021
- * performance since it causes the handle to throw away all in-flight
1022
- * reads and buffered readahead data. Therefore this function tries
1023
- * to be intelligent about when to call the underlying libssh2 function.
1024
- */
1025
-#define SSH_SEEK_WRITE 0
1026
-#define SSH_SEEK_READ 1
1027
-#define SSH_SEEK_FORCE 2
1028
-
1029
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
205
-{
1030
-{
206
- return container_of(server, VuBlockDev, vu_server);
1031
- bool op_read = (flags & SSH_SEEK_READ) != 0;
1032
- bool force = (flags & SSH_SEEK_FORCE) != 0;
1033
-
1034
- if (force || op_read != s->offset_op_read || offset != s->offset) {
1035
- trace_ssh_seek(offset);
1036
- libssh2_sftp_seek64(s->sftp_handle, offset);
1037
- s->offset = offset;
1038
- s->offset_op_read = op_read;
1039
- }
207
-}
1040
-}
208
-
1041
-
209
static int coroutine_fn
1042
static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
210
-vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
1043
int64_t offset, size_t size,
211
- uint32_t iovcnt, uint32_t type)
1044
QEMUIOVector *qiov)
212
+vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov,
1045
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
213
+ uint32_t iovcnt, uint32_t type)
1046
214
{
1047
trace_ssh_read(offset, size);
215
struct virtio_blk_discard_write_zeroes desc;
1048
216
ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
1049
- ssh_seek(s, offset, SSH_SEEK_READ);
217
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
1050
+ trace_ssh_seek(offset);
218
return -EINVAL;
1051
+ sftp_seek64(s->sftp_handle, offset);
219
}
1052
220
1053
/* This keeps track of the current iovec element ('i'), where we
221
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
1054
* will write to next ('buf'), and the end of the current iovec
222
uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
1055
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
223
le32_to_cpu(desc.num_sectors) << 9 };
1056
buf = i->iov_base;
224
if (type == VIRTIO_BLK_T_DISCARD) {
1057
end_of_vec = i->iov_base + i->iov_len;
225
- if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
1058
226
+ if (blk_co_pdiscard(blk, range[0], range[1]) == 0) {
1059
- /* libssh2 has a hard-coded limit of 2000 bytes per request,
1060
- * although it will also do readahead behind our backs. Therefore
1061
- * we may have to do repeated reads here until we have read 'size'
1062
- * bytes.
1063
- */
1064
for (got = 0; got < size; ) {
1065
+ size_t request_read_size;
1066
again:
1067
- trace_ssh_read_buf(buf, end_of_vec - buf);
1068
- r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
1069
- trace_ssh_read_return(r);
1070
+ /*
1071
+ * The size of SFTP packets is limited to 32K bytes, so limit
1072
+ * the amount of data requested to 16K, as libssh currently
1073
+ * does not handle multiple requests on its own.
1074
+ */
1075
+ request_read_size = MIN(end_of_vec - buf, 16384);
1076
+ trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
1077
+ r = sftp_read(s->sftp_handle, buf, request_read_size);
1078
+ trace_ssh_read_return(r, sftp_get_error(s->sftp));
1079
1080
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1081
+ if (r == SSH_AGAIN) {
1082
co_yield(s, bs);
1083
goto again;
1084
}
1085
- if (r < 0) {
1086
- sftp_error_trace(s, "read");
1087
- s->offset = -1;
1088
- return -EIO;
1089
- }
1090
- if (r == 0) {
1091
+ if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
1092
/* EOF: Short read so pad the buffer with zeroes and return it. */
1093
qemu_iovec_memset(qiov, got, 0, size - got);
227
return 0;
1094
return 0;
228
}
1095
}
229
} else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
1096
+ if (r <= 0) {
230
- if (blk_co_pwrite_zeroes(vdev_blk->backend,
1097
+ sftp_error_trace(s, "read");
231
- range[0], range[1], 0) == 0) {
1098
+ return -EIO;
232
+ if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) {
1099
+ }
233
return 0;
1100
1101
got += r;
1102
buf += r;
1103
- s->offset += r;
1104
if (buf >= end_of_vec && got < size) {
1105
i++;
1106
buf = i->iov_base;
1107
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1108
1109
trace_ssh_write(offset, size);
1110
1111
- ssh_seek(s, offset, SSH_SEEK_WRITE);
1112
+ trace_ssh_seek(offset);
1113
+ sftp_seek64(s->sftp_handle, offset);
1114
1115
/* This keeps track of the current iovec element ('i'), where we
1116
* will read from next ('buf'), and the end of the current iovec
1117
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1118
end_of_vec = i->iov_base + i->iov_len;
1119
1120
for (written = 0; written < size; ) {
1121
+ size_t request_write_size;
1122
again:
1123
- trace_ssh_write_buf(buf, end_of_vec - buf);
1124
- r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
1125
- trace_ssh_write_return(r);
1126
+ /*
1127
+ * Avoid too large data packets, as libssh currently does not
1128
+ * handle multiple requests on its own.
1129
+ */
1130
+ request_write_size = MIN(end_of_vec - buf, 131072);
1131
+ trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
1132
+ r = sftp_write(s->sftp_handle, buf, request_write_size);
1133
+ trace_ssh_write_return(r, sftp_get_error(s->sftp));
1134
1135
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1136
+ if (r == SSH_AGAIN) {
1137
co_yield(s, bs);
1138
goto again;
234
}
1139
}
235
}
1140
if (r < 0) {
236
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
1141
sftp_error_trace(s, "write");
237
return -EINVAL;
1142
- s->offset = -1;
1143
return -EIO;
1144
}
1145
- /* The libssh2 API is very unclear about this. A comment in
1146
- * the code says "nothing was acked, and no EAGAIN was
1147
- * received!" which apparently means that no data got sent
1148
- * out, and the underlying channel didn't return any EAGAIN
1149
- * indication. I think this is a bug in either libssh2 or
1150
- * OpenSSH (server-side). In any case, forcing a seek (to
1151
- * discard libssh2 internal buffers), and then trying again
1152
- * works for me.
1153
- */
1154
- if (r == 0) {
1155
- ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
1156
- co_yield(s, bs);
1157
- goto again;
1158
- }
1159
1160
written += r;
1161
buf += r;
1162
- s->offset += r;
1163
if (buf >= end_of_vec && written < size) {
1164
i++;
1165
buf = i->iov_base;
1166
end_of_vec = i->iov_base + i->iov_len;
1167
}
1168
1169
- if (offset + written > s->attrs.filesize)
1170
- s->attrs.filesize = offset + written;
1171
+ if (offset + written > s->attrs->size) {
1172
+ s->attrs->size = offset + written;
1173
+ }
1174
}
1175
1176
return 0;
1177
@@ -XXX,XX +XXX,XX @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
1178
}
238
}
1179
}
239
1180
240
-static int coroutine_fn vu_block_flush(VuBlockReq *req)
1181
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
241
+static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
1182
+#ifdef HAVE_LIBSSH_0_8
1183
1184
static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
242
{
1185
{
243
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
1186
int r;
244
- BlockBackend *backend = vdev_blk->backend;
1187
245
- return blk_co_flush(backend);
1188
trace_ssh_flush();
246
-}
1189
+
247
-
1190
+ if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
248
-static void coroutine_fn vu_block_virtio_process_req(void *opaque)
1191
+ unsafe_flush_warning(s, "OpenSSH >= 6.3");
249
-{
1192
+ return 0;
250
- VuBlockReq *req = opaque;
1193
+ }
251
+ VuBlkReq *req = opaque;
1194
again:
252
VuServer *server = req->server;
1195
- r = libssh2_sftp_fsync(s->sftp_handle);
253
VuVirtqElement *elem = &req->elem;
1196
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
254
uint32_t type;
1197
+ r = sftp_fsync(s->sftp_handle);
255
1198
+ if (r == SSH_AGAIN) {
256
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
1199
co_yield(s, bs);
257
- BlockBackend *backend = vdev_blk->backend;
1200
goto again;
258
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
1201
}
259
+ BlockBackend *blk = vexp->export.blk;
1202
- if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
260
1203
- libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
261
struct iovec *in_iov = elem->in_sg;
1204
- unsafe_flush_warning(s, "OpenSSH >= 6.3");
262
struct iovec *out_iov = elem->out_sg;
1205
- return 0;
263
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
1206
- }
264
bool is_write = type & VIRTIO_BLK_T_OUT;
1207
if (r < 0) {
265
req->sector_num = le64_to_cpu(req->out.sector);
1208
sftp_error_trace(s, "fsync");
266
1209
return -EIO;
267
- int64_t offset = req->sector_num * vdev_blk->blk_size;
1210
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
268
+ if (is_write && !vexp->writable) {
1211
return ret;
269
+ req->in->status = VIRTIO_BLK_S_IOERR;
270
+ break;
271
+ }
272
+
273
+ int64_t offset = req->sector_num * vexp->blk_size;
274
QEMUIOVector qiov;
275
if (is_write) {
276
qemu_iovec_init_external(&qiov, out_iov, out_num);
277
- ret = blk_co_pwritev(backend, offset, qiov.size,
278
- &qiov, 0);
279
+ ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
280
} else {
281
qemu_iovec_init_external(&qiov, in_iov, in_num);
282
- ret = blk_co_preadv(backend, offset, qiov.size,
283
- &qiov, 0);
284
+ ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
285
}
286
if (ret >= 0) {
287
req->in->status = VIRTIO_BLK_S_OK;
288
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
289
break;
290
}
291
case VIRTIO_BLK_T_FLUSH:
292
- if (vu_block_flush(req) == 0) {
293
+ if (blk_co_flush(blk) == 0) {
294
req->in->status = VIRTIO_BLK_S_OK;
295
} else {
296
req->in->status = VIRTIO_BLK_S_IOERR;
297
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
298
case VIRTIO_BLK_T_DISCARD:
299
case VIRTIO_BLK_T_WRITE_ZEROES: {
300
int rc;
301
- rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1],
302
- out_num, type);
303
+
304
+ if (!vexp->writable) {
305
+ req->in->status = VIRTIO_BLK_S_IOERR;
306
+ break;
307
+ }
308
+
309
+ rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type);
310
if (rc == 0) {
311
req->in->status = VIRTIO_BLK_S_OK;
312
} else {
313
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
314
break;
315
}
316
317
- vu_block_req_complete(req);
318
+ vu_blk_req_complete(req);
319
return;
320
321
err:
322
- free(elem);
323
+ free(req);
324
}
1212
}
325
1213
326
-static void vu_block_process_vq(VuDev *vu_dev, int idx)
1214
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
327
+static void vu_blk_process_vq(VuDev *vu_dev, int idx)
1215
+#else /* !HAVE_LIBSSH_0_8 */
1216
1217
static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
328
{
1218
{
329
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
1219
BDRVSSHState *s = bs->opaque;
330
VuVirtq *vq = vu_get_queue(vu_dev, idx);
1220
331
1221
- unsafe_flush_warning(s, "libssh2 >= 1.4.4");
332
while (1) {
1222
+ unsafe_flush_warning(s, "libssh >= 0.8.0");
333
- VuBlockReq *req;
334
+ VuBlkReq *req;
335
336
- req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq));
337
+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
338
if (!req) {
339
break;
340
}
341
@@ -XXX,XX +XXX,XX @@ static void vu_block_process_vq(VuDev *vu_dev, int idx)
342
req->vq = vq;
343
344
Coroutine *co =
345
- qemu_coroutine_create(vu_block_virtio_process_req, req);
346
+ qemu_coroutine_create(vu_blk_virtio_process_req, req);
347
qemu_coroutine_enter(co);
348
}
349
}
350
351
-static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started)
352
+static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started)
353
{
354
VuVirtq *vq;
355
356
assert(vu_dev);
357
358
vq = vu_get_queue(vu_dev, idx);
359
- vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL);
360
+ vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL);
361
}
362
363
-static uint64_t vu_block_get_features(VuDev *dev)
364
+static uint64_t vu_blk_get_features(VuDev *dev)
365
{
366
uint64_t features;
367
VuServer *server = container_of(dev, VuServer, vu_dev);
368
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
369
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
370
features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
371
1ull << VIRTIO_BLK_F_SEG_MAX |
372
1ull << VIRTIO_BLK_F_TOPOLOGY |
373
@@ -XXX,XX +XXX,XX @@ static uint64_t vu_block_get_features(VuDev *dev)
374
1ull << VIRTIO_RING_F_EVENT_IDX |
375
1ull << VHOST_USER_F_PROTOCOL_FEATURES;
376
377
- if (!vdev_blk->writable) {
378
+ if (!vexp->writable) {
379
features |= 1ull << VIRTIO_BLK_F_RO;
380
}
381
382
return features;
383
}
384
385
-static uint64_t vu_block_get_protocol_features(VuDev *dev)
386
+static uint64_t vu_blk_get_protocol_features(VuDev *dev)
387
{
388
return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
389
1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
390
}
391
392
static int
393
-vu_block_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
394
+vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
395
{
396
+ /* TODO blkcfg must be little-endian for VIRTIO 1.0 */
397
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
398
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
399
- memcpy(config, &vdev_blk->blkcfg, len);
400
-
401
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
402
+ memcpy(config, &vexp->blkcfg, len);
403
return 0;
1223
return 0;
404
}
1224
}
405
1225
406
static int
1226
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
407
-vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
1227
+#endif /* !HAVE_LIBSSH_0_8 */
408
+vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
1228
409
uint32_t offset, uint32_t size, uint32_t flags)
1229
static int64_t ssh_getlength(BlockDriverState *bs)
410
{
1230
{
411
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
1231
BDRVSSHState *s = bs->opaque;
412
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
1232
int64_t length;
413
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
1233
414
uint8_t wce;
1234
- /* Note we cannot make a libssh2 call here. */
415
1235
- length = (int64_t) s->attrs.filesize;
416
/* don't support live migration */
1236
+ /* Note we cannot make a libssh call here. */
417
@@ -XXX,XX +XXX,XX @@ vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
1237
+ length = (int64_t) s->attrs->size;
418
}
1238
trace_ssh_getlength(length);
419
1239
420
wce = *data;
1240
return length;
421
- vdev_blk->blkcfg.wce = wce;
1241
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
422
- blk_set_enable_write_cache(vdev_blk->backend, wce);
1242
return -ENOTSUP;
423
+ vexp->blkcfg.wce = wce;
1243
}
424
+ blk_set_enable_write_cache(vexp->export.blk, wce);
1244
425
return 0;
1245
- if (offset < s->attrs.filesize) {
1246
+ if (offset < s->attrs->size) {
1247
error_setg(errp, "ssh driver does not support shrinking files");
1248
return -ENOTSUP;
1249
}
1250
1251
- if (offset == s->attrs.filesize) {
1252
+ if (offset == s->attrs->size) {
1253
return 0;
1254
}
1255
1256
@@ -XXX,XX +XXX,XX @@ static void bdrv_ssh_init(void)
1257
{
1258
int r;
1259
1260
- r = libssh2_init(0);
1261
+ r = ssh_init();
1262
if (r != 0) {
1263
- fprintf(stderr, "libssh2 initialization failed, %d\n", r);
1264
+ fprintf(stderr, "libssh initialization failed, %d\n", r);
1265
exit(EXIT_FAILURE);
1266
}
1267
1268
+#if TRACE_LIBSSH != 0
1269
+ ssh_set_log_level(TRACE_LIBSSH);
1270
+#endif
1271
+
1272
bdrv_register(&bdrv_ssh);
426
}
1273
}
427
1274
428
@@ -XXX,XX +XXX,XX @@ vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
1275
diff --git a/.travis.yml b/.travis.yml
429
* of vu_process_message.
1276
index XXXXXXX..XXXXXXX 100644
430
*
1277
--- a/.travis.yml
431
*/
1278
+++ b/.travis.yml
432
-static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
1279
@@ -XXX,XX +XXX,XX @@ addons:
433
+static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
1280
- libseccomp-dev
434
{
1281
- libspice-protocol-dev
435
if (vmsg->request == VHOST_USER_NONE) {
1282
- libspice-server-dev
436
dev->panic(dev, "disconnect");
1283
- - libssh2-1-dev
437
@@ -XXX,XX +XXX,XX @@ static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
1284
+ - libssh-dev
438
return false;
1285
- liburcu-dev
439
}
1286
- libusb-1.0-0-dev
440
1287
- libvte-2.91-dev
441
-static const VuDevIface vu_block_iface = {
1288
@@ -XXX,XX +XXX,XX @@ matrix:
442
- .get_features = vu_block_get_features,
1289
- libseccomp-dev
443
- .queue_set_started = vu_block_queue_set_started,
1290
- libspice-protocol-dev
444
- .get_protocol_features = vu_block_get_protocol_features,
1291
- libspice-server-dev
445
- .get_config = vu_block_get_config,
1292
- - libssh2-1-dev
446
- .set_config = vu_block_set_config,
1293
+ - libssh-dev
447
- .process_msg = vu_block_process_msg,
1294
- liburcu-dev
448
+static const VuDevIface vu_blk_iface = {
1295
- libusb-1.0-0-dev
449
+ .get_features = vu_blk_get_features,
1296
- libvte-2.91-dev
450
+ .queue_set_started = vu_blk_queue_set_started,
1297
diff --git a/block/trace-events b/block/trace-events
451
+ .get_protocol_features = vu_blk_get_protocol_features,
1298
index XXXXXXX..XXXXXXX 100644
452
+ .get_config = vu_blk_get_config,
1299
--- a/block/trace-events
453
+ .set_config = vu_blk_set_config,
1300
+++ b/block/trace-events
454
+ .process_msg = vu_blk_process_msg,
1301
@@ -XXX,XX +XXX,XX @@ nbd_client_connect_success(const char *export_name) "export '%s'"
455
};
1302
# ssh.c
456
1303
ssh_restart_coroutine(void *co) "co=%p"
457
static void blk_aio_attached(AioContext *ctx, void *opaque)
1304
ssh_flush(void) "fsync"
458
{
1305
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
459
- VuBlockDev *vub_dev = opaque;
1306
+ssh_check_host_key_knownhosts(void) "host key OK"
460
- vhost_user_server_attach_aio_context(&vub_dev->vu_server, ctx);
1307
ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
461
+ VuBlkExport *vexp = opaque;
1308
ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
462
+ vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
1309
ssh_co_yield_back(int sock) "s->sock=%d - back"
463
}
1310
ssh_getlength(int64_t length) "length=%" PRIi64
464
1311
ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
465
static void blk_aio_detach(void *opaque)
1312
ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
466
{
1313
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
467
- VuBlockDev *vub_dev = opaque;
1314
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
468
- vhost_user_server_detach_aio_context(&vub_dev->vu_server);
1315
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
469
+ VuBlkExport *vexp = opaque;
1316
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
470
+ vhost_user_server_detach_aio_context(&vexp->vu_server);
1317
ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
471
}
1318
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
472
1319
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
473
static void
1320
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
474
-vu_block_initialize_config(BlockDriverState *bs,
1321
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
475
+vu_blk_initialize_config(BlockDriverState *bs,
1322
ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
476
struct virtio_blk_config *config, uint32_t blk_size)
1323
+ssh_auth_methods(int methods) "auth methods=0x%x"
477
{
1324
+ssh_server_status(int status) "server status=%d"
478
config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
1325
479
@@ -XXX,XX +XXX,XX @@ vu_block_initialize_config(BlockDriverState *bs,
1326
# curl.c
480
config->max_write_zeroes_seg = 1;
1327
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
481
}
1328
@@ -XXX,XX +XXX,XX @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
482
1329
sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
483
-static VuBlockDev *vu_block_init(VuBlockDev *vu_block_device, Error **errp)
1330
484
+static void vu_blk_exp_request_shutdown(BlockExport *exp)
1331
# ssh.c
485
{
1332
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
486
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
1333
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
487
1334
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
488
- BlockBackend *blk;
1335
index XXXXXXX..XXXXXXX 100644
489
- Error *local_error = NULL;
1336
--- a/docs/qemu-block-drivers.texi
490
- const char *node_name = vu_block_device->node_name;
1337
+++ b/docs/qemu-block-drivers.texi
491
- bool writable = vu_block_device->writable;
1338
@@ -XXX,XX +XXX,XX @@ print a warning when @code{fsync} is not supported:
492
- uint64_t perm = BLK_PERM_CONSISTENT_READ;
1339
493
- int ret;
1340
warning: ssh server @code{ssh.example.com:22} does not support fsync
1341
1342
-With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
1343
+With sufficiently new versions of libssh and OpenSSH, @code{fsync} is
1344
supported.
1345
1346
@node disk_images_nvme
1347
diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker
1348
index XXXXXXX..XXXXXXX 100644
1349
--- a/tests/docker/dockerfiles/debian-win32-cross.docker
1350
+++ b/tests/docker/dockerfiles/debian-win32-cross.docker
1351
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1352
mxe-$TARGET-w64-mingw32.shared-curl \
1353
mxe-$TARGET-w64-mingw32.shared-glib \
1354
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1355
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1356
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1357
mxe-$TARGET-w64-mingw32.shared-lzo \
1358
mxe-$TARGET-w64-mingw32.shared-nettle \
1359
diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker
1360
index XXXXXXX..XXXXXXX 100644
1361
--- a/tests/docker/dockerfiles/debian-win64-cross.docker
1362
+++ b/tests/docker/dockerfiles/debian-win64-cross.docker
1363
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1364
mxe-$TARGET-w64-mingw32.shared-curl \
1365
mxe-$TARGET-w64-mingw32.shared-glib \
1366
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1367
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1368
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1369
mxe-$TARGET-w64-mingw32.shared-lzo \
1370
mxe-$TARGET-w64-mingw32.shared-nettle \
1371
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
1372
index XXXXXXX..XXXXXXX 100644
1373
--- a/tests/docker/dockerfiles/fedora.docker
1374
+++ b/tests/docker/dockerfiles/fedora.docker
1375
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1376
libpng-devel \
1377
librbd-devel \
1378
libseccomp-devel \
1379
- libssh2-devel \
1380
+ libssh-devel \
1381
libubsan \
1382
libusbx-devel \
1383
libxml2-devel \
1384
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1385
mingw32-gtk3 \
1386
mingw32-libjpeg-turbo \
1387
mingw32-libpng \
1388
- mingw32-libssh2 \
1389
mingw32-libtasn1 \
1390
mingw32-nettle \
1391
mingw32-pixman \
1392
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1393
mingw64-gtk3 \
1394
mingw64-libjpeg-turbo \
1395
mingw64-libpng \
1396
- mingw64-libssh2 \
1397
mingw64-libtasn1 \
1398
mingw64-nettle \
1399
mingw64-pixman \
1400
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
1401
index XXXXXXX..XXXXXXX 100644
1402
--- a/tests/docker/dockerfiles/ubuntu.docker
1403
+++ b/tests/docker/dockerfiles/ubuntu.docker
1404
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1405
libsnappy-dev \
1406
libspice-protocol-dev \
1407
libspice-server-dev \
1408
- libssh2-1-dev \
1409
+ libssh-dev \
1410
libusb-1.0-0-dev \
1411
libusbredirhost-dev \
1412
libvdeplug-dev \
1413
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
1414
index XXXXXXX..XXXXXXX 100644
1415
--- a/tests/docker/dockerfiles/ubuntu1804.docker
1416
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
1417
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1418
libsnappy-dev \
1419
libspice-protocol-dev \
1420
libspice-server-dev \
1421
- libssh2-1-dev \
1422
+ libssh-dev \
1423
libusb-1.0-0-dev \
1424
libusbredirhost-dev \
1425
libvdeplug-dev \
1426
diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207
1427
index XXXXXXX..XXXXXXX 100755
1428
--- a/tests/qemu-iotests/207
1429
+++ b/tests/qemu-iotests/207
1430
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1431
1432
iotests.img_info_log(remote_path)
1433
1434
- md5_key = subprocess.check_output(
1435
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1436
- 'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1',
1437
- shell=True).rstrip().decode('ascii')
1438
+ keys = subprocess.check_output(
1439
+ 'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1440
+ 'cut -d" " -f3',
1441
+ shell=True).rstrip().decode('ascii').split('\n')
1442
+
1443
+ # Mappings of base64 representations to digests
1444
+ md5_keys = {}
1445
+ sha1_keys = {}
1446
+
1447
+ for key in keys:
1448
+ md5_keys[key] = subprocess.check_output(
1449
+ 'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key,
1450
+ shell=True).rstrip().decode('ascii')
1451
+
1452
+ sha1_keys[key] = subprocess.check_output(
1453
+ 'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key,
1454
+ shell=True).rstrip().decode('ascii')
1455
1456
vm.launch()
1457
+
1458
+ # Find correct key first
1459
+ matching_key = None
1460
+ for key in keys:
1461
+ result = vm.qmp('blockdev-add',
1462
+ driver='ssh', node_name='node0', path=disk_path,
1463
+ server={
1464
+ 'host': '127.0.0.1',
1465
+ 'port': '22',
1466
+ }, host_key_check={
1467
+ 'mode': 'hash',
1468
+ 'type': 'md5',
1469
+ 'hash': md5_keys[key],
1470
+ })
1471
+
1472
+ if 'error' not in result:
1473
+ vm.qmp('blockdev-del', node_name='node0')
1474
+ matching_key = key
1475
+ break
1476
+
1477
+ if matching_key is None:
1478
+ vm.shutdown()
1479
+ iotests.notrun('Did not find a key that fits 127.0.0.1')
1480
+
1481
blockdev_create(vm, { 'driver': 'ssh',
1482
'location': {
1483
'path': disk_path,
1484
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1485
'host-key-check': {
1486
'mode': 'hash',
1487
'type': 'md5',
1488
- 'hash': md5_key,
1489
+ 'hash': md5_keys[matching_key],
1490
}
1491
},
1492
'size': 8388608 })
1493
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1494
1495
iotests.img_info_log(remote_path)
1496
1497
- sha1_key = subprocess.check_output(
1498
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1499
- 'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1',
1500
- shell=True).rstrip().decode('ascii')
494
-
1501
-
495
- AioContext *ctx;
1502
vm.launch()
496
-
1503
blockdev_create(vm, { 'driver': 'ssh',
497
- BlockDriverState *bs = bdrv_lookup_bs(node_name, node_name, &local_error);
1504
'location': {
498
-
1505
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
499
- if (!bs) {
1506
'host-key-check': {
500
- error_propagate(errp, local_error);
1507
'mode': 'hash',
501
- return NULL;
1508
'type': 'sha1',
502
- }
1509
- 'hash': sha1_key,
503
-
1510
+ 'hash': sha1_keys[matching_key],
504
- if (bdrv_is_read_only(bs)) {
1511
}
505
- writable = false;
1512
},
506
- }
1513
'size': 4194304 })
507
-
1514
diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out
508
- if (writable) {
509
- perm |= BLK_PERM_WRITE;
510
- }
511
-
512
- ctx = bdrv_get_aio_context(bs);
513
- aio_context_acquire(ctx);
514
- bdrv_invalidate_cache(bs, NULL);
515
- aio_context_release(ctx);
516
-
517
- /*
518
- * Don't allow resize while the vhost user server is running,
519
- * otherwise we don't care what happens with the node.
520
- */
521
- blk = blk_new(bdrv_get_aio_context(bs), perm,
522
- BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
523
- BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
524
- ret = blk_insert_bs(blk, bs, errp);
525
-
526
- if (ret < 0) {
527
- goto fail;
528
- }
529
-
530
- blk_set_enable_write_cache(blk, false);
531
-
532
- blk_set_allow_aio_context_change(blk, true);
533
-
534
- vu_block_device->blkcfg.wce = 0;
535
- vu_block_device->backend = blk;
536
- if (!vu_block_device->blk_size) {
537
- vu_block_device->blk_size = BDRV_SECTOR_SIZE;
538
- }
539
- vu_block_device->blkcfg.blk_size = vu_block_device->blk_size;
540
- blk_set_guest_block_size(blk, vu_block_device->blk_size);
541
- vu_block_initialize_config(bs, &vu_block_device->blkcfg,
542
- vu_block_device->blk_size);
543
- return vu_block_device;
544
-
545
-fail:
546
- blk_unref(blk);
547
- return NULL;
548
-}
549
-
550
-static void vu_block_deinit(VuBlockDev *vu_block_device)
551
-{
552
- if (vu_block_device->backend) {
553
- blk_remove_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
554
- blk_aio_detach, vu_block_device);
555
- }
556
-
557
- blk_unref(vu_block_device->backend);
558
-}
559
-
560
-static void vhost_user_blk_server_stop(VuBlockDev *vu_block_device)
561
-{
562
- vhost_user_server_stop(&vu_block_device->vu_server);
563
- vu_block_deinit(vu_block_device);
564
-}
565
-
566
-static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
567
- Error **errp)
568
-{
569
- AioContext *ctx;
570
- SocketAddress *addr = vu_block_device->addr;
571
-
572
- if (!vu_block_init(vu_block_device, errp)) {
573
- return;
574
- }
575
-
576
- ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
577
-
578
- if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
579
- VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface,
580
- errp)) {
581
- goto error;
582
- }
583
-
584
- blk_add_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
585
- blk_aio_detach, vu_block_device);
586
- vu_block_device->running = true;
587
- return;
588
-
589
- error:
590
- vu_block_deinit(vu_block_device);
591
-}
592
-
593
-static bool vu_prop_modifiable(VuBlockDev *vus, Error **errp)
594
-{
595
- if (vus->running) {
596
- error_setg(errp, "The property can't be modified "
597
- "while the server is running");
598
- return false;
599
- }
600
- return true;
601
-}
602
-
603
-static void vu_set_node_name(Object *obj, const char *value, Error **errp)
604
-{
605
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
606
-
607
- if (!vu_prop_modifiable(vus, errp)) {
608
- return;
609
- }
610
-
611
- if (vus->node_name) {
612
- g_free(vus->node_name);
613
- }
614
-
615
- vus->node_name = g_strdup(value);
616
-}
617
-
618
-static char *vu_get_node_name(Object *obj, Error **errp)
619
-{
620
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
621
- return g_strdup(vus->node_name);
622
-}
623
-
624
-static void free_socket_addr(SocketAddress *addr)
625
-{
626
- g_free(addr->u.q_unix.path);
627
- g_free(addr);
628
-}
629
-
630
-static void vu_set_unix_socket(Object *obj, const char *value,
631
- Error **errp)
632
-{
633
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
634
-
635
- if (!vu_prop_modifiable(vus, errp)) {
636
- return;
637
- }
638
-
639
- if (vus->addr) {
640
- free_socket_addr(vus->addr);
641
- }
642
-
643
- SocketAddress *addr = g_new0(SocketAddress, 1);
644
- addr->type = SOCKET_ADDRESS_TYPE_UNIX;
645
- addr->u.q_unix.path = g_strdup(value);
646
- vus->addr = addr;
647
+ vhost_user_server_stop(&vexp->vu_server);
648
}
649
650
-static char *vu_get_unix_socket(Object *obj, Error **errp)
651
+static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
652
+ Error **errp)
653
{
654
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
655
- return g_strdup(vus->addr->u.q_unix.path);
656
-}
657
-
658
-static bool vu_get_block_writable(Object *obj, Error **errp)
659
-{
660
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
661
- return vus->writable;
662
-}
663
-
664
-static void vu_set_block_writable(Object *obj, bool value, Error **errp)
665
-{
666
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
667
-
668
- if (!vu_prop_modifiable(vus, errp)) {
669
- return;
670
- }
671
-
672
- vus->writable = value;
673
-}
674
-
675
-static void vu_get_blk_size(Object *obj, Visitor *v, const char *name,
676
- void *opaque, Error **errp)
677
-{
678
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
679
- uint32_t value = vus->blk_size;
680
-
681
- visit_type_uint32(v, name, &value, errp);
682
-}
683
-
684
-static void vu_set_blk_size(Object *obj, Visitor *v, const char *name,
685
- void *opaque, Error **errp)
686
-{
687
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
688
-
689
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
690
+ BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
691
Error *local_err = NULL;
692
- uint32_t value;
693
+ uint64_t logical_block_size;
694
695
- if (!vu_prop_modifiable(vus, errp)) {
696
- return;
697
- }
698
+ vexp->writable = opts->writable;
699
+ vexp->blkcfg.wce = 0;
700
701
- visit_type_uint32(v, name, &value, &local_err);
702
- if (local_err) {
703
- goto out;
704
+ if (vu_opts->has_logical_block_size) {
705
+ logical_block_size = vu_opts->logical_block_size;
706
+ } else {
707
+ logical_block_size = BDRV_SECTOR_SIZE;
708
}
709
-
710
- check_block_size(object_get_typename(obj), name, value, &local_err);
711
+ check_block_size(exp->id, "logical-block-size", logical_block_size,
712
+ &local_err);
713
if (local_err) {
714
- goto out;
715
+ error_propagate(errp, local_err);
716
+ return -EINVAL;
717
+ }
718
+ vexp->blk_size = logical_block_size;
719
+ blk_set_guest_block_size(exp->blk, logical_block_size);
720
+ vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
721
+ logical_block_size);
722
+
723
+ blk_set_allow_aio_context_change(exp->blk, true);
724
+ blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
725
+ vexp);
726
+
727
+ if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
728
+ VHOST_USER_BLK_MAX_QUEUES, &vu_blk_iface,
729
+ errp)) {
730
+ blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
731
+ blk_aio_detach, vexp);
732
+ return -EADDRNOTAVAIL;
733
}
734
735
- vus->blk_size = value;
736
-
737
-out:
738
- error_propagate(errp, local_err);
739
-}
740
-
741
-static void vhost_user_blk_server_instance_finalize(Object *obj)
742
-{
743
- VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
744
-
745
- vhost_user_blk_server_stop(vub);
746
-
747
- /*
748
- * Unlike object_property_add_str, object_class_property_add_str
749
- * doesn't have a release method. Thus manual memory freeing is
750
- * needed.
751
- */
752
- free_socket_addr(vub->addr);
753
- g_free(vub->node_name);
754
-}
755
-
756
-static void vhost_user_blk_server_complete(UserCreatable *obj, Error **errp)
757
-{
758
- VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
759
-
760
- vhost_user_blk_server_start(vub, errp);
761
+ return 0;
762
}
763
764
-static void vhost_user_blk_server_class_init(ObjectClass *klass,
765
- void *class_data)
766
+static void vu_blk_exp_delete(BlockExport *exp)
767
{
768
- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
769
- ucc->complete = vhost_user_blk_server_complete;
770
-
771
- object_class_property_add_bool(klass, "writable",
772
- vu_get_block_writable,
773
- vu_set_block_writable);
774
-
775
- object_class_property_add_str(klass, "node-name",
776
- vu_get_node_name,
777
- vu_set_node_name);
778
-
779
- object_class_property_add_str(klass, "unix-socket",
780
- vu_get_unix_socket,
781
- vu_set_unix_socket);
782
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
783
784
- object_class_property_add(klass, "logical-block-size", "uint32",
785
- vu_get_blk_size, vu_set_blk_size,
786
- NULL, NULL);
787
+ blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
788
+ vexp);
789
}
790
791
-static const TypeInfo vhost_user_blk_server_info = {
792
- .name = TYPE_VHOST_USER_BLK_SERVER,
793
- .parent = TYPE_OBJECT,
794
- .instance_size = sizeof(VuBlockDev),
795
- .instance_finalize = vhost_user_blk_server_instance_finalize,
796
- .class_init = vhost_user_blk_server_class_init,
797
- .interfaces = (InterfaceInfo[]) {
798
- {TYPE_USER_CREATABLE},
799
- {}
800
- },
801
+const BlockExportDriver blk_exp_vhost_user_blk = {
802
+ .type = BLOCK_EXPORT_TYPE_VHOST_USER_BLK,
803
+ .instance_size = sizeof(VuBlkExport),
804
+ .create = vu_blk_exp_create,
805
+ .delete = vu_blk_exp_delete,
806
+ .request_shutdown = vu_blk_exp_request_shutdown,
807
};
808
-
809
-static void vhost_user_blk_server_register_types(void)
810
-{
811
- type_register_static(&vhost_user_blk_server_info);
812
-}
813
-
814
-type_init(vhost_user_blk_server_register_types)
815
diff --git a/tests/qtest/vhost-user-blk-test.c b/tests/qtest/vhost-user-blk-test.c
816
index XXXXXXX..XXXXXXX 100644
1515
index XXXXXXX..XXXXXXX 100644
817
--- a/tests/qtest/vhost-user-blk-test.c
1516
--- a/tests/qemu-iotests/207.out
818
+++ b/tests/qtest/vhost-user-blk-test.c
1517
+++ b/tests/qemu-iotests/207.out
819
@@ -XXX,XX +XXX,XX @@ static char *start_vhost_user_blk(GString *cmd_line, int vus_instances)
1518
@@ -XXX,XX +XXX,XX @@ virtual size: 4 MiB (4194304 bytes)
820
img_path = drive_create();
1519
821
g_string_append_printf(storage_daemon_command,
1520
{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}}
822
"--blockdev driver=file,node-name=disk%d,filename=%s "
1521
{"return": {}}
823
- "--object vhost-user-blk-server,id=disk%d,unix-socket=%s,"
1522
-Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31)
824
+ "--export type=vhost-user-blk,id=disk%d,addr.type=unix,addr.path=%s,"
1523
+Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2)
825
"node-name=disk%i,writable=on ",
1524
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
826
i, img_path, i, sock_path, i);
1525
{"return": {}}
827
1526
828
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
829
index XXXXXXX..XXXXXXX 100644
830
--- a/util/vhost-user-server.c
831
+++ b/util/vhost-user-server.c
832
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
833
Error **errp)
834
{
835
QEMUBH *bh;
836
- QIONetListener *listener = qio_net_listener_new();
837
+ QIONetListener *listener;
838
+
839
+ if (socket_addr->type != SOCKET_ADDRESS_TYPE_UNIX &&
840
+ socket_addr->type != SOCKET_ADDRESS_TYPE_FD) {
841
+ error_setg(errp, "Only socket address types 'unix' and 'fd' are supported");
842
+ return false;
843
+ }
844
+
845
+ listener = qio_net_listener_new();
846
if (qio_net_listener_open_sync(listener, socket_addr, 1,
847
errp) < 0) {
848
object_unref(OBJECT(listener));
849
diff --git a/block/export/meson.build b/block/export/meson.build
850
index XXXXXXX..XXXXXXX 100644
851
--- a/block/export/meson.build
852
+++ b/block/export/meson.build
853
@@ -1 +1,2 @@
854
block_ss.add(files('export.c'))
855
+block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', '../../contrib/libvhost-user/libvhost-user.c'))
856
diff --git a/block/meson.build b/block/meson.build
857
index XXXXXXX..XXXXXXX 100644
858
--- a/block/meson.build
859
+++ b/block/meson.build
860
@@ -XXX,XX +XXX,XX @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')
861
block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
862
block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c'))
863
block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
864
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('export/vhost-user-blk-server.c', '../contrib/libvhost-user/libvhost-user.c'))
865
block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c'))
866
block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c'))
867
block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c'))
868
--
1527
--
869
2.26.2
1528
2.21.0
870
1529
1530
diff view generated by jsdifflib
1
We already have access to the value with the correct type (ioc and sioc
1
Tests should place their files into the test directory. This includes
2
are the same QIOChannel).
2
Unix sockets. 205 currently fails to do so, which prevents it from
3
being run concurrently.
3
4
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Signed-off-by: Max Reitz <mreitz@redhat.com>
5
Message-id: 20200924151549.913737-4-stefanha@redhat.com
6
Message-id: 20190618210238.9524-1-mreitz@redhat.com
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Signed-off-by: Max Reitz <mreitz@redhat.com>
7
---
9
---
8
util/vhost-user-server.c | 2 +-
10
tests/qemu-iotests/205 | 2 +-
9
1 file changed, 1 insertion(+), 1 deletion(-)
11
1 file changed, 1 insertion(+), 1 deletion(-)
10
12
11
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
13
diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100755
13
--- a/util/vhost-user-server.c
15
--- a/tests/qemu-iotests/205
14
+++ b/util/vhost-user-server.c
16
+++ b/tests/qemu-iotests/205
15
@@ -XXX,XX +XXX,XX @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
17
@@ -XXX,XX +XXX,XX @@ import iotests
16
server->ioc = QIO_CHANNEL(sioc);
18
import time
17
object_ref(OBJECT(server->ioc));
19
from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive
18
qio_channel_attach_aio_context(server->ioc, server->ctx);
20
19
- qio_channel_set_blocking(QIO_CHANNEL(server->sioc), false, NULL);
21
-nbd_sock = 'nbd_sock'
20
+ qio_channel_set_blocking(server->ioc, false, NULL);
22
+nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock')
21
vu_client_start(server);
23
nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
22
}
24
disk = os.path.join(iotests.test_dir, 'disk')
23
25
24
--
26
--
25
2.26.2
27
2.21.0
26
28
29
diff view generated by jsdifflib
Deleted patch
1
Explicitly deleting watches is not necessary since libvhost-user calls
2
remove_watch() during vu_deinit(). Add an assertion to check this
3
though.
4
1
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200924151549.913737-5-stefanha@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
util/vhost-user-server.c | 19 ++++---------------
10
1 file changed, 4 insertions(+), 15 deletions(-)
11
12
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/util/vhost-user-server.c
15
+++ b/util/vhost-user-server.c
16
@@ -XXX,XX +XXX,XX @@ static void close_client(VuServer *server)
17
/* When this is set vu_client_trip will stop new processing vhost-user message */
18
server->sioc = NULL;
19
20
- VuFdWatch *vu_fd_watch, *next;
21
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
22
- aio_set_fd_handler(server->ioc->ctx, vu_fd_watch->fd, true, NULL,
23
- NULL, NULL, NULL);
24
- }
25
-
26
- while (!QTAILQ_EMPTY(&server->vu_fd_watches)) {
27
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
28
- if (!vu_fd_watch->processing) {
29
- QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
30
- g_free(vu_fd_watch);
31
- }
32
- }
33
- }
34
-
35
while (server->processing_msg) {
36
if (server->ioc->read_coroutine) {
37
server->ioc->read_coroutine = NULL;
38
@@ -XXX,XX +XXX,XX @@ static void close_client(VuServer *server)
39
}
40
41
vu_deinit(&server->vu_dev);
42
+
43
+ /* vu_deinit() should have called remove_watch() */
44
+ assert(QTAILQ_EMPTY(&server->vu_fd_watches));
45
+
46
object_unref(OBJECT(sioc));
47
object_unref(OBJECT(server->ioc));
48
}
49
--
50
2.26.2
51
diff view generated by jsdifflib
Deleted patch
1
Only one struct is needed per request. Drop req_data and the separate
2
VuBlockReq instance. Instead let vu_queue_pop() allocate everything at
3
once.
4
1
5
This fixes the req_data memory leak in vu_block_virtio_process_req().
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-6-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
block/export/vhost-user-blk-server.c | 68 +++++++++-------------------
12
1 file changed, 21 insertions(+), 47 deletions(-)
13
14
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/export/vhost-user-blk-server.c
17
+++ b/block/export/vhost-user-blk-server.c
18
@@ -XXX,XX +XXX,XX @@ struct virtio_blk_inhdr {
19
};
20
21
typedef struct VuBlockReq {
22
- VuVirtqElement *elem;
23
+ VuVirtqElement elem;
24
int64_t sector_num;
25
size_t size;
26
struct virtio_blk_inhdr *in;
27
@@ -XXX,XX +XXX,XX @@ static void vu_block_req_complete(VuBlockReq *req)
28
VuDev *vu_dev = &req->server->vu_dev;
29
30
/* IO size with 1 extra status byte */
31
- vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
32
+ vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
33
vu_queue_notify(vu_dev, req->vq);
34
35
- if (req->elem) {
36
- free(req->elem);
37
- }
38
-
39
- g_free(req);
40
+ free(req);
41
}
42
43
static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
44
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_flush(VuBlockReq *req)
45
blk_co_flush(backend);
46
}
47
48
-struct req_data {
49
- VuServer *server;
50
- VuVirtq *vq;
51
- VuVirtqElement *elem;
52
-};
53
-
54
static void coroutine_fn vu_block_virtio_process_req(void *opaque)
55
{
56
- struct req_data *data = opaque;
57
- VuServer *server = data->server;
58
- VuVirtq *vq = data->vq;
59
- VuVirtqElement *elem = data->elem;
60
+ VuBlockReq *req = opaque;
61
+ VuServer *server = req->server;
62
+ VuVirtqElement *elem = &req->elem;
63
uint32_t type;
64
- VuBlockReq *req;
65
66
VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
67
BlockBackend *backend = vdev_blk->backend;
68
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
69
struct iovec *out_iov = elem->out_sg;
70
unsigned in_num = elem->in_num;
71
unsigned out_num = elem->out_num;
72
+
73
/* refer to hw/block/virtio_blk.c */
74
if (elem->out_num < 1 || elem->in_num < 1) {
75
error_report("virtio-blk request missing headers");
76
- free(elem);
77
- return;
78
+ goto err;
79
}
80
81
- req = g_new0(VuBlockReq, 1);
82
- req->server = server;
83
- req->vq = vq;
84
- req->elem = elem;
85
-
86
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
87
sizeof(req->out)) != sizeof(req->out))) {
88
error_report("virtio-blk request outhdr too short");
89
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
90
91
err:
92
free(elem);
93
- g_free(req);
94
- return;
95
}
96
97
static void vu_block_process_vq(VuDev *vu_dev, int idx)
98
{
99
- VuServer *server;
100
- VuVirtq *vq;
101
- struct req_data *req_data;
102
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
103
+ VuVirtq *vq = vu_get_queue(vu_dev, idx);
104
105
- server = container_of(vu_dev, VuServer, vu_dev);
106
- assert(server);
107
-
108
- vq = vu_get_queue(vu_dev, idx);
109
- assert(vq);
110
- VuVirtqElement *elem;
111
while (1) {
112
- elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) +
113
- sizeof(VuBlockReq));
114
- if (elem) {
115
- req_data = g_new0(struct req_data, 1);
116
- req_data->server = server;
117
- req_data->vq = vq;
118
- req_data->elem = elem;
119
- Coroutine *co = qemu_coroutine_create(vu_block_virtio_process_req,
120
- req_data);
121
- aio_co_enter(server->ioc->ctx, co);
122
- } else {
123
+ VuBlockReq *req;
124
+
125
+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq));
126
+ if (!req) {
127
break;
128
}
129
+
130
+ req->server = server;
131
+ req->vq = vq;
132
+
133
+ Coroutine *co =
134
+ qemu_coroutine_create(vu_block_virtio_process_req, req);
135
+ qemu_coroutine_enter(co);
136
}
137
}
138
139
--
140
2.26.2
141
diff view generated by jsdifflib
Deleted patch
1
The device panic notifier callback is not used. Drop it.
2
1
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Message-id: 20200924151549.913737-7-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
util/vhost-user-server.h | 3 ---
8
block/export/vhost-user-blk-server.c | 3 +--
9
util/vhost-user-server.c | 6 ------
10
3 files changed, 1 insertion(+), 11 deletions(-)
11
12
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/util/vhost-user-server.h
15
+++ b/util/vhost-user-server.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct VuFdWatch {
17
} VuFdWatch;
18
19
typedef struct VuServer VuServer;
20
-typedef void DevicePanicNotifierFn(VuServer *server);
21
22
struct VuServer {
23
QIONetListener *listener;
24
AioContext *ctx;
25
- DevicePanicNotifierFn *device_panic_notifier;
26
int max_queues;
27
const VuDevIface *vu_iface;
28
VuDev vu_dev;
29
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
30
SocketAddress *unix_socket,
31
AioContext *ctx,
32
uint16_t max_queues,
33
- DevicePanicNotifierFn *device_panic_notifier,
34
const VuDevIface *vu_iface,
35
Error **errp);
36
37
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/block/export/vhost-user-blk-server.c
40
+++ b/block/export/vhost-user-blk-server.c
41
@@ -XXX,XX +XXX,XX @@ static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
42
ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
43
44
if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
45
- VHOST_USER_BLK_MAX_QUEUES,
46
- NULL, &vu_block_iface,
47
+ VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface,
48
errp)) {
49
goto error;
50
}
51
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/util/vhost-user-server.c
54
+++ b/util/vhost-user-server.c
55
@@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf)
56
close_client(server);
57
}
58
59
- if (server->device_panic_notifier) {
60
- server->device_panic_notifier(server);
61
- }
62
-
63
/*
64
* Set the callback function for network listener so another
65
* vhost-user client can connect to this server
66
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
67
SocketAddress *socket_addr,
68
AioContext *ctx,
69
uint16_t max_queues,
70
- DevicePanicNotifierFn *device_panic_notifier,
71
const VuDevIface *vu_iface,
72
Error **errp)
73
{
74
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
75
.vu_iface = vu_iface,
76
.max_queues = max_queues,
77
.ctx = ctx,
78
- .device_panic_notifier = device_panic_notifier,
79
};
80
81
qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
82
--
83
2.26.2
84
diff view generated by jsdifflib
Deleted patch
1
fds[] is leaked when qio_channel_readv_full() fails.
2
1
3
Use vmsg->fds[] instead of keeping a local fds[] array. Then we can
4
reuse goto fail to clean up fds. vmsg->fd_num must be zeroed before the
5
loop to make this safe.
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-8-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
util/vhost-user-server.c | 50 ++++++++++++++++++----------------------
12
1 file changed, 23 insertions(+), 27 deletions(-)
13
14
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/util/vhost-user-server.c
17
+++ b/util/vhost-user-server.c
18
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
19
};
20
int rc, read_bytes = 0;
21
Error *local_err = NULL;
22
- /*
23
- * Store fds/nfds returned from qio_channel_readv_full into
24
- * temporary variables.
25
- *
26
- * VhostUserMsg is a packed structure, gcc will complain about passing
27
- * pointer to a packed structure member if we pass &VhostUserMsg.fd_num
28
- * and &VhostUserMsg.fds directly when calling qio_channel_readv_full,
29
- * thus two temporary variables nfds and fds are used here.
30
- */
31
- size_t nfds = 0, nfds_t = 0;
32
const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
33
- int *fds_t = NULL;
34
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
35
QIOChannel *ioc = server->ioc;
36
37
+ vmsg->fd_num = 0;
38
if (!ioc) {
39
error_report_err(local_err);
40
goto fail;
41
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
42
43
assert(qemu_in_coroutine());
44
do {
45
+ size_t nfds = 0;
46
+ int *fds = NULL;
47
+
48
/*
49
* qio_channel_readv_full may have short reads, keeping calling it
50
* until getting VHOST_USER_HDR_SIZE or 0 bytes in total
51
*/
52
- rc = qio_channel_readv_full(ioc, &iov, 1, &fds_t, &nfds_t, &local_err);
53
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
54
if (rc < 0) {
55
if (rc == QIO_CHANNEL_ERR_BLOCK) {
56
+ assert(local_err == NULL);
57
qio_channel_yield(ioc, G_IO_IN);
58
continue;
59
} else {
60
error_report_err(local_err);
61
- return false;
62
+ goto fail;
63
}
64
}
65
- read_bytes += rc;
66
- if (nfds_t > 0) {
67
- if (nfds + nfds_t > max_fds) {
68
+
69
+ if (nfds > 0) {
70
+ if (vmsg->fd_num + nfds > max_fds) {
71
error_report("A maximum of %zu fds are allowed, "
72
"however got %lu fds now",
73
- max_fds, nfds + nfds_t);
74
+ max_fds, vmsg->fd_num + nfds);
75
+ g_free(fds);
76
goto fail;
77
}
78
- memcpy(vmsg->fds + nfds, fds_t,
79
- nfds_t *sizeof(vmsg->fds[0]));
80
- nfds += nfds_t;
81
- g_free(fds_t);
82
+ memcpy(vmsg->fds + vmsg->fd_num, fds, nfds * sizeof(vmsg->fds[0]));
83
+ vmsg->fd_num += nfds;
84
+ g_free(fds);
85
}
86
- if (read_bytes == VHOST_USER_HDR_SIZE || rc == 0) {
87
- break;
88
+
89
+ if (rc == 0) { /* socket closed */
90
+ goto fail;
91
}
92
- iov.iov_base = (char *)vmsg + read_bytes;
93
- iov.iov_len = VHOST_USER_HDR_SIZE - read_bytes;
94
- } while (true);
95
96
- vmsg->fd_num = nfds;
97
+ iov.iov_base += rc;
98
+ iov.iov_len -= rc;
99
+ read_bytes += rc;
100
+ } while (read_bytes != VHOST_USER_HDR_SIZE);
101
+
102
/* qio_channel_readv_full will make socket fds blocking, unblock them */
103
vmsg_unblock_fds(vmsg);
104
if (vmsg->size > sizeof(vmsg->payload)) {
105
--
106
2.26.2
107
diff view generated by jsdifflib
Deleted patch
1
The vu_client_trip() coroutine is leaked during AioContext switching. It
2
is also unsafe to destroy the vu_dev in panic_cb() since its callers
3
still access it in some cases.
4
1
5
Rework the lifecycle to solve these safety issues.
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-10-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
util/vhost-user-server.h | 29 ++--
12
block/export/vhost-user-blk-server.c | 9 +-
13
util/vhost-user-server.c | 245 +++++++++++++++------------
14
3 files changed, 155 insertions(+), 128 deletions(-)
15
16
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/util/vhost-user-server.h
19
+++ b/util/vhost-user-server.h
20
@@ -XXX,XX +XXX,XX @@
21
#include "qapi/error.h"
22
#include "standard-headers/linux/virtio_blk.h"
23
24
+/* A kick fd that we monitor on behalf of libvhost-user */
25
typedef struct VuFdWatch {
26
VuDev *vu_dev;
27
int fd; /*kick fd*/
28
void *pvt;
29
vu_watch_cb cb;
30
- bool processing;
31
QTAILQ_ENTRY(VuFdWatch) next;
32
} VuFdWatch;
33
34
-typedef struct VuServer VuServer;
35
-
36
-struct VuServer {
37
+/**
38
+ * VuServer:
39
+ * A vhost-user server instance with user-defined VuDevIface callbacks.
40
+ * Vhost-user device backends can be implemented using VuServer. VuDevIface
41
+ * callbacks and virtqueue kicks run in the given AioContext.
42
+ */
43
+typedef struct {
44
QIONetListener *listener;
45
+ QEMUBH *restart_listener_bh;
46
AioContext *ctx;
47
int max_queues;
48
const VuDevIface *vu_iface;
49
+
50
+ /* Protected by ctx lock */
51
VuDev vu_dev;
52
QIOChannel *ioc; /* The I/O channel with the client */
53
QIOChannelSocket *sioc; /* The underlying data channel with the client */
54
- /* IOChannel for fd provided via VHOST_USER_SET_SLAVE_REQ_FD */
55
- QIOChannel *ioc_slave;
56
- QIOChannelSocket *sioc_slave;
57
- Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
58
QTAILQ_HEAD(, VuFdWatch) vu_fd_watches;
59
- /* restart coroutine co_trip if AIOContext is changed */
60
- bool aio_context_changed;
61
- bool processing_msg;
62
-};
63
+
64
+ Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
65
+} VuServer;
66
67
bool vhost_user_server_start(VuServer *server,
68
SocketAddress *unix_socket,
69
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
70
71
void vhost_user_server_stop(VuServer *server);
72
73
-void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx);
74
+void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
75
+void vhost_user_server_detach_aio_context(VuServer *server);
76
77
#endif /* VHOST_USER_SERVER_H */
78
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/block/export/vhost-user-blk-server.c
81
+++ b/block/export/vhost-user-blk-server.c
82
@@ -XXX,XX +XXX,XX @@ static const VuDevIface vu_block_iface = {
83
static void blk_aio_attached(AioContext *ctx, void *opaque)
84
{
85
VuBlockDev *vub_dev = opaque;
86
- aio_context_acquire(ctx);
87
- vhost_user_server_set_aio_context(&vub_dev->vu_server, ctx);
88
- aio_context_release(ctx);
89
+ vhost_user_server_attach_aio_context(&vub_dev->vu_server, ctx);
90
}
91
92
static void blk_aio_detach(void *opaque)
93
{
94
VuBlockDev *vub_dev = opaque;
95
- AioContext *ctx = vub_dev->vu_server.ctx;
96
- aio_context_acquire(ctx);
97
- vhost_user_server_set_aio_context(&vub_dev->vu_server, NULL);
98
- aio_context_release(ctx);
99
+ vhost_user_server_detach_aio_context(&vub_dev->vu_server);
100
}
101
102
static void
103
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/util/vhost-user-server.c
106
+++ b/util/vhost-user-server.c
107
@@ -XXX,XX +XXX,XX @@
108
*/
109
#include "qemu/osdep.h"
110
#include "qemu/main-loop.h"
111
+#include "block/aio-wait.h"
112
#include "vhost-user-server.h"
113
114
+/*
115
+ * Theory of operation:
116
+ *
117
+ * VuServer is started and stopped by vhost_user_server_start() and
118
+ * vhost_user_server_stop() from the main loop thread. Starting the server
119
+ * opens a vhost-user UNIX domain socket and listens for incoming connections.
120
+ * Only one connection is allowed at a time.
121
+ *
122
+ * The connection is handled by the vu_client_trip() coroutine in the
123
+ * VuServer->ctx AioContext. The coroutine consists of a vu_dispatch() loop
124
+ * where libvhost-user calls vu_message_read() to receive the next vhost-user
125
+ * protocol messages over the UNIX domain socket.
126
+ *
127
+ * When virtqueues are set up libvhost-user calls set_watch() to monitor kick
128
+ * fds. These fds are also handled in the VuServer->ctx AioContext.
129
+ *
130
+ * Both vu_client_trip() and kick fd monitoring can be stopped by shutting down
131
+ * the socket connection. Shutting down the socket connection causes
132
+ * vu_message_read() to fail since no more data can be received from the socket.
133
+ * After vu_dispatch() fails, vu_client_trip() calls vu_deinit() to stop
134
+ * libvhost-user before terminating the coroutine. vu_deinit() calls
135
+ * remove_watch() to stop monitoring kick fds and this stops virtqueue
136
+ * processing.
137
+ *
138
+ * When vu_client_trip() has finished cleaning up it schedules a BH in the main
139
+ * loop thread to accept the next client connection.
140
+ *
141
+ * When libvhost-user detects an error it calls panic_cb() and sets the
142
+ * dev->broken flag. Both vu_client_trip() and kick fd processing stop when
143
+ * the dev->broken flag is set.
144
+ *
145
+ * It is possible to switch AioContexts using
146
+ * vhost_user_server_detach_aio_context() and
147
+ * vhost_user_server_attach_aio_context(). They stop monitoring fds in the old
148
+ * AioContext and resume monitoring in the new AioContext. The vu_client_trip()
149
+ * coroutine remains in a yielded state during the switch. This is made
150
+ * possible by QIOChannel's support for spurious coroutine re-entry in
151
+ * qio_channel_yield(). The coroutine will restart I/O when re-entered from the
152
+ * new AioContext.
153
+ */
154
+
155
static void vmsg_close_fds(VhostUserMsg *vmsg)
156
{
157
int i;
158
@@ -XXX,XX +XXX,XX @@ static void vmsg_unblock_fds(VhostUserMsg *vmsg)
159
}
160
}
161
162
-static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
163
- gpointer opaque);
164
-
165
-static void close_client(VuServer *server)
166
-{
167
- /*
168
- * Before closing the client
169
- *
170
- * 1. Let vu_client_trip stop processing new vhost-user msg
171
- *
172
- * 2. remove kick_handler
173
- *
174
- * 3. wait for the kick handler to be finished
175
- *
176
- * 4. wait for the current vhost-user msg to be finished processing
177
- */
178
-
179
- QIOChannelSocket *sioc = server->sioc;
180
- /* When this is set vu_client_trip will stop new processing vhost-user message */
181
- server->sioc = NULL;
182
-
183
- while (server->processing_msg) {
184
- if (server->ioc->read_coroutine) {
185
- server->ioc->read_coroutine = NULL;
186
- qio_channel_set_aio_fd_handler(server->ioc, server->ioc->ctx, NULL,
187
- NULL, server->ioc);
188
- server->processing_msg = false;
189
- }
190
- }
191
-
192
- vu_deinit(&server->vu_dev);
193
-
194
- /* vu_deinit() should have called remove_watch() */
195
- assert(QTAILQ_EMPTY(&server->vu_fd_watches));
196
-
197
- object_unref(OBJECT(sioc));
198
- object_unref(OBJECT(server->ioc));
199
-}
200
-
201
static void panic_cb(VuDev *vu_dev, const char *buf)
202
{
203
- VuServer *server = container_of(vu_dev, VuServer, vu_dev);
204
-
205
- /* avoid while loop in close_client */
206
- server->processing_msg = false;
207
-
208
- if (buf) {
209
- error_report("vu_panic: %s", buf);
210
- }
211
-
212
- if (server->sioc) {
213
- close_client(server);
214
- }
215
-
216
- /*
217
- * Set the callback function for network listener so another
218
- * vhost-user client can connect to this server
219
- */
220
- qio_net_listener_set_client_func(server->listener,
221
- vu_accept,
222
- server,
223
- NULL);
224
+ error_report("vu_panic: %s", buf);
225
}
226
227
static bool coroutine_fn
228
@@ -XXX,XX +XXX,XX @@ fail:
229
return false;
230
}
231
232
-
233
-static void vu_client_start(VuServer *server);
234
static coroutine_fn void vu_client_trip(void *opaque)
235
{
236
VuServer *server = opaque;
237
+ VuDev *vu_dev = &server->vu_dev;
238
239
- while (!server->aio_context_changed && server->sioc) {
240
- server->processing_msg = true;
241
- vu_dispatch(&server->vu_dev);
242
- server->processing_msg = false;
243
+ while (!vu_dev->broken && vu_dispatch(vu_dev)) {
244
+ /* Keep running */
245
}
246
247
- if (server->aio_context_changed && server->sioc) {
248
- server->aio_context_changed = false;
249
- vu_client_start(server);
250
- }
251
-}
252
+ vu_deinit(vu_dev);
253
+
254
+ /* vu_deinit() should have called remove_watch() */
255
+ assert(QTAILQ_EMPTY(&server->vu_fd_watches));
256
+
257
+ object_unref(OBJECT(server->sioc));
258
+ server->sioc = NULL;
259
260
-static void vu_client_start(VuServer *server)
261
-{
262
- server->co_trip = qemu_coroutine_create(vu_client_trip, server);
263
- aio_co_enter(server->ctx, server->co_trip);
264
+ object_unref(OBJECT(server->ioc));
265
+ server->ioc = NULL;
266
+
267
+ server->co_trip = NULL;
268
+ if (server->restart_listener_bh) {
269
+ qemu_bh_schedule(server->restart_listener_bh);
270
+ }
271
+ aio_wait_kick();
272
}
273
274
/*
275
@@ -XXX,XX +XXX,XX @@ static void vu_client_start(VuServer *server)
276
static void kick_handler(void *opaque)
277
{
278
VuFdWatch *vu_fd_watch = opaque;
279
- vu_fd_watch->processing = true;
280
- vu_fd_watch->cb(vu_fd_watch->vu_dev, 0, vu_fd_watch->pvt);
281
- vu_fd_watch->processing = false;
282
+ VuDev *vu_dev = vu_fd_watch->vu_dev;
283
+
284
+ vu_fd_watch->cb(vu_dev, 0, vu_fd_watch->pvt);
285
+
286
+ /* Stop vu_client_trip() if an error occurred in vu_fd_watch->cb() */
287
+ if (vu_dev->broken) {
288
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
289
+
290
+ qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
291
+ }
292
}
293
294
-
295
static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
296
{
297
298
@@ -XXX,XX +XXX,XX @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
299
qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");
300
server->ioc = QIO_CHANNEL(sioc);
301
object_ref(OBJECT(server->ioc));
302
- qio_channel_attach_aio_context(server->ioc, server->ctx);
303
+
304
+ /* TODO vu_message_write() spins if non-blocking! */
305
qio_channel_set_blocking(server->ioc, false, NULL);
306
- vu_client_start(server);
307
+
308
+ server->co_trip = qemu_coroutine_create(vu_client_trip, server);
309
+
310
+ aio_context_acquire(server->ctx);
311
+ vhost_user_server_attach_aio_context(server, server->ctx);
312
+ aio_context_release(server->ctx);
313
}
314
315
-
316
void vhost_user_server_stop(VuServer *server)
317
{
318
+ aio_context_acquire(server->ctx);
319
+
320
+ qemu_bh_delete(server->restart_listener_bh);
321
+ server->restart_listener_bh = NULL;
322
+
323
if (server->sioc) {
324
- close_client(server);
325
+ VuFdWatch *vu_fd_watch;
326
+
327
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
328
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
329
+ NULL, NULL, NULL, vu_fd_watch);
330
+ }
331
+
332
+ qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
333
+
334
+ AIO_WAIT_WHILE(server->ctx, server->co_trip);
335
}
336
337
+ aio_context_release(server->ctx);
338
+
339
if (server->listener) {
340
qio_net_listener_disconnect(server->listener);
341
object_unref(OBJECT(server->listener));
342
}
343
+}
344
+
345
+/*
346
+ * Allow the next client to connect to the server. Called from a BH in the main
347
+ * loop.
348
+ */
349
+static void restart_listener_bh(void *opaque)
350
+{
351
+ VuServer *server = opaque;
352
353
+ qio_net_listener_set_client_func(server->listener, vu_accept, server,
354
+ NULL);
355
}
356
357
-void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx)
358
+/* Called with ctx acquired */
359
+void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
360
{
361
- VuFdWatch *vu_fd_watch, *next;
362
- void *opaque = NULL;
363
- IOHandler *io_read = NULL;
364
- bool attach;
365
+ VuFdWatch *vu_fd_watch;
366
367
- server->ctx = ctx ? ctx : qemu_get_aio_context();
368
+ server->ctx = ctx;
369
370
if (!server->sioc) {
371
- /* not yet serving any client*/
372
return;
373
}
374
375
- if (ctx) {
376
- qio_channel_attach_aio_context(server->ioc, ctx);
377
- server->aio_context_changed = true;
378
- io_read = kick_handler;
379
- attach = true;
380
- } else {
381
+ qio_channel_attach_aio_context(server->ioc, ctx);
382
+
383
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
384
+ aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL,
385
+ NULL, vu_fd_watch);
386
+ }
387
+
388
+ aio_co_schedule(ctx, server->co_trip);
389
+}
390
+
391
+/* Called with server->ctx acquired */
392
+void vhost_user_server_detach_aio_context(VuServer *server)
393
+{
394
+ if (server->sioc) {
395
+ VuFdWatch *vu_fd_watch;
396
+
397
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
398
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
399
+ NULL, NULL, NULL, vu_fd_watch);
400
+ }
401
+
402
qio_channel_detach_aio_context(server->ioc);
403
- /* server->ioc->ctx keeps the old AioConext */
404
- ctx = server->ioc->ctx;
405
- attach = false;
406
}
407
408
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
409
- if (vu_fd_watch->cb) {
410
- opaque = attach ? vu_fd_watch : NULL;
411
- aio_set_fd_handler(ctx, vu_fd_watch->fd, true,
412
- io_read, NULL, NULL,
413
- opaque);
414
- }
415
- }
416
+ server->ctx = NULL;
417
}
418
419
-
420
bool vhost_user_server_start(VuServer *server,
421
SocketAddress *socket_addr,
422
AioContext *ctx,
423
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
424
const VuDevIface *vu_iface,
425
Error **errp)
426
{
427
+ QEMUBH *bh;
428
QIONetListener *listener = qio_net_listener_new();
429
if (qio_net_listener_open_sync(listener, socket_addr, 1,
430
errp) < 0) {
431
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
432
return false;
433
}
434
435
+ bh = qemu_bh_new(restart_listener_bh, server);
436
+
437
/* zero out unspecified fields */
438
*server = (VuServer) {
439
.listener = listener,
440
+ .restart_listener_bh = bh,
441
.vu_iface = vu_iface,
442
.max_queues = max_queues,
443
.ctx = ctx,
444
--
445
2.26.2
446
diff view generated by jsdifflib
Deleted patch
1
Propagate the flush return value since errors are possible.
2
1
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Message-id: 20200924151549.913737-11-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
block/export/vhost-user-blk-server.c | 11 +++++++----
8
1 file changed, 7 insertions(+), 4 deletions(-)
9
10
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/block/export/vhost-user-blk-server.c
13
+++ b/block/export/vhost-user-blk-server.c
14
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
15
return -EINVAL;
16
}
17
18
-static void coroutine_fn vu_block_flush(VuBlockReq *req)
19
+static int coroutine_fn vu_block_flush(VuBlockReq *req)
20
{
21
VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
22
BlockBackend *backend = vdev_blk->backend;
23
- blk_co_flush(backend);
24
+ return blk_co_flush(backend);
25
}
26
27
static void coroutine_fn vu_block_virtio_process_req(void *opaque)
28
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
29
break;
30
}
31
case VIRTIO_BLK_T_FLUSH:
32
- vu_block_flush(req);
33
- req->in->status = VIRTIO_BLK_S_OK;
34
+ if (vu_block_flush(req) == 0) {
35
+ req->in->status = VIRTIO_BLK_S_OK;
36
+ } else {
37
+ req->in->status = VIRTIO_BLK_S_IOERR;
38
+ }
39
break;
40
case VIRTIO_BLK_T_GET_ID: {
41
size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
42
--
43
2.26.2
44
diff view generated by jsdifflib
Deleted patch
1
Headers used by other subsystems are located in include/. Also add the
2
vhost-user-server and vhost-user-blk-server headers to MAINTAINERS.
3
1
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Message-id: 20200924151549.913737-13-stefanha@redhat.com
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
---
8
MAINTAINERS | 4 +++-
9
{util => include/qemu}/vhost-user-server.h | 0
10
block/export/vhost-user-blk-server.c | 2 +-
11
util/vhost-user-server.c | 2 +-
12
4 files changed, 5 insertions(+), 3 deletions(-)
13
rename {util => include/qemu}/vhost-user-server.h (100%)
14
15
diff --git a/MAINTAINERS b/MAINTAINERS
16
index XXXXXXX..XXXXXXX 100644
17
--- a/MAINTAINERS
18
+++ b/MAINTAINERS
19
@@ -XXX,XX +XXX,XX @@ Vhost-user block device backend server
20
M: Coiby Xu <Coiby.Xu@gmail.com>
21
S: Maintained
22
F: block/export/vhost-user-blk-server.c
23
-F: util/vhost-user-server.c
24
+F: block/export/vhost-user-blk-server.h
25
+F: include/qemu/vhost-user-server.h
26
F: tests/qtest/vhost-user-blk-test.c
27
F: tests/qtest/libqos/vhost-user-blk.c
28
+F: util/vhost-user-server.c
29
30
Replication
31
M: Wen Congyang <wencongyang2@huawei.com>
32
diff --git a/util/vhost-user-server.h b/include/qemu/vhost-user-server.h
33
similarity index 100%
34
rename from util/vhost-user-server.h
35
rename to include/qemu/vhost-user-server.h
36
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/block/export/vhost-user-blk-server.c
39
+++ b/block/export/vhost-user-blk-server.c
40
@@ -XXX,XX +XXX,XX @@
41
#include "block/block.h"
42
#include "contrib/libvhost-user/libvhost-user.h"
43
#include "standard-headers/linux/virtio_blk.h"
44
-#include "util/vhost-user-server.h"
45
+#include "qemu/vhost-user-server.h"
46
#include "vhost-user-blk-server.h"
47
#include "qapi/error.h"
48
#include "qom/object_interfaces.h"
49
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/util/vhost-user-server.c
52
+++ b/util/vhost-user-server.c
53
@@ -XXX,XX +XXX,XX @@
54
*/
55
#include "qemu/osdep.h"
56
#include "qemu/main-loop.h"
57
+#include "qemu/vhost-user-server.h"
58
#include "block/aio-wait.h"
59
-#include "vhost-user-server.h"
60
61
/*
62
* Theory of operation:
63
--
64
2.26.2
65
diff view generated by jsdifflib
Deleted patch
1
Don't compile contrib/libvhost-user/libvhost-user.c again. Instead build
2
the static library once and then reuse it throughout QEMU.
3
1
4
Also switch from CONFIG_LINUX to CONFIG_VHOST_USER, which is what the
5
vhost-user tools (vhost-user-gpu, etc) do.
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-14-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
block/export/export.c | 8 ++++----
12
block/export/meson.build | 2 +-
13
contrib/libvhost-user/meson.build | 1 +
14
meson.build | 6 +++++-
15
tests/qtest/meson.build | 2 +-
16
util/meson.build | 4 +++-
17
6 files changed, 15 insertions(+), 8 deletions(-)
18
19
diff --git a/block/export/export.c b/block/export/export.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/export/export.c
22
+++ b/block/export/export.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "sysemu/block-backend.h"
25
#include "block/export.h"
26
#include "block/nbd.h"
27
-#if CONFIG_LINUX
28
-#include "block/export/vhost-user-blk-server.h"
29
-#endif
30
#include "qapi/error.h"
31
#include "qapi/qapi-commands-block-export.h"
32
#include "qapi/qapi-events-block-export.h"
33
#include "qemu/id.h"
34
+#ifdef CONFIG_VHOST_USER
35
+#include "vhost-user-blk-server.h"
36
+#endif
37
38
static const BlockExportDriver *blk_exp_drivers[] = {
39
&blk_exp_nbd,
40
-#if CONFIG_LINUX
41
+#ifdef CONFIG_VHOST_USER
42
&blk_exp_vhost_user_blk,
43
#endif
44
};
45
diff --git a/block/export/meson.build b/block/export/meson.build
46
index XXXXXXX..XXXXXXX 100644
47
--- a/block/export/meson.build
48
+++ b/block/export/meson.build
49
@@ -XXX,XX +XXX,XX @@
50
block_ss.add(files('export.c'))
51
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', '../../contrib/libvhost-user/libvhost-user.c'))
52
+block_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-blk-server.c'))
53
diff --git a/contrib/libvhost-user/meson.build b/contrib/libvhost-user/meson.build
54
index XXXXXXX..XXXXXXX 100644
55
--- a/contrib/libvhost-user/meson.build
56
+++ b/contrib/libvhost-user/meson.build
57
@@ -XXX,XX +XXX,XX @@
58
libvhost_user = static_library('vhost-user',
59
files('libvhost-user.c', 'libvhost-user-glib.c'),
60
build_by_default: false)
61
+vhost_user = declare_dependency(link_with: libvhost_user)
62
diff --git a/meson.build b/meson.build
63
index XXXXXXX..XXXXXXX 100644
64
--- a/meson.build
65
+++ b/meson.build
66
@@ -XXX,XX +XXX,XX @@ trace_events_subdirs += [
67
'util',
68
]
69
70
+vhost_user = not_found
71
+if 'CONFIG_VHOST_USER' in config_host
72
+ subdir('contrib/libvhost-user')
73
+endif
74
+
75
subdir('qapi')
76
subdir('qobject')
77
subdir('stubs')
78
@@ -XXX,XX +XXX,XX @@ if have_tools
79
install: true)
80
81
if 'CONFIG_VHOST_USER' in config_host
82
- subdir('contrib/libvhost-user')
83
subdir('contrib/vhost-user-blk')
84
subdir('contrib/vhost-user-gpu')
85
subdir('contrib/vhost-user-input')
86
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
87
index XXXXXXX..XXXXXXX 100644
88
--- a/tests/qtest/meson.build
89
+++ b/tests/qtest/meson.build
90
@@ -XXX,XX +XXX,XX @@ qos_test_ss.add(
91
)
92
qos_test_ss.add(when: 'CONFIG_VIRTFS', if_true: files('virtio-9p-test.c'))
93
qos_test_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-test.c'))
94
-qos_test_ss.add(when: ['CONFIG_LINUX', 'CONFIG_TOOLS'], if_true: files('vhost-user-blk-test.c'))
95
+qos_test_ss.add(when: ['CONFIG_VHOST_USER', 'CONFIG_TOOLS'], if_true: files('vhost-user-blk-test.c'))
96
97
extra_qtest_deps = {
98
'bios-tables-test': [io],
99
diff --git a/util/meson.build b/util/meson.build
100
index XXXXXXX..XXXXXXX 100644
101
--- a/util/meson.build
102
+++ b/util/meson.build
103
@@ -XXX,XX +XXX,XX @@ if have_block
104
util_ss.add(files('main-loop.c'))
105
util_ss.add(files('nvdimm-utils.c'))
106
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
107
- util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
108
+ util_ss.add(when: 'CONFIG_VHOST_USER', if_true: [
109
+ files('vhost-user-server.c'), vhost_user
110
+ ])
111
util_ss.add(files('block-helpers.c'))
112
util_ss.add(files('qemu-coroutine-sleep.c'))
113
util_ss.add(files('qemu-co-shared-resource.c'))
114
--
115
2.26.2
116
diff view generated by jsdifflib
Deleted patch
1
Introduce libblockdev.fa to avoid recompiling blockdev_ss twice.
2
1
3
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
4
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200929125516.186715-3-stefanha@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
meson.build | 12 ++++++++++--
10
storage-daemon/meson.build | 3 +--
11
2 files changed, 11 insertions(+), 4 deletions(-)
12
13
diff --git a/meson.build b/meson.build
14
index XXXXXXX..XXXXXXX 100644
15
--- a/meson.build
16
+++ b/meson.build
17
@@ -XXX,XX +XXX,XX @@ blockdev_ss.add(files(
18
blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
19
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
20
21
-softmmu_ss.add_all(blockdev_ss)
22
softmmu_ss.add(files(
23
'bootdevice.c',
24
'dma-helpers.c',
25
@@ -XXX,XX +XXX,XX @@ block = declare_dependency(link_whole: [libblock],
26
link_args: '@block.syms',
27
dependencies: [crypto, io])
28
29
+blockdev_ss = blockdev_ss.apply(config_host, strict: false)
30
+libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
31
+ dependencies: blockdev_ss.dependencies(),
32
+ name_suffix: 'fa',
33
+ build_by_default: false)
34
+
35
+blockdev = declare_dependency(link_whole: [libblockdev],
36
+ dependencies: [block])
37
+
38
qmp_ss = qmp_ss.apply(config_host, strict: false)
39
libqmp = static_library('qmp', qmp_ss.sources() + genh,
40
dependencies: qmp_ss.dependencies(),
41
@@ -XXX,XX +XXX,XX @@ foreach m : block_mods + softmmu_mods
42
install_dir: config_host['qemu_moddir'])
43
endforeach
44
45
-softmmu_ss.add(authz, block, chardev, crypto, io, qmp)
46
+softmmu_ss.add(authz, blockdev, chardev, crypto, io, qmp)
47
common_ss.add(qom, qemuutil)
48
49
common_ss.add_all(when: 'CONFIG_SOFTMMU', if_true: [softmmu_ss])
50
diff --git a/storage-daemon/meson.build b/storage-daemon/meson.build
51
index XXXXXXX..XXXXXXX 100644
52
--- a/storage-daemon/meson.build
53
+++ b/storage-daemon/meson.build
54
@@ -XXX,XX +XXX,XX @@
55
qsd_ss = ss.source_set()
56
qsd_ss.add(files('qemu-storage-daemon.c'))
57
-qsd_ss.add(block, chardev, qmp, qom, qemuutil)
58
-qsd_ss.add_all(blockdev_ss)
59
+qsd_ss.add(blockdev, chardev, qmp, qom, qemuutil)
60
61
subdir('qapi')
62
63
--
64
2.26.2
65
diff view generated by jsdifflib
Deleted patch
1
Block exports are used by softmmu, qemu-storage-daemon, and qemu-nbd.
2
They are not used by other programs and are not otherwise needed in
3
libblock.
4
1
5
Undo the recent move of blockdev-nbd.c from blockdev_ss into block_ss.
6
Since bdrv_close_all() (libblock) calls blk_exp_close_all()
7
(libblockdev), a stub function is required.
8
9
Make qemu-nbd.c use signal handling utility functions instead of
10
duplicating the code. This helps because os-posix.c is in libblockdev
11
and it depends on a qemu_system_killed() symbol that qemu-nbd.c lacks.
12
Once we use the signal handling utility functions we also end up
13
providing the necessary symbol.
14
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
17
Reviewed-by: Eric Blake <eblake@redhat.com>
18
Message-id: 20200929125516.186715-4-stefanha@redhat.com
19
[Fixed s/ndb/nbd/ typo in commit description as suggested by Eric Blake
20
--Stefan]
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
---
23
qemu-nbd.c | 21 +++++++++------------
24
stubs/blk-exp-close-all.c | 7 +++++++
25
block/export/meson.build | 4 ++--
26
meson.build | 4 ++--
27
nbd/meson.build | 2 ++
28
stubs/meson.build | 1 +
29
6 files changed, 23 insertions(+), 16 deletions(-)
30
create mode 100644 stubs/blk-exp-close-all.c
31
32
diff --git a/qemu-nbd.c b/qemu-nbd.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/qemu-nbd.c
35
+++ b/qemu-nbd.c
36
@@ -XXX,XX +XXX,XX @@
37
#include "qapi/error.h"
38
#include "qemu/cutils.h"
39
#include "sysemu/block-backend.h"
40
+#include "sysemu/runstate.h" /* for qemu_system_killed() prototype */
41
#include "block/block_int.h"
42
#include "block/nbd.h"
43
#include "qemu/main-loop.h"
44
@@ -XXX,XX +XXX,XX @@ QEMU_COPYRIGHT "\n"
45
}
46
47
#if HAVE_NBD_DEVICE
48
-static void termsig_handler(int signum)
49
+/*
50
+ * The client thread uses SIGTERM to interrupt the server. A signal
51
+ * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
52
+ */
53
+void qemu_system_killed(int signum, pid_t pid)
54
{
55
qatomic_cmpxchg(&state, RUNNING, TERMINATE);
56
qemu_notify_event();
57
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
58
const char *pid_file_name = NULL;
59
BlockExportOptions *export_opts;
60
61
+ os_setup_early_signal_handling();
62
+
63
#if HAVE_NBD_DEVICE
64
- /* The client thread uses SIGTERM to interrupt the server. A signal
65
- * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
66
- */
67
- struct sigaction sa_sigterm;
68
- memset(&sa_sigterm, 0, sizeof(sa_sigterm));
69
- sa_sigterm.sa_handler = termsig_handler;
70
- sigaction(SIGTERM, &sa_sigterm, NULL);
71
+ os_setup_signal_handling();
72
#endif /* HAVE_NBD_DEVICE */
73
74
-#ifdef CONFIG_POSIX
75
- signal(SIGPIPE, SIG_IGN);
76
-#endif
77
-
78
socket_init();
79
error_init(argv[0]);
80
module_call_init(MODULE_INIT_TRACE);
81
diff --git a/stubs/blk-exp-close-all.c b/stubs/blk-exp-close-all.c
82
new file mode 100644
83
index XXXXXXX..XXXXXXX
84
--- /dev/null
85
+++ b/stubs/blk-exp-close-all.c
86
@@ -XXX,XX +XXX,XX @@
87
+#include "qemu/osdep.h"
88
+#include "block/export.h"
89
+
90
+/* Only used in programs that support block exports (libblockdev.fa) */
91
+void blk_exp_close_all(void)
92
+{
93
+}
94
diff --git a/block/export/meson.build b/block/export/meson.build
95
index XXXXXXX..XXXXXXX 100644
96
--- a/block/export/meson.build
97
+++ b/block/export/meson.build
98
@@ -XXX,XX +XXX,XX @@
99
-block_ss.add(files('export.c'))
100
-block_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-blk-server.c'))
101
+blockdev_ss.add(files('export.c'))
102
+blockdev_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user-blk-server.c'))
103
diff --git a/meson.build b/meson.build
104
index XXXXXXX..XXXXXXX 100644
105
--- a/meson.build
106
+++ b/meson.build
107
@@ -XXX,XX +XXX,XX @@ subdir('dump')
108
109
block_ss.add(files(
110
'block.c',
111
- 'blockdev-nbd.c',
112
'blockjob.c',
113
'job.c',
114
'qemu-io-cmds.c',
115
@@ -XXX,XX +XXX,XX @@ subdir('block')
116
117
blockdev_ss.add(files(
118
'blockdev.c',
119
+ 'blockdev-nbd.c',
120
'iothread.c',
121
'job-qmp.c',
122
))
123
@@ -XXX,XX +XXX,XX @@ if have_tools
124
qemu_io = executable('qemu-io', files('qemu-io.c'),
125
dependencies: [block, qemuutil], install: true)
126
qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
127
- dependencies: [block, qemuutil], install: true)
128
+ dependencies: [blockdev, qemuutil], install: true)
129
130
subdir('storage-daemon')
131
subdir('contrib/rdmacm-mux')
132
diff --git a/nbd/meson.build b/nbd/meson.build
133
index XXXXXXX..XXXXXXX 100644
134
--- a/nbd/meson.build
135
+++ b/nbd/meson.build
136
@@ -XXX,XX +XXX,XX @@
137
block_ss.add(files(
138
'client.c',
139
'common.c',
140
+))
141
+blockdev_ss.add(files(
142
'server.c',
143
))
144
diff --git a/stubs/meson.build b/stubs/meson.build
145
index XXXXXXX..XXXXXXX 100644
146
--- a/stubs/meson.build
147
+++ b/stubs/meson.build
148
@@ -XXX,XX +XXX,XX @@
149
stub_ss.add(files('arch_type.c'))
150
stub_ss.add(files('bdrv-next-monitor-owned.c'))
151
stub_ss.add(files('blk-commit-all.c'))
152
+stub_ss.add(files('blk-exp-close-all.c'))
153
stub_ss.add(files('blockdev-close-all-bdrv-states.c'))
154
stub_ss.add(files('change-state-handler.c'))
155
stub_ss.add(files('cmos.c'))
156
--
157
2.26.2
158
diff view generated by jsdifflib
Deleted patch
1
Make it possible to specify the iothread where the export will run. By
2
default the block node can be moved to other AioContexts later and the
3
export will follow. The fixed-iothread option forces strict behavior
4
that prevents changing AioContext while the export is active. See the
5
QAPI docs for details.
6
1
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200929125516.186715-5-stefanha@redhat.com
9
[Fix stray '#' character in block-export.json and add missing "(since:
10
5.2)" as suggested by Eric Blake.
11
--Stefan]
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
qapi/block-export.json | 11 ++++++++++
15
block/export/export.c | 31 +++++++++++++++++++++++++++-
16
block/export/vhost-user-blk-server.c | 5 ++++-
17
nbd/server.c | 2 --
18
4 files changed, 45 insertions(+), 4 deletions(-)
19
20
diff --git a/qapi/block-export.json b/qapi/block-export.json
21
index XXXXXXX..XXXXXXX 100644
22
--- a/qapi/block-export.json
23
+++ b/qapi/block-export.json
24
@@ -XXX,XX +XXX,XX @@
25
# export before completion is signalled. (since: 5.2;
26
# default: false)
27
#
28
+# @iothread: The name of the iothread object where the export will run. The
29
+# default is to use the thread currently associated with the
30
+# block node. (since: 5.2)
31
+#
32
+# @fixed-iothread: True prevents the block node from being moved to another
33
+# thread while the export is active. If true and @iothread is
34
+# given, export creation fails if the block node cannot be
35
+# moved to the iothread. The default is false. (since: 5.2)
36
+#
37
# Since: 4.2
38
##
39
{ 'union': 'BlockExportOptions',
40
'base': { 'type': 'BlockExportType',
41
'id': 'str',
42
+     '*fixed-iothread': 'bool',
43
+     '*iothread': 'str',
44
'node-name': 'str',
45
'*writable': 'bool',
46
'*writethrough': 'bool' },
47
diff --git a/block/export/export.c b/block/export/export.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/block/export/export.c
50
+++ b/block/export/export.c
51
@@ -XXX,XX +XXX,XX @@
52
53
#include "block/block.h"
54
#include "sysemu/block-backend.h"
55
+#include "sysemu/iothread.h"
56
#include "block/export.h"
57
#include "block/nbd.h"
58
#include "qapi/error.h"
59
@@ -XXX,XX +XXX,XX @@ static const BlockExportDriver *blk_exp_find_driver(BlockExportType type)
60
61
BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
62
{
63
+ bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread;
64
const BlockExportDriver *drv;
65
BlockExport *exp = NULL;
66
BlockDriverState *bs;
67
- BlockBackend *blk;
68
+ BlockBackend *blk = NULL;
69
AioContext *ctx;
70
uint64_t perm;
71
int ret;
72
@@ -XXX,XX +XXX,XX @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
73
ctx = bdrv_get_aio_context(bs);
74
aio_context_acquire(ctx);
75
76
+ if (export->has_iothread) {
77
+ IOThread *iothread;
78
+ AioContext *new_ctx;
79
+
80
+ iothread = iothread_by_id(export->iothread);
81
+ if (!iothread) {
82
+ error_setg(errp, "iothread \"%s\" not found", export->iothread);
83
+ goto fail;
84
+ }
85
+
86
+ new_ctx = iothread_get_aio_context(iothread);
87
+
88
+ ret = bdrv_try_set_aio_context(bs, new_ctx, errp);
89
+ if (ret == 0) {
90
+ aio_context_release(ctx);
91
+ aio_context_acquire(new_ctx);
92
+ ctx = new_ctx;
93
+ } else if (fixed_iothread) {
94
+ goto fail;
95
+ }
96
+ }
97
+
98
/*
99
* Block exports are used for non-shared storage migration. Make sure
100
* that BDRV_O_INACTIVE is cleared and the image is ready for write
101
@@ -XXX,XX +XXX,XX @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
102
}
103
104
blk = blk_new(ctx, perm, BLK_PERM_ALL);
105
+
106
+ if (!fixed_iothread) {
107
+ blk_set_allow_aio_context_change(blk, true);
108
+ }
109
+
110
ret = blk_insert_bs(blk, bs, errp);
111
if (ret < 0) {
112
goto fail;
113
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
114
index XXXXXXX..XXXXXXX 100644
115
--- a/block/export/vhost-user-blk-server.c
116
+++ b/block/export/vhost-user-blk-server.c
117
@@ -XXX,XX +XXX,XX @@ static const VuDevIface vu_blk_iface = {
118
static void blk_aio_attached(AioContext *ctx, void *opaque)
119
{
120
VuBlkExport *vexp = opaque;
121
+
122
+ vexp->export.ctx = ctx;
123
vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
124
}
125
126
static void blk_aio_detach(void *opaque)
127
{
128
VuBlkExport *vexp = opaque;
129
+
130
vhost_user_server_detach_aio_context(&vexp->vu_server);
131
+ vexp->export.ctx = NULL;
132
}
133
134
static void
135
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
136
vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
137
logical_block_size);
138
139
- blk_set_allow_aio_context_change(exp->blk, true);
140
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
141
vexp);
142
143
diff --git a/nbd/server.c b/nbd/server.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/nbd/server.c
146
+++ b/nbd/server.c
147
@@ -XXX,XX +XXX,XX @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
148
return ret;
149
}
150
151
- blk_set_allow_aio_context_change(blk, true);
152
-
153
QTAILQ_INIT(&exp->clients);
154
exp->name = g_strdup(arg->name);
155
exp->description = g_strdup(arg->description);
156
--
157
2.26.2
158
diff view generated by jsdifflib
Deleted patch
1
Allow the number of queues to be configured using --export
2
vhost-user-blk,num-queues=N. This setting should match the QEMU --device
3
vhost-user-blk-pci,num-queues=N setting but QEMU vhost-user-blk.c lowers
4
its own value if the vhost-user-blk backend offers fewer queues than
5
QEMU.
6
1
7
The vhost-user-blk-server.c code is already capable of multi-queue. All
8
virtqueue processing runs in the same AioContext. No new locking is
9
needed.
10
11
Add the num-queues=N option and set the VIRTIO_BLK_F_MQ feature bit.
12
Note that the feature bit only announces the presence of the num_queues
13
configuration space field. It does not promise that there is more than 1
14
virtqueue, so we can set it unconditionally.
15
16
I tested multi-queue by running a random read fio test with numjobs=4 on
17
an -smp 4 guest. After the benchmark finished the guest /proc/interrupts
18
file showed activity on all 4 virtio-blk MSI-X vectors. The /sys/block/vda/mq/
19
directory shows that Linux blk-mq has 4 queues configured.
20
21
An automated test is included in the next commit.
22
23
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
24
Acked-by: Markus Armbruster <armbru@redhat.com>
25
Message-id: 20201001144604.559733-2-stefanha@redhat.com
26
[Fixed accidental tab characters as suggested by Markus Armbruster
27
--Stefan]
28
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
29
---
30
qapi/block-export.json | 10 +++++++---
31
block/export/vhost-user-blk-server.c | 24 ++++++++++++++++++------
32
2 files changed, 25 insertions(+), 9 deletions(-)
33
34
diff --git a/qapi/block-export.json b/qapi/block-export.json
35
index XXXXXXX..XXXXXXX 100644
36
--- a/qapi/block-export.json
37
+++ b/qapi/block-export.json
38
@@ -XXX,XX +XXX,XX @@
39
# SocketAddress types are supported. Passed fds must be UNIX domain
40
# sockets.
41
# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes.
42
+# @num-queues: Number of request virtqueues. Must be greater than 0. Defaults
43
+# to 1.
44
#
45
# Since: 5.2
46
##
47
{ 'struct': 'BlockExportOptionsVhostUserBlk',
48
- 'data': { 'addr': 'SocketAddress', '*logical-block-size': 'size' } }
49
+ 'data': { 'addr': 'SocketAddress',
50
+     '*logical-block-size': 'size',
51
+ '*num-queues': 'uint16'} }
52
53
##
54
# @NbdServerAddOptions:
55
@@ -XXX,XX +XXX,XX @@
56
{ 'union': 'BlockExportOptions',
57
'base': { 'type': 'BlockExportType',
58
'id': 'str',
59
-     '*fixed-iothread': 'bool',
60
-     '*iothread': 'str',
61
+ '*fixed-iothread': 'bool',
62
+ '*iothread': 'str',
63
'node-name': 'str',
64
'*writable': 'bool',
65
'*writethrough': 'bool' },
66
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/block/export/vhost-user-blk-server.c
69
+++ b/block/export/vhost-user-blk-server.c
70
@@ -XXX,XX +XXX,XX @@
71
#include "util/block-helpers.h"
72
73
enum {
74
- VHOST_USER_BLK_MAX_QUEUES = 1,
75
+ VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
76
};
77
struct virtio_blk_inhdr {
78
unsigned char status;
79
@@ -XXX,XX +XXX,XX @@ static uint64_t vu_blk_get_features(VuDev *dev)
80
1ull << VIRTIO_BLK_F_DISCARD |
81
1ull << VIRTIO_BLK_F_WRITE_ZEROES |
82
1ull << VIRTIO_BLK_F_CONFIG_WCE |
83
+ 1ull << VIRTIO_BLK_F_MQ |
84
1ull << VIRTIO_F_VERSION_1 |
85
1ull << VIRTIO_RING_F_INDIRECT_DESC |
86
1ull << VIRTIO_RING_F_EVENT_IDX |
87
@@ -XXX,XX +XXX,XX @@ static void blk_aio_detach(void *opaque)
88
89
static void
90
vu_blk_initialize_config(BlockDriverState *bs,
91
- struct virtio_blk_config *config, uint32_t blk_size)
92
+ struct virtio_blk_config *config,
93
+ uint32_t blk_size,
94
+ uint16_t num_queues)
95
{
96
config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
97
config->blk_size = blk_size;
98
@@ -XXX,XX +XXX,XX @@ vu_blk_initialize_config(BlockDriverState *bs,
99
config->seg_max = 128 - 2;
100
config->min_io_size = 1;
101
config->opt_io_size = 1;
102
- config->num_queues = VHOST_USER_BLK_MAX_QUEUES;
103
+ config->num_queues = num_queues;
104
config->max_discard_sectors = 32768;
105
config->max_discard_seg = 1;
106
config->discard_sector_alignment = config->blk_size >> 9;
107
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
108
BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
109
Error *local_err = NULL;
110
uint64_t logical_block_size;
111
+ uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;
112
113
vexp->writable = opts->writable;
114
vexp->blkcfg.wce = 0;
115
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
116
}
117
vexp->blk_size = logical_block_size;
118
blk_set_guest_block_size(exp->blk, logical_block_size);
119
+
120
+ if (vu_opts->has_num_queues) {
121
+ num_queues = vu_opts->num_queues;
122
+ }
123
+ if (num_queues == 0) {
124
+ error_setg(errp, "num-queues must be greater than 0");
125
+ return -EINVAL;
126
+ }
127
+
128
vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
129
- logical_block_size);
130
+ logical_block_size, num_queues);
131
132
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
133
vexp);
134
135
if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
136
- VHOST_USER_BLK_MAX_QUEUES, &vu_blk_iface,
137
- errp)) {
138
+ num_queues, &vu_blk_iface, errp)) {
139
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
140
blk_aio_detach, vexp);
141
return -EADDRNOTAVAIL;
142
--
143
2.26.2
144
diff view generated by jsdifflib
Deleted patch
1
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2
Message-id: 20201001144604.559733-3-stefanha@redhat.com
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
---
5
tests/qtest/vhost-user-blk-test.c | 81 +++++++++++++++++++++++++++++--
6
1 file changed, 76 insertions(+), 5 deletions(-)
7
1
8
diff --git a/tests/qtest/vhost-user-blk-test.c b/tests/qtest/vhost-user-blk-test.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/tests/qtest/vhost-user-blk-test.c
11
+++ b/tests/qtest/vhost-user-blk-test.c
12
@@ -XXX,XX +XXX,XX @@ static void pci_hotplug(void *obj, void *data, QGuestAllocator *t_alloc)
13
qpci_unplug_acpi_device_test(qts, "drv1", PCI_SLOT_HP);
14
}
15
16
+static void multiqueue(void *obj, void *data, QGuestAllocator *t_alloc)
17
+{
18
+ QVirtioPCIDevice *pdev1 = obj;
19
+ QVirtioDevice *dev1 = &pdev1->vdev;
20
+ QVirtioPCIDevice *pdev8;
21
+ QVirtioDevice *dev8;
22
+ QTestState *qts = pdev1->pdev->bus->qts;
23
+ uint64_t features;
24
+ uint16_t num_queues;
25
+
26
+ /*
27
+ * The primary device has 1 queue and VIRTIO_BLK_F_MQ is not enabled. The
28
+ * VIRTIO specification allows VIRTIO_BLK_F_MQ to be enabled when there is
29
+ * only 1 virtqueue, but --device vhost-user-blk-pci doesn't do this (which
30
+ * is also spec-compliant).
31
+ */
32
+ features = qvirtio_get_features(dev1);
33
+ g_assert_cmpint(features & (1u << VIRTIO_BLK_F_MQ), ==, 0);
34
+ features = features & ~(QVIRTIO_F_BAD_FEATURE |
35
+ (1u << VIRTIO_RING_F_INDIRECT_DESC) |
36
+ (1u << VIRTIO_F_NOTIFY_ON_EMPTY) |
37
+ (1u << VIRTIO_BLK_F_SCSI));
38
+ qvirtio_set_features(dev1, features);
39
+
40
+ /* Hotplug a secondary device with 8 queues */
41
+ qtest_qmp_device_add(qts, "vhost-user-blk-pci", "drv1",
42
+ "{'addr': %s, 'chardev': 'char2', 'num-queues': 8}",
43
+ stringify(PCI_SLOT_HP) ".0");
44
+
45
+ pdev8 = virtio_pci_new(pdev1->pdev->bus,
46
+ &(QPCIAddress) {
47
+ .devfn = QPCI_DEVFN(PCI_SLOT_HP, 0)
48
+ });
49
+ g_assert_nonnull(pdev8);
50
+ g_assert_cmpint(pdev8->vdev.device_type, ==, VIRTIO_ID_BLOCK);
51
+
52
+ qos_object_start_hw(&pdev8->obj);
53
+
54
+ dev8 = &pdev8->vdev;
55
+ features = qvirtio_get_features(dev8);
56
+ g_assert_cmpint(features & (1u << VIRTIO_BLK_F_MQ),
57
+ ==,
58
+ (1u << VIRTIO_BLK_F_MQ));
59
+ features = features & ~(QVIRTIO_F_BAD_FEATURE |
60
+ (1u << VIRTIO_RING_F_INDIRECT_DESC) |
61
+ (1u << VIRTIO_F_NOTIFY_ON_EMPTY) |
62
+ (1u << VIRTIO_BLK_F_SCSI) |
63
+ (1u << VIRTIO_BLK_F_MQ));
64
+ qvirtio_set_features(dev8, features);
65
+
66
+ num_queues = qvirtio_config_readw(dev8,
67
+ offsetof(struct virtio_blk_config, num_queues));
68
+ g_assert_cmpint(num_queues, ==, 8);
69
+
70
+ qvirtio_pci_device_disable(pdev8);
71
+ qos_object_destroy(&pdev8->obj);
72
+
73
+ /* unplug secondary disk */
74
+ qpci_unplug_acpi_device_test(qts, "drv1", PCI_SLOT_HP);
75
+}
76
+
77
/*
78
* Check that setting the vring addr on a non-existent virtqueue does
79
* not crash.
80
@@ -XXX,XX +XXX,XX @@ static void quit_storage_daemon(void *qmp_test_state)
81
g_free(qmp_test_state);
82
}
83
84
-static char *start_vhost_user_blk(GString *cmd_line, int vus_instances)
85
+static char *start_vhost_user_blk(GString *cmd_line, int vus_instances,
86
+ int num_queues)
87
{
88
const char *vhost_user_blk_bin = qtest_qemu_storage_daemon_binary();
89
int fd, qmp_fd, i;
90
@@ -XXX,XX +XXX,XX @@ static char *start_vhost_user_blk(GString *cmd_line, int vus_instances)
91
g_string_append_printf(storage_daemon_command,
92
"--blockdev driver=file,node-name=disk%d,filename=%s "
93
"--export type=vhost-user-blk,id=disk%d,addr.type=unix,addr.path=%s,"
94
- "node-name=disk%i,writable=on ",
95
- i, img_path, i, sock_path, i);
96
+ "node-name=disk%i,writable=on,num-queues=%d ",
97
+ i, img_path, i, sock_path, i, num_queues);
98
99
g_string_append_printf(cmd_line, "-chardev socket,id=char%d,path=%s ",
100
i + 1, sock_path);
101
@@ -XXX,XX +XXX,XX @@ static char *start_vhost_user_blk(GString *cmd_line, int vus_instances)
102
103
static void *vhost_user_blk_test_setup(GString *cmd_line, void *arg)
104
{
105
- start_vhost_user_blk(cmd_line, 1);
106
+ start_vhost_user_blk(cmd_line, 1, 1);
107
return arg;
108
}
109
110
@@ -XXX,XX +XXX,XX @@ static void *vhost_user_blk_test_setup(GString *cmd_line, void *arg)
111
static void *vhost_user_blk_hotplug_test_setup(GString *cmd_line, void *arg)
112
{
113
/* "-chardev socket,id=char2" is used for pci_hotplug*/
114
- start_vhost_user_blk(cmd_line, 2);
115
+ start_vhost_user_blk(cmd_line, 2, 1);
116
+ return arg;
117
+}
118
+
119
+static void *vhost_user_blk_multiqueue_test_setup(GString *cmd_line, void *arg)
120
+{
121
+ start_vhost_user_blk(cmd_line, 2, 8);
122
return arg;
123
}
124
125
@@ -XXX,XX +XXX,XX @@ static void register_vhost_user_blk_test(void)
126
127
opts.before = vhost_user_blk_hotplug_test_setup;
128
qos_add_test("hotplug", "vhost-user-blk-pci", pci_hotplug, &opts);
129
+
130
+ opts.before = vhost_user_blk_multiqueue_test_setup;
131
+ qos_add_test("multiqueue", "vhost-user-blk-pci", multiqueue, &opts);
132
}
133
134
libqos_init(register_vhost_user_blk_test);
135
--
136
2.26.2
137
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
bdrv_co_block_status_above has several design problems with handling
4
short backing files:
5
6
1. With want_zero=true, it may return ret with BDRV_BLOCK_ZERO but
7
without the BDRV_BLOCK_ALLOCATED flag, even though the short backing file
8
which produces these after-EOF zeros is inside requested backing
9
sequence.
10
11
2. With want_zero=false, it may return pnum=0 prior to actual EOF,
12
because of EOF of short backing file.
13
14
Fix these things, making logic about short backing files clearer.
15
16
With fixed bdrv_block_status_above we also have to improve is_zero in
17
qcow2 code, otherwise iotest 154 will fail, because with this patch we
18
stop merging zeros of different types (produced by fully unallocated
19
in the whole backing chain regions vs produced by short backing files).
20
21
Note also, that this patch leaves for another day the general problem
22
around block-status: misuse of BDRV_BLOCK_ALLOCATED as is-fs-allocated
23
vs go-to-backing.
24
25
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
26
Reviewed-by: Alberto Garcia <berto@igalia.com>
27
Reviewed-by: Eric Blake <eblake@redhat.com>
28
Message-id: 20200924194003.22080-2-vsementsov@virtuozzo.com
29
[Fix s/comes/come/ as suggested by Eric Blake
30
--Stefan]
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
32
---
33
block/io.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------
34
block/qcow2.c | 16 ++++++++++--
35
2 files changed, 68 insertions(+), 16 deletions(-)
36
37
diff --git a/block/io.c b/block/io.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/block/io.c
40
+++ b/block/io.c
41
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
42
int64_t *map,
43
BlockDriverState **file)
44
{
45
+ int ret;
46
BlockDriverState *p;
47
- int ret = 0;
48
- bool first = true;
49
+ int64_t eof = 0;
50
51
assert(bs != base);
52
- for (p = bs; p != base; p = bdrv_filter_or_cow_bs(p)) {
53
+
54
+ ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
55
+ if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED) {
56
+ return ret;
57
+ }
58
+
59
+ if (ret & BDRV_BLOCK_EOF) {
60
+ eof = offset + *pnum;
61
+ }
62
+
63
+ assert(*pnum <= bytes);
64
+ bytes = *pnum;
65
+
66
+ for (p = bdrv_filter_or_cow_bs(bs); p != base;
67
+ p = bdrv_filter_or_cow_bs(p))
68
+ {
69
ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
70
file);
71
if (ret < 0) {
72
- break;
73
+ return ret;
74
}
75
- if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
76
+ if (*pnum == 0) {
77
/*
78
- * Reading beyond the end of the file continues to read
79
- * zeroes, but we can only widen the result to the
80
- * unallocated length we learned from an earlier
81
- * iteration.
82
+ * The top layer deferred to this layer, and because this layer is
83
+ * short, any zeroes that we synthesize beyond EOF behave as if they
84
+ * were allocated at this layer.
85
+ *
86
+ * We don't include BDRV_BLOCK_EOF into ret, as upper layer may be
87
+ * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
88
+ * below.
89
*/
90
+ assert(ret & BDRV_BLOCK_EOF);
91
*pnum = bytes;
92
+ if (file) {
93
+ *file = p;
94
+ }
95
+ ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
96
+ break;
97
}
98
- if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
99
+ if (ret & BDRV_BLOCK_ALLOCATED) {
100
+ /*
101
+ * We've found the node and the status, we must break.
102
+ *
103
+ * Drop BDRV_BLOCK_EOF, as it's not for upper layer, which may be
104
+ * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
105
+ * below.
106
+ */
107
+ ret &= ~BDRV_BLOCK_EOF;
108
break;
109
}
110
- /* [offset, pnum] unallocated on this layer, which could be only
111
- * the first part of [offset, bytes]. */
112
- bytes = MIN(bytes, *pnum);
113
- first = false;
114
+
115
+ /*
116
+ * OK, [offset, offset + *pnum) region is unallocated on this layer,
117
+ * let's continue the diving.
118
+ */
119
+ assert(*pnum <= bytes);
120
+ bytes = *pnum;
121
+ }
122
+
123
+ if (offset + *pnum == eof) {
124
+ ret |= BDRV_BLOCK_EOF;
125
}
126
+
127
return ret;
128
}
129
130
diff --git a/block/qcow2.c b/block/qcow2.c
131
index XXXXXXX..XXXXXXX 100644
132
--- a/block/qcow2.c
133
+++ b/block/qcow2.c
134
@@ -XXX,XX +XXX,XX @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
135
if (!bytes) {
136
return true;
137
}
138
- res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
139
- return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
140
+
141
+ /*
142
+ * bdrv_block_status_above doesn't merge different types of zeros, for
143
+ * example, zeros which come from the region which is unallocated in
144
+ * the whole backing chain, and zeros which come because of a short
145
+ * backing file. So, we need a loop.
146
+ */
147
+ do {
148
+ res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
149
+ offset += nr;
150
+ bytes -= nr;
151
+ } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes);
152
+
153
+ return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
154
}
155
156
static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
157
--
158
2.26.2
159
diff view generated by jsdifflib
Deleted patch
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
1
3
We are going to reuse bdrv_common_block_status_above in
4
bdrv_is_allocated_above. bdrv_is_allocated_above may be called with
5
include_base == false and still bs == base (e.g. from img_rebase()).
6
7
So, support this corner case.
8
9
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
11
Reviewed-by: Eric Blake <eblake@redhat.com>
12
Reviewed-by: Alberto Garcia <berto@igalia.com>
13
Message-id: 20200924194003.22080-4-vsementsov@virtuozzo.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
---
16
block/io.c | 6 +++++-
17
1 file changed, 5 insertions(+), 1 deletion(-)
18
19
diff --git a/block/io.c b/block/io.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/block/io.c
22
+++ b/block/io.c
23
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
24
BlockDriverState *p;
25
int64_t eof = 0;
26
27
- assert(include_base || bs != base);
28
assert(!include_base || base); /* Can't include NULL base */
29
30
+ if (!include_base && bs == base) {
31
+ *pnum = bytes;
32
+ return 0;
33
+ }
34
+
35
ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
36
if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
37
return ret;
38
--
39
2.26.2
40
diff view generated by jsdifflib