1
The following changes since commit 33d609990621dea6c7d056c86f707b8811320ac1:
1
The following changes since commit ac793156f650ae2d77834932d72224175ee69086:
2
2
3
Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging (2019-06-18 17:00:52 +0100)
3
Merge remote-tracking branch 'remotes/pmaydell/tags/pull-target-arm-20201020-1' into staging (2020-10-20 21:11:35 +0100)
4
4
5
are available in the Git repository at:
5
are available in the Git repository at:
6
6
7
https://github.com/XanClic/qemu.git tags/pull-block-2019-06-21
7
https://gitlab.com/stefanha/qemu.git tags/block-pull-request
8
8
9
for you to fetch changes up to e2a76186f7948b8b75d1b2b52638de7c2f7f7472:
9
for you to fetch changes up to 32a3fd65e7e3551337fd26bfc0e2f899d70c028c:
10
10
11
iotests: Fix 205 for concurrent runs (2019-06-21 14:40:28 +0200)
11
iotests: add commit top->base cases to 274 (2020-10-22 09:55:39 +0100)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
Block patches:
14
Pull request
15
- The SSH block driver now uses libssh instead of libssh2
15
16
- The VMDK block driver gets read-only support for the seSparse
16
v2:
17
subformat
17
* Fix format string issues on 32-bit hosts [Peter]
18
- Various fixes
18
* Fix qemu-nbd.c CONFIG_POSIX ifdef issue [Eric]
19
* Fix missing eventfd.h header on macOS [Peter]
20
* Drop unreliable vhost-user-blk test (will send a new patch when ready) [Peter]
21
22
This pull request contains the vhost-user-blk server by Coiby Xu along with my
23
additions, block/nvme.c alignment and hardware error statistics by Philippe
24
Mathieu-Daudé, and bdrv_co_block_status_above() fixes by Vladimir
25
Sementsov-Ogievskiy.
19
26
20
----------------------------------------------------------------
27
----------------------------------------------------------------
21
Anton Nefedov (1):
22
iotest 134: test cluster-misaligned encrypted write
23
28
24
Klaus Birkelund Jensen (1):
29
Coiby Xu (6):
25
nvme: do not advertise support for unsupported arbitration mechanism
30
libvhost-user: Allow vu_message_read to be replaced
31
libvhost-user: remove watch for kick_fd when de-initialize vu-dev
32
util/vhost-user-server: generic vhost user server
33
block: move logical block size check function to a common utility
34
function
35
block/export: vhost-user block device backend server
36
MAINTAINERS: Add vhost-user block device backend server maintainer
26
37
27
Max Reitz (1):
38
Philippe Mathieu-Daudé (1):
28
iotests: Fix 205 for concurrent runs
39
block/nvme: Add driver statistics for access alignment and hw errors
29
40
30
Pino Toscano (1):
41
Stefan Hajnoczi (16):
31
ssh: switch from libssh2 to libssh
42
util/vhost-user-server: s/fileds/fields/ typo fix
43
util/vhost-user-server: drop unnecessary QOM cast
44
util/vhost-user-server: drop unnecessary watch deletion
45
block/export: consolidate request structs into VuBlockReq
46
util/vhost-user-server: drop unused DevicePanicNotifier
47
util/vhost-user-server: fix memory leak in vu_message_read()
48
util/vhost-user-server: check EOF when reading payload
49
util/vhost-user-server: rework vu_client_trip() coroutine lifecycle
50
block/export: report flush errors
51
block/export: convert vhost-user-blk server to block export API
52
util/vhost-user-server: move header to include/
53
util/vhost-user-server: use static library in meson.build
54
qemu-storage-daemon: avoid compiling blockdev_ss twice
55
block: move block exports to libblockdev
56
block/export: add iothread and fixed-iothread options
57
block/export: add vhost-user-blk multi-queue support
32
58
33
Sam Eiderman (3):
59
Vladimir Sementsov-Ogievskiy (5):
34
vmdk: Fix comment regarding max l1_size coverage
60
block/io: fix bdrv_co_block_status_above
35
vmdk: Reduce the max bound for L1 table size
61
block/io: bdrv_common_block_status_above: support include_base
36
vmdk: Add read-only support for seSparse snapshots
62
block/io: bdrv_common_block_status_above: support bs == base
63
block/io: fix bdrv_is_allocated_above
64
iotests: add commit top->base cases to 274
37
65
38
Vladimir Sementsov-Ogievskiy (1):
66
MAINTAINERS | 9 +
39
blockdev: enable non-root nodes for transaction drive-backup source
67
qapi/block-core.json | 24 +-
40
68
qapi/block-export.json | 36 +-
41
configure | 65 +-
69
block/coroutines.h | 2 +
42
block/Makefile.objs | 6 +-
70
block/export/vhost-user-blk-server.h | 19 +
43
block/ssh.c | 673 ++++++++++--------
71
contrib/libvhost-user/libvhost-user.h | 21 +
44
block/vmdk.c | 372 +++++++++-
72
include/qemu/vhost-user-server.h | 65 +++
45
blockdev.c | 2 +-
73
util/block-helpers.h | 19 +
46
hw/block/nvme.c | 1 -
74
block/export/export.c | 37 +-
47
.travis.yml | 4 +-
75
block/export/vhost-user-blk-server.c | 431 ++++++++++++++++++++
48
block/trace-events | 14 +-
76
block/io.c | 132 +++---
49
docs/qemu-block-drivers.texi | 2 +-
77
block/nvme.c | 27 ++
50
.../dockerfiles/debian-win32-cross.docker | 1 -
78
block/qcow2.c | 16 +-
51
.../dockerfiles/debian-win64-cross.docker | 1 -
79
contrib/libvhost-user/libvhost-user-glib.c | 2 +-
52
tests/docker/dockerfiles/fedora.docker | 4 +-
80
contrib/libvhost-user/libvhost-user.c | 15 +-
53
tests/docker/dockerfiles/ubuntu.docker | 2 +-
81
hw/core/qdev-properties-system.c | 31 +-
54
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
82
nbd/server.c | 2 -
55
tests/qemu-iotests/059.out | 2 +-
83
qemu-nbd.c | 21 +-
56
tests/qemu-iotests/134 | 9 +
84
softmmu/vl.c | 4 +
57
tests/qemu-iotests/134.out | 10 +
85
stubs/blk-exp-close-all.c | 7 +
58
tests/qemu-iotests/205 | 2 +-
86
tests/vhost-user-bridge.c | 2 +
59
tests/qemu-iotests/207 | 54 +-
87
tools/virtiofsd/fuse_virtio.c | 4 +-
60
tests/qemu-iotests/207.out | 2 +-
88
util/block-helpers.c | 46 +++
61
20 files changed, 844 insertions(+), 384 deletions(-)
89
util/vhost-user-server.c | 446 +++++++++++++++++++++
90
block/export/meson.build | 3 +-
91
contrib/libvhost-user/meson.build | 1 +
92
meson.build | 22 +-
93
nbd/meson.build | 2 +
94
storage-daemon/meson.build | 3 +-
95
stubs/meson.build | 1 +
96
tests/qemu-iotests/274 | 20 +
97
tests/qemu-iotests/274.out | 68 ++++
98
util/meson.build | 4 +
99
33 files changed, 1420 insertions(+), 122 deletions(-)
100
create mode 100644 block/export/vhost-user-blk-server.h
101
create mode 100644 include/qemu/vhost-user-server.h
102
create mode 100644 util/block-helpers.h
103
create mode 100644 block/export/vhost-user-blk-server.c
104
create mode 100644 stubs/blk-exp-close-all.c
105
create mode 100644 util/block-helpers.c
106
create mode 100644 util/vhost-user-server.c
62
107
63
--
108
--
64
2.21.0
109
2.26.2
65
110
66
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Keep statistics of some hardware errors, and number of
4
aligned/unaligned I/O accesses.
5
6
QMP example booting a full RHEL 8.3 aarch64 guest:
7
8
{ "execute": "query-blockstats" }
9
{
10
"return": [
11
{
12
"device": "",
13
"node-name": "drive0",
14
"stats": {
15
"flush_total_time_ns": 6026948,
16
"wr_highest_offset": 3383991230464,
17
"wr_total_time_ns": 807450995,
18
"failed_wr_operations": 0,
19
"failed_rd_operations": 0,
20
"wr_merged": 3,
21
"wr_bytes": 50133504,
22
"failed_unmap_operations": 0,
23
"failed_flush_operations": 0,
24
"account_invalid": false,
25
"rd_total_time_ns": 1846979900,
26
"flush_operations": 130,
27
"wr_operations": 659,
28
"rd_merged": 1192,
29
"rd_bytes": 218244096,
30
"account_failed": false,
31
"idle_time_ns": 2678641497,
32
"rd_operations": 7406,
33
},
34
"driver-specific": {
35
"driver": "nvme",
36
"completion-errors": 0,
37
"unaligned-accesses": 2959,
38
"aligned-accesses": 4477
39
},
40
"qdev": "/machine/peripheral-anon/device[0]/virtio-backend"
41
}
42
]
43
}
44
45
Suggested-by: Stefan Hajnoczi <stefanha@gmail.com>
46
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
47
Acked-by: Markus Armbruster <armbru@redhat.com>
48
Message-id: 20201001162939.1567915-1-philmd@redhat.com
49
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
50
---
51
qapi/block-core.json | 24 +++++++++++++++++++++++-
52
block/nvme.c | 27 +++++++++++++++++++++++++++
53
2 files changed, 50 insertions(+), 1 deletion(-)
54
55
diff --git a/qapi/block-core.json b/qapi/block-core.json
56
index XXXXXXX..XXXXXXX 100644
57
--- a/qapi/block-core.json
58
+++ b/qapi/block-core.json
59
@@ -XXX,XX +XXX,XX @@
60
'discard-nb-failed': 'uint64',
61
'discard-bytes-ok': 'uint64' } }
62
63
+##
64
+# @BlockStatsSpecificNvme:
65
+#
66
+# NVMe driver statistics
67
+#
68
+# @completion-errors: The number of completion errors.
69
+#
70
+# @aligned-accesses: The number of aligned accesses performed by
71
+# the driver.
72
+#
73
+# @unaligned-accesses: The number of unaligned accesses performed by
74
+# the driver.
75
+#
76
+# Since: 5.2
77
+##
78
+{ 'struct': 'BlockStatsSpecificNvme',
79
+ 'data': {
80
+ 'completion-errors': 'uint64',
81
+ 'aligned-accesses': 'uint64',
82
+ 'unaligned-accesses': 'uint64' } }
83
+
84
##
85
# @BlockStatsSpecific:
86
#
87
@@ -XXX,XX +XXX,XX @@
88
'discriminator': 'driver',
89
'data': {
90
'file': 'BlockStatsSpecificFile',
91
- 'host_device': 'BlockStatsSpecificFile' } }
92
+ 'host_device': 'BlockStatsSpecificFile',
93
+ 'nvme': 'BlockStatsSpecificNvme' } }
94
95
##
96
# @BlockStats:
97
diff --git a/block/nvme.c b/block/nvme.c
98
index XXXXXXX..XXXXXXX 100644
99
--- a/block/nvme.c
100
+++ b/block/nvme.c
101
@@ -XXX,XX +XXX,XX @@ struct BDRVNVMeState {
102
103
/* PCI address (required for nvme_refresh_filename()) */
104
char *device;
105
+
106
+ struct {
107
+ uint64_t completion_errors;
108
+ uint64_t aligned_accesses;
109
+ uint64_t unaligned_accesses;
110
+ } stats;
111
};
112
113
#define NVME_BLOCK_OPT_DEVICE "device"
114
@@ -XXX,XX +XXX,XX @@ static bool nvme_process_completion(NVMeQueuePair *q)
115
break;
116
}
117
ret = nvme_translate_error(c);
118
+ if (ret) {
119
+ s->stats.completion_errors++;
120
+ }
121
q->cq.head = (q->cq.head + 1) % NVME_QUEUE_SIZE;
122
if (!q->cq.head) {
123
q->cq_phase = !q->cq_phase;
124
@@ -XXX,XX +XXX,XX @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
125
assert(QEMU_IS_ALIGNED(bytes, s->page_size));
126
assert(bytes <= s->max_transfer);
127
if (nvme_qiov_aligned(bs, qiov)) {
128
+ s->stats.aligned_accesses++;
129
return nvme_co_prw_aligned(bs, offset, bytes, qiov, is_write, flags);
130
}
131
+ s->stats.unaligned_accesses++;
132
trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
133
buf = qemu_try_memalign(s->page_size, bytes);
134
135
@@ -XXX,XX +XXX,XX @@ static void nvme_unregister_buf(BlockDriverState *bs, void *host)
136
qemu_vfio_dma_unmap(s->vfio, host);
137
}
138
139
+static BlockStatsSpecific *nvme_get_specific_stats(BlockDriverState *bs)
140
+{
141
+ BlockStatsSpecific *stats = g_new(BlockStatsSpecific, 1);
142
+ BDRVNVMeState *s = bs->opaque;
143
+
144
+ stats->driver = BLOCKDEV_DRIVER_NVME;
145
+ stats->u.nvme = (BlockStatsSpecificNvme) {
146
+ .completion_errors = s->stats.completion_errors,
147
+ .aligned_accesses = s->stats.aligned_accesses,
148
+ .unaligned_accesses = s->stats.unaligned_accesses,
149
+ };
150
+
151
+ return stats;
152
+}
153
+
154
static const char *const nvme_strong_runtime_opts[] = {
155
NVME_BLOCK_OPT_DEVICE,
156
NVME_BLOCK_OPT_NAMESPACE,
157
@@ -XXX,XX +XXX,XX @@ static BlockDriver bdrv_nvme = {
158
.bdrv_refresh_filename = nvme_refresh_filename,
159
.bdrv_refresh_limits = nvme_refresh_limits,
160
.strong_runtime_opts = nvme_strong_runtime_opts,
161
+ .bdrv_get_specific_stats = nvme_get_specific_stats,
162
163
.bdrv_detach_aio_context = nvme_detach_aio_context,
164
.bdrv_attach_aio_context = nvme_attach_aio_context,
165
--
166
2.26.2
167
diff view generated by jsdifflib
New patch
1
From: Coiby Xu <coiby.xu@gmail.com>
1
2
3
Allow vu_message_read to be replaced by one which will make use of the
4
QIOChannel functions. Thus reading a vhost-user message won't stall the
5
guest. For the slave channel, we still use the default vu_message_read.
6
7
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
8
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
9
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
10
Message-id: 20200918080912.321299-2-coiby.xu@gmail.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
contrib/libvhost-user/libvhost-user.h | 21 +++++++++++++++++++++
14
contrib/libvhost-user/libvhost-user-glib.c | 2 +-
15
contrib/libvhost-user/libvhost-user.c | 14 +++++++-------
16
tests/vhost-user-bridge.c | 2 ++
17
tools/virtiofsd/fuse_virtio.c | 4 ++--
18
5 files changed, 33 insertions(+), 10 deletions(-)
19
20
diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h
21
index XXXXXXX..XXXXXXX 100644
22
--- a/contrib/libvhost-user/libvhost-user.h
23
+++ b/contrib/libvhost-user/libvhost-user.h
24
@@ -XXX,XX +XXX,XX @@
25
*/
26
#define VHOST_USER_MAX_RAM_SLOTS 32
27
28
+#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
29
+
30
typedef enum VhostSetConfigType {
31
VHOST_SET_CONFIG_TYPE_MASTER = 0,
32
VHOST_SET_CONFIG_TYPE_MIGRATION = 1,
33
@@ -XXX,XX +XXX,XX @@ typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
34
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
35
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
36
int *do_reply);
37
+typedef bool (*vu_read_msg_cb) (VuDev *dev, int sock, VhostUserMsg *vmsg);
38
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
39
typedef bool (*vu_queue_is_processed_in_order_cb) (VuDev *dev, int qidx);
40
typedef int (*vu_get_config_cb) (VuDev *dev, uint8_t *config, uint32_t len);
41
@@ -XXX,XX +XXX,XX @@ struct VuDev {
42
bool broken;
43
uint16_t max_queues;
44
45
+ /* @read_msg: custom method to read vhost-user message
46
+ *
47
+ * Read data from vhost_user socket fd and fill up
48
+ * the passed VhostUserMsg *vmsg struct.
49
+ *
50
+ * If reading fails, it should close the received set of file
51
+ * descriptors as socket message's auxiliary data.
52
+ *
53
+ * For the details, please refer to vu_message_read in libvhost-user.c
54
+ * which will be used by default if no custom method is provided when
55
+ * calling vu_init
56
+ *
57
+ * Returns: true if vhost-user message successfully received,
58
+ * otherwise return false.
59
+ *
60
+ */
61
+ vu_read_msg_cb read_msg;
62
/* @set_watch: add or update the given fd to the watch set,
63
* call cb when condition is met */
64
vu_set_watch_cb set_watch;
65
@@ -XXX,XX +XXX,XX @@ bool vu_init(VuDev *dev,
66
uint16_t max_queues,
67
int socket,
68
vu_panic_cb panic,
69
+ vu_read_msg_cb read_msg,
70
vu_set_watch_cb set_watch,
71
vu_remove_watch_cb remove_watch,
72
const VuDevIface *iface);
73
diff --git a/contrib/libvhost-user/libvhost-user-glib.c b/contrib/libvhost-user/libvhost-user-glib.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/contrib/libvhost-user/libvhost-user-glib.c
76
+++ b/contrib/libvhost-user/libvhost-user-glib.c
77
@@ -XXX,XX +XXX,XX @@ vug_init(VugDev *dev, uint16_t max_queues, int socket,
78
g_assert(dev);
79
g_assert(iface);
80
81
- if (!vu_init(&dev->parent, max_queues, socket, panic, set_watch,
82
+ if (!vu_init(&dev->parent, max_queues, socket, panic, NULL, set_watch,
83
remove_watch, iface)) {
84
return false;
85
}
86
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/contrib/libvhost-user/libvhost-user.c
89
+++ b/contrib/libvhost-user/libvhost-user.c
90
@@ -XXX,XX +XXX,XX @@
91
/* The version of inflight buffer */
92
#define INFLIGHT_VERSION 1
93
94
-#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
95
-
96
/* The version of the protocol we support */
97
#define VHOST_USER_VERSION 1
98
#define LIBVHOST_USER_DEBUG 0
99
@@ -XXX,XX +XXX,XX @@ have_userfault(void)
100
}
101
102
static bool
103
-vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
104
+vu_message_read_default(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
105
{
106
char control[CMSG_SPACE(VHOST_MEMORY_BASELINE_NREGIONS * sizeof(int))] = {};
107
struct iovec iov = {
108
@@ -XXX,XX +XXX,XX @@ vu_process_message_reply(VuDev *dev, const VhostUserMsg *vmsg)
109
goto out;
110
}
111
112
- if (!vu_message_read(dev, dev->slave_fd, &msg_reply)) {
113
+ if (!vu_message_read_default(dev, dev->slave_fd, &msg_reply)) {
114
goto out;
115
}
116
117
@@ -XXX,XX +XXX,XX @@ vu_set_mem_table_exec_postcopy(VuDev *dev, VhostUserMsg *vmsg)
118
/* Wait for QEMU to confirm that it's registered the handler for the
119
* faults.
120
*/
121
- if (!vu_message_read(dev, dev->sock, vmsg) ||
122
+ if (!dev->read_msg(dev, dev->sock, vmsg) ||
123
vmsg->size != sizeof(vmsg->payload.u64) ||
124
vmsg->payload.u64 != 0) {
125
vu_panic(dev, "failed to receive valid ack for postcopy set-mem-table");
126
@@ -XXX,XX +XXX,XX @@ vu_dispatch(VuDev *dev)
127
int reply_requested;
128
bool need_reply, success = false;
129
130
- if (!vu_message_read(dev, dev->sock, &vmsg)) {
131
+ if (!dev->read_msg(dev, dev->sock, &vmsg)) {
132
goto end;
133
}
134
135
@@ -XXX,XX +XXX,XX @@ vu_init(VuDev *dev,
136
uint16_t max_queues,
137
int socket,
138
vu_panic_cb panic,
139
+ vu_read_msg_cb read_msg,
140
vu_set_watch_cb set_watch,
141
vu_remove_watch_cb remove_watch,
142
const VuDevIface *iface)
143
@@ -XXX,XX +XXX,XX @@ vu_init(VuDev *dev,
144
145
dev->sock = socket;
146
dev->panic = panic;
147
+ dev->read_msg = read_msg ? read_msg : vu_message_read_default;
148
dev->set_watch = set_watch;
149
dev->remove_watch = remove_watch;
150
dev->iface = iface;
151
@@ -XXX,XX +XXX,XX @@ static void _vu_queue_notify(VuDev *dev, VuVirtq *vq, bool sync)
152
153
vu_message_write(dev, dev->slave_fd, &vmsg);
154
if (ack) {
155
- vu_message_read(dev, dev->slave_fd, &vmsg);
156
+ vu_message_read_default(dev, dev->slave_fd, &vmsg);
157
}
158
return;
159
}
160
diff --git a/tests/vhost-user-bridge.c b/tests/vhost-user-bridge.c
161
index XXXXXXX..XXXXXXX 100644
162
--- a/tests/vhost-user-bridge.c
163
+++ b/tests/vhost-user-bridge.c
164
@@ -XXX,XX +XXX,XX @@ vubr_accept_cb(int sock, void *ctx)
165
VHOST_USER_BRIDGE_MAX_QUEUES,
166
conn_fd,
167
vubr_panic,
168
+ NULL,
169
vubr_set_watch,
170
vubr_remove_watch,
171
&vuiface)) {
172
@@ -XXX,XX +XXX,XX @@ vubr_new(const char *path, bool client)
173
VHOST_USER_BRIDGE_MAX_QUEUES,
174
dev->sock,
175
vubr_panic,
176
+ NULL,
177
vubr_set_watch,
178
vubr_remove_watch,
179
&vuiface)) {
180
diff --git a/tools/virtiofsd/fuse_virtio.c b/tools/virtiofsd/fuse_virtio.c
181
index XXXXXXX..XXXXXXX 100644
182
--- a/tools/virtiofsd/fuse_virtio.c
183
+++ b/tools/virtiofsd/fuse_virtio.c
184
@@ -XXX,XX +XXX,XX @@ int virtio_session_mount(struct fuse_session *se)
185
se->vu_socketfd = data_sock;
186
se->virtio_dev->se = se;
187
pthread_rwlock_init(&se->virtio_dev->vu_dispatch_rwlock, NULL);
188
- vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, fv_set_watch,
189
- fv_remove_watch, &fv_iface);
190
+ vu_init(&se->virtio_dev->dev, 2, se->vu_socketfd, fv_panic, NULL,
191
+ fv_set_watch, fv_remove_watch, &fv_iface);
192
193
return 0;
194
}
195
--
196
2.26.2
197
diff view generated by jsdifflib
New patch
1
From: Coiby Xu <coiby.xu@gmail.com>
1
2
3
When the client is running in gdb and quit command is run in gdb,
4
QEMU will still dispatch the event, which will cause a segmentation fault in
5
the callback function.
6
7
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
10
Message-id: 20200918080912.321299-3-coiby.xu@gmail.com
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
contrib/libvhost-user/libvhost-user.c | 1 +
14
1 file changed, 1 insertion(+)
15
16
diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/contrib/libvhost-user/libvhost-user.c
19
+++ b/contrib/libvhost-user/libvhost-user.c
20
@@ -XXX,XX +XXX,XX @@ vu_deinit(VuDev *dev)
21
}
22
23
if (vq->kick_fd != -1) {
24
+ dev->remove_watch(dev, vq->kick_fd);
25
close(vq->kick_fd);
26
vq->kick_fd = -1;
27
}
28
--
29
2.26.2
30
diff view generated by jsdifflib
New patch
1
From: Coiby Xu <coiby.xu@gmail.com>
1
2
3
Sharing QEMU devices via vhost-user protocol.
4
5
Only one vhost-user client can connect to the server at a time.
6
7
Suggested-by: Kevin Wolf <kwolf@redhat.com>
8
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
10
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
11
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
12
Message-id: 20200918080912.321299-4-coiby.xu@gmail.com
13
[Fixed size_t %lu -> %zu format string compiler error.
14
--Stefan]
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
---
17
util/vhost-user-server.h | 65 ++++++
18
util/vhost-user-server.c | 428 +++++++++++++++++++++++++++++++++++++++
19
util/meson.build | 1 +
20
3 files changed, 494 insertions(+)
21
create mode 100644 util/vhost-user-server.h
22
create mode 100644 util/vhost-user-server.c
23
24
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
25
new file mode 100644
26
index XXXXXXX..XXXXXXX
27
--- /dev/null
28
+++ b/util/vhost-user-server.h
29
@@ -XXX,XX +XXX,XX @@
30
+/*
31
+ * Sharing QEMU devices via vhost-user protocol
32
+ *
33
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
34
+ * Copyright (c) 2020 Red Hat, Inc.
35
+ *
36
+ * This work is licensed under the terms of the GNU GPL, version 2 or
37
+ * later. See the COPYING file in the top-level directory.
38
+ */
39
+
40
+#ifndef VHOST_USER_SERVER_H
41
+#define VHOST_USER_SERVER_H
42
+
43
+#include "contrib/libvhost-user/libvhost-user.h"
44
+#include "io/channel-socket.h"
45
+#include "io/channel-file.h"
46
+#include "io/net-listener.h"
47
+#include "qemu/error-report.h"
48
+#include "qapi/error.h"
49
+#include "standard-headers/linux/virtio_blk.h"
50
+
51
+typedef struct VuFdWatch {
52
+ VuDev *vu_dev;
53
+ int fd; /*kick fd*/
54
+ void *pvt;
55
+ vu_watch_cb cb;
56
+ bool processing;
57
+ QTAILQ_ENTRY(VuFdWatch) next;
58
+} VuFdWatch;
59
+
60
+typedef struct VuServer VuServer;
61
+typedef void DevicePanicNotifierFn(VuServer *server);
62
+
63
+struct VuServer {
64
+ QIONetListener *listener;
65
+ AioContext *ctx;
66
+ DevicePanicNotifierFn *device_panic_notifier;
67
+ int max_queues;
68
+ const VuDevIface *vu_iface;
69
+ VuDev vu_dev;
70
+ QIOChannel *ioc; /* The I/O channel with the client */
71
+ QIOChannelSocket *sioc; /* The underlying data channel with the client */
72
+ /* IOChannel for fd provided via VHOST_USER_SET_SLAVE_REQ_FD */
73
+ QIOChannel *ioc_slave;
74
+ QIOChannelSocket *sioc_slave;
75
+ Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
76
+ QTAILQ_HEAD(, VuFdWatch) vu_fd_watches;
77
+ /* restart coroutine co_trip if AIOContext is changed */
78
+ bool aio_context_changed;
79
+ bool processing_msg;
80
+};
81
+
82
+bool vhost_user_server_start(VuServer *server,
83
+ SocketAddress *unix_socket,
84
+ AioContext *ctx,
85
+ uint16_t max_queues,
86
+ DevicePanicNotifierFn *device_panic_notifier,
87
+ const VuDevIface *vu_iface,
88
+ Error **errp);
89
+
90
+void vhost_user_server_stop(VuServer *server);
91
+
92
+void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx);
93
+
94
+#endif /* VHOST_USER_SERVER_H */
95
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
96
new file mode 100644
97
index XXXXXXX..XXXXXXX
98
--- /dev/null
99
+++ b/util/vhost-user-server.c
100
@@ -XXX,XX +XXX,XX @@
101
+/*
102
+ * Sharing QEMU devices via vhost-user protocol
103
+ *
104
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
105
+ * Copyright (c) 2020 Red Hat, Inc.
106
+ *
107
+ * This work is licensed under the terms of the GNU GPL, version 2 or
108
+ * later. See the COPYING file in the top-level directory.
109
+ */
110
+#include "qemu/osdep.h"
111
+#include "qemu/main-loop.h"
112
+#include "vhost-user-server.h"
113
+
114
+static void vmsg_close_fds(VhostUserMsg *vmsg)
115
+{
116
+ int i;
117
+ for (i = 0; i < vmsg->fd_num; i++) {
118
+ close(vmsg->fds[i]);
119
+ }
120
+}
121
+
122
+static void vmsg_unblock_fds(VhostUserMsg *vmsg)
123
+{
124
+ int i;
125
+ for (i = 0; i < vmsg->fd_num; i++) {
126
+ qemu_set_nonblock(vmsg->fds[i]);
127
+ }
128
+}
129
+
130
+static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
131
+ gpointer opaque);
132
+
133
+static void close_client(VuServer *server)
134
+{
135
+ /*
136
+ * Before closing the client
137
+ *
138
+ * 1. Let vu_client_trip stop processing new vhost-user msg
139
+ *
140
+ * 2. remove kick_handler
141
+ *
142
+ * 3. wait for the kick handler to be finished
143
+ *
144
+ * 4. wait for the current vhost-user msg to be finished processing
145
+ */
146
+
147
+ QIOChannelSocket *sioc = server->sioc;
148
+ /* When this is set vu_client_trip will stop processing new vhost-user messages */
149
+ server->sioc = NULL;
150
+
151
+ VuFdWatch *vu_fd_watch, *next;
152
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
153
+ aio_set_fd_handler(server->ioc->ctx, vu_fd_watch->fd, true, NULL,
154
+ NULL, NULL, NULL);
155
+ }
156
+
157
+ while (!QTAILQ_EMPTY(&server->vu_fd_watches)) {
158
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
159
+ if (!vu_fd_watch->processing) {
160
+ QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
161
+ g_free(vu_fd_watch);
162
+ }
163
+ }
164
+ }
165
+
166
+ while (server->processing_msg) {
167
+ if (server->ioc->read_coroutine) {
168
+ server->ioc->read_coroutine = NULL;
169
+ qio_channel_set_aio_fd_handler(server->ioc, server->ioc->ctx, NULL,
170
+ NULL, server->ioc);
171
+ server->processing_msg = false;
172
+ }
173
+ }
174
+
175
+ vu_deinit(&server->vu_dev);
176
+ object_unref(OBJECT(sioc));
177
+ object_unref(OBJECT(server->ioc));
178
+}
179
+
180
+static void panic_cb(VuDev *vu_dev, const char *buf)
181
+{
182
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
183
+
184
+ /* avoid while loop in close_client */
185
+ server->processing_msg = false;
186
+
187
+ if (buf) {
188
+ error_report("vu_panic: %s", buf);
189
+ }
190
+
191
+ if (server->sioc) {
192
+ close_client(server);
193
+ }
194
+
195
+ if (server->device_panic_notifier) {
196
+ server->device_panic_notifier(server);
197
+ }
198
+
199
+ /*
200
+ * Set the callback function for network listener so another
201
+ * vhost-user client can connect to this server
202
+ */
203
+ qio_net_listener_set_client_func(server->listener,
204
+ vu_accept,
205
+ server,
206
+ NULL);
207
+}
208
+
209
+static bool coroutine_fn
210
+vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
211
+{
212
+ struct iovec iov = {
213
+ .iov_base = (char *)vmsg,
214
+ .iov_len = VHOST_USER_HDR_SIZE,
215
+ };
216
+ int rc, read_bytes = 0;
217
+ Error *local_err = NULL;
218
+ /*
219
+ * Store fds/nfds returned from qio_channel_readv_full into
220
+ * temporary variables.
221
+ *
222
+ * VhostUserMsg is a packed structure, gcc will complain about passing
223
+ * pointer to a packed structure member if we pass &VhostUserMsg.fd_num
224
+ * and &VhostUserMsg.fds directly when calling qio_channel_readv_full,
225
+ * thus two temporary variables nfds and fds are used here.
226
+ */
227
+ size_t nfds = 0, nfds_t = 0;
228
+ const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
229
+ int *fds_t = NULL;
230
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
231
+ QIOChannel *ioc = server->ioc;
232
+
233
+ if (!ioc) {
234
+ error_report_err(local_err);
235
+ goto fail;
236
+ }
237
+
238
+ assert(qemu_in_coroutine());
239
+ do {
240
+ /*
241
+ * qio_channel_readv_full may have short reads, keep calling it
242
+ * until getting VHOST_USER_HDR_SIZE or 0 bytes in total
243
+ */
244
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds_t, &nfds_t, &local_err);
245
+ if (rc < 0) {
246
+ if (rc == QIO_CHANNEL_ERR_BLOCK) {
247
+ qio_channel_yield(ioc, G_IO_IN);
248
+ continue;
249
+ } else {
250
+ error_report_err(local_err);
251
+ return false;
252
+ }
253
+ }
254
+ read_bytes += rc;
255
+ if (nfds_t > 0) {
256
+ if (nfds + nfds_t > max_fds) {
257
+ error_report("A maximum of %zu fds are allowed, "
258
+ "however got %zu fds now",
259
+ max_fds, nfds + nfds_t);
260
+ goto fail;
261
+ }
262
+ memcpy(vmsg->fds + nfds, fds_t,
263
+ nfds_t *sizeof(vmsg->fds[0]));
264
+ nfds += nfds_t;
265
+ g_free(fds_t);
266
+ }
267
+ if (read_bytes == VHOST_USER_HDR_SIZE || rc == 0) {
268
+ break;
269
+ }
270
+ iov.iov_base = (char *)vmsg + read_bytes;
271
+ iov.iov_len = VHOST_USER_HDR_SIZE - read_bytes;
272
+ } while (true);
273
+
274
+ vmsg->fd_num = nfds;
275
+ /* qio_channel_readv_full will make socket fds blocking, unblock them */
276
+ vmsg_unblock_fds(vmsg);
277
+ if (vmsg->size > sizeof(vmsg->payload)) {
278
+ error_report("Error: too big message request: %d, "
279
+ "size: vmsg->size: %u, "
280
+ "while sizeof(vmsg->payload) = %zu",
281
+ vmsg->request, vmsg->size, sizeof(vmsg->payload));
282
+ goto fail;
283
+ }
284
+
285
+ struct iovec iov_payload = {
286
+ .iov_base = (char *)&vmsg->payload,
287
+ .iov_len = vmsg->size,
288
+ };
289
+ if (vmsg->size) {
290
+ rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
291
+ if (rc == -1) {
292
+ error_report_err(local_err);
293
+ goto fail;
294
+ }
295
+ }
296
+
297
+ return true;
298
+
299
+fail:
300
+ vmsg_close_fds(vmsg);
301
+
302
+ return false;
303
+}
304
+
305
+
306
+static void vu_client_start(VuServer *server);
307
+static coroutine_fn void vu_client_trip(void *opaque)
308
+{
309
+ VuServer *server = opaque;
310
+
311
+ while (!server->aio_context_changed && server->sioc) {
312
+ server->processing_msg = true;
313
+ vu_dispatch(&server->vu_dev);
314
+ server->processing_msg = false;
315
+ }
316
+
317
+ if (server->aio_context_changed && server->sioc) {
318
+ server->aio_context_changed = false;
319
+ vu_client_start(server);
320
+ }
321
+}
322
+
323
+static void vu_client_start(VuServer *server)
324
+{
325
+ server->co_trip = qemu_coroutine_create(vu_client_trip, server);
326
+ aio_co_enter(server->ctx, server->co_trip);
327
+}
328
+
329
+/*
330
+ * a wrapper for vu_kick_cb
331
+ *
332
+ * since aio_dispatch can only pass one user data pointer to the
333
+ * callback function, pack VuDev and pvt into a struct. Then unpack it
334
+ * and pass them to vu_kick_cb
335
+ */
336
+static void kick_handler(void *opaque)
337
+{
338
+ VuFdWatch *vu_fd_watch = opaque;
339
+ vu_fd_watch->processing = true;
340
+ vu_fd_watch->cb(vu_fd_watch->vu_dev, 0, vu_fd_watch->pvt);
341
+ vu_fd_watch->processing = false;
342
+}
343
+
344
+
345
+static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
346
+{
347
+
348
+ VuFdWatch *vu_fd_watch, *next;
349
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
350
+ if (vu_fd_watch->fd == fd) {
351
+ return vu_fd_watch;
352
+ }
353
+ }
354
+ return NULL;
355
+}
356
+
357
+static void
358
+set_watch(VuDev *vu_dev, int fd, int vu_evt,
359
+ vu_watch_cb cb, void *pvt)
360
+{
361
+
362
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
363
+ g_assert(vu_dev);
364
+ g_assert(fd >= 0);
365
+ g_assert(cb);
366
+
367
+ VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
368
+
369
+ if (!vu_fd_watch) {
370
+ VuFdWatch *vu_fd_watch = g_new0(VuFdWatch, 1);
371
+
372
+ QTAILQ_INSERT_TAIL(&server->vu_fd_watches, vu_fd_watch, next);
373
+
374
+ vu_fd_watch->fd = fd;
375
+ vu_fd_watch->cb = cb;
376
+ qemu_set_nonblock(fd);
377
+ aio_set_fd_handler(server->ioc->ctx, fd, true, kick_handler,
378
+ NULL, NULL, vu_fd_watch);
379
+ vu_fd_watch->vu_dev = vu_dev;
380
+ vu_fd_watch->pvt = pvt;
381
+ }
382
+}
383
+
384
+
385
+static void remove_watch(VuDev *vu_dev, int fd)
386
+{
387
+ VuServer *server;
388
+ g_assert(vu_dev);
389
+ g_assert(fd >= 0);
390
+
391
+ server = container_of(vu_dev, VuServer, vu_dev);
392
+
393
+ VuFdWatch *vu_fd_watch = find_vu_fd_watch(server, fd);
394
+
395
+ if (!vu_fd_watch) {
396
+ return;
397
+ }
398
+ aio_set_fd_handler(server->ioc->ctx, fd, true, NULL, NULL, NULL, NULL);
399
+
400
+ QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
401
+ g_free(vu_fd_watch);
402
+}
403
+
404
+
405
+static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
406
+ gpointer opaque)
407
+{
408
+ VuServer *server = opaque;
409
+
410
+ if (server->sioc) {
411
+ warn_report("Only one vhost-user client is allowed to "
412
+ "connect the server one time");
413
+ return;
414
+ }
415
+
416
+ if (!vu_init(&server->vu_dev, server->max_queues, sioc->fd, panic_cb,
417
+ vu_message_read, set_watch, remove_watch, server->vu_iface)) {
418
+ error_report("Failed to initialize libvhost-user");
419
+ return;
420
+ }
421
+
422
+ /*
423
+ * Unset the callback function for network listener to make another
424
+ * vhost-user client keep waiting until this client disconnects
425
+ */
426
+ qio_net_listener_set_client_func(server->listener,
427
+ NULL,
428
+ NULL,
429
+ NULL);
430
+ server->sioc = sioc;
431
+ /*
432
+ * Increase the object reference, so sioc will not be freed by
433
+ * qio_net_listener_channel_func which will call object_unref(OBJECT(sioc))
434
+ */
435
+ object_ref(OBJECT(server->sioc));
436
+ qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");
437
+ server->ioc = QIO_CHANNEL(sioc);
438
+ object_ref(OBJECT(server->ioc));
439
+ qio_channel_attach_aio_context(server->ioc, server->ctx);
440
+ qio_channel_set_blocking(QIO_CHANNEL(server->sioc), false, NULL);
441
+ vu_client_start(server);
442
+}
443
+
444
+
445
+void vhost_user_server_stop(VuServer *server)
446
+{
447
+ if (server->sioc) {
448
+ close_client(server);
449
+ }
450
+
451
+ if (server->listener) {
452
+ qio_net_listener_disconnect(server->listener);
453
+ object_unref(OBJECT(server->listener));
454
+ }
455
+
456
+}
457
+
458
+void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx)
459
+{
460
+ VuFdWatch *vu_fd_watch, *next;
461
+ void *opaque = NULL;
462
+ IOHandler *io_read = NULL;
463
+ bool attach;
464
+
465
+ server->ctx = ctx ? ctx : qemu_get_aio_context();
466
+
467
+ if (!server->sioc) {
468
+ /* not yet serving any client*/
469
+ return;
470
+ }
471
+
472
+ if (ctx) {
473
+ qio_channel_attach_aio_context(server->ioc, ctx);
474
+ server->aio_context_changed = true;
475
+ io_read = kick_handler;
476
+ attach = true;
477
+ } else {
478
+ qio_channel_detach_aio_context(server->ioc);
479
+ /* server->ioc->ctx keeps the old AioContext */
480
+ ctx = server->ioc->ctx;
481
+ attach = false;
482
+ }
483
+
484
+ QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
485
+ if (vu_fd_watch->cb) {
486
+ opaque = attach ? vu_fd_watch : NULL;
487
+ aio_set_fd_handler(ctx, vu_fd_watch->fd, true,
488
+ io_read, NULL, NULL,
489
+ opaque);
490
+ }
491
+ }
492
+}
493
+
494
+
495
+bool vhost_user_server_start(VuServer *server,
496
+ SocketAddress *socket_addr,
497
+ AioContext *ctx,
498
+ uint16_t max_queues,
499
+ DevicePanicNotifierFn *device_panic_notifier,
500
+ const VuDevIface *vu_iface,
501
+ Error **errp)
502
+{
503
+ QIONetListener *listener = qio_net_listener_new();
504
+ if (qio_net_listener_open_sync(listener, socket_addr, 1,
505
+ errp) < 0) {
506
+ object_unref(OBJECT(listener));
507
+ return false;
508
+ }
509
+
510
+ /* zero out unspecified fileds */
511
+ *server = (VuServer) {
512
+ .listener = listener,
513
+ .vu_iface = vu_iface,
514
+ .max_queues = max_queues,
515
+ .ctx = ctx,
516
+ .device_panic_notifier = device_panic_notifier,
517
+ };
518
+
519
+ qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
520
+
521
+ qio_net_listener_set_client_func(server->listener,
522
+ vu_accept,
523
+ server,
524
+ NULL);
525
+
526
+ QTAILQ_INIT(&server->vu_fd_watches);
527
+ return true;
528
+}
529
diff --git a/util/meson.build b/util/meson.build
530
index XXXXXXX..XXXXXXX 100644
531
--- a/util/meson.build
532
+++ b/util/meson.build
533
@@ -XXX,XX +XXX,XX @@ if have_block
534
util_ss.add(files('main-loop.c'))
535
util_ss.add(files('nvdimm-utils.c'))
536
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
537
+ util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
538
util_ss.add(files('qemu-coroutine-sleep.c'))
539
util_ss.add(files('qemu-co-shared-resource.c'))
540
util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
541
--
542
2.26.2
543
diff view generated by jsdifflib
New patch
1
From: Coiby Xu <coiby.xu@gmail.com>
1
2
3
Move the constants from hw/core/qdev-properties-system.c to
4
util/block-helpers.h so that knowledge of the min/max values is available to other block code (e.g. the vhost-user-blk server).
5
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
8
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
9
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
10
Acked-by: Eduardo Habkost <ehabkost@redhat.com>
11
Message-id: 20200918080912.321299-5-coiby.xu@gmail.com
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
util/block-helpers.h | 19 +++++++++++++
15
hw/core/qdev-properties-system.c | 31 ++++-----------------
16
util/block-helpers.c | 46 ++++++++++++++++++++++++++++++++
17
util/meson.build | 1 +
18
4 files changed, 71 insertions(+), 26 deletions(-)
19
create mode 100644 util/block-helpers.h
20
create mode 100644 util/block-helpers.c
21
22
diff --git a/util/block-helpers.h b/util/block-helpers.h
23
new file mode 100644
24
index XXXXXXX..XXXXXXX
25
--- /dev/null
26
+++ b/util/block-helpers.h
27
@@ -XXX,XX +XXX,XX @@
28
+#ifndef BLOCK_HELPERS_H
29
+#define BLOCK_HELPERS_H
30
+
31
+#include "qemu/units.h"
32
+
33
+/* lower limit is sector size */
34
+#define MIN_BLOCK_SIZE INT64_C(512)
35
+#define MIN_BLOCK_SIZE_STR "512 B"
36
+/*
37
+ * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
38
+ * matches qcow2 cluster size limit
39
+ */
40
+#define MAX_BLOCK_SIZE (2 * MiB)
41
+#define MAX_BLOCK_SIZE_STR "2 MiB"
42
+
43
+void check_block_size(const char *id, const char *name, int64_t value,
44
+ Error **errp);
45
+
46
+#endif /* BLOCK_HELPERS_H */
47
diff --git a/hw/core/qdev-properties-system.c b/hw/core/qdev-properties-system.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/hw/core/qdev-properties-system.c
50
+++ b/hw/core/qdev-properties-system.c
51
@@ -XXX,XX +XXX,XX @@
52
#include "sysemu/blockdev.h"
53
#include "net/net.h"
54
#include "hw/pci/pci.h"
55
+#include "util/block-helpers.h"
56
57
static bool check_prop_still_unset(DeviceState *dev, const char *name,
58
const void *old_val, const char *new_val,
59
@@ -XXX,XX +XXX,XX @@ const PropertyInfo qdev_prop_losttickpolicy = {
60
61
/* --- blocksize --- */
62
63
-/* lower limit is sector size */
64
-#define MIN_BLOCK_SIZE 512
65
-#define MIN_BLOCK_SIZE_STR "512 B"
66
-/*
67
- * upper limit is arbitrary, 2 MiB looks sufficient for all sensible uses, and
68
- * matches qcow2 cluster size limit
69
- */
70
-#define MAX_BLOCK_SIZE (2 * MiB)
71
-#define MAX_BLOCK_SIZE_STR "2 MiB"
72
-
73
static void set_blocksize(Object *obj, Visitor *v, const char *name,
74
void *opaque, Error **errp)
75
{
76
@@ -XXX,XX +XXX,XX @@ static void set_blocksize(Object *obj, Visitor *v, const char *name,
77
Property *prop = opaque;
78
uint32_t *ptr = qdev_get_prop_ptr(dev, prop);
79
uint64_t value;
80
+ Error *local_err = NULL;
81
82
if (dev->realized) {
83
qdev_prop_set_after_realize(dev, name, errp);
84
@@ -XXX,XX +XXX,XX @@ static void set_blocksize(Object *obj, Visitor *v, const char *name,
85
if (!visit_type_size(v, name, &value, errp)) {
86
return;
87
}
88
- /* value of 0 means "unset" */
89
- if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
90
- error_setg(errp,
91
- "Property %s.%s doesn't take value %" PRIu64
92
- " (minimum: " MIN_BLOCK_SIZE_STR
93
- ", maximum: " MAX_BLOCK_SIZE_STR ")",
94
- dev->id ? : "", name, value);
95
+ check_block_size(dev->id ? : "", name, value, &local_err);
96
+ if (local_err) {
97
+ error_propagate(errp, local_err);
98
return;
99
}
100
-
101
- /* We rely on power-of-2 blocksizes for bitmasks */
102
- if ((value & (value - 1)) != 0) {
103
- error_setg(errp,
104
- "Property %s.%s doesn't take value '%" PRId64 "', "
105
- "it's not a power of 2", dev->id ?: "", name, (int64_t)value);
106
- return;
107
- }
108
-
109
*ptr = value;
110
}
111
112
diff --git a/util/block-helpers.c b/util/block-helpers.c
113
new file mode 100644
114
index XXXXXXX..XXXXXXX
115
--- /dev/null
116
+++ b/util/block-helpers.c
117
@@ -XXX,XX +XXX,XX @@
118
+/*
119
+ * Block utility functions
120
+ *
121
+ * Copyright IBM, Corp. 2011
122
+ * Copyright (c) 2020 Coiby Xu <coiby.xu@gmail.com>
123
+ *
124
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
125
+ * See the COPYING file in the top-level directory.
126
+ */
127
+
128
+#include "qemu/osdep.h"
129
+#include "qapi/error.h"
130
+#include "qapi/qmp/qerror.h"
131
+#include "block-helpers.h"
132
+
133
+/**
134
+ * check_block_size:
135
+ * @id: The unique ID of the object
136
+ * @name: The name of the property being validated
137
+ * @value: The block size in bytes
138
+ * @errp: A pointer to an area to store an error
139
+ *
140
+ * This function checks that the block size meets the following conditions:
141
+ * 1. At least MIN_BLOCK_SIZE
142
+ * 2. No larger than MAX_BLOCK_SIZE
143
+ * 3. A power of 2
144
+ */
145
+void check_block_size(const char *id, const char *name, int64_t value,
146
+ Error **errp)
147
+{
148
+ /* value of 0 means "unset" */
149
+ if (value && (value < MIN_BLOCK_SIZE || value > MAX_BLOCK_SIZE)) {
150
+ error_setg(errp, QERR_PROPERTY_VALUE_OUT_OF_RANGE,
151
+ id, name, value, MIN_BLOCK_SIZE, MAX_BLOCK_SIZE);
152
+ return;
153
+ }
154
+
155
+ /* We rely on power-of-2 blocksizes for bitmasks */
156
+ if ((value & (value - 1)) != 0) {
157
+ error_setg(errp,
158
+ "Property %s.%s doesn't take value '%" PRId64
159
+ "', it's not a power of 2",
160
+ id, name, value);
161
+ return;
162
+ }
163
+}
164
diff --git a/util/meson.build b/util/meson.build
165
index XXXXXXX..XXXXXXX 100644
166
--- a/util/meson.build
167
+++ b/util/meson.build
168
@@ -XXX,XX +XXX,XX @@ if have_block
169
util_ss.add(files('nvdimm-utils.c'))
170
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
171
util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
172
+ util_ss.add(files('block-helpers.c'))
173
util_ss.add(files('qemu-coroutine-sleep.c'))
174
util_ss.add(files('qemu-co-shared-resource.c'))
175
util_ss.add(files('thread-pool.c', 'qemu-timer.c'))
176
--
177
2.26.2
178
diff view generated by jsdifflib
New patch
1
From: Coiby Xu <coiby.xu@gmail.com>
1
2
3
By making use of libvhost-user, a block device drive can be shared with
4
the connected vhost-user client. Only one client can connect to the
5
server at a time.
6
7
Since vhost-user-server needs a block drive to be created first, delay
8
the creation of this object.
9
10
Suggested-by: Kevin Wolf <kwolf@redhat.com>
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
13
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
14
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
15
Message-id: 20200918080912.321299-6-coiby.xu@gmail.com
16
[Shorten "vhost_user_blk_server" string to "vhost_user_blk" to avoid the
17
following compiler warning:
18
../block/export/vhost-user-blk-server.c:178:50: error: ‘%s’ directive output truncated writing 21 bytes into a region of size 20 [-Werror=format-truncation=]
19
and fix "Invalid size %ld ..." ssize_t format string arguments for
20
32-bit hosts.
21
--Stefan]
22
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
23
---
24
block/export/vhost-user-blk-server.h | 36 ++
25
block/export/vhost-user-blk-server.c | 661 +++++++++++++++++++++++++++
26
softmmu/vl.c | 4 +
27
block/meson.build | 1 +
28
4 files changed, 702 insertions(+)
29
create mode 100644 block/export/vhost-user-blk-server.h
30
create mode 100644 block/export/vhost-user-blk-server.c
31
32
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
33
new file mode 100644
34
index XXXXXXX..XXXXXXX
35
--- /dev/null
36
+++ b/block/export/vhost-user-blk-server.h
37
@@ -XXX,XX +XXX,XX @@
38
+/*
39
+ * Sharing QEMU block devices via vhost-user protocol
40
+ *
41
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
42
+ * Copyright (c) 2020 Red Hat, Inc.
43
+ *
44
+ * This work is licensed under the terms of the GNU GPL, version 2 or
45
+ * later. See the COPYING file in the top-level directory.
46
+ */
47
+
48
+#ifndef VHOST_USER_BLK_SERVER_H
49
+#define VHOST_USER_BLK_SERVER_H
50
+#include "util/vhost-user-server.h"
51
+
52
+typedef struct VuBlockDev VuBlockDev;
53
+#define TYPE_VHOST_USER_BLK_SERVER "vhost-user-blk-server"
54
+#define VHOST_USER_BLK_SERVER(obj) \
55
+ OBJECT_CHECK(VuBlockDev, obj, TYPE_VHOST_USER_BLK_SERVER)
56
+
57
+/* vhost user block device */
58
+struct VuBlockDev {
59
+ Object parent_obj;
60
+ char *node_name;
61
+ SocketAddress *addr;
62
+ AioContext *ctx;
63
+ VuServer vu_server;
64
+ bool running;
65
+ uint32_t blk_size;
66
+ BlockBackend *backend;
67
+ QIOChannelSocket *sioc;
68
+ QTAILQ_ENTRY(VuBlockDev) next;
69
+ struct virtio_blk_config blkcfg;
70
+ bool writable;
71
+};
72
+
73
+#endif /* VHOST_USER_BLK_SERVER_H */
74
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
75
new file mode 100644
76
index XXXXXXX..XXXXXXX
77
--- /dev/null
78
+++ b/block/export/vhost-user-blk-server.c
79
@@ -XXX,XX +XXX,XX @@
80
+/*
81
+ * Sharing QEMU block devices via vhost-user protocol
82
+ *
83
+ * Parts of the code based on nbd/server.c.
84
+ *
85
+ * Copyright (c) Coiby Xu <coiby.xu@gmail.com>.
86
+ * Copyright (c) 2020 Red Hat, Inc.
87
+ *
88
+ * This work is licensed under the terms of the GNU GPL, version 2 or
89
+ * later. See the COPYING file in the top-level directory.
90
+ */
91
+#include "qemu/osdep.h"
92
+#include "block/block.h"
93
+#include "vhost-user-blk-server.h"
94
+#include "qapi/error.h"
95
+#include "qom/object_interfaces.h"
96
+#include "sysemu/block-backend.h"
97
+#include "util/block-helpers.h"
98
+
99
+enum {
100
+ VHOST_USER_BLK_MAX_QUEUES = 1,
101
+};
102
+struct virtio_blk_inhdr {
103
+ unsigned char status;
104
+};
105
+
106
+typedef struct VuBlockReq {
107
+ VuVirtqElement *elem;
108
+ int64_t sector_num;
109
+ size_t size;
110
+ struct virtio_blk_inhdr *in;
111
+ struct virtio_blk_outhdr out;
112
+ VuServer *server;
113
+ struct VuVirtq *vq;
114
+} VuBlockReq;
115
+
116
+static void vu_block_req_complete(VuBlockReq *req)
117
+{
118
+ VuDev *vu_dev = &req->server->vu_dev;
119
+
120
+ /* IO size with 1 extra status byte */
121
+ vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
122
+ vu_queue_notify(vu_dev, req->vq);
123
+
124
+ if (req->elem) {
125
+ free(req->elem);
126
+ }
127
+
128
+ g_free(req);
129
+}
130
+
131
+static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
132
+{
133
+ return container_of(server, VuBlockDev, vu_server);
134
+}
135
+
136
+static int coroutine_fn
137
+vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
138
+ uint32_t iovcnt, uint32_t type)
139
+{
140
+ struct virtio_blk_discard_write_zeroes desc;
141
+ ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
142
+ if (unlikely(size != sizeof(desc))) {
143
+ error_report("Invalid size %zd, expect %zu", size, sizeof(desc));
144
+ return -EINVAL;
145
+ }
146
+
147
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
148
+ uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
149
+ le32_to_cpu(desc.num_sectors) << 9 };
150
+ if (type == VIRTIO_BLK_T_DISCARD) {
151
+ if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
152
+ return 0;
153
+ }
154
+ } else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
155
+ if (blk_co_pwrite_zeroes(vdev_blk->backend,
156
+ range[0], range[1], 0) == 0) {
157
+ return 0;
158
+ }
159
+ }
160
+
161
+ return -EINVAL;
162
+}
163
+
164
+static void coroutine_fn vu_block_flush(VuBlockReq *req)
165
+{
166
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
167
+ BlockBackend *backend = vdev_blk->backend;
168
+ blk_co_flush(backend);
169
+}
170
+
171
+struct req_data {
172
+ VuServer *server;
173
+ VuVirtq *vq;
174
+ VuVirtqElement *elem;
175
+};
176
+
177
+static void coroutine_fn vu_block_virtio_process_req(void *opaque)
178
+{
179
+ struct req_data *data = opaque;
180
+ VuServer *server = data->server;
181
+ VuVirtq *vq = data->vq;
182
+ VuVirtqElement *elem = data->elem;
183
+ uint32_t type;
184
+ VuBlockReq *req;
185
+
186
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
187
+ BlockBackend *backend = vdev_blk->backend;
188
+
189
+ struct iovec *in_iov = elem->in_sg;
190
+ struct iovec *out_iov = elem->out_sg;
191
+ unsigned in_num = elem->in_num;
192
+ unsigned out_num = elem->out_num;
193
+ /* refer to hw/block/virtio-blk.c */
194
+ if (elem->out_num < 1 || elem->in_num < 1) {
195
+ error_report("virtio-blk request missing headers");
196
+ free(elem);
197
+ return;
198
+ }
199
+
200
+ req = g_new0(VuBlockReq, 1);
201
+ req->server = server;
202
+ req->vq = vq;
203
+ req->elem = elem;
204
+
205
+ if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
206
+ sizeof(req->out)) != sizeof(req->out))) {
207
+ error_report("virtio-blk request outhdr too short");
208
+ goto err;
209
+ }
210
+
211
+ iov_discard_front(&out_iov, &out_num, sizeof(req->out));
212
+
213
+ if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) {
214
+ error_report("virtio-blk request inhdr too short");
215
+ goto err;
216
+ }
217
+
218
+ /* We always touch the last byte, so just see how big in_iov is. */
219
+ req->in = (void *)in_iov[in_num - 1].iov_base
220
+ + in_iov[in_num - 1].iov_len
221
+ - sizeof(struct virtio_blk_inhdr);
222
+ iov_discard_back(in_iov, &in_num, sizeof(struct virtio_blk_inhdr));
223
+
224
+ type = le32_to_cpu(req->out.type);
225
+ switch (type & ~VIRTIO_BLK_T_BARRIER) {
226
+ case VIRTIO_BLK_T_IN:
227
+ case VIRTIO_BLK_T_OUT: {
228
+ ssize_t ret = 0;
229
+ bool is_write = type & VIRTIO_BLK_T_OUT;
230
+ req->sector_num = le64_to_cpu(req->out.sector);
231
+
232
+ int64_t offset = req->sector_num * vdev_blk->blk_size;
233
+ QEMUIOVector qiov;
234
+ if (is_write) {
235
+ qemu_iovec_init_external(&qiov, out_iov, out_num);
236
+ ret = blk_co_pwritev(backend, offset, qiov.size,
237
+ &qiov, 0);
238
+ } else {
239
+ qemu_iovec_init_external(&qiov, in_iov, in_num);
240
+ ret = blk_co_preadv(backend, offset, qiov.size,
241
+ &qiov, 0);
242
+ }
243
+ if (ret >= 0) {
244
+ req->in->status = VIRTIO_BLK_S_OK;
245
+ } else {
246
+ req->in->status = VIRTIO_BLK_S_IOERR;
247
+ }
248
+ break;
249
+ }
250
+ case VIRTIO_BLK_T_FLUSH:
251
+ vu_block_flush(req);
252
+ req->in->status = VIRTIO_BLK_S_OK;
253
+ break;
254
+ case VIRTIO_BLK_T_GET_ID: {
255
+ size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
256
+ VIRTIO_BLK_ID_BYTES);
257
+ snprintf(elem->in_sg[0].iov_base, size, "%s", "vhost_user_blk");
258
+ req->in->status = VIRTIO_BLK_S_OK;
259
+ req->size = elem->in_sg[0].iov_len;
260
+ break;
261
+ }
262
+ case VIRTIO_BLK_T_DISCARD:
263
+ case VIRTIO_BLK_T_WRITE_ZEROES: {
264
+ int rc;
265
+ rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1],
266
+ out_num, type);
267
+ if (rc == 0) {
268
+ req->in->status = VIRTIO_BLK_S_OK;
269
+ } else {
270
+ req->in->status = VIRTIO_BLK_S_IOERR;
271
+ }
272
+ break;
273
+ }
274
+ default:
275
+ req->in->status = VIRTIO_BLK_S_UNSUPP;
276
+ break;
277
+ }
278
+
279
+ vu_block_req_complete(req);
280
+ return;
281
+
282
+err:
283
+ free(elem);
284
+ g_free(req);
285
+ return;
286
+}
287
+
288
+static void vu_block_process_vq(VuDev *vu_dev, int idx)
289
+{
290
+ VuServer *server;
291
+ VuVirtq *vq;
292
+ struct req_data *req_data;
293
+
294
+ server = container_of(vu_dev, VuServer, vu_dev);
295
+ assert(server);
296
+
297
+ vq = vu_get_queue(vu_dev, idx);
298
+ assert(vq);
299
+ VuVirtqElement *elem;
300
+ while (1) {
301
+ elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) +
302
+ sizeof(VuBlockReq));
303
+ if (elem) {
304
+ req_data = g_new0(struct req_data, 1);
305
+ req_data->server = server;
306
+ req_data->vq = vq;
307
+ req_data->elem = elem;
308
+ Coroutine *co = qemu_coroutine_create(vu_block_virtio_process_req,
309
+ req_data);
310
+ aio_co_enter(server->ioc->ctx, co);
311
+ } else {
312
+ break;
313
+ }
314
+ }
315
+}
316
+
317
+static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started)
318
+{
319
+ VuVirtq *vq;
320
+
321
+ assert(vu_dev);
322
+
323
+ vq = vu_get_queue(vu_dev, idx);
324
+ vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL);
325
+}
326
+
327
+static uint64_t vu_block_get_features(VuDev *dev)
328
+{
329
+ uint64_t features;
330
+ VuServer *server = container_of(dev, VuServer, vu_dev);
331
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
332
+ features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
333
+ 1ull << VIRTIO_BLK_F_SEG_MAX |
334
+ 1ull << VIRTIO_BLK_F_TOPOLOGY |
335
+ 1ull << VIRTIO_BLK_F_BLK_SIZE |
336
+ 1ull << VIRTIO_BLK_F_FLUSH |
337
+ 1ull << VIRTIO_BLK_F_DISCARD |
338
+ 1ull << VIRTIO_BLK_F_WRITE_ZEROES |
339
+ 1ull << VIRTIO_BLK_F_CONFIG_WCE |
340
+ 1ull << VIRTIO_F_VERSION_1 |
341
+ 1ull << VIRTIO_RING_F_INDIRECT_DESC |
342
+ 1ull << VIRTIO_RING_F_EVENT_IDX |
343
+ 1ull << VHOST_USER_F_PROTOCOL_FEATURES;
344
+
345
+ if (!vdev_blk->writable) {
346
+ features |= 1ull << VIRTIO_BLK_F_RO;
347
+ }
348
+
349
+ return features;
350
+}
351
+
352
+static uint64_t vu_block_get_protocol_features(VuDev *dev)
353
+{
354
+ return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
355
+ 1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
356
+}
357
+
358
+static int
359
+vu_block_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
360
+{
361
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
362
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
363
+ memcpy(config, &vdev_blk->blkcfg, len);
364
+
365
+ return 0;
366
+}
367
+
368
+static int
369
+vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
370
+ uint32_t offset, uint32_t size, uint32_t flags)
371
+{
372
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
373
+ VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
374
+ uint8_t wce;
375
+
376
+ /* don't support live migration */
377
+ if (flags != VHOST_SET_CONFIG_TYPE_MASTER) {
378
+ return -EINVAL;
379
+ }
380
+
381
+ if (offset != offsetof(struct virtio_blk_config, wce) ||
382
+ size != 1) {
383
+ return -EINVAL;
384
+ }
385
+
386
+ wce = *data;
387
+ vdev_blk->blkcfg.wce = wce;
388
+ blk_set_enable_write_cache(vdev_blk->backend, wce);
389
+ return 0;
390
+}
391
+
392
+/*
393
+ * When the client disconnects, it sends a VHOST_USER_NONE request
394
+ * and vu_process_message will simply call exit, which causes the VM
395
+ * to exit abruptly.
396
+ * To avoid this issue, process VHOST_USER_NONE request ahead
397
+ * of vu_process_message.
398
+ *
399
+ */
400
+static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
401
+{
402
+ if (vmsg->request == VHOST_USER_NONE) {
403
+ dev->panic(dev, "disconnect");
404
+ return true;
405
+ }
406
+ return false;
407
+}
408
+
409
+static const VuDevIface vu_block_iface = {
410
+ .get_features = vu_block_get_features,
411
+ .queue_set_started = vu_block_queue_set_started,
412
+ .get_protocol_features = vu_block_get_protocol_features,
413
+ .get_config = vu_block_get_config,
414
+ .set_config = vu_block_set_config,
415
+ .process_msg = vu_block_process_msg,
416
+};
417
+
418
+static void blk_aio_attached(AioContext *ctx, void *opaque)
419
+{
420
+ VuBlockDev *vub_dev = opaque;
421
+ aio_context_acquire(ctx);
422
+ vhost_user_server_set_aio_context(&vub_dev->vu_server, ctx);
423
+ aio_context_release(ctx);
424
+}
425
+
426
+static void blk_aio_detach(void *opaque)
427
+{
428
+ VuBlockDev *vub_dev = opaque;
429
+ AioContext *ctx = vub_dev->vu_server.ctx;
430
+ aio_context_acquire(ctx);
431
+ vhost_user_server_set_aio_context(&vub_dev->vu_server, NULL);
432
+ aio_context_release(ctx);
433
+}
434
+
435
+static void
436
+vu_block_initialize_config(BlockDriverState *bs,
437
+ struct virtio_blk_config *config, uint32_t blk_size)
438
+{
439
+ config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
440
+ config->blk_size = blk_size;
441
+ config->size_max = 0;
442
+ config->seg_max = 128 - 2;
443
+ config->min_io_size = 1;
444
+ config->opt_io_size = 1;
445
+ config->num_queues = VHOST_USER_BLK_MAX_QUEUES;
446
+ config->max_discard_sectors = 32768;
447
+ config->max_discard_seg = 1;
448
+ config->discard_sector_alignment = config->blk_size >> 9;
449
+ config->max_write_zeroes_sectors = 32768;
450
+ config->max_write_zeroes_seg = 1;
451
+}
452
+
453
+static VuBlockDev *vu_block_init(VuBlockDev *vu_block_device, Error **errp)
454
+{
455
+
456
+ BlockBackend *blk;
457
+ Error *local_error = NULL;
458
+ const char *node_name = vu_block_device->node_name;
459
+ bool writable = vu_block_device->writable;
460
+ uint64_t perm = BLK_PERM_CONSISTENT_READ;
461
+ int ret;
462
+
463
+ AioContext *ctx;
464
+
465
+ BlockDriverState *bs = bdrv_lookup_bs(node_name, node_name, &local_error);
466
+
467
+ if (!bs) {
468
+ error_propagate(errp, local_error);
469
+ return NULL;
470
+ }
471
+
472
+ if (bdrv_is_read_only(bs)) {
473
+ writable = false;
474
+ }
475
+
476
+ if (writable) {
477
+ perm |= BLK_PERM_WRITE;
478
+ }
479
+
480
+ ctx = bdrv_get_aio_context(bs);
481
+ aio_context_acquire(ctx);
482
+ bdrv_invalidate_cache(bs, NULL);
483
+ aio_context_release(ctx);
484
+
485
+ /*
486
+ * Don't allow resize while the vhost user server is running,
487
+ * otherwise we don't care what happens with the node.
488
+ */
489
+ blk = blk_new(bdrv_get_aio_context(bs), perm,
490
+ BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
491
+ BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
492
+ ret = blk_insert_bs(blk, bs, errp);
493
+
494
+ if (ret < 0) {
495
+ goto fail;
496
+ }
497
+
498
+ blk_set_enable_write_cache(blk, false);
499
+
500
+ blk_set_allow_aio_context_change(blk, true);
501
+
502
+ vu_block_device->blkcfg.wce = 0;
503
+ vu_block_device->backend = blk;
504
+ if (!vu_block_device->blk_size) {
505
+ vu_block_device->blk_size = BDRV_SECTOR_SIZE;
506
+ }
507
+ vu_block_device->blkcfg.blk_size = vu_block_device->blk_size;
508
+ blk_set_guest_block_size(blk, vu_block_device->blk_size);
509
+ vu_block_initialize_config(bs, &vu_block_device->blkcfg,
510
+ vu_block_device->blk_size);
511
+ return vu_block_device;
512
+
513
+fail:
514
+ blk_unref(blk);
515
+ return NULL;
516
+}
517
+
518
+static void vu_block_deinit(VuBlockDev *vu_block_device)
519
+{
520
+ if (vu_block_device->backend) {
521
+ blk_remove_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
522
+ blk_aio_detach, vu_block_device);
523
+ }
524
+
525
+ blk_unref(vu_block_device->backend);
526
+}
527
+
528
+static void vhost_user_blk_server_stop(VuBlockDev *vu_block_device)
529
+{
530
+ vhost_user_server_stop(&vu_block_device->vu_server);
531
+ vu_block_deinit(vu_block_device);
532
+}
533
+
534
+static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
535
+ Error **errp)
536
+{
537
+ AioContext *ctx;
538
+ SocketAddress *addr = vu_block_device->addr;
539
+
540
+ if (!vu_block_init(vu_block_device, errp)) {
541
+ return;
542
+ }
543
+
544
+ ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
545
+
546
+ if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
547
+ VHOST_USER_BLK_MAX_QUEUES,
548
+ NULL, &vu_block_iface,
549
+ errp)) {
550
+ goto error;
551
+ }
552
+
553
+ blk_add_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
554
+ blk_aio_detach, vu_block_device);
555
+ vu_block_device->running = true;
556
+ return;
557
+
558
+ error:
559
+ vu_block_deinit(vu_block_device);
560
+}
561
+
562
+static bool vu_prop_modifiable(VuBlockDev *vus, Error **errp)
563
+{
564
+ if (vus->running) {
565
+ error_setg(errp, "The property can't be modified "
566
+ "while the server is running");
567
+ return false;
568
+ }
569
+ return true;
570
+}
571
+
572
+static void vu_set_node_name(Object *obj, const char *value, Error **errp)
573
+{
574
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
575
+
576
+ if (!vu_prop_modifiable(vus, errp)) {
577
+ return;
578
+ }
579
+
580
+ if (vus->node_name) {
581
+ g_free(vus->node_name);
582
+ }
583
+
584
+ vus->node_name = g_strdup(value);
585
+}
586
+
587
+static char *vu_get_node_name(Object *obj, Error **errp)
588
+{
589
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
590
+ return g_strdup(vus->node_name);
591
+}
592
+
593
+static void free_socket_addr(SocketAddress *addr)
594
+{
595
+ g_free(addr->u.q_unix.path);
596
+ g_free(addr);
597
+}
598
+
599
+static void vu_set_unix_socket(Object *obj, const char *value,
600
+ Error **errp)
601
+{
602
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
603
+
604
+ if (!vu_prop_modifiable(vus, errp)) {
605
+ return;
606
+ }
607
+
608
+ if (vus->addr) {
609
+ free_socket_addr(vus->addr);
610
+ }
611
+
612
+ SocketAddress *addr = g_new0(SocketAddress, 1);
613
+ addr->type = SOCKET_ADDRESS_TYPE_UNIX;
614
+ addr->u.q_unix.path = g_strdup(value);
615
+ vus->addr = addr;
616
+}
617
+
618
+static char *vu_get_unix_socket(Object *obj, Error **errp)
619
+{
620
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
621
+ return g_strdup(vus->addr->u.q_unix.path);
622
+}
623
+
624
+static bool vu_get_block_writable(Object *obj, Error **errp)
625
+{
626
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
627
+ return vus->writable;
628
+}
629
+
630
+static void vu_set_block_writable(Object *obj, bool value, Error **errp)
631
+{
632
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
633
+
634
+ if (!vu_prop_modifiable(vus, errp)) {
635
+ return;
636
+ }
637
+
638
+ vus->writable = value;
639
+}
640
+
641
+static void vu_get_blk_size(Object *obj, Visitor *v, const char *name,
642
+ void *opaque, Error **errp)
643
+{
644
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
645
+ uint32_t value = vus->blk_size;
646
+
647
+ visit_type_uint32(v, name, &value, errp);
648
+}
649
+
650
+static void vu_set_blk_size(Object *obj, Visitor *v, const char *name,
651
+ void *opaque, Error **errp)
652
+{
653
+ VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
654
+
655
+ Error *local_err = NULL;
656
+ uint32_t value;
657
+
658
+ if (!vu_prop_modifiable(vus, errp)) {
659
+ return;
660
+ }
661
+
662
+ visit_type_uint32(v, name, &value, &local_err);
663
+ if (local_err) {
664
+ goto out;
665
+ }
666
+
667
+ check_block_size(object_get_typename(obj), name, value, &local_err);
668
+ if (local_err) {
669
+ goto out;
670
+ }
671
+
672
+ vus->blk_size = value;
673
+
674
+out:
675
+ error_propagate(errp, local_err);
676
+}
677
+
678
+static void vhost_user_blk_server_instance_finalize(Object *obj)
679
+{
680
+ VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
681
+
682
+ vhost_user_blk_server_stop(vub);
683
+
684
+ /*
685
+ * Unlike object_property_add_str, object_class_property_add_str
686
+ * doesn't have a release method. Thus manual memory freeing is
687
+ * needed.
688
+ */
689
+ free_socket_addr(vub->addr);
690
+ g_free(vub->node_name);
691
+}
692
+
693
+static void vhost_user_blk_server_complete(UserCreatable *obj, Error **errp)
694
+{
695
+ VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
696
+
697
+ vhost_user_blk_server_start(vub, errp);
698
+}
699
+
700
+static void vhost_user_blk_server_class_init(ObjectClass *klass,
701
+ void *class_data)
702
+{
703
+ UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
704
+ ucc->complete = vhost_user_blk_server_complete;
705
+
706
+ object_class_property_add_bool(klass, "writable",
707
+ vu_get_block_writable,
708
+ vu_set_block_writable);
709
+
710
+ object_class_property_add_str(klass, "node-name",
711
+ vu_get_node_name,
712
+ vu_set_node_name);
713
+
714
+ object_class_property_add_str(klass, "unix-socket",
715
+ vu_get_unix_socket,
716
+ vu_set_unix_socket);
717
+
718
+ object_class_property_add(klass, "logical-block-size", "uint32",
719
+ vu_get_blk_size, vu_set_blk_size,
720
+ NULL, NULL);
721
+}
722
+
723
+static const TypeInfo vhost_user_blk_server_info = {
724
+ .name = TYPE_VHOST_USER_BLK_SERVER,
725
+ .parent = TYPE_OBJECT,
726
+ .instance_size = sizeof(VuBlockDev),
727
+ .instance_finalize = vhost_user_blk_server_instance_finalize,
728
+ .class_init = vhost_user_blk_server_class_init,
729
+ .interfaces = (InterfaceInfo[]) {
730
+ {TYPE_USER_CREATABLE},
731
+ {}
732
+ },
733
+};
734
+
735
+static void vhost_user_blk_server_register_types(void)
736
+{
737
+ type_register_static(&vhost_user_blk_server_info);
738
+}
739
+
740
+type_init(vhost_user_blk_server_register_types)
741
diff --git a/softmmu/vl.c b/softmmu/vl.c
742
index XXXXXXX..XXXXXXX 100644
743
--- a/softmmu/vl.c
744
+++ b/softmmu/vl.c
745
@@ -XXX,XX +XXX,XX @@ static bool object_create_initial(const char *type, QemuOpts *opts)
746
}
747
#endif
748
749
+ /* Reason: vhost-user-blk-server property "node-name" */
750
+ if (g_str_equal(type, "vhost-user-blk-server")) {
751
+ return false;
752
+ }
753
/*
754
* Reason: filter-* property "netdev" etc.
755
*/
756
diff --git a/block/meson.build b/block/meson.build
757
index XXXXXXX..XXXXXXX 100644
758
--- a/block/meson.build
759
+++ b/block/meson.build
760
@@ -XXX,XX +XXX,XX @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')
761
block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
762
block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c'))
763
block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
764
+block_ss.add(when: 'CONFIG_LINUX', if_true: files('export/vhost-user-blk-server.c', '../contrib/libvhost-user/libvhost-user.c'))
765
block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c'))
766
block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c'))
767
block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c'))
768
--
769
2.26.2
770
diff view generated by jsdifflib
New patch
1
From: Coiby Xu <coiby.xu@gmail.com>
1
2
3
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
4
Signed-off-by: Coiby Xu <coiby.xu@gmail.com>
5
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
7
Message-id: 20200918080912.321299-8-coiby.xu@gmail.com
8
[Removed reference to vhost-user-blk-test.c, it will be sent in a
9
separate pull request.
10
--Stefan]
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
MAINTAINERS | 7 +++++++
14
1 file changed, 7 insertions(+)
15
16
diff --git a/MAINTAINERS b/MAINTAINERS
17
index XXXXXXX..XXXXXXX 100644
18
--- a/MAINTAINERS
19
+++ b/MAINTAINERS
20
@@ -XXX,XX +XXX,XX @@ L: qemu-block@nongnu.org
21
S: Supported
22
F: tests/image-fuzzer/
23
24
+Vhost-user block device backend server
25
+M: Coiby Xu <Coiby.Xu@gmail.com>
26
+S: Maintained
27
+F: block/export/vhost-user-blk-server.c
28
+F: util/vhost-user-server.c
29
+F: tests/qtest/libqos/vhost-user-blk.c
30
+
31
Replication
32
M: Wen Congyang <wencongyang2@huawei.com>
33
M: Xie Changlong <xiechanglong.d@gmail.com>
34
--
35
2.26.2
36
diff view generated by jsdifflib
New patch
1
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2
Message-id: 20200924151549.913737-3-stefanha@redhat.com
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
---
5
util/vhost-user-server.c | 2 +-
6
1 file changed, 1 insertion(+), 1 deletion(-)
1
7
8
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
9
index XXXXXXX..XXXXXXX 100644
10
--- a/util/vhost-user-server.c
11
+++ b/util/vhost-user-server.c
12
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
13
return false;
14
}
15
16
- /* zero out unspecified fileds */
17
+ /* zero out unspecified fields */
18
*server = (VuServer) {
19
.listener = listener,
20
.vu_iface = vu_iface,
21
--
22
2.26.2
23
diff view generated by jsdifflib
1
Tests should place their files into the test directory. This includes
1
We already have access to the value with the correct type (ioc and sioc
2
Unix sockets. 205 currently fails to do so, which prevents it from
2
are the same QIOChannel).
3
being run concurrently.
4
3
5
Signed-off-by: Max Reitz <mreitz@redhat.com>
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20190618210238.9524-1-mreitz@redhat.com
5
Message-id: 20200924151549.913737-4-stefanha@redhat.com
7
Reviewed-by: Eric Blake <eblake@redhat.com>
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
---
7
---
10
tests/qemu-iotests/205 | 2 +-
8
util/vhost-user-server.c | 2 +-
11
1 file changed, 1 insertion(+), 1 deletion(-)
9
1 file changed, 1 insertion(+), 1 deletion(-)
12
10
13
diff --git a/tests/qemu-iotests/205 b/tests/qemu-iotests/205
11
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
14
index XXXXXXX..XXXXXXX 100755
12
index XXXXXXX..XXXXXXX 100644
15
--- a/tests/qemu-iotests/205
13
--- a/util/vhost-user-server.c
16
+++ b/tests/qemu-iotests/205
14
+++ b/util/vhost-user-server.c
17
@@ -XXX,XX +XXX,XX @@ import iotests
15
@@ -XXX,XX +XXX,XX @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
18
import time
16
server->ioc = QIO_CHANNEL(sioc);
19
from iotests import qemu_img_create, qemu_io, filter_qemu_io, QemuIoInteractive
17
object_ref(OBJECT(server->ioc));
20
18
qio_channel_attach_aio_context(server->ioc, server->ctx);
21
-nbd_sock = 'nbd_sock'
19
- qio_channel_set_blocking(QIO_CHANNEL(server->sioc), false, NULL);
22
+nbd_sock = os.path.join(iotests.test_dir, 'nbd_sock')
20
+ qio_channel_set_blocking(server->ioc, false, NULL);
23
nbd_uri = 'nbd+unix:///exp?socket=' + nbd_sock
21
vu_client_start(server);
24
disk = os.path.join(iotests.test_dir, 'disk')
22
}
25
23
26
--
24
--
27
2.21.0
25
2.26.2
28
26
29
diff view generated by jsdifflib
New patch
1
Explicitly deleting watches is not necessary since libvhost-user calls
2
remove_watch() during vu_deinit(). Add an assertion to check this
3
though.
1
4
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200924151549.913737-5-stefanha@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
util/vhost-user-server.c | 19 ++++---------------
10
1 file changed, 4 insertions(+), 15 deletions(-)
11
12
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/util/vhost-user-server.c
15
+++ b/util/vhost-user-server.c
16
@@ -XXX,XX +XXX,XX @@ static void close_client(VuServer *server)
17
/* When this is set vu_client_trip will stop new processing vhost-user message */
18
server->sioc = NULL;
19
20
- VuFdWatch *vu_fd_watch, *next;
21
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
22
- aio_set_fd_handler(server->ioc->ctx, vu_fd_watch->fd, true, NULL,
23
- NULL, NULL, NULL);
24
- }
25
-
26
- while (!QTAILQ_EMPTY(&server->vu_fd_watches)) {
27
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
28
- if (!vu_fd_watch->processing) {
29
- QTAILQ_REMOVE(&server->vu_fd_watches, vu_fd_watch, next);
30
- g_free(vu_fd_watch);
31
- }
32
- }
33
- }
34
-
35
while (server->processing_msg) {
36
if (server->ioc->read_coroutine) {
37
server->ioc->read_coroutine = NULL;
38
@@ -XXX,XX +XXX,XX @@ static void close_client(VuServer *server)
39
}
40
41
vu_deinit(&server->vu_dev);
42
+
43
+ /* vu_deinit() should have called remove_watch() */
44
+ assert(QTAILQ_EMPTY(&server->vu_fd_watches));
45
+
46
object_unref(OBJECT(sioc));
47
object_unref(OBJECT(server->ioc));
48
}
49
--
50
2.26.2
51
diff view generated by jsdifflib
New patch
1
Only one struct is needed per request. Drop req_data and the separate
2
VuBlockReq instance. Instead let vu_queue_pop() allocate everything at
3
once.
1
4
5
This fixes the req_data memory leak in vu_block_virtio_process_req().
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-6-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
block/export/vhost-user-blk-server.c | 68 +++++++++-------------------
12
1 file changed, 21 insertions(+), 47 deletions(-)
13
14
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/block/export/vhost-user-blk-server.c
17
+++ b/block/export/vhost-user-blk-server.c
18
@@ -XXX,XX +XXX,XX @@ struct virtio_blk_inhdr {
19
};
20
21
typedef struct VuBlockReq {
22
- VuVirtqElement *elem;
23
+ VuVirtqElement elem;
24
int64_t sector_num;
25
size_t size;
26
struct virtio_blk_inhdr *in;
27
@@ -XXX,XX +XXX,XX @@ static void vu_block_req_complete(VuBlockReq *req)
28
VuDev *vu_dev = &req->server->vu_dev;
29
30
/* IO size with 1 extra status byte */
31
- vu_queue_push(vu_dev, req->vq, req->elem, req->size + 1);
32
+ vu_queue_push(vu_dev, req->vq, &req->elem, req->size + 1);
33
vu_queue_notify(vu_dev, req->vq);
34
35
- if (req->elem) {
36
- free(req->elem);
37
- }
38
-
39
- g_free(req);
40
+ free(req);
41
}
42
43
static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
44
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_flush(VuBlockReq *req)
45
blk_co_flush(backend);
46
}
47
48
-struct req_data {
49
- VuServer *server;
50
- VuVirtq *vq;
51
- VuVirtqElement *elem;
52
-};
53
-
54
static void coroutine_fn vu_block_virtio_process_req(void *opaque)
55
{
56
- struct req_data *data = opaque;
57
- VuServer *server = data->server;
58
- VuVirtq *vq = data->vq;
59
- VuVirtqElement *elem = data->elem;
60
+ VuBlockReq *req = opaque;
61
+ VuServer *server = req->server;
62
+ VuVirtqElement *elem = &req->elem;
63
uint32_t type;
64
- VuBlockReq *req;
65
66
VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
67
BlockBackend *backend = vdev_blk->backend;
68
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
69
struct iovec *out_iov = elem->out_sg;
70
unsigned in_num = elem->in_num;
71
unsigned out_num = elem->out_num;
72
+
73
/* refer to hw/block/virtio_blk.c */
74
if (elem->out_num < 1 || elem->in_num < 1) {
75
error_report("virtio-blk request missing headers");
76
- free(elem);
77
- return;
78
+ goto err;
79
}
80
81
- req = g_new0(VuBlockReq, 1);
82
- req->server = server;
83
- req->vq = vq;
84
- req->elem = elem;
85
-
86
if (unlikely(iov_to_buf(out_iov, out_num, 0, &req->out,
87
sizeof(req->out)) != sizeof(req->out))) {
88
error_report("virtio-blk request outhdr too short");
89
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
90
91
err:
92
free(elem);
93
- g_free(req);
94
- return;
95
}
96
97
static void vu_block_process_vq(VuDev *vu_dev, int idx)
98
{
99
- VuServer *server;
100
- VuVirtq *vq;
101
- struct req_data *req_data;
102
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
103
+ VuVirtq *vq = vu_get_queue(vu_dev, idx);
104
105
- server = container_of(vu_dev, VuServer, vu_dev);
106
- assert(server);
107
-
108
- vq = vu_get_queue(vu_dev, idx);
109
- assert(vq);
110
- VuVirtqElement *elem;
111
while (1) {
112
- elem = vu_queue_pop(vu_dev, vq, sizeof(VuVirtqElement) +
113
- sizeof(VuBlockReq));
114
- if (elem) {
115
- req_data = g_new0(struct req_data, 1);
116
- req_data->server = server;
117
- req_data->vq = vq;
118
- req_data->elem = elem;
119
- Coroutine *co = qemu_coroutine_create(vu_block_virtio_process_req,
120
- req_data);
121
- aio_co_enter(server->ioc->ctx, co);
122
- } else {
123
+ VuBlockReq *req;
124
+
125
+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq));
126
+ if (!req) {
127
break;
128
}
129
+
130
+ req->server = server;
131
+ req->vq = vq;
132
+
133
+ Coroutine *co =
134
+ qemu_coroutine_create(vu_block_virtio_process_req, req);
135
+ qemu_coroutine_enter(co);
136
}
137
}
138
139
--
140
2.26.2
141
diff view generated by jsdifflib
New patch
1
The device panic notifier callback is not used. Drop it.
1
2
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Message-id: 20200924151549.913737-7-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
util/vhost-user-server.h | 3 ---
8
block/export/vhost-user-blk-server.c | 3 +--
9
util/vhost-user-server.c | 6 ------
10
3 files changed, 1 insertion(+), 11 deletions(-)
11
12
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
13
index XXXXXXX..XXXXXXX 100644
14
--- a/util/vhost-user-server.h
15
+++ b/util/vhost-user-server.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct VuFdWatch {
17
} VuFdWatch;
18
19
typedef struct VuServer VuServer;
20
-typedef void DevicePanicNotifierFn(VuServer *server);
21
22
struct VuServer {
23
QIONetListener *listener;
24
AioContext *ctx;
25
- DevicePanicNotifierFn *device_panic_notifier;
26
int max_queues;
27
const VuDevIface *vu_iface;
28
VuDev vu_dev;
29
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
30
SocketAddress *unix_socket,
31
AioContext *ctx,
32
uint16_t max_queues,
33
- DevicePanicNotifierFn *device_panic_notifier,
34
const VuDevIface *vu_iface,
35
Error **errp);
36
37
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
38
index XXXXXXX..XXXXXXX 100644
39
--- a/block/export/vhost-user-blk-server.c
40
+++ b/block/export/vhost-user-blk-server.c
41
@@ -XXX,XX +XXX,XX @@ static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
42
ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
43
44
if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
45
- VHOST_USER_BLK_MAX_QUEUES,
46
- NULL, &vu_block_iface,
47
+ VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface,
48
errp)) {
49
goto error;
50
}
51
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
52
index XXXXXXX..XXXXXXX 100644
53
--- a/util/vhost-user-server.c
54
+++ b/util/vhost-user-server.c
55
@@ -XXX,XX +XXX,XX @@ static void panic_cb(VuDev *vu_dev, const char *buf)
56
close_client(server);
57
}
58
59
- if (server->device_panic_notifier) {
60
- server->device_panic_notifier(server);
61
- }
62
-
63
/*
64
* Set the callback function for network listener so another
65
* vhost-user client can connect to this server
66
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
67
SocketAddress *socket_addr,
68
AioContext *ctx,
69
uint16_t max_queues,
70
- DevicePanicNotifierFn *device_panic_notifier,
71
const VuDevIface *vu_iface,
72
Error **errp)
73
{
74
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
75
.vu_iface = vu_iface,
76
.max_queues = max_queues,
77
.ctx = ctx,
78
- .device_panic_notifier = device_panic_notifier,
79
};
80
81
qio_net_listener_set_name(server->listener, "vhost-user-backend-listener");
82
--
83
2.26.2
84
diff view generated by jsdifflib
New patch
1
fds[] is leaked when qio_channel_readv_full() fails.
1
2
3
Use vmsg->fds[] instead of keeping a local fds[] array. Then we can
4
reuse goto fail to clean up fds. vmsg->fd_num must be zeroed before the
5
loop to make this safe.
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-8-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
util/vhost-user-server.c | 50 ++++++++++++++++++----------------------
12
1 file changed, 23 insertions(+), 27 deletions(-)
13
14
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/util/vhost-user-server.c
17
+++ b/util/vhost-user-server.c
18
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
19
};
20
int rc, read_bytes = 0;
21
Error *local_err = NULL;
22
- /*
23
- * Store fds/nfds returned from qio_channel_readv_full into
24
- * temporary variables.
25
- *
26
- * VhostUserMsg is a packed structure, gcc will complain about passing
27
- * pointer to a packed structure member if we pass &VhostUserMsg.fd_num
28
- * and &VhostUserMsg.fds directly when calling qio_channel_readv_full,
29
- * thus two temporary variables nfds and fds are used here.
30
- */
31
- size_t nfds = 0, nfds_t = 0;
32
const size_t max_fds = G_N_ELEMENTS(vmsg->fds);
33
- int *fds_t = NULL;
34
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
35
QIOChannel *ioc = server->ioc;
36
37
+ vmsg->fd_num = 0;
38
if (!ioc) {
39
error_report_err(local_err);
40
goto fail;
41
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
42
43
assert(qemu_in_coroutine());
44
do {
45
+ size_t nfds = 0;
46
+ int *fds = NULL;
47
+
48
/*
49
* qio_channel_readv_full may have short reads, keeping calling it
50
* until getting VHOST_USER_HDR_SIZE or 0 bytes in total
51
*/
52
- rc = qio_channel_readv_full(ioc, &iov, 1, &fds_t, &nfds_t, &local_err);
53
+ rc = qio_channel_readv_full(ioc, &iov, 1, &fds, &nfds, &local_err);
54
if (rc < 0) {
55
if (rc == QIO_CHANNEL_ERR_BLOCK) {
56
+ assert(local_err == NULL);
57
qio_channel_yield(ioc, G_IO_IN);
58
continue;
59
} else {
60
error_report_err(local_err);
61
- return false;
62
+ goto fail;
63
}
64
}
65
- read_bytes += rc;
66
- if (nfds_t > 0) {
67
- if (nfds + nfds_t > max_fds) {
68
+
69
+ if (nfds > 0) {
70
+ if (vmsg->fd_num + nfds > max_fds) {
71
error_report("A maximum of %zu fds are allowed, "
72
"however got %zu fds now",
73
- max_fds, nfds + nfds_t);
74
+ max_fds, vmsg->fd_num + nfds);
75
+ g_free(fds);
76
goto fail;
77
}
78
- memcpy(vmsg->fds + nfds, fds_t,
79
- nfds_t *sizeof(vmsg->fds[0]));
80
- nfds += nfds_t;
81
- g_free(fds_t);
82
+ memcpy(vmsg->fds + vmsg->fd_num, fds, nfds * sizeof(vmsg->fds[0]));
83
+ vmsg->fd_num += nfds;
84
+ g_free(fds);
85
}
86
- if (read_bytes == VHOST_USER_HDR_SIZE || rc == 0) {
87
- break;
88
+
89
+ if (rc == 0) { /* socket closed */
90
+ goto fail;
91
}
92
- iov.iov_base = (char *)vmsg + read_bytes;
93
- iov.iov_len = VHOST_USER_HDR_SIZE - read_bytes;
94
- } while (true);
95
96
- vmsg->fd_num = nfds;
97
+ iov.iov_base += rc;
98
+ iov.iov_len -= rc;
99
+ read_bytes += rc;
100
+ } while (read_bytes != VHOST_USER_HDR_SIZE);
101
+
102
/* qio_channel_readv_full will make socket fds blocking, unblock them */
103
vmsg_unblock_fds(vmsg);
104
if (vmsg->size > sizeof(vmsg->payload)) {
105
--
106
2.26.2
107
diff view generated by jsdifflib
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
1
Unexpected EOF is an error that must be reported.
2
2
3
Commit b0651b8c246d ("vmdk: Move l1_size check into vmdk_add_extent")
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
extended the l1_size check from VMDK4 to VMDK3 but did not update the
4
Message-id: 20200924151549.913737-9-stefanha@redhat.com
5
default coverage in the moved comment.
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
util/vhost-user-server.c | 6 ++++--
8
1 file changed, 4 insertions(+), 2 deletions(-)
6
9
7
The previous vmdk4 calculation:
10
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
8
9
(512 * 1024 * 1024) * 512(l2 entries) * 65536(grain) = 16PB
10
11
The added vmdk3 calculation:
12
13
(512 * 1024 * 1024) * 4096(l2 entries) * 512(grain) = 1PB
14
15
Add the vmdk3 calculation to the comment.
16
17
In any case, VMware does not offer virtual disks larger than 2TB for
18
vmdk4/vmdk3 or 64TB for the new undocumented seSparse format which is
19
not implemented yet in qemu.
20
21
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
22
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
23
Reviewed-by: Liran Alon <liran.alon@oracle.com>
24
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
25
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
26
Message-id: 20190620091057.47441-2-shmuel.eiderman@oracle.com
27
Reviewed-by: yuchenlin <yuchenlin@synology.com>
28
Reviewed-by: Max Reitz <mreitz@redhat.com>
29
Signed-off-by: Max Reitz <mreitz@redhat.com>
30
---
31
block/vmdk.c | 11 ++++++++---
32
1 file changed, 8 insertions(+), 3 deletions(-)
33
34
diff --git a/block/vmdk.c b/block/vmdk.c
35
index XXXXXXX..XXXXXXX 100644
11
index XXXXXXX..XXXXXXX 100644
36
--- a/block/vmdk.c
12
--- a/util/vhost-user-server.c
37
+++ b/block/vmdk.c
13
+++ b/util/vhost-user-server.c
38
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
14
@@ -XXX,XX +XXX,XX @@ vu_message_read(VuDev *vu_dev, int conn_fd, VhostUserMsg *vmsg)
39
return -EFBIG;
15
};
40
}
16
if (vmsg->size) {
41
if (l1_size > 512 * 1024 * 1024) {
17
rc = qio_channel_readv_all_eof(ioc, &iov_payload, 1, &local_err);
42
- /* Although with big capacity and small l1_entry_sectors, we can get a
18
- if (rc == -1) {
43
+ /*
19
- error_report_err(local_err);
44
+ * Although with big capacity and small l1_entry_sectors, we can get a
20
+ if (rc != 1) {
45
* big l1_size, we don't want unbounded value to allocate the table.
21
+ if (local_err) {
46
- * Limit it to 512M, which is 16PB for default cluster and L2 table
22
+ error_report_err(local_err);
47
- * size */
23
+ }
48
+ * Limit it to 512M, which is:
24
goto fail;
49
+ * 16PB - for default "Hosted Sparse Extent" (VMDK4)
25
}
50
+ * cluster size: 64KB, L2 table size: 512 entries
51
+ * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
52
+ * cluster size: 512B, L2 table size: 4096 entries
53
+ */
54
error_setg(errp, "L1 size too big");
55
return -EFBIG;
56
}
26
}
57
--
27
--
58
2.21.0
28
2.26.2
59
29
60
diff view generated by jsdifflib
New patch
1
The vu_client_trip() coroutine is leaked during AioContext switching. It
2
is also unsafe to destroy the vu_dev in panic_cb() since its callers
3
still access it in some cases.
1
4
5
Rework the lifecycle to solve these safety issues.
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-10-stefanha@redhat.com
9
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
10
---
11
util/vhost-user-server.h | 29 ++--
12
block/export/vhost-user-blk-server.c | 9 +-
13
util/vhost-user-server.c | 245 +++++++++++++++------------
14
3 files changed, 155 insertions(+), 128 deletions(-)
15
16
diff --git a/util/vhost-user-server.h b/util/vhost-user-server.h
17
index XXXXXXX..XXXXXXX 100644
18
--- a/util/vhost-user-server.h
19
+++ b/util/vhost-user-server.h
20
@@ -XXX,XX +XXX,XX @@
21
#include "qapi/error.h"
22
#include "standard-headers/linux/virtio_blk.h"
23
24
+/* A kick fd that we monitor on behalf of libvhost-user */
25
typedef struct VuFdWatch {
26
VuDev *vu_dev;
27
int fd; /*kick fd*/
28
void *pvt;
29
vu_watch_cb cb;
30
- bool processing;
31
QTAILQ_ENTRY(VuFdWatch) next;
32
} VuFdWatch;
33
34
-typedef struct VuServer VuServer;
35
-
36
-struct VuServer {
37
+/**
38
+ * VuServer:
39
+ * A vhost-user server instance with user-defined VuDevIface callbacks.
40
+ * Vhost-user device backends can be implemented using VuServer. VuDevIface
41
+ * callbacks and virtqueue kicks run in the given AioContext.
42
+ */
43
+typedef struct {
44
QIONetListener *listener;
45
+ QEMUBH *restart_listener_bh;
46
AioContext *ctx;
47
int max_queues;
48
const VuDevIface *vu_iface;
49
+
50
+ /* Protected by ctx lock */
51
VuDev vu_dev;
52
QIOChannel *ioc; /* The I/O channel with the client */
53
QIOChannelSocket *sioc; /* The underlying data channel with the client */
54
- /* IOChannel for fd provided via VHOST_USER_SET_SLAVE_REQ_FD */
55
- QIOChannel *ioc_slave;
56
- QIOChannelSocket *sioc_slave;
57
- Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
58
QTAILQ_HEAD(, VuFdWatch) vu_fd_watches;
59
- /* restart coroutine co_trip if AIOContext is changed */
60
- bool aio_context_changed;
61
- bool processing_msg;
62
-};
63
+
64
+ Coroutine *co_trip; /* coroutine for processing VhostUserMsg */
65
+} VuServer;
66
67
bool vhost_user_server_start(VuServer *server,
68
SocketAddress *unix_socket,
69
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
70
71
void vhost_user_server_stop(VuServer *server);
72
73
-void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx);
74
+void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx);
75
+void vhost_user_server_detach_aio_context(VuServer *server);
76
77
#endif /* VHOST_USER_SERVER_H */
78
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
79
index XXXXXXX..XXXXXXX 100644
80
--- a/block/export/vhost-user-blk-server.c
81
+++ b/block/export/vhost-user-blk-server.c
82
@@ -XXX,XX +XXX,XX @@ static const VuDevIface vu_block_iface = {
83
static void blk_aio_attached(AioContext *ctx, void *opaque)
84
{
85
VuBlockDev *vub_dev = opaque;
86
- aio_context_acquire(ctx);
87
- vhost_user_server_set_aio_context(&vub_dev->vu_server, ctx);
88
- aio_context_release(ctx);
89
+ vhost_user_server_attach_aio_context(&vub_dev->vu_server, ctx);
90
}
91
92
static void blk_aio_detach(void *opaque)
93
{
94
VuBlockDev *vub_dev = opaque;
95
- AioContext *ctx = vub_dev->vu_server.ctx;
96
- aio_context_acquire(ctx);
97
- vhost_user_server_set_aio_context(&vub_dev->vu_server, NULL);
98
- aio_context_release(ctx);
99
+ vhost_user_server_detach_aio_context(&vub_dev->vu_server);
100
}
101
102
static void
103
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
104
index XXXXXXX..XXXXXXX 100644
105
--- a/util/vhost-user-server.c
106
+++ b/util/vhost-user-server.c
107
@@ -XXX,XX +XXX,XX @@
108
*/
109
#include "qemu/osdep.h"
110
#include "qemu/main-loop.h"
111
+#include "block/aio-wait.h"
112
#include "vhost-user-server.h"
113
114
+/*
115
+ * Theory of operation:
116
+ *
117
+ * VuServer is started and stopped by vhost_user_server_start() and
118
+ * vhost_user_server_stop() from the main loop thread. Starting the server
119
+ * opens a vhost-user UNIX domain socket and listens for incoming connections.
120
+ * Only one connection is allowed at a time.
121
+ *
122
+ * The connection is handled by the vu_client_trip() coroutine in the
123
+ * VuServer->ctx AioContext. The coroutine consists of a vu_dispatch() loop
124
+ * where libvhost-user calls vu_message_read() to receive the next vhost-user
125
+ * protocol messages over the UNIX domain socket.
126
+ *
127
+ * When virtqueues are set up libvhost-user calls set_watch() to monitor kick
128
+ * fds. These fds are also handled in the VuServer->ctx AioContext.
129
+ *
130
+ * Both vu_client_trip() and kick fd monitoring can be stopped by shutting down
131
+ * the socket connection. Shutting down the socket connection causes
132
+ * vu_message_read() to fail since no more data can be received from the socket.
133
+ * After vu_dispatch() fails, vu_client_trip() calls vu_deinit() to stop
134
+ * libvhost-user before terminating the coroutine. vu_deinit() calls
135
+ * remove_watch() to stop monitoring kick fds and this stops virtqueue
136
+ * processing.
137
+ *
138
+ * When vu_client_trip() has finished cleaning up it schedules a BH in the main
139
+ * loop thread to accept the next client connection.
140
+ *
141
+ * When libvhost-user detects an error it calls panic_cb() and sets the
142
+ * dev->broken flag. Both vu_client_trip() and kick fd processing stop when
143
+ * the dev->broken flag is set.
144
+ *
145
+ * It is possible to switch AioContexts using
146
+ * vhost_user_server_detach_aio_context() and
147
+ * vhost_user_server_attach_aio_context(). They stop monitoring fds in the old
148
+ * AioContext and resume monitoring in the new AioContext. The vu_client_trip()
149
+ * coroutine remains in a yielded state during the switch. This is made
150
+ * possible by QIOChannel's support for spurious coroutine re-entry in
151
+ * qio_channel_yield(). The coroutine will restart I/O when re-entered from the
152
+ * new AioContext.
153
+ */
154
+
155
static void vmsg_close_fds(VhostUserMsg *vmsg)
156
{
157
int i;
158
@@ -XXX,XX +XXX,XX @@ static void vmsg_unblock_fds(VhostUserMsg *vmsg)
159
}
160
}
161
162
-static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
163
- gpointer opaque);
164
-
165
-static void close_client(VuServer *server)
166
-{
167
- /*
168
- * Before closing the client
169
- *
170
- * 1. Let vu_client_trip stop processing new vhost-user msg
171
- *
172
- * 2. remove kick_handler
173
- *
174
- * 3. wait for the kick handler to be finished
175
- *
176
- * 4. wait for the current vhost-user msg to be finished processing
177
- */
178
-
179
- QIOChannelSocket *sioc = server->sioc;
180
- /* When this is set vu_client_trip will stop new processing vhost-user message */
181
- server->sioc = NULL;
182
-
183
- while (server->processing_msg) {
184
- if (server->ioc->read_coroutine) {
185
- server->ioc->read_coroutine = NULL;
186
- qio_channel_set_aio_fd_handler(server->ioc, server->ioc->ctx, NULL,
187
- NULL, server->ioc);
188
- server->processing_msg = false;
189
- }
190
- }
191
-
192
- vu_deinit(&server->vu_dev);
193
-
194
- /* vu_deinit() should have called remove_watch() */
195
- assert(QTAILQ_EMPTY(&server->vu_fd_watches));
196
-
197
- object_unref(OBJECT(sioc));
198
- object_unref(OBJECT(server->ioc));
199
-}
200
-
201
static void panic_cb(VuDev *vu_dev, const char *buf)
202
{
203
- VuServer *server = container_of(vu_dev, VuServer, vu_dev);
204
-
205
- /* avoid while loop in close_client */
206
- server->processing_msg = false;
207
-
208
- if (buf) {
209
- error_report("vu_panic: %s", buf);
210
- }
211
-
212
- if (server->sioc) {
213
- close_client(server);
214
- }
215
-
216
- /*
217
- * Set the callback function for network listener so another
218
- * vhost-user client can connect to this server
219
- */
220
- qio_net_listener_set_client_func(server->listener,
221
- vu_accept,
222
- server,
223
- NULL);
224
+ error_report("vu_panic: %s", buf);
225
}
226
227
static bool coroutine_fn
228
@@ -XXX,XX +XXX,XX @@ fail:
229
return false;
230
}
231
232
-
233
-static void vu_client_start(VuServer *server);
234
static coroutine_fn void vu_client_trip(void *opaque)
235
{
236
VuServer *server = opaque;
237
+ VuDev *vu_dev = &server->vu_dev;
238
239
- while (!server->aio_context_changed && server->sioc) {
240
- server->processing_msg = true;
241
- vu_dispatch(&server->vu_dev);
242
- server->processing_msg = false;
243
+ while (!vu_dev->broken && vu_dispatch(vu_dev)) {
244
+ /* Keep running */
245
}
246
247
- if (server->aio_context_changed && server->sioc) {
248
- server->aio_context_changed = false;
249
- vu_client_start(server);
250
- }
251
-}
252
+ vu_deinit(vu_dev);
253
+
254
+ /* vu_deinit() should have called remove_watch() */
255
+ assert(QTAILQ_EMPTY(&server->vu_fd_watches));
256
+
257
+ object_unref(OBJECT(server->sioc));
258
+ server->sioc = NULL;
259
260
-static void vu_client_start(VuServer *server)
261
-{
262
- server->co_trip = qemu_coroutine_create(vu_client_trip, server);
263
- aio_co_enter(server->ctx, server->co_trip);
264
+ object_unref(OBJECT(server->ioc));
265
+ server->ioc = NULL;
266
+
267
+ server->co_trip = NULL;
268
+ if (server->restart_listener_bh) {
269
+ qemu_bh_schedule(server->restart_listener_bh);
270
+ }
271
+ aio_wait_kick();
272
}
273
274
/*
275
@@ -XXX,XX +XXX,XX @@ static void vu_client_start(VuServer *server)
276
static void kick_handler(void *opaque)
277
{
278
VuFdWatch *vu_fd_watch = opaque;
279
- vu_fd_watch->processing = true;
280
- vu_fd_watch->cb(vu_fd_watch->vu_dev, 0, vu_fd_watch->pvt);
281
- vu_fd_watch->processing = false;
282
+ VuDev *vu_dev = vu_fd_watch->vu_dev;
283
+
284
+ vu_fd_watch->cb(vu_dev, 0, vu_fd_watch->pvt);
285
+
286
+ /* Stop vu_client_trip() if an error occurred in vu_fd_watch->cb() */
287
+ if (vu_dev->broken) {
288
+ VuServer *server = container_of(vu_dev, VuServer, vu_dev);
289
+
290
+ qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
291
+ }
292
}
293
294
-
295
static VuFdWatch *find_vu_fd_watch(VuServer *server, int fd)
296
{
297
298
@@ -XXX,XX +XXX,XX @@ static void vu_accept(QIONetListener *listener, QIOChannelSocket *sioc,
299
qio_channel_set_name(QIO_CHANNEL(sioc), "vhost-user client");
300
server->ioc = QIO_CHANNEL(sioc);
301
object_ref(OBJECT(server->ioc));
302
- qio_channel_attach_aio_context(server->ioc, server->ctx);
303
+
304
+ /* TODO vu_message_write() spins if non-blocking! */
305
qio_channel_set_blocking(server->ioc, false, NULL);
306
- vu_client_start(server);
307
+
308
+ server->co_trip = qemu_coroutine_create(vu_client_trip, server);
309
+
310
+ aio_context_acquire(server->ctx);
311
+ vhost_user_server_attach_aio_context(server, server->ctx);
312
+ aio_context_release(server->ctx);
313
}
314
315
-
316
void vhost_user_server_stop(VuServer *server)
317
{
318
+ aio_context_acquire(server->ctx);
319
+
320
+ qemu_bh_delete(server->restart_listener_bh);
321
+ server->restart_listener_bh = NULL;
322
+
323
if (server->sioc) {
324
- close_client(server);
325
+ VuFdWatch *vu_fd_watch;
326
+
327
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
328
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
329
+ NULL, NULL, NULL, vu_fd_watch);
330
+ }
331
+
332
+ qio_channel_shutdown(server->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
333
+
334
+ AIO_WAIT_WHILE(server->ctx, server->co_trip);
335
}
336
337
+ aio_context_release(server->ctx);
338
+
339
if (server->listener) {
340
qio_net_listener_disconnect(server->listener);
341
object_unref(OBJECT(server->listener));
342
}
343
+}
344
+
345
+/*
346
+ * Allow the next client to connect to the server. Called from a BH in the main
347
+ * loop.
348
+ */
349
+static void restart_listener_bh(void *opaque)
350
+{
351
+ VuServer *server = opaque;
352
353
+ qio_net_listener_set_client_func(server->listener, vu_accept, server,
354
+ NULL);
355
}
356
357
-void vhost_user_server_set_aio_context(VuServer *server, AioContext *ctx)
358
+/* Called with ctx acquired */
359
+void vhost_user_server_attach_aio_context(VuServer *server, AioContext *ctx)
360
{
361
- VuFdWatch *vu_fd_watch, *next;
362
- void *opaque = NULL;
363
- IOHandler *io_read = NULL;
364
- bool attach;
365
+ VuFdWatch *vu_fd_watch;
366
367
- server->ctx = ctx ? ctx : qemu_get_aio_context();
368
+ server->ctx = ctx;
369
370
if (!server->sioc) {
371
- /* not yet serving any client*/
372
return;
373
}
374
375
- if (ctx) {
376
- qio_channel_attach_aio_context(server->ioc, ctx);
377
- server->aio_context_changed = true;
378
- io_read = kick_handler;
379
- attach = true;
380
- } else {
381
+ qio_channel_attach_aio_context(server->ioc, ctx);
382
+
383
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
384
+ aio_set_fd_handler(ctx, vu_fd_watch->fd, true, kick_handler, NULL,
385
+ NULL, vu_fd_watch);
386
+ }
387
+
388
+ aio_co_schedule(ctx, server->co_trip);
389
+}
390
+
391
+/* Called with server->ctx acquired */
392
+void vhost_user_server_detach_aio_context(VuServer *server)
393
+{
394
+ if (server->sioc) {
395
+ VuFdWatch *vu_fd_watch;
396
+
397
+ QTAILQ_FOREACH(vu_fd_watch, &server->vu_fd_watches, next) {
398
+ aio_set_fd_handler(server->ctx, vu_fd_watch->fd, true,
399
+ NULL, NULL, NULL, vu_fd_watch);
400
+ }
401
+
402
qio_channel_detach_aio_context(server->ioc);
403
- /* server->ioc->ctx keeps the old AioConext */
404
- ctx = server->ioc->ctx;
405
- attach = false;
406
}
407
408
- QTAILQ_FOREACH_SAFE(vu_fd_watch, &server->vu_fd_watches, next, next) {
409
- if (vu_fd_watch->cb) {
410
- opaque = attach ? vu_fd_watch : NULL;
411
- aio_set_fd_handler(ctx, vu_fd_watch->fd, true,
412
- io_read, NULL, NULL,
413
- opaque);
414
- }
415
- }
416
+ server->ctx = NULL;
417
}
418
419
-
420
bool vhost_user_server_start(VuServer *server,
421
SocketAddress *socket_addr,
422
AioContext *ctx,
423
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
424
const VuDevIface *vu_iface,
425
Error **errp)
426
{
427
+ QEMUBH *bh;
428
QIONetListener *listener = qio_net_listener_new();
429
if (qio_net_listener_open_sync(listener, socket_addr, 1,
430
errp) < 0) {
431
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
432
return false;
433
}
434
435
+ bh = qemu_bh_new(restart_listener_bh, server);
436
+
437
/* zero out unspecified fields */
438
*server = (VuServer) {
439
.listener = listener,
440
+ .restart_listener_bh = bh,
441
.vu_iface = vu_iface,
442
.max_queues = max_queues,
443
.ctx = ctx,
444
--
445
2.26.2
446
diff view generated by jsdifflib
New patch
1
Propagate the flush return value since errors are possible.
1
2
3
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
Message-id: 20200924151549.913737-11-stefanha@redhat.com
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
---
7
block/export/vhost-user-blk-server.c | 11 +++++++----
8
1 file changed, 7 insertions(+), 4 deletions(-)
9
10
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/block/export/vhost-user-blk-server.c
13
+++ b/block/export/vhost-user-blk-server.c
14
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
15
return -EINVAL;
16
}
17
18
-static void coroutine_fn vu_block_flush(VuBlockReq *req)
19
+static int coroutine_fn vu_block_flush(VuBlockReq *req)
20
{
21
VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
22
BlockBackend *backend = vdev_blk->backend;
23
- blk_co_flush(backend);
24
+ return blk_co_flush(backend);
25
}
26
27
static void coroutine_fn vu_block_virtio_process_req(void *opaque)
28
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
29
break;
30
}
31
case VIRTIO_BLK_T_FLUSH:
32
- vu_block_flush(req);
33
- req->in->status = VIRTIO_BLK_S_OK;
34
+ if (vu_block_flush(req) == 0) {
35
+ req->in->status = VIRTIO_BLK_S_OK;
36
+ } else {
37
+ req->in->status = VIRTIO_BLK_S_IOERR;
38
+ }
39
break;
40
case VIRTIO_BLK_T_GET_ID: {
41
size_t size = MIN(iov_size(&elem->in_sg[0], in_num),
42
--
43
2.26.2
44
diff view generated by jsdifflib
New patch
1
1
Use the new QAPI block exports API instead of defining our own QOM
2
objects.
3
4
This is a large change because the lifecycle of VuBlockDev needs to
5
follow BlockExportDriver. QOM properties are replaced by QAPI options
6
objects.
7
8
VuBlockDev is renamed VuBlkExport and contains a BlockExport field.
9
Several fields can be dropped since BlockExport already has equivalents.
10
11
The file names and meson build integration will be adjusted in a future
12
patch. libvhost-user should probably be built as a static library that
13
is linked into QEMU instead of as a .c file that results in duplicate
14
compilation.
15
16
The new command-line syntax is:
17
18
$ qemu-storage-daemon \
19
--blockdev file,node-name=drive0,filename=test.img \
20
--export vhost-user-blk,node-name=drive0,id=export0,unix-socket=/tmp/vhost-user-blk.sock
21
22
Note that unix-socket is optional because we may wish to accept chardevs
23
too in the future.
24
25
Markus noted that supported address families are not explicit in the
26
QAPI schema. It is unlikely that support for more address families will
27
be added since file descriptor passing is required and few address
28
families support it. If a new address family needs to be added, then the
29
QAPI 'features' syntax can be used to advertise them.
30
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
32
Acked-by: Markus Armbruster <armbru@redhat.com>
33
Message-id: 20200924151549.913737-12-stefanha@redhat.com
34
[Skip test on big-endian host architectures because this device doesn't
35
support them yet (as already mentioned in a code comment).
36
--Stefan]
37
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
38
---
39
qapi/block-export.json | 21 +-
40
block/export/vhost-user-blk-server.h | 23 +-
41
block/export/export.c | 6 +
42
block/export/vhost-user-blk-server.c | 452 +++++++--------------------
43
util/vhost-user-server.c | 10 +-
44
block/export/meson.build | 1 +
45
block/meson.build | 1 -
46
7 files changed, 156 insertions(+), 358 deletions(-)
47
48
diff --git a/qapi/block-export.json b/qapi/block-export.json
49
index XXXXXXX..XXXXXXX 100644
50
--- a/qapi/block-export.json
51
+++ b/qapi/block-export.json
52
@@ -XXX,XX +XXX,XX @@
53
'data': { '*name': 'str', '*description': 'str',
54
'*bitmap': 'str' } }
55
56
+##
57
+# @BlockExportOptionsVhostUserBlk:
58
+#
59
+# A vhost-user-blk block export.
60
+#
61
+# @addr: The vhost-user socket on which to listen. Both 'unix' and 'fd'
62
+# SocketAddress types are supported. Passed fds must be UNIX domain
63
+# sockets.
64
+# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes.
65
+#
66
+# Since: 5.2
67
+##
68
+{ 'struct': 'BlockExportOptionsVhostUserBlk',
69
+ 'data': { 'addr': 'SocketAddress', '*logical-block-size': 'size' } }
70
+
71
##
72
# @NbdServerAddOptions:
73
#
74
@@ -XXX,XX +XXX,XX @@
75
# An enumeration of block export types
76
#
77
# @nbd: NBD export
78
+# @vhost-user-blk: vhost-user-blk export (since 5.2)
79
#
80
# Since: 4.2
81
##
82
{ 'enum': 'BlockExportType',
83
- 'data': [ 'nbd' ] }
84
+ 'data': [ 'nbd', 'vhost-user-blk' ] }
85
86
##
87
# @BlockExportOptions:
88
@@ -XXX,XX +XXX,XX @@
89
'*writethrough': 'bool' },
90
'discriminator': 'type',
91
'data': {
92
- 'nbd': 'BlockExportOptionsNbd'
93
+ 'nbd': 'BlockExportOptionsNbd',
94
+ 'vhost-user-blk': 'BlockExportOptionsVhostUserBlk'
95
} }
96
97
##
98
diff --git a/block/export/vhost-user-blk-server.h b/block/export/vhost-user-blk-server.h
99
index XXXXXXX..XXXXXXX 100644
100
--- a/block/export/vhost-user-blk-server.h
101
+++ b/block/export/vhost-user-blk-server.h
102
@@ -XXX,XX +XXX,XX @@
103
104
#ifndef VHOST_USER_BLK_SERVER_H
105
#define VHOST_USER_BLK_SERVER_H
106
-#include "util/vhost-user-server.h"
107
108
-typedef struct VuBlockDev VuBlockDev;
109
-#define TYPE_VHOST_USER_BLK_SERVER "vhost-user-blk-server"
110
-#define VHOST_USER_BLK_SERVER(obj) \
111
- OBJECT_CHECK(VuBlockDev, obj, TYPE_VHOST_USER_BLK_SERVER)
112
+#include "block/export.h"
113
114
-/* vhost user block device */
115
-struct VuBlockDev {
116
- Object parent_obj;
117
- char *node_name;
118
- SocketAddress *addr;
119
- AioContext *ctx;
120
- VuServer vu_server;
121
- bool running;
122
- uint32_t blk_size;
123
- BlockBackend *backend;
124
- QIOChannelSocket *sioc;
125
- QTAILQ_ENTRY(VuBlockDev) next;
126
- struct virtio_blk_config blkcfg;
127
- bool writable;
128
-};
129
+/* For block/export/export.c */
130
+extern const BlockExportDriver blk_exp_vhost_user_blk;
131
132
#endif /* VHOST_USER_BLK_SERVER_H */
133
diff --git a/block/export/export.c b/block/export/export.c
134
index XXXXXXX..XXXXXXX 100644
135
--- a/block/export/export.c
136
+++ b/block/export/export.c
137
@@ -XXX,XX +XXX,XX @@
138
#include "sysemu/block-backend.h"
139
#include "block/export.h"
140
#include "block/nbd.h"
141
+#if CONFIG_LINUX
142
+#include "block/export/vhost-user-blk-server.h"
143
+#endif
144
#include "qapi/error.h"
145
#include "qapi/qapi-commands-block-export.h"
146
#include "qapi/qapi-events-block-export.h"
147
@@ -XXX,XX +XXX,XX @@
148
149
static const BlockExportDriver *blk_exp_drivers[] = {
150
&blk_exp_nbd,
151
+#if CONFIG_LINUX
152
+ &blk_exp_vhost_user_blk,
153
+#endif
154
};
155
156
/* Only accessed from the main thread */
157
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
158
index XXXXXXX..XXXXXXX 100644
159
--- a/block/export/vhost-user-blk-server.c
160
+++ b/block/export/vhost-user-blk-server.c
161
@@ -XXX,XX +XXX,XX @@
162
*/
163
#include "qemu/osdep.h"
164
#include "block/block.h"
165
+#include "contrib/libvhost-user/libvhost-user.h"
166
+#include "standard-headers/linux/virtio_blk.h"
167
+#include "util/vhost-user-server.h"
168
#include "vhost-user-blk-server.h"
169
#include "qapi/error.h"
170
#include "qom/object_interfaces.h"
171
@@ -XXX,XX +XXX,XX @@ struct virtio_blk_inhdr {
172
unsigned char status;
173
};
174
175
-typedef struct VuBlockReq {
176
+typedef struct VuBlkReq {
177
VuVirtqElement elem;
178
int64_t sector_num;
179
size_t size;
180
@@ -XXX,XX +XXX,XX @@ typedef struct VuBlockReq {
181
struct virtio_blk_outhdr out;
182
VuServer *server;
183
struct VuVirtq *vq;
184
-} VuBlockReq;
185
+} VuBlkReq;
186
187
-static void vu_block_req_complete(VuBlockReq *req)
188
+/* vhost user block device */
189
+typedef struct {
190
+ BlockExport export;
191
+ VuServer vu_server;
192
+ uint32_t blk_size;
193
+ QIOChannelSocket *sioc;
194
+ struct virtio_blk_config blkcfg;
195
+ bool writable;
196
+} VuBlkExport;
197
+
198
+static void vu_blk_req_complete(VuBlkReq *req)
199
{
200
VuDev *vu_dev = &req->server->vu_dev;
201
202
@@ -XXX,XX +XXX,XX @@ static void vu_block_req_complete(VuBlockReq *req)
203
free(req);
204
}
205
206
-static VuBlockDev *get_vu_block_device_by_server(VuServer *server)
207
-{
208
- return container_of(server, VuBlockDev, vu_server);
209
-}
210
-
211
static int coroutine_fn
212
-vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
213
- uint32_t iovcnt, uint32_t type)
214
+vu_blk_discard_write_zeroes(BlockBackend *blk, struct iovec *iov,
215
+ uint32_t iovcnt, uint32_t type)
216
{
217
struct virtio_blk_discard_write_zeroes desc;
218
ssize_t size = iov_to_buf(iov, iovcnt, 0, &desc, sizeof(desc));
219
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
220
return -EINVAL;
221
}
222
223
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
224
uint64_t range[2] = { le64_to_cpu(desc.sector) << 9,
225
le32_to_cpu(desc.num_sectors) << 9 };
226
if (type == VIRTIO_BLK_T_DISCARD) {
227
- if (blk_co_pdiscard(vdev_blk->backend, range[0], range[1]) == 0) {
228
+ if (blk_co_pdiscard(blk, range[0], range[1]) == 0) {
229
return 0;
230
}
231
} else if (type == VIRTIO_BLK_T_WRITE_ZEROES) {
232
- if (blk_co_pwrite_zeroes(vdev_blk->backend,
233
- range[0], range[1], 0) == 0) {
234
+ if (blk_co_pwrite_zeroes(blk, range[0], range[1], 0) == 0) {
235
return 0;
236
}
237
}
238
@@ -XXX,XX +XXX,XX @@ vu_block_discard_write_zeroes(VuBlockReq *req, struct iovec *iov,
239
return -EINVAL;
240
}
241
242
-static int coroutine_fn vu_block_flush(VuBlockReq *req)
243
+static void coroutine_fn vu_blk_virtio_process_req(void *opaque)
244
{
245
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(req->server);
246
- BlockBackend *backend = vdev_blk->backend;
247
- return blk_co_flush(backend);
248
-}
249
-
250
-static void coroutine_fn vu_block_virtio_process_req(void *opaque)
251
-{
252
- VuBlockReq *req = opaque;
253
+ VuBlkReq *req = opaque;
254
VuServer *server = req->server;
255
VuVirtqElement *elem = &req->elem;
256
uint32_t type;
257
258
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
259
- BlockBackend *backend = vdev_blk->backend;
260
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
261
+ BlockBackend *blk = vexp->export.blk;
262
263
struct iovec *in_iov = elem->in_sg;
264
struct iovec *out_iov = elem->out_sg;
265
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
266
bool is_write = type & VIRTIO_BLK_T_OUT;
267
req->sector_num = le64_to_cpu(req->out.sector);
268
269
- int64_t offset = req->sector_num * vdev_blk->blk_size;
270
+ if (is_write && !vexp->writable) {
271
+ req->in->status = VIRTIO_BLK_S_IOERR;
272
+ break;
273
+ }
274
+
275
+ int64_t offset = req->sector_num * vexp->blk_size;
276
QEMUIOVector qiov;
277
if (is_write) {
278
qemu_iovec_init_external(&qiov, out_iov, out_num);
279
- ret = blk_co_pwritev(backend, offset, qiov.size,
280
- &qiov, 0);
281
+ ret = blk_co_pwritev(blk, offset, qiov.size, &qiov, 0);
282
} else {
283
qemu_iovec_init_external(&qiov, in_iov, in_num);
284
- ret = blk_co_preadv(backend, offset, qiov.size,
285
- &qiov, 0);
286
+ ret = blk_co_preadv(blk, offset, qiov.size, &qiov, 0);
287
}
288
if (ret >= 0) {
289
req->in->status = VIRTIO_BLK_S_OK;
290
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
291
break;
292
}
293
case VIRTIO_BLK_T_FLUSH:
294
- if (vu_block_flush(req) == 0) {
295
+ if (blk_co_flush(blk) == 0) {
296
req->in->status = VIRTIO_BLK_S_OK;
297
} else {
298
req->in->status = VIRTIO_BLK_S_IOERR;
299
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
300
case VIRTIO_BLK_T_DISCARD:
301
case VIRTIO_BLK_T_WRITE_ZEROES: {
302
int rc;
303
- rc = vu_block_discard_write_zeroes(req, &elem->out_sg[1],
304
- out_num, type);
305
+
306
+ if (!vexp->writable) {
307
+ req->in->status = VIRTIO_BLK_S_IOERR;
308
+ break;
309
+ }
310
+
311
+ rc = vu_blk_discard_write_zeroes(blk, &elem->out_sg[1], out_num, type);
312
if (rc == 0) {
313
req->in->status = VIRTIO_BLK_S_OK;
314
} else {
315
@@ -XXX,XX +XXX,XX @@ static void coroutine_fn vu_block_virtio_process_req(void *opaque)
316
break;
317
}
318
319
- vu_block_req_complete(req);
320
+ vu_blk_req_complete(req);
321
return;
322
323
err:
324
- free(elem);
325
+ free(req);
326
}
327
328
-static void vu_block_process_vq(VuDev *vu_dev, int idx)
329
+static void vu_blk_process_vq(VuDev *vu_dev, int idx)
330
{
331
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
332
VuVirtq *vq = vu_get_queue(vu_dev, idx);
333
334
while (1) {
335
- VuBlockReq *req;
336
+ VuBlkReq *req;
337
338
- req = vu_queue_pop(vu_dev, vq, sizeof(VuBlockReq));
339
+ req = vu_queue_pop(vu_dev, vq, sizeof(VuBlkReq));
340
if (!req) {
341
break;
342
}
343
@@ -XXX,XX +XXX,XX @@ static void vu_block_process_vq(VuDev *vu_dev, int idx)
344
req->vq = vq;
345
346
Coroutine *co =
347
- qemu_coroutine_create(vu_block_virtio_process_req, req);
348
+ qemu_coroutine_create(vu_blk_virtio_process_req, req);
349
qemu_coroutine_enter(co);
350
}
351
}
352
353
-static void vu_block_queue_set_started(VuDev *vu_dev, int idx, bool started)
354
+static void vu_blk_queue_set_started(VuDev *vu_dev, int idx, bool started)
355
{
356
VuVirtq *vq;
357
358
assert(vu_dev);
359
360
vq = vu_get_queue(vu_dev, idx);
361
- vu_set_queue_handler(vu_dev, vq, started ? vu_block_process_vq : NULL);
362
+ vu_set_queue_handler(vu_dev, vq, started ? vu_blk_process_vq : NULL);
363
}
364
365
-static uint64_t vu_block_get_features(VuDev *dev)
366
+static uint64_t vu_blk_get_features(VuDev *dev)
367
{
368
uint64_t features;
369
VuServer *server = container_of(dev, VuServer, vu_dev);
370
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
371
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
372
features = 1ull << VIRTIO_BLK_F_SIZE_MAX |
373
1ull << VIRTIO_BLK_F_SEG_MAX |
374
1ull << VIRTIO_BLK_F_TOPOLOGY |
375
@@ -XXX,XX +XXX,XX @@ static uint64_t vu_block_get_features(VuDev *dev)
376
1ull << VIRTIO_RING_F_EVENT_IDX |
377
1ull << VHOST_USER_F_PROTOCOL_FEATURES;
378
379
- if (!vdev_blk->writable) {
380
+ if (!vexp->writable) {
381
features |= 1ull << VIRTIO_BLK_F_RO;
382
}
383
384
return features;
385
}
386
387
-static uint64_t vu_block_get_protocol_features(VuDev *dev)
388
+static uint64_t vu_blk_get_protocol_features(VuDev *dev)
389
{
390
return 1ull << VHOST_USER_PROTOCOL_F_CONFIG |
391
1ull << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD;
392
}
393
394
static int
395
-vu_block_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
396
+vu_blk_get_config(VuDev *vu_dev, uint8_t *config, uint32_t len)
397
{
398
+ /* TODO blkcfg must be little-endian for VIRTIO 1.0 */
399
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
400
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
401
- memcpy(config, &vdev_blk->blkcfg, len);
402
-
403
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
404
+ memcpy(config, &vexp->blkcfg, len);
405
return 0;
406
}
407
408
static int
409
-vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
410
+vu_blk_set_config(VuDev *vu_dev, const uint8_t *data,
411
uint32_t offset, uint32_t size, uint32_t flags)
412
{
413
VuServer *server = container_of(vu_dev, VuServer, vu_dev);
414
- VuBlockDev *vdev_blk = get_vu_block_device_by_server(server);
415
+ VuBlkExport *vexp = container_of(server, VuBlkExport, vu_server);
416
uint8_t wce;
417
418
/* don't support live migration */
419
@@ -XXX,XX +XXX,XX @@ vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
420
}
421
422
wce = *data;
423
- vdev_blk->blkcfg.wce = wce;
424
- blk_set_enable_write_cache(vdev_blk->backend, wce);
425
+ vexp->blkcfg.wce = wce;
426
+ blk_set_enable_write_cache(vexp->export.blk, wce);
427
return 0;
428
}
429
430
@@ -XXX,XX +XXX,XX @@ vu_block_set_config(VuDev *vu_dev, const uint8_t *data,
431
* of vu_process_message.
432
*
433
*/
434
-static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
435
+static int vu_blk_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
436
{
437
if (vmsg->request == VHOST_USER_NONE) {
438
dev->panic(dev, "disconnect");
439
@@ -XXX,XX +XXX,XX @@ static int vu_block_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
440
return false;
441
}
442
443
-static const VuDevIface vu_block_iface = {
444
- .get_features = vu_block_get_features,
445
- .queue_set_started = vu_block_queue_set_started,
446
- .get_protocol_features = vu_block_get_protocol_features,
447
- .get_config = vu_block_get_config,
448
- .set_config = vu_block_set_config,
449
- .process_msg = vu_block_process_msg,
450
+static const VuDevIface vu_blk_iface = {
451
+ .get_features = vu_blk_get_features,
452
+ .queue_set_started = vu_blk_queue_set_started,
453
+ .get_protocol_features = vu_blk_get_protocol_features,
454
+ .get_config = vu_blk_get_config,
455
+ .set_config = vu_blk_set_config,
456
+ .process_msg = vu_blk_process_msg,
457
};
458
459
static void blk_aio_attached(AioContext *ctx, void *opaque)
460
{
461
- VuBlockDev *vub_dev = opaque;
462
- vhost_user_server_attach_aio_context(&vub_dev->vu_server, ctx);
463
+ VuBlkExport *vexp = opaque;
464
+ vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
465
}
466
467
static void blk_aio_detach(void *opaque)
468
{
469
- VuBlockDev *vub_dev = opaque;
470
- vhost_user_server_detach_aio_context(&vub_dev->vu_server);
471
+ VuBlkExport *vexp = opaque;
472
+ vhost_user_server_detach_aio_context(&vexp->vu_server);
473
}
474
475
static void
476
-vu_block_initialize_config(BlockDriverState *bs,
477
+vu_blk_initialize_config(BlockDriverState *bs,
478
struct virtio_blk_config *config, uint32_t blk_size)
479
{
480
config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
481
@@ -XXX,XX +XXX,XX @@ vu_block_initialize_config(BlockDriverState *bs,
482
config->max_write_zeroes_seg = 1;
483
}
484
485
-static VuBlockDev *vu_block_init(VuBlockDev *vu_block_device, Error **errp)
486
+static void vu_blk_exp_request_shutdown(BlockExport *exp)
487
{
488
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
489
490
- BlockBackend *blk;
491
- Error *local_error = NULL;
492
- const char *node_name = vu_block_device->node_name;
493
- bool writable = vu_block_device->writable;
494
- uint64_t perm = BLK_PERM_CONSISTENT_READ;
495
- int ret;
496
-
497
- AioContext *ctx;
498
-
499
- BlockDriverState *bs = bdrv_lookup_bs(node_name, node_name, &local_error);
500
-
501
- if (!bs) {
502
- error_propagate(errp, local_error);
503
- return NULL;
504
- }
505
-
506
- if (bdrv_is_read_only(bs)) {
507
- writable = false;
508
- }
509
-
510
- if (writable) {
511
- perm |= BLK_PERM_WRITE;
512
- }
513
-
514
- ctx = bdrv_get_aio_context(bs);
515
- aio_context_acquire(ctx);
516
- bdrv_invalidate_cache(bs, NULL);
517
- aio_context_release(ctx);
518
-
519
- /*
520
- * Don't allow resize while the vhost user server is running,
521
- * otherwise we don't care what happens with the node.
522
- */
523
- blk = blk_new(bdrv_get_aio_context(bs), perm,
524
- BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
525
- BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
526
- ret = blk_insert_bs(blk, bs, errp);
527
-
528
- if (ret < 0) {
529
- goto fail;
530
- }
531
-
532
- blk_set_enable_write_cache(blk, false);
533
-
534
- blk_set_allow_aio_context_change(blk, true);
535
-
536
- vu_block_device->blkcfg.wce = 0;
537
- vu_block_device->backend = blk;
538
- if (!vu_block_device->blk_size) {
539
- vu_block_device->blk_size = BDRV_SECTOR_SIZE;
540
- }
541
- vu_block_device->blkcfg.blk_size = vu_block_device->blk_size;
542
- blk_set_guest_block_size(blk, vu_block_device->blk_size);
543
- vu_block_initialize_config(bs, &vu_block_device->blkcfg,
544
- vu_block_device->blk_size);
545
- return vu_block_device;
546
-
547
-fail:
548
- blk_unref(blk);
549
- return NULL;
550
-}
551
-
552
-static void vu_block_deinit(VuBlockDev *vu_block_device)
553
-{
554
- if (vu_block_device->backend) {
555
- blk_remove_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
556
- blk_aio_detach, vu_block_device);
557
- }
558
-
559
- blk_unref(vu_block_device->backend);
560
-}
561
-
562
-static void vhost_user_blk_server_stop(VuBlockDev *vu_block_device)
563
-{
564
- vhost_user_server_stop(&vu_block_device->vu_server);
565
- vu_block_deinit(vu_block_device);
566
-}
567
-
568
-static void vhost_user_blk_server_start(VuBlockDev *vu_block_device,
569
- Error **errp)
570
-{
571
- AioContext *ctx;
572
- SocketAddress *addr = vu_block_device->addr;
573
-
574
- if (!vu_block_init(vu_block_device, errp)) {
575
- return;
576
- }
577
-
578
- ctx = bdrv_get_aio_context(blk_bs(vu_block_device->backend));
579
-
580
- if (!vhost_user_server_start(&vu_block_device->vu_server, addr, ctx,
581
- VHOST_USER_BLK_MAX_QUEUES, &vu_block_iface,
582
- errp)) {
583
- goto error;
584
- }
585
-
586
- blk_add_aio_context_notifier(vu_block_device->backend, blk_aio_attached,
587
- blk_aio_detach, vu_block_device);
588
- vu_block_device->running = true;
589
- return;
590
-
591
- error:
592
- vu_block_deinit(vu_block_device);
593
-}
594
-
595
-static bool vu_prop_modifiable(VuBlockDev *vus, Error **errp)
596
-{
597
- if (vus->running) {
598
- error_setg(errp, "The property can't be modified "
599
- "while the server is running");
600
- return false;
601
- }
602
- return true;
603
-}
604
-
605
-static void vu_set_node_name(Object *obj, const char *value, Error **errp)
606
-{
607
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
608
-
609
- if (!vu_prop_modifiable(vus, errp)) {
610
- return;
611
- }
612
-
613
- if (vus->node_name) {
614
- g_free(vus->node_name);
615
- }
616
-
617
- vus->node_name = g_strdup(value);
618
-}
619
-
620
-static char *vu_get_node_name(Object *obj, Error **errp)
621
-{
622
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
623
- return g_strdup(vus->node_name);
624
-}
625
-
626
-static void free_socket_addr(SocketAddress *addr)
627
-{
628
- g_free(addr->u.q_unix.path);
629
- g_free(addr);
630
-}
631
-
632
-static void vu_set_unix_socket(Object *obj, const char *value,
633
- Error **errp)
634
-{
635
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
636
-
637
- if (!vu_prop_modifiable(vus, errp)) {
638
- return;
639
- }
640
-
641
- if (vus->addr) {
642
- free_socket_addr(vus->addr);
643
- }
644
-
645
- SocketAddress *addr = g_new0(SocketAddress, 1);
646
- addr->type = SOCKET_ADDRESS_TYPE_UNIX;
647
- addr->u.q_unix.path = g_strdup(value);
648
- vus->addr = addr;
649
+ vhost_user_server_stop(&vexp->vu_server);
650
}
651
652
-static char *vu_get_unix_socket(Object *obj, Error **errp)
653
+static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
654
+ Error **errp)
655
{
656
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
657
- return g_strdup(vus->addr->u.q_unix.path);
658
-}
659
-
660
-static bool vu_get_block_writable(Object *obj, Error **errp)
661
-{
662
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
663
- return vus->writable;
664
-}
665
-
666
-static void vu_set_block_writable(Object *obj, bool value, Error **errp)
667
-{
668
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
669
-
670
- if (!vu_prop_modifiable(vus, errp)) {
671
- return;
672
- }
673
-
674
- vus->writable = value;
675
-}
676
-
677
-static void vu_get_blk_size(Object *obj, Visitor *v, const char *name,
678
- void *opaque, Error **errp)
679
-{
680
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
681
- uint32_t value = vus->blk_size;
682
-
683
- visit_type_uint32(v, name, &value, errp);
684
-}
685
-
686
-static void vu_set_blk_size(Object *obj, Visitor *v, const char *name,
687
- void *opaque, Error **errp)
688
-{
689
- VuBlockDev *vus = VHOST_USER_BLK_SERVER(obj);
690
-
691
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
692
+ BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
693
Error *local_err = NULL;
694
- uint32_t value;
695
+ uint64_t logical_block_size;
696
697
- if (!vu_prop_modifiable(vus, errp)) {
698
- return;
699
- }
700
+ vexp->writable = opts->writable;
701
+ vexp->blkcfg.wce = 0;
702
703
- visit_type_uint32(v, name, &value, &local_err);
704
- if (local_err) {
705
- goto out;
706
+ if (vu_opts->has_logical_block_size) {
707
+ logical_block_size = vu_opts->logical_block_size;
708
+ } else {
709
+ logical_block_size = BDRV_SECTOR_SIZE;
710
}
711
-
712
- check_block_size(object_get_typename(obj), name, value, &local_err);
713
+ check_block_size(exp->id, "logical-block-size", logical_block_size,
714
+ &local_err);
715
if (local_err) {
716
- goto out;
717
+ error_propagate(errp, local_err);
718
+ return -EINVAL;
719
+ }
720
+ vexp->blk_size = logical_block_size;
721
+ blk_set_guest_block_size(exp->blk, logical_block_size);
722
+ vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
723
+ logical_block_size);
724
+
725
+ blk_set_allow_aio_context_change(exp->blk, true);
726
+ blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
727
+ vexp);
728
+
729
+ if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
730
+ VHOST_USER_BLK_MAX_QUEUES, &vu_blk_iface,
731
+ errp)) {
732
+ blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
733
+ blk_aio_detach, vexp);
734
+ return -EADDRNOTAVAIL;
735
}
736
737
- vus->blk_size = value;
738
-
739
-out:
740
- error_propagate(errp, local_err);
741
-}
742
-
743
-static void vhost_user_blk_server_instance_finalize(Object *obj)
744
-{
745
- VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
746
-
747
- vhost_user_blk_server_stop(vub);
748
-
749
- /*
750
- * Unlike object_property_add_str, object_class_property_add_str
751
- * doesn't have a release method. Thus manual memory freeing is
752
- * needed.
753
- */
754
- free_socket_addr(vub->addr);
755
- g_free(vub->node_name);
756
-}
757
-
758
-static void vhost_user_blk_server_complete(UserCreatable *obj, Error **errp)
759
-{
760
- VuBlockDev *vub = VHOST_USER_BLK_SERVER(obj);
761
-
762
- vhost_user_blk_server_start(vub, errp);
763
+ return 0;
764
}
765
766
-static void vhost_user_blk_server_class_init(ObjectClass *klass,
767
- void *class_data)
768
+static void vu_blk_exp_delete(BlockExport *exp)
769
{
770
- UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass);
771
- ucc->complete = vhost_user_blk_server_complete;
772
-
773
- object_class_property_add_bool(klass, "writable",
774
- vu_get_block_writable,
775
- vu_set_block_writable);
776
-
777
- object_class_property_add_str(klass, "node-name",
778
- vu_get_node_name,
779
- vu_set_node_name);
780
-
781
- object_class_property_add_str(klass, "unix-socket",
782
- vu_get_unix_socket,
783
- vu_set_unix_socket);
784
+ VuBlkExport *vexp = container_of(exp, VuBlkExport, export);
785
786
- object_class_property_add(klass, "logical-block-size", "uint32",
787
- vu_get_blk_size, vu_set_blk_size,
788
- NULL, NULL);
789
+ blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
790
+ vexp);
791
}
792
793
-static const TypeInfo vhost_user_blk_server_info = {
794
- .name = TYPE_VHOST_USER_BLK_SERVER,
795
- .parent = TYPE_OBJECT,
796
- .instance_size = sizeof(VuBlockDev),
797
- .instance_finalize = vhost_user_blk_server_instance_finalize,
798
- .class_init = vhost_user_blk_server_class_init,
799
- .interfaces = (InterfaceInfo[]) {
800
- {TYPE_USER_CREATABLE},
801
- {}
802
- },
803
+const BlockExportDriver blk_exp_vhost_user_blk = {
804
+ .type = BLOCK_EXPORT_TYPE_VHOST_USER_BLK,
805
+ .instance_size = sizeof(VuBlkExport),
806
+ .create = vu_blk_exp_create,
807
+ .delete = vu_blk_exp_delete,
808
+ .request_shutdown = vu_blk_exp_request_shutdown,
809
};
810
-
811
-static void vhost_user_blk_server_register_types(void)
812
-{
813
- type_register_static(&vhost_user_blk_server_info);
814
-}
815
-
816
-type_init(vhost_user_blk_server_register_types)
817
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
818
index XXXXXXX..XXXXXXX 100644
819
--- a/util/vhost-user-server.c
820
+++ b/util/vhost-user-server.c
821
@@ -XXX,XX +XXX,XX @@ bool vhost_user_server_start(VuServer *server,
822
Error **errp)
823
{
824
QEMUBH *bh;
825
- QIONetListener *listener = qio_net_listener_new();
826
+ QIONetListener *listener;
827
+
828
+ if (socket_addr->type != SOCKET_ADDRESS_TYPE_UNIX &&
829
+ socket_addr->type != SOCKET_ADDRESS_TYPE_FD) {
830
+ error_setg(errp, "Only socket address types 'unix' and 'fd' are supported");
831
+ return false;
832
+ }
833
+
834
+ listener = qio_net_listener_new();
835
if (qio_net_listener_open_sync(listener, socket_addr, 1,
836
errp) < 0) {
837
object_unref(OBJECT(listener));
838
diff --git a/block/export/meson.build b/block/export/meson.build
839
index XXXXXXX..XXXXXXX 100644
840
--- a/block/export/meson.build
841
+++ b/block/export/meson.build
842
@@ -1 +1,2 @@
843
block_ss.add(files('export.c'))
844
+block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', '../../contrib/libvhost-user/libvhost-user.c'))
845
diff --git a/block/meson.build b/block/meson.build
846
index XXXXXXX..XXXXXXX 100644
847
--- a/block/meson.build
848
+++ b/block/meson.build
849
@@ -XXX,XX +XXX,XX @@ block_ss.add(when: 'CONFIG_WIN32', if_true: files('file-win32.c', 'win32-aio.c')
850
block_ss.add(when: 'CONFIG_POSIX', if_true: [files('file-posix.c'), coref, iokit])
851
block_ss.add(when: 'CONFIG_LIBISCSI', if_true: files('iscsi-opts.c'))
852
block_ss.add(when: 'CONFIG_LINUX', if_true: files('nvme.c'))
853
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('export/vhost-user-blk-server.c', '../contrib/libvhost-user/libvhost-user.c'))
854
block_ss.add(when: 'CONFIG_REPLICATION', if_true: files('replication.c'))
855
block_ss.add(when: 'CONFIG_SHEEPDOG', if_true: files('sheepdog.c'))
856
block_ss.add(when: ['CONFIG_LINUX_AIO', libaio], if_true: files('linux-aio.c'))
857
--
858
2.26.2
859
diff view generated by jsdifflib
New patch
1
Headers used by other subsystems are located in include/. Also add the
2
vhost-user-server and vhost-user-blk-server headers to MAINTAINERS.
1
3
4
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
5
Message-id: 20200924151549.913737-13-stefanha@redhat.com
6
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
---
8
MAINTAINERS | 4 +++-
9
{util => include/qemu}/vhost-user-server.h | 0
10
block/export/vhost-user-blk-server.c | 2 +-
11
util/vhost-user-server.c | 2 +-
12
4 files changed, 5 insertions(+), 3 deletions(-)
13
rename {util => include/qemu}/vhost-user-server.h (100%)
14
15
diff --git a/MAINTAINERS b/MAINTAINERS
16
index XXXXXXX..XXXXXXX 100644
17
--- a/MAINTAINERS
18
+++ b/MAINTAINERS
19
@@ -XXX,XX +XXX,XX @@ Vhost-user block device backend server
20
M: Coiby Xu <Coiby.Xu@gmail.com>
21
S: Maintained
22
F: block/export/vhost-user-blk-server.c
23
-F: util/vhost-user-server.c
24
+F: block/export/vhost-user-blk-server.h
25
+F: include/qemu/vhost-user-server.h
26
F: tests/qtest/libqos/vhost-user-blk.c
27
+F: util/vhost-user-server.c
28
29
Replication
30
M: Wen Congyang <wencongyang2@huawei.com>
31
diff --git a/util/vhost-user-server.h b/include/qemu/vhost-user-server.h
32
similarity index 100%
33
rename from util/vhost-user-server.h
34
rename to include/qemu/vhost-user-server.h
35
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/block/export/vhost-user-blk-server.c
38
+++ b/block/export/vhost-user-blk-server.c
39
@@ -XXX,XX +XXX,XX @@
40
#include "block/block.h"
41
#include "contrib/libvhost-user/libvhost-user.h"
42
#include "standard-headers/linux/virtio_blk.h"
43
-#include "util/vhost-user-server.h"
44
+#include "qemu/vhost-user-server.h"
45
#include "vhost-user-blk-server.h"
46
#include "qapi/error.h"
47
#include "qom/object_interfaces.h"
48
diff --git a/util/vhost-user-server.c b/util/vhost-user-server.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/util/vhost-user-server.c
51
+++ b/util/vhost-user-server.c
52
@@ -XXX,XX +XXX,XX @@
53
*/
54
#include "qemu/osdep.h"
55
#include "qemu/main-loop.h"
56
+#include "qemu/vhost-user-server.h"
57
#include "block/aio-wait.h"
58
-#include "vhost-user-server.h"
59
60
/*
61
* Theory of operation:
62
--
63
2.26.2
64
diff view generated by jsdifflib
New patch
1
Don't compile contrib/libvhost-user/libvhost-user.c again. Instead build
2
the static library once and then reuse it throughout QEMU.
1
3
4
Also switch from CONFIG_LINUX to CONFIG_VHOST_USER, which is what the
5
vhost-user tools (vhost-user-gpu, etc) do.
6
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
Message-id: 20200924151549.913737-14-stefanha@redhat.com
9
[Added CONFIG_LINUX again because libvhost-user doesn't build on macOS.
10
--Stefan]
11
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
12
---
13
block/export/export.c | 8 ++++----
14
block/export/meson.build | 2 +-
15
contrib/libvhost-user/meson.build | 1 +
16
meson.build | 6 +++++-
17
util/meson.build | 4 +++-
18
5 files changed, 14 insertions(+), 7 deletions(-)
19
20
diff --git a/block/export/export.c b/block/export/export.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/block/export/export.c
23
+++ b/block/export/export.c
24
@@ -XXX,XX +XXX,XX @@
25
#include "sysemu/block-backend.h"
26
#include "block/export.h"
27
#include "block/nbd.h"
28
-#if CONFIG_LINUX
29
-#include "block/export/vhost-user-blk-server.h"
30
-#endif
31
#include "qapi/error.h"
32
#include "qapi/qapi-commands-block-export.h"
33
#include "qapi/qapi-events-block-export.h"
34
#include "qemu/id.h"
35
+#ifdef CONFIG_VHOST_USER
36
+#include "vhost-user-blk-server.h"
37
+#endif
38
39
static const BlockExportDriver *blk_exp_drivers[] = {
40
&blk_exp_nbd,
41
-#if CONFIG_LINUX
42
+#ifdef CONFIG_VHOST_USER
43
&blk_exp_vhost_user_blk,
44
#endif
45
};
46
diff --git a/block/export/meson.build b/block/export/meson.build
47
index XXXXXXX..XXXXXXX 100644
48
--- a/block/export/meson.build
49
+++ b/block/export/meson.build
50
@@ -XXX,XX +XXX,XX @@
51
block_ss.add(files('export.c'))
52
-block_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-blk-server.c', '../../contrib/libvhost-user/libvhost-user.c'))
53
+block_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: files('vhost-user-blk-server.c'))
54
diff --git a/contrib/libvhost-user/meson.build b/contrib/libvhost-user/meson.build
55
index XXXXXXX..XXXXXXX 100644
56
--- a/contrib/libvhost-user/meson.build
57
+++ b/contrib/libvhost-user/meson.build
58
@@ -XXX,XX +XXX,XX @@
59
libvhost_user = static_library('vhost-user',
60
files('libvhost-user.c', 'libvhost-user-glib.c'),
61
build_by_default: false)
62
+vhost_user = declare_dependency(link_with: libvhost_user)
63
diff --git a/meson.build b/meson.build
64
index XXXXXXX..XXXXXXX 100644
65
--- a/meson.build
66
+++ b/meson.build
67
@@ -XXX,XX +XXX,XX @@ trace_events_subdirs += [
68
'util',
69
]
70
71
+vhost_user = not_found
72
+if 'CONFIG_VHOST_USER' in config_host
73
+ subdir('contrib/libvhost-user')
74
+endif
75
+
76
subdir('qapi')
77
subdir('qobject')
78
subdir('stubs')
79
@@ -XXX,XX +XXX,XX @@ if have_tools
80
install: true)
81
82
if 'CONFIG_VHOST_USER' in config_host
83
- subdir('contrib/libvhost-user')
84
subdir('contrib/vhost-user-blk')
85
subdir('contrib/vhost-user-gpu')
86
subdir('contrib/vhost-user-input')
87
diff --git a/util/meson.build b/util/meson.build
88
index XXXXXXX..XXXXXXX 100644
89
--- a/util/meson.build
90
+++ b/util/meson.build
91
@@ -XXX,XX +XXX,XX @@ if have_block
92
util_ss.add(files('main-loop.c'))
93
util_ss.add(files('nvdimm-utils.c'))
94
util_ss.add(files('qemu-coroutine.c', 'qemu-coroutine-lock.c', 'qemu-coroutine-io.c'))
95
- util_ss.add(when: 'CONFIG_LINUX', if_true: files('vhost-user-server.c'))
96
+ util_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: [
97
+ files('vhost-user-server.c'), vhost_user
98
+ ])
99
util_ss.add(files('block-helpers.c'))
100
util_ss.add(files('qemu-coroutine-sleep.c'))
101
util_ss.add(files('qemu-co-shared-resource.c'))
102
--
103
2.26.2
104
diff view generated by jsdifflib
New patch
1
Introduce libblockdev.fa to avoid compiling blockdev_ss twice.
1
2
3
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
4
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
5
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
6
Message-id: 20200929125516.186715-3-stefanha@redhat.com
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
8
---
9
meson.build | 12 ++++++++++--
10
storage-daemon/meson.build | 3 +--
11
2 files changed, 11 insertions(+), 4 deletions(-)
12
13
diff --git a/meson.build b/meson.build
14
index XXXXXXX..XXXXXXX 100644
15
--- a/meson.build
16
+++ b/meson.build
17
@@ -XXX,XX +XXX,XX @@ blockdev_ss.add(files(
18
# os-win32.c does not
19
blockdev_ss.add(when: 'CONFIG_POSIX', if_true: files('os-posix.c'))
20
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: [files('os-win32.c')])
21
-softmmu_ss.add_all(blockdev_ss)
22
23
common_ss.add(files('cpus-common.c'))
24
25
@@ -XXX,XX +XXX,XX @@ block = declare_dependency(link_whole: [libblock],
26
link_args: '@block.syms',
27
dependencies: [crypto, io])
28
29
+blockdev_ss = blockdev_ss.apply(config_host, strict: false)
30
+libblockdev = static_library('blockdev', blockdev_ss.sources() + genh,
31
+ dependencies: blockdev_ss.dependencies(),
32
+ name_suffix: 'fa',
33
+ build_by_default: false)
34
+
35
+blockdev = declare_dependency(link_whole: [libblockdev],
36
+ dependencies: [block])
37
+
38
qmp_ss = qmp_ss.apply(config_host, strict: false)
39
libqmp = static_library('qmp', qmp_ss.sources() + genh,
40
dependencies: qmp_ss.dependencies(),
41
@@ -XXX,XX +XXX,XX @@ foreach m : block_mods + softmmu_mods
42
install_dir: config_host['qemu_moddir'])
43
endforeach
44
45
-softmmu_ss.add(authz, block, chardev, crypto, io, qmp)
46
+softmmu_ss.add(authz, blockdev, chardev, crypto, io, qmp)
47
common_ss.add(qom, qemuutil)
48
49
common_ss.add_all(when: 'CONFIG_SOFTMMU', if_true: [softmmu_ss])
50
diff --git a/storage-daemon/meson.build b/storage-daemon/meson.build
51
index XXXXXXX..XXXXXXX 100644
52
--- a/storage-daemon/meson.build
53
+++ b/storage-daemon/meson.build
54
@@ -XXX,XX +XXX,XX @@
55
qsd_ss = ss.source_set()
56
qsd_ss.add(files('qemu-storage-daemon.c'))
57
-qsd_ss.add(block, chardev, qmp, qom, qemuutil)
58
-qsd_ss.add_all(blockdev_ss)
59
+qsd_ss.add(blockdev, chardev, qmp, qom, qemuutil)
60
61
subdir('qapi')
62
63
--
64
2.26.2
65
diff view generated by jsdifflib
New patch
1
Block exports are used by softmmu, qemu-storage-daemon, and qemu-nbd.
2
They are not used by other programs and are not otherwise needed in
3
libblock.
1
4
5
Undo the recent move of blockdev-nbd.c from blockdev_ss into block_ss.
6
Since bdrv_close_all() (libblock) calls blk_exp_close_all()
7
(libblockdev), a stub function is required.
8
9
Make qemu-nbd.c use signal handling utility functions instead of
10
duplicating the code. This helps because os-posix.c is in libblockdev
11
and it depends on a qemu_system_killed() symbol that qemu-nbd.c lacks.
12
Once we use the signal handling utility functions we also end up
13
providing the necessary symbol.
14
15
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
16
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
17
Reviewed-by: Eric Blake <eblake@redhat.com>
18
Message-id: 20200929125516.186715-4-stefanha@redhat.com
19
[Fixed s/ndb/nbd/ typo in commit description as suggested by Eric Blake
20
--Stefan]
21
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
22
---
23
qemu-nbd.c | 21 ++++++++-------------
24
stubs/blk-exp-close-all.c | 7 +++++++
25
block/export/meson.build | 4 ++--
26
meson.build | 4 ++--
27
nbd/meson.build | 2 ++
28
stubs/meson.build | 1 +
29
6 files changed, 22 insertions(+), 17 deletions(-)
30
create mode 100644 stubs/blk-exp-close-all.c
31
32
diff --git a/qemu-nbd.c b/qemu-nbd.c
33
index XXXXXXX..XXXXXXX 100644
34
--- a/qemu-nbd.c
35
+++ b/qemu-nbd.c
36
@@ -XXX,XX +XXX,XX @@
37
#include "qapi/error.h"
38
#include "qemu/cutils.h"
39
#include "sysemu/block-backend.h"
40
+#include "sysemu/runstate.h" /* for qemu_system_killed() prototype */
41
#include "block/block_int.h"
42
#include "block/nbd.h"
43
#include "qemu/main-loop.h"
44
@@ -XXX,XX +XXX,XX @@ QEMU_COPYRIGHT "\n"
45
}
46
47
#ifdef CONFIG_POSIX
48
-static void termsig_handler(int signum)
49
+/*
50
+ * The client thread uses SIGTERM to interrupt the server. A signal
51
+ * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
52
+ */
53
+void qemu_system_killed(int signum, pid_t pid)
54
{
55
qatomic_cmpxchg(&state, RUNNING, TERMINATE);
56
qemu_notify_event();
57
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv)
58
BlockExportOptions *export_opts;
59
60
#ifdef CONFIG_POSIX
61
- /*
62
- * Exit gracefully on various signals, which includes SIGTERM used
63
- * by 'qemu-nbd -v -c'.
64
- */
65
- struct sigaction sa_sigterm;
66
- memset(&sa_sigterm, 0, sizeof(sa_sigterm));
67
- sa_sigterm.sa_handler = termsig_handler;
68
- sigaction(SIGTERM, &sa_sigterm, NULL);
69
- sigaction(SIGINT, &sa_sigterm, NULL);
70
- sigaction(SIGHUP, &sa_sigterm, NULL);
71
-
72
- signal(SIGPIPE, SIG_IGN);
73
+ os_setup_early_signal_handling();
74
+ os_setup_signal_handling();
75
#endif
76
77
socket_init();
78
diff --git a/stubs/blk-exp-close-all.c b/stubs/blk-exp-close-all.c
79
new file mode 100644
80
index XXXXXXX..XXXXXXX
81
--- /dev/null
82
+++ b/stubs/blk-exp-close-all.c
83
@@ -XXX,XX +XXX,XX @@
84
+#include "qemu/osdep.h"
85
+#include "block/export.h"
86
+
87
+/* Only used in programs that support block exports (libblockdev.fa) */
88
+void blk_exp_close_all(void)
89
+{
90
+}
91
diff --git a/block/export/meson.build b/block/export/meson.build
92
index XXXXXXX..XXXXXXX 100644
93
--- a/block/export/meson.build
94
+++ b/block/export/meson.build
95
@@ -XXX,XX +XXX,XX @@
96
-block_ss.add(files('export.c'))
97
-block_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: files('vhost-user-blk-server.c'))
98
+blockdev_ss.add(files('export.c'))
99
+blockdev_ss.add(when: ['CONFIG_LINUX', 'CONFIG_VHOST_USER'], if_true: files('vhost-user-blk-server.c'))
100
diff --git a/meson.build b/meson.build
101
index XXXXXXX..XXXXXXX 100644
102
--- a/meson.build
103
+++ b/meson.build
104
@@ -XXX,XX +XXX,XX @@ subdir('dump')
105
106
block_ss.add(files(
107
'block.c',
108
- 'blockdev-nbd.c',
109
'blockjob.c',
110
'job.c',
111
'qemu-io-cmds.c',
112
@@ -XXX,XX +XXX,XX @@ subdir('block')
113
114
blockdev_ss.add(files(
115
'blockdev.c',
116
+ 'blockdev-nbd.c',
117
'iothread.c',
118
'job-qmp.c',
119
))
120
@@ -XXX,XX +XXX,XX @@ if have_tools
121
qemu_io = executable('qemu-io', files('qemu-io.c'),
122
dependencies: [block, qemuutil], install: true)
123
qemu_nbd = executable('qemu-nbd', files('qemu-nbd.c'),
124
- dependencies: [block, qemuutil], install: true)
125
+ dependencies: [blockdev, qemuutil], install: true)
126
127
subdir('storage-daemon')
128
subdir('contrib/rdmacm-mux')
129
diff --git a/nbd/meson.build b/nbd/meson.build
130
index XXXXXXX..XXXXXXX 100644
131
--- a/nbd/meson.build
132
+++ b/nbd/meson.build
133
@@ -XXX,XX +XXX,XX @@
134
block_ss.add(files(
135
'client.c',
136
'common.c',
137
+))
138
+blockdev_ss.add(files(
139
'server.c',
140
))
141
diff --git a/stubs/meson.build b/stubs/meson.build
142
index XXXXXXX..XXXXXXX 100644
143
--- a/stubs/meson.build
144
+++ b/stubs/meson.build
145
@@ -XXX,XX +XXX,XX @@
146
stub_ss.add(files('arch_type.c'))
147
stub_ss.add(files('bdrv-next-monitor-owned.c'))
148
stub_ss.add(files('blk-commit-all.c'))
149
+stub_ss.add(files('blk-exp-close-all.c'))
150
stub_ss.add(files('blockdev-close-all-bdrv-states.c'))
151
stub_ss.add(files('change-state-handler.c'))
152
stub_ss.add(files('cmos.c'))
153
--
154
2.26.2
155
diff view generated by jsdifflib
1
From: Pino Toscano <ptoscano@redhat.com>
1
Make it possible to specify the iothread where the export will run. By
2
default the block node can be moved to other AioContexts later and the
3
export will follow. The fixed-iothread option forces strict behavior
4
that prevents changing AioContext while the export is active. See the
5
QAPI docs for details.
2
6
3
Rewrite the implementation of the ssh block driver to use libssh instead
7
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
4
of libssh2. The libssh library has various advantages over libssh2:
8
Message-id: 20200929125516.186715-5-stefanha@redhat.com
5
- easier API for authentication (for example for using ssh-agent)
9
[Fix stray '#' character in block-export.json and add missing "(since:
6
- easier API for known_hosts handling
10
5.2)" as suggested by Eric Blake.
7
- supports newer types of keys in known_hosts
11
--Stefan]
12
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
13
---
14
qapi/block-export.json | 11 ++++++++++
15
block/export/export.c | 31 +++++++++++++++++++++++++++-
16
block/export/vhost-user-blk-server.c | 5 ++++-
17
nbd/server.c | 2 --
18
4 files changed, 45 insertions(+), 4 deletions(-)
8
19
9
Use APIs/features available in libssh 0.8 conditionally, to support
20
diff --git a/qapi/block-export.json b/qapi/block-export.json
10
older versions (which are not recommended though).
11
12
Adjust the iotest 207 according to the different error message, and to
13
find the default key type for localhost (to properly compare the
14
fingerprint with).
15
Contributed-by: Max Reitz <mreitz@redhat.com>
16
17
Adjust the various Docker/Travis scripts to use libssh when available
18
instead of libssh2. The mingw/mxe testing is dropped for now, as there
19
are no packages for it.
20
21
Signed-off-by: Pino Toscano <ptoscano@redhat.com>
22
Tested-by: Philippe Mathieu-Daudé <philmd@redhat.com>
23
Acked-by: Alex Bennée <alex.bennee@linaro.org>
24
Message-id: 20190620200840.17655-1-ptoscano@redhat.com
25
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
26
Signed-off-by: Max Reitz <mreitz@redhat.com>
27
---
28
configure | 65 +-
29
block/Makefile.objs | 6 +-
30
block/ssh.c | 673 ++++++++++--------
31
.travis.yml | 4 +-
32
block/trace-events | 14 +-
33
docs/qemu-block-drivers.texi | 2 +-
34
.../dockerfiles/debian-win32-cross.docker | 1 -
35
.../dockerfiles/debian-win64-cross.docker | 1 -
36
tests/docker/dockerfiles/fedora.docker | 4 +-
37
tests/docker/dockerfiles/ubuntu.docker | 2 +-
38
tests/docker/dockerfiles/ubuntu1804.docker | 2 +-
39
tests/qemu-iotests/207 | 54 +-
40
tests/qemu-iotests/207.out | 2 +-
41
13 files changed, 470 insertions(+), 360 deletions(-)
42
43
diff --git a/configure b/configure
44
index XXXXXXX..XXXXXXX 100755
45
--- a/configure
46
+++ b/configure
47
@@ -XXX,XX +XXX,XX @@ auth_pam=""
48
vte=""
49
virglrenderer=""
50
tpm=""
51
-libssh2=""
52
+libssh=""
53
live_block_migration="yes"
54
numa=""
55
tcmalloc="no"
56
@@ -XXX,XX +XXX,XX @@ for opt do
57
;;
58
--enable-tpm) tpm="yes"
59
;;
60
- --disable-libssh2) libssh2="no"
61
+ --disable-libssh) libssh="no"
62
;;
63
- --enable-libssh2) libssh2="yes"
64
+ --enable-libssh) libssh="yes"
65
;;
66
--disable-live-block-migration) live_block_migration="no"
67
;;
68
@@ -XXX,XX +XXX,XX @@ disabled with --disable-FEATURE, default is enabled if available:
69
coroutine-pool coroutine freelist (better performance)
70
glusterfs GlusterFS backend
71
tpm TPM support
72
- libssh2 ssh block device support
73
+ libssh ssh block device support
74
numa libnuma support
75
libxml2 for Parallels image format
76
tcmalloc tcmalloc support
77
@@ -XXX,XX +XXX,XX @@ EOF
78
fi
79
80
##########################################
81
-# libssh2 probe
82
-min_libssh2_version=1.2.8
83
-if test "$libssh2" != "no" ; then
84
- if $pkg_config --atleast-version=$min_libssh2_version libssh2; then
85
- libssh2_cflags=$($pkg_config libssh2 --cflags)
86
- libssh2_libs=$($pkg_config libssh2 --libs)
87
- libssh2=yes
88
+# libssh probe
89
+if test "$libssh" != "no" ; then
90
+ if $pkg_config --exists libssh; then
91
+ libssh_cflags=$($pkg_config libssh --cflags)
92
+ libssh_libs=$($pkg_config libssh --libs)
93
+ libssh=yes
94
else
95
- if test "$libssh2" = "yes" ; then
96
- error_exit "libssh2 >= $min_libssh2_version required for --enable-libssh2"
97
+ if test "$libssh" = "yes" ; then
98
+ error_exit "libssh required for --enable-libssh"
99
fi
100
- libssh2=no
101
+ libssh=no
102
fi
103
fi
104
105
##########################################
106
-# libssh2_sftp_fsync probe
107
+# Check for libssh 0.8
108
+# This is done like this instead of using the LIBSSH_VERSION_* and
109
+# SSH_VERSION_* macros because some distributions in the past shipped
110
+# snapshots of the future 0.8 from Git, and those snapshots did not
111
+# have updated version numbers (still referring to 0.7.0).
112
113
-if test "$libssh2" = "yes"; then
114
+if test "$libssh" = "yes"; then
115
cat > $TMPC <<EOF
116
-#include <stdio.h>
117
-#include <libssh2.h>
118
-#include <libssh2_sftp.h>
119
-int main(void) {
120
- LIBSSH2_SESSION *session;
121
- LIBSSH2_SFTP *sftp;
122
- LIBSSH2_SFTP_HANDLE *sftp_handle;
123
- session = libssh2_session_init ();
124
- sftp = libssh2_sftp_init (session);
125
- sftp_handle = libssh2_sftp_open (sftp, "/", 0, 0);
126
- libssh2_sftp_fsync (sftp_handle);
127
- return 0;
128
-}
129
+#include <libssh/libssh.h>
130
+int main(void) { return ssh_get_server_publickey(NULL, NULL); }
131
EOF
132
- # libssh2_cflags/libssh2_libs defined in previous test.
133
- if compile_prog "$libssh2_cflags" "$libssh2_libs" ; then
134
- QEMU_CFLAGS="-DHAS_LIBSSH2_SFTP_FSYNC $QEMU_CFLAGS"
135
+ if compile_prog "$libssh_cflags" "$libssh_libs"; then
136
+ libssh_cflags="-DHAVE_LIBSSH_0_8 $libssh_cflags"
137
fi
138
fi
139
140
@@ -XXX,XX +XXX,XX @@ echo "GlusterFS support $glusterfs"
141
echo "gcov $gcov_tool"
142
echo "gcov enabled $gcov"
143
echo "TPM support $tpm"
144
-echo "libssh2 support $libssh2"
145
+echo "libssh support $libssh"
146
echo "QOM debugging $qom_cast_debug"
147
echo "Live block migration $live_block_migration"
148
echo "lzo support $lzo"
149
@@ -XXX,XX +XXX,XX @@ if test "$glusterfs_iocb_has_stat" = "yes" ; then
150
echo "CONFIG_GLUSTERFS_IOCB_HAS_STAT=y" >> $config_host_mak
151
fi
152
153
-if test "$libssh2" = "yes" ; then
154
- echo "CONFIG_LIBSSH2=m" >> $config_host_mak
155
- echo "LIBSSH2_CFLAGS=$libssh2_cflags" >> $config_host_mak
156
- echo "LIBSSH2_LIBS=$libssh2_libs" >> $config_host_mak
157
+if test "$libssh" = "yes" ; then
158
+ echo "CONFIG_LIBSSH=m" >> $config_host_mak
159
+ echo "LIBSSH_CFLAGS=$libssh_cflags" >> $config_host_mak
160
+ echo "LIBSSH_LIBS=$libssh_libs" >> $config_host_mak
161
fi
162
163
if test "$live_block_migration" = "yes" ; then
164
diff --git a/block/Makefile.objs b/block/Makefile.objs
165
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
166
--- a/block/Makefile.objs
22
--- a/qapi/block-export.json
167
+++ b/block/Makefile.objs
23
+++ b/qapi/block-export.json
168
@@ -XXX,XX +XXX,XX @@ block-obj-$(CONFIG_CURL) += curl.o
24
@@ -XXX,XX +XXX,XX @@
169
block-obj-$(CONFIG_RBD) += rbd.o
25
# export before completion is signalled. (since: 5.2;
170
block-obj-$(CONFIG_GLUSTERFS) += gluster.o
26
# default: false)
171
block-obj-$(CONFIG_VXHS) += vxhs.o
27
#
172
-block-obj-$(CONFIG_LIBSSH2) += ssh.o
28
+# @iothread: The name of the iothread object where the export will run. The
173
+block-obj-$(CONFIG_LIBSSH) += ssh.o
29
+# default is to use the thread currently associated with the
174
block-obj-y += accounting.o dirty-bitmap.o
30
+# block node. (since: 5.2)
175
block-obj-y += write-threshold.o
31
+#
176
block-obj-y += backup.o
32
+# @fixed-iothread: True prevents the block node from being moved to another
177
@@ -XXX,XX +XXX,XX @@ rbd.o-libs := $(RBD_LIBS)
33
+# thread while the export is active. If true and @iothread is
178
gluster.o-cflags := $(GLUSTERFS_CFLAGS)
34
+# given, export creation fails if the block node cannot be
179
gluster.o-libs := $(GLUSTERFS_LIBS)
35
+# moved to the iothread. The default is false. (since: 5.2)
180
vxhs.o-libs := $(VXHS_LIBS)
36
+#
181
-ssh.o-cflags := $(LIBSSH2_CFLAGS)
37
# Since: 4.2
182
-ssh.o-libs := $(LIBSSH2_LIBS)
38
##
183
+ssh.o-cflags := $(LIBSSH_CFLAGS)
39
{ 'union': 'BlockExportOptions',
184
+ssh.o-libs := $(LIBSSH_LIBS)
40
'base': { 'type': 'BlockExportType',
185
block-obj-dmg-bz2-$(CONFIG_BZIP2) += dmg-bz2.o
41
'id': 'str',
186
block-obj-$(if $(CONFIG_DMG),m,n) += $(block-obj-dmg-bz2-y)
42
+     '*fixed-iothread': 'bool',
187
dmg-bz2.o-libs := $(BZIP2_LIBS)
43
+     '*iothread': 'str',
188
diff --git a/block/ssh.c b/block/ssh.c
44
'node-name': 'str',
45
'*writable': 'bool',
46
'*writethrough': 'bool' },
47
diff --git a/block/export/export.c b/block/export/export.c
189
index XXXXXXX..XXXXXXX 100644
48
index XXXXXXX..XXXXXXX 100644
190
--- a/block/ssh.c
49
--- a/block/export/export.c
191
+++ b/block/ssh.c
50
+++ b/block/export/export.c
192
@@ -XXX,XX +XXX,XX @@
51
@@ -XXX,XX +XXX,XX @@
193
52
194
#include "qemu/osdep.h"
53
#include "block/block.h"
195
54
#include "sysemu/block-backend.h"
196
-#include <libssh2.h>
55
+#include "sysemu/iothread.h"
197
-#include <libssh2_sftp.h>
56
#include "block/export.h"
198
+#include <libssh/libssh.h>
57
#include "block/nbd.h"
199
+#include <libssh/sftp.h>
58
#include "qapi/error.h"
200
59
@@ -XXX,XX +XXX,XX @@ static const BlockExportDriver *blk_exp_find_driver(BlockExportType type)
201
#include "block/block_int.h"
60
202
#include "block/qdict.h"
61
BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
203
@@ -XXX,XX +XXX,XX @@
204
#include "trace.h"
205
206
/*
207
- * TRACE_LIBSSH2=<bitmask> enables tracing in libssh2 itself. Note
208
- * that this requires that libssh2 was specially compiled with the
209
- * `./configure --enable-debug' option, so most likely you will have
210
- * to compile it yourself. The meaning of <bitmask> is described
211
- * here: http://www.libssh2.org/libssh2_trace.html
212
+ * TRACE_LIBSSH=<level> enables tracing in libssh itself.
213
+ * The meaning of <level> is described here:
214
+ * http://api.libssh.org/master/group__libssh__log.html
215
*/
216
-#define TRACE_LIBSSH2 0 /* or try: LIBSSH2_TRACE_SFTP */
217
+#define TRACE_LIBSSH 0 /* see: SSH_LOG_* */
218
219
typedef struct BDRVSSHState {
220
/* Coroutine. */
221
@@ -XXX,XX +XXX,XX @@ typedef struct BDRVSSHState {
222
223
/* SSH connection. */
224
int sock; /* socket */
225
- LIBSSH2_SESSION *session; /* ssh session */
226
- LIBSSH2_SFTP *sftp; /* sftp session */
227
- LIBSSH2_SFTP_HANDLE *sftp_handle; /* sftp remote file handle */
228
+ ssh_session session; /* ssh session */
229
+ sftp_session sftp; /* sftp session */
230
+ sftp_file sftp_handle; /* sftp remote file handle */
231
232
- /* See ssh_seek() function below. */
233
- int64_t offset;
234
- bool offset_op_read;
235
-
236
- /* File attributes at open. We try to keep the .filesize field
237
+ /*
238
+ * File attributes at open. We try to keep the .size field
239
* updated if it changes (eg by writing at the end of the file).
240
*/
241
- LIBSSH2_SFTP_ATTRIBUTES attrs;
242
+ sftp_attributes attrs;
243
244
InetSocketAddress *inet;
245
246
@@ -XXX,XX +XXX,XX @@ static void ssh_state_init(BDRVSSHState *s)
247
{
62
{
248
memset(s, 0, sizeof *s);
63
+ bool fixed_iothread = export->has_fixed_iothread && export->fixed_iothread;
249
s->sock = -1;
64
const BlockExportDriver *drv;
250
- s->offset = -1;
65
BlockExport *exp = NULL;
251
qemu_co_mutex_init(&s->lock);
66
BlockDriverState *bs;
252
}
67
- BlockBackend *blk;
253
68
+ BlockBackend *blk = NULL;
254
@@ -XXX,XX +XXX,XX @@ static void ssh_state_free(BDRVSSHState *s)
69
AioContext *ctx;
255
{
70
uint64_t perm;
256
g_free(s->user);
71
int ret;
257
72
@@ -XXX,XX +XXX,XX @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
258
+ if (s->attrs) {
73
ctx = bdrv_get_aio_context(bs);
259
+ sftp_attributes_free(s->attrs);
74
aio_context_acquire(ctx);
260
+ }
75
261
if (s->sftp_handle) {
76
+ if (export->has_iothread) {
262
- libssh2_sftp_close(s->sftp_handle);
77
+ IOThread *iothread;
263
+ sftp_close(s->sftp_handle);
78
+ AioContext *new_ctx;
264
}
265
if (s->sftp) {
266
- libssh2_sftp_shutdown(s->sftp);
267
+ sftp_free(s->sftp);
268
}
269
if (s->session) {
270
- libssh2_session_disconnect(s->session,
271
- "from qemu ssh client: "
272
- "user closed the connection");
273
- libssh2_session_free(s->session);
274
- }
275
- if (s->sock >= 0) {
276
- close(s->sock);
277
+ ssh_disconnect(s->session);
278
+ ssh_free(s->session); /* This frees s->sock */
279
}
280
}
281
282
@@ -XXX,XX +XXX,XX @@ session_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
283
va_end(args);
284
285
if (s->session) {
286
- char *ssh_err;
287
+ const char *ssh_err;
288
int ssh_err_code;
289
290
- /* This is not an errno. See <libssh2.h>. */
291
- ssh_err_code = libssh2_session_last_error(s->session,
292
- &ssh_err, NULL, 0);
293
- error_setg(errp, "%s: %s (libssh2 error code: %d)",
294
+ /* This is not an errno. See <libssh/libssh.h>. */
295
+ ssh_err = ssh_get_error(s->session);
296
+ ssh_err_code = ssh_get_error_code(s->session);
297
+ error_setg(errp, "%s: %s (libssh error code: %d)",
298
msg, ssh_err, ssh_err_code);
299
} else {
300
error_setg(errp, "%s", msg);
301
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
302
va_end(args);
303
304
if (s->sftp) {
305
- char *ssh_err;
306
+ const char *ssh_err;
307
int ssh_err_code;
308
- unsigned long sftp_err_code;
309
+ int sftp_err_code;
310
311
- /* This is not an errno. See <libssh2.h>. */
312
- ssh_err_code = libssh2_session_last_error(s->session,
313
- &ssh_err, NULL, 0);
314
- /* See <libssh2_sftp.h>. */
315
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
316
+ /* This is not an errno. See <libssh/libssh.h>. */
317
+ ssh_err = ssh_get_error(s->session);
318
+ ssh_err_code = ssh_get_error_code(s->session);
319
+ /* See <libssh/sftp.h>. */
320
+ sftp_err_code = sftp_get_error(s->sftp);
321
322
error_setg(errp,
323
- "%s: %s (libssh2 error code: %d, sftp error code: %lu)",
324
+ "%s: %s (libssh error code: %d, sftp error code: %d)",
325
msg, ssh_err, ssh_err_code, sftp_err_code);
326
} else {
327
error_setg(errp, "%s", msg);
328
@@ -XXX,XX +XXX,XX @@ sftp_error_setg(Error **errp, BDRVSSHState *s, const char *fs, ...)
329
330
static void sftp_error_trace(BDRVSSHState *s, const char *op)
331
{
332
- char *ssh_err;
333
+ const char *ssh_err;
334
int ssh_err_code;
335
- unsigned long sftp_err_code;
336
+ int sftp_err_code;
337
338
- /* This is not an errno. See <libssh2.h>. */
339
- ssh_err_code = libssh2_session_last_error(s->session,
340
- &ssh_err, NULL, 0);
341
- /* See <libssh2_sftp.h>. */
342
- sftp_err_code = libssh2_sftp_last_error((s)->sftp);
343
+ /* This is not an errno. See <libssh/libssh.h>. */
344
+ ssh_err = ssh_get_error(s->session);
345
+ ssh_err_code = ssh_get_error_code(s->session);
346
+ /* See <libssh/sftp.h>. */
347
+ sftp_err_code = sftp_get_error(s->sftp);
348
349
trace_sftp_error(op, ssh_err, ssh_err_code, sftp_err_code);
350
}
351
@@ -XXX,XX +XXX,XX @@ static void ssh_parse_filename(const char *filename, QDict *options,
352
parse_uri(filename, options, errp);
353
}
354
355
-static int check_host_key_knownhosts(BDRVSSHState *s,
356
- const char *host, int port, Error **errp)
357
+static int check_host_key_knownhosts(BDRVSSHState *s, Error **errp)
358
{
359
- const char *home;
360
- char *knh_file = NULL;
361
- LIBSSH2_KNOWNHOSTS *knh = NULL;
362
- struct libssh2_knownhost *found;
363
- int ret, r;
364
- const char *hostkey;
365
- size_t len;
366
- int type;
367
-
368
- hostkey = libssh2_session_hostkey(s->session, &len, &type);
369
- if (!hostkey) {
370
+ int ret;
371
+ int r;
372
+ ssh_key pubkey;
373
+ enum ssh_keytypes_e pubkey_type;
374
+ unsigned char *server_hash = NULL;
375
+ size_t server_hash_len;
376
+ char *fingerprint = NULL;
377
+#ifdef HAVE_LIBSSH_0_8
378
+ enum ssh_known_hosts_e state;
379
+
79
+
380
+ state = ssh_session_is_known_server(s->session);
80
+ iothread = iothread_by_id(export->iothread);
381
+ trace_ssh_server_status(state);
81
+ if (!iothread) {
82
+ error_setg(errp, "iothread \"%s\" not found", export->iothread);
83
+ goto fail;
84
+ }
382
+
85
+
383
+ switch (state) {
86
+ new_ctx = iothread_get_aio_context(iothread);
384
+ case SSH_KNOWN_HOSTS_OK:
385
+ /* OK */
386
+ trace_ssh_check_host_key_knownhosts();
387
+ break;
388
+ case SSH_KNOWN_HOSTS_CHANGED:
389
ret = -EINVAL;
390
- session_error_setg(errp, s, "failed to read remote host key");
391
+ r = ssh_get_server_publickey(s->session, &pubkey);
392
+ if (r == 0) {
393
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA256,
394
+ &server_hash, &server_hash_len);
395
+ pubkey_type = ssh_key_type(pubkey);
396
+ ssh_key_free(pubkey);
397
+ }
398
+ if (r == 0) {
399
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA256,
400
+ server_hash,
401
+ server_hash_len);
402
+ ssh_clean_pubkey_hash(&server_hash);
403
+ }
404
+ if (fingerprint) {
405
+ error_setg(errp,
406
+ "host key (%s key with fingerprint %s) does not match "
407
+ "the one in known_hosts; this may be a possible attack",
408
+ ssh_key_type_to_char(pubkey_type), fingerprint);
409
+ ssh_string_free_char(fingerprint);
410
+ } else {
411
+ error_setg(errp,
412
+ "host key does not match the one in known_hosts; this "
413
+ "may be a possible attack");
414
+ }
415
goto out;
416
- }
417
-
418
- knh = libssh2_knownhost_init(s->session);
419
- if (!knh) {
420
+ case SSH_KNOWN_HOSTS_OTHER:
421
ret = -EINVAL;
422
- session_error_setg(errp, s,
423
- "failed to initialize known hosts support");
424
+ error_setg(errp,
425
+ "host key for this server not found, another type exists");
426
+ goto out;
427
+ case SSH_KNOWN_HOSTS_UNKNOWN:
428
+ ret = -EINVAL;
429
+ error_setg(errp, "no host key was found in known_hosts");
430
+ goto out;
431
+ case SSH_KNOWN_HOSTS_NOT_FOUND:
432
+ ret = -ENOENT;
433
+ error_setg(errp, "known_hosts file not found");
434
+ goto out;
435
+ case SSH_KNOWN_HOSTS_ERROR:
436
+ ret = -EINVAL;
437
+ error_setg(errp, "error while checking the host");
438
+ goto out;
439
+ default:
440
+ ret = -EINVAL;
441
+ error_setg(errp, "error while checking for known server (%d)", state);
442
goto out;
443
}
444
+#else /* !HAVE_LIBSSH_0_8 */
445
+ int state;
446
447
- home = getenv("HOME");
448
- if (home) {
449
- knh_file = g_strdup_printf("%s/.ssh/known_hosts", home);
450
- } else {
451
- knh_file = g_strdup_printf("/root/.ssh/known_hosts");
452
- }
453
-
454
- /* Read all known hosts from OpenSSH-style known_hosts file. */
455
- libssh2_knownhost_readfile(knh, knh_file, LIBSSH2_KNOWNHOST_FILE_OPENSSH);
456
+ state = ssh_is_server_known(s->session);
457
+ trace_ssh_server_status(state);
458
459
- r = libssh2_knownhost_checkp(knh, host, port, hostkey, len,
460
- LIBSSH2_KNOWNHOST_TYPE_PLAIN|
461
- LIBSSH2_KNOWNHOST_KEYENC_RAW,
462
- &found);
463
- switch (r) {
464
- case LIBSSH2_KNOWNHOST_CHECK_MATCH:
465
+ switch (state) {
466
+ case SSH_SERVER_KNOWN_OK:
467
/* OK */
468
- trace_ssh_check_host_key_knownhosts(found->key);
469
+ trace_ssh_check_host_key_knownhosts();
470
break;
471
- case LIBSSH2_KNOWNHOST_CHECK_MISMATCH:
472
+ case SSH_SERVER_KNOWN_CHANGED:
473
ret = -EINVAL;
474
- session_error_setg(errp, s,
475
- "host key does not match the one in known_hosts"
476
- " (found key %s)", found->key);
477
+ r = ssh_get_publickey(s->session, &pubkey);
478
+ if (r == 0) {
479
+ r = ssh_get_publickey_hash(pubkey, SSH_PUBLICKEY_HASH_SHA1,
480
+ &server_hash, &server_hash_len);
481
+ pubkey_type = ssh_key_type(pubkey);
482
+ ssh_key_free(pubkey);
483
+ }
484
+ if (r == 0) {
485
+ fingerprint = ssh_get_fingerprint_hash(SSH_PUBLICKEY_HASH_SHA1,
486
+ server_hash,
487
+ server_hash_len);
488
+ ssh_clean_pubkey_hash(&server_hash);
489
+ }
490
+ if (fingerprint) {
491
+ error_setg(errp,
492
+ "host key (%s key with fingerprint %s) does not match "
493
+ "the one in known_hosts; this may be a possible attack",
494
+ ssh_key_type_to_char(pubkey_type), fingerprint);
495
+ ssh_string_free_char(fingerprint);
496
+ } else {
497
+ error_setg(errp,
498
+ "host key does not match the one in known_hosts; this "
499
+ "may be a possible attack");
500
+ }
501
+ goto out;
502
+ case SSH_SERVER_FOUND_OTHER:
503
+ ret = -EINVAL;
504
+ error_setg(errp,
505
+ "host key for this server not found, another type exists");
506
+ goto out;
507
+ case SSH_SERVER_FILE_NOT_FOUND:
508
+ ret = -ENOENT;
509
+ error_setg(errp, "known_hosts file not found");
510
goto out;
511
- case LIBSSH2_KNOWNHOST_CHECK_NOTFOUND:
512
+ case SSH_SERVER_NOT_KNOWN:
513
ret = -EINVAL;
514
- session_error_setg(errp, s, "no host key was found in known_hosts");
515
+ error_setg(errp, "no host key was found in known_hosts");
516
goto out;
517
- case LIBSSH2_KNOWNHOST_CHECK_FAILURE:
518
+ case SSH_SERVER_ERROR:
519
ret = -EINVAL;
520
- session_error_setg(errp, s,
521
- "failure matching the host key with known_hosts");
522
+ error_setg(errp, "server error");
523
goto out;
524
default:
525
ret = -EINVAL;
526
- session_error_setg(errp, s, "unknown error matching the host key"
527
- " with known_hosts (%d)", r);
528
+ error_setg(errp, "error while checking for known server (%d)", state);
529
goto out;
530
}
531
+#endif /* !HAVE_LIBSSH_0_8 */
532
533
/* known_hosts checking successful. */
534
ret = 0;
535
536
out:
537
- if (knh != NULL) {
538
- libssh2_knownhost_free(knh);
539
- }
540
- g_free(knh_file);
541
return ret;
542
}
543
544
@@ -XXX,XX +XXX,XX @@ static int compare_fingerprint(const unsigned char *fingerprint, size_t len,
545
546
static int
547
check_host_key_hash(BDRVSSHState *s, const char *hash,
548
- int hash_type, size_t fingerprint_len, Error **errp)
549
+ enum ssh_publickey_hash_type type, Error **errp)
550
{
551
- const char *fingerprint;
552
-
553
- fingerprint = libssh2_hostkey_hash(s->session, hash_type);
554
- if (!fingerprint) {
555
+ int r;
556
+ ssh_key pubkey;
557
+ unsigned char *server_hash;
558
+ size_t server_hash_len;
559
+
87
+
560
+#ifdef HAVE_LIBSSH_0_8
88
+ ret = bdrv_try_set_aio_context(bs, new_ctx, errp);
561
+ r = ssh_get_server_publickey(s->session, &pubkey);
89
+ if (ret == 0) {
562
+#else
90
+ aio_context_release(ctx);
563
+ r = ssh_get_publickey(s->session, &pubkey);
91
+ aio_context_acquire(new_ctx);
564
+#endif
92
+ ctx = new_ctx;
565
+ if (r != SSH_OK) {
93
+ } else if (fixed_iothread) {
566
session_error_setg(errp, s, "failed to read remote host key");
94
+ goto fail;
567
return -EINVAL;
568
}
569
570
- if(compare_fingerprint((unsigned char *) fingerprint, fingerprint_len,
571
- hash) != 0) {
572
+ r = ssh_get_publickey_hash(pubkey, type, &server_hash, &server_hash_len);
573
+ ssh_key_free(pubkey);
574
+ if (r != 0) {
575
+ session_error_setg(errp, s,
576
+ "failed reading the hash of the server SSH key");
577
+ return -EINVAL;
578
+ }
579
+
580
+ r = compare_fingerprint(server_hash, server_hash_len, hash);
581
+ ssh_clean_pubkey_hash(&server_hash);
582
+ if (r != 0) {
583
error_setg(errp, "remote host key does not match host_key_check '%s'",
584
hash);
585
return -EPERM;
586
@@ -XXX,XX +XXX,XX @@ check_host_key_hash(BDRVSSHState *s, const char *hash,
587
return 0;
588
}
589
590
-static int check_host_key(BDRVSSHState *s, const char *host, int port,
591
- SshHostKeyCheck *hkc, Error **errp)
592
+static int check_host_key(BDRVSSHState *s, SshHostKeyCheck *hkc, Error **errp)
593
{
594
SshHostKeyCheckMode mode;
595
596
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
597
case SSH_HOST_KEY_CHECK_MODE_HASH:
598
if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_MD5) {
599
return check_host_key_hash(s, hkc->u.hash.hash,
600
- LIBSSH2_HOSTKEY_HASH_MD5, 16, errp);
601
+ SSH_PUBLICKEY_HASH_MD5, errp);
602
} else if (hkc->u.hash.type == SSH_HOST_KEY_CHECK_HASH_TYPE_SHA1) {
603
return check_host_key_hash(s, hkc->u.hash.hash,
604
- LIBSSH2_HOSTKEY_HASH_SHA1, 20, errp);
605
+ SSH_PUBLICKEY_HASH_SHA1, errp);
606
}
607
g_assert_not_reached();
608
break;
609
case SSH_HOST_KEY_CHECK_MODE_KNOWN_HOSTS:
610
- return check_host_key_knownhosts(s, host, port, errp);
611
+ return check_host_key_knownhosts(s, errp);
612
default:
613
g_assert_not_reached();
614
}
615
@@ -XXX,XX +XXX,XX @@ static int check_host_key(BDRVSSHState *s, const char *host, int port,
616
return -EINVAL;
617
}
618
619
-static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
620
+static int authenticate(BDRVSSHState *s, Error **errp)
621
{
622
int r, ret;
623
- const char *userauthlist;
624
- LIBSSH2_AGENT *agent = NULL;
625
- struct libssh2_agent_publickey *identity;
626
- struct libssh2_agent_publickey *prev_identity = NULL;
627
+ int method;
628
629
- userauthlist = libssh2_userauth_list(s->session, user, strlen(user));
630
- if (strstr(userauthlist, "publickey") == NULL) {
631
+ /* Try to authenticate with the "none" method. */
632
+ r = ssh_userauth_none(s->session, NULL);
633
+ if (r == SSH_AUTH_ERROR) {
634
ret = -EPERM;
635
- error_setg(errp,
636
- "remote server does not support \"publickey\" authentication");
637
+ session_error_setg(errp, s, "failed to authenticate using none "
638
+ "authentication");
639
goto out;
640
- }
641
-
642
- /* Connect to ssh-agent and try each identity in turn. */
643
- agent = libssh2_agent_init(s->session);
644
- if (!agent) {
645
- ret = -EINVAL;
646
- session_error_setg(errp, s, "failed to initialize ssh-agent support");
647
- goto out;
648
- }
649
- if (libssh2_agent_connect(agent)) {
650
- ret = -ECONNREFUSED;
651
- session_error_setg(errp, s, "failed to connect to ssh-agent");
652
- goto out;
653
- }
654
- if (libssh2_agent_list_identities(agent)) {
655
- ret = -EINVAL;
656
- session_error_setg(errp, s,
657
- "failed requesting identities from ssh-agent");
658
+ } else if (r == SSH_AUTH_SUCCESS) {
659
+ /* Authenticated! */
660
+ ret = 0;
661
goto out;
662
}
663
664
- for(;;) {
665
- r = libssh2_agent_get_identity(agent, &identity, prev_identity);
666
- if (r == 1) { /* end of list */
667
- break;
668
- }
669
- if (r < 0) {
670
+ method = ssh_userauth_list(s->session, NULL);
671
+ trace_ssh_auth_methods(method);
672
+
673
+ /*
674
+ * Try to authenticate with publickey, using the ssh-agent
675
+ * if available.
676
+ */
677
+ if (method & SSH_AUTH_METHOD_PUBLICKEY) {
678
+ r = ssh_userauth_publickey_auto(s->session, NULL, NULL);
679
+ if (r == SSH_AUTH_ERROR) {
680
ret = -EINVAL;
681
- session_error_setg(errp, s,
682
- "failed to obtain identity from ssh-agent");
683
+ session_error_setg(errp, s, "failed to authenticate using "
684
+ "publickey authentication");
685
goto out;
686
- }
687
- r = libssh2_agent_userauth(agent, user, identity);
688
- if (r == 0) {
689
+ } else if (r == SSH_AUTH_SUCCESS) {
690
/* Authenticated! */
691
ret = 0;
692
goto out;
693
}
694
- /* Failed to authenticate with this identity, try the next one. */
695
- prev_identity = identity;
696
}
697
698
ret = -EPERM;
699
@@ -XXX,XX +XXX,XX @@ static int authenticate(BDRVSSHState *s, const char *user, Error **errp)
700
"and the identities held by your ssh-agent");
701
702
out:
703
- if (agent != NULL) {
704
- /* Note: libssh2 implementation implicitly calls
705
- * libssh2_agent_disconnect if necessary.
706
- */
707
- libssh2_agent_free(agent);
708
- }
709
-
710
return ret;
711
}
712
713
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
714
int ssh_flags, int creat_mode, Error **errp)
715
{
716
int r, ret;
717
- long port = 0;
718
+ unsigned int port = 0;
719
+ int new_sock = -1;
720
721
if (opts->has_user) {
722
s->user = g_strdup(opts->user);
723
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
724
s->inet = opts->server;
725
opts->server = NULL;
726
727
- if (qemu_strtol(s->inet->port, NULL, 10, &port) < 0) {
728
+ if (qemu_strtoui(s->inet->port, NULL, 10, &port) < 0) {
729
error_setg(errp, "Use only numeric port value");
730
ret = -EINVAL;
731
goto err;
732
}
733
734
/* Open the socket and connect. */
735
- s->sock = inet_connect_saddr(s->inet, errp);
736
- if (s->sock < 0) {
737
+ new_sock = inet_connect_saddr(s->inet, errp);
738
+ if (new_sock < 0) {
739
ret = -EIO;
740
goto err;
741
}
742
743
+ /*
744
+ * Try to disable the Nagle algorithm on TCP sockets to reduce latency,
745
+ * but do not fail if it cannot be disabled.
746
+ */
747
+ r = socket_set_nodelay(new_sock);
748
+ if (r < 0) {
749
+ warn_report("can't set TCP_NODELAY for the ssh server %s: %s",
750
+ s->inet->host, strerror(errno));
751
+ }
752
+
753
/* Create SSH session. */
754
- s->session = libssh2_session_init();
755
+ s->session = ssh_new();
756
if (!s->session) {
757
ret = -EINVAL;
758
- session_error_setg(errp, s, "failed to initialize libssh2 session");
759
+ session_error_setg(errp, s, "failed to initialize libssh session");
760
goto err;
761
}
762
763
-#if TRACE_LIBSSH2 != 0
764
- libssh2_trace(s->session, TRACE_LIBSSH2);
765
-#endif
766
+ /*
767
+ * Make sure we are in blocking mode during the connection and
768
+ * authentication phases.
769
+ */
770
+ ssh_set_blocking(s->session, 1);
771
772
- r = libssh2_session_handshake(s->session, s->sock);
773
- if (r != 0) {
774
+ r = ssh_options_set(s->session, SSH_OPTIONS_USER, s->user);
775
+ if (r < 0) {
776
+ ret = -EINVAL;
777
+ session_error_setg(errp, s,
778
+ "failed to set the user in the libssh session");
779
+ goto err;
780
+ }
781
+
782
+ r = ssh_options_set(s->session, SSH_OPTIONS_HOST, s->inet->host);
783
+ if (r < 0) {
784
+ ret = -EINVAL;
785
+ session_error_setg(errp, s,
786
+ "failed to set the host in the libssh session");
787
+ goto err;
788
+ }
789
+
790
+ if (port > 0) {
791
+ r = ssh_options_set(s->session, SSH_OPTIONS_PORT, &port);
792
+ if (r < 0) {
793
+ ret = -EINVAL;
794
+ session_error_setg(errp, s,
795
+ "failed to set the port in the libssh session");
796
+ goto err;
797
+ }
95
+ }
798
+ }
96
+ }
799
+
97
+
800
+ r = ssh_options_set(s->session, SSH_OPTIONS_COMPRESSION, "none");
98
/*
801
+ if (r < 0) {
99
* Block exports are used for non-shared storage migration. Make sure
802
+ ret = -EINVAL;
100
* that BDRV_O_INACTIVE is cleared and the image is ready for write
803
+ session_error_setg(errp, s,
101
@@ -XXX,XX +XXX,XX @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
804
+ "failed to disable the compression in the libssh "
102
}
805
+ "session");
103
806
+ goto err;
104
blk = blk_new(ctx, perm, BLK_PERM_ALL);
105
+
106
+ if (!fixed_iothread) {
107
+ blk_set_allow_aio_context_change(blk, true);
807
+ }
108
+ }
808
+
109
+
809
+ /* Read ~/.ssh/config. */
110
ret = blk_insert_bs(blk, bs, errp);
810
+ r = ssh_options_parse_config(s->session, NULL);
811
+ if (r < 0) {
812
+ ret = -EINVAL;
813
+ session_error_setg(errp, s, "failed to parse ~/.ssh/config");
814
+ goto err;
815
+ }
816
+
817
+ r = ssh_options_set(s->session, SSH_OPTIONS_FD, &new_sock);
818
+ if (r < 0) {
819
+ ret = -EINVAL;
820
+ session_error_setg(errp, s,
821
+ "failed to set the socket in the libssh session");
822
+ goto err;
823
+ }
824
+ /* libssh took ownership of the socket. */
825
+ s->sock = new_sock;
826
+ new_sock = -1;
827
+
828
+ /* Connect. */
829
+ r = ssh_connect(s->session);
830
+ if (r != SSH_OK) {
831
ret = -EINVAL;
832
session_error_setg(errp, s, "failed to establish SSH session");
833
goto err;
834
}
835
836
/* Check the remote host's key against known_hosts. */
837
- ret = check_host_key(s, s->inet->host, port, opts->host_key_check, errp);
838
+ ret = check_host_key(s, opts->host_key_check, errp);
839
if (ret < 0) {
840
goto err;
841
}
842
843
/* Authenticate. */
844
- ret = authenticate(s, s->user, errp);
845
+ ret = authenticate(s, errp);
846
if (ret < 0) {
847
goto err;
848
}
849
850
/* Start SFTP. */
851
- s->sftp = libssh2_sftp_init(s->session);
852
+ s->sftp = sftp_new(s->session);
853
if (!s->sftp) {
854
- session_error_setg(errp, s, "failed to initialize sftp handle");
855
+ session_error_setg(errp, s, "failed to create sftp handle");
856
+ ret = -EINVAL;
857
+ goto err;
858
+ }
859
+
860
+ r = sftp_init(s->sftp);
861
+ if (r < 0) {
862
+ sftp_error_setg(errp, s, "failed to initialize sftp handle");
863
ret = -EINVAL;
864
goto err;
865
}
866
867
/* Open the remote file. */
868
trace_ssh_connect_to_ssh(opts->path, ssh_flags, creat_mode);
869
- s->sftp_handle = libssh2_sftp_open(s->sftp, opts->path, ssh_flags,
870
- creat_mode);
871
+ s->sftp_handle = sftp_open(s->sftp, opts->path, ssh_flags, creat_mode);
872
if (!s->sftp_handle) {
873
- session_error_setg(errp, s, "failed to open remote file '%s'",
874
- opts->path);
875
+ sftp_error_setg(errp, s, "failed to open remote file '%s'",
876
+ opts->path);
877
ret = -EINVAL;
878
goto err;
879
}
880
881
- r = libssh2_sftp_fstat(s->sftp_handle, &s->attrs);
882
- if (r < 0) {
883
+ /* Make sure the SFTP file is handled in blocking mode. */
884
+ sftp_file_set_blocking(s->sftp_handle);
885
+
886
+ s->attrs = sftp_fstat(s->sftp_handle);
887
+ if (!s->attrs) {
888
sftp_error_setg(errp, s, "failed to read file attributes");
889
return -EINVAL;
890
}
891
@@ -XXX,XX +XXX,XX @@ static int connect_to_ssh(BDRVSSHState *s, BlockdevOptionsSsh *opts,
892
return 0;
893
894
err:
895
+ if (s->attrs) {
896
+ sftp_attributes_free(s->attrs);
897
+ }
898
+ s->attrs = NULL;
899
if (s->sftp_handle) {
900
- libssh2_sftp_close(s->sftp_handle);
901
+ sftp_close(s->sftp_handle);
902
}
903
s->sftp_handle = NULL;
904
if (s->sftp) {
905
- libssh2_sftp_shutdown(s->sftp);
906
+ sftp_free(s->sftp);
907
}
908
s->sftp = NULL;
909
if (s->session) {
910
- libssh2_session_disconnect(s->session,
911
- "from qemu ssh client: "
912
- "error opening connection");
913
- libssh2_session_free(s->session);
914
+ ssh_disconnect(s->session);
915
+ ssh_free(s->session);
916
}
917
s->session = NULL;
918
+ s->sock = -1;
919
+ if (new_sock >= 0) {
920
+ close(new_sock);
921
+ }
922
923
return ret;
924
}
925
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
926
927
ssh_state_init(s);
928
929
- ssh_flags = LIBSSH2_FXF_READ;
930
+ ssh_flags = 0;
931
if (bdrv_flags & BDRV_O_RDWR) {
932
- ssh_flags |= LIBSSH2_FXF_WRITE;
933
+ ssh_flags |= O_RDWR;
934
+ } else {
935
+ ssh_flags |= O_RDONLY;
936
}
937
938
opts = ssh_parse_options(options, errp);
939
@@ -XXX,XX +XXX,XX @@ static int ssh_file_open(BlockDriverState *bs, QDict *options, int bdrv_flags,
940
}
941
942
/* Go non-blocking. */
943
- libssh2_session_set_blocking(s->session, 0);
944
+ ssh_set_blocking(s->session, 0);
945
946
qapi_free_BlockdevOptionsSsh(opts);
947
948
return 0;
949
950
err:
951
- if (s->sock >= 0) {
952
- close(s->sock);
953
- }
954
- s->sock = -1;
955
-
956
qapi_free_BlockdevOptionsSsh(opts);
957
958
return ret;
959
@@ -XXX,XX +XXX,XX @@ static int ssh_grow_file(BDRVSSHState *s, int64_t offset, Error **errp)
960
{
961
ssize_t ret;
962
char c[1] = { '\0' };
963
- int was_blocking = libssh2_session_get_blocking(s->session);
964
+ int was_blocking = ssh_is_blocking(s->session);
965
966
/* offset must be strictly greater than the current size so we do
967
* not overwrite anything */
968
- assert(offset > 0 && offset > s->attrs.filesize);
969
+ assert(offset > 0 && offset > s->attrs->size);
970
971
- libssh2_session_set_blocking(s->session, 1);
972
+ ssh_set_blocking(s->session, 1);
973
974
- libssh2_sftp_seek64(s->sftp_handle, offset - 1);
975
- ret = libssh2_sftp_write(s->sftp_handle, c, 1);
976
+ sftp_seek64(s->sftp_handle, offset - 1);
977
+ ret = sftp_write(s->sftp_handle, c, 1);
978
979
- libssh2_session_set_blocking(s->session, was_blocking);
980
+ ssh_set_blocking(s->session, was_blocking);
981
982
if (ret < 0) {
983
sftp_error_setg(errp, s, "Failed to grow file");
984
return -EIO;
985
}
986
987
- s->attrs.filesize = offset;
988
+ s->attrs->size = offset;
989
return 0;
990
}
991
992
@@ -XXX,XX +XXX,XX @@ static int ssh_co_create(BlockdevCreateOptions *options, Error **errp)
993
ssh_state_init(&s);
994
995
ret = connect_to_ssh(&s, opts->location,
996
- LIBSSH2_FXF_READ|LIBSSH2_FXF_WRITE|
997
- LIBSSH2_FXF_CREAT|LIBSSH2_FXF_TRUNC,
998
+ O_RDWR | O_CREAT | O_TRUNC,
999
0644, errp);
1000
if (ret < 0) {
111
if (ret < 0) {
1001
goto fail;
112
goto fail;
1002
@@ -XXX,XX +XXX,XX @@ static int ssh_has_zero_init(BlockDriverState *bs)
113
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
1003
/* Assume false, unless we can positively prove it's true. */
114
index XXXXXXX..XXXXXXX 100644
1004
int has_zero_init = 0;
115
--- a/block/export/vhost-user-blk-server.c
1005
116
+++ b/block/export/vhost-user-blk-server.c
1006
- if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) {
117
@@ -XXX,XX +XXX,XX @@ static const VuDevIface vu_blk_iface = {
1007
- if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) {
118
static void blk_aio_attached(AioContext *ctx, void *opaque)
1008
- has_zero_init = 1;
119
{
1009
- }
120
VuBlkExport *vexp = opaque;
1010
+ if (s->attrs->type == SSH_FILEXFER_TYPE_REGULAR) {
121
+
1011
+ has_zero_init = 1;
122
+ vexp->export.ctx = ctx;
123
vhost_user_server_attach_aio_context(&vexp->vu_server, ctx);
124
}
125
126
static void blk_aio_detach(void *opaque)
127
{
128
VuBlkExport *vexp = opaque;
129
+
130
vhost_user_server_detach_aio_context(&vexp->vu_server);
131
+ vexp->export.ctx = NULL;
132
}
133
134
static void
135
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
136
vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
137
logical_block_size);
138
139
- blk_set_allow_aio_context_change(exp->blk, true);
140
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
141
vexp);
142
143
diff --git a/nbd/server.c b/nbd/server.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/nbd/server.c
146
+++ b/nbd/server.c
147
@@ -XXX,XX +XXX,XX @@ static int nbd_export_create(BlockExport *blk_exp, BlockExportOptions *exp_args,
148
return ret;
1012
}
149
}
1013
150
1014
return has_zero_init;
151
- blk_set_allow_aio_context_change(blk, true);
1015
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
1016
.co = qemu_coroutine_self()
1017
};
1018
1019
- r = libssh2_session_block_directions(s->session);
1020
+ r = ssh_get_poll_flags(s->session);
1021
1022
- if (r & LIBSSH2_SESSION_BLOCK_INBOUND) {
1023
+ if (r & SSH_READ_PENDING) {
1024
rd_handler = restart_coroutine;
1025
}
1026
- if (r & LIBSSH2_SESSION_BLOCK_OUTBOUND) {
1027
+ if (r & SSH_WRITE_PENDING) {
1028
wr_handler = restart_coroutine;
1029
}
1030
1031
@@ -XXX,XX +XXX,XX @@ static coroutine_fn void co_yield(BDRVSSHState *s, BlockDriverState *bs)
1032
trace_ssh_co_yield_back(s->sock);
1033
}
1034
1035
-/* SFTP has a function `libssh2_sftp_seek64' which seeks to a position
1036
- * in the remote file. Notice that it just updates a field in the
1037
- * sftp_handle structure, so there is no network traffic and it cannot
1038
- * fail.
1039
- *
1040
- * However, `libssh2_sftp_seek64' does have a catastrophic effect on
1041
- * performance since it causes the handle to throw away all in-flight
1042
- * reads and buffered readahead data. Therefore this function tries
1043
- * to be intelligent about when to call the underlying libssh2 function.
1044
- */
1045
-#define SSH_SEEK_WRITE 0
1046
-#define SSH_SEEK_READ 1
1047
-#define SSH_SEEK_FORCE 2
1048
-
152
-
1049
-static void ssh_seek(BDRVSSHState *s, int64_t offset, int flags)
153
QTAILQ_INIT(&exp->clients);
1050
-{
154
exp->name = g_strdup(arg->name);
1051
- bool op_read = (flags & SSH_SEEK_READ) != 0;
155
exp->description = g_strdup(arg->description);
1052
- bool force = (flags & SSH_SEEK_FORCE) != 0;
1053
-
1054
- if (force || op_read != s->offset_op_read || offset != s->offset) {
1055
- trace_ssh_seek(offset);
1056
- libssh2_sftp_seek64(s->sftp_handle, offset);
1057
- s->offset = offset;
1058
- s->offset_op_read = op_read;
1059
- }
1060
-}
1061
-
1062
static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1063
int64_t offset, size_t size,
1064
QEMUIOVector *qiov)
1065
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1066
1067
trace_ssh_read(offset, size);
1068
1069
- ssh_seek(s, offset, SSH_SEEK_READ);
1070
+ trace_ssh_seek(offset);
1071
+ sftp_seek64(s->sftp_handle, offset);
1072
1073
/* This keeps track of the current iovec element ('i'), where we
1074
* will write to next ('buf'), and the end of the current iovec
1075
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_read(BDRVSSHState *s, BlockDriverState *bs,
1076
buf = i->iov_base;
1077
end_of_vec = i->iov_base + i->iov_len;
1078
1079
- /* libssh2 has a hard-coded limit of 2000 bytes per request,
1080
- * although it will also do readahead behind our backs. Therefore
1081
- * we may have to do repeated reads here until we have read 'size'
1082
- * bytes.
1083
- */
1084
for (got = 0; got < size; ) {
1085
+ size_t request_read_size;
1086
again:
1087
- trace_ssh_read_buf(buf, end_of_vec - buf);
1088
- r = libssh2_sftp_read(s->sftp_handle, buf, end_of_vec - buf);
1089
- trace_ssh_read_return(r);
1090
+ /*
1091
+ * The size of SFTP packets is limited to 32K bytes, so limit
1092
+ * the amount of data requested to 16K, as libssh currently
1093
+ * does not handle multiple requests on its own.
1094
+ */
1095
+ request_read_size = MIN(end_of_vec - buf, 16384);
1096
+ trace_ssh_read_buf(buf, end_of_vec - buf, request_read_size);
1097
+ r = sftp_read(s->sftp_handle, buf, request_read_size);
1098
+ trace_ssh_read_return(r, sftp_get_error(s->sftp));
1099
1100
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1101
+ if (r == SSH_AGAIN) {
1102
co_yield(s, bs);
1103
goto again;
1104
}
1105
- if (r < 0) {
1106
- sftp_error_trace(s, "read");
1107
- s->offset = -1;
1108
- return -EIO;
1109
- }
1110
- if (r == 0) {
1111
+ if (r == SSH_EOF || (r == 0 && sftp_get_error(s->sftp) == SSH_FX_EOF)) {
1112
/* EOF: Short read so pad the buffer with zeroes and return it. */
1113
qemu_iovec_memset(qiov, got, 0, size - got);
1114
return 0;
1115
}
1116
+ if (r <= 0) {
1117
+ sftp_error_trace(s, "read");
1118
+ return -EIO;
1119
+ }
1120
1121
got += r;
1122
buf += r;
1123
- s->offset += r;
1124
if (buf >= end_of_vec && got < size) {
1125
i++;
1126
buf = i->iov_base;
1127
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1128
1129
trace_ssh_write(offset, size);
1130
1131
- ssh_seek(s, offset, SSH_SEEK_WRITE);
1132
+ trace_ssh_seek(offset);
1133
+ sftp_seek64(s->sftp_handle, offset);
1134
1135
/* This keeps track of the current iovec element ('i'), where we
1136
* will read from next ('buf'), and the end of the current iovec
1137
@@ -XXX,XX +XXX,XX @@ static int ssh_write(BDRVSSHState *s, BlockDriverState *bs,
1138
end_of_vec = i->iov_base + i->iov_len;
1139
1140
for (written = 0; written < size; ) {
1141
+ size_t request_write_size;
1142
again:
1143
- trace_ssh_write_buf(buf, end_of_vec - buf);
1144
- r = libssh2_sftp_write(s->sftp_handle, buf, end_of_vec - buf);
1145
- trace_ssh_write_return(r);
1146
+ /*
1147
+ * Avoid too large data packets, as libssh currently does not
1148
+ * handle multiple requests on its own.
1149
+ */
1150
+ request_write_size = MIN(end_of_vec - buf, 131072);
1151
+ trace_ssh_write_buf(buf, end_of_vec - buf, request_write_size);
1152
+ r = sftp_write(s->sftp_handle, buf, request_write_size);
1153
+ trace_ssh_write_return(r, sftp_get_error(s->sftp));
1154
1155
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1156
+ if (r == SSH_AGAIN) {
1157
co_yield(s, bs);
1158
goto again;
1159
}
1160
if (r < 0) {
1161
sftp_error_trace(s, "write");
1162
- s->offset = -1;
1163
return -EIO;
1164
}
1165
- /* The libssh2 API is very unclear about this. A comment in
1166
- * the code says "nothing was acked, and no EAGAIN was
1167
- * received!" which apparently means that no data got sent
1168
- * out, and the underlying channel didn't return any EAGAIN
1169
- * indication. I think this is a bug in either libssh2 or
1170
- * OpenSSH (server-side). In any case, forcing a seek (to
1171
- * discard libssh2 internal buffers), and then trying again
1172
- * works for me.
1173
- */
1174
- if (r == 0) {
1175
- ssh_seek(s, offset + written, SSH_SEEK_WRITE|SSH_SEEK_FORCE);
1176
- co_yield(s, bs);
1177
- goto again;
1178
- }
1179
1180
written += r;
1181
buf += r;
1182
- s->offset += r;
1183
if (buf >= end_of_vec && written < size) {
1184
i++;
1185
buf = i->iov_base;
1186
end_of_vec = i->iov_base + i->iov_len;
1187
}
1188
1189
- if (offset + written > s->attrs.filesize)
1190
- s->attrs.filesize = offset + written;
1191
+ if (offset + written > s->attrs->size) {
1192
+ s->attrs->size = offset + written;
1193
+ }
1194
}
1195
1196
return 0;
1197
@@ -XXX,XX +XXX,XX @@ static void unsafe_flush_warning(BDRVSSHState *s, const char *what)
1198
}
1199
}
1200
1201
-#ifdef HAS_LIBSSH2_SFTP_FSYNC
1202
+#ifdef HAVE_LIBSSH_0_8
1203
1204
static coroutine_fn int ssh_flush(BDRVSSHState *s, BlockDriverState *bs)
1205
{
1206
int r;
1207
1208
trace_ssh_flush();
1209
+
1210
+ if (!sftp_extension_supported(s->sftp, "fsync@openssh.com", "1")) {
1211
+ unsafe_flush_warning(s, "OpenSSH >= 6.3");
1212
+ return 0;
1213
+ }
1214
again:
1215
- r = libssh2_sftp_fsync(s->sftp_handle);
1216
- if (r == LIBSSH2_ERROR_EAGAIN || r == LIBSSH2_ERROR_TIMEOUT) {
1217
+ r = sftp_fsync(s->sftp_handle);
1218
+ if (r == SSH_AGAIN) {
1219
co_yield(s, bs);
1220
goto again;
1221
}
1222
- if (r == LIBSSH2_ERROR_SFTP_PROTOCOL &&
1223
- libssh2_sftp_last_error(s->sftp) == LIBSSH2_FX_OP_UNSUPPORTED) {
1224
- unsafe_flush_warning(s, "OpenSSH >= 6.3");
1225
- return 0;
1226
- }
1227
if (r < 0) {
1228
sftp_error_trace(s, "fsync");
1229
return -EIO;
1230
@@ -XXX,XX +XXX,XX @@ static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
1231
return ret;
1232
}
1233
1234
-#else /* !HAS_LIBSSH2_SFTP_FSYNC */
1235
+#else /* !HAVE_LIBSSH_0_8 */
1236
1237
static coroutine_fn int ssh_co_flush(BlockDriverState *bs)
1238
{
1239
BDRVSSHState *s = bs->opaque;
1240
1241
- unsafe_flush_warning(s, "libssh2 >= 1.4.4");
1242
+ unsafe_flush_warning(s, "libssh >= 0.8.0");
1243
return 0;
1244
}
1245
1246
-#endif /* !HAS_LIBSSH2_SFTP_FSYNC */
1247
+#endif /* !HAVE_LIBSSH_0_8 */
1248
1249
static int64_t ssh_getlength(BlockDriverState *bs)
1250
{
1251
BDRVSSHState *s = bs->opaque;
1252
int64_t length;
1253
1254
- /* Note we cannot make a libssh2 call here. */
1255
- length = (int64_t) s->attrs.filesize;
1256
+ /* Note we cannot make a libssh call here. */
1257
+ length = (int64_t) s->attrs->size;
1258
trace_ssh_getlength(length);
1259
1260
return length;
1261
@@ -XXX,XX +XXX,XX @@ static int coroutine_fn ssh_co_truncate(BlockDriverState *bs, int64_t offset,
1262
return -ENOTSUP;
1263
}
1264
1265
- if (offset < s->attrs.filesize) {
1266
+ if (offset < s->attrs->size) {
1267
error_setg(errp, "ssh driver does not support shrinking files");
1268
return -ENOTSUP;
1269
}
1270
1271
- if (offset == s->attrs.filesize) {
1272
+ if (offset == s->attrs->size) {
1273
return 0;
1274
}
1275
1276
@@ -XXX,XX +XXX,XX @@ static void bdrv_ssh_init(void)
1277
{
1278
int r;
1279
1280
- r = libssh2_init(0);
1281
+ r = ssh_init();
1282
if (r != 0) {
1283
- fprintf(stderr, "libssh2 initialization failed, %d\n", r);
1284
+ fprintf(stderr, "libssh initialization failed, %d\n", r);
1285
exit(EXIT_FAILURE);
1286
}
1287
1288
+#if TRACE_LIBSSH != 0
1289
+ ssh_set_log_level(TRACE_LIBSSH);
1290
+#endif
1291
+
1292
bdrv_register(&bdrv_ssh);
1293
}
1294
1295
diff --git a/.travis.yml b/.travis.yml
1296
index XXXXXXX..XXXXXXX 100644
1297
--- a/.travis.yml
1298
+++ b/.travis.yml
1299
@@ -XXX,XX +XXX,XX @@ addons:
1300
- libseccomp-dev
1301
- libspice-protocol-dev
1302
- libspice-server-dev
1303
- - libssh2-1-dev
1304
+ - libssh-dev
1305
- liburcu-dev
1306
- libusb-1.0-0-dev
1307
- libvte-2.91-dev
1308
@@ -XXX,XX +XXX,XX @@ matrix:
1309
- libseccomp-dev
1310
- libspice-protocol-dev
1311
- libspice-server-dev
1312
- - libssh2-1-dev
1313
+ - libssh-dev
1314
- liburcu-dev
1315
- libusb-1.0-0-dev
1316
- libvte-2.91-dev
1317
diff --git a/block/trace-events b/block/trace-events
1318
index XXXXXXX..XXXXXXX 100644
1319
--- a/block/trace-events
1320
+++ b/block/trace-events
1321
@@ -XXX,XX +XXX,XX @@ nbd_client_connect_success(const char *export_name) "export '%s'"
1322
# ssh.c
1323
ssh_restart_coroutine(void *co) "co=%p"
1324
ssh_flush(void) "fsync"
1325
-ssh_check_host_key_knownhosts(const char *key) "host key OK: %s"
1326
+ssh_check_host_key_knownhosts(void) "host key OK"
1327
ssh_connect_to_ssh(char *path, int flags, int mode) "opening file %s flags=0x%x creat_mode=0%o"
1328
ssh_co_yield(int sock, void *rd_handler, void *wr_handler) "s->sock=%d rd_handler=%p wr_handler=%p"
1329
ssh_co_yield_back(int sock) "s->sock=%d - back"
1330
ssh_getlength(int64_t length) "length=%" PRIi64
1331
ssh_co_create_opts(uint64_t size) "total_size=%" PRIu64
1332
ssh_read(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1333
-ssh_read_buf(void *buf, size_t size) "sftp_read buf=%p size=%zu"
1334
-ssh_read_return(ssize_t ret) "sftp_read returned %zd"
1335
+ssh_read_buf(void *buf, size_t size, size_t actual_size) "sftp_read buf=%p size=%zu (actual size=%zu)"
1336
+ssh_read_return(ssize_t ret, int sftp_err) "sftp_read returned %zd (sftp error=%d)"
1337
ssh_write(int64_t offset, size_t size) "offset=%" PRIi64 " size=%zu"
1338
-ssh_write_buf(void *buf, size_t size) "sftp_write buf=%p size=%zu"
1339
-ssh_write_return(ssize_t ret) "sftp_write returned %zd"
1340
+ssh_write_buf(void *buf, size_t size, size_t actual_size) "sftp_write buf=%p size=%zu (actual size=%zu)"
1341
+ssh_write_return(ssize_t ret, int sftp_err) "sftp_write returned %zd (sftp error=%d)"
1342
ssh_seek(int64_t offset) "seeking to offset=%" PRIi64
1343
+ssh_auth_methods(int methods) "auth methods=0x%x"
1344
+ssh_server_status(int status) "server status=%d"
1345
1346
# curl.c
1347
curl_timer_cb(long timeout_ms) "timer callback timeout_ms %ld"
1348
@@ -XXX,XX +XXX,XX @@ sheepdog_snapshot_create(const char *sn_name, const char *id) "%s %s"
1349
sheepdog_snapshot_create_inode(const char *name, uint32_t snap, uint32_t vdi) "s->inode: name %s snap_id 0x%" PRIx32 " vdi 0x%" PRIx32
1350
1351
# ssh.c
1352
-sftp_error(const char *op, const char *ssh_err, int ssh_err_code, unsigned long sftp_err_code) "%s failed: %s (libssh2 error code: %d, sftp error code: %lu)"
1353
+sftp_error(const char *op, const char *ssh_err, int ssh_err_code, int sftp_err_code) "%s failed: %s (libssh error code: %d, sftp error code: %d)"
1354
diff --git a/docs/qemu-block-drivers.texi b/docs/qemu-block-drivers.texi
1355
index XXXXXXX..XXXXXXX 100644
1356
--- a/docs/qemu-block-drivers.texi
1357
+++ b/docs/qemu-block-drivers.texi
1358
@@ -XXX,XX +XXX,XX @@ print a warning when @code{fsync} is not supported:
1359
1360
warning: ssh server @code{ssh.example.com:22} does not support fsync
1361
1362
-With sufficiently new versions of libssh2 and OpenSSH, @code{fsync} is
1363
+With sufficiently new versions of libssh and OpenSSH, @code{fsync} is
1364
supported.
1365
1366
@node disk_images_nvme
1367
diff --git a/tests/docker/dockerfiles/debian-win32-cross.docker b/tests/docker/dockerfiles/debian-win32-cross.docker
1368
index XXXXXXX..XXXXXXX 100644
1369
--- a/tests/docker/dockerfiles/debian-win32-cross.docker
1370
+++ b/tests/docker/dockerfiles/debian-win32-cross.docker
1371
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1372
mxe-$TARGET-w64-mingw32.shared-curl \
1373
mxe-$TARGET-w64-mingw32.shared-glib \
1374
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1375
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1376
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1377
mxe-$TARGET-w64-mingw32.shared-lzo \
1378
mxe-$TARGET-w64-mingw32.shared-nettle \
1379
diff --git a/tests/docker/dockerfiles/debian-win64-cross.docker b/tests/docker/dockerfiles/debian-win64-cross.docker
1380
index XXXXXXX..XXXXXXX 100644
1381
--- a/tests/docker/dockerfiles/debian-win64-cross.docker
1382
+++ b/tests/docker/dockerfiles/debian-win64-cross.docker
1383
@@ -XXX,XX +XXX,XX @@ RUN DEBIAN_FRONTEND=noninteractive eatmydata \
1384
mxe-$TARGET-w64-mingw32.shared-curl \
1385
mxe-$TARGET-w64-mingw32.shared-glib \
1386
mxe-$TARGET-w64-mingw32.shared-libgcrypt \
1387
- mxe-$TARGET-w64-mingw32.shared-libssh2 \
1388
mxe-$TARGET-w64-mingw32.shared-libusb1 \
1389
mxe-$TARGET-w64-mingw32.shared-lzo \
1390
mxe-$TARGET-w64-mingw32.shared-nettle \
1391
diff --git a/tests/docker/dockerfiles/fedora.docker b/tests/docker/dockerfiles/fedora.docker
1392
index XXXXXXX..XXXXXXX 100644
1393
--- a/tests/docker/dockerfiles/fedora.docker
1394
+++ b/tests/docker/dockerfiles/fedora.docker
1395
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1396
libpng-devel \
1397
librbd-devel \
1398
libseccomp-devel \
1399
- libssh2-devel \
1400
+ libssh-devel \
1401
libubsan \
1402
libusbx-devel \
1403
libxml2-devel \
1404
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1405
mingw32-gtk3 \
1406
mingw32-libjpeg-turbo \
1407
mingw32-libpng \
1408
- mingw32-libssh2 \
1409
mingw32-libtasn1 \
1410
mingw32-nettle \
1411
mingw32-pixman \
1412
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES \
1413
mingw64-gtk3 \
1414
mingw64-libjpeg-turbo \
1415
mingw64-libpng \
1416
- mingw64-libssh2 \
1417
mingw64-libtasn1 \
1418
mingw64-nettle \
1419
mingw64-pixman \
1420
diff --git a/tests/docker/dockerfiles/ubuntu.docker b/tests/docker/dockerfiles/ubuntu.docker
1421
index XXXXXXX..XXXXXXX 100644
1422
--- a/tests/docker/dockerfiles/ubuntu.docker
1423
+++ b/tests/docker/dockerfiles/ubuntu.docker
1424
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1425
libsnappy-dev \
1426
libspice-protocol-dev \
1427
libspice-server-dev \
1428
- libssh2-1-dev \
1429
+ libssh-dev \
1430
libusb-1.0-0-dev \
1431
libusbredirhost-dev \
1432
libvdeplug-dev \
1433
diff --git a/tests/docker/dockerfiles/ubuntu1804.docker b/tests/docker/dockerfiles/ubuntu1804.docker
1434
index XXXXXXX..XXXXXXX 100644
1435
--- a/tests/docker/dockerfiles/ubuntu1804.docker
1436
+++ b/tests/docker/dockerfiles/ubuntu1804.docker
1437
@@ -XXX,XX +XXX,XX @@ ENV PACKAGES flex bison \
1438
libsnappy-dev \
1439
libspice-protocol-dev \
1440
libspice-server-dev \
1441
- libssh2-1-dev \
1442
+ libssh-dev \
1443
libusb-1.0-0-dev \
1444
libusbredirhost-dev \
1445
libvdeplug-dev \
1446
diff --git a/tests/qemu-iotests/207 b/tests/qemu-iotests/207
1447
index XXXXXXX..XXXXXXX 100755
1448
--- a/tests/qemu-iotests/207
1449
+++ b/tests/qemu-iotests/207
1450
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1451
1452
iotests.img_info_log(remote_path)
1453
1454
- md5_key = subprocess.check_output(
1455
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1456
- 'cut -d" " -f3 | base64 -d | md5sum -b | cut -d" " -f1',
1457
- shell=True).rstrip().decode('ascii')
1458
+ keys = subprocess.check_output(
1459
+ 'ssh-keyscan 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1460
+ 'cut -d" " -f3',
1461
+ shell=True).rstrip().decode('ascii').split('\n')
1462
+
1463
+ # Mappings of base64 representations to digests
1464
+ md5_keys = {}
1465
+ sha1_keys = {}
1466
+
1467
+ for key in keys:
1468
+ md5_keys[key] = subprocess.check_output(
1469
+ 'echo %s | base64 -d | md5sum -b | cut -d" " -f1' % key,
1470
+ shell=True).rstrip().decode('ascii')
1471
+
1472
+ sha1_keys[key] = subprocess.check_output(
1473
+ 'echo %s | base64 -d | sha1sum -b | cut -d" " -f1' % key,
1474
+ shell=True).rstrip().decode('ascii')
1475
1476
vm.launch()
1477
+
1478
+ # Find correct key first
1479
+ matching_key = None
1480
+ for key in keys:
1481
+ result = vm.qmp('blockdev-add',
1482
+ driver='ssh', node_name='node0', path=disk_path,
1483
+ server={
1484
+ 'host': '127.0.0.1',
1485
+ 'port': '22',
1486
+ }, host_key_check={
1487
+ 'mode': 'hash',
1488
+ 'type': 'md5',
1489
+ 'hash': md5_keys[key],
1490
+ })
1491
+
1492
+ if 'error' not in result:
1493
+ vm.qmp('blockdev-del', node_name='node0')
1494
+ matching_key = key
1495
+ break
1496
+
1497
+ if matching_key is None:
1498
+ vm.shutdown()
1499
+ iotests.notrun('Did not find a key that fits 127.0.0.1')
1500
+
1501
blockdev_create(vm, { 'driver': 'ssh',
1502
'location': {
1503
'path': disk_path,
1504
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1505
'host-key-check': {
1506
'mode': 'hash',
1507
'type': 'md5',
1508
- 'hash': md5_key,
1509
+ 'hash': md5_keys[matching_key],
1510
}
1511
},
1512
'size': 8388608 })
1513
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1514
1515
iotests.img_info_log(remote_path)
1516
1517
- sha1_key = subprocess.check_output(
1518
- 'ssh-keyscan -t rsa 127.0.0.1 2>/dev/null | grep -v "\\^#" | ' +
1519
- 'cut -d" " -f3 | base64 -d | sha1sum -b | cut -d" " -f1',
1520
- shell=True).rstrip().decode('ascii')
1521
-
1522
vm.launch()
1523
blockdev_create(vm, { 'driver': 'ssh',
1524
'location': {
1525
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('t.img') as disk_path, \
1526
'host-key-check': {
1527
'mode': 'hash',
1528
'type': 'sha1',
1529
- 'hash': sha1_key,
1530
+ 'hash': sha1_keys[matching_key],
1531
}
1532
},
1533
'size': 4194304 })
1534
diff --git a/tests/qemu-iotests/207.out b/tests/qemu-iotests/207.out
1535
index XXXXXXX..XXXXXXX 100644
1536
--- a/tests/qemu-iotests/207.out
1537
+++ b/tests/qemu-iotests/207.out
1538
@@ -XXX,XX +XXX,XX @@ virtual size: 4 MiB (4194304 bytes)
1539
1540
{"execute": "blockdev-create", "arguments": {"job-id": "job0", "options": {"driver": "ssh", "location": {"host-key-check": {"mode": "none"}, "path": "/this/is/not/an/existing/path", "server": {"host": "127.0.0.1", "port": "22"}}, "size": 4194304}}}
1541
{"return": {}}
1542
-Job failed: failed to open remote file '/this/is/not/an/existing/path': Failed opening remote file (libssh2 error code: -31)
1543
+Job failed: failed to open remote file '/this/is/not/an/existing/path': SFTP server: No such file (libssh error code: 1, sftp error code: 2)
1544
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
1545
{"return": {}}
1546
1547
--
156
--
1548
2.21.0
157
2.26.2
1549
158
1550
diff view generated by jsdifflib
New patch
1
Allow the number of queues to be configured using --export
2
vhost-user-blk,num-queues=N. This setting should match the QEMU --device
3
vhost-user-blk-pci,num-queues=N setting but QEMU vhost-user-blk.c lowers
4
its own value if the vhost-user-blk backend offers fewer queues than
5
QEMU.
1
6
7
The vhost-user-blk-server.c code is already capable of multi-queue. All
8
virtqueue processing runs in the same AioContext. No new locking is
9
needed.
10
11
Add the num-queues=N option and set the VIRTIO_BLK_F_MQ feature bit.
12
Note that the feature bit only announces the presence of the num_queues
13
configuration space field. It does not promise that there is more than 1
14
virtqueue, so we can set it unconditionally.
15
16
I tested multi-queue by running a random read fio test with numjobs=4 on
17
an -smp 4 guest. After the benchmark finished the guest /proc/interrupts
18
file showed activity on all 4 virtio-blk MSI-X. The /sys/block/vda/mq/
19
directory shows that Linux blk-mq has 4 queues configured.
20
21
An automated test is included in the next commit.
22
23
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
24
Acked-by: Markus Armbruster <armbru@redhat.com>
25
Message-id: 20201001144604.559733-2-stefanha@redhat.com
26
[Fixed accidental tab characters as suggested by Markus Armbruster
27
--Stefan]
28
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
29
---
30
qapi/block-export.json | 10 +++++++---
31
block/export/vhost-user-blk-server.c | 24 ++++++++++++++++++------
32
2 files changed, 25 insertions(+), 9 deletions(-)
33
34
diff --git a/qapi/block-export.json b/qapi/block-export.json
35
index XXXXXXX..XXXXXXX 100644
36
--- a/qapi/block-export.json
37
+++ b/qapi/block-export.json
38
@@ -XXX,XX +XXX,XX @@
39
# SocketAddress types are supported. Passed fds must be UNIX domain
40
# sockets.
41
# @logical-block-size: Logical block size in bytes. Defaults to 512 bytes.
42
+# @num-queues: Number of request virtqueues. Must be greater than 0. Defaults
43
+# to 1.
44
#
45
# Since: 5.2
46
##
47
{ 'struct': 'BlockExportOptionsVhostUserBlk',
48
- 'data': { 'addr': 'SocketAddress', '*logical-block-size': 'size' } }
49
+ 'data': { 'addr': 'SocketAddress',
50
+     '*logical-block-size': 'size',
51
+ '*num-queues': 'uint16'} }
52
53
##
54
# @NbdServerAddOptions:
55
@@ -XXX,XX +XXX,XX @@
56
{ 'union': 'BlockExportOptions',
57
'base': { 'type': 'BlockExportType',
58
'id': 'str',
59
-     '*fixed-iothread': 'bool',
60
-     '*iothread': 'str',
61
+ '*fixed-iothread': 'bool',
62
+ '*iothread': 'str',
63
'node-name': 'str',
64
'*writable': 'bool',
65
'*writethrough': 'bool' },
66
diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c
67
index XXXXXXX..XXXXXXX 100644
68
--- a/block/export/vhost-user-blk-server.c
69
+++ b/block/export/vhost-user-blk-server.c
70
@@ -XXX,XX +XXX,XX @@
71
#include "util/block-helpers.h"
72
73
enum {
74
- VHOST_USER_BLK_MAX_QUEUES = 1,
75
+ VHOST_USER_BLK_NUM_QUEUES_DEFAULT = 1,
76
};
77
struct virtio_blk_inhdr {
78
unsigned char status;
79
@@ -XXX,XX +XXX,XX @@ static uint64_t vu_blk_get_features(VuDev *dev)
80
1ull << VIRTIO_BLK_F_DISCARD |
81
1ull << VIRTIO_BLK_F_WRITE_ZEROES |
82
1ull << VIRTIO_BLK_F_CONFIG_WCE |
83
+ 1ull << VIRTIO_BLK_F_MQ |
84
1ull << VIRTIO_F_VERSION_1 |
85
1ull << VIRTIO_RING_F_INDIRECT_DESC |
86
1ull << VIRTIO_RING_F_EVENT_IDX |
87
@@ -XXX,XX +XXX,XX @@ static void blk_aio_detach(void *opaque)
88
89
static void
90
vu_blk_initialize_config(BlockDriverState *bs,
91
- struct virtio_blk_config *config, uint32_t blk_size)
92
+ struct virtio_blk_config *config,
93
+ uint32_t blk_size,
94
+ uint16_t num_queues)
95
{
96
config->capacity = bdrv_getlength(bs) >> BDRV_SECTOR_BITS;
97
config->blk_size = blk_size;
98
@@ -XXX,XX +XXX,XX @@ vu_blk_initialize_config(BlockDriverState *bs,
99
config->seg_max = 128 - 2;
100
config->min_io_size = 1;
101
config->opt_io_size = 1;
102
- config->num_queues = VHOST_USER_BLK_MAX_QUEUES;
103
+ config->num_queues = num_queues;
104
config->max_discard_sectors = 32768;
105
config->max_discard_seg = 1;
106
config->discard_sector_alignment = config->blk_size >> 9;
107
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
108
BlockExportOptionsVhostUserBlk *vu_opts = &opts->u.vhost_user_blk;
109
Error *local_err = NULL;
110
uint64_t logical_block_size;
111
+ uint16_t num_queues = VHOST_USER_BLK_NUM_QUEUES_DEFAULT;
112
113
vexp->writable = opts->writable;
114
vexp->blkcfg.wce = 0;
115
@@ -XXX,XX +XXX,XX @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts,
116
}
117
vexp->blk_size = logical_block_size;
118
blk_set_guest_block_size(exp->blk, logical_block_size);
119
+
120
+ if (vu_opts->has_num_queues) {
121
+ num_queues = vu_opts->num_queues;
122
+ }
123
+ if (num_queues == 0) {
124
+ error_setg(errp, "num-queues must be greater than 0");
125
+ return -EINVAL;
126
+ }
127
+
128
vu_blk_initialize_config(blk_bs(exp->blk), &vexp->blkcfg,
129
- logical_block_size);
130
+ logical_block_size, num_queues);
131
132
blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach,
133
vexp);
134
135
if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx,
136
- VHOST_USER_BLK_MAX_QUEUES, &vu_blk_iface,
137
- errp)) {
138
+ num_queues, &vu_blk_iface, errp)) {
139
blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
140
blk_aio_detach, vexp);
141
return -EADDRNOTAVAIL;
142
--
143
2.26.2
144
diff view generated by jsdifflib
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
Until ESXi 6.5 VMware used the vmfsSparse format for snapshots (VMDK3 in
3
bdrv_co_block_status_above has several design problems with handling
4
QEMU).
4
short backing files:
5
5
6
This format was lacking in the following:
6
1. With want_zero=true, it may return ret with BDRV_BLOCK_ZERO but
7
without BDRV_BLOCK_ALLOCATED flag, when actually short backing file
8
which produces these after-EOF zeros is inside requested backing
9
sequence.
7
10
8
* Grain directory (L1) and grain table (L2) entries were 32-bit,
11
2. With want_zero=false, it may return pnum=0 prior to actual EOF,
9
allowing access to only 2TB (slightly less) of data.
12
because of EOF of short backing file.
10
* The grain size (default) was 512 bytes - leading to data
11
fragmentation and many grain tables.
12
* For space reclamation purposes, it was necessary to find all the
13
grains which are not pointed to by any grain table - so a reverse
14
mapping of "offset of grain in vmdk" to "grain table" must be
15
constructed - which takes large amounts of CPU/RAM.
16
13
17
The format specification can be found in VMware's documentation:
14
Fix these things, making logic about short backing files clearer.
18
https://www.vmware.com/support/developer/vddk/vmdk_50_technote.pdf
19
15
20
In ESXi 6.5, to support snapshot files larger than 2TB, a new format was
16
With fixed bdrv_block_status_above we also have to improve is_zero in
21
introduced: SESparse (Space Efficient).
17
qcow2 code, otherwise iotest 154 will fail, because with this patch we
18
stop merging zeros of different types (produced by fully unallocated
19
in the whole backing chain regions vs produced by short backing files).
22
20
23
This format fixes the above issues:
21
Note also, that this patch leaves for another day the general problem
22
around block-status: misuse of BDRV_BLOCK_ALLOCATED as is-fs-allocated
23
vs go-to-backing.
24
24
25
* All entries are now 64-bit.
25
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
26
* The grain size (default) is 4KB.
26
Reviewed-by: Alberto Garcia <berto@igalia.com>
27
* Grain directory and grain tables are now located at the beginning
27
Reviewed-by: Eric Blake <eblake@redhat.com>
28
of the file.
28
Message-id: 20200924194003.22080-2-vsementsov@virtuozzo.com
29
+ seSparse format reserves space for all grain tables.
29
[Fix s/comes/come/ as suggested by Eric Blake
30
+ Grain tables can be addressed using an index.
30
--Stefan]
31
+ Grains are located in the end of the file and can also be
31
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
32
addressed with an index.
32
---
33
- seSparse vmdks of large disks (64TB) have huge preallocated
33
block/io.c | 68 ++++++++++++++++++++++++++++++++++++++++-----------
34
headers - mainly due to L2 tables, even for empty snapshots.
34
block/qcow2.c | 16 ++++++++++--
35
* The header contains a reverse mapping ("backmap") of "offset of
35
2 files changed, 68 insertions(+), 16 deletions(-)
36
grain in vmdk" to "grain table" and a bitmap ("free bitmap") which
37
specifies for each grain - whether it is allocated or not.
38
Using these data structures we can implement space reclamation
39
efficiently.
40
* Due to the fact that the header now maintains two mappings:
41
* The regular one (grain directory & grain tables)
42
* A reverse one (backmap and free bitmap)
43
These data structures can lose consistency upon crash and result
44
in a corrupted VMDK.
45
Therefore, a journal is also added to the VMDK and is replayed
46
when VMware reopens the file after a crash.
47
36
48
Since ESXi 6.7 - SESparse is the only snapshot format available.
37
diff --git a/block/io.c b/block/io.c
49
50
Unfortunately, VMware does not provide documentation regarding the new
51
seSparse format.
52
53
This commit is based on black-box research of the seSparse format.
54
Various in-guest block operations and their effect on the snapshot file
55
were tested.
56
57
The only VMware provided source of information (regarding the underlying
58
implementation) was a log file on the ESXi:
59
60
/var/log/hostd.log
61
62
Whenever an seSparse snapshot is created - the log is being populated
63
with seSparse records.
64
65
Relevant log records are of the form:
66
67
[...] Const Header:
68
[...] constMagic = 0xcafebabe
69
[...] version = 2.1
70
[...] capacity = 204800
71
[...] grainSize = 8
72
[...] grainTableSize = 64
73
[...] flags = 0
74
[...] Extents:
75
[...] Header : <1 : 1>
76
[...] JournalHdr : <2 : 2>
77
[...] Journal : <2048 : 2048>
78
[...] GrainDirectory : <4096 : 2048>
79
[...] GrainTables : <6144 : 2048>
80
[...] FreeBitmap : <8192 : 2048>
81
[...] BackMap : <10240 : 2048>
82
[...] Grain : <12288 : 204800>
83
[...] Volatile Header:
84
[...] volatileMagic = 0xcafecafe
85
[...] FreeGTNumber = 0
86
[...] nextTxnSeqNumber = 0
87
[...] replayJournal = 0
88
89
The sizes that are seen in the log file are in sectors.
90
Extents are of the following format: <offset : size>
91
92
This commit is a strict implementation which enforces:
93
* magics
94
* version number 2.1
95
* grain size of 8 sectors (4KB)
96
* grain table size of 64 sectors
97
* zero flags
98
* extent locations
99
100
Additionally, this commit provides only a subset of the functionality
101
offered by seSparse's format:
102
* Read-only
103
* No journal replay
104
* No space reclamation
105
* No unmap support
106
107
Hence, journal header, journal, free bitmap and backmap extents are
108
unused, only the "classic" (L1 -> L2 -> data) grain access is
109
implemented.
110
111
However there are several differences in the grain access itself.
112
Grain directory (L1):
113
* Grain directory entries are indexes (not offsets) to grain
114
tables.
115
* Valid grain directory entries have their highest nibble set to
116
0x1.
117
* Since grain tables are always located in the beginning of the
118
file - the index can fit into 32 bits - so we can use its low
119
part if it's valid.
120
Grain table (L2):
121
* Grain table entries are indexes (not offsets) to grains.
122
* If the highest nibble of the entry is:
123
0x0:
124
The grain is not allocated.
125
The rest of the bytes are 0.
126
0x1:
127
The grain is unmapped - guest sees a zero grain.
128
The rest of the bits point to the previously mapped grain,
129
see 0x3 case.
130
0x2:
131
The grain is zero.
132
0x3:
133
The grain is allocated - to get the index calculate:
134
((entry & 0x0fff000000000000) >> 48) |
135
((entry & 0x0000ffffffffffff) << 12)
136
* The difference between 0x1 and 0x2 is that 0x1 is an unallocated
137
grain which results from the guest using sg_unmap to unmap the
138
grain - but the grain itself still exists in the grain extent - a
139
space reclamation procedure should delete it.
140
Unmapping a zero grain has no effect (0x2 will not change to 0x1)
141
but unmapping an unallocated grain will (0x0 to 0x1) - naturally.
142
143
In order to implement seSparse some fields had to be changed to support
144
both 32-bit and 64-bit entry sizes.
145
146
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
147
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
148
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
149
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
150
Message-id: 20190620091057.47441-4-shmuel.eiderman@oracle.com
151
Signed-off-by: Max Reitz <mreitz@redhat.com>
152
---
153
block/vmdk.c | 358 ++++++++++++++++++++++++++++++++++++++++++++++++---
154
1 file changed, 342 insertions(+), 16 deletions(-)
155
156
diff --git a/block/vmdk.c b/block/vmdk.c
157
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
158
--- a/block/vmdk.c
39
--- a/block/io.c
159
+++ b/block/vmdk.c
40
+++ b/block/io.c
160
@@ -XXX,XX +XXX,XX @@ typedef struct {
41
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
161
uint16_t compressAlgorithm;
42
int64_t *map,
162
} QEMU_PACKED VMDK4Header;
43
BlockDriverState **file)
163
44
{
164
+typedef struct VMDKSESparseConstHeader {
45
+ int ret;
165
+ uint64_t magic;
46
BlockDriverState *p;
166
+ uint64_t version;
47
- int ret = 0;
167
+ uint64_t capacity;
48
- bool first = true;
168
+ uint64_t grain_size;
49
+ int64_t eof = 0;
169
+ uint64_t grain_table_size;
50
170
+ uint64_t flags;
51
assert(bs != base);
171
+ uint64_t reserved1;
52
- for (p = bs; p != base; p = bdrv_filter_or_cow_bs(p)) {
172
+ uint64_t reserved2;
173
+ uint64_t reserved3;
174
+ uint64_t reserved4;
175
+ uint64_t volatile_header_offset;
176
+ uint64_t volatile_header_size;
177
+ uint64_t journal_header_offset;
178
+ uint64_t journal_header_size;
179
+ uint64_t journal_offset;
180
+ uint64_t journal_size;
181
+ uint64_t grain_dir_offset;
182
+ uint64_t grain_dir_size;
183
+ uint64_t grain_tables_offset;
184
+ uint64_t grain_tables_size;
185
+ uint64_t free_bitmap_offset;
186
+ uint64_t free_bitmap_size;
187
+ uint64_t backmap_offset;
188
+ uint64_t backmap_size;
189
+ uint64_t grains_offset;
190
+ uint64_t grains_size;
191
+ uint8_t pad[304];
192
+} QEMU_PACKED VMDKSESparseConstHeader;
193
+
53
+
194
+typedef struct VMDKSESparseVolatileHeader {
54
+ ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
195
+ uint64_t magic;
55
+ if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED) {
196
+ uint64_t free_gt_number;
197
+ uint64_t next_txn_seq_number;
198
+ uint64_t replay_journal;
199
+ uint8_t pad[480];
200
+} QEMU_PACKED VMDKSESparseVolatileHeader;
201
+
202
#define L2_CACHE_SIZE 16
203
204
typedef struct VmdkExtent {
205
@@ -XXX,XX +XXX,XX @@ typedef struct VmdkExtent {
206
bool compressed;
207
bool has_marker;
208
bool has_zero_grain;
209
+ bool sesparse;
210
+ uint64_t sesparse_l2_tables_offset;
211
+ uint64_t sesparse_clusters_offset;
212
+ int32_t entry_size;
213
int version;
214
int64_t sectors;
215
int64_t end_sector;
216
int64_t flat_start_offset;
217
int64_t l1_table_offset;
218
int64_t l1_backup_table_offset;
219
- uint32_t *l1_table;
220
+ void *l1_table;
221
uint32_t *l1_backup_table;
222
unsigned int l1_size;
223
uint32_t l1_entry_sectors;
224
225
unsigned int l2_size;
226
- uint32_t *l2_cache;
227
+ void *l2_cache;
228
uint32_t l2_cache_offsets[L2_CACHE_SIZE];
229
uint32_t l2_cache_counts[L2_CACHE_SIZE];
230
231
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
232
* minimal L2 table size: 512 entries
233
* 8 TB is still more than the maximal value supported for
234
* VMDK3 & VMDK4 which is 2TB.
235
+ * 64TB - for "ESXi seSparse Extent"
236
+ * minimal cluster size: 512B (default is 4KB)
237
+ * L2 table size: 4096 entries (const).
238
+ * 64TB is more than the maximal value supported for
239
+ * seSparse VMDKs (which is slightly less than 64TB)
240
*/
241
error_setg(errp, "L1 size too big");
242
return -EFBIG;
243
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
244
extent->l2_size = l2_size;
245
extent->cluster_sectors = flat ? sectors : cluster_sectors;
246
extent->next_cluster_sector = ROUND_UP(nb_sectors, cluster_sectors);
247
+ extent->entry_size = sizeof(uint32_t);
248
249
if (s->num_extents > 1) {
250
extent->end_sector = (*(extent - 1)).end_sector + extent->sectors;
251
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
252
int i;
253
254
/* read the L1 table */
255
- l1_size = extent->l1_size * sizeof(uint32_t);
256
+ l1_size = extent->l1_size * extent->entry_size;
257
extent->l1_table = g_try_malloc(l1_size);
258
if (l1_size && extent->l1_table == NULL) {
259
return -ENOMEM;
260
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
261
goto fail_l1;
262
}
263
for (i = 0; i < extent->l1_size; i++) {
264
- le32_to_cpus(&extent->l1_table[i]);
265
+ if (extent->entry_size == sizeof(uint64_t)) {
266
+ le64_to_cpus((uint64_t *)extent->l1_table + i);
267
+ } else {
268
+ assert(extent->entry_size == sizeof(uint32_t));
269
+ le32_to_cpus((uint32_t *)extent->l1_table + i);
270
+ }
271
}
272
273
if (extent->l1_backup_table_offset) {
274
+ assert(!extent->sesparse);
275
extent->l1_backup_table = g_try_malloc(l1_size);
276
if (l1_size && extent->l1_backup_table == NULL) {
277
ret = -ENOMEM;
278
@@ -XXX,XX +XXX,XX @@ static int vmdk_init_tables(BlockDriverState *bs, VmdkExtent *extent,
279
}
280
281
extent->l2_cache =
282
- g_new(uint32_t, extent->l2_size * L2_CACHE_SIZE);
283
+ g_malloc(extent->entry_size * extent->l2_size * L2_CACHE_SIZE);
284
return 0;
285
fail_l1b:
286
g_free(extent->l1_backup_table);
287
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_vmfs_sparse(BlockDriverState *bs,
288
return ret;
289
}
290
291
+#define SESPARSE_CONST_HEADER_MAGIC UINT64_C(0x00000000cafebabe)
292
+#define SESPARSE_VOLATILE_HEADER_MAGIC UINT64_C(0x00000000cafecafe)
293
+
294
+/* Strict checks - format not officially documented */
295
+static int check_se_sparse_const_header(VMDKSESparseConstHeader *header,
296
+ Error **errp)
297
+{
298
+ header->magic = le64_to_cpu(header->magic);
299
+ header->version = le64_to_cpu(header->version);
300
+ header->grain_size = le64_to_cpu(header->grain_size);
301
+ header->grain_table_size = le64_to_cpu(header->grain_table_size);
302
+ header->flags = le64_to_cpu(header->flags);
303
+ header->reserved1 = le64_to_cpu(header->reserved1);
304
+ header->reserved2 = le64_to_cpu(header->reserved2);
305
+ header->reserved3 = le64_to_cpu(header->reserved3);
306
+ header->reserved4 = le64_to_cpu(header->reserved4);
307
+
308
+ header->volatile_header_offset =
309
+ le64_to_cpu(header->volatile_header_offset);
310
+ header->volatile_header_size = le64_to_cpu(header->volatile_header_size);
311
+
312
+ header->journal_header_offset = le64_to_cpu(header->journal_header_offset);
313
+ header->journal_header_size = le64_to_cpu(header->journal_header_size);
314
+
315
+ header->journal_offset = le64_to_cpu(header->journal_offset);
316
+ header->journal_size = le64_to_cpu(header->journal_size);
317
+
318
+ header->grain_dir_offset = le64_to_cpu(header->grain_dir_offset);
319
+ header->grain_dir_size = le64_to_cpu(header->grain_dir_size);
320
+
321
+ header->grain_tables_offset = le64_to_cpu(header->grain_tables_offset);
322
+ header->grain_tables_size = le64_to_cpu(header->grain_tables_size);
323
+
324
+ header->free_bitmap_offset = le64_to_cpu(header->free_bitmap_offset);
325
+ header->free_bitmap_size = le64_to_cpu(header->free_bitmap_size);
326
+
327
+ header->backmap_offset = le64_to_cpu(header->backmap_offset);
328
+ header->backmap_size = le64_to_cpu(header->backmap_size);
329
+
330
+ header->grains_offset = le64_to_cpu(header->grains_offset);
331
+ header->grains_size = le64_to_cpu(header->grains_size);
332
+
333
+ if (header->magic != SESPARSE_CONST_HEADER_MAGIC) {
334
+ error_setg(errp, "Bad const header magic: 0x%016" PRIx64,
335
+ header->magic);
336
+ return -EINVAL;
337
+ }
338
+
339
+ if (header->version != 0x0000000200000001) {
340
+ error_setg(errp, "Unsupported version: 0x%016" PRIx64,
341
+ header->version);
342
+ return -ENOTSUP;
343
+ }
344
+
345
+ if (header->grain_size != 8) {
346
+ error_setg(errp, "Unsupported grain size: %" PRIu64,
347
+ header->grain_size);
348
+ return -ENOTSUP;
349
+ }
350
+
351
+ if (header->grain_table_size != 64) {
352
+ error_setg(errp, "Unsupported grain table size: %" PRIu64,
353
+ header->grain_table_size);
354
+ return -ENOTSUP;
355
+ }
356
+
357
+ if (header->flags != 0) {
358
+ error_setg(errp, "Unsupported flags: 0x%016" PRIx64,
359
+ header->flags);
360
+ return -ENOTSUP;
361
+ }
362
+
363
+ if (header->reserved1 != 0 || header->reserved2 != 0 ||
364
+ header->reserved3 != 0 || header->reserved4 != 0) {
365
+ error_setg(errp, "Unsupported reserved bits:"
366
+ " 0x%016" PRIx64 " 0x%016" PRIx64
367
+ " 0x%016" PRIx64 " 0x%016" PRIx64,
368
+ header->reserved1, header->reserved2,
369
+ header->reserved3, header->reserved4);
370
+ return -ENOTSUP;
371
+ }
372
+
373
+ /* check that padding is 0 */
374
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
375
+ error_setg(errp, "Unsupported non-zero const header padding");
376
+ return -ENOTSUP;
377
+ }
378
+
379
+ return 0;
380
+}
381
+
382
+static int check_se_sparse_volatile_header(VMDKSESparseVolatileHeader *header,
383
+ Error **errp)
384
+{
385
+ header->magic = le64_to_cpu(header->magic);
386
+ header->free_gt_number = le64_to_cpu(header->free_gt_number);
387
+ header->next_txn_seq_number = le64_to_cpu(header->next_txn_seq_number);
388
+ header->replay_journal = le64_to_cpu(header->replay_journal);
389
+
390
+ if (header->magic != SESPARSE_VOLATILE_HEADER_MAGIC) {
391
+ error_setg(errp, "Bad volatile header magic: 0x%016" PRIx64,
392
+ header->magic);
393
+ return -EINVAL;
394
+ }
395
+
396
+ if (header->replay_journal) {
397
+ error_setg(errp, "Image is dirty, Replaying journal not supported");
398
+ return -ENOTSUP;
399
+ }
400
+
401
+ /* check that padding is 0 */
402
+ if (!buffer_is_zero(header->pad, sizeof(header->pad))) {
403
+ error_setg(errp, "Unsupported non-zero volatile header padding");
404
+ return -ENOTSUP;
405
+ }
406
+
407
+ return 0;
408
+}
409
+
410
+static int vmdk_open_se_sparse(BlockDriverState *bs,
411
+ BdrvChild *file,
412
+ int flags, Error **errp)
413
+{
414
+ int ret;
415
+ VMDKSESparseConstHeader const_header;
416
+ VMDKSESparseVolatileHeader volatile_header;
417
+ VmdkExtent *extent;
418
+
419
+ ret = bdrv_apply_auto_read_only(bs,
420
+ "No write support for seSparse images available", errp);
421
+ if (ret < 0) {
422
+ return ret;
56
+ return ret;
423
+ }
57
+ }
424
+
58
+
425
+ assert(sizeof(const_header) == SECTOR_SIZE);
59
+ if (ret & BDRV_BLOCK_EOF) {
426
+
60
+ eof = offset + *pnum;
427
+ ret = bdrv_pread(file, 0, &const_header, sizeof(const_header));
428
+ if (ret < 0) {
429
+ bdrv_refresh_filename(file->bs);
430
+ error_setg_errno(errp, -ret,
431
+ "Could not read const header from file '%s'",
432
+ file->bs->filename);
433
+ return ret;
434
+ }
61
+ }
435
+
62
+
436
+ /* check const header */
63
+ assert(*pnum <= bytes);
437
+ ret = check_se_sparse_const_header(&const_header, errp);
64
+ bytes = *pnum;
438
+ if (ret < 0) {
65
+
439
+ return ret;
66
+ for (p = bdrv_filter_or_cow_bs(bs); p != base;
67
+ p = bdrv_filter_or_cow_bs(p))
68
+ {
69
ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
70
file);
71
if (ret < 0) {
72
- break;
73
+ return ret;
74
}
75
- if (ret & BDRV_BLOCK_ZERO && ret & BDRV_BLOCK_EOF && !first) {
76
+ if (*pnum == 0) {
77
/*
78
- * Reading beyond the end of the file continues to read
79
- * zeroes, but we can only widen the result to the
80
- * unallocated length we learned from an earlier
81
- * iteration.
82
+ * The top layer deferred to this layer, and because this layer is
83
+ * short, any zeroes that we synthesize beyond EOF behave as if they
84
+ * were allocated at this layer.
85
+ *
86
+ * We don't include BDRV_BLOCK_EOF into ret, as upper layer may be
87
+ * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
88
+ * below.
89
*/
90
+ assert(ret & BDRV_BLOCK_EOF);
91
*pnum = bytes;
92
+ if (file) {
93
+ *file = p;
94
+ }
95
+ ret = BDRV_BLOCK_ZERO | BDRV_BLOCK_ALLOCATED;
96
+ break;
97
}
98
- if (ret & (BDRV_BLOCK_ZERO | BDRV_BLOCK_DATA)) {
99
+ if (ret & BDRV_BLOCK_ALLOCATED) {
100
+ /*
101
+ * We've found the node and the status, we must break.
102
+ *
103
+ * Drop BDRV_BLOCK_EOF, as it's not for upper layer, which may be
104
+ * larger. We'll add BDRV_BLOCK_EOF if needed at function end, see
105
+ * below.
106
+ */
107
+ ret &= ~BDRV_BLOCK_EOF;
108
break;
109
}
110
- /* [offset, pnum] unallocated on this layer, which could be only
111
- * the first part of [offset, bytes]. */
112
- bytes = MIN(bytes, *pnum);
113
- first = false;
114
+
115
+ /*
116
+ * OK, [offset, offset + *pnum) region is unallocated on this layer,
117
+ * let's continue the diving.
118
+ */
119
+ assert(*pnum <= bytes);
120
+ bytes = *pnum;
440
+ }
121
+ }
441
+
122
+
442
+ assert(sizeof(volatile_header) == SECTOR_SIZE);
123
+ if (offset + *pnum == eof) {
124
+ ret |= BDRV_BLOCK_EOF;
125
}
443
+
126
+
444
+ ret = bdrv_pread(file,
127
return ret;
445
+ const_header.volatile_header_offset * SECTOR_SIZE,
128
}
446
+ &volatile_header, sizeof(volatile_header));
129
447
+ if (ret < 0) {
130
diff --git a/block/qcow2.c b/block/qcow2.c
448
+ bdrv_refresh_filename(file->bs);
131
index XXXXXXX..XXXXXXX 100644
449
+ error_setg_errno(errp, -ret,
132
--- a/block/qcow2.c
450
+ "Could not read volatile header from file '%s'",
133
+++ b/block/qcow2.c
451
+ file->bs->filename);
134
@@ -XXX,XX +XXX,XX @@ static bool is_zero(BlockDriverState *bs, int64_t offset, int64_t bytes)
452
+ return ret;
135
if (!bytes) {
453
+ }
136
return true;
137
}
138
- res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
139
- return res >= 0 && (res & BDRV_BLOCK_ZERO) && nr == bytes;
454
+
140
+
455
+ /* check volatile header */
141
+ /*
456
+ ret = check_se_sparse_volatile_header(&volatile_header, errp);
142
+ * bdrv_block_status_above doesn't merge different types of zeros, for
457
+ if (ret < 0) {
143
+ * example, zeros which come from the region which is unallocated in
458
+ return ret;
144
+ * the whole backing chain, and zeros which come because of a short
459
+ }
145
+ * backing file. So, we need a loop.
146
+ */
147
+ do {
148
+ res = bdrv_block_status_above(bs, NULL, offset, bytes, &nr, NULL, NULL);
149
+ offset += nr;
150
+ bytes -= nr;
151
+ } while (res >= 0 && (res & BDRV_BLOCK_ZERO) && nr && bytes);
460
+
152
+
461
+ ret = vmdk_add_extent(bs, file, false,
153
+ return res >= 0 && (res & BDRV_BLOCK_ZERO) && bytes == 0;
462
+ const_header.capacity,
154
}
463
+ const_header.grain_dir_offset * SECTOR_SIZE,
155
464
+ 0,
156
static coroutine_fn int qcow2_co_pwrite_zeroes(BlockDriverState *bs,
465
+ const_header.grain_dir_size *
466
+ SECTOR_SIZE / sizeof(uint64_t),
467
+ const_header.grain_table_size *
468
+ SECTOR_SIZE / sizeof(uint64_t),
469
+ const_header.grain_size,
470
+ &extent,
471
+ errp);
472
+ if (ret < 0) {
473
+ return ret;
474
+ }
475
+
476
+ extent->sesparse = true;
477
+ extent->sesparse_l2_tables_offset = const_header.grain_tables_offset;
478
+ extent->sesparse_clusters_offset = const_header.grains_offset;
479
+ extent->entry_size = sizeof(uint64_t);
480
+
481
+ ret = vmdk_init_tables(bs, extent, errp);
482
+ if (ret) {
483
+ /* free extent allocated by vmdk_add_extent */
484
+ vmdk_free_last_extent(bs);
485
+ }
486
+
487
+ return ret;
488
+}
489
+
490
static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
491
QDict *options, Error **errp);
492
493
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
494
* RW [size in sectors] SPARSE "file-name.vmdk"
495
* RW [size in sectors] VMFS "file-name.vmdk"
496
* RW [size in sectors] VMFSSPARSE "file-name.vmdk"
497
+ * RW [size in sectors] SESPARSE "file-name.vmdk"
498
*/
499
flat_offset = -1;
500
matches = sscanf(p, "%10s %" SCNd64 " %10s \"%511[^\n\r\"]\" %" SCNd64,
501
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
502
503
if (sectors <= 0 ||
504
(strcmp(type, "FLAT") && strcmp(type, "SPARSE") &&
505
- strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE")) ||
506
+ strcmp(type, "VMFS") && strcmp(type, "VMFSSPARSE") &&
507
+ strcmp(type, "SESPARSE")) ||
508
(strcmp(access, "RW"))) {
509
continue;
510
}
511
@@ -XXX,XX +XXX,XX @@ static int vmdk_parse_extents(const char *desc, BlockDriverState *bs,
512
return ret;
513
}
514
extent = &s->extents[s->num_extents - 1];
515
+ } else if (!strcmp(type, "SESPARSE")) {
516
+ ret = vmdk_open_se_sparse(bs, extent_file, bs->open_flags, errp);
517
+ if (ret) {
518
+ bdrv_unref_child(bs, extent_file);
519
+ return ret;
520
+ }
521
+ extent = &s->extents[s->num_extents - 1];
522
} else {
523
error_setg(errp, "Unsupported extent type '%s'", type);
524
bdrv_unref_child(bs, extent_file);
525
@@ -XXX,XX +XXX,XX @@ static int vmdk_open_desc_file(BlockDriverState *bs, int flags, char *buf,
526
if (strcmp(ct, "monolithicFlat") &&
527
strcmp(ct, "vmfs") &&
528
strcmp(ct, "vmfsSparse") &&
529
+ strcmp(ct, "seSparse") &&
530
strcmp(ct, "twoGbMaxExtentSparse") &&
531
strcmp(ct, "twoGbMaxExtentFlat")) {
532
error_setg(errp, "Unsupported image type '%s'", ct);
533
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
534
{
535
unsigned int l1_index, l2_offset, l2_index;
536
int min_index, i, j;
537
- uint32_t min_count, *l2_table;
538
+ uint32_t min_count;
539
+ void *l2_table;
540
bool zeroed = false;
541
int64_t ret;
542
int64_t cluster_sector;
543
+ unsigned int l2_size_bytes = extent->l2_size * extent->entry_size;
544
545
if (m_data) {
546
m_data->valid = 0;
547
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
548
if (l1_index >= extent->l1_size) {
549
return VMDK_ERROR;
550
}
551
- l2_offset = extent->l1_table[l1_index];
552
+ if (extent->sesparse) {
553
+ uint64_t l2_offset_u64;
554
+
555
+ assert(extent->entry_size == sizeof(uint64_t));
556
+
557
+ l2_offset_u64 = ((uint64_t *)extent->l1_table)[l1_index];
558
+ if (l2_offset_u64 == 0) {
559
+ l2_offset = 0;
560
+ } else if ((l2_offset_u64 & 0xffffffff00000000) != 0x1000000000000000) {
561
+ /*
562
+ * Top most nibble is 0x1 if grain table is allocated.
563
+ * strict check - top most 4 bytes must be 0x10000000 since max
564
+ * supported size is 64TB for disk - so no more than 64TB / 16MB
565
+ * grain directories which is smaller than uint32,
566
+ * where 16MB is the only supported default grain table coverage.
567
+ */
568
+ return VMDK_ERROR;
569
+ } else {
570
+ l2_offset_u64 = l2_offset_u64 & 0x00000000ffffffff;
571
+ l2_offset_u64 = extent->sesparse_l2_tables_offset +
572
+ l2_offset_u64 * l2_size_bytes / SECTOR_SIZE;
573
+ if (l2_offset_u64 > 0x00000000ffffffff) {
574
+ return VMDK_ERROR;
575
+ }
576
+ l2_offset = (unsigned int)(l2_offset_u64);
577
+ }
578
+ } else {
579
+ assert(extent->entry_size == sizeof(uint32_t));
580
+ l2_offset = ((uint32_t *)extent->l1_table)[l1_index];
581
+ }
582
if (!l2_offset) {
583
return VMDK_UNALLOC;
584
}
585
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
586
extent->l2_cache_counts[j] >>= 1;
587
}
588
}
589
- l2_table = extent->l2_cache + (i * extent->l2_size);
590
+ l2_table = (char *)extent->l2_cache + (i * l2_size_bytes);
591
goto found;
592
}
593
}
594
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
595
min_index = i;
596
}
597
}
598
- l2_table = extent->l2_cache + (min_index * extent->l2_size);
599
+ l2_table = (char *)extent->l2_cache + (min_index * l2_size_bytes);
600
BLKDBG_EVENT(extent->file, BLKDBG_L2_LOAD);
601
if (bdrv_pread(extent->file,
602
(int64_t)l2_offset * 512,
603
l2_table,
604
- extent->l2_size * sizeof(uint32_t)
605
- ) != extent->l2_size * sizeof(uint32_t)) {
606
+ l2_size_bytes
607
+ ) != l2_size_bytes) {
608
return VMDK_ERROR;
609
}
610
611
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
612
extent->l2_cache_counts[min_index] = 1;
613
found:
614
l2_index = ((offset >> 9) / extent->cluster_sectors) % extent->l2_size;
615
- cluster_sector = le32_to_cpu(l2_table[l2_index]);
616
617
- if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
618
- zeroed = true;
619
+ if (extent->sesparse) {
620
+ cluster_sector = le64_to_cpu(((uint64_t *)l2_table)[l2_index]);
621
+ switch (cluster_sector & 0xf000000000000000) {
622
+ case 0x0000000000000000:
623
+ /* unallocated grain */
624
+ if (cluster_sector != 0) {
625
+ return VMDK_ERROR;
626
+ }
627
+ break;
628
+ case 0x1000000000000000:
629
+ /* scsi-unmapped grain - fallthrough */
630
+ case 0x2000000000000000:
631
+ /* zero grain */
632
+ zeroed = true;
633
+ break;
634
+ case 0x3000000000000000:
635
+ /* allocated grain */
636
+ cluster_sector = (((cluster_sector & 0x0fff000000000000) >> 48) |
637
+ ((cluster_sector & 0x0000ffffffffffff) << 12));
638
+ cluster_sector = extent->sesparse_clusters_offset +
639
+ cluster_sector * extent->cluster_sectors;
640
+ break;
641
+ default:
642
+ return VMDK_ERROR;
643
+ }
644
+ } else {
645
+ cluster_sector = le32_to_cpu(((uint32_t *)l2_table)[l2_index]);
646
+
647
+ if (extent->has_zero_grain && cluster_sector == VMDK_GTE_ZEROED) {
648
+ zeroed = true;
649
+ }
650
}
651
652
if (!cluster_sector || zeroed) {
653
if (!allocate) {
654
return zeroed ? VMDK_ZEROED : VMDK_UNALLOC;
655
}
656
+ assert(!extent->sesparse);
657
658
if (extent->next_cluster_sector >= VMDK_EXTENT_MAX_SECTORS) {
659
return VMDK_ERROR;
660
@@ -XXX,XX +XXX,XX @@ static int get_cluster_offset(BlockDriverState *bs,
661
m_data->l1_index = l1_index;
662
m_data->l2_index = l2_index;
663
m_data->l2_offset = l2_offset;
664
- m_data->l2_cache_entry = &l2_table[l2_index];
665
+ m_data->l2_cache_entry = ((uint32_t *)l2_table) + l2_index;
666
}
667
}
668
*cluster_offset = cluster_sector << BDRV_SECTOR_BITS;
669
@@ -XXX,XX +XXX,XX @@ static int vmdk_pwritev(BlockDriverState *bs, uint64_t offset,
670
if (!extent) {
671
return -EIO;
672
}
673
+ if (extent->sesparse) {
674
+ return -ENOTSUP;
675
+ }
676
offset_in_cluster = vmdk_find_offset_in_cluster(extent, offset);
677
n_bytes = MIN(bytes, extent->cluster_sectors * BDRV_SECTOR_SIZE
678
- offset_in_cluster);
679
--
157
--
680
2.21.0
158
2.26.2
681
159
682
diff view generated by jsdifflib
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
We forget to enable it for transaction .prepare, while it is already
3
In order to reuse bdrv_common_block_status_above in
4
enabled in do_drive_backup since commit a2d665c1bc362
4
bdrv_is_allocated_above, let's support include_base parameter.
5
"blockdev: loosen restrictions on drive-backup source node"
6
5
7
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
8
Message-id: 20190618140804.59214-1-vsementsov@virtuozzo.com
7
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Reviewed-by: John Snow <jsnow@redhat.com>
8
Reviewed-by: Eric Blake <eblake@redhat.com>
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
9
Message-id: 20200924194003.22080-3-vsementsov@virtuozzo.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
11
---
12
blockdev.c | 2 +-
12
block/coroutines.h | 2 ++
13
1 file changed, 1 insertion(+), 1 deletion(-)
13
block/io.c | 21 ++++++++++++++-------
14
2 files changed, 16 insertions(+), 7 deletions(-)
14
15
15
diff --git a/blockdev.c b/blockdev.c
16
diff --git a/block/coroutines.h b/block/coroutines.h
16
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
17
--- a/blockdev.c
18
--- a/block/coroutines.h
18
+++ b/blockdev.c
19
+++ b/block/coroutines.h
19
@@ -XXX,XX +XXX,XX @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
20
@@ -XXX,XX +XXX,XX @@ bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
20
assert(common->action->type == TRANSACTION_ACTION_KIND_DRIVE_BACKUP);
21
int coroutine_fn
21
backup = common->action->u.drive_backup.data;
22
bdrv_co_common_block_status_above(BlockDriverState *bs,
22
23
BlockDriverState *base,
23
- bs = qmp_get_root_bs(backup->device, errp);
24
+ bool include_base,
24
+ bs = bdrv_lookup_bs(backup->device, backup->device, errp);
25
bool want_zero,
25
if (!bs) {
26
int64_t offset,
26
return;
27
int64_t bytes,
28
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
29
int generated_co_wrapper
30
bdrv_common_block_status_above(BlockDriverState *bs,
31
BlockDriverState *base,
32
+ bool include_base,
33
bool want_zero,
34
int64_t offset,
35
int64_t bytes,
36
diff --git a/block/io.c b/block/io.c
37
index XXXXXXX..XXXXXXX 100644
38
--- a/block/io.c
39
+++ b/block/io.c
40
@@ -XXX,XX +XXX,XX @@ early_out:
41
int coroutine_fn
42
bdrv_co_common_block_status_above(BlockDriverState *bs,
43
BlockDriverState *base,
44
+ bool include_base,
45
bool want_zero,
46
int64_t offset,
47
int64_t bytes,
48
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
49
BlockDriverState *p;
50
int64_t eof = 0;
51
52
- assert(bs != base);
53
+ assert(include_base || bs != base);
54
+ assert(!include_base || base); /* Can't include NULL base */
55
56
ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
57
- if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED) {
58
+ if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
59
return ret;
60
}
61
62
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
63
assert(*pnum <= bytes);
64
bytes = *pnum;
65
66
- for (p = bdrv_filter_or_cow_bs(bs); p != base;
67
+ for (p = bdrv_filter_or_cow_bs(bs); include_base || p != base;
68
p = bdrv_filter_or_cow_bs(p))
69
{
70
ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
71
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
72
break;
73
}
74
75
+ if (p == base) {
76
+ assert(include_base);
77
+ break;
78
+ }
79
+
80
/*
81
* OK, [offset, offset + *pnum) region is unallocated on this layer,
82
* let's continue the diving.
83
@@ -XXX,XX +XXX,XX @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
84
int64_t offset, int64_t bytes, int64_t *pnum,
85
int64_t *map, BlockDriverState **file)
86
{
87
- return bdrv_common_block_status_above(bs, base, true, offset, bytes,
88
+ return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
89
pnum, map, file);
90
}
91
92
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
93
int ret;
94
int64_t dummy;
95
96
- ret = bdrv_common_block_status_above(bs, bdrv_filter_or_cow_bs(bs), false,
97
- offset, bytes, pnum ? pnum : &dummy,
98
- NULL, NULL);
99
+ ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
100
+ bytes, pnum ? pnum : &dummy, NULL,
101
+ NULL);
102
if (ret < 0) {
103
return ret;
27
}
104
}
28
--
105
--
29
2.21.0
106
2.26.2
30
107
31
diff view generated by jsdifflib
1
From: Sam Eiderman <shmuel.eiderman@oracle.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
512M of L1 entries is a very loose bound, only 32M are required to store
3
We are going to reuse bdrv_common_block_status_above in
4
the maximal supported VMDK file size of 2TB.
4
bdrv_is_allocated_above. bdrv_is_allocated_above may be called with
5
include_base == false and still bs == base (e.g. from img_rebase()).
5
6
6
Fixed qemu-iotest 59# - now the failure occurs earlier, on an impossible L1
7
So, support this corner case.
7
table size.
8
8
9
Reviewed-by: Karl Heubaum <karl.heubaum@oracle.com>
9
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
10
Reviewed-by: Eyal Moscovici <eyal.moscovici@oracle.com>
10
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
11
Reviewed-by: Liran Alon <liran.alon@oracle.com>
11
Reviewed-by: Eric Blake <eblake@redhat.com>
12
Reviewed-by: Arbel Moshe <arbel.moshe@oracle.com>
12
Reviewed-by: Alberto Garcia <berto@igalia.com>
13
Signed-off-by: Sam Eiderman <shmuel.eiderman@oracle.com>
13
Message-id: 20200924194003.22080-4-vsementsov@virtuozzo.com
14
Message-id: 20190620091057.47441-3-shmuel.eiderman@oracle.com
14
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
15
Reviewed-by: Max Reitz <mreitz@redhat.com>
16
Signed-off-by: Max Reitz <mreitz@redhat.com>
17
---
15
---
18
block/vmdk.c | 13 +++++++------
16
block/io.c | 6 +++++-
19
tests/qemu-iotests/059.out | 2 +-
17
1 file changed, 5 insertions(+), 1 deletion(-)
20
2 files changed, 8 insertions(+), 7 deletions(-)
21
18
22
diff --git a/block/vmdk.c b/block/vmdk.c
19
diff --git a/block/io.c b/block/io.c
23
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
24
--- a/block/vmdk.c
21
--- a/block/io.c
25
+++ b/block/vmdk.c
22
+++ b/block/io.c
26
@@ -XXX,XX +XXX,XX @@ static int vmdk_add_extent(BlockDriverState *bs,
23
@@ -XXX,XX +XXX,XX @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
27
error_setg(errp, "Invalid granularity, image may be corrupt");
24
BlockDriverState *p;
28
return -EFBIG;
25
int64_t eof = 0;
29
}
26
30
- if (l1_size > 512 * 1024 * 1024) {
27
- assert(include_base || bs != base);
31
+ if (l1_size > 32 * 1024 * 1024) {
28
assert(!include_base || base); /* Can't include NULL base */
32
/*
29
33
* Although with big capacity and small l1_entry_sectors, we can get a
30
+ if (!include_base && bs == base) {
34
* big l1_size, we don't want unbounded value to allocate the table.
31
+ *pnum = bytes;
35
- * Limit it to 512M, which is:
32
+ return 0;
36
- * 16PB - for default "Hosted Sparse Extent" (VMDK4)
33
+ }
37
- * cluster size: 64KB, L2 table size: 512 entries
34
+
38
- * 1PB - for default "ESXi Host Sparse Extent" (VMDK3/vmfsSparse)
35
ret = bdrv_co_block_status(bs, want_zero, offset, bytes, pnum, map, file);
39
- * cluster size: 512B, L2 table size: 4096 entries
36
if (ret < 0 || *pnum == 0 || ret & BDRV_BLOCK_ALLOCATED || bs == base) {
40
+ * Limit it to 32M, which is enough to store:
37
return ret;
41
+ * 8TB - for both VMDK3 & VMDK4 with
42
+ * minimal cluster size: 512B
43
+ * minimal L2 table size: 512 entries
44
+ * 8 TB is still more than the maximal value supported for
45
+ * VMDK3 & VMDK4 which is 2TB.
46
*/
47
error_setg(errp, "L1 size too big");
48
return -EFBIG;
49
diff --git a/tests/qemu-iotests/059.out b/tests/qemu-iotests/059.out
50
index XXXXXXX..XXXXXXX 100644
51
--- a/tests/qemu-iotests/059.out
52
+++ b/tests/qemu-iotests/059.out
53
@@ -XXX,XX +XXX,XX @@ Offset Length Mapped to File
54
0x140000000 0x10000 0x50000 TEST_DIR/t-s003.vmdk
55
56
=== Testing afl image with a very large capacity ===
57
-qemu-img: Can't get image size 'TEST_DIR/afl9.IMGFMT': File too large
58
+qemu-img: Could not open 'TEST_DIR/afl9.IMGFMT': L1 size too big
59
*** done
60
--
38
--
61
2.21.0
39
2.26.2
62
40
63
diff view generated by jsdifflib
1
From: Anton Nefedov <anton.nefedov@virtuozzo.com>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
COW (even empty/zero) areas require encryption too
3
bdrv_is_allocated_above wrongly handles short backing files: it reports
4
after-EOF space as UNALLOCATED which is wrong, as on read the data is
5
generated on the level of short backing file (if all overlays have
6
unallocated areas at that place).
4
7
5
Signed-off-by: Anton Nefedov <anton.nefedov@virtuozzo.com>
8
Reusing bdrv_common_block_status_above fixes the issue and unifies code
9
path.
10
11
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
12
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Max Reitz <mreitz@redhat.com>
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
13
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Message-id: 20190516143028.81155-1-anton.nefedov@virtuozzo.com
14
Message-id: 20200924194003.22080-5-vsementsov@virtuozzo.com
10
Signed-off-by: Max Reitz <mreitz@redhat.com>
15
[Fix s/has/have/ as suggested by Eric Blake. Fix s/area/areas/.
16
--Stefan]
17
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
18
---
12
tests/qemu-iotests/134 | 9 +++++++++
19
block/io.c | 43 +++++--------------------------------------
13
tests/qemu-iotests/134.out | 10 ++++++++++
20
1 file changed, 5 insertions(+), 38 deletions(-)
14
2 files changed, 19 insertions(+)
15
21
16
diff --git a/tests/qemu-iotests/134 b/tests/qemu-iotests/134
22
diff --git a/block/io.c b/block/io.c
17
index XXXXXXX..XXXXXXX 100755
18
--- a/tests/qemu-iotests/134
19
+++ b/tests/qemu-iotests/134
20
@@ -XXX,XX +XXX,XX @@ echo
21
echo "== reading whole image =="
22
$QEMU_IO --object $SECRET -c "read 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
23
24
+echo
25
+echo "== rewriting cluster part =="
26
+$QEMU_IO --object $SECRET -c "write -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
27
+
28
+echo
29
+echo "== verify pattern =="
30
+$QEMU_IO --object $SECRET -c "read -P 0 0 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
31
+$QEMU_IO --object $SECRET -c "read -P 0xb 512 512" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
32
+
33
echo
34
echo "== rewriting whole image =="
35
$QEMU_IO --object $SECRET -c "write -P 0xa 0 $size" --image-opts $IMGSPEC | _filter_qemu_io | _filter_testdir
36
diff --git a/tests/qemu-iotests/134.out b/tests/qemu-iotests/134.out
37
index XXXXXXX..XXXXXXX 100644
23
index XXXXXXX..XXXXXXX 100644
38
--- a/tests/qemu-iotests/134.out
24
--- a/block/io.c
39
+++ b/tests/qemu-iotests/134.out
25
+++ b/block/io.c
40
@@ -XXX,XX +XXX,XX @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=134217728 encryption=on encrypt.
26
@@ -XXX,XX +XXX,XX @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
41
read 134217728/134217728 bytes at offset 0
27
* at 'offset + *pnum' may return the same allocation status (in other
42
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
28
* words, the result is not necessarily the maximum possible range);
43
29
* but 'pnum' will only be 0 when end of file is reached.
44
+== rewriting cluster part ==
30
- *
45
+wrote 512/512 bytes at offset 512
31
*/
46
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
32
int bdrv_is_allocated_above(BlockDriverState *top,
47
+
33
BlockDriverState *base,
48
+== verify pattern ==
34
bool include_base, int64_t offset,
49
+read 512/512 bytes at offset 0
35
int64_t bytes, int64_t *pnum)
50
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
36
{
51
+read 512/512 bytes at offset 512
37
- BlockDriverState *intermediate;
52
+512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
38
- int ret;
53
+
39
- int64_t n = bytes;
54
== rewriting whole image ==
40
-
55
wrote 134217728/134217728 bytes at offset 0
41
- assert(base || !include_base);
56
128 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
42
-
43
- intermediate = top;
44
- while (include_base || intermediate != base) {
45
- int64_t pnum_inter;
46
- int64_t size_inter;
47
-
48
- assert(intermediate);
49
- ret = bdrv_is_allocated(intermediate, offset, bytes, &pnum_inter);
50
- if (ret < 0) {
51
- return ret;
52
- }
53
- if (ret) {
54
- *pnum = pnum_inter;
55
- return 1;
56
- }
57
-
58
- size_inter = bdrv_getlength(intermediate);
59
- if (size_inter < 0) {
60
- return size_inter;
61
- }
62
- if (n > pnum_inter &&
63
- (intermediate == top || offset + pnum_inter < size_inter)) {
64
- n = pnum_inter;
65
- }
66
-
67
- if (intermediate == base) {
68
- break;
69
- }
70
-
71
- intermediate = bdrv_filter_or_cow_bs(intermediate);
72
+ int ret = bdrv_common_block_status_above(top, base, include_base, false,
73
+ offset, bytes, pnum, NULL, NULL);
74
+ if (ret < 0) {
75
+ return ret;
76
}
77
78
- *pnum = n;
79
- return 0;
80
+ return !!(ret & BDRV_BLOCK_ALLOCATED);
81
}
82
83
int coroutine_fn
57
--
84
--
58
2.21.0
85
2.26.2
59
86
60
diff view generated by jsdifflib
1
From: Klaus Birkelund Jensen <klaus@birkelund.eu>
1
From: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
2
2
3
The device mistakenly reports that the Weighted Round Robin with Urgent
3
These cases are fixed by previous patches around block_status and
4
Priority Class arbitration mechanism is supported.
4
is_allocated.
5
5
6
It is not.
6
Signed-off-by: Vladimir Sementsov-Ogievskiy <vsementsov@virtuozzo.com>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Alberto Garcia <berto@igalia.com>
9
Message-id: 20200924194003.22080-6-vsementsov@virtuozzo.com
10
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
11
---
12
tests/qemu-iotests/274 | 20 +++++++++++
13
tests/qemu-iotests/274.out | 68 ++++++++++++++++++++++++++++++++++++++
14
2 files changed, 88 insertions(+)
7
15
8
Signed-off-by: Klaus Birkelund Jensen <klaus.jensen@cnexlabs.com>
16
diff --git a/tests/qemu-iotests/274 b/tests/qemu-iotests/274
9
Message-id: 20190606092530.14206-1-klaus@birkelund.eu
17
index XXXXXXX..XXXXXXX 100755
10
Acked-by: Maxim Levitsky <mlevitsk@redhat.com>
18
--- a/tests/qemu-iotests/274
11
Signed-off-by: Max Reitz <mreitz@redhat.com>
19
+++ b/tests/qemu-iotests/274
12
---
20
@@ -XXX,XX +XXX,XX @@ with iotests.FilePath('base') as base, \
13
hw/block/nvme.c | 1 -
21
iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, mid)
14
1 file changed, 1 deletion(-)
22
iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), mid)
23
24
+ iotests.log('=== Testing qemu-img commit (top -> base) ===')
25
+
26
+ create_chain()
27
+ iotests.qemu_img_log('commit', '-b', base, top)
28
+ iotests.img_info_log(base)
29
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, base)
30
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), base)
31
+
32
+ iotests.log('=== Testing QMP active commit (top -> base) ===')
33
+
34
+ create_chain()
35
+ with create_vm() as vm:
36
+ vm.launch()
37
+ vm.qmp_log('block-commit', device='top', base_node='base',
38
+ job_id='job0', auto_dismiss=False)
39
+ vm.run_job('job0', wait=5)
40
+
41
+ iotests.img_info_log(mid)
42
+ iotests.qemu_io_log('-c', 'read -P 1 0 %d' % size_short, base)
43
+ iotests.qemu_io_log('-c', 'read -P 0 %d %d' % (size_short, size_diff), base)
44
45
iotests.log('== Resize tests ==')
46
47
diff --git a/tests/qemu-iotests/274.out b/tests/qemu-iotests/274.out
48
index XXXXXXX..XXXXXXX 100644
49
--- a/tests/qemu-iotests/274.out
50
+++ b/tests/qemu-iotests/274.out
51
@@ -XXX,XX +XXX,XX @@ read 1048576/1048576 bytes at offset 0
52
read 1048576/1048576 bytes at offset 1048576
53
1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
54
55
+=== Testing qemu-img commit (top -> base) ===
56
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16
57
+
58
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
59
+
60
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
61
+
62
+wrote 2097152/2097152 bytes at offset 0
63
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
64
+
65
+Image committed.
66
+
67
+image: TEST_IMG
68
+file format: IMGFMT
69
+virtual size: 2 MiB (2097152 bytes)
70
+cluster_size: 65536
71
+Format specific information:
72
+ compat: 1.1
73
+ compression type: zlib
74
+ lazy refcounts: false
75
+ refcount bits: 16
76
+ corrupt: false
77
+ extended l2: false
78
+
79
+read 1048576/1048576 bytes at offset 0
80
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
81
+
82
+read 1048576/1048576 bytes at offset 1048576
83
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
84
+
85
+=== Testing QMP active commit (top -> base) ===
86
+Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 lazy_refcounts=off refcount_bits=16
87
+
88
+Formatting 'TEST_DIR/PID-mid', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=1048576 backing_file=TEST_DIR/PID-base backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
89
+
90
+Formatting 'TEST_DIR/PID-top', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=2097152 backing_file=TEST_DIR/PID-mid backing_fmt=qcow2 lazy_refcounts=off refcount_bits=16
91
+
92
+wrote 2097152/2097152 bytes at offset 0
93
+2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
94
+
95
+{"execute": "block-commit", "arguments": {"auto-dismiss": false, "base-node": "base", "device": "top", "job-id": "job0"}}
96
+{"return": {}}
97
+{"execute": "job-complete", "arguments": {"id": "job0"}}
98
+{"return": {}}
99
+{"data": {"device": "job0", "len": 1048576, "offset": 1048576, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_READY", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
100
+{"data": {"device": "job0", "len": 1048576, "offset": 1048576, "speed": 0, "type": "commit"}, "event": "BLOCK_JOB_COMPLETED", "timestamp": {"microseconds": "USECS", "seconds": "SECS"}}
101
+{"execute": "job-dismiss", "arguments": {"id": "job0"}}
102
+{"return": {}}
103
+image: TEST_IMG
104
+file format: IMGFMT
105
+virtual size: 1 MiB (1048576 bytes)
106
+cluster_size: 65536
107
+backing file: TEST_DIR/PID-base
108
+backing file format: IMGFMT
109
+Format specific information:
110
+ compat: 1.1
111
+ compression type: zlib
112
+ lazy refcounts: false
113
+ refcount bits: 16
114
+ corrupt: false
115
+ extended l2: false
116
+
117
+read 1048576/1048576 bytes at offset 0
118
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
119
+
120
+read 1048576/1048576 bytes at offset 1048576
121
+1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
122
+
123
== Resize tests ==
124
=== preallocation=off ===
125
Formatting 'TEST_DIR/PID-base', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=6442450944 lazy_refcounts=off refcount_bits=16
126
--
127
2.26.2
15
128
16
diff --git a/hw/block/nvme.c b/hw/block/nvme.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/block/nvme.c
19
+++ b/hw/block/nvme.c
20
@@ -XXX,XX +XXX,XX @@ static void nvme_realize(PCIDevice *pci_dev, Error **errp)
21
n->bar.cap = 0;
22
NVME_CAP_SET_MQES(n->bar.cap, 0x7ff);
23
NVME_CAP_SET_CQR(n->bar.cap, 1);
24
- NVME_CAP_SET_AMS(n->bar.cap, 1);
25
NVME_CAP_SET_TO(n->bar.cap, 0xf);
26
NVME_CAP_SET_CSS(n->bar.cap, 1);
27
NVME_CAP_SET_MPSMAX(n->bar.cap, 4);
28
--
29
2.21.0
30
31
diff view generated by jsdifflib