1
The following changes since commit 136c67e07869227b21b3f627316e03679ce7b738:
1
The following changes since commit 352998df1c53b366413690d95b35f76d0721ebed:
2
2
3
Merge remote-tracking branch 'remotes/bkoppelmann/tags/pull-tricore-2018-03-02' into staging (2018-03-02 16:56:20 +0000)
3
Merge tag 'i2c-20220314' of https://github.com/philmd/qemu into staging (2022-03-14 14:39:33 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 46d4d36d0bf2b24b205f2f604f0905db80264eef:
9
for you to fetch changes up to 12a195fa343aae2ead1301ce04727bd0ae25eb15:
10
10
11
tap: setting error appropriately when calling net_init_tap_one() (2018-03-05 10:30:16 +0800)
11
vdpa: Expose VHOST_F_LOG_ALL on SVQ (2022-03-15 13:57:44 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
Changes since V2:
16
- fix 32bit build errors
17
15
----------------------------------------------------------------
18
----------------------------------------------------------------
16
Jay Zhou (1):
19
Eugenio Pérez (14):
17
tap: setting error appropriately when calling net_init_tap_one()
20
vhost: Add VhostShadowVirtqueue
21
vhost: Add Shadow VirtQueue kick forwarding capabilities
22
vhost: Add Shadow VirtQueue call forwarding capabilities
23
vhost: Add vhost_svq_valid_features to shadow vq
24
virtio: Add vhost_svq_get_vring_addr
25
vdpa: adapt vhost_ops callbacks to svq
26
vhost: Shadow virtqueue buffers forwarding
27
util: Add iova_tree_alloc_map
28
util: add iova_tree_find_iova
29
vhost: Add VhostIOVATree
30
vdpa: Add custom IOTLB translations to SVQ
31
vdpa: Adapt vhost_vdpa_get_vring_base to SVQ
32
vdpa: Never set log_base addr if SVQ is enabled
33
vdpa: Expose VHOST_F_LOG_ALL on SVQ
18
34
19
Thomas Huth (8):
35
Jason Wang (1):
20
net: Move error reporting from net_init_client/netdev to the calling site
36
virtio-net: fix map leaking on error during receive
21
net: List available netdevs with "-netdev help"
22
net: Only show vhost-user in the help text if CONFIG_POSIX is defined
23
net: Make net_client_init() static
24
net: Remove the deprecated way of dumping network packets
25
net: Remove the deprecated 'host_net_add' and 'host_net_remove' HMP commands
26
net: Add a new convenience option "--nic" to configure default/on-board NICs
27
hw/net: Remove unnecessary header includes
28
37
29
hmp-commands.hx | 30 ------
38
hw/net/virtio-net.c | 1 +
30
hmp.h | 3 -
39
hw/virtio/meson.build | 2 +-
31
hw/net/e1000.c | 1 -
40
hw/virtio/vhost-iova-tree.c | 110 +++++++
32
hw/net/lance.c | 3 -
41
hw/virtio/vhost-iova-tree.h | 27 ++
33
hw/net/ne2000.c | 2 -
42
hw/virtio/vhost-shadow-virtqueue.c | 636 +++++++++++++++++++++++++++++++++++++
34
hw/net/pcnet-pci.c | 1 -
43
hw/virtio/vhost-shadow-virtqueue.h | 87 +++++
35
hw/net/pcnet.c | 1 -
44
hw/virtio/vhost-vdpa.c | 522 +++++++++++++++++++++++++++++-
36
hw/net/rtl8139.c | 2 -
45
include/hw/virtio/vhost-vdpa.h | 8 +
37
hw/net/xgmac.c | 1 -
46
include/qemu/iova-tree.h | 38 ++-
38
include/net/net.h | 4 +-
47
util/iova-tree.c | 170 ++++++++++
39
include/net/vhost_net.h | 3 +
48
10 files changed, 1584 insertions(+), 17 deletions(-)
40
include/sysemu/sysemu.h | 1 +
49
create mode 100644 hw/virtio/vhost-iova-tree.c
41
monitor.c | 61 ------------
50
create mode 100644 hw/virtio/vhost-iova-tree.h
42
net/dump.c | 102 +--------------------
51
create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
43
net/net.c | 239 +++++++++++++++++++++++-------------------------
52
create mode 100644 hw/virtio/vhost-shadow-virtqueue.h
44
net/tap.c | 22 ++++-
45
qapi/net.json | 29 ++----
46
qemu-doc.texi | 16 ----
47
qemu-options.hx | 48 +++++++---
48
tests/test-hmp.c | 2 -
49
vl.c | 10 +-
50
21 files changed, 190 insertions(+), 391 deletions(-)
51
53
52
54
55
diff view generated by jsdifflib
New patch
1
Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
2
tries to fix the use after free of the sg by caching the virtqueue
3
elements in an array and unmapping them at once after receiving the
4
packets. But it forgot to unmap the cached elements on error, which
5
will lead to leaking of mapping and other unexpected results.
1
6
7
Fix this by detaching the cached elements on error. This addresses
8
CVE-2022-26353.
9
10
Reported-by: Victor Tom <vv474172261@gmail.com>
11
Cc: qemu-stable@nongnu.org
12
Fixes: CVE-2022-26353
13
Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
14
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
---
17
hw/net/virtio-net.c | 1 +
18
1 file changed, 1 insertion(+)
19
20
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/net/virtio-net.c
23
+++ b/hw/net/virtio-net.c
24
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
25
26
err:
27
for (j = 0; j < i; j++) {
28
+ virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
29
g_free(elems[j]);
30
}
31
32
--
33
2.7.4
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
Vhost shadow virtqueue (SVQ) is an intermediate jump for virtqueue
4
notifications and buffers, allowing qemu to track them. While qemu is
5
forwarding the buffers and virtqueue changes, it is able to commit the
6
memory that is being dirtied, the same way regular qemu's VirtIO devices
7
do.
8
9
This commit only exposes basic SVQ allocation and free. Next patches of
10
the series add functionality like notifications and buffers forwarding.
11
12
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Acked-by: Michael S. Tsirkin <mst@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
hw/virtio/meson.build | 2 +-
17
hw/virtio/vhost-shadow-virtqueue.c | 62 ++++++++++++++++++++++++++++++++++++++
18
hw/virtio/vhost-shadow-virtqueue.h | 28 +++++++++++++++++
19
3 files changed, 91 insertions(+), 1 deletion(-)
20
create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
21
create mode 100644 hw/virtio/vhost-shadow-virtqueue.h
22
23
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
24
index XXXXXXX..XXXXXXX 100644
25
--- a/hw/virtio/meson.build
26
+++ b/hw/virtio/meson.build
27
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
28
29
virtio_ss = ss.source_set()
30
virtio_ss.add(files('virtio.c'))
31
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c'))
32
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
33
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
34
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
35
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
36
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
37
new file mode 100644
38
index XXXXXXX..XXXXXXX
39
--- /dev/null
40
+++ b/hw/virtio/vhost-shadow-virtqueue.c
41
@@ -XXX,XX +XXX,XX @@
42
+/*
43
+ * vhost shadow virtqueue
44
+ *
45
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
46
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
47
+ *
48
+ * SPDX-License-Identifier: GPL-2.0-or-later
49
+ */
50
+
51
+#include "qemu/osdep.h"
52
+#include "hw/virtio/vhost-shadow-virtqueue.h"
53
+
54
+#include "qemu/error-report.h"
55
+
56
+/**
57
+ * Creates vhost shadow virtqueue, and instructs the vhost device to use the
58
+ * shadow methods and file descriptors.
59
+ *
60
+ * Returns the new virtqueue or NULL.
61
+ *
62
+ * In case of error, reason is reported through error_report.
63
+ */
64
+VhostShadowVirtqueue *vhost_svq_new(void)
65
+{
66
+ g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
67
+ int r;
68
+
69
+ r = event_notifier_init(&svq->hdev_kick, 0);
70
+ if (r != 0) {
71
+ error_report("Couldn't create kick event notifier: %s (%d)",
72
+ g_strerror(errno), errno);
73
+ goto err_init_hdev_kick;
74
+ }
75
+
76
+ r = event_notifier_init(&svq->hdev_call, 0);
77
+ if (r != 0) {
78
+ error_report("Couldn't create call event notifier: %s (%d)",
79
+ g_strerror(errno), errno);
80
+ goto err_init_hdev_call;
81
+ }
82
+
83
+ return g_steal_pointer(&svq);
84
+
85
+err_init_hdev_call:
86
+ event_notifier_cleanup(&svq->hdev_kick);
87
+
88
+err_init_hdev_kick:
89
+ return NULL;
90
+}
91
+
92
+/**
93
+ * Free the resources of the shadow virtqueue.
94
+ *
95
+ * @pvq: gpointer to SVQ so it can be used by autofree functions.
96
+ */
97
+void vhost_svq_free(gpointer pvq)
98
+{
99
+ VhostShadowVirtqueue *vq = pvq;
100
+ event_notifier_cleanup(&vq->hdev_kick);
101
+ event_notifier_cleanup(&vq->hdev_call);
102
+ g_free(vq);
103
+}
104
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
105
new file mode 100644
106
index XXXXXXX..XXXXXXX
107
--- /dev/null
108
+++ b/hw/virtio/vhost-shadow-virtqueue.h
109
@@ -XXX,XX +XXX,XX @@
110
+/*
111
+ * vhost shadow virtqueue
112
+ *
113
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
114
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
115
+ *
116
+ * SPDX-License-Identifier: GPL-2.0-or-later
117
+ */
118
+
119
+#ifndef VHOST_SHADOW_VIRTQUEUE_H
120
+#define VHOST_SHADOW_VIRTQUEUE_H
121
+
122
+#include "qemu/event_notifier.h"
123
+
124
+/* Shadow virtqueue to relay notifications */
125
+typedef struct VhostShadowVirtqueue {
126
+ /* Shadow kick notifier, sent to vhost */
127
+ EventNotifier hdev_kick;
128
+ /* Shadow call notifier, sent to vhost */
129
+ EventNotifier hdev_call;
130
+} VhostShadowVirtqueue;
131
+
132
+VhostShadowVirtqueue *vhost_svq_new(void);
133
+
134
+void vhost_svq_free(gpointer vq);
135
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
136
+
137
+#endif
138
--
139
2.7.4
140
141
diff view generated by jsdifflib
New patch
1
1
From: Eugenio Pérez <eperezma@redhat.com>
2
3
At this stage no buffer forwarding will be performed in SVQ mode: Qemu
4
will just forward the guest's kicks to the device.
5
6
Host memory notifiers regions are left out for simplicity, and they will
7
not be addressed in this series.
8
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/virtio/vhost-shadow-virtqueue.c | 55 ++++++++++++++
14
hw/virtio/vhost-shadow-virtqueue.h | 14 ++++
15
hw/virtio/vhost-vdpa.c | 144 ++++++++++++++++++++++++++++++++++++-
16
include/hw/virtio/vhost-vdpa.h | 4 ++
17
4 files changed, 215 insertions(+), 2 deletions(-)
18
19
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "hw/virtio/vhost-shadow-virtqueue.h"
25
26
#include "qemu/error-report.h"
27
+#include "qemu/main-loop.h"
28
+#include "linux-headers/linux/vhost.h"
29
+
30
+/**
31
+ * Forward guest notifications.
32
+ *
33
+ * @n: guest kick event notifier, the one that guest set to notify svq.
34
+ */
35
+static void vhost_handle_guest_kick(EventNotifier *n)
36
+{
37
+ VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
38
+ event_notifier_test_and_clear(n);
39
+ event_notifier_set(&svq->hdev_kick);
40
+}
41
+
42
+/**
43
+ * Set a new file descriptor for the guest to kick the SVQ and notify for avail
44
+ *
45
+ * @svq: The svq
46
+ * @svq_kick_fd: The svq kick fd
47
+ *
48
+ * Note that the SVQ will never close the old file descriptor.
49
+ */
50
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
51
+{
52
+ EventNotifier *svq_kick = &svq->svq_kick;
53
+ bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
54
+ bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;
55
+
56
+ if (poll_stop) {
57
+ event_notifier_set_handler(svq_kick, NULL);
58
+ }
59
+
60
+ /*
61
+ * event_notifier_set_handler already checks for guest's notifications if
62
+ * they arrive at the new file descriptor in the switch, so there is no
63
+ * need to explicitly check for them.
64
+ */
65
+ if (poll_start) {
66
+ event_notifier_init_fd(svq_kick, svq_kick_fd);
67
+ event_notifier_set(svq_kick);
68
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
69
+ }
70
+}
71
+
72
+/**
73
+ * Stop the shadow virtqueue operation.
74
+ * @svq: Shadow Virtqueue
75
+ */
76
+void vhost_svq_stop(VhostShadowVirtqueue *svq)
77
+{
78
+ event_notifier_set_handler(&svq->svq_kick, NULL);
79
+}
80
81
/**
82
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
83
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
84
goto err_init_hdev_call;
85
}
86
87
+ event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
88
return g_steal_pointer(&svq);
89
90
err_init_hdev_call:
91
@@ -XXX,XX +XXX,XX @@ err_init_hdev_kick:
92
void vhost_svq_free(gpointer pvq)
93
{
94
VhostShadowVirtqueue *vq = pvq;
95
+ vhost_svq_stop(vq);
96
event_notifier_cleanup(&vq->hdev_kick);
97
event_notifier_cleanup(&vq->hdev_call);
98
g_free(vq);
99
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
100
index XXXXXXX..XXXXXXX 100644
101
--- a/hw/virtio/vhost-shadow-virtqueue.h
102
+++ b/hw/virtio/vhost-shadow-virtqueue.h
103
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
104
EventNotifier hdev_kick;
105
/* Shadow call notifier, sent to vhost */
106
EventNotifier hdev_call;
107
+
108
+ /*
109
+ * Borrowed virtqueue's guest to host notifier. To borrow it in this event
110
+ * notifier allows to recover the VhostShadowVirtqueue from the event loop
111
+ * easily. If we use the VirtQueue's one, we don't have an easy way to
112
+ * retrieve VhostShadowVirtqueue.
113
+ *
114
+ * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
115
+ */
116
+ EventNotifier svq_kick;
117
} VhostShadowVirtqueue;
118
119
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
120
+
121
+void vhost_svq_stop(VhostShadowVirtqueue *svq);
122
+
123
VhostShadowVirtqueue *vhost_svq_new(void);
124
125
void vhost_svq_free(gpointer vq);
126
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
127
index XXXXXXX..XXXXXXX 100644
128
--- a/hw/virtio/vhost-vdpa.c
129
+++ b/hw/virtio/vhost-vdpa.c
130
@@ -XXX,XX +XXX,XX @@
131
#include "hw/virtio/vhost.h"
132
#include "hw/virtio/vhost-backend.h"
133
#include "hw/virtio/virtio-net.h"
134
+#include "hw/virtio/vhost-shadow-virtqueue.h"
135
#include "hw/virtio/vhost-vdpa.h"
136
#include "exec/address-spaces.h"
137
#include "qemu/main-loop.h"
138
#include "cpu.h"
139
#include "trace.h"
140
#include "qemu-common.h"
141
+#include "qapi/error.h"
142
143
/*
144
* Return one past the end of the end of section. Be careful with uint64_t
145
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
146
return v->index != 0;
147
}
148
149
+static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
150
+ Error **errp)
151
+{
152
+ g_autoptr(GPtrArray) shadow_vqs = NULL;
153
+
154
+ if (!v->shadow_vqs_enabled) {
155
+ return 0;
156
+ }
157
+
158
+ shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
159
+ for (unsigned n = 0; n < hdev->nvqs; ++n) {
160
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
161
+
162
+ if (unlikely(!svq)) {
163
+ error_setg(errp, "Cannot create svq %u", n);
164
+ return -1;
165
+ }
166
+ g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
167
+ }
168
+
169
+ v->shadow_vqs = g_steal_pointer(&shadow_vqs);
170
+ return 0;
171
+}
172
+
173
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
174
{
175
struct vhost_vdpa *v;
176
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
177
dev->opaque = opaque ;
178
v->listener = vhost_vdpa_memory_listener;
179
v->msg_type = VHOST_IOTLB_MSG_V2;
180
+ ret = vhost_vdpa_init_svq(dev, v, errp);
181
+ if (ret) {
182
+ goto err;
183
+ }
184
185
vhost_vdpa_get_iova_range(v);
186
187
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
188
VIRTIO_CONFIG_S_DRIVER);
189
190
return 0;
191
+
192
+err:
193
+ ram_block_discard_disable(false);
194
+ return ret;
195
}
196
197
static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
198
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
199
200
static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
201
{
202
+ struct vhost_vdpa *v = dev->opaque;
203
int i;
204
205
+ if (v->shadow_vqs_enabled) {
206
+ /* FIXME SVQ is not compatible with host notifiers mr */
207
+ return;
208
+ }
209
+
210
for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
211
if (vhost_vdpa_host_notifier_init(dev, i)) {
212
goto err;
213
@@ -XXX,XX +XXX,XX @@ err:
214
return;
215
}
216
217
+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
218
+{
219
+ struct vhost_vdpa *v = dev->opaque;
220
+ size_t idx;
221
+
222
+ if (!v->shadow_vqs) {
223
+ return;
224
+ }
225
+
226
+ for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
227
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
228
+ }
229
+ g_ptr_array_free(v->shadow_vqs, true);
230
+}
231
+
232
static int vhost_vdpa_cleanup(struct vhost_dev *dev)
233
{
234
struct vhost_vdpa *v;
235
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
236
trace_vhost_vdpa_cleanup(dev, v);
237
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
238
memory_listener_unregister(&v->listener);
239
+ vhost_vdpa_svq_cleanup(dev);
240
241
dev->opaque = NULL;
242
ram_block_discard_disable(false);
243
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
244
return ret;
245
}
246
247
+static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
248
+{
249
+ if (!v->shadow_vqs_enabled) {
250
+ return;
251
+ }
252
+
253
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
254
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
255
+ vhost_svq_stop(svq);
256
+ }
257
+}
258
+
259
static int vhost_vdpa_reset_device(struct vhost_dev *dev)
260
{
261
+ struct vhost_vdpa *v = dev->opaque;
262
int ret;
263
uint8_t status = 0;
264
265
+ vhost_vdpa_reset_svq(v);
266
+
267
ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
268
trace_vhost_vdpa_reset_device(dev, status);
269
return ret;
270
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
271
return ret;
272
}
273
274
+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
275
+ struct vhost_vring_file *file)
276
+{
277
+ trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
278
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
279
+}
280
+
281
+/**
282
+ * Set the shadow virtqueue descriptors to the device
283
+ *
284
+ * @dev: The vhost device model
285
+ * @svq: The shadow virtqueue
286
+ * @idx: The index of the virtqueue in the vhost device
287
+ * @errp: Error
288
+ */
289
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
290
+ VhostShadowVirtqueue *svq, unsigned idx,
291
+ Error **errp)
292
+{
293
+ struct vhost_vring_file file = {
294
+ .index = dev->vq_index + idx,
295
+ };
296
+ const EventNotifier *event_notifier = &svq->hdev_kick;
297
+ int r;
298
+
299
+ file.fd = event_notifier_get_fd(event_notifier);
300
+ r = vhost_vdpa_set_vring_dev_kick(dev, &file);
301
+ if (unlikely(r != 0)) {
302
+ error_setg_errno(errp, -r, "Can't set device kick fd");
303
+ }
304
+
305
+ return r == 0;
306
+}
307
+
308
+static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
309
+{
310
+ struct vhost_vdpa *v = dev->opaque;
311
+ Error *err = NULL;
312
+ unsigned i;
313
+
314
+ if (!v->shadow_vqs) {
315
+ return true;
316
+ }
317
+
318
+ for (i = 0; i < v->shadow_vqs->len; ++i) {
319
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
320
+ bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
321
+ if (unlikely(!ok)) {
322
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
323
+ return false;
324
+ }
325
+ }
326
+
327
+ return true;
328
+}
329
+
330
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
331
{
332
struct vhost_vdpa *v = dev->opaque;
333
+ bool ok;
334
trace_vhost_vdpa_dev_start(dev, started);
335
336
if (started) {
337
vhost_vdpa_host_notifiers_init(dev);
338
+ ok = vhost_vdpa_svqs_start(dev);
339
+ if (unlikely(!ok)) {
340
+ return -1;
341
+ }
342
vhost_vdpa_set_vring_ready(dev);
343
} else {
344
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
345
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
346
static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
347
struct vhost_vring_file *file)
348
{
349
- trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
350
- return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
351
+ struct vhost_vdpa *v = dev->opaque;
352
+ int vdpa_idx = file->index - dev->vq_index;
353
+
354
+ if (v->shadow_vqs_enabled) {
355
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
356
+ vhost_svq_set_svq_kick_fd(svq, file->fd);
357
+ return 0;
358
+ } else {
359
+ return vhost_vdpa_set_vring_dev_kick(dev, file);
360
+ }
361
}
362
363
static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
364
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
365
index XXXXXXX..XXXXXXX 100644
366
--- a/include/hw/virtio/vhost-vdpa.h
367
+++ b/include/hw/virtio/vhost-vdpa.h
368
@@ -XXX,XX +XXX,XX @@
369
#ifndef HW_VIRTIO_VHOST_VDPA_H
370
#define HW_VIRTIO_VHOST_VDPA_H
371
372
+#include <gmodule.h>
373
+
374
#include "hw/virtio/virtio.h"
375
#include "standard-headers/linux/vhost_types.h"
376
377
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
378
bool iotlb_batch_begin_sent;
379
MemoryListener listener;
380
struct vhost_vdpa_iova_range iova_range;
381
+ bool shadow_vqs_enabled;
382
+ GPtrArray *shadow_vqs;
383
struct vhost_dev *dev;
384
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
385
} VhostVDPA;
386
--
387
2.7.4
388
389
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This will make qemu aware of the device used buffers, allowing it to
4
write the guest memory with its contents if needed.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Acked-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 38 ++++++++++++++++++++++++++++++++++++++
11
hw/virtio/vhost-shadow-virtqueue.h | 4 ++++
12
hw/virtio/vhost-vdpa.c | 31 +++++++++++++++++++++++++++++--
13
3 files changed, 71 insertions(+), 2 deletions(-)
14
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
19
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(EventNotifier *n)
20
}
21
22
/**
23
+ * Forward vhost notifications
24
+ *
25
+ * @n: hdev call event notifier, the one that device set to notify svq.
26
+ */
27
+static void vhost_svq_handle_call(EventNotifier *n)
28
+{
29
+ VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
30
+ hdev_call);
31
+ event_notifier_test_and_clear(n);
32
+ event_notifier_set(&svq->svq_call);
33
+}
34
+
35
+/**
36
+ * Set the call notifier for the SVQ to call the guest
37
+ *
38
+ * @svq: Shadow virtqueue
39
+ * @call_fd: call notifier
40
+ *
41
+ * Called on BQL context.
42
+ */
43
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
44
+{
45
+ if (call_fd == VHOST_FILE_UNBIND) {
46
+ /*
47
+ * Fail event_notifier_set if called handling device call.
48
+ *
49
+ * SVQ still needs device notifications, since it needs to keep
50
+ * forwarding used buffers even with the unbind.
51
+ */
52
+ memset(&svq->svq_call, 0, sizeof(svq->svq_call));
53
+ } else {
54
+ event_notifier_init_fd(&svq->svq_call, call_fd);
55
+ }
56
+}
57
+
58
+/**
59
* Set a new file descriptor for the guest to kick the SVQ and notify for avail
60
*
61
* @svq: The svq
62
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
63
}
64
65
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
66
+ event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
67
return g_steal_pointer(&svq);
68
69
err_init_hdev_call:
70
@@ -XXX,XX +XXX,XX @@ void vhost_svq_free(gpointer pvq)
71
VhostShadowVirtqueue *vq = pvq;
72
vhost_svq_stop(vq);
73
event_notifier_cleanup(&vq->hdev_kick);
74
+ event_notifier_set_handler(&vq->hdev_call, NULL);
75
event_notifier_cleanup(&vq->hdev_call);
76
g_free(vq);
77
}
78
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
79
index XXXXXXX..XXXXXXX 100644
80
--- a/hw/virtio/vhost-shadow-virtqueue.h
81
+++ b/hw/virtio/vhost-shadow-virtqueue.h
82
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
83
* So shadow virtqueue must not clean it, or we would lose VirtQueue one.
84
*/
85
EventNotifier svq_kick;
86
+
87
+ /* Guest's call notifier, where the SVQ calls guest. */
88
+ EventNotifier svq_call;
89
} VhostShadowVirtqueue;
90
91
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
92
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
93
94
void vhost_svq_stop(VhostShadowVirtqueue *svq);
95
96
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/hw/virtio/vhost-vdpa.c
99
+++ b/hw/virtio/vhost-vdpa.c
100
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
101
return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
102
}
103
104
+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
105
+ struct vhost_vring_file *file)
106
+{
107
+ trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
108
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
109
+}
110
+
111
/**
112
* Set the shadow virtqueue descriptors to the device
113
*
114
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
115
* @svq: The shadow virtqueue
116
* @idx: The index of the virtqueue in the vhost device
117
* @errp: Error
118
+ *
119
+ * Note that this function does not rewind kick file descriptor if cannot set
120
+ * call one.
121
*/
122
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
123
VhostShadowVirtqueue *svq, unsigned idx,
124
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
125
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
126
if (unlikely(r != 0)) {
127
error_setg_errno(errp, -r, "Can't set device kick fd");
128
+ return false;
129
+ }
130
+
131
+ event_notifier = &svq->hdev_call;
132
+ file.fd = event_notifier_get_fd(event_notifier);
133
+ r = vhost_vdpa_set_vring_dev_call(dev, &file);
134
+ if (unlikely(r != 0)) {
135
+ error_setg_errno(errp, -r, "Can't set device call fd");
136
}
137
138
return r == 0;
139
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
140
static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
141
struct vhost_vring_file *file)
142
{
143
- trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
144
- return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
145
+ struct vhost_vdpa *v = dev->opaque;
146
+
147
+ if (v->shadow_vqs_enabled) {
148
+ int vdpa_idx = file->index - dev->vq_index;
149
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
150
+
151
+ vhost_svq_set_svq_call_fd(svq, file->fd);
152
+ return 0;
153
+ } else {
154
+ return vhost_vdpa_set_vring_dev_call(dev, file);
155
+ }
156
}
157
158
static int vhost_vdpa_get_features(struct vhost_dev *dev,
159
--
160
2.7.4
161
162
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This allows SVQ to negotiate features with the guest and the device. For
4
the device, SVQ is a driver. While this function bypasses all
5
non-transport features, it needs to disable the features that SVQ does
6
not support when forwarding buffers. This includes packed vq layout,
7
indirect descriptors or event idx.
8
9
Future changes can add support to offer more features to the guest,
10
since the use of VirtQueue gives this for free. This is left out at the
11
moment for simplicity.
12
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
---
17
hw/virtio/vhost-shadow-virtqueue.c | 44 ++++++++++++++++++++++++++++++++++++++
18
hw/virtio/vhost-shadow-virtqueue.h | 2 ++
19
hw/virtio/vhost-vdpa.c | 15 +++++++++++++
20
3 files changed, 61 insertions(+)
21
22
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/virtio/vhost-shadow-virtqueue.c
25
+++ b/hw/virtio/vhost-shadow-virtqueue.c
26
@@ -XXX,XX +XXX,XX @@
27
#include "hw/virtio/vhost-shadow-virtqueue.h"
28
29
#include "qemu/error-report.h"
30
+#include "qapi/error.h"
31
#include "qemu/main-loop.h"
32
#include "linux-headers/linux/vhost.h"
33
34
/**
35
+ * Validate the transport device features that both guests can use with the SVQ
36
+ * and SVQs can use with the device.
37
+ *
38
+ * @dev_features: The features
39
+ * @errp: Error pointer
40
+ */
41
+bool vhost_svq_valid_features(uint64_t features, Error **errp)
42
+{
43
+ bool ok = true;
44
+ uint64_t svq_features = features;
45
+
46
+ for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
47
+ ++b) {
48
+ switch (b) {
49
+ case VIRTIO_F_ANY_LAYOUT:
50
+ continue;
51
+
52
+ case VIRTIO_F_ACCESS_PLATFORM:
53
+ /* SVQ trust in the host's IOMMU to translate addresses */
54
+ case VIRTIO_F_VERSION_1:
55
+ /* SVQ trust that the guest vring is little endian */
56
+ if (!(svq_features & BIT_ULL(b))) {
57
+ svq_features |= BIT_ULL(b);
58
+ ok = false;
59
+ }
60
+ continue;
61
+
62
+ default:
63
+ if (svq_features & BIT_ULL(b)) {
64
+ svq_features &= ~BIT_ULL(b);
65
+ ok = false;
66
+ }
67
+ }
68
+ }
69
+
70
+ if (!ok) {
71
+ error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
72
+ ", ok: 0x%"PRIx64, features, svq_features);
73
+ }
74
+ return ok;
75
+}
76
+
77
+/**
78
* Forward guest notifications.
79
*
80
* @n: guest kick event notifier, the one that guest set to notify svq.
81
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/hw/virtio/vhost-shadow-virtqueue.h
84
+++ b/hw/virtio/vhost-shadow-virtqueue.h
85
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
86
EventNotifier svq_call;
87
} VhostShadowVirtqueue;
88
89
+bool vhost_svq_valid_features(uint64_t features, Error **errp);
90
+
91
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
92
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
93
94
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/hw/virtio/vhost-vdpa.c
97
+++ b/hw/virtio/vhost-vdpa.c
98
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
99
Error **errp)
100
{
101
g_autoptr(GPtrArray) shadow_vqs = NULL;
102
+ uint64_t dev_features, svq_features;
103
+ int r;
104
+ bool ok;
105
106
if (!v->shadow_vqs_enabled) {
107
return 0;
108
}
109
110
+ r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
111
+ if (r != 0) {
112
+ error_setg_errno(errp, -r, "Can't get vdpa device features");
113
+ return r;
114
+ }
115
+
116
+ svq_features = dev_features;
117
+ ok = vhost_svq_valid_features(svq_features, errp);
118
+ if (unlikely(!ok)) {
119
+ return -1;
120
+ }
121
+
122
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
123
for (unsigned n = 0; n < hdev->nvqs; ++n) {
124
g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
125
--
126
2.7.4
127
128
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
It reports the shadow virtqueue address from qemu virtual address space.
4
5
Since this will be different from the guest's vaddr, but the device can
6
access it, SVQ takes special care about its alignment & lack of garbage
7
data. It assumes that IOMMU will work in host_page_size ranges for that.
8
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/virtio/vhost-shadow-virtqueue.c | 29 +++++++++++++++++++++++++++++
14
hw/virtio/vhost-shadow-virtqueue.h | 9 +++++++++
15
2 files changed, 38 insertions(+)
16
17
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/virtio/vhost-shadow-virtqueue.c
20
+++ b/hw/virtio/vhost-shadow-virtqueue.c
21
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
22
}
23
24
/**
25
+ * Get the shadow vq vring address.
26
+ * @svq: Shadow virtqueue
27
+ * @addr: Destination to store address
28
+ */
29
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
30
+ struct vhost_vring_addr *addr)
31
+{
32
+ addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
33
+ addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
34
+ addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
35
+}
36
+
37
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
38
+{
39
+ size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
40
+ size_t avail_size = offsetof(vring_avail_t, ring) +
41
+ sizeof(uint16_t) * svq->vring.num;
42
+
43
+ return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size);
44
+}
45
+
46
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
47
+{
48
+ size_t used_size = offsetof(vring_used_t, ring) +
49
+ sizeof(vring_used_elem_t) * svq->vring.num;
50
+ return ROUND_UP(used_size, qemu_real_host_page_size);
51
+}
52
+
53
+/**
54
* Set a new file descriptor for the guest to kick the SVQ and notify for avail
55
*
56
* @svq: The svq
57
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
58
index XXXXXXX..XXXXXXX 100644
59
--- a/hw/virtio/vhost-shadow-virtqueue.h
60
+++ b/hw/virtio/vhost-shadow-virtqueue.h
61
@@ -XXX,XX +XXX,XX @@
62
#define VHOST_SHADOW_VIRTQUEUE_H
63
64
#include "qemu/event_notifier.h"
65
+#include "hw/virtio/virtio.h"
66
+#include "standard-headers/linux/vhost_types.h"
67
68
/* Shadow virtqueue to relay notifications */
69
typedef struct VhostShadowVirtqueue {
70
+ /* Shadow vring */
71
+ struct vring vring;
72
+
73
/* Shadow kick notifier, sent to vhost */
74
EventNotifier hdev_kick;
75
/* Shadow call notifier, sent to vhost */
76
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp);
77
78
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
79
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
80
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
81
+ struct vhost_vring_addr *addr);
82
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
83
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
84
85
void vhost_svq_stop(VhostShadowVirtqueue *svq);
86
87
--
88
2.7.4
89
90
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
They are deprecated since QEMU v2.10, and so far nobody complained that
3
First half of the buffers forwarding part, preparing vhost-vdpa
4
these commands are still necessary for any reason - and since you can use
4
callbacks to SVQ to offer it. QEMU cannot enable it at this moment, so
5
'netdev_add' and 'netdev_remove' instead, there also should not be any
5
this is effectively dead code at the moment, but it helps to reduce
6
real reason. Since they are also standing in the way for the upcoming
6
patch size.
7
'vlan' clean-up, it's now time to remove them.
8
7
9
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
8
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Signed-off-by: Thomas Huth <thuth@redhat.com>
9
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
11
---
13
hmp-commands.hx | 30 ------------------
12
hw/virtio/vhost-vdpa.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
14
hmp.h | 3 --
13
1 file changed, 41 insertions(+), 7 deletions(-)
15
monitor.c | 61 ------------------------------------
16
net/net.c | 94 --------------------------------------------------------
17
qemu-doc.texi | 10 ------
18
tests/test-hmp.c | 2 --
19
6 files changed, 200 deletions(-)
20
14
21
diff --git a/hmp-commands.hx b/hmp-commands.hx
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
22
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
23
--- a/hmp-commands.hx
17
--- a/hw/virtio/vhost-vdpa.c
24
+++ b/hmp-commands.hx
18
+++ b/hw/virtio/vhost-vdpa.c
25
@@ -XXX,XX +XXX,XX @@ Inject PCIe AER error
19
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
26
ETEXI
20
return ret;
27
21
}
28
{
22
29
- .name = "host_net_add",
23
+static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
30
- .args_type = "device:s,opts:s?",
24
+ struct vhost_vring_state *ring)
31
- .params = "tap|user|socket|vde|netmap|bridge|vhost-user|dump [options]",
25
+{
32
- .help = "add host VLAN client (deprecated, use netdev_add instead)",
26
+ trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
33
- .cmd = hmp_host_net_add,
27
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
34
- .command_completion = host_net_add_completion,
28
+}
35
- },
29
+
36
-
30
static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
37
-STEXI
31
struct vhost_vring_file *file)
38
-@item host_net_add
32
{
39
-@findex host_net_add
33
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
40
-Add host VLAN client. Deprecated, please use @code{netdev_add} instead.
34
return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
41
-ETEXI
42
-
43
- {
44
- .name = "host_net_remove",
45
- .args_type = "vlan_id:i,device:s",
46
- .params = "vlan_id name",
47
- .help = "remove host VLAN client (deprecated, use netdev_del instead)",
48
- .cmd = hmp_host_net_remove,
49
- .command_completion = host_net_remove_completion,
50
- },
51
-
52
-STEXI
53
-@item host_net_remove
54
-@findex host_net_remove
55
-Remove host VLAN client. Deprecated, please use @code{netdev_del} instead.
56
-ETEXI
57
-
58
- {
59
.name = "netdev_add",
60
.args_type = "netdev:O",
61
.params = "[user|tap|socket|vde|bridge|hubport|netmap|vhost-user],id=str[,prop=value][,...]",
62
diff --git a/hmp.h b/hmp.h
63
index XXXXXXX..XXXXXXX 100644
64
--- a/hmp.h
65
+++ b/hmp.h
66
@@ -XXX,XX +XXX,XX @@ void migrate_set_capability_completion(ReadLineState *rs, int nb_args,
67
const char *str);
68
void migrate_set_parameter_completion(ReadLineState *rs, int nb_args,
69
const char *str);
70
-void host_net_add_completion(ReadLineState *rs, int nb_args, const char *str);
71
-void host_net_remove_completion(ReadLineState *rs, int nb_args,
72
- const char *str);
73
void delvm_completion(ReadLineState *rs, int nb_args, const char *str);
74
void loadvm_completion(ReadLineState *rs, int nb_args, const char *str);
75
void hmp_rocker(Monitor *mon, const QDict *qdict);
76
diff --git a/monitor.c b/monitor.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/monitor.c
79
+++ b/monitor.c
80
@@ -XXX,XX +XXX,XX @@ void migrate_set_parameter_completion(ReadLineState *rs, int nb_args,
81
}
82
}
35
}
83
36
84
-void host_net_add_completion(ReadLineState *rs, int nb_args, const char *str)
37
+static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
85
-{
38
+ struct vhost_vring_addr *addr)
86
- int i;
39
+{
87
- size_t len;
40
+ trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
88
- if (nb_args != 2) {
41
+ addr->desc_user_addr, addr->used_user_addr,
89
- return;
42
+ addr->avail_user_addr,
90
- }
43
+ addr->log_guest_addr);
91
- len = strlen(str);
44
+
92
- readline_set_completion_index(rs, len);
45
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
93
- for (i = 0; host_net_devices[i]; i++) {
46
+
94
- if (!strncmp(host_net_devices[i], str, len)) {
47
+}
95
- readline_add_completion(rs, host_net_devices[i]);
48
+
96
- }
49
/**
97
- }
50
* Set the shadow virtqueue descriptors to the device
98
-}
51
*
99
-
52
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
100
-void host_net_remove_completion(ReadLineState *rs, int nb_args, const char *str)
53
static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
101
-{
54
struct vhost_vring_addr *addr)
102
- NetClientState *ncs[MAX_QUEUE_NUM];
103
- int count, i, len;
104
-
105
- len = strlen(str);
106
- readline_set_completion_index(rs, len);
107
- if (nb_args == 2) {
108
- count = qemu_find_net_clients_except(NULL, ncs,
109
- NET_CLIENT_DRIVER_NONE,
110
- MAX_QUEUE_NUM);
111
- for (i = 0; i < MIN(count, MAX_QUEUE_NUM); i++) {
112
- int id;
113
- char name[16];
114
-
115
- if (net_hub_id_for_client(ncs[i], &id)) {
116
- continue;
117
- }
118
- snprintf(name, sizeof(name), "%d", id);
119
- if (!strncmp(str, name, len)) {
120
- readline_add_completion(rs, name);
121
- }
122
- }
123
- return;
124
- } else if (nb_args == 3) {
125
- count = qemu_find_net_clients_except(NULL, ncs,
126
- NET_CLIENT_DRIVER_NIC,
127
- MAX_QUEUE_NUM);
128
- for (i = 0; i < MIN(count, MAX_QUEUE_NUM); i++) {
129
- int id;
130
- const char *name;
131
-
132
- if (ncs[i]->info->type == NET_CLIENT_DRIVER_HUBPORT ||
133
- net_hub_id_for_client(ncs[i], &id)) {
134
- continue;
135
- }
136
- name = ncs[i]->name;
137
- if (!strncmp(str, name, len)) {
138
- readline_add_completion(rs, name);
139
- }
140
- }
141
- return;
142
- }
143
-}
144
-
145
static void vm_completion(ReadLineState *rs, const char *str)
146
{
55
{
147
size_t len;
56
- trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
148
diff --git a/net/net.c b/net/net.c
57
- addr->desc_user_addr, addr->used_user_addr,
149
index XXXXXXX..XXXXXXX 100644
58
- addr->avail_user_addr,
150
--- a/net/net.c
59
- addr->log_guest_addr);
151
+++ b/net/net.c
60
- return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
152
@@ -XXX,XX +XXX,XX @@
61
+ struct vhost_vdpa *v = dev->opaque;
153
static VMChangeStateEntry *net_change_state_entry;
62
+
154
static QTAILQ_HEAD(, NetClientState) net_clients;
63
+ if (v->shadow_vqs_enabled) {
155
64
+ /*
156
-const char *host_net_devices[] = {
65
+ * Device vring addr was set at device start. SVQ base is handled by
157
- "tap",
66
+ * VirtQueue code.
158
- "socket",
67
+ */
159
-#ifdef CONFIG_NET_BRIDGE
68
+ return 0;
160
- "bridge",
69
+ }
161
-#endif
70
+
162
-#ifdef CONFIG_NETMAP
71
+ return vhost_vdpa_set_vring_dev_addr(dev, addr);
163
- "netmap",
164
-#endif
165
-#ifdef CONFIG_SLIRP
166
- "user",
167
-#endif
168
-#ifdef CONFIG_VDE
169
- "vde",
170
-#endif
171
- "vhost-user",
172
- NULL,
173
-};
174
-
175
/***********************************************************/
176
/* network device redirectors */
177
178
@@ -XXX,XX +XXX,XX @@ static int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp)
179
return ret;
180
}
72
}
181
73
182
-
74
static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
183
-static int net_host_check_device(const char *device)
75
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
184
-{
76
static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
185
- int i;
77
struct vhost_vring_state *ring)
186
- for (i = 0; host_net_devices[i]; i++) {
187
- if (!strncmp(host_net_devices[i], device,
188
- strlen(host_net_devices[i]))) {
189
- return 1;
190
- }
191
- }
192
-
193
- return 0;
194
-}
195
-
196
-void hmp_host_net_add(Monitor *mon, const QDict *qdict)
197
-{
198
- const char *device = qdict_get_str(qdict, "device");
199
- const char *opts_str = qdict_get_try_str(qdict, "opts");
200
- Error *local_err = NULL;
201
- QemuOpts *opts;
202
- static bool warned;
203
-
204
- if (!warned && !qtest_enabled()) {
205
- error_report("host_net_add is deprecated, use netdev_add instead");
206
- warned = true;
207
- }
208
-
209
- if (!net_host_check_device(device)) {
210
- monitor_printf(mon, "invalid host network device %s\n", device);
211
- return;
212
- }
213
-
214
- opts = qemu_opts_parse_noisily(qemu_find_opts("net"),
215
- opts_str ? opts_str : "", false);
216
- if (!opts) {
217
- return;
218
- }
219
-
220
- qemu_opt_set(opts, "type", device, &error_abort);
221
-
222
- net_client_init(opts, false, &local_err);
223
- if (local_err) {
224
- error_report_err(local_err);
225
- monitor_printf(mon, "adding host network device %s failed\n", device);
226
- }
227
-}
228
-
229
-void hmp_host_net_remove(Monitor *mon, const QDict *qdict)
230
-{
231
- NetClientState *nc;
232
- int vlan_id = qdict_get_int(qdict, "vlan_id");
233
- const char *device = qdict_get_str(qdict, "device");
234
- static bool warned;
235
-
236
- if (!warned && !qtest_enabled()) {
237
- error_report("host_net_remove is deprecated, use netdev_del instead");
238
- warned = true;
239
- }
240
-
241
- nc = net_hub_find_client_by_name(vlan_id, device);
242
- if (!nc) {
243
- error_report("Host network device '%s' on hub '%d' not found",
244
- device, vlan_id);
245
- return;
246
- }
247
- if (nc->info->type == NET_CLIENT_DRIVER_NIC) {
248
- error_report("invalid host network device '%s'", device);
249
- return;
250
- }
251
-
252
- qemu_del_net_client(nc->peer);
253
- qemu_del_net_client(nc);
254
- qemu_opts_del(qemu_opts_find(qemu_find_opts("net"), device));
255
-}
256
-
257
void netdev_add(QemuOpts *opts, Error **errp)
258
{
78
{
259
net_client_init(opts, true, errp);
79
- trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
260
diff --git a/qemu-doc.texi b/qemu-doc.texi
80
- return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
261
index XXXXXXX..XXXXXXX 100644
81
+ struct vhost_vdpa *v = dev->opaque;
262
--- a/qemu-doc.texi
82
+
263
+++ b/qemu-doc.texi
83
+ if (v->shadow_vqs_enabled) {
264
@@ -XXX,XX +XXX,XX @@ from qcow2 images.
84
+ /*
265
85
+ * Device vring base was set at device start. SVQ base is handled by
266
The ``query-cpus'' command is replaced by the ``query-cpus-fast'' command.
86
+ * VirtQueue code.
267
87
+ */
268
-@section System emulator human monitor commands
88
+ return 0;
269
-
89
+ }
270
-@subsection host_net_add (since 2.10.0)
90
+
271
-
91
+ return vhost_vdpa_set_dev_vring_base(dev, ring);
272
-The ``host_net_add'' command is replaced by the ``netdev_add'' command.
92
}
273
-
93
274
-@subsection host_net_remove (since 2.10.0)
94
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
275
-
276
-The ``host_net_remove'' command is replaced by the ``netdev_del'' command.
277
-
278
@section System emulator devices
279
280
@subsection ivshmem (since 2.6.0)
281
diff --git a/tests/test-hmp.c b/tests/test-hmp.c
282
index XXXXXXX..XXXXXXX 100644
283
--- a/tests/test-hmp.c
284
+++ b/tests/test-hmp.c
285
@@ -XXX,XX +XXX,XX @@ static const char *hmp_cmds[] = {
286
"dump-guest-memory /dev/null 0 4096",
287
"dump-guest-memory /dev/null",
288
"gdbserver",
289
- "host_net_add user id=net0",
290
"hostfwd_add tcp::43210-:43210",
291
"hostfwd_remove tcp::43210-:43210",
292
- "host_net_remove 0 net0",
293
"i /w 0",
294
"log all",
295
"log none",
296
--
95
--
297
2.7.4
96
2.7.4
298
97
299
98
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
It looks strange that net_init_client() and net_init_netdev() both
3
Initial version of shadow virtqueue that actually forwards buffers. There
4
take an "Error **errp" parameter, but then do the error reporting
4
is no iommu support at the moment, and that will be addressed in future
5
with "error_report_err(local_err)" on their own. Let's move the
5
patches of this series. Since all vhost-vdpa devices use forced IOMMU,
6
error reporting to the calling site instead to simplify this code
6
this means that SVQ is not usable at this point of the series on any
7
a little bit.
7
device.
8
8
9
Reviewed-by: Eric Blake <eblake@redhat.com>
9
For simplicity it only supports modern devices, that expect vring
10
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
10
in little endian, with split ring and no event idx or indirect
11
Signed-off-by: Thomas Huth <thuth@redhat.com>
11
descriptors. Support for them will not be added in this series.
12
13
It reuses the VirtQueue code for the device part. The driver part is
14
based on Linux's virtio_ring driver, but with stripped functionality
15
and optimizations so it's easier to review.
16
17
However, forwarding buffers have some particular pieces: One of the most
18
unexpected ones is that a guest's buffer can expand through more than
19
one descriptor in SVQ. While this is handled gracefully by qemu's
20
emulated virtio devices, it may cause an unexpected SVQ queue full condition. This
21
patch also solves it by checking for this condition at both guest's
22
kicks and device's calls. The code may be more elegant in the future if
23
SVQ code runs in its own iocontext.
24
25
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
26
Acked-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
27
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
28
---
14
include/net/net.h | 2 +-
29
hw/virtio/vhost-shadow-virtqueue.c | 352 ++++++++++++++++++++++++++++++++++++-
15
net/net.c | 29 +++++------------------------
30
hw/virtio/vhost-shadow-virtqueue.h | 26 +++
16
vl.c | 3 ++-
31
hw/virtio/vhost-vdpa.c | 155 +++++++++++++++-
17
3 files changed, 8 insertions(+), 26 deletions(-)
32
3 files changed, 522 insertions(+), 11 deletions(-)
18
33
19
diff --git a/include/net/net.h b/include/net/net.h
34
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
20
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
21
--- a/include/net/net.h
36
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/include/net/net.h
37
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@ extern const char *legacy_bootp_filename;
38
@@ -XXX,XX +XXX,XX @@
24
39
#include "qemu/error-report.h"
25
int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp);
40
#include "qapi/error.h"
26
int net_client_parse(QemuOptsList *opts_list, const char *str);
41
#include "qemu/main-loop.h"
27
-int net_init_clients(void);
42
+#include "qemu/log.h"
28
+int net_init_clients(Error **errp);
43
+#include "qemu/memalign.h"
29
void net_check_clients(void);
44
#include "linux-headers/linux/vhost.h"
30
void net_cleanup(void);
45
31
void hmp_host_net_add(Monitor *mon, const QDict *qdict);
46
/**
32
diff --git a/net/net.c b/net/net.c
47
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
48
}
49
50
/**
51
- * Forward guest notifications.
52
+ * Number of descriptors that the SVQ can make available from the guest.
53
+ *
54
+ * @svq: The svq
55
+ */
56
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
57
+{
58
+ return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
59
+}
60
+
61
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
62
+ const struct iovec *iovec, size_t num,
63
+ bool more_descs, bool write)
64
+{
65
+ uint16_t i = svq->free_head, last = svq->free_head;
66
+ unsigned n;
67
+ uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
68
+ vring_desc_t *descs = svq->vring.desc;
69
+
70
+ if (num == 0) {
71
+ return;
72
+ }
73
+
74
+ for (n = 0; n < num; n++) {
75
+ if (more_descs || (n + 1 < num)) {
76
+ descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
77
+ } else {
78
+ descs[i].flags = flags;
79
+ }
80
+ descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
81
+ descs[i].len = cpu_to_le32(iovec[n].iov_len);
82
+
83
+ last = i;
84
+ i = cpu_to_le16(descs[i].next);
85
+ }
86
+
87
+ svq->free_head = le16_to_cpu(descs[last].next);
88
+}
89
+
90
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
91
+ VirtQueueElement *elem, unsigned *head)
92
+{
93
+ unsigned avail_idx;
94
+ vring_avail_t *avail = svq->vring.avail;
95
+
96
+ *head = svq->free_head;
97
+
98
+ /* We need some descriptors here */
99
+ if (unlikely(!elem->out_num && !elem->in_num)) {
100
+ qemu_log_mask(LOG_GUEST_ERROR,
101
+ "Guest provided element with no descriptors");
102
+ return false;
103
+ }
104
+
105
+ vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
106
+ false);
107
+ vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
108
+
109
+ /*
110
+ * Put the entry in the available array (but don't update avail->idx until
111
+ * they do sync).
112
+ */
113
+ avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
114
+ avail->ring[avail_idx] = cpu_to_le16(*head);
115
+ svq->shadow_avail_idx++;
116
+
117
+ /* Update the avail index after writing the descriptor */
118
+ smp_wmb();
119
+ avail->idx = cpu_to_le16(svq->shadow_avail_idx);
120
+
121
+ return true;
122
+}
123
+
124
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
125
+{
126
+ unsigned qemu_head;
127
+ bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
128
+ if (unlikely(!ok)) {
129
+ return false;
130
+ }
131
+
132
+ svq->ring_id_maps[qemu_head] = elem;
133
+ return true;
134
+}
135
+
136
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
137
+{
138
+ /*
139
+ * We need to expose the available array entries before checking the used
140
+ * flags
141
+ */
142
+ smp_mb();
143
+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
144
+ return;
145
+ }
146
+
147
+ event_notifier_set(&svq->hdev_kick);
148
+}
149
+
150
+/**
151
+ * Forward available buffers.
152
+ *
153
+ * @svq: Shadow VirtQueue
154
+ *
155
+ * Note that this function does not guarantee that all guest's available
156
+ * buffers are available to the device in SVQ avail ring. The guest may have
157
+ * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
158
+ * qemu vaddr.
159
+ *
160
+ * If that happens, guest's kick notifications will be disabled until the
161
+ * device uses some buffers.
162
+ */
163
+static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
164
+{
165
+ /* Clear event notifier */
166
+ event_notifier_test_and_clear(&svq->svq_kick);
167
+
168
+ /* Forward to the device as many available buffers as possible */
169
+ do {
170
+ virtio_queue_set_notification(svq->vq, false);
171
+
172
+ while (true) {
173
+ VirtQueueElement *elem;
174
+ bool ok;
175
+
176
+ if (svq->next_guest_avail_elem) {
177
+ elem = g_steal_pointer(&svq->next_guest_avail_elem);
178
+ } else {
179
+ elem = virtqueue_pop(svq->vq, sizeof(*elem));
180
+ }
181
+
182
+ if (!elem) {
183
+ break;
184
+ }
185
+
186
+ if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
187
+ /*
188
+ * This condition is possible since a contiguous buffer in GPA
189
+ * does not imply a contiguous buffer in qemu's VA
190
+ * scatter-gather segments. If that happens, the buffer exposed
191
+ * to the device needs to be a chain of descriptors at this
192
+ * moment.
193
+ *
194
+ * SVQ cannot hold more available buffers if we are here:
195
+ * queue the current guest descriptor and ignore further kicks
196
+ * until some elements are used.
197
+ */
198
+ svq->next_guest_avail_elem = elem;
199
+ return;
200
+ }
201
+
202
+ ok = vhost_svq_add(svq, elem);
203
+ if (unlikely(!ok)) {
204
+ /* VQ is broken, just return and ignore any other kicks */
205
+ return;
206
+ }
207
+ vhost_svq_kick(svq);
208
+ }
209
+
210
+ virtio_queue_set_notification(svq->vq, true);
211
+ } while (!virtio_queue_empty(svq->vq));
212
+}
213
+
214
+/**
215
+ * Handle guest's kick.
216
*
217
* @n: guest kick event notifier, the one that guest set to notify svq.
218
*/
219
-static void vhost_handle_guest_kick(EventNotifier *n)
220
+static void vhost_handle_guest_kick_notifier(EventNotifier *n)
221
{
222
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
223
event_notifier_test_and_clear(n);
224
- event_notifier_set(&svq->hdev_kick);
225
+ vhost_handle_guest_kick(svq);
226
+}
227
+
228
+static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
229
+{
230
+ if (svq->last_used_idx != svq->shadow_used_idx) {
231
+ return true;
232
+ }
233
+
234
+ svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
235
+
236
+ return svq->last_used_idx != svq->shadow_used_idx;
237
}
238
239
/**
240
- * Forward vhost notifications
241
+ * Enable vhost device calls after disabling them.
242
+ *
243
+ * @svq: The svq
244
+ *
245
+ * It returns false if there are pending used buffers from the vhost device,
246
+ * avoiding the possible races between SVQ checking for more work and enabling
247
+ * callbacks. True if SVQ used vring has no more pending buffers.
248
+ */
249
+static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
250
+{
251
+ svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
252
+ /* Make sure the flag is written before the read of used_idx */
253
+ smp_mb();
254
+ return !vhost_svq_more_used(svq);
255
+}
256
+
257
+static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
258
+{
259
+ svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
260
+}
261
+
262
+static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
263
+ uint32_t *len)
264
+{
265
+ vring_desc_t *descs = svq->vring.desc;
266
+ const vring_used_t *used = svq->vring.used;
267
+ vring_used_elem_t used_elem;
268
+ uint16_t last_used;
269
+
270
+ if (!vhost_svq_more_used(svq)) {
271
+ return NULL;
272
+ }
273
+
274
+ /* Only get used array entries after they have been exposed by dev */
275
+ smp_rmb();
276
+ last_used = svq->last_used_idx & (svq->vring.num - 1);
277
+ used_elem.id = le32_to_cpu(used->ring[last_used].id);
278
+ used_elem.len = le32_to_cpu(used->ring[last_used].len);
279
+
280
+ svq->last_used_idx++;
281
+ if (unlikely(used_elem.id >= svq->vring.num)) {
282
+ qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
283
+ svq->vdev->name, used_elem.id);
284
+ return NULL;
285
+ }
286
+
287
+ if (unlikely(!svq->ring_id_maps[used_elem.id])) {
288
+ qemu_log_mask(LOG_GUEST_ERROR,
289
+ "Device %s says index %u is used, but it was not available",
290
+ svq->vdev->name, used_elem.id);
291
+ return NULL;
292
+ }
293
+
294
+ descs[used_elem.id].next = svq->free_head;
295
+ svq->free_head = used_elem.id;
296
+
297
+ *len = used_elem.len;
298
+ return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
299
+}
300
+
301
+static void vhost_svq_flush(VhostShadowVirtqueue *svq,
302
+ bool check_for_avail_queue)
303
+{
304
+ VirtQueue *vq = svq->vq;
305
+
306
+ /* Forward as many used buffers as possible. */
307
+ do {
308
+ unsigned i = 0;
309
+
310
+ vhost_svq_disable_notification(svq);
311
+ while (true) {
312
+ uint32_t len;
313
+ g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
314
+ if (!elem) {
315
+ break;
316
+ }
317
+
318
+ if (unlikely(i >= svq->vring.num)) {
319
+ qemu_log_mask(LOG_GUEST_ERROR,
320
+ "More than %u used buffers obtained in a %u size SVQ",
321
+ i, svq->vring.num);
322
+ virtqueue_fill(vq, elem, len, i);
323
+ virtqueue_flush(vq, i);
324
+ return;
325
+ }
326
+ virtqueue_fill(vq, elem, len, i++);
327
+ }
328
+
329
+ virtqueue_flush(vq, i);
330
+ event_notifier_set(&svq->svq_call);
331
+
332
+ if (check_for_avail_queue && svq->next_guest_avail_elem) {
333
+ /*
334
+ * Avail ring was full when vhost_svq_flush was called, so it's a
335
+ * good moment to make more descriptors available if possible.
336
+ */
337
+ vhost_handle_guest_kick(svq);
338
+ }
339
+ } while (!vhost_svq_enable_notification(svq));
340
+}
341
+
342
+/**
343
+ * Forward used buffers.
344
*
345
* @n: hdev call event notifier, the one that device set to notify svq.
346
+ *
347
+ * Note that we are not making any buffers available in the loop, there is no
348
+ * way that it runs more than virtqueue size times.
349
*/
350
static void vhost_svq_handle_call(EventNotifier *n)
351
{
352
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
353
hdev_call);
354
event_notifier_test_and_clear(n);
355
- event_notifier_set(&svq->svq_call);
356
+ vhost_svq_flush(svq, true);
357
}
358
359
/**
360
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
361
if (poll_start) {
362
event_notifier_init_fd(svq_kick, svq_kick_fd);
363
event_notifier_set(svq_kick);
364
- event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
365
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
366
+ }
367
+}
368
+
369
+/**
370
+ * Start the shadow virtqueue operation.
371
+ *
372
+ * @svq: Shadow Virtqueue
373
+ * @vdev: VirtIO device
374
+ * @vq: Virtqueue to shadow
375
+ */
376
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
377
+ VirtQueue *vq)
378
+{
379
+ size_t desc_size, driver_size, device_size;
380
+
381
+ svq->next_guest_avail_elem = NULL;
382
+ svq->shadow_avail_idx = 0;
383
+ svq->shadow_used_idx = 0;
384
+ svq->last_used_idx = 0;
385
+ svq->vdev = vdev;
386
+ svq->vq = vq;
387
+
388
+ svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
389
+ driver_size = vhost_svq_driver_area_size(svq);
390
+ device_size = vhost_svq_device_area_size(svq);
391
+ svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
392
+ desc_size = sizeof(vring_desc_t) * svq->vring.num;
393
+ svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
394
+ memset(svq->vring.desc, 0, driver_size);
395
+ svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
396
+ memset(svq->vring.used, 0, device_size);
397
+ svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
398
+ for (unsigned i = 0; i < svq->vring.num - 1; i++) {
399
+ svq->vring.desc[i].next = cpu_to_le16(i + 1);
400
}
401
}
402
403
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
404
void vhost_svq_stop(VhostShadowVirtqueue *svq)
405
{
406
event_notifier_set_handler(&svq->svq_kick, NULL);
407
+ g_autofree VirtQueueElement *next_avail_elem = NULL;
408
+
409
+ if (!svq->vq) {
410
+ return;
411
+ }
412
+
413
+ /* Send all pending used descriptors to guest */
414
+ vhost_svq_flush(svq, false);
415
+
416
+ for (unsigned i = 0; i < svq->vring.num; ++i) {
417
+ g_autofree VirtQueueElement *elem = NULL;
418
+ elem = g_steal_pointer(&svq->ring_id_maps[i]);
419
+ if (elem) {
420
+ virtqueue_detach_element(svq->vq, elem, 0);
421
+ }
422
+ }
423
+
424
+ next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
425
+ if (next_avail_elem) {
426
+ virtqueue_detach_element(svq->vq, next_avail_elem, 0);
427
+ }
428
+ svq->vq = NULL;
429
+ g_free(svq->ring_id_maps);
430
+ qemu_vfree(svq->vring.desc);
431
+ qemu_vfree(svq->vring.used);
432
}
433
434
/**
435
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
33
index XXXXXXX..XXXXXXX 100644
436
index XXXXXXX..XXXXXXX 100644
34
--- a/net/net.c
437
--- a/hw/virtio/vhost-shadow-virtqueue.h
35
+++ b/net/net.c
438
+++ b/hw/virtio/vhost-shadow-virtqueue.h
36
@@ -XXX,XX +XXX,XX @@ void net_check_clients(void)
439
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
37
440
38
static int net_init_client(void *dummy, QemuOpts *opts, Error **errp)
441
/* Guest's call notifier, where the SVQ calls guest. */
442
EventNotifier svq_call;
443
+
444
+ /* Virtio queue shadowing */
445
+ VirtQueue *vq;
446
+
447
+ /* Virtio device */
448
+ VirtIODevice *vdev;
449
+
450
+ /* Map for use the guest's descriptors */
451
+ VirtQueueElement **ring_id_maps;
452
+
453
+ /* Next VirtQueue element that guest made available */
454
+ VirtQueueElement *next_guest_avail_elem;
455
+
456
+ /* Next head to expose to the device */
457
+ uint16_t shadow_avail_idx;
458
+
459
+ /* Next free descriptor */
460
+ uint16_t free_head;
461
+
462
+ /* Last seen used idx */
463
+ uint16_t shadow_used_idx;
464
+
465
+ /* Next head to consume from the device */
466
+ uint16_t last_used_idx;
467
} VhostShadowVirtqueue;
468
469
bool vhost_svq_valid_features(uint64_t features, Error **errp);
470
@@ -XXX,XX +XXX,XX @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
471
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
472
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
473
474
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
475
+ VirtQueue *vq);
476
void vhost_svq_stop(VhostShadowVirtqueue *svq);
477
478
VhostShadowVirtqueue *vhost_svq_new(void);
479
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
480
index XXXXXXX..XXXXXXX 100644
481
--- a/hw/virtio/vhost-vdpa.c
482
+++ b/hw/virtio/vhost-vdpa.c
483
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
484
* Note that this function does not rewind kick file descriptor if cannot set
485
* call one.
486
*/
487
-static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
488
- VhostShadowVirtqueue *svq, unsigned idx,
489
- Error **errp)
490
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
491
+ VhostShadowVirtqueue *svq, unsigned idx,
492
+ Error **errp)
39
{
493
{
40
- Error *local_err = NULL;
494
struct vhost_vring_file file = {
41
-
495
.index = dev->vq_index + idx,
42
- net_client_init(opts, false, &local_err);
496
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
43
- if (local_err) {
497
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
44
- error_report_err(local_err);
498
if (unlikely(r != 0)) {
45
- return -1;
499
error_setg_errno(errp, -r, "Can't set device kick fd");
46
- }
500
- return false;
47
-
501
+ return r;
48
- return 0;
502
}
49
+ return net_client_init(opts, false, errp);
503
504
event_notifier = &svq->hdev_call;
505
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
506
error_setg_errno(errp, -r, "Can't set device call fd");
507
}
508
509
+ return r;
510
+}
511
+
512
+/**
513
+ * Unmap a SVQ area in the device
514
+ */
515
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
516
+ hwaddr size)
517
+{
518
+ int r;
519
+
520
+ size = ROUND_UP(size, qemu_real_host_page_size);
521
+ r = vhost_vdpa_dma_unmap(v, iova, size);
522
+ return r == 0;
523
+}
524
+
525
+static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
526
+ const VhostShadowVirtqueue *svq)
527
+{
528
+ struct vhost_vdpa *v = dev->opaque;
529
+ struct vhost_vring_addr svq_addr;
530
+ size_t device_size = vhost_svq_device_area_size(svq);
531
+ size_t driver_size = vhost_svq_driver_area_size(svq);
532
+ bool ok;
533
+
534
+ vhost_svq_get_vring_addr(svq, &svq_addr);
535
+
536
+ ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
537
+ if (unlikely(!ok)) {
538
+ return false;
539
+ }
540
+
541
+ return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
542
+}
543
+
544
+/**
545
+ * Map the shadow virtqueue rings in the device
546
+ *
547
+ * @dev: The vhost device
548
+ * @svq: The shadow virtqueue
549
+ * @addr: Assigned IOVA addresses
550
+ * @errp: Error pointer
551
+ */
552
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
553
+ const VhostShadowVirtqueue *svq,
554
+ struct vhost_vring_addr *addr,
555
+ Error **errp)
556
+{
557
+ struct vhost_vdpa *v = dev->opaque;
558
+ size_t device_size = vhost_svq_device_area_size(svq);
559
+ size_t driver_size = vhost_svq_driver_area_size(svq);
560
+ int r;
561
+
562
+ ERRP_GUARD();
563
+ vhost_svq_get_vring_addr(svq, addr);
564
+
565
+ r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
566
+ (void *)(uintptr_t)addr->desc_user_addr, true);
567
+ if (unlikely(r != 0)) {
568
+ error_setg_errno(errp, -r, "Cannot create vq driver region: ");
569
+ return false;
570
+ }
571
+
572
+ r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
573
+ (void *)(intptr_t)addr->used_user_addr, false);
574
+ if (unlikely(r != 0)) {
575
+ error_setg_errno(errp, -r, "Cannot create vq device region: ");
576
+ }
577
+
578
+ return r == 0;
579
+}
580
+
581
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
582
+ VhostShadowVirtqueue *svq, unsigned idx,
583
+ Error **errp)
584
+{
585
+ uint16_t vq_index = dev->vq_index + idx;
586
+ struct vhost_vring_state s = {
587
+ .index = vq_index,
588
+ };
589
+ int r;
590
+
591
+ r = vhost_vdpa_set_dev_vring_base(dev, &s);
592
+ if (unlikely(r)) {
593
+ error_setg_errno(errp, -r, "Cannot set vring base");
594
+ return false;
595
+ }
596
+
597
+ r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
598
return r == 0;
50
}
599
}
51
600
52
static int net_init_netdev(void *dummy, QemuOpts *opts, Error **errp)
601
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
53
{
54
- Error *local_err = NULL;
55
- int ret;
56
-
57
- ret = net_client_init(opts, true, &local_err);
58
- if (local_err) {
59
- error_report_err(local_err);
60
- return -1;
61
- }
62
-
63
- return ret;
64
+ return net_client_init(opts, true, errp);
65
}
66
67
-int net_init_clients(void)
68
+int net_init_clients(Error **errp)
69
{
70
- QemuOptsList *net = qemu_find_opts("net");
71
-
72
net_change_state_entry =
73
qemu_add_vm_change_state_handler(net_vm_change_state_handler, NULL);
74
75
QTAILQ_INIT(&net_clients);
76
77
if (qemu_opts_foreach(qemu_find_opts("netdev"),
78
- net_init_netdev, NULL, NULL)) {
79
+ net_init_netdev, NULL, errp)) {
80
return -1;
81
}
602
}
82
603
83
- if (qemu_opts_foreach(net, net_init_client, NULL, NULL)) {
604
for (i = 0; i < v->shadow_vqs->len; ++i) {
84
+ if (qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL, errp)) {
605
+ VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
85
return -1;
606
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
607
+ struct vhost_vring_addr addr = {
608
+ .index = i,
609
+ };
610
+ int r;
611
bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
612
if (unlikely(!ok)) {
613
- error_reportf_err(err, "Cannot setup SVQ %u: ", i);
614
+ goto err;
615
+ }
616
+
617
+ vhost_svq_start(svq, dev->vdev, vq);
618
+ ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
619
+ if (unlikely(!ok)) {
620
+ goto err_map;
621
+ }
622
+
623
+ /* Override vring GPA set by vhost subsystem */
624
+ r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
625
+ if (unlikely(r != 0)) {
626
+ error_setg_errno(&err, -r, "Cannot set device address");
627
+ goto err_set_addr;
628
+ }
629
+ }
630
+
631
+ return true;
632
+
633
+err_set_addr:
634
+ vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
635
+
636
+err_map:
637
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
638
+
639
+err:
640
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
641
+ for (unsigned j = 0; j < i; ++j) {
642
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
643
+ vhost_vdpa_svq_unmap_rings(dev, svq);
644
+ vhost_svq_stop(svq);
645
+ }
646
+
647
+ return false;
648
+}
649
+
650
+static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
651
+{
652
+ struct vhost_vdpa *v = dev->opaque;
653
+
654
+ if (!v->shadow_vqs) {
655
+ return true;
656
+ }
657
+
658
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
659
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
660
+ bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
661
+ if (unlikely(!ok)) {
662
return false;
663
}
86
}
664
}
87
665
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
88
diff --git a/vl.c b/vl.c
666
}
89
index XXXXXXX..XXXXXXX 100644
667
vhost_vdpa_set_vring_ready(dev);
90
--- a/vl.c
668
} else {
91
+++ b/vl.c
669
+ ok = vhost_vdpa_svqs_stop(dev);
92
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
670
+ if (unlikely(!ok)) {
93
671
+ return -1;
94
colo_info_init();
672
+ }
95
673
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
96
- if (net_init_clients() < 0) {
97
+ if (net_init_clients(&err) < 0) {
98
+ error_report_err(err);
99
exit(1);
100
}
674
}
101
675
102
--
676
--
103
2.7.4
677
2.7.4
104
678
105
679
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Other options like "-chardev" or "-device" feature a nice help text
3
This iova tree function allows it to look for a hole in allocated
4
with the available devices when being called with "help" or "?".
4
regions and return a totally new translation for a given translated
5
Since it is quite useful, especially if you want to see which network
5
address.
6
backends have been compiled into the QEMU binary, let's provide such
6
7
a help text for "-netdev", too.
7
Its usage is mainly to allow devices to access qemu address space,
8
8
remapping guest's one into a new iova space where qemu can add chunks of
9
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
9
addresses.
10
Reviewed-by: Eric Blake <eblake@redhat.com>
10
11
Signed-off-by: Thomas Huth <thuth@redhat.com>
11
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
12
Reviewed-by: Peter Xu <peterx@redhat.com>
13
Acked-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
15
---
14
net/net.c | 37 ++++++++++++++++++++++++++++++++++++-
16
include/qemu/iova-tree.h | 18 +++++++
15
1 file changed, 36 insertions(+), 1 deletion(-)
17
util/iova-tree.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++
16
18
2 files changed, 154 insertions(+)
17
diff --git a/net/net.c b/net/net.c
19
20
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
18
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
19
--- a/net/net.c
22
--- a/include/qemu/iova-tree.h
20
+++ b/net/net.c
23
+++ b/include/qemu/iova-tree.h
21
@@ -XXX,XX +XXX,XX @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
24
@@ -XXX,XX +XXX,XX @@
22
return 0;
25
#define IOVA_OK (0)
26
#define IOVA_ERR_INVALID (-1) /* Invalid parameters */
27
#define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */
28
+#define IOVA_ERR_NOMEM (-3) /* Cannot allocate */
29
30
typedef struct IOVATree IOVATree;
31
typedef struct DMAMap {
32
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova);
33
void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
34
35
/**
36
+ * iova_tree_alloc_map:
37
+ *
38
+ * @tree: the iova tree to allocate from
39
+ * @map: the new map (as translated addr & size) to allocate in the iova region
40
+ * @iova_begin: the minimum address of the allocation
41
+ * @iova_end: the maximum addressable direction of the allocation
42
+ *
43
+ * Allocates a new region of a given size, between iova_begin and iova_end.
44
+ *
45
+ * Return: Same as iova_tree_insert, but cannot overlap and can return error if
46
+ * iova tree is out of free contiguous range. The caller gets the assigned iova
47
+ * in map->iova.
48
+ */
49
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
50
+ hwaddr iova_end);
51
+
52
+/**
53
* iova_tree_destroy:
54
*
55
* @tree: the iova tree to destroy
56
diff --git a/util/iova-tree.c b/util/iova-tree.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/util/iova-tree.c
59
+++ b/util/iova-tree.c
60
@@ -XXX,XX +XXX,XX @@ struct IOVATree {
61
GTree *tree;
62
};
63
64
+/* Args to pass to iova_tree_alloc foreach function. */
65
+struct IOVATreeAllocArgs {
66
+ /* Size of the desired allocation */
67
+ size_t new_size;
68
+
69
+ /* The minimum address allowed in the allocation */
70
+ hwaddr iova_begin;
71
+
72
+ /* Map at the left of the hole, can be NULL if "this" is first one */
73
+ const DMAMap *prev;
74
+
75
+ /* Map at the right of the hole, can be NULL if "prev" is the last one */
76
+ const DMAMap *this;
77
+
78
+ /* If found, we fill in the IOVA here */
79
+ hwaddr iova_result;
80
+
81
+ /* Whether have we found a valid IOVA */
82
+ bool iova_found;
83
+};
84
+
85
+/**
86
+ * Iterate args to the next hole
87
+ *
88
+ * @args: The alloc arguments
89
+ * @next: The next mapping in the tree. Can be NULL to signal the last one
90
+ */
91
+static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args,
92
+ const DMAMap *next)
93
+{
94
+ args->prev = args->this;
95
+ args->this = next;
96
+}
97
+
98
static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
99
{
100
const DMAMap *m1 = a, *m2 = b;
101
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map)
102
return IOVA_OK;
23
}
103
}
24
104
25
+static void show_netdevs(void)
105
+/**
26
+{
106
+ * Try to find an unallocated IOVA range between prev and this elements.
27
+ int idx;
107
+ *
28
+ const char *available_netdevs[] = {
108
+ * @args: Arguments to allocation
29
+ "socket",
109
+ *
30
+ "hubport",
110
+ * Cases:
31
+ "tap",
111
+ *
32
+#ifdef CONFIG_SLIRP
112
+ * (1) !prev, !this: No entries allocated, always succeed
33
+ "user",
113
+ *
34
+#endif
114
+ * (2) !prev, this: We're iterating at the 1st element.
35
+#ifdef CONFIG_L2TPV3
115
+ *
36
+ "l2tpv3",
116
+ * (3) prev, !this: We're iterating at the last element.
37
+#endif
117
+ *
38
+#ifdef CONFIG_VDE
118
+ * (4) prev, this: this is the most common case, we'll try to find a hole
39
+ "vde",
119
+ * between "prev" and "this" mapping.
40
+#endif
120
+ *
41
+#ifdef CONFIG_NET_BRIDGE
121
+ * Note that this function assumes the last valid iova is HWADDR_MAX, but it
42
+ "bridge",
122
+ * searches linearly so it's easy to discard the result if it's not the case.
43
+#endif
123
+ */
44
+#ifdef CONFIG_NETMAP
124
+static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args)
45
+ "netmap",
125
+{
46
+#endif
126
+ const DMAMap *prev = args->prev, *this = args->this;
47
+#ifdef CONFIG_POSIX
127
+ uint64_t hole_start, hole_last;
48
+ "vhost-user",
128
+
49
+#endif
129
+ if (this && this->iova + this->size < args->iova_begin) {
130
+ return;
131
+ }
132
+
133
+ hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin);
134
+ hole_last = this ? this->iova : HWADDR_MAX;
135
+
136
+ if (hole_last - hole_start > args->new_size) {
137
+ args->iova_result = hole_start;
138
+ args->iova_found = true;
139
+ }
140
+}
141
+
142
+/**
143
+ * Foreach dma node in the tree, compare if there is a hole with its previous
144
+ * node (or minimum iova address allowed) and the node.
145
+ *
146
+ * @key: Node iterating
147
+ * @value: Node iterating
148
+ * @pargs: Struct to communicate with the outside world
149
+ *
150
+ * Return: false to keep iterating, true if needs break.
151
+ */
152
+static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value,
153
+ gpointer pargs)
154
+{
155
+ struct IOVATreeAllocArgs *args = pargs;
156
+ DMAMap *node = value;
157
+
158
+ assert(key == value);
159
+
160
+ iova_tree_alloc_args_iterate(args, node);
161
+ iova_tree_alloc_map_in_hole(args);
162
+ return args->iova_found;
163
+}
164
+
165
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
166
+ hwaddr iova_last)
167
+{
168
+ struct IOVATreeAllocArgs args = {
169
+ .new_size = map->size,
170
+ .iova_begin = iova_begin,
50
+ };
171
+ };
51
+
172
+
52
+ printf("Available netdev backend types:\n");
173
+ if (unlikely(iova_last < iova_begin)) {
53
+ for (idx = 0; idx < ARRAY_SIZE(available_netdevs); idx++) {
174
+ return IOVA_ERR_INVALID;
54
+ puts(available_netdevs[idx]);
175
+ }
55
+ }
176
+
56
+}
177
+ /*
57
178
+ * Find a valid hole for the mapping
58
int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp)
179
+ *
180
+ * Assuming low iova_begin, so no need to do a binary search to
181
+ * locate the first node.
182
+ *
183
+ * TODO: Replace all this with g_tree_node_first/next/last when available
184
+ * (from glib since 2.68). To do it with g_tree_foreach complicates the
185
+ * code a lot.
186
+ *
187
+ */
188
+ g_tree_foreach(tree->tree, iova_tree_alloc_traverse, &args);
189
+ if (!args.iova_found) {
190
+ /*
191
+ * Either tree is empty or the last hole is still not checked.
192
+ * g_tree_foreach does not compare (last, iova_last] range, so we check
193
+ * it here.
194
+ */
195
+ iova_tree_alloc_args_iterate(&args, NULL);
196
+ iova_tree_alloc_map_in_hole(&args);
197
+ }
198
+
199
+ if (!args.iova_found || args.iova_result + map->size > iova_last) {
200
+ return IOVA_ERR_NOMEM;
201
+ }
202
+
203
+ map->iova = args.iova_result;
204
+ return iova_tree_insert(tree, map);
205
+}
206
+
207
void iova_tree_destroy(IOVATree *tree)
59
{
208
{
60
@@ -XXX,XX +XXX,XX @@ int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp)
209
g_tree_destroy(tree->tree);
61
int ret = -1;
62
Visitor *v = opts_visitor_new(opts);
63
64
- {
65
+ if (is_netdev && is_help_option(qemu_opt_get(opts, "type"))) {
66
+ show_netdevs();
67
+ exit(0);
68
+ } else {
69
/* Parse convenience option format ip6-net=fec0::0[/64] */
70
const char *ip6_net = qemu_opt_get(opts, "ipv6-net");
71
72
--
210
--
73
2.7.4
211
2.7.4
74
212
75
213
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The function is only used within net.c, so there's no need that
3
This function does the reverse operation of iova_tree_find: To look for
4
this is a global function.
4
a mapping that matches a translated address so we can do the reverse.
5
5
6
While we're at it, also remove the unused prototype compute_mcast_idx()
6
This has linear complexity instead of logarithmic, but it supports
7
(the function has been removed in commit d9caeb09b107e91122d10ba4a08a).
7
overlapping HVA. Future developments could reduce it.
8
8
9
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Signed-off-by: Thomas Huth <thuth@redhat.com>
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
12
---
13
include/net/net.h | 2 --
13
include/qemu/iova-tree.h | 20 +++++++++++++++++++-
14
net/net.c | 2 +-
14
util/iova-tree.c | 34 ++++++++++++++++++++++++++++++++++
15
2 files changed, 1 insertion(+), 3 deletions(-)
15
2 files changed, 53 insertions(+), 1 deletion(-)
16
16
17
diff --git a/include/net/net.h b/include/net/net.h
17
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/net/net.h
19
--- a/include/qemu/iova-tree.h
20
+++ b/include/net/net.h
20
+++ b/include/qemu/iova-tree.h
21
@@ -XXX,XX +XXX,XX @@ extern const char *host_net_devices[];
21
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
22
extern const char *legacy_tftp_prefix;
22
* @tree: the iova tree to search from
23
extern const char *legacy_bootp_filename;
23
* @map: the mapping to search
24
24
*
25
-int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp);
25
- * Search for a mapping in the iova tree that overlaps with the
26
int net_client_parse(QemuOptsList *opts_list, const char *str);
26
+ * Search for a mapping in the iova tree that iova overlaps with the
27
int net_init_clients(Error **errp);
27
* mapping range specified. Only the first found mapping will be
28
void net_check_clients(void);
28
* returned.
29
@@ -XXX,XX +XXX,XX @@ void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd);
29
*
30
#define POLYNOMIAL_LE 0xedb88320
30
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
31
uint32_t net_crc32(const uint8_t *p, int len);
31
const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map);
32
uint32_t net_crc32_le(const uint8_t *p, int len);
32
33
-unsigned compute_mcast_idx(const uint8_t *ep);
33
/**
34
34
+ * iova_tree_find_iova:
35
#define vmstate_offset_macaddr(_state, _field) \
35
+ *
36
vmstate_offset_array(_state, _field.a, uint8_t, \
36
+ * @tree: the iova tree to search from
37
diff --git a/net/net.c b/net/net.c
37
+ * @map: the mapping to search
38
+ *
39
+ * Search for a mapping in the iova tree that translated_addr overlaps with the
40
+ * mapping range specified. Only the first found mapping will be
41
+ * returned.
42
+ *
43
+ * Return: DMAMap pointer if found, or NULL if not found. Note that
44
+ * the returned DMAMap pointer is maintained internally. User should
45
+ * only read the content but never modify or free the content. Also,
46
+ * user is responsible to make sure the pointer is valid (say, no
47
+ * concurrent deletion in progress).
48
+ */
49
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map);
50
+
51
+/**
52
* iova_tree_find_address:
53
*
54
* @tree: the iova tree to search from
55
diff --git a/util/iova-tree.c b/util/iova-tree.c
38
index XXXXXXX..XXXXXXX 100644
56
index XXXXXXX..XXXXXXX 100644
39
--- a/net/net.c
57
--- a/util/iova-tree.c
40
+++ b/net/net.c
58
+++ b/util/iova-tree.c
41
@@ -XXX,XX +XXX,XX @@ static void show_netdevs(void)
59
@@ -XXX,XX +XXX,XX @@ struct IOVATreeAllocArgs {
42
}
60
bool iova_found;
61
};
62
63
+typedef struct IOVATreeFindIOVAArgs {
64
+ const DMAMap *needle;
65
+ const DMAMap *result;
66
+} IOVATreeFindIOVAArgs;
67
+
68
/**
69
* Iterate args to the next hole
70
*
71
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map)
72
return g_tree_lookup(tree->tree, map);
43
}
73
}
44
74
45
-int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp)
75
+static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
46
+static int net_client_init(QemuOpts *opts, bool is_netdev, Error **errp)
76
+ gpointer data)
77
+{
78
+ const DMAMap *map = key;
79
+ IOVATreeFindIOVAArgs *args = data;
80
+ const DMAMap *needle;
81
+
82
+ g_assert(key == value);
83
+
84
+ needle = args->needle;
85
+ if (map->translated_addr + map->size < needle->translated_addr ||
86
+ needle->translated_addr + needle->size < map->translated_addr) {
87
+ return false;
88
+ }
89
+
90
+ args->result = map;
91
+ return true;
92
+}
93
+
94
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map)
95
+{
96
+ IOVATreeFindIOVAArgs args = {
97
+ .needle = map,
98
+ };
99
+
100
+ g_tree_foreach(tree->tree, iova_tree_find_address_iterator, &args);
101
+ return args.result;
102
+}
103
+
104
const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova)
47
{
105
{
48
void *object = NULL;
106
const DMAMap map = { .iova = iova, .size = 0 };
49
Error *err = NULL;
50
--
107
--
51
2.7.4
108
2.7.4
52
109
53
110
diff view generated by jsdifflib
1
From: Jay Zhou <jianjay.zhou@huawei.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
If netdev_add tap,id=net0,...,vhost=on failed in net_init_tap_one(),
3
This tree is able to look for a translated address from an IOVA address.
4
the followed up device_add virtio-net-pci,netdev=net0 will fail
5
too, prints:
6
4
7
TUNSETOFFLOAD ioctl() failed: Bad file descriptor TUNSETOFFLOAD
5
At first glance it is similar to util/iova-tree. However, SVQ working on
8
ioctl() failed: Bad file descriptor
6
devices with limited IOVA space need more capabilities, like allocating
7
IOVA chunks or performing reverse translations (qemu addresses to iova).
9
8
10
The reason is that the fd of tap is closed when an error occurred after
9
The allocation capability, as "assign a free IOVA address to this chunk
11
calling net_init_tap_one().
10
of memory in qemu's address space" allows shadow virtqueue to create a
11
new address space that is not restricted by guest's addressable one, so
12
we can allocate shadow vqs vrings outside of it.
12
13
13
The fd should be closed when calling net_init_tap_one failed:
14
It duplicates the tree so it can search efficiently in both directions,
14
- if tap_set_sndbuf() failed
15
and it will signal overlap if iova or the translated address is present
15
- if tap_set_sndbuf() succeeded but vhost failed to open or
16
in any tree.
16
initialize with vhostforce flag on
17
- with wrong vhost command line parameter
18
The fd should not be closed just because vhost failed to open or
19
initialize but without vhostforce flag. So the followed up
20
device_add can fall back to userspace virtio successfully.
21
17
22
Suggested-by: Michael S. Tsirkin <mst@redhat.com>
18
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
23
Suggested-by: Igor Mammedov <imammedo@redhat.com>
19
Acked-by: Michael S. Tsirkin <mst@redhat.com>
24
Suggested-by: Jason Wang <jasowang@redhat.com>
25
Signed-off-by: Jay Zhou <jianjay.zhou@huawei.com>
26
Signed-off-by: Jason Wang <jasowang@redhat.com>
20
Signed-off-by: Jason Wang <jasowang@redhat.com>
27
---
21
---
28
include/net/vhost_net.h | 3 +++
22
hw/virtio/meson.build | 2 +-
29
net/tap.c | 22 +++++++++++++++++-----
23
hw/virtio/vhost-iova-tree.c | 110 ++++++++++++++++++++++++++++++++++++++++++++
30
2 files changed, 20 insertions(+), 5 deletions(-)
24
hw/virtio/vhost-iova-tree.h | 27 +++++++++++
25
3 files changed, 138 insertions(+), 1 deletion(-)
26
create mode 100644 hw/virtio/vhost-iova-tree.c
27
create mode 100644 hw/virtio/vhost-iova-tree.h
31
28
32
diff --git a/include/net/vhost_net.h b/include/net/vhost_net.h
29
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
33
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
34
--- a/include/net/vhost_net.h
31
--- a/hw/virtio/meson.build
35
+++ b/include/net/vhost_net.h
32
+++ b/hw/virtio/meson.build
33
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
34
35
virtio_ss = ss.source_set()
36
virtio_ss.add(files('virtio.c'))
37
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
38
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
39
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
40
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
41
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
42
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
43
new file mode 100644
44
index XXXXXXX..XXXXXXX
45
--- /dev/null
46
+++ b/hw/virtio/vhost-iova-tree.c
36
@@ -XXX,XX +XXX,XX @@
47
@@ -XXX,XX +XXX,XX @@
37
#include "net/net.h"
48
+/*
38
#include "hw/virtio/vhost-backend.h"
49
+ * vhost software live migration iova tree
39
50
+ *
40
+#define VHOST_NET_INIT_FAILED \
51
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
41
+ "vhost-net requested but could not be initialized"
52
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
53
+ *
54
+ * SPDX-License-Identifier: GPL-2.0-or-later
55
+ */
42
+
56
+
43
struct vhost_net;
57
+#include "qemu/osdep.h"
44
typedef struct vhost_net VHostNetState;
58
+#include "qemu/iova-tree.h"
45
59
+#include "vhost-iova-tree.h"
46
diff --git a/net/tap.c b/net/tap.c
60
+
47
index XXXXXXX..XXXXXXX 100644
61
+#define iova_min_addr qemu_real_host_page_size
48
--- a/net/tap.c
62
+
49
+++ b/net/tap.c
63
+/**
50
@@ -XXX,XX +XXX,XX @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
64
+ * VhostIOVATree, able to:
51
if (vhostfdname) {
65
+ * - Translate iova address
52
vhostfd = monitor_fd_param(cur_mon, vhostfdname, &err);
66
+ * - Reverse translate iova address (from translated to iova)
53
if (vhostfd == -1) {
67
+ * - Allocate IOVA regions for translated range (linear operation)
54
- error_propagate(errp, err);
68
+ */
55
+ if (tap->has_vhostforce && tap->vhostforce) {
69
+struct VhostIOVATree {
56
+ error_propagate(errp, err);
70
+ /* First addressable iova address in the device */
57
+ } else {
71
+ uint64_t iova_first;
58
+ warn_report_err(err);
72
+
59
+ }
73
+ /* Last addressable iova address in the device */
60
return;
74
+ uint64_t iova_last;
61
}
75
+
62
} else {
76
+ /* IOVA address to qemu memory maps. */
63
vhostfd = open("/dev/vhost-net", O_RDWR);
77
+ IOVATree *iova_taddr_map;
64
if (vhostfd < 0) {
78
+};
65
- error_setg_errno(errp, errno,
79
+
66
- "tap: open vhost char device failed");
80
+/**
67
+ if (tap->has_vhostforce && tap->vhostforce) {
81
+ * Create a new IOVA tree
68
+ error_setg_errno(errp, errno,
82
+ *
69
+ "tap: open vhost char device failed");
83
+ * Returns the new IOVA tree
70
+ } else {
84
+ */
71
+ warn_report("tap: open vhost char device failed: %s",
85
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
72
+ strerror(errno));
86
+{
73
+ }
87
+ VhostIOVATree *tree = g_new(VhostIOVATree, 1);
74
return;
88
+
75
}
89
+ /* Some devices do not like 0 addresses */
76
fcntl(vhostfd, F_SETFL, O_NONBLOCK);
90
+ tree->iova_first = MAX(iova_first, iova_min_addr);
77
@@ -XXX,XX +XXX,XX @@ static void net_init_tap_one(const NetdevTapOptions *tap, NetClientState *peer,
91
+ tree->iova_last = iova_last;
78
92
+
79
s->vhost_net = vhost_net_init(&options);
93
+ tree->iova_taddr_map = iova_tree_new();
80
if (!s->vhost_net) {
94
+ return tree;
81
- error_setg(errp,
95
+}
82
- "vhost-net requested but could not be initialized");
96
+
83
+ if (tap->has_vhostforce && tap->vhostforce) {
97
+/**
84
+ error_setg(errp, VHOST_NET_INIT_FAILED);
98
+ * Delete an iova tree
85
+ } else {
99
+ */
86
+ warn_report(VHOST_NET_INIT_FAILED);
100
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
87
+ }
101
+{
88
return;
102
+ iova_tree_destroy(iova_tree->iova_taddr_map);
89
}
103
+ g_free(iova_tree);
90
} else if (vhostfdname) {
104
+}
105
+
106
+/**
107
+ * Find the IOVA address stored from a memory address
108
+ *
109
+ * @tree: The iova tree
110
+ * @map: The map with the memory address
111
+ *
112
+ * Return the stored mapping, or NULL if not found.
113
+ */
114
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
115
+ const DMAMap *map)
116
+{
117
+ return iova_tree_find_iova(tree->iova_taddr_map, map);
118
+}
119
+
120
+/**
121
+ * Allocate a new mapping
122
+ *
123
+ * @tree: The iova tree
124
+ * @map: The iova map
125
+ *
126
+ * Returns:
127
+ * - IOVA_OK if the map fits in the container
128
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
129
+ * - IOVA_ERR_NOMEM if tree cannot allocate more space.
130
+ *
131
+ * It returns the assigned iova in map->iova if the return value is IOVA_OK.
132
+ */
133
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
134
+{
135
+ /* Some vhost devices do not like addr 0. Skip first page */
136
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
137
+
138
+ if (map->translated_addr + map->size < map->translated_addr ||
139
+ map->perm == IOMMU_NONE) {
140
+ return IOVA_ERR_INVALID;
141
+ }
142
+
143
+ /* Allocate a node in IOVA address */
144
+ return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
145
+ tree->iova_last);
146
+}
147
+
148
+/**
149
+ * Remove existing mappings from iova tree
150
+ *
151
+ * @iova_tree: The vhost iova tree
152
+ * @map: The map to remove
153
+ */
154
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
155
+{
156
+ iova_tree_remove(iova_tree->iova_taddr_map, map);
157
+}
158
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
159
new file mode 100644
160
index XXXXXXX..XXXXXXX
161
--- /dev/null
162
+++ b/hw/virtio/vhost-iova-tree.h
163
@@ -XXX,XX +XXX,XX @@
164
+/*
165
+ * vhost software live migration iova tree
166
+ *
167
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
168
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
169
+ *
170
+ * SPDX-License-Identifier: GPL-2.0-or-later
171
+ */
172
+
173
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
174
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
175
+
176
+#include "qemu/iova-tree.h"
177
+#include "exec/memory.h"
178
+
179
+typedef struct VhostIOVATree VhostIOVATree;
180
+
181
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
182
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
183
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
184
+
185
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
186
+ const DMAMap *map);
187
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
188
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
189
+
190
+#endif
91
--
191
--
92
2.7.4
192
2.7.4
93
193
94
194
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
"-net dump" has been marked as deprecated since QEMU v2.10, since it
3
Use translations added in VhostIOVATree in SVQ.
4
only works with the deprecated 'vlan' parameter (or hubs). Network
4
5
dumping should be done with "-object filter-dump" nowadays instead.
5
Only introduce usage here, not allocation and deallocation. As with
6
Since nobody complained so far about the deprecation message, let's
6
previous patches, we use the dead code paths of shadow_vqs_enabled to
7
finally get rid of "-net dump" now.
7
avoid committing too many changes at once. These are impossible to take
8
8
at the moment.
9
Signed-off-by: Thomas Huth <thuth@redhat.com>
9
10
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
11
Acked-by: Michael S. Tsirkin <mst@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
13
---
12
net/dump.c | 102 ++------------------------------------------------------
14
hw/virtio/vhost-shadow-virtqueue.c | 86 +++++++++++++++++++++++---
13
net/net.c | 9 +----
15
hw/virtio/vhost-shadow-virtqueue.h | 6 +-
14
qapi/net.json | 29 ++++------------
16
hw/virtio/vhost-vdpa.c | 122 +++++++++++++++++++++++++++++++------
15
qemu-doc.texi | 6 ----
17
include/hw/virtio/vhost-vdpa.h | 3 +
16
qemu-options.hx | 8 -----
18
4 files changed, 187 insertions(+), 30 deletions(-)
17
5 files changed, 9 insertions(+), 145 deletions(-)
19
18
20
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
19
diff --git a/net/dump.c b/net/dump.c
20
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
21
--- a/net/dump.c
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/net/dump.c
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@ static int net_dump_state_init(DumpState *s, const char *filename,
24
@@ -XXX,XX +XXX,XX @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
24
25
return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
25
fd = open(filename, O_CREAT | O_TRUNC | O_WRONLY | O_BINARY, 0644);
26
}
26
if (fd < 0) {
27
27
- error_setg_errno(errp, errno, "-net dump: can't open %s", filename);
28
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
28
+ error_setg_errno(errp, errno, "net dump: can't open %s", filename);
29
+/**
29
return -1;
30
+ * Translate addresses between the qemu's virtual address and the SVQ IOVA
31
+ *
32
+ * @svq: Shadow VirtQueue
33
+ * @addrs: Translated IOVA addresses
34
+ * @iovec: Source qemu's VA addresses
35
+ * @num: Length of iovec and minimum length of addrs
36
+ */
37
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
38
+ hwaddr *addrs, const struct iovec *iovec,
39
+ size_t num)
40
+{
41
+ if (num == 0) {
42
+ return true;
43
+ }
44
+
45
+ for (size_t i = 0; i < num; ++i) {
46
+ DMAMap needle = {
47
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
48
+ .size = iovec[i].iov_len,
49
+ };
50
+ Int128 needle_last, map_last;
51
+ size_t off;
52
+
53
+ const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
54
+ /*
55
+ * Map cannot be NULL since iova map contains all guest space and
56
+ * qemu already has a physical address mapped
57
+ */
58
+ if (unlikely(!map)) {
59
+ qemu_log_mask(LOG_GUEST_ERROR,
60
+ "Invalid address 0x%"HWADDR_PRIx" given by guest",
61
+ needle.translated_addr);
62
+ return false;
63
+ }
64
+
65
+ off = needle.translated_addr - map->translated_addr;
66
+ addrs[i] = map->iova + off;
67
+
68
+ needle_last = int128_add(int128_make64(needle.translated_addr),
69
+ int128_make64(iovec[i].iov_len));
70
+ map_last = int128_make64(map->translated_addr + map->size);
71
+ if (unlikely(int128_gt(needle_last, map_last))) {
72
+ qemu_log_mask(LOG_GUEST_ERROR,
73
+ "Guest buffer expands over iova range");
74
+ return false;
75
+ }
76
+ }
77
+
78
+ return true;
79
+}
80
+
81
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
82
const struct iovec *iovec, size_t num,
83
bool more_descs, bool write)
84
{
85
@@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
86
} else {
87
descs[i].flags = flags;
88
}
89
- descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
90
+ descs[i].addr = cpu_to_le64(sg[n]);
91
descs[i].len = cpu_to_le32(iovec[n].iov_len);
92
93
last = i;
94
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
95
{
96
unsigned avail_idx;
97
vring_avail_t *avail = svq->vring.avail;
98
+ bool ok;
99
+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
100
101
*head = svq->free_head;
102
103
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
104
return false;
30
}
105
}
31
106
32
@@ -XXX,XX +XXX,XX @@ static int net_dump_state_init(DumpState *s, const char *filename,
107
- vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
33
hdr.linktype = 1;
108
- false);
34
109
- vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
35
if (write(fd, &hdr, sizeof(hdr)) < sizeof(hdr)) {
110
+ ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
36
- error_setg_errno(errp, errno, "-net dump write error");
111
+ if (unlikely(!ok)) {
37
+ error_setg_errno(errp, errno, "net dump write error");
112
+ return false;
38
close(fd);
113
+ }
39
return -1;
114
+ vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
115
+ elem->in_num > 0, false);
116
+
117
+
118
+ ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
119
+ if (unlikely(!ok)) {
120
+ return false;
121
+ }
122
+
123
+ vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
124
125
/*
126
* Put the entry in the available array (but don't update avail->idx until
127
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
128
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
129
struct vhost_vring_addr *addr)
130
{
131
- addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
132
- addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
133
- addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
134
+ addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
135
+ addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
136
+ addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
137
}
138
139
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
140
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
141
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
142
* shadow methods and file descriptors.
143
*
144
+ * @iova_tree: Tree to perform descriptors translations
145
+ *
146
* Returns the new virtqueue or NULL.
147
*
148
* In case of error, reason is reported through error_report.
149
*/
150
-VhostShadowVirtqueue *vhost_svq_new(void)
151
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
152
{
153
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
154
int r;
155
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
156
157
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
158
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
159
+ svq->iova_tree = iova_tree;
160
return g_steal_pointer(&svq);
161
162
err_init_hdev_call:
163
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
164
index XXXXXXX..XXXXXXX 100644
165
--- a/hw/virtio/vhost-shadow-virtqueue.h
166
+++ b/hw/virtio/vhost-shadow-virtqueue.h
167
@@ -XXX,XX +XXX,XX @@
168
#include "qemu/event_notifier.h"
169
#include "hw/virtio/virtio.h"
170
#include "standard-headers/linux/vhost_types.h"
171
+#include "hw/virtio/vhost-iova-tree.h"
172
173
/* Shadow virtqueue to relay notifications */
174
typedef struct VhostShadowVirtqueue {
175
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
176
/* Virtio device */
177
VirtIODevice *vdev;
178
179
+ /* IOVA mapping */
180
+ VhostIOVATree *iova_tree;
181
+
182
/* Map for use the guest's descriptors */
183
VirtQueueElement **ring_id_maps;
184
185
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
186
VirtQueue *vq);
187
void vhost_svq_stop(VhostShadowVirtqueue *svq);
188
189
-VhostShadowVirtqueue *vhost_svq_new(void);
190
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
191
192
void vhost_svq_free(gpointer vq);
193
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
194
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
195
index XXXXXXX..XXXXXXX 100644
196
--- a/hw/virtio/vhost-vdpa.c
197
+++ b/hw/virtio/vhost-vdpa.c
198
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
199
vaddr, section->readonly);
200
201
llsize = int128_sub(llend, int128_make64(iova));
202
+ if (v->shadow_vqs_enabled) {
203
+ DMAMap mem_region = {
204
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
205
+ .size = int128_get64(llsize) - 1,
206
+ .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
207
+ };
208
+
209
+ int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
210
+ if (unlikely(r != IOVA_OK)) {
211
+ error_report("Can't allocate a mapping (%d)", r);
212
+ goto fail;
213
+ }
214
+
215
+ iova = mem_region.iova;
216
+ }
217
218
vhost_vdpa_iotlb_batch_begin_once(v);
219
ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
220
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
221
222
llsize = int128_sub(llend, int128_make64(iova));
223
224
+ if (v->shadow_vqs_enabled) {
225
+ const DMAMap *result;
226
+ const void *vaddr = memory_region_get_ram_ptr(section->mr) +
227
+ section->offset_within_region +
228
+ (iova - section->offset_within_address_space);
229
+ DMAMap mem_region = {
230
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
231
+ .size = int128_get64(llsize) - 1,
232
+ };
233
+
234
+ result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
235
+ iova = result->iova;
236
+ vhost_iova_tree_remove(v->iova_tree, &mem_region);
237
+ }
238
vhost_vdpa_iotlb_batch_begin_once(v);
239
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
240
if (ret) {
241
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
242
243
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
244
for (unsigned n = 0; n < hdev->nvqs; ++n) {
245
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
246
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
247
248
if (unlikely(!svq)) {
249
error_setg(errp, "Cannot create svq %u", n);
250
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
251
/**
252
* Unmap a SVQ area in the device
253
*/
254
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
255
- hwaddr size)
256
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
257
+ const DMAMap *needle)
258
{
259
+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
260
+ hwaddr size;
261
int r;
262
263
- size = ROUND_UP(size, qemu_real_host_page_size);
264
- r = vhost_vdpa_dma_unmap(v, iova, size);
265
+ if (unlikely(!result)) {
266
+ error_report("Unable to find SVQ address to unmap");
267
+ return false;
268
+ }
269
+
270
+ size = ROUND_UP(result->size, qemu_real_host_page_size);
271
+ r = vhost_vdpa_dma_unmap(v, result->iova, size);
272
return r == 0;
273
}
274
275
static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
276
const VhostShadowVirtqueue *svq)
277
{
278
+ DMAMap needle = {};
279
struct vhost_vdpa *v = dev->opaque;
280
struct vhost_vring_addr svq_addr;
281
- size_t device_size = vhost_svq_device_area_size(svq);
282
- size_t driver_size = vhost_svq_driver_area_size(svq);
283
bool ok;
284
285
vhost_svq_get_vring_addr(svq, &svq_addr);
286
287
- ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
288
+ needle.translated_addr = svq_addr.desc_user_addr;
289
+ ok = vhost_vdpa_svq_unmap_ring(v, &needle);
290
if (unlikely(!ok)) {
291
return false;
40
}
292
}
41
@@ -XXX,XX +XXX,XX @@ static int net_dump_state_init(DumpState *s, const char *filename,
293
42
return 0;
294
- return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
295
+ needle.translated_addr = svq_addr.used_user_addr;
296
+ return vhost_vdpa_svq_unmap_ring(v, &needle);
297
+}
298
+
299
+/**
300
+ * Map the SVQ area in the device
301
+ *
302
+ * @v: Vhost-vdpa device
303
+ * @needle: The area to search iova
304
+ * @errp: Error pointer
305
+ */
306
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
307
+ Error **errp)
308
+{
309
+ int r;
310
+
311
+ r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
312
+ if (unlikely(r != IOVA_OK)) {
313
+ error_setg(errp, "Cannot allocate iova (%d)", r);
314
+ return false;
315
+ }
316
+
317
+ r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
318
+ (void *)(uintptr_t)needle->translated_addr,
319
+ needle->perm == IOMMU_RO);
320
+ if (unlikely(r != 0)) {
321
+ error_setg_errno(errp, -r, "Cannot map region to device");
322
+ vhost_iova_tree_remove(v->iova_tree, needle);
323
+ }
324
+
325
+ return r == 0;
43
}
326
}
44
327
45
-/* Dumping via VLAN netclient */
328
/**
46
-
329
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
47
-struct DumpNetClient {
330
struct vhost_vring_addr *addr,
48
- NetClientState nc;
331
Error **errp)
49
- DumpState ds;
332
{
50
-};
333
+ DMAMap device_region, driver_region;
51
-typedef struct DumpNetClient DumpNetClient;
334
+ struct vhost_vring_addr svq_addr;
52
-
335
struct vhost_vdpa *v = dev->opaque;
53
-static ssize_t dumpclient_receive(NetClientState *nc, const uint8_t *buf,
336
size_t device_size = vhost_svq_device_area_size(svq);
54
- size_t size)
337
size_t driver_size = vhost_svq_driver_area_size(svq);
55
-{
338
- int r;
56
- DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
339
+ size_t avail_offset;
57
- struct iovec iov = {
340
+ bool ok;
58
- .iov_base = (void *)buf,
341
59
- .iov_len = size
342
ERRP_GUARD();
60
- };
343
- vhost_svq_get_vring_addr(svq, addr);
61
-
344
+ vhost_svq_get_vring_addr(svq, &svq_addr);
62
- return dump_receive_iov(&dc->ds, &iov, 1);
345
63
-}
346
- r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
64
-
347
- (void *)(uintptr_t)addr->desc_user_addr, true);
65
-static ssize_t dumpclient_receive_iov(NetClientState *nc,
348
- if (unlikely(r != 0)) {
66
- const struct iovec *iov, int cnt)
349
- error_setg_errno(errp, -r, "Cannot create vq driver region: ");
67
-{
350
+ driver_region = (DMAMap) {
68
- DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
351
+ .translated_addr = svq_addr.desc_user_addr,
69
-
352
+ .size = driver_size - 1,
70
- return dump_receive_iov(&dc->ds, iov, cnt);
353
+ .perm = IOMMU_RO,
71
-}
354
+ };
72
-
355
+ ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
73
-static void dumpclient_cleanup(NetClientState *nc)
356
+ if (unlikely(!ok)) {
74
-{
357
+ error_prepend(errp, "Cannot create vq driver region: ");
75
- DumpNetClient *dc = DO_UPCAST(DumpNetClient, nc, nc);
358
return false;
76
-
359
}
77
- dump_cleanup(&dc->ds);
360
+ addr->desc_user_addr = driver_region.iova;
78
-}
361
+ avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
79
-
362
+ addr->avail_user_addr = driver_region.iova + avail_offset;
80
-static NetClientInfo net_dump_info = {
363
81
- .type = NET_CLIENT_DRIVER_DUMP,
364
- r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
82
- .size = sizeof(DumpNetClient),
365
- (void *)(intptr_t)addr->used_user_addr, false);
83
- .receive = dumpclient_receive,
366
- if (unlikely(r != 0)) {
84
- .receive_iov = dumpclient_receive_iov,
367
- error_setg_errno(errp, -r, "Cannot create vq device region: ");
85
- .cleanup = dumpclient_cleanup,
368
+ device_region = (DMAMap) {
86
-};
369
+ .translated_addr = svq_addr.used_user_addr,
87
-
370
+ .size = device_size - 1,
88
-int net_init_dump(const Netdev *netdev, const char *name,
371
+ .perm = IOMMU_RW,
89
- NetClientState *peer, Error **errp)
372
+ };
90
-{
373
+ ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
91
- int len, rc;
374
+ if (unlikely(!ok)) {
92
- const char *file;
375
+ error_prepend(errp, "Cannot create vq device region: ");
93
- char def_file[128];
376
+ vhost_vdpa_svq_unmap_ring(v, &driver_region);
94
- const NetdevDumpOptions *dump;
377
}
95
- NetClientState *nc;
378
+ addr->used_user_addr = device_region.iova;
96
- DumpNetClient *dnc;
379
97
-
380
- return r == 0;
98
- assert(netdev->type == NET_CLIENT_DRIVER_DUMP);
381
+ return ok;
99
- dump = &netdev->u.dump;
382
}
100
-
383
101
- assert(peer);
384
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
102
-
385
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
103
- error_report("'-net dump' is deprecated. "
104
- "Please use '-object filter-dump' instead.");
105
-
106
- if (dump->has_file) {
107
- file = dump->file;
108
- } else {
109
- int id;
110
- int ret;
111
-
112
- ret = net_hub_id_for_client(peer, &id);
113
- assert(ret == 0); /* peer must be on a hub */
114
-
115
- snprintf(def_file, sizeof(def_file), "qemu-vlan%d.pcap", id);
116
- file = def_file;
117
- }
118
-
119
- if (dump->has_len) {
120
- if (dump->len > INT_MAX) {
121
- error_setg(errp, "invalid length: %"PRIu64, dump->len);
122
- return -1;
123
- }
124
- len = dump->len;
125
- } else {
126
- len = 65536;
127
- }
128
-
129
- nc = qemu_new_net_client(&net_dump_info, peer, "dump", name);
130
- snprintf(nc->info_str, sizeof(nc->info_str),
131
- "dump to %s (len=%d)", file, len);
132
-
133
- dnc = DO_UPCAST(DumpNetClient, nc, nc);
134
- rc = net_dump_state_init(&dnc->ds, file, len, errp);
135
- if (rc) {
136
- qemu_del_net_client(nc);
137
- }
138
- return rc;
139
-}
140
-
141
-/* Dumping via filter */
142
-
143
#define TYPE_FILTER_DUMP "filter-dump"
144
145
#define FILTER_DUMP(obj) \
146
diff --git a/net/net.c b/net/net.c
147
index XXXXXXX..XXXXXXX 100644
386
index XXXXXXX..XXXXXXX 100644
148
--- a/net/net.c
387
--- a/include/hw/virtio/vhost-vdpa.h
149
+++ b/net/net.c
388
+++ b/include/hw/virtio/vhost-vdpa.h
150
@@ -XXX,XX +XXX,XX @@ static QTAILQ_HEAD(, NetClientState) net_clients;
151
const char *host_net_devices[] = {
152
"tap",
153
"socket",
154
- "dump",
155
#ifdef CONFIG_NET_BRIDGE
156
"bridge",
157
#endif
158
@@ -XXX,XX +XXX,XX @@ static int (* const net_client_init_fun[NET_CLIENT_DRIVER__MAX])(
159
#ifdef CONFIG_NETMAP
160
[NET_CLIENT_DRIVER_NETMAP] = net_init_netmap,
161
#endif
162
- [NET_CLIENT_DRIVER_DUMP] = net_init_dump,
163
#ifdef CONFIG_NET_BRIDGE
164
[NET_CLIENT_DRIVER_BRIDGE] = net_init_bridge,
165
#endif
166
@@ -XXX,XX +XXX,XX @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
167
netdev = object;
168
name = netdev->id;
169
170
- if (netdev->type == NET_CLIENT_DRIVER_DUMP ||
171
- netdev->type == NET_CLIENT_DRIVER_NIC ||
172
+ if (netdev->type == NET_CLIENT_DRIVER_NIC ||
173
!net_client_init_fun[netdev->type]) {
174
error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "type",
175
"a netdev backend type");
176
@@ -XXX,XX +XXX,XX @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
177
legacy.type = NET_CLIENT_DRIVER_VDE;
178
legacy.u.vde = opts->u.vde;
179
break;
180
- case NET_LEGACY_OPTIONS_TYPE_DUMP:
181
- legacy.type = NET_CLIENT_DRIVER_DUMP;
182
- legacy.u.dump = opts->u.dump;
183
- break;
184
case NET_LEGACY_OPTIONS_TYPE_BRIDGE:
185
legacy.type = NET_CLIENT_DRIVER_BRIDGE;
186
legacy.u.bridge = opts->u.bridge;
187
diff --git a/qapi/net.json b/qapi/net.json
188
index XXXXXXX..XXXXXXX 100644
189
--- a/qapi/net.json
190
+++ b/qapi/net.json
191
@@ -XXX,XX +XXX,XX @@
389
@@ -XXX,XX +XXX,XX @@
192
#
390
193
# Add a network backend.
391
#include <gmodule.h>
194
#
392
195
-# @type: the type of network backend. Current valid values are 'user', 'tap',
393
+#include "hw/virtio/vhost-iova-tree.h"
196
-# 'vde', 'socket', 'dump' and 'bridge'
394
#include "hw/virtio/virtio.h"
197
+# @type: the type of network backend. Possible values are listed in
395
#include "standard-headers/linux/vhost_types.h"
198
+# NetClientDriver (excluding 'none' and 'nic')
396
199
#
397
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
200
# @id: the name of the new network backend
398
MemoryListener listener;
201
#
399
struct vhost_vdpa_iova_range iova_range;
202
@@ -XXX,XX +XXX,XX @@
400
bool shadow_vqs_enabled;
203
'*mode': 'uint16' } }
401
+ /* IOVA mapping used by the Shadow Virtqueue */
204
402
+ VhostIOVATree *iova_tree;
205
##
403
GPtrArray *shadow_vqs;
206
-# @NetdevDumpOptions:
404
struct vhost_dev *dev;
207
-#
405
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
208
-# Dump VLAN network traffic to a file.
209
-#
210
-# @len: per-packet size limit (64k default). Understands [TGMKkb]
211
-# suffixes.
212
-#
213
-# @file: dump file path (default is qemu-vlan0.pcap)
214
-#
215
-# Since: 1.2
216
-##
217
-{ 'struct': 'NetdevDumpOptions',
218
- 'data': {
219
- '*len': 'size',
220
- '*file': 'str' } }
221
-
222
-##
223
# @NetdevBridgeOptions:
224
#
225
# Connect a host TAP network interface to a host bridge device.
226
@@ -XXX,XX +XXX,XX @@
227
# Available netdev drivers.
228
#
229
# Since: 2.7
230
+#
231
+# 'dump' - removed with 2.12
232
##
233
{ 'enum': 'NetClientDriver',
234
- 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde', 'dump',
235
+ 'data': [ 'none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
236
'bridge', 'hubport', 'netmap', 'vhost-user' ] }
237
238
##
239
@@ -XXX,XX +XXX,XX @@
240
'l2tpv3': 'NetdevL2TPv3Options',
241
'socket': 'NetdevSocketOptions',
242
'vde': 'NetdevVdeOptions',
243
- 'dump': 'NetdevDumpOptions',
244
'bridge': 'NetdevBridgeOptions',
245
'hubport': 'NetdevHubPortOptions',
246
'netmap': 'NetdevNetmapOptions',
247
@@ -XXX,XX +XXX,XX @@
248
##
249
{ 'enum': 'NetLegacyOptionsType',
250
'data': ['none', 'nic', 'user', 'tap', 'l2tpv3', 'socket', 'vde',
251
- 'dump', 'bridge', 'netmap', 'vhost-user'] }
252
+ 'bridge', 'netmap', 'vhost-user'] }
253
254
##
255
# @NetLegacyOptions:
256
@@ -XXX,XX +XXX,XX @@
257
'l2tpv3': 'NetdevL2TPv3Options',
258
'socket': 'NetdevSocketOptions',
259
'vde': 'NetdevVdeOptions',
260
- 'dump': 'NetdevDumpOptions',
261
'bridge': 'NetdevBridgeOptions',
262
'netmap': 'NetdevNetmapOptions',
263
'vhost-user': 'NetdevVhostUserOptions' } }
264
diff --git a/qemu-doc.texi b/qemu-doc.texi
265
index XXXXXXX..XXXXXXX 100644
266
--- a/qemu-doc.texi
267
+++ b/qemu-doc.texi
268
@@ -XXX,XX +XXX,XX @@ that can be specified with the ``-device'' parameter.
269
The drive addr argument is replaced by the the addr argument
270
that can be specified with the ``-device'' parameter.
271
272
-@subsection -net dump (since 2.10.0)
273
-
274
-The ``--net dump'' argument is now replaced with the
275
-``-object filter-dump'' argument which works in combination
276
-with the modern ``-netdev`` backends instead.
277
-
278
@subsection -usbdevice (since 2.10.0)
279
280
The ``-usbdevice DEV'' argument is now a synonym for setting
281
diff --git a/qemu-options.hx b/qemu-options.hx
282
index XXXXXXX..XXXXXXX 100644
283
--- a/qemu-options.hx
284
+++ b/qemu-options.hx
285
@@ -XXX,XX +XXX,XX @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
286
" configure or create an on-board (or machine default) NIC and\n"
287
" connect it either to VLAN 'n' or the netdev 'nd' (for pluggable\n"
288
" NICs please use '-device devtype,netdev=nd' instead)\n"
289
- "-net dump[,vlan=n][,file=f][,len=n]\n"
290
- " dump traffic on vlan 'n' to file 'f' (max n bytes per packet)\n"
291
"-net none use it alone to have zero network devices. If no -net option\n"
292
" is provided, the default is '-net nic -net user'\n"
293
"-net ["
294
@@ -XXX,XX +XXX,XX @@ qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,sha
295
-device virtio-net-pci,netdev=net0
296
@end example
297
298
-@item -net dump[,vlan=@var{n}][,file=@var{file}][,len=@var{len}]
299
-Dump network traffic on VLAN @var{n} to file @var{file} (@file{qemu-vlan0.pcap} by default).
300
-At most @var{len} bytes (64k by default) per packet are stored. The file format is
301
-libpcap, so it can be analyzed with tools such as tcpdump or Wireshark.
302
-Note: For devices created with '-netdev', use '-object filter-dump,...' instead.
303
-
304
@item -net none
305
Indicate that no network devices should be configured. It is used to
306
override the default configuration (@option{-net nic -net user}) which
307
--
406
--
308
2.7.4
407
2.7.4
309
408
310
409
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Headers like "hw/loader.h" and "qemu/sockets.h" are not needed in
3
This is needed to achieve migration, so the destination can restore its
4
the hw/net/*.c files. And some other headers are included via other
4
index.
5
headers already, so we can drop them, too.
6
5
7
Signed-off-by: Thomas Huth <thuth@redhat.com>
6
Setting base as last used idx, so destination will see as available all
7
the entries that the device did not use, including the in-flight
8
processing ones.
9
10
This is ok for networking, but other kinds of devices might have
11
problems with these retransmissions.
12
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
16
---
10
hw/net/e1000.c | 1 -
17
hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
11
hw/net/lance.c | 3 ---
18
1 file changed, 17 insertions(+)
12
hw/net/ne2000.c | 2 --
13
hw/net/pcnet-pci.c | 1 -
14
hw/net/pcnet.c | 1 -
15
hw/net/rtl8139.c | 2 --
16
hw/net/xgmac.c | 1 -
17
7 files changed, 11 deletions(-)
18
19
19
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
20
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
20
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/e1000.c
22
--- a/hw/virtio/vhost-vdpa.c
22
+++ b/hw/net/e1000.c
23
+++ b/hw/virtio/vhost-vdpa.c
23
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
24
#include "hw/pci/pci.h"
25
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
25
#include "net/net.h"
26
struct vhost_vring_state *ring)
26
#include "net/checksum.h"
27
{
27
-#include "hw/loader.h"
28
+ struct vhost_vdpa *v = dev->opaque;
28
#include "sysemu/sysemu.h"
29
int ret;
29
#include "sysemu/dma.h"
30
30
#include "qemu/iov.h"
31
+ if (v->shadow_vqs_enabled) {
31
diff --git a/hw/net/lance.c b/hw/net/lance.c
32
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
32
index XXXXXXX..XXXXXXX 100644
33
+ ring->index);
33
--- a/hw/net/lance.c
34
+
34
+++ b/hw/net/lance.c
35
+ /*
35
@@ -XXX,XX +XXX,XX @@
36
+ * Setting base as last used idx, so destination will see as available
36
*/
37
+ * all the entries that the device did not use, including the in-flight
37
38
+ * processing ones.
38
#include "qemu/osdep.h"
39
+ *
39
-#include "hw/sysbus.h"
40
+ * TODO: This is ok for networking, but other kinds of devices might
40
-#include "net/net.h"
41
+ * have problems with these retransmissions.
41
#include "qemu/timer.h"
42
+ */
42
-#include "qemu/sockets.h"
43
+ ring->num = svq->last_used_idx;
43
#include "hw/sparc/sparc32_dma.h"
44
+ return 0;
44
#include "hw/net/lance.h"
45
+ }
45
#include "trace.h"
46
+
46
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
47
ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
47
index XXXXXXX..XXXXXXX 100644
48
trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
48
--- a/hw/net/ne2000.c
49
return ret;
49
+++ b/hw/net/ne2000.c
50
@@ -XXX,XX +XXX,XX @@
51
*/
52
#include "qemu/osdep.h"
53
#include "hw/pci/pci.h"
54
-#include "net/net.h"
55
#include "net/eth.h"
56
#include "ne2000.h"
57
-#include "hw/loader.h"
58
#include "sysemu/sysemu.h"
59
60
/* debug NE2000 card */
61
diff --git a/hw/net/pcnet-pci.c b/hw/net/pcnet-pci.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/hw/net/pcnet-pci.c
64
+++ b/hw/net/pcnet-pci.c
65
@@ -XXX,XX +XXX,XX @@
66
#include "qemu/osdep.h"
67
#include "hw/pci/pci.h"
68
#include "net/net.h"
69
-#include "hw/loader.h"
70
#include "qemu/timer.h"
71
#include "sysemu/dma.h"
72
#include "sysemu/sysemu.h"
73
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
74
index XXXXXXX..XXXXXXX 100644
75
--- a/hw/net/pcnet.c
76
+++ b/hw/net/pcnet.c
77
@@ -XXX,XX +XXX,XX @@
78
#include "net/net.h"
79
#include "net/eth.h"
80
#include "qemu/timer.h"
81
-#include "qemu/sockets.h"
82
#include "sysemu/sysemu.h"
83
#include "trace.h"
84
85
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/hw/net/rtl8139.c
88
+++ b/hw/net/rtl8139.c
89
@@ -XXX,XX +XXX,XX @@
90
#include "qemu/timer.h"
91
#include "net/net.h"
92
#include "net/eth.h"
93
-#include "hw/loader.h"
94
#include "sysemu/sysemu.h"
95
-#include "qemu/iov.h"
96
97
/* debug RTL8139 card */
98
//#define DEBUG_RTL8139 1
99
diff --git a/hw/net/xgmac.c b/hw/net/xgmac.c
100
index XXXXXXX..XXXXXXX 100644
101
--- a/hw/net/xgmac.c
102
+++ b/hw/net/xgmac.c
103
@@ -XXX,XX +XXX,XX @@
104
#include "hw/sysbus.h"
105
#include "qemu/log.h"
106
#include "net/net.h"
107
-#include "net/checksum.h"
108
109
#ifdef DEBUG_XGMAC
110
#define DEBUGF_BRK(message, args...) do { \
111
--
50
--
112
2.7.4
51
2.7.4
113
52
114
53
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
According to net/Makefile.objs we only link in the vhost-user code
3
Setting the log address would make the device start reporting invalid
4
if CONFIG_POSIX has been set. So the help screen should also only
4
dirty memory because the SVQ vrings are located in qemu's memory.
5
show this information if CONFIG_POSIX has been defined.
6
5
7
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
9
---
11
qemu-options.hx | 2 ++
10
hw/virtio/vhost-vdpa.c | 3 ++-
12
1 file changed, 2 insertions(+)
11
1 file changed, 2 insertions(+), 1 deletion(-)
13
12
14
diff --git a/qemu-options.hx b/qemu-options.hx
13
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/qemu-options.hx
15
--- a/hw/virtio/vhost-vdpa.c
17
+++ b/qemu-options.hx
16
+++ b/hw/virtio/vhost-vdpa.c
18
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
17
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
19
" VALE port (created on the fly) called 'name' ('nmname' is name of the \n"
18
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
20
" netmap device, defaults to '/dev/netmap')\n"
19
struct vhost_log *log)
21
#endif
20
{
22
+#ifdef CONFIG_POSIX
21
- if (vhost_vdpa_one_time_request(dev)) {
23
"-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
22
+ struct vhost_vdpa *v = dev->opaque;
24
" configure a vhost-user network, backed by a chardev 'dev'\n"
23
+ if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
25
+#endif
24
return 0;
26
"-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
25
}
27
" configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL)
26
28
DEF("net", HAS_ARG, QEMU_OPTION_net,
29
--
27
--
30
2.7.4
28
2.7.4
31
29
32
30
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The legacy "-net" option can be quite confusing for the users since most
3
SVQ is able to log the dirty bits by itself, so let's use it to not
4
people do not expect to get a "vlan" hub between their emulated guest
4
block migration.
5
hardware and the host backend. But so far, we are also not able to get
6
rid of "-net" completely, since it is the only way to configure on-board
7
NICs that can not be instantiated via "-device" yet. It's also a little
8
bit shorter to type "-net nic -net tap" instead of "-device xyz,netdev=n1
9
-netdev tap,id=n1".
10
5
11
So what we need is a new convenience option that is shorter to type than
6
Also, ignore set and clear of VHOST_F_LOG_ALL on set_features if SVQ is
12
the full -device + -netdev stuff, and which can be used to configure the
7
enabled. Even if the device supports it, the reports would be nonsense
13
on-board NICs that can not be handled via -device yet. Thus this patch now
8
because SVQ memory is in the qemu region.
14
provides such a new option "--nic": It adds an entry in the nd_table to
15
configure an on-board / default NIC, creates a host backend and connects
16
the two directly, without a confusing "vlan" hub in between.
17
9
18
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
10
The log region is still allocated. Future changes might skip that, but
19
Signed-off-by: Thomas Huth <thuth@redhat.com>
11
this series is already long enough.
12
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
20
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
21
---
16
---
22
include/sysemu/sysemu.h | 1 +
17
hw/virtio/vhost-vdpa.c | 39 +++++++++++++++++++++++++++++++++++----
23
net/net.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++++
18
include/hw/virtio/vhost-vdpa.h | 1 +
24
qemu-options.hx | 40 +++++++++++++++++++++----
19
2 files changed, 36 insertions(+), 4 deletions(-)
25
vl.c | 7 +++++
26
4 files changed, 120 insertions(+), 6 deletions(-)
27
20
28
diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h
21
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
29
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
30
--- a/include/sysemu/sysemu.h
23
--- a/hw/virtio/vhost-vdpa.c
31
+++ b/include/sysemu/sysemu.h
24
+++ b/hw/virtio/vhost-vdpa.c
32
@@ -XXX,XX +XXX,XX @@ extern QemuOptsList bdrv_runtime_opts;
25
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
33
extern QemuOptsList qemu_chardev_opts;
26
return v->index != 0;
34
extern QemuOptsList qemu_device_opts;
35
extern QemuOptsList qemu_netdev_opts;
36
+extern QemuOptsList qemu_nic_opts;
37
extern QemuOptsList qemu_net_opts;
38
extern QemuOptsList qemu_global_opts;
39
extern QemuOptsList qemu_mon_opts;
40
diff --git a/net/net.c b/net/net.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/net/net.c
43
+++ b/net/net.c
44
@@ -XXX,XX +XXX,XX @@ static int net_init_netdev(void *dummy, QemuOpts *opts, Error **errp)
45
return net_client_init(opts, true, errp);
46
}
27
}
47
28
48
+/* For the convenience "--nic" parameter */
29
+static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
49
+static int net_param_nic(void *dummy, QemuOpts *opts, Error **errp)
30
+ uint64_t *features)
50
+{
31
+{
51
+ char *mac, *nd_id;
32
+ int ret;
52
+ int idx, ret;
53
+ NICInfo *ni;
54
+ const char *type;
55
+
33
+
56
+ type = qemu_opt_get(opts, "type");
34
+ ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
57
+ if (type && g_str_equal(type, "none")) {
35
+ trace_vhost_vdpa_get_features(dev, *features);
58
+ return 0; /* Nothing to do, default_net is cleared in vl.c */
59
+ }
60
+
61
+ idx = nic_get_free_idx();
62
+ if (idx == -1 || nb_nics >= MAX_NICS) {
63
+ error_setg(errp, "no more on-board/default NIC slots available");
64
+ return -1;
65
+ }
66
+
67
+ if (!type) {
68
+ qemu_opt_set(opts, "type", "user", &error_abort);
69
+ }
70
+
71
+ ni = &nd_table[idx];
72
+ memset(ni, 0, sizeof(*ni));
73
+ ni->model = qemu_opt_get_del(opts, "model");
74
+
75
+ /* Create an ID if the user did not specify one */
76
+ nd_id = g_strdup(qemu_opts_id(opts));
77
+ if (!nd_id) {
78
+ nd_id = g_strdup_printf("__org.qemu.nic%i\n", idx);
79
+ qemu_opts_set_id(opts, nd_id);
80
+ }
81
+
82
+ /* Handle MAC address */
83
+ mac = qemu_opt_get_del(opts, "mac");
84
+ if (mac) {
85
+ ret = net_parse_macaddr(ni->macaddr.a, mac);
86
+ g_free(mac);
87
+ if (ret) {
88
+ error_setg(errp, "invalid syntax for ethernet address");
89
+ return -1;
90
+ }
91
+ if (is_multicast_ether_addr(ni->macaddr.a)) {
92
+ error_setg(errp, "NIC cannot have multicast MAC address");
93
+ return -1;
94
+ }
95
+ }
96
+ qemu_macaddr_default_if_unset(&ni->macaddr);
97
+
98
+ ret = net_client_init(opts, true, errp);
99
+ if (ret == 0) {
100
+ ni->netdev = qemu_find_netdev(nd_id);
101
+ ni->used = true;
102
+ nb_nics++;
103
+ }
104
+
105
+ g_free(nd_id);
106
+ return ret;
36
+ return ret;
107
+}
37
+}
108
+
38
+
109
int net_init_clients(Error **errp)
39
static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
40
Error **errp)
110
{
41
{
111
net_change_state_entry =
42
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
112
@@ -XXX,XX +XXX,XX @@ int net_init_clients(Error **errp)
43
return 0;
113
return -1;
114
}
44
}
115
45
116
+ if (qemu_opts_foreach(qemu_find_opts("nic"), net_param_nic, NULL, errp)) {
46
- r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
117
+ return -1;
47
+ r = vhost_vdpa_get_dev_features(hdev, &dev_features);
48
if (r != 0) {
49
error_setg_errno(errp, -r, "Can't get vdpa device features");
50
return r;
51
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
52
static int vhost_vdpa_set_features(struct vhost_dev *dev,
53
uint64_t features)
54
{
55
+ struct vhost_vdpa *v = dev->opaque;
56
int ret;
57
58
if (vhost_vdpa_one_time_request(dev)) {
59
return 0;
60
}
61
62
+ if (v->shadow_vqs_enabled) {
63
+ if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
64
+ /*
65
+ * QEMU is just trying to enable or disable logging. SVQ handles
66
+ * this separately, so no need to forward this.
67
+ */
68
+ v->acked_features = features;
69
+ return 0;
70
+ }
71
+
72
+ v->acked_features = features;
73
+
74
+ /* We must not ack _F_LOG if SVQ is enabled */
75
+ features &= ~BIT_ULL(VHOST_F_LOG_ALL);
118
+ }
76
+ }
119
+
77
+
120
if (qemu_opts_foreach(qemu_find_opts("net"), net_init_client, NULL, errp)) {
78
trace_vhost_vdpa_set_features(dev, features);
121
return -1;
79
ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
122
}
80
if (ret) {
123
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_netdev_opts = {
81
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
124
},
82
static int vhost_vdpa_get_features(struct vhost_dev *dev,
125
};
83
uint64_t *features)
126
84
{
127
+QemuOptsList qemu_nic_opts = {
85
- int ret;
128
+ .name = "nic",
86
+ struct vhost_vdpa *v = dev->opaque;
129
+ .implied_opt_name = "type",
87
+ int ret = vhost_vdpa_get_dev_features(dev, features);
130
+ .head = QTAILQ_HEAD_INITIALIZER(qemu_nic_opts.head),
131
+ .desc = {
132
+ /*
133
+ * no elements => accept any params
134
+ * validation will happen later
135
+ */
136
+ { /* end of list */ }
137
+ },
138
+};
139
+
88
+
140
QemuOptsList qemu_net_opts = {
89
+ if (ret == 0 && v->shadow_vqs_enabled) {
141
.name = "net",
90
+ /* Add SVQ logging capabilities */
142
.implied_opt_name = "type",
91
+ *features |= BIT_ULL(VHOST_F_LOG_ALL);
143
diff --git a/qemu-options.hx b/qemu-options.hx
92
+ }
93
94
- ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
95
- trace_vhost_vdpa_get_features(dev, *features);
96
return ret;
97
}
98
99
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
144
index XXXXXXX..XXXXXXX 100644
100
index XXXXXXX..XXXXXXX 100644
145
--- a/qemu-options.hx
101
--- a/include/hw/virtio/vhost-vdpa.h
146
+++ b/qemu-options.hx
102
+++ b/include/hw/virtio/vhost-vdpa.h
147
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
103
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
148
#endif
104
bool iotlb_batch_begin_sent;
149
"-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
105
MemoryListener listener;
150
" configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL)
106
struct vhost_vdpa_iova_range iova_range;
151
+DEF("nic", HAS_ARG, QEMU_OPTION_nic,
107
+ uint64_t acked_features;
152
+ "--nic [tap|bridge|"
108
bool shadow_vqs_enabled;
153
+#ifdef CONFIG_SLIRP
109
/* IOVA mapping used by the Shadow Virtqueue */
154
+ "user|"
110
VhostIOVATree *iova_tree;
155
+#endif
156
+#ifdef __linux__
157
+ "l2tpv3|"
158
+#endif
159
+#ifdef CONFIG_VDE
160
+ "vde|"
161
+#endif
162
+#ifdef CONFIG_NETMAP
163
+ "netmap|"
164
+#endif
165
+#ifdef CONFIG_POSIX
166
+ "vhost-user|"
167
+#endif
168
+ "socket][,option][,...][mac=macaddr]\n"
169
+ " initialize an on-board / default host NIC (using MAC address\n"
170
+ " macaddr) and connect it to the given host network backend\n"
171
+ "--nic none use it alone to have zero network devices (the default is to\n"
172
+ " provided a 'user' network connection)\n",
173
+ QEMU_ARCH_ALL)
174
DEF("net", HAS_ARG, QEMU_OPTION_net,
175
"-net nic[,vlan=n][,netdev=nd][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
176
" configure or create an on-board (or machine default) NIC and\n"
177
" connect it either to VLAN 'n' or the netdev 'nd' (for pluggable\n"
178
" NICs please use '-device devtype,netdev=nd' instead)\n"
179
- "-net none use it alone to have zero network devices. If no -net option\n"
180
- " is provided, the default is '-net nic -net user'\n"
181
"-net ["
182
#ifdef CONFIG_SLIRP
183
"user|"
184
@@ -XXX,XX +XXX,XX @@ qemu -m 512 -object memory-backend-file,id=mem,size=512M,mem-path=/hugetlbfs,sha
185
-device virtio-net-pci,netdev=net0
186
@end example
187
188
-@item -net none
189
-Indicate that no network devices should be configured. It is used to
190
-override the default configuration (@option{-net nic -net user}) which
191
-is activated if no @option{-net} options are provided.
192
+@item --nic [tap|bridge|user|l2tpv3|vde|netmap|vhost-user|socket][,...][,mac=macaddr]
193
+
194
+This option is a shortcut for setting both, the on-board (default) guest NIC
195
+hardware and the host network backend in one go. The host backend options are
196
+the same as with the corresponding @option{--netdev} option. The guest NIC
197
+hardware MAC address can be set with @option{mac=@var{macaddr}}.
198
+
199
+@item --nic none
200
+Indicate that no network devices should be configured. It is used to override
201
+the default configuration (default NIC with @option{--net user} backend) which
202
+is activated if no other networking options are provided.
203
ETEXI
204
205
STEXI
206
diff --git a/vl.c b/vl.c
207
index XXXXXXX..XXXXXXX 100644
208
--- a/vl.c
209
+++ b/vl.c
210
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
211
qemu_add_opts(&qemu_chardev_opts);
212
qemu_add_opts(&qemu_device_opts);
213
qemu_add_opts(&qemu_netdev_opts);
214
+ qemu_add_opts(&qemu_nic_opts);
215
qemu_add_opts(&qemu_net_opts);
216
qemu_add_opts(&qemu_rtc_opts);
217
qemu_add_opts(&qemu_global_opts);
218
@@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv, char **envp)
219
exit(1);
220
}
221
break;
222
+ case QEMU_OPTION_nic:
223
+ default_net = 0;
224
+ if (net_client_parse(qemu_find_opts("nic"), optarg) == -1) {
225
+ exit(1);
226
+ }
227
+ break;
228
case QEMU_OPTION_net:
229
default_net = 0;
230
if (net_client_parse(qemu_find_opts("net"), optarg) == -1) {
231
--
111
--
232
2.7.4
112
2.7.4
233
113
234
114
diff view generated by jsdifflib