1
The following changes since commit e3debd5e7d0ce031356024878a0a18b9d109354a:
1
The following changes since commit d9ccf33f9479201e5add8db0af68ca9ca8da358b:
2
2
3
Merge tag 'pull-request-2023-03-24' of https://gitlab.com/thuth/qemu into staging (2023-03-24 16:08:46 +0000)
3
Merge remote-tracking branch 'remotes/lvivier-gitlab/tags/linux-user-for-7.0-pull-request' into staging (2022-03-09 20:01:17 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to fba7c3b788dfcb99a3f9253f7d99cc0d217d6d3c:
9
for you to fetch changes up to eea40402ecf895ed345f8e8eb07dbb484f4542c5:
10
10
11
igb: respect VMVIR and VMOLR for VLAN (2023-03-28 13:10:55 +0800)
11
vdpa: Expose VHOST_F_LOG_ALL on SVQ (2022-03-10 10:26:32 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
----------------------------------------------------------------
15
----------------------------------------------------------------
16
Akihiko Odaki (4):
16
Eugenio Pérez (14):
17
igb: Save more Tx states
17
vhost: Add VhostShadowVirtqueue
18
igb: Fix DMA requester specification for Tx packet
18
vhost: Add Shadow VirtQueue kick forwarding capabilities
19
hw/net/net_tx_pkt: Ignore ECN bit
19
vhost: Add Shadow VirtQueue call forwarding capabilities
20
hw/net/net_tx_pkt: Align l3_hdr
20
vhost: Add vhost_svq_valid_features to shadow vq
21
virtio: Add vhost_svq_get_vring_addr
22
vdpa: adapt vhost_ops callbacks to svq
23
vhost: Shadow virtqueue buffers forwarding
24
util: Add iova_tree_alloc_map
25
util: add iova_tree_find_iova
26
vhost: Add VhostIOVATree
27
vdpa: Add custom IOTLB translations to SVQ
28
vdpa: Adapt vhost_vdpa_get_vring_base to SVQ
29
vdpa: Never set log_base addr if SVQ is enabled
30
vdpa: Expose VHOST_F_LOG_ALL on SVQ
21
31
22
Sriram Yagnaraman (8):
32
Jason Wang (1):
23
MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
33
virtio-net: fix map leaking on error during receive
24
igb: handle PF/VF reset properly
25
igb: add ICR_RXDW
26
igb: implement VFRE and VFTE registers
27
igb: check oversized packets for VMDq
28
igb: respect E1000_VMOLR_RSSE
29
igb: implement VF Tx and Rx stats
30
igb: respect VMVIR and VMOLR for VLAN
31
34
32
MAINTAINERS | 1 +
35
hw/net/virtio-net.c | 1 +
33
hw/net/e1000e_core.c | 6 +-
36
hw/virtio/meson.build | 2 +-
34
hw/net/e1000x_regs.h | 4 +
37
hw/virtio/vhost-iova-tree.c | 110 +++++++
35
hw/net/igb.c | 26 ++++--
38
hw/virtio/vhost-iova-tree.h | 27 ++
36
hw/net/igb_core.c | 256 ++++++++++++++++++++++++++++++++++++++-------------
39
hw/virtio/vhost-shadow-virtqueue.c | 638 +++++++++++++++++++++++++++++++++++++
37
hw/net/igb_core.h | 9 +-
40
hw/virtio/vhost-shadow-virtqueue.h | 87 +++++
38
hw/net/igb_regs.h | 6 ++
41
hw/virtio/vhost-vdpa.c | 525 +++++++++++++++++++++++++++++-
39
hw/net/net_tx_pkt.c | 30 +++---
42
include/hw/virtio/vhost-vdpa.h | 8 +
40
hw/net/net_tx_pkt.h | 3 +-
43
include/qemu/iova-tree.h | 38 ++-
41
hw/net/trace-events | 2 +
44
util/iova-tree.c | 169 ++++++++++
42
hw/net/vmxnet3.c | 4 +-
45
10 files changed, 1588 insertions(+), 17 deletions(-)
43
11 files changed, 254 insertions(+), 93 deletions(-)
46
create mode 100644 hw/virtio/vhost-iova-tree.c
47
create mode 100644 hw/virtio/vhost-iova-tree.h
48
create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
49
create mode 100644 hw/virtio/vhost-shadow-virtqueue.h
50
51
diff view generated by jsdifflib
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
1
Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
2
tries to fix the use after free of the sg by caching the virtqueue
3
elements in an array and unmap them at once after receiving the
4
packets. But it forgot to unmap the cached elements on error, which
5
will lead to leaking of mapping and other unexpected results.
2
6
3
I would like to review and be informed of changes to the igb device
7
Fixing this by detaching the cached elements on error. This addresses
8
CVE-2022-26353.
4
9
5
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
10
Reported-by: Victor Tom <vv474172261@gmail.com>
11
Cc: qemu-stable@nongnu.org
12
Fixes: CVE-2022-26353
13
Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
14
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
16
---
8
MAINTAINERS | 1 +
17
hw/net/virtio-net.c | 1 +
9
1 file changed, 1 insertion(+)
18
1 file changed, 1 insertion(+)
10
19
11
diff --git a/MAINTAINERS b/MAINTAINERS
20
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
12
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
13
--- a/MAINTAINERS
22
--- a/hw/net/virtio-net.c
14
+++ b/MAINTAINERS
23
+++ b/hw/net/virtio-net.c
15
@@ -XXX,XX +XXX,XX @@ F: tests/qtest/libqos/e1000e.*
24
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
16
25
17
igb
26
err:
18
M: Akihiko Odaki <akihiko.odaki@daynix.com>
27
for (j = 0; j < i; j++) {
19
+R: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
28
+ virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
20
S: Maintained
29
g_free(elems[j]);
21
F: docs/system/devices/igb.rst
30
}
22
F: hw/net/igb*
31
23
--
32
--
24
2.7.4
33
2.7.4
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
Vhost shadow virtqueue (SVQ) is an intermediate jump for virtqueue
4
notifications and buffers, allowing qemu to track them. While qemu is
5
forwarding the buffers and virtqueue changes, it is able to commit the
6
memory that is being dirtied, the same way regular qemu's VirtIO devices
7
do.
8
9
This commit only exposes basic SVQ allocation and free. Next patches of
10
the series add functionality like notifications and buffers forwarding.
11
12
Acked-by: Michael S. Tsirkin <mst@redhat.com>
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
hw/virtio/meson.build | 2 +-
17
hw/virtio/vhost-shadow-virtqueue.c | 62 ++++++++++++++++++++++++++++++++++++++
18
hw/virtio/vhost-shadow-virtqueue.h | 28 +++++++++++++++++
19
3 files changed, 91 insertions(+), 1 deletion(-)
20
create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
21
create mode 100644 hw/virtio/vhost-shadow-virtqueue.h
22
23
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
24
index XXXXXXX..XXXXXXX 100644
25
--- a/hw/virtio/meson.build
26
+++ b/hw/virtio/meson.build
27
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
28
29
virtio_ss = ss.source_set()
30
virtio_ss.add(files('virtio.c'))
31
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c'))
32
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
33
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
34
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
35
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
36
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
37
new file mode 100644
38
index XXXXXXX..XXXXXXX
39
--- /dev/null
40
+++ b/hw/virtio/vhost-shadow-virtqueue.c
41
@@ -XXX,XX +XXX,XX @@
42
+/*
43
+ * vhost shadow virtqueue
44
+ *
45
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
46
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
47
+ *
48
+ * SPDX-License-Identifier: GPL-2.0-or-later
49
+ */
50
+
51
+#include "qemu/osdep.h"
52
+#include "hw/virtio/vhost-shadow-virtqueue.h"
53
+
54
+#include "qemu/error-report.h"
55
+
56
+/**
57
+ * Creates vhost shadow virtqueue, and instructs the vhost device to use the
58
+ * shadow methods and file descriptors.
59
+ *
60
+ * Returns the new virtqueue or NULL.
61
+ *
62
+ * In case of error, reason is reported through error_report.
63
+ */
64
+VhostShadowVirtqueue *vhost_svq_new(void)
65
+{
66
+ g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
67
+ int r;
68
+
69
+ r = event_notifier_init(&svq->hdev_kick, 0);
70
+ if (r != 0) {
71
+ error_report("Couldn't create kick event notifier: %s (%d)",
72
+ g_strerror(errno), errno);
73
+ goto err_init_hdev_kick;
74
+ }
75
+
76
+ r = event_notifier_init(&svq->hdev_call, 0);
77
+ if (r != 0) {
78
+ error_report("Couldn't create call event notifier: %s (%d)",
79
+ g_strerror(errno), errno);
80
+ goto err_init_hdev_call;
81
+ }
82
+
83
+ return g_steal_pointer(&svq);
84
+
85
+err_init_hdev_call:
86
+ event_notifier_cleanup(&svq->hdev_kick);
87
+
88
+err_init_hdev_kick:
89
+ return NULL;
90
+}
91
+
92
+/**
93
+ * Free the resources of the shadow virtqueue.
94
+ *
95
+ * @pvq: gpointer to SVQ so it can be used by autofree functions.
96
+ */
97
+void vhost_svq_free(gpointer pvq)
98
+{
99
+ VhostShadowVirtqueue *vq = pvq;
100
+ event_notifier_cleanup(&vq->hdev_kick);
101
+ event_notifier_cleanup(&vq->hdev_call);
102
+ g_free(vq);
103
+}
104
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
105
new file mode 100644
106
index XXXXXXX..XXXXXXX
107
--- /dev/null
108
+++ b/hw/virtio/vhost-shadow-virtqueue.h
109
@@ -XXX,XX +XXX,XX @@
110
+/*
111
+ * vhost shadow virtqueue
112
+ *
113
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
114
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
115
+ *
116
+ * SPDX-License-Identifier: GPL-2.0-or-later
117
+ */
118
+
119
+#ifndef VHOST_SHADOW_VIRTQUEUE_H
120
+#define VHOST_SHADOW_VIRTQUEUE_H
121
+
122
+#include "qemu/event_notifier.h"
123
+
124
+/* Shadow virtqueue to relay notifications */
125
+typedef struct VhostShadowVirtqueue {
126
+ /* Shadow kick notifier, sent to vhost */
127
+ EventNotifier hdev_kick;
128
+ /* Shadow call notifier, sent to vhost */
129
+ EventNotifier hdev_call;
130
+} VhostShadowVirtqueue;
131
+
132
+VhostShadowVirtqueue *vhost_svq_new(void);
133
+
134
+void vhost_svq_free(gpointer vq);
135
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
136
+
137
+#endif
138
--
139
2.7.4
140
141
diff view generated by jsdifflib
New patch
1
1
From: Eugenio Pérez <eperezma@redhat.com>
2
3
In this mode no buffer forwarding will be performed by SVQ: Qemu
4
will just forward the guest's kicks to the device.
5
6
Host memory notifier regions are left out for simplicity, and they will
7
not be addressed in this series.
8
9
Acked-by: Michael S. Tsirkin <mst@redhat.com>
10
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
hw/virtio/vhost-shadow-virtqueue.c | 56 ++++++++++++++
14
hw/virtio/vhost-shadow-virtqueue.h | 14 ++++
15
hw/virtio/vhost-vdpa.c | 145 ++++++++++++++++++++++++++++++++++++-
16
include/hw/virtio/vhost-vdpa.h | 4 +
17
4 files changed, 217 insertions(+), 2 deletions(-)
18
19
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@
24
#include "hw/virtio/vhost-shadow-virtqueue.h"
25
26
#include "qemu/error-report.h"
27
+#include "qemu/main-loop.h"
28
+#include "linux-headers/linux/vhost.h"
29
+
30
+/**
31
+ * Forward guest notifications.
32
+ *
33
+ * @n: guest kick event notifier, the one that guest set to notify svq.
34
+ */
35
+static void vhost_handle_guest_kick(EventNotifier *n)
36
+{
37
+ VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
38
+ svq_kick);
39
+ event_notifier_test_and_clear(n);
40
+ event_notifier_set(&svq->hdev_kick);
41
+}
42
+
43
+/**
44
+ * Set a new file descriptor for the guest to kick the SVQ and notify for avail
45
+ *
46
+ * @svq: The svq
47
+ * @svq_kick_fd: The svq kick fd
48
+ *
49
+ * Note that the SVQ will never close the old file descriptor.
50
+ */
51
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
52
+{
53
+ EventNotifier *svq_kick = &svq->svq_kick;
54
+ bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
55
+ bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;
56
+
57
+ if (poll_stop) {
58
+ event_notifier_set_handler(svq_kick, NULL);
59
+ }
60
+
61
+ /*
62
+ * event_notifier_set_handler already checks for guest's notifications if
63
+ * they arrive at the new file descriptor in the switch, so there is no
64
+ * need to explicitly check for them.
65
+ */
66
+ if (poll_start) {
67
+ event_notifier_init_fd(svq_kick, svq_kick_fd);
68
+ event_notifier_set(svq_kick);
69
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
70
+ }
71
+}
72
+
73
+/**
74
+ * Stop the shadow virtqueue operation.
75
+ * @svq: Shadow Virtqueue
76
+ */
77
+void vhost_svq_stop(VhostShadowVirtqueue *svq)
78
+{
79
+ event_notifier_set_handler(&svq->svq_kick, NULL);
80
+}
81
82
/**
83
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
84
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
85
goto err_init_hdev_call;
86
}
87
88
+ event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
89
return g_steal_pointer(&svq);
90
91
err_init_hdev_call:
92
@@ -XXX,XX +XXX,XX @@ err_init_hdev_kick:
93
void vhost_svq_free(gpointer pvq)
94
{
95
VhostShadowVirtqueue *vq = pvq;
96
+ vhost_svq_stop(vq);
97
event_notifier_cleanup(&vq->hdev_kick);
98
event_notifier_cleanup(&vq->hdev_call);
99
g_free(vq);
100
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
101
index XXXXXXX..XXXXXXX 100644
102
--- a/hw/virtio/vhost-shadow-virtqueue.h
103
+++ b/hw/virtio/vhost-shadow-virtqueue.h
104
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
105
EventNotifier hdev_kick;
106
/* Shadow call notifier, sent to vhost */
107
EventNotifier hdev_call;
108
+
109
+ /*
110
+ * Borrowed virtqueue's guest to host notifier. To borrow it in this event
111
+ * notifier allows to recover the VhostShadowVirtqueue from the event loop
112
+ * easily. If we use the VirtQueue's one, we don't have an easy way to
113
+ * retrieve VhostShadowVirtqueue.
114
+ *
115
+ * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
116
+ */
117
+ EventNotifier svq_kick;
118
} VhostShadowVirtqueue;
119
120
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
121
+
122
+void vhost_svq_stop(VhostShadowVirtqueue *svq);
123
+
124
VhostShadowVirtqueue *vhost_svq_new(void);
125
126
void vhost_svq_free(gpointer vq);
127
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
128
index XXXXXXX..XXXXXXX 100644
129
--- a/hw/virtio/vhost-vdpa.c
130
+++ b/hw/virtio/vhost-vdpa.c
131
@@ -XXX,XX +XXX,XX @@
132
#include "hw/virtio/vhost.h"
133
#include "hw/virtio/vhost-backend.h"
134
#include "hw/virtio/virtio-net.h"
135
+#include "hw/virtio/vhost-shadow-virtqueue.h"
136
#include "hw/virtio/vhost-vdpa.h"
137
#include "exec/address-spaces.h"
138
#include "qemu/main-loop.h"
139
#include "cpu.h"
140
#include "trace.h"
141
#include "qemu-common.h"
142
+#include "qapi/error.h"
143
144
/*
145
* Return one past the end of the end of section. Be careful with uint64_t
146
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
147
return v->index != 0;
148
}
149
150
+static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
151
+ Error **errp)
152
+{
153
+ g_autoptr(GPtrArray) shadow_vqs = NULL;
154
+
155
+ if (!v->shadow_vqs_enabled) {
156
+ return 0;
157
+ }
158
+
159
+ shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
160
+ for (unsigned n = 0; n < hdev->nvqs; ++n) {
161
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
162
+
163
+ if (unlikely(!svq)) {
164
+ error_setg(errp, "Cannot create svq %u", n);
165
+ return -1;
166
+ }
167
+ g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
168
+ }
169
+
170
+ v->shadow_vqs = g_steal_pointer(&shadow_vqs);
171
+ return 0;
172
+}
173
+
174
static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
175
{
176
struct vhost_vdpa *v;
177
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
178
dev->opaque = opaque ;
179
v->listener = vhost_vdpa_memory_listener;
180
v->msg_type = VHOST_IOTLB_MSG_V2;
181
+ ret = vhost_vdpa_init_svq(dev, v, errp);
182
+ if (ret) {
183
+ goto err;
184
+ }
185
186
vhost_vdpa_get_iova_range(v);
187
188
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
189
VIRTIO_CONFIG_S_DRIVER);
190
191
return 0;
192
+
193
+err:
194
+ ram_block_discard_disable(false);
195
+ return ret;
196
}
197
198
static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
199
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
200
201
static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
202
{
203
+ struct vhost_vdpa *v = dev->opaque;
204
int i;
205
206
+ if (v->shadow_vqs_enabled) {
207
+ /* FIXME SVQ is not compatible with host notifiers mr */
208
+ return;
209
+ }
210
+
211
for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
212
if (vhost_vdpa_host_notifier_init(dev, i)) {
213
goto err;
214
@@ -XXX,XX +XXX,XX @@ err:
215
return;
216
}
217
218
+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
219
+{
220
+ struct vhost_vdpa *v = dev->opaque;
221
+ size_t idx;
222
+
223
+ if (!v->shadow_vqs) {
224
+ return;
225
+ }
226
+
227
+ for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
228
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
229
+ }
230
+ g_ptr_array_free(v->shadow_vqs, true);
231
+}
232
+
233
static int vhost_vdpa_cleanup(struct vhost_dev *dev)
234
{
235
struct vhost_vdpa *v;
236
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
237
trace_vhost_vdpa_cleanup(dev, v);
238
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
239
memory_listener_unregister(&v->listener);
240
+ vhost_vdpa_svq_cleanup(dev);
241
242
dev->opaque = NULL;
243
ram_block_discard_disable(false);
244
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
245
return ret;
246
}
247
248
+static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
249
+{
250
+ if (!v->shadow_vqs_enabled) {
251
+ return;
252
+ }
253
+
254
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
255
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
256
+ vhost_svq_stop(svq);
257
+ }
258
+}
259
+
260
static int vhost_vdpa_reset_device(struct vhost_dev *dev)
261
{
262
+ struct vhost_vdpa *v = dev->opaque;
263
int ret;
264
uint8_t status = 0;
265
266
+ vhost_vdpa_reset_svq(v);
267
+
268
ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
269
trace_vhost_vdpa_reset_device(dev, status);
270
return ret;
271
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
272
return ret;
273
}
274
275
+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
276
+ struct vhost_vring_file *file)
277
+{
278
+ trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
279
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
280
+}
281
+
282
+/**
283
+ * Set the shadow virtqueue descriptors to the device
284
+ *
285
+ * @dev: The vhost device model
286
+ * @svq: The shadow virtqueue
287
+ * @idx: The index of the virtqueue in the vhost device
288
+ * @errp: Error
289
+ */
290
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
291
+ VhostShadowVirtqueue *svq,
292
+ unsigned idx,
293
+ Error **errp)
294
+{
295
+ struct vhost_vring_file file = {
296
+ .index = dev->vq_index + idx,
297
+ };
298
+ const EventNotifier *event_notifier = &svq->hdev_kick;
299
+ int r;
300
+
301
+ file.fd = event_notifier_get_fd(event_notifier);
302
+ r = vhost_vdpa_set_vring_dev_kick(dev, &file);
303
+ if (unlikely(r != 0)) {
304
+ error_setg_errno(errp, -r, "Can't set device kick fd");
305
+ }
306
+
307
+ return r == 0;
308
+}
309
+
310
+static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
311
+{
312
+ struct vhost_vdpa *v = dev->opaque;
313
+ Error *err = NULL;
314
+ unsigned i;
315
+
316
+ if (!v->shadow_vqs) {
317
+ return true;
318
+ }
319
+
320
+ for (i = 0; i < v->shadow_vqs->len; ++i) {
321
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
322
+ bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
323
+ if (unlikely(!ok)) {
324
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
325
+ return false;
326
+ }
327
+ }
328
+
329
+ return true;
330
+}
331
+
332
static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
333
{
334
struct vhost_vdpa *v = dev->opaque;
335
+ bool ok;
336
trace_vhost_vdpa_dev_start(dev, started);
337
338
if (started) {
339
vhost_vdpa_host_notifiers_init(dev);
340
+ ok = vhost_vdpa_svqs_start(dev);
341
+ if (unlikely(!ok)) {
342
+ return -1;
343
+ }
344
vhost_vdpa_set_vring_ready(dev);
345
} else {
346
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
347
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
348
static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
349
struct vhost_vring_file *file)
350
{
351
- trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
352
- return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
353
+ struct vhost_vdpa *v = dev->opaque;
354
+ int vdpa_idx = file->index - dev->vq_index;
355
+
356
+ if (v->shadow_vqs_enabled) {
357
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
358
+ vhost_svq_set_svq_kick_fd(svq, file->fd);
359
+ return 0;
360
+ } else {
361
+ return vhost_vdpa_set_vring_dev_kick(dev, file);
362
+ }
363
}
364
365
static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
366
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
367
index XXXXXXX..XXXXXXX 100644
368
--- a/include/hw/virtio/vhost-vdpa.h
369
+++ b/include/hw/virtio/vhost-vdpa.h
370
@@ -XXX,XX +XXX,XX @@
371
#ifndef HW_VIRTIO_VHOST_VDPA_H
372
#define HW_VIRTIO_VHOST_VDPA_H
373
374
+#include <gmodule.h>
375
+
376
#include "hw/virtio/virtio.h"
377
#include "standard-headers/linux/vhost_types.h"
378
379
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
380
bool iotlb_batch_begin_sent;
381
MemoryListener listener;
382
struct vhost_vdpa_iova_range iova_range;
383
+ bool shadow_vqs_enabled;
384
+ GPtrArray *shadow_vqs;
385
struct vhost_dev *dev;
386
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
387
} VhostVDPA;
388
--
389
2.7.4
390
391
diff view generated by jsdifflib
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when VF
3
This will make qemu aware of the device used buffers, allowing it to
4
is reset.
4
write the guest memory with its contents if needed.
5
5
6
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
6
Acked-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
---
9
hw/net/igb_core.c | 38 ++++++++++++++++++++++++++------------
10
hw/virtio/vhost-shadow-virtqueue.c | 38 ++++++++++++++++++++++++++++++++++++++
10
hw/net/igb_regs.h | 3 +++
11
hw/virtio/vhost-shadow-virtqueue.h | 4 ++++
11
hw/net/trace-events | 2 ++
12
hw/virtio/vhost-vdpa.c | 31 +++++++++++++++++++++++++++++--
12
3 files changed, 31 insertions(+), 12 deletions(-)
13
3 files changed, 71 insertions(+), 2 deletions(-)
13
14
14
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/igb_core.c
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/net/igb_core.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static void igb_set_eims(IGBCore *core, int index, uint32_t val)
19
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(EventNotifier *n)
19
igb_update_interrupt_state(core);
20
}
20
}
21
21
22
-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
22
/**
23
-{
23
+ * Forward vhost notifications
24
- /* TODO: Reset of the queue enable and the interrupt registers of the VF. */
24
+ *
25
-
25
+ * @n: hdev call event notifier, the one that device set to notify svq.
26
- core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
26
+ */
27
- core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
27
+static void vhost_svq_handle_call(EventNotifier *n)
28
-}
29
-
30
static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
31
{
32
uint32_t ent = core->mac[VTIVAR_MISC + vfn];
33
@@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
34
}
35
}
36
37
+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
38
+{
28
+{
39
+ /* disable Rx and Tx for the VF*/
29
+ VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
40
+ core->mac[VFTE] &= ~BIT(vfn);
30
+ hdev_call);
41
+ core->mac[VFRE] &= ~BIT(vfn);
31
+ event_notifier_test_and_clear(n);
42
+ /* indicate VF reset to PF */
32
+ event_notifier_set(&svq->svq_call);
43
+ core->mac[VFLRE] |= BIT(vfn);
44
+ /* VFLRE and mailbox use the same interrupt cause */
45
+ mailbox_interrupt_to_pf(core);
46
+}
33
+}
47
+
34
+
48
static void igb_w1c(IGBCore *core, int index, uint32_t val)
35
+/**
36
+ * Set the call notifier for the SVQ to call the guest
37
+ *
38
+ * @svq: Shadow virtqueue
39
+ * @call_fd: call notifier
40
+ *
41
+ * Called on BQL context.
42
+ */
43
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
44
+{
45
+ if (call_fd == VHOST_FILE_UNBIND) {
46
+ /*
47
+ * Fail event_notifier_set if called handling device call.
48
+ *
49
+ * SVQ still needs device notifications, since it needs to keep
50
+ * forwarding used buffers even with the unbind.
51
+ */
52
+ memset(&svq->svq_call, 0, sizeof(svq->svq_call));
53
+ } else {
54
+ event_notifier_init_fd(&svq->svq_call, call_fd);
55
+ }
56
+}
57
+
58
+/**
59
* Set a new file descriptor for the guest to kick the SVQ and notify for avail
60
*
61
* @svq: The svq
62
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
63
}
64
65
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
66
+ event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
67
return g_steal_pointer(&svq);
68
69
err_init_hdev_call:
70
@@ -XXX,XX +XXX,XX @@ void vhost_svq_free(gpointer pvq)
71
VhostShadowVirtqueue *vq = pvq;
72
vhost_svq_stop(vq);
73
event_notifier_cleanup(&vq->hdev_kick);
74
+ event_notifier_set_handler(&vq->hdev_call, NULL);
75
event_notifier_cleanup(&vq->hdev_call);
76
g_free(vq);
77
}
78
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
79
index XXXXXXX..XXXXXXX 100644
80
--- a/hw/virtio/vhost-shadow-virtqueue.h
81
+++ b/hw/virtio/vhost-shadow-virtqueue.h
82
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
83
* So shadow virtqueue must not clean it, or we would lose VirtQueue one.
84
*/
85
EventNotifier svq_kick;
86
+
87
+ /* Guest's call notifier, where the SVQ calls guest. */
88
+ EventNotifier svq_call;
89
} VhostShadowVirtqueue;
90
91
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
92
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
93
94
void vhost_svq_stop(VhostShadowVirtqueue *svq);
95
96
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
97
index XXXXXXX..XXXXXXX 100644
98
--- a/hw/virtio/vhost-vdpa.c
99
+++ b/hw/virtio/vhost-vdpa.c
100
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
101
return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
102
}
103
104
+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
105
+ struct vhost_vring_file *file)
106
+{
107
+ trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
108
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
109
+}
110
+
111
/**
112
* Set the shadow virtqueue descriptors to the device
113
*
114
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
115
* @svq: The shadow virtqueue
116
* @idx: The index of the virtqueue in the vhost device
117
* @errp: Error
118
+ *
119
+ * Note that this function does not rewind kick file descriptor if cannot set
120
+ * call one.
121
*/
122
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
123
VhostShadowVirtqueue *svq,
124
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
125
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
126
if (unlikely(r != 0)) {
127
error_setg_errno(errp, -r, "Can't set device kick fd");
128
+ return false;
129
+ }
130
+
131
+ event_notifier = &svq->hdev_call;
132
+ file.fd = event_notifier_get_fd(event_notifier);
133
+ r = vhost_vdpa_set_vring_dev_call(dev, &file);
134
+ if (unlikely(r != 0)) {
135
+ error_setg_errno(errp, -r, "Can't set device call fd");
136
}
137
138
return r == 0;
139
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
140
static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
141
struct vhost_vring_file *file)
49
{
142
{
50
core->mac[index] &= ~val;
143
- trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
51
@@ -XXX,XX +XXX,XX @@ igb_set_status(IGBCore *core, int index, uint32_t val)
144
- return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
52
static void
145
+ struct vhost_vdpa *v = dev->opaque;
53
igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
54
{
55
- trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
56
- !!(val & E1000_CTRL_EXT_SPD_BYPS));
57
-
58
- /* TODO: PFRSTD */
59
+ trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
60
+ !!(val & E1000_CTRL_EXT_SPD_BYPS),
61
+ !!(val & E1000_CTRL_EXT_PFRSTD));
62
63
/* Zero self-clearing bits */
64
val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
65
core->mac[CTRL_EXT] = val;
66
+
146
+
67
+ if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
147
+ if (v->shadow_vqs_enabled) {
68
+ for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
148
+ int vdpa_idx = file->index - dev->vq_index;
69
+ core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
149
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
70
+ core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
150
+
71
+ }
151
+ vhost_svq_set_svq_call_fd(svq, file->fd);
152
+ return 0;
153
+ } else {
154
+ return vhost_vdpa_set_vring_dev_call(dev, file);
72
+ }
155
+ }
73
}
156
}
74
157
75
static void
158
static int vhost_vdpa_get_features(struct vhost_dev *dev,
76
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
77
78
e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
79
80
+ for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
81
+ /* Set RSTI, so VF can identify a PF reset is in progress */
82
+ core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTI;
83
+ }
84
+
85
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
86
tx = &core->tx[i];
87
net_tx_pkt_reset(tx->tx_pkt, NULL);
88
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/hw/net/igb_regs.h
91
+++ b/hw/net/igb_regs.h
92
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
93
94
/* from igb/e1000_defines.h */
95
96
+/* Physical Func Reset Done Indication */
97
+#define E1000_CTRL_EXT_PFRSTD 0x00004000
98
+
99
#define E1000_IVAR_VALID 0x80
100
#define E1000_GPIE_NSICR 0x00000001
101
#define E1000_GPIE_MSIX_MODE 0x00000010
102
diff --git a/hw/net/trace-events b/hw/net/trace-events
103
index XXXXXXX..XXXXXXX 100644
104
--- a/hw/net/trace-events
105
+++ b/hw/net/trace-events
106
@@ -XXX,XX +XXX,XX @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED"
107
igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
108
igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
109
110
+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
111
+
112
igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
113
igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
114
115
--
159
--
116
2.7.4
160
2.7.4
161
162
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This allows SVQ to negotiate features with the guest and the device. For
4
the device, SVQ is a driver. While this function bypasses all
5
non-transport features, it needs to disable the features that SVQ does
6
not support when forwarding buffers. This includes packed vq layout,
7
indirect descriptors or event idx.
8
9
Future changes can add support to offer more features to the guest,
10
since the use of VirtQueue gives this for free. This is left out at the
11
moment for simplicity.
12
13
Acked-by: Michael S. Tsirkin <mst@redhat.com>
14
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
---
17
hw/virtio/vhost-shadow-virtqueue.c | 44 ++++++++++++++++++++++++++++++++++++++
18
hw/virtio/vhost-shadow-virtqueue.h | 2 ++
19
hw/virtio/vhost-vdpa.c | 15 +++++++++++++
20
3 files changed, 61 insertions(+)
21
22
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/virtio/vhost-shadow-virtqueue.c
25
+++ b/hw/virtio/vhost-shadow-virtqueue.c
26
@@ -XXX,XX +XXX,XX @@
27
#include "hw/virtio/vhost-shadow-virtqueue.h"
28
29
#include "qemu/error-report.h"
30
+#include "qapi/error.h"
31
#include "qemu/main-loop.h"
32
#include "linux-headers/linux/vhost.h"
33
34
/**
35
+ * Validate the transport device features that both guests can use with the SVQ
36
+ * and SVQs can use with the device.
37
+ *
38
+ * @features: The features
39
+ * @errp: Error pointer
40
+ */
41
+bool vhost_svq_valid_features(uint64_t features, Error **errp)
42
+{
43
+ bool ok = true;
44
+ uint64_t svq_features = features;
45
+
46
+ for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
47
+ ++b) {
48
+ switch (b) {
49
+ case VIRTIO_F_ANY_LAYOUT:
50
+ continue;
51
+
52
+ case VIRTIO_F_ACCESS_PLATFORM:
53
+ /* SVQ trusts the host's IOMMU to translate addresses */
54
+ case VIRTIO_F_VERSION_1:
55
+ /* SVQ trusts that the guest vring is little endian */
56
+ if (!(svq_features & BIT_ULL(b))) {
57
+ set_bit(b, &svq_features);
58
+ ok = false;
59
+ }
60
+ continue;
61
+
62
+ default:
63
+ if (svq_features & BIT_ULL(b)) {
64
+ clear_bit(b, &svq_features);
65
+ ok = false;
66
+ }
67
+ }
68
+ }
69
+
70
+ if (!ok) {
71
+ error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
72
+ ", ok: 0x%"PRIx64, features, svq_features);
73
+ }
74
+ return ok;
75
+}
76
+
77
+/**
78
* Forward guest notifications.
79
*
80
* @n: guest kick event notifier, the one that guest set to notify svq.
81
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/hw/virtio/vhost-shadow-virtqueue.h
84
+++ b/hw/virtio/vhost-shadow-virtqueue.h
85
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
86
EventNotifier svq_call;
87
} VhostShadowVirtqueue;
88
89
+bool vhost_svq_valid_features(uint64_t features, Error **errp);
90
+
91
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
92
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
93
94
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
95
index XXXXXXX..XXXXXXX 100644
96
--- a/hw/virtio/vhost-vdpa.c
97
+++ b/hw/virtio/vhost-vdpa.c
98
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
99
Error **errp)
100
{
101
g_autoptr(GPtrArray) shadow_vqs = NULL;
102
+ uint64_t dev_features, svq_features;
103
+ int r;
104
+ bool ok;
105
106
if (!v->shadow_vqs_enabled) {
107
return 0;
108
}
109
110
+ r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
111
+ if (r != 0) {
112
+ error_setg_errno(errp, -r, "Can't get vdpa device features");
113
+ return r;
114
+ }
115
+
116
+ svq_features = dev_features;
117
+ ok = vhost_svq_valid_features(svq_features, errp);
118
+ if (unlikely(!ok)) {
119
+ return -1;
120
+ }
121
+
122
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
123
for (unsigned n = 0; n < hdev->nvqs; ++n) {
124
g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
125
--
126
2.7.4
127
128
diff view generated by jsdifflib
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Also introduce:
3
It reports the shadow virtqueue address from qemu virtual address space.
4
- Checks for RXDCTL/TXDCTL queue enable bits
5
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)
6
4
7
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
5
Since this will be different from the guest's vaddr, but the device can
6
access it, SVQ takes special care about its alignment & lack of garbage
7
data. It assumes that IOMMU will work in host_page_size ranges for that.
8
9
Acked-by: Michael S. Tsirkin <mst@redhat.com>
10
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
12
---
10
hw/net/igb_core.c | 38 +++++++++++++++++++++++++++++++-------
13
hw/virtio/vhost-shadow-virtqueue.c | 29 +++++++++++++++++++++++++++++
11
hw/net/igb_core.h | 1 +
14
hw/virtio/vhost-shadow-virtqueue.h | 9 +++++++++
12
hw/net/igb_regs.h | 3 +++
15
2 files changed, 38 insertions(+)
13
3 files changed, 35 insertions(+), 7 deletions(-)
14
16
15
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
17
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
16
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/igb_core.c
19
--- a/hw/virtio/vhost-shadow-virtqueue.c
18
+++ b/hw/net/igb_core.c
20
+++ b/hw/virtio/vhost-shadow-virtqueue.c
19
@@ -XXX,XX +XXX,XX @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
21
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
20
return igb_tx_wb_eic(core, txi->idx);
21
}
22
}
22
23
23
+static inline bool
24
/**
24
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
25
+ * Get the shadow vq vring address.
26
+ * @svq: Shadow virtqueue
27
+ * @addr: Destination to store address
28
+ */
29
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
30
+ struct vhost_vring_addr *addr)
25
+{
31
+{
26
+ bool vmdq = core->mac[MRQC] & 1;
32
+ addr->desc_user_addr = (uint64_t)svq->vring.desc;
27
+ uint16_t qn = txi->idx;
33
+ addr->avail_user_addr = (uint64_t)svq->vring.avail;
28
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
34
+ addr->used_user_addr = (uint64_t)svq->vring.used;
29
+
30
+ return (core->mac[TCTL] & E1000_TCTL_EN) &&
31
+ (!vmdq || core->mac[VFTE] & BIT(pool)) &&
32
+ (core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
33
+}
35
+}
34
+
36
+
35
static void
37
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
36
igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
38
+{
37
{
39
+ size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
38
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
40
+ size_t avail_size = offsetof(vring_avail_t, ring) +
39
const E1000E_RingInfo *txi = txr->i;
41
+ sizeof(uint16_t) * svq->vring.num;
40
uint32_t eic = 0;
41
42
- /* TODO: check if the queue itself is enabled too. */
43
- if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
44
+ if (!igb_tx_enabled(core, txi)) {
45
trace_e1000e_tx_disabled();
46
return;
47
}
48
@@ -XXX,XX +XXX,XX @@ igb_can_receive(IGBCore *core)
49
50
for (i = 0; i < IGB_NUM_QUEUES; i++) {
51
E1000E_RxRing rxr;
52
+ if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
53
+ continue;
54
+ }
55
56
igb_rx_ring_init(core, &rxr, i);
57
if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
58
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
59
60
if (core->mac[MRQC] & 1) {
61
if (is_broadcast_ether_addr(ehdr->h_dest)) {
62
- for (i = 0; i < 8; i++) {
63
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
64
if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
65
queues |= BIT(i);
66
}
67
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
68
f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
69
f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
70
if (macp[f >> 5] & (1 << (f & 0x1f))) {
71
- for (i = 0; i < 8; i++) {
72
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
73
if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
74
queues |= BIT(i);
75
}
76
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
77
}
78
}
79
} else {
80
- for (i = 0; i < 8; i++) {
81
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
82
if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
83
mask |= BIT(i);
84
}
85
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
86
queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
87
}
88
89
+ queues &= core->mac[VFRE];
90
igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
91
if (rss_info->queue & 1) {
92
queues <<= 8;
93
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
94
e1000x_fcs_len(core->mac);
95
96
for (i = 0; i < IGB_NUM_QUEUES; i++) {
97
- if (!(queues & BIT(i))) {
98
+ if (!(queues & BIT(i)) ||
99
+ !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
100
continue;
101
}
102
103
@@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
104
105
static void igb_vf_reset(IGBCore *core, uint16_t vfn)
106
{
107
+ uint16_t qn0 = vfn;
108
+ uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
109
+
42
+
110
/* disable Rx and Tx for the VF*/
43
+ return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size);
111
- core->mac[VFTE] &= ~BIT(vfn);
44
+}
112
+ core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
45
+
113
+ core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
46
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
114
+ core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
47
+{
115
+ core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
48
+ size_t used_size = offsetof(vring_used_t, ring) +
116
core->mac[VFRE] &= ~BIT(vfn);
49
+ sizeof(vring_used_elem_t) * svq->vring.num;
117
+ core->mac[VFTE] &= ~BIT(vfn);
50
+ return ROUND_UP(used_size, qemu_real_host_page_size);
118
/* indicate VF reset to PF */
51
+}
119
core->mac[VFLRE] |= BIT(vfn);
52
+
120
/* VFLRE and mailbox use the same interrupt cause */
53
+/**
121
@@ -XXX,XX +XXX,XX @@ igb_phy_reg_init[] = {
54
* Set a new file descriptor for the guest to kick the SVQ and notify for avail
122
static const uint32_t igb_mac_reg_init[] = {
55
*
123
[LEDCTL] = 2 | (3 << 8) | BIT(15) | (6 << 16) | (7 << 24),
56
* @svq: The svq
124
[EEMNGCTL] = BIT(31),
57
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
125
+ [TXDCTL0] = E1000_TXDCTL_QUEUE_ENABLE,
126
[RXDCTL0] = E1000_RXDCTL_QUEUE_ENABLE | (1 << 16),
127
[RXDCTL1] = 1 << 16,
128
[RXDCTL2] = 1 << 16,
129
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
130
index XXXXXXX..XXXXXXX 100644
58
index XXXXXXX..XXXXXXX 100644
131
--- a/hw/net/igb_core.h
59
--- a/hw/virtio/vhost-shadow-virtqueue.h
132
+++ b/hw/net/igb_core.h
60
+++ b/hw/virtio/vhost-shadow-virtqueue.h
133
@@ -XXX,XX +XXX,XX @@
61
@@ -XXX,XX +XXX,XX @@
134
#define IGB_MSIX_VEC_NUM (10)
62
#define VHOST_SHADOW_VIRTQUEUE_H
135
#define IGBVF_MSIX_VEC_NUM (3)
63
136
#define IGB_NUM_QUEUES (16)
64
#include "qemu/event_notifier.h"
137
+#define IGB_NUM_VM_POOLS (8)
65
+#include "hw/virtio/virtio.h"
138
66
+#include "standard-headers/linux/vhost_types.h"
139
typedef struct IGBCore IGBCore;
67
140
68
/* Shadow virtqueue to relay notifications */
141
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
69
typedef struct VhostShadowVirtqueue {
142
index XXXXXXX..XXXXXXX 100644
70
+ /* Shadow vring */
143
--- a/hw/net/igb_regs.h
71
+ struct vring vring;
144
+++ b/hw/net/igb_regs.h
145
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
146
#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
147
#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000
148
149
+/* Additional Transmit Descriptor Control definitions */
150
+#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */
151
+
72
+
152
/* Additional Receive Descriptor Control definitions */
73
/* Shadow kick notifier, sent to vhost */
153
#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */
74
EventNotifier hdev_kick;
75
/* Shadow call notifier, sent to vhost */
76
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp);
77
78
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
79
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
80
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
81
+ struct vhost_vring_addr *addr);
82
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
83
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
84
85
void vhost_svq_stop(VhostShadowVirtqueue *svq);
154
86
155
--
87
--
156
2.7.4
88
2.7.4
89
90
diff view generated by jsdifflib
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
igb used to specify the PF as DMA requester when reading Tx packets.
3
First half of the buffers forwarding part, preparing vhost-vdpa
4
This caused Tx requests from VFs to be performed in the address space of
4
callbacks to SVQ to offer it. QEMU cannot enable it at this moment, so
5
the PF, defeating the purpose of SR-IOV. Add some logic to change the
5
this is effectively dead code at the moment, but it helps to reduce
6
requester depending on the queue, which can be assigned to a VF.
6
patch size.
7
7
8
Fixes: 3a977deebe ("Intrdocue igb device emulation")
8
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
11
---
12
hw/net/e1000e_core.c | 6 +++---
12
hw/virtio/vhost-vdpa.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
13
hw/net/igb_core.c | 13 ++++++++-----
13
1 file changed, 41 insertions(+), 7 deletions(-)
14
hw/net/net_tx_pkt.c | 3 ++-
15
hw/net/net_tx_pkt.h | 3 ++-
16
hw/net/vmxnet3.c | 4 ++--
17
5 files changed, 17 insertions(+), 12 deletions(-)
18
14
19
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/e1000e_core.c
17
--- a/hw/virtio/vhost-vdpa.c
22
+++ b/hw/net/e1000e_core.c
18
+++ b/hw/virtio/vhost-vdpa.c
23
@@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core,
19
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
24
}
20
return ret;
25
21
}
26
tx->skip_cp = false;
22
27
- net_tx_pkt_reset(tx->tx_pkt);
23
+static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
28
+ net_tx_pkt_reset(tx->tx_pkt, core->owner);
24
+ struct vhost_vring_state *ring)
29
25
+{
30
tx->sum_needed = 0;
26
+ trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
31
tx->cptse = 0;
27
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
32
@@ -XXX,XX +XXX,XX @@ e1000e_core_pci_uninit(E1000ECore *core)
28
+}
33
qemu_del_vm_change_state_handler(core->vmstate);
29
+
34
30
static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
35
for (i = 0; i < E1000E_NUM_QUEUES; i++) {
31
struct vhost_vring_file *file)
36
- net_tx_pkt_reset(core->tx[i].tx_pkt);
32
{
37
+ net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
33
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
38
net_tx_pkt_uninit(core->tx[i].tx_pkt);
34
return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
39
}
40
41
@@ -XXX,XX +XXX,XX @@ static void e1000e_reset(E1000ECore *core, bool sw)
42
e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
43
44
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
45
- net_tx_pkt_reset(core->tx[i].tx_pkt);
46
+ net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
47
memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
48
core->tx[i].skip_cp = false;
49
}
50
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/hw/net/igb_core.c
53
+++ b/hw/net/igb_core.c
54
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
55
56
static void
57
igb_process_tx_desc(IGBCore *core,
58
+ PCIDevice *dev,
59
struct igb_tx *tx,
60
union e1000_adv_tx_desc *tx_desc,
61
int queue_index)
62
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
63
64
tx->first = true;
65
tx->skip_cp = false;
66
- net_tx_pkt_reset(tx->tx_pkt);
67
+ net_tx_pkt_reset(tx->tx_pkt, dev);
68
}
69
}
35
}
70
36
71
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
37
+static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
72
d = core->owner;
38
+ struct vhost_vring_addr *addr)
73
}
39
+{
74
40
+ trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
75
+ net_tx_pkt_reset(txr->tx->tx_pkt, d);
41
+ addr->desc_user_addr, addr->used_user_addr,
42
+ addr->avail_user_addr,
43
+ addr->log_guest_addr);
76
+
44
+
77
while (!igb_ring_empty(core, txi)) {
45
+ return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
78
base = igb_ring_head_descr(core, txi);
46
+
79
47
+}
80
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
48
+
81
trace_e1000e_tx_descr((void *)(intptr_t)desc.read.buffer_addr,
49
/**
82
desc.read.cmd_type_len, desc.wb.status);
50
* Set the shadow virtqueue descriptors to the device
83
51
*
84
- igb_process_tx_desc(core, txr->tx, &desc, txi->idx);
52
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
85
+ igb_process_tx_desc(core, d, txr->tx, &desc, txi->idx);
53
static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
86
igb_ring_advance(core, txi, 1);
54
struct vhost_vring_addr *addr)
87
eic |= igb_txdesc_writeback(core, base, &desc, txi);
55
{
88
}
56
- trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
89
@@ -XXX,XX +XXX,XX @@ igb_core_pci_realize(IGBCore *core,
57
- addr->desc_user_addr, addr->used_user_addr,
90
core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);
58
- addr->avail_user_addr,
91
59
- addr->log_guest_addr);
92
for (i = 0; i < IGB_NUM_QUEUES; i++) {
60
- return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
93
- net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
61
+ struct vhost_vdpa *v = dev->opaque;
94
+ net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
62
+
95
}
63
+ if (v->shadow_vqs_enabled) {
96
64
+ /*
97
net_rx_pkt_init(&core->rx_pkt);
65
+ * Device vring addr was set at device start. SVQ base is handled by
98
@@ -XXX,XX +XXX,XX @@ igb_core_pci_uninit(IGBCore *core)
66
+ * VirtQueue code.
99
qemu_del_vm_change_state_handler(core->vmstate);
67
+ */
100
68
+ return 0;
101
for (i = 0; i < IGB_NUM_QUEUES; i++) {
69
+ }
102
- net_tx_pkt_reset(core->tx[i].tx_pkt);
70
+
103
+ net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
71
+ return vhost_vdpa_set_vring_dev_addr(dev, addr);
104
net_tx_pkt_uninit(core->tx[i].tx_pkt);
105
}
106
107
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
108
109
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
110
tx = &core->tx[i];
111
- net_tx_pkt_reset(tx->tx_pkt);
112
+ net_tx_pkt_reset(tx->tx_pkt, NULL);
113
memset(tx->ctx, 0, sizeof(tx->ctx));
114
tx->first = true;
115
tx->skip_cp = false;
116
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/hw/net/net_tx_pkt.c
119
+++ b/hw/net/net_tx_pkt.c
120
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
121
#endif
122
}
72
}
123
73
124
-void net_tx_pkt_reset(struct NetTxPkt *pkt)
74
static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
125
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
75
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
76
static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
77
struct vhost_vring_state *ring)
126
{
78
{
127
int i;
79
- trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
128
80
- return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
129
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
81
+ struct vhost_vdpa *v = dev->opaque;
130
pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
82
+
131
}
83
+ if (v->shadow_vqs_enabled) {
132
}
84
+ /*
133
+ pkt->pci_dev = pci_dev;
85
+ * Device vring base was set at device start. SVQ base is handled by
134
pkt->raw_frags = 0;
86
+ * VirtQueue code.
135
87
+ */
136
pkt->hdr_len = 0;
88
+ return 0;
137
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
89
+ }
138
index XXXXXXX..XXXXXXX 100644
90
+
139
--- a/hw/net/net_tx_pkt.h
91
+ return vhost_vdpa_set_dev_vring_base(dev, ring);
140
+++ b/hw/net/net_tx_pkt.h
141
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
142
* reset tx packet private context (needed to be called between packets)
143
*
144
* @pkt: packet
145
+ * @dev: PCI device processing the next packet
146
*
147
*/
148
-void net_tx_pkt_reset(struct NetTxPkt *pkt);
149
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
150
151
/**
152
* Send packet to qemu. handles sw offloads if vhdr is not supported.
153
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
154
index XXXXXXX..XXXXXXX 100644
155
--- a/hw/net/vmxnet3.c
156
+++ b/hw/net/vmxnet3.c
157
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
158
vmxnet3_complete_packet(s, qidx, txd_idx);
159
s->tx_sop = true;
160
s->skip_current_tx_pkt = false;
161
- net_tx_pkt_reset(s->tx_pkt);
162
+ net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
163
}
164
}
165
}
92
}
166
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
93
167
{
94
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
168
if (s->device_active) {
169
VMW_CBPRN("Deactivating vmxnet3...");
170
- net_tx_pkt_reset(s->tx_pkt);
171
+ net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
172
net_tx_pkt_uninit(s->tx_pkt);
173
net_rx_pkt_uninit(s->rx_pkt);
174
s->device_active = false;
175
--
95
--
176
2.7.4
96
2.7.4
97
98
diff view generated by jsdifflib
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The current implementation of igb uses only part of an advanced Tx
3
Initial version of shadow virtqueue that actually forward buffers. There
4
context descriptor and first data descriptor because it misses some
4
is no iommu support at the moment, and that will be addressed in future
5
features and sniffs the trait of the packet instead of respecting the
5
patches of this series. Since all vhost-vdpa devices use forced IOMMU,
6
packet type specified in the descriptor. However, we will certainly
6
this means that SVQ is not usable at this point of the series on any
7
need the entire Tx context descriptor when we update igb to respect
7
device.
8
these ignored fields. Save the entire context descriptor and first
8
9
data descriptor except the buffer address to prepare for such a change.
9
For simplicity it only supports modern devices, which expect vring
10
10
in little endian, with split ring and no event idx or indirect
11
This also introduces the distinction of contexts with different
11
descriptors. Support for them will not be added in this series.
12
indexes, which was not present in e1000e but in igb.
12
13
13
It reuses the VirtQueue code for the device part. The driver part is
14
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
14
based on Linux's virtio_ring driver, but with stripped functionality
15
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
15
and optimizations so it's easier to review.
16
17
However, forwarding buffers have some particular pieces: One of the most
18
unexpected ones is that a guest's buffer can expand through more than
19
one descriptor in SVQ. While this is handled gracefully by qemu's
20
emulated virtio devices, it may cause unexpected SVQ queue full. This
21
patch also solves it by checking for this condition at both guest's
22
kicks and device's calls. The code may be more elegant in the future if
23
SVQ code runs in its own iocontext.
24
25
Acked-by: Michael S. Tsirkin <mst@redhat.com>
26
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
27
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
28
---
18
hw/net/igb.c | 26 +++++++++++++++++++-------
29
hw/virtio/vhost-shadow-virtqueue.c | 354 ++++++++++++++++++++++++++++++++++++-
19
hw/net/igb_core.c | 39 +++++++++++++++++++--------------------
30
hw/virtio/vhost-shadow-virtqueue.h | 26 +++
20
hw/net/igb_core.h | 8 +++-----
31
hw/virtio/vhost-vdpa.c | 159 ++++++++++++++++-
21
3 files changed, 41 insertions(+), 32 deletions(-)
32
3 files changed, 527 insertions(+), 12 deletions(-)
22
33
23
diff --git a/hw/net/igb.c b/hw/net/igb.c
34
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
24
index XXXXXXX..XXXXXXX 100644
35
index XXXXXXX..XXXXXXX 100644
25
--- a/hw/net/igb.c
36
--- a/hw/virtio/vhost-shadow-virtqueue.c
26
+++ b/hw/net/igb.c
37
+++ b/hw/virtio/vhost-shadow-virtqueue.c
27
@@ -XXX,XX +XXX,XX @@ static int igb_post_load(void *opaque, int version_id)
38
@@ -XXX,XX +XXX,XX @@
28
return igb_core_post_load(&s->core);
39
#include "qemu/error-report.h"
40
#include "qapi/error.h"
41
#include "qemu/main-loop.h"
42
+#include "qemu/log.h"
43
+#include "qemu/memalign.h"
44
#include "linux-headers/linux/vhost.h"
45
46
/**
47
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
29
}
48
}
30
49
31
-static const VMStateDescription igb_vmstate_tx = {
50
/**
32
- .name = "igb-tx",
51
- * Forward guest notifications.
33
+static const VMStateDescription igb_vmstate_tx_ctx = {
52
+ * Number of descriptors that the SVQ can make available from the guest.
34
+ .name = "igb-tx-ctx",
53
+ *
35
.version_id = 1,
54
+ * @svq: The svq
36
.minimum_version_id = 1,
55
+ */
37
.fields = (VMStateField[]) {
56
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
38
- VMSTATE_UINT16(vlan, struct igb_tx),
57
+{
39
- VMSTATE_UINT16(mss, struct igb_tx),
58
+ return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
40
- VMSTATE_BOOL(tse, struct igb_tx),
59
+}
41
- VMSTATE_BOOL(ixsm, struct igb_tx),
60
+
42
- VMSTATE_BOOL(txsm, struct igb_tx),
61
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
43
+ VMSTATE_UINT32(vlan_macip_lens, struct e1000_adv_tx_context_desc),
62
+ const struct iovec *iovec,
44
+ VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
63
+ size_t num, bool more_descs, bool write)
45
+ VMSTATE_UINT32(type_tucmd_mlhl, struct e1000_adv_tx_context_desc),
64
+{
46
+ VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
65
+ uint16_t i = svq->free_head, last = svq->free_head;
47
+ VMSTATE_END_OF_LIST()
66
+ unsigned n;
48
+ }
67
+ uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
49
+};
68
+ vring_desc_t *descs = svq->vring.desc;
50
+
69
+
51
+static const VMStateDescription igb_vmstate_tx = {
70
+ if (num == 0) {
52
+ .name = "igb-tx",
71
+ return;
53
+ .version_id = 2,
72
+ }
54
+ .minimum_version_id = 2,
73
+
55
+ .fields = (VMStateField[]) {
74
+ for (n = 0; n < num; n++) {
56
+ VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
75
+ if (more_descs || (n + 1 < num)) {
57
+ struct e1000_adv_tx_context_desc),
76
+ descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
58
+ VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
77
+ } else {
59
+ VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
78
+ descs[i].flags = flags;
60
VMSTATE_BOOL(first, struct igb_tx),
79
+ }
61
VMSTATE_BOOL(skip_cp, struct igb_tx),
80
+ descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
62
VMSTATE_END_OF_LIST()
81
+ descs[i].len = cpu_to_le32(iovec[n].iov_len);
63
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
82
+
83
+ last = i;
84
+ i = cpu_to_le16(descs[i].next);
85
+ }
86
+
87
+ svq->free_head = le16_to_cpu(descs[last].next);
88
+}
89
+
90
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
91
+ VirtQueueElement *elem,
92
+ unsigned *head)
93
+{
94
+ unsigned avail_idx;
95
+ vring_avail_t *avail = svq->vring.avail;
96
+
97
+ *head = svq->free_head;
98
+
99
+ /* We need some descriptors here */
100
+ if (unlikely(!elem->out_num && !elem->in_num)) {
101
+ qemu_log_mask(LOG_GUEST_ERROR,
102
+ "Guest provided element with no descriptors");
103
+ return false;
104
+ }
105
+
106
+ vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
107
+ elem->in_num > 0, false);
108
+ vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
109
+
110
+ /*
111
+ * Put the entry in the available array (but don't update avail->idx until
112
+ * they do sync).
113
+ */
114
+ avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
115
+ avail->ring[avail_idx] = cpu_to_le16(*head);
116
+ svq->shadow_avail_idx++;
117
+
118
+ /* Update the avail index after writing the descriptor */
119
+ smp_wmb();
120
+ avail->idx = cpu_to_le16(svq->shadow_avail_idx);
121
+
122
+ return true;
123
+}
124
+
125
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
126
+{
127
+ unsigned qemu_head;
128
+ bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
129
+ if (unlikely(!ok)) {
130
+ return false;
131
+ }
132
+
133
+ svq->ring_id_maps[qemu_head] = elem;
134
+ return true;
135
+}
136
+
137
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
138
+{
139
+ /*
140
+ * We need to expose the available array entries before checking the used
141
+ * flags
142
+ */
143
+ smp_mb();
144
+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
145
+ return;
146
+ }
147
+
148
+ event_notifier_set(&svq->hdev_kick);
149
+}
150
+
151
+/**
152
+ * Forward available buffers.
153
+ *
154
+ * @svq: Shadow VirtQueue
155
+ *
156
+ * Note that this function does not guarantee that all guest's available
157
+ * buffers are available to the device in SVQ avail ring. The guest may have
158
+ * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
159
+ * qemu vaddr.
160
+ *
161
+ * If that happens, guest's kick notifications will be disabled until the
162
+ * device uses some buffers.
163
+ */
164
+static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
165
+{
166
+ /* Clear event notifier */
167
+ event_notifier_test_and_clear(&svq->svq_kick);
168
+
169
+ /* Forward to the device as many available buffers as possible */
170
+ do {
171
+ virtio_queue_set_notification(svq->vq, false);
172
+
173
+ while (true) {
174
+ VirtQueueElement *elem;
175
+ bool ok;
176
+
177
+ if (svq->next_guest_avail_elem) {
178
+ elem = g_steal_pointer(&svq->next_guest_avail_elem);
179
+ } else {
180
+ elem = virtqueue_pop(svq->vq, sizeof(*elem));
181
+ }
182
+
183
+ if (!elem) {
184
+ break;
185
+ }
186
+
187
+ if (elem->out_num + elem->in_num >
188
+ vhost_svq_available_slots(svq)) {
189
+ /*
190
+ * This condition is possible since a contiguous buffer in GPA
191
+ * does not imply a contiguous buffer in qemu's VA
192
+ * scatter-gather segments. If that happens, the buffer exposed
193
+ * to the device needs to be a chain of descriptors at this
194
+ * moment.
195
+ *
196
+ * SVQ cannot hold more available buffers if we are here:
197
+ * queue the current guest descriptor and ignore further kicks
198
+ * until some elements are used.
199
+ */
200
+ svq->next_guest_avail_elem = elem;
201
+ return;
202
+ }
203
+
204
+ ok = vhost_svq_add(svq, elem);
205
+ if (unlikely(!ok)) {
206
+ /* VQ is broken, just return and ignore any other kicks */
207
+ return;
208
+ }
209
+ vhost_svq_kick(svq);
210
+ }
211
+
212
+ virtio_queue_set_notification(svq->vq, true);
213
+ } while (!virtio_queue_empty(svq->vq));
214
+}
215
+
216
+/**
217
+ * Handle guest's kick.
218
*
219
* @n: guest kick event notifier, the one that guest set to notify svq.
220
*/
221
-static void vhost_handle_guest_kick(EventNotifier *n)
222
+static void vhost_handle_guest_kick_notifier(EventNotifier *n)
223
{
224
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
225
svq_kick);
226
event_notifier_test_and_clear(n);
227
- event_notifier_set(&svq->hdev_kick);
228
+ vhost_handle_guest_kick(svq);
229
+}
230
+
231
+static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
232
+{
233
+ if (svq->last_used_idx != svq->shadow_used_idx) {
234
+ return true;
235
+ }
236
+
237
+ svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
238
+
239
+ return svq->last_used_idx != svq->shadow_used_idx;
240
}
241
242
/**
243
- * Forward vhost notifications
244
+ * Enable vhost device calls after disabling them.
245
+ *
246
+ * @svq: The svq
247
+ *
248
+ * It returns false if there are pending used buffers from the vhost device,
249
+ * avoiding the possible races between SVQ checking for more work and enabling
250
+ * callbacks. True if SVQ used vring has no more pending buffers.
251
+ */
252
+static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
253
+{
254
+ svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
255
+ /* Make sure the flag is written before the read of used_idx */
256
+ smp_mb();
257
+ return !vhost_svq_more_used(svq);
258
+}
259
+
260
+static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
261
+{
262
+ svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
263
+}
264
+
265
+static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
266
+ uint32_t *len)
267
+{
268
+ vring_desc_t *descs = svq->vring.desc;
269
+ const vring_used_t *used = svq->vring.used;
270
+ vring_used_elem_t used_elem;
271
+ uint16_t last_used;
272
+
273
+ if (!vhost_svq_more_used(svq)) {
274
+ return NULL;
275
+ }
276
+
277
+ /* Only get used array entries after they have been exposed by dev */
278
+ smp_rmb();
279
+ last_used = svq->last_used_idx & (svq->vring.num - 1);
280
+ used_elem.id = le32_to_cpu(used->ring[last_used].id);
281
+ used_elem.len = le32_to_cpu(used->ring[last_used].len);
282
+
283
+ svq->last_used_idx++;
284
+ if (unlikely(used_elem.id >= svq->vring.num)) {
285
+ qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
286
+ svq->vdev->name, used_elem.id);
287
+ return NULL;
288
+ }
289
+
290
+ if (unlikely(!svq->ring_id_maps[used_elem.id])) {
291
+ qemu_log_mask(LOG_GUEST_ERROR,
292
+ "Device %s says index %u is used, but it was not available",
293
+ svq->vdev->name, used_elem.id);
294
+ return NULL;
295
+ }
296
+
297
+ descs[used_elem.id].next = svq->free_head;
298
+ svq->free_head = used_elem.id;
299
+
300
+ *len = used_elem.len;
301
+ return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
302
+}
303
+
304
+static void vhost_svq_flush(VhostShadowVirtqueue *svq,
305
+ bool check_for_avail_queue)
306
+{
307
+ VirtQueue *vq = svq->vq;
308
+
309
+ /* Forward as many used buffers as possible. */
310
+ do {
311
+ unsigned i = 0;
312
+
313
+ vhost_svq_disable_notification(svq);
314
+ while (true) {
315
+ uint32_t len;
316
+ g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
317
+ if (!elem) {
318
+ break;
319
+ }
320
+
321
+ if (unlikely(i >= svq->vring.num)) {
322
+ qemu_log_mask(LOG_GUEST_ERROR,
323
+ "More than %u used buffers obtained in a %u size SVQ",
324
+ i, svq->vring.num);
325
+ virtqueue_fill(vq, elem, len, i);
326
+ virtqueue_flush(vq, i);
327
+ return;
328
+ }
329
+ virtqueue_fill(vq, elem, len, i++);
330
+ }
331
+
332
+ virtqueue_flush(vq, i);
333
+ event_notifier_set(&svq->svq_call);
334
+
335
+ if (check_for_avail_queue && svq->next_guest_avail_elem) {
336
+ /*
337
+ * Avail ring was full when vhost_svq_flush was called, so it's a
338
+ * good moment to make more descriptors available if possible.
339
+ */
340
+ vhost_handle_guest_kick(svq);
341
+ }
342
+ } while (!vhost_svq_enable_notification(svq));
343
+}
344
+
345
+/**
346
+ * Forward used buffers.
347
*
348
* @n: hdev call event notifier, the one that device set to notify svq.
349
+ *
350
+ * Note that we are not making any buffers available in the loop, there is no
351
+ * way that it runs more than virtqueue size times.
352
*/
353
static void vhost_svq_handle_call(EventNotifier *n)
354
{
355
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
356
hdev_call);
357
event_notifier_test_and_clear(n);
358
- event_notifier_set(&svq->svq_call);
359
+ vhost_svq_flush(svq, true);
360
}
361
362
/**
363
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
364
if (poll_start) {
365
event_notifier_init_fd(svq_kick, svq_kick_fd);
366
event_notifier_set(svq_kick);
367
- event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
368
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
369
+ }
370
+}
371
+
372
+/**
373
+ * Start the shadow virtqueue operation.
374
+ *
375
+ * @svq: Shadow Virtqueue
376
+ * @vdev: VirtIO device
377
+ * @vq: Virtqueue to shadow
378
+ */
379
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
380
+ VirtQueue *vq)
381
+{
382
+ size_t desc_size, driver_size, device_size;
383
+
384
+ svq->next_guest_avail_elem = NULL;
385
+ svq->shadow_avail_idx = 0;
386
+ svq->shadow_used_idx = 0;
387
+ svq->last_used_idx = 0;
388
+ svq->vdev = vdev;
389
+ svq->vq = vq;
390
+
391
+ svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
392
+ driver_size = vhost_svq_driver_area_size(svq);
393
+ device_size = vhost_svq_device_area_size(svq);
394
+ svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
395
+ desc_size = sizeof(vring_desc_t) * svq->vring.num;
396
+ svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
397
+ memset(svq->vring.desc, 0, driver_size);
398
+ svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
399
+ memset(svq->vring.used, 0, device_size);
400
+ svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
401
+ for (unsigned i = 0; i < svq->vring.num - 1; i++) {
402
+ svq->vring.desc[i].next = cpu_to_le16(i + 1);
403
}
404
}
405
406
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
407
void vhost_svq_stop(VhostShadowVirtqueue *svq)
408
{
409
event_notifier_set_handler(&svq->svq_kick, NULL);
410
+ g_autofree VirtQueueElement *next_avail_elem = NULL;
411
+
412
+ if (!svq->vq) {
413
+ return;
414
+ }
415
+
416
+ /* Send all pending used descriptors to guest */
417
+ vhost_svq_flush(svq, false);
418
+
419
+ for (unsigned i = 0; i < svq->vring.num; ++i) {
420
+ g_autofree VirtQueueElement *elem = NULL;
421
+ elem = g_steal_pointer(&svq->ring_id_maps[i]);
422
+ if (elem) {
423
+ virtqueue_detach_element(svq->vq, elem, 0);
424
+ }
425
+ }
426
+
427
+ next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
428
+ if (next_avail_elem) {
429
+ virtqueue_detach_element(svq->vq, next_avail_elem, 0);
430
+ }
431
+ svq->vq = NULL;
432
+ g_free(svq->ring_id_maps);
433
+ qemu_vfree(svq->vring.desc);
434
+ qemu_vfree(svq->vring.used);
435
}
436
437
/**
438
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
64
index XXXXXXX..XXXXXXX 100644
439
index XXXXXXX..XXXXXXX 100644
65
--- a/hw/net/igb_core.c
440
--- a/hw/virtio/vhost-shadow-virtqueue.h
66
+++ b/hw/net/igb_core.c
441
+++ b/hw/virtio/vhost-shadow-virtqueue.h
67
@@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
442
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
68
static bool
443
69
igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
444
/* Guest's call notifier, where the SVQ calls guest. */
445
EventNotifier svq_call;
446
+
447
+ /* Virtio queue shadowing */
448
+ VirtQueue *vq;
449
+
450
+ /* Virtio device */
451
+ VirtIODevice *vdev;
452
+
453
+ /* Map from SVQ descriptor head to the guest's element */
454
+ VirtQueueElement **ring_id_maps;
455
+
456
+ /* Next VirtQueue element that guest made available */
457
+ VirtQueueElement *next_guest_avail_elem;
458
+
459
+ /* Next head to expose to the device */
460
+ uint16_t shadow_avail_idx;
461
+
462
+ /* Next free descriptor */
463
+ uint16_t free_head;
464
+
465
+ /* Last seen used idx */
466
+ uint16_t shadow_used_idx;
467
+
468
+ /* Next head to consume from the device */
469
+ uint16_t last_used_idx;
470
} VhostShadowVirtqueue;
471
472
bool vhost_svq_valid_features(uint64_t features, Error **errp);
473
@@ -XXX,XX +XXX,XX @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
474
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
475
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
476
477
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
478
+ VirtQueue *vq);
479
void vhost_svq_stop(VhostShadowVirtqueue *svq);
480
481
VhostShadowVirtqueue *vhost_svq_new(void);
482
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
483
index XXXXXXX..XXXXXXX 100644
484
--- a/hw/virtio/vhost-vdpa.c
485
+++ b/hw/virtio/vhost-vdpa.c
486
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
487
* Note that this function does not rewind the kick file descriptor if it cannot set
488
* call one.
489
*/
490
-static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
491
- VhostShadowVirtqueue *svq,
492
- unsigned idx,
493
- Error **errp)
494
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
495
+ VhostShadowVirtqueue *svq,
496
+ unsigned idx,
497
+ Error **errp)
70
{
498
{
71
- if (tx->tse) {
499
struct vhost_vring_file file = {
72
- if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
500
.index = dev->vq_index + idx,
73
+ if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
501
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
74
+ uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
502
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
75
+ uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
503
if (unlikely(r != 0)) {
76
+ if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
504
error_setg_errno(errp, -r, "Can't set device kick fd");
77
return false;
505
- return false;
78
}
506
+ return r;
79
80
@@ -XXX,XX +XXX,XX @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
81
return true;
82
}
507
}
83
508
84
- if (tx->txsm) {
509
event_notifier = &svq->hdev_call;
85
+ if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
510
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
86
if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
511
error_setg_errno(errp, -r, "Can't set device call fd");
512
}
513
514
+ return r;
515
+}
516
+
517
+/**
518
+ * Unmap a SVQ area in the device
519
+ */
520
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
521
+ hwaddr size)
522
+{
523
+ int r;
524
+
525
+ size = ROUND_UP(size, qemu_real_host_page_size);
526
+ r = vhost_vdpa_dma_unmap(v, iova, size);
527
+ return r == 0;
528
+}
529
+
530
+static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
531
+ const VhostShadowVirtqueue *svq)
532
+{
533
+ struct vhost_vdpa *v = dev->opaque;
534
+ struct vhost_vring_addr svq_addr;
535
+ size_t device_size = vhost_svq_device_area_size(svq);
536
+ size_t driver_size = vhost_svq_driver_area_size(svq);
537
+ bool ok;
538
+
539
+ vhost_svq_get_vring_addr(svq, &svq_addr);
540
+
541
+ ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
542
+ if (unlikely(!ok)) {
543
+ return false;
544
+ }
545
+
546
+ return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
547
+}
548
+
549
+/**
550
+ * Map the shadow virtqueue rings in the device
551
+ *
552
+ * @dev: The vhost device
553
+ * @svq: The shadow virtqueue
554
+ * @addr: Assigned IOVA addresses
555
+ * @errp: Error pointer
556
+ */
557
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
558
+ const VhostShadowVirtqueue *svq,
559
+ struct vhost_vring_addr *addr,
560
+ Error **errp)
561
+{
562
+ struct vhost_vdpa *v = dev->opaque;
563
+ size_t device_size = vhost_svq_device_area_size(svq);
564
+ size_t driver_size = vhost_svq_driver_area_size(svq);
565
+ int r;
566
+
567
+ ERRP_GUARD();
568
+ vhost_svq_get_vring_addr(svq, addr);
569
+
570
+ r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
571
+ (void *)addr->desc_user_addr, true);
572
+ if (unlikely(r != 0)) {
573
+ error_setg_errno(errp, -r, "Cannot create vq driver region: ");
574
+ return false;
575
+ }
576
+
577
+ r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
578
+ (void *)addr->used_user_addr, false);
579
+ if (unlikely(r != 0)) {
580
+ error_setg_errno(errp, -r, "Cannot create vq device region: ");
581
+ }
582
+
583
+ return r == 0;
584
+}
585
+
586
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
587
+ VhostShadowVirtqueue *svq,
588
+ unsigned idx,
589
+ Error **errp)
590
+{
591
+ uint16_t vq_index = dev->vq_index + idx;
592
+ struct vhost_vring_state s = {
593
+ .index = vq_index,
594
+ };
595
+ int r;
596
+
597
+ r = vhost_vdpa_set_dev_vring_base(dev, &s);
598
+ if (unlikely(r)) {
599
+ error_setg_errno(errp, -r, "Cannot set vring base");
600
+ return false;
601
+ }
602
+
603
+ r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
604
return r == 0;
605
}
606
607
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
608
}
609
610
for (i = 0; i < v->shadow_vqs->len; ++i) {
611
+ VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
612
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
613
+ struct vhost_vring_addr addr = {
614
+ .index = i,
615
+ };
616
+ int r;
617
bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
618
if (unlikely(!ok)) {
619
- error_reportf_err(err, "Cannot setup SVQ %u: ", i);
620
+ goto err;
621
+ }
622
+
623
+ vhost_svq_start(svq, dev->vdev, vq);
624
+ ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
625
+ if (unlikely(!ok)) {
626
+ goto err_map;
627
+ }
628
+
629
+ /* Override vring GPA set by vhost subsystem */
630
+ r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
631
+ if (unlikely(r != 0)) {
632
+ error_setg_errno(&err, -r, "Cannot set device address");
633
+ goto err_set_addr;
634
+ }
635
+ }
636
+
637
+ return true;
638
+
639
+err_set_addr:
640
+ vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
641
+
642
+err_map:
643
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
644
+
645
+err:
646
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
647
+ for (unsigned j = 0; j < i; ++j) {
648
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
649
+ vhost_vdpa_svq_unmap_rings(dev, svq);
650
+ vhost_svq_stop(svq);
651
+ }
652
+
653
+ return false;
654
+}
655
+
656
+static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
657
+{
658
+ struct vhost_vdpa *v = dev->opaque;
659
+
660
+ if (!v->shadow_vqs) {
661
+ return true;
662
+ }
663
+
664
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
665
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
666
+ i);
667
+ bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
668
+ if (unlikely(!ok)) {
87
return false;
669
return false;
88
}
670
}
89
}
671
}
90
672
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
91
- if (tx->ixsm) {
673
}
92
+ if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
674
vhost_vdpa_set_vring_ready(dev);
93
net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
675
} else {
676
+ ok = vhost_vdpa_svqs_stop(dev);
677
+ if (unlikely(!ok)) {
678
+ return -1;
679
+ }
680
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
94
}
681
}
95
682
96
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
97
{
98
struct e1000_adv_tx_context_desc *tx_ctx_desc;
99
uint32_t cmd_type_len;
100
- uint32_t olinfo_status;
101
+ uint32_t idx;
102
uint64_t buffer_addr;
103
uint16_t length;
104
105
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
106
E1000_ADVTXD_DTYP_DATA) {
107
/* advanced transmit data descriptor */
108
if (tx->first) {
109
- olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
110
-
111
- tx->tse = !!(cmd_type_len & E1000_ADVTXD_DCMD_TSE);
112
- tx->ixsm = !!(olinfo_status & E1000_ADVTXD_POTS_IXSM);
113
- tx->txsm = !!(olinfo_status & E1000_ADVTXD_POTS_TXSM);
114
-
115
+ tx->first_cmd_type_len = cmd_type_len;
116
+ tx->first_olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
117
tx->first = false;
118
}
119
} else if ((cmd_type_len & E1000_ADVTXD_DTYP_CTXT) ==
120
E1000_ADVTXD_DTYP_CTXT) {
121
/* advanced transmit context descriptor */
122
tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
123
- tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
124
- tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
125
+ idx = (le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 4) & 1;
126
+ tx->ctx[idx].vlan_macip_lens = le32_to_cpu(tx_ctx_desc->vlan_macip_lens);
127
+ tx->ctx[idx].seqnum_seed = le32_to_cpu(tx_ctx_desc->seqnum_seed);
128
+ tx->ctx[idx].type_tucmd_mlhl = le32_to_cpu(tx_ctx_desc->type_tucmd_mlhl);
129
+ tx->ctx[idx].mss_l4len_idx = le32_to_cpu(tx_ctx_desc->mss_l4len_idx);
130
return;
131
} else {
132
/* unknown descriptor type */
133
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
134
if (cmd_type_len & E1000_TXD_CMD_EOP) {
135
if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
136
if (cmd_type_len & E1000_TXD_CMD_VLE) {
137
- net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
138
- core->mac[VET] & 0xffff);
139
+ idx = (tx->first_olinfo_status >> 4) & 1;
140
+ uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
141
+ uint16_t vet = core->mac[VET] & 0xffff;
142
+ net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
143
}
144
if (igb_tx_pkt_send(core, tx, queue_index)) {
145
igb_on_tx_done_update_stats(core, tx->tx_pkt);
146
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
147
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
148
tx = &core->tx[i];
149
net_tx_pkt_reset(tx->tx_pkt);
150
- tx->vlan = 0;
151
- tx->mss = 0;
152
- tx->tse = false;
153
- tx->ixsm = false;
154
- tx->txsm = false;
155
+ memset(tx->ctx, 0, sizeof(tx->ctx));
156
tx->first = true;
157
tx->skip_cp = false;
158
}
159
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
160
index XXXXXXX..XXXXXXX 100644
161
--- a/hw/net/igb_core.h
162
+++ b/hw/net/igb_core.h
163
@@ -XXX,XX +XXX,XX @@ struct IGBCore {
164
QEMUTimer *autoneg_timer;
165
166
struct igb_tx {
167
- uint16_t vlan; /* VLAN Tag */
168
- uint16_t mss; /* Maximum Segment Size */
169
- bool tse; /* TCP/UDP Segmentation Enable */
170
- bool ixsm; /* Insert IP Checksum */
171
- bool txsm; /* Insert TCP/UDP Checksum */
172
+ struct e1000_adv_tx_context_desc ctx[2];
173
+ uint32_t first_cmd_type_len;
174
+ uint32_t first_olinfo_status;
175
176
bool first;
177
bool skip_cp;
178
--
683
--
179
2.7.4
684
2.7.4
685
686
diff view generated by jsdifflib
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Align the l3_hdr member of NetTxPkt by defining it as a union of
3
This iova tree function allows one to look for a hole in allocated
4
ip_header, ip6_header, and an array of octets.
4
regions and return a totally new translation for a given translated
5
5
address.
6
Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
6
7
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1544
7
Its usage is mainly to allow devices to access qemu address space,
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
8
remapping guest's one into a new iova space where qemu can add chunks of
9
addresses.
10
11
Acked-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Reviewed-by: Peter Xu <peterx@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
15
---
11
hw/net/net_tx_pkt.c | 21 +++++++++++----------
16
include/qemu/iova-tree.h | 18 +++++++
12
1 file changed, 11 insertions(+), 10 deletions(-)
17
util/iova-tree.c | 135 +++++++++++++++++++++++++++++++++++++++++++++++
13
18
2 files changed, 153 insertions(+)
14
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
19
20
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
15
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/net_tx_pkt.c
22
--- a/include/qemu/iova-tree.h
17
+++ b/hw/net/net_tx_pkt.c
23
+++ b/include/qemu/iova-tree.h
18
@@ -XXX,XX +XXX,XX @@ struct NetTxPkt {
24
@@ -XXX,XX +XXX,XX @@
19
struct iovec *vec;
25
#define IOVA_OK (0)
20
26
#define IOVA_ERR_INVALID (-1) /* Invalid parameters */
21
uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
27
#define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */
22
- uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN];
28
+#define IOVA_ERR_NOMEM (-3) /* Cannot allocate */
23
+ union {
29
24
+ struct ip_header ip;
30
typedef struct IOVATree IOVATree;
25
+ struct ip6_header ip6;
31
typedef struct DMAMap {
26
+ uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
32
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova);
27
+ } l3_hdr;
33
void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
28
34
29
uint32_t payload_len;
35
/**
30
36
+ * iova_tree_alloc_map:
31
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
37
+ *
38
+ * @tree: the iova tree to allocate from
39
+ * @map: the new map (as translated addr & size) to allocate in the iova region
40
+ * @iova_begin: the minimum address of the allocation
41
+ * @iova_end: the maximum address of the allocation
42
+ *
43
+ * Allocates a new region of a given size, between iova_begin and iova_end.
44
+ *
45
+ * Return: Same as iova_tree_insert, but cannot overlap and can return error if
46
+ * iova tree is out of free contiguous range. The caller gets the assigned iova
47
+ * in map->iova.
48
+ */
49
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
50
+ hwaddr iova_end);
51
+
52
+/**
53
* iova_tree_destroy:
54
*
55
* @tree: the iova tree to destroy
56
diff --git a/util/iova-tree.c b/util/iova-tree.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/util/iova-tree.c
59
+++ b/util/iova-tree.c
60
@@ -XXX,XX +XXX,XX @@ struct IOVATree {
61
GTree *tree;
62
};
63
64
+/* Args to pass to iova_tree_alloc foreach function. */
65
+struct IOVATreeAllocArgs {
66
+ /* Size of the desired allocation */
67
+ size_t new_size;
68
+
69
+ /* The minimum address allowed in the allocation */
70
+ hwaddr iova_begin;
71
+
72
+ /* Map at the left of the hole, can be NULL if "this" is first one */
73
+ const DMAMap *prev;
74
+
75
+ /* Map at the right of the hole, can be NULL if "prev" is the last one */
76
+ const DMAMap *this;
77
+
78
+ /* If found, we fill in the IOVA here */
79
+ hwaddr iova_result;
80
+
81
+ /* Whether we have found a valid IOVA */
82
+ bool iova_found;
83
+};
84
+
85
+/**
86
+ * Iterate args to the next hole
87
+ *
88
+ * @args: The alloc arguments
89
+ * @next: The next mapping in the tree. Can be NULL to signal the last one
90
+ */
91
+static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args,
92
+ const DMAMap *next) {
93
+ args->prev = args->this;
94
+ args->this = next;
95
+}
96
+
97
static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
32
{
98
{
33
uint16_t csum;
99
const DMAMap *m1 = a, *m2 = b;
34
assert(pkt);
100
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map)
35
- struct ip_header *ip_hdr;
101
return IOVA_OK;
36
- ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
37
38
- ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
39
+ pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
40
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
41
42
- ip_hdr->ip_sum = 0;
43
- csum = net_raw_checksum((uint8_t *)ip_hdr,
44
+ pkt->l3_hdr.ip.ip_sum = 0;
45
+ csum = net_raw_checksum(pkt->l3_hdr.octets,
46
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
47
- ip_hdr->ip_sum = cpu_to_be16(csum);
48
+ pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
49
}
102
}
50
103
51
void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
104
+/**
52
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
105
+ * Try to find an unallocated IOVA range between prev and this elements.
106
+ *
107
+ * @args: Arguments to allocation
108
+ *
109
+ * Cases:
110
+ *
111
+ * (1) !prev, !this: No entries allocated, always succeed
112
+ *
113
+ * (2) !prev, this: We're iterating at the 1st element.
114
+ *
115
+ * (3) prev, !this: We're iterating at the last element.
116
+ *
117
+ * (4) prev, this: this is the most common case, we'll try to find a hole
118
+ * between "prev" and "this" mapping.
119
+ *
120
+ * Note that this function assumes the last valid iova is HWADDR_MAX, but it
121
+ * searches linearly so it's easy to discard the result if it's not the case.
122
+ */
123
+static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args)
124
+{
125
+ const DMAMap *prev = args->prev, *this = args->this;
126
+ uint64_t hole_start, hole_last;
127
+
128
+ if (this && this->iova + this->size < args->iova_begin) {
129
+ return;
130
+ }
131
+
132
+ hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin);
133
+ hole_last = this ? this->iova : HWADDR_MAX;
134
+
135
+ if (hole_last - hole_start > args->new_size) {
136
+ args->iova_result = hole_start;
137
+ args->iova_found = true;
138
+ }
139
+}
140
+
141
+/**
142
+ * Foreach dma node in the tree, compare if there is a hole with its previous
143
+ * node (or minimum iova address allowed) and the node.
144
+ *
145
+ * @key: Node iterating
146
+ * @value: Node iterating
147
+ * @pargs: Struct to communicate with the outside world
148
+ *
149
+ * Return: false to keep iterating, true if needs break.
150
+ */
151
+static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value,
152
+ gpointer pargs)
153
+{
154
+ struct IOVATreeAllocArgs *args = pargs;
155
+ DMAMap *node = value;
156
+
157
+ assert(key == value);
158
+
159
+ iova_tree_alloc_args_iterate(args, node);
160
+ iova_tree_alloc_map_in_hole(args);
161
+ return args->iova_found;
162
+}
163
+
164
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
165
+ hwaddr iova_last)
166
+{
167
+ struct IOVATreeAllocArgs args = {
168
+ .new_size = map->size,
169
+ .iova_begin = iova_begin,
170
+ };
171
+
172
+ if (unlikely(iova_last < iova_begin)) {
173
+ return IOVA_ERR_INVALID;
174
+ }
175
+
176
+ /*
177
+ * Find a valid hole for the mapping
178
+ *
179
+ * Assuming low iova_begin, so no need to do a binary search to
180
+ * locate the first node.
181
+ *
182
+ * TODO: Replace all this with g_tree_node_first/next/last when available
183
+ * (from glib since 2.68). To do it with g_tree_foreach complicates the
184
+ * code a lot.
185
+ *
186
+ */
187
+ g_tree_foreach(tree->tree, iova_tree_alloc_traverse, &args);
188
+ if (!args.iova_found) {
189
+ /*
190
+ * Either tree is empty or the last hole is still not checked.
191
+ * g_tree_foreach does not compare (last, iova_last] range, so we check
192
+ * it here.
193
+ */
194
+ iova_tree_alloc_args_iterate(&args, NULL);
195
+ iova_tree_alloc_map_in_hole(&args);
196
+ }
197
+
198
+ if (!args.iova_found || args.iova_result + map->size > iova_last) {
199
+ return IOVA_ERR_NOMEM;
200
+ }
201
+
202
+ map->iova = args.iova_result;
203
+ return iova_tree_insert(tree, map);
204
+}
205
+
206
void iova_tree_destroy(IOVATree *tree)
53
{
207
{
54
struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
208
g_tree_destroy(tree->tree);
55
if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
56
- struct ip6_header *ip6 = (struct ip6_header *) pkt->l3_hdr;
57
/*
58
* TODO: if qemu would support >64K packets - add jumbo option check
59
* something like that:
60
* 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
61
*/
62
- if (ip6->ip6_plen == 0) {
63
+ if (pkt->l3_hdr.ip6.ip6_plen == 0) {
64
if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
65
- ip6->ip6_plen = htons(pkt->payload_len);
66
+ pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
67
}
68
/*
69
* TODO: if qemu would support >64K packets
70
--
209
--
71
2.7.4
210
2.7.4
211
212
diff view generated by jsdifflib
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Add support for stripping/inserting VLAN for VFs.
3
This function does the reverse operation of iova_tree_find: To look for
4
a mapping that matches a translated address so we can do the reverse.
4
5
5
Had to move CSUM calculation back into the for loop, since packet data
6
This has linear complexity instead of logarithmic, but it supports
6
is pulled inside the loop based on strip VLAN decision for every VF.
7
overlapping HVA. Future developments could reduce it.
7
8
8
net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead for
9
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
igb. Work for a future patch.
10
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
11
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
12
---
14
hw/net/igb_core.c | 62 +++++++++++++++++++++++++++++++++++++++++++------------
13
include/qemu/iova-tree.h | 20 +++++++++++++++++++-
15
1 file changed, 49 insertions(+), 13 deletions(-)
14
util/iova-tree.c | 34 ++++++++++++++++++++++++++++++++++
15
2 files changed, 53 insertions(+), 1 deletion(-)
16
16
17
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
17
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
18
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/igb_core.c
19
--- a/include/qemu/iova-tree.h
20
+++ b/hw/net/igb_core.c
20
+++ b/include/qemu/iova-tree.h
21
@@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
21
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
22
info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
22
* @tree: the iova tree to search from
23
* @map: the mapping to search
24
*
25
- * Search for a mapping in the iova tree that overlaps with the
26
+ * Search for a mapping in the iova tree whose iova overlaps with the
27
* mapping range specified. Only the first found mapping will be
28
* returned.
29
*
30
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
31
const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map);
32
33
/**
34
+ * iova_tree_find_iova:
35
+ *
36
+ * @tree: the iova tree to search from
37
+ * @map: the mapping to search
38
+ *
39
+ * Search for a mapping in the iova tree whose translated_addr overlaps with the
40
+ * mapping range specified. Only the first found mapping will be
41
+ * returned.
42
+ *
43
+ * Return: DMAMap pointer if found, or NULL if not found. Note that
44
+ * the returned DMAMap pointer is maintained internally. User should
45
+ * only read the content but never modify or free the content. Also,
46
+ * user is responsible to make sure the pointer is valid (say, no
47
+ * concurrent deletion in progress).
48
+ */
49
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map);
50
+
51
+/**
52
* iova_tree_find_address:
53
*
54
* @tree: the iova tree to search from
55
diff --git a/util/iova-tree.c b/util/iova-tree.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/util/iova-tree.c
58
+++ b/util/iova-tree.c
59
@@ -XXX,XX +XXX,XX @@ struct IOVATreeAllocArgs {
60
bool iova_found;
61
};
62
63
+typedef struct IOVATreeFindIOVAArgs {
64
+ const DMAMap *needle;
65
+ const DMAMap *result;
66
+} IOVATreeFindIOVAArgs;
67
+
68
/**
69
* Iterate args to the next hole
70
*
71
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map)
72
return g_tree_lookup(tree->tree, map);
23
}
73
}
24
74
25
+static void
75
+static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
26
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
76
+ gpointer data)
27
+ uint16_t vlan, bool insert_vlan)
28
+{
77
+{
29
+ if (core->mac[MRQC] & 1) {
78
+ const DMAMap *map = key;
30
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
79
+ IOVATreeFindIOVAArgs *args = data;
80
+ const DMAMap *needle;
31
+
81
+
32
+ if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
82
+ g_assert(key == value);
33
+ /* always insert default VLAN */
83
+
34
+ insert_vlan = true;
84
+ needle = args->needle;
35
+ vlan = core->mac[VMVIR0 + pool] & 0xffff;
85
+ if (map->translated_addr + map->size < needle->translated_addr ||
36
+ } else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
86
+ needle->translated_addr + needle->size < map->translated_addr) {
37
+ insert_vlan = false;
87
+ return false;
38
+ }
39
+ }
88
+ }
40
+
89
+
41
+ if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
90
+ args->result = map;
42
+ net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
91
+ return true;
43
+ core->mac[VET] & 0xffff);
44
+ }
45
+}
92
+}
46
+
93
+
47
static bool
94
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map)
48
igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
95
+{
49
{
96
+ IOVATreeFindIOVAArgs args = {
50
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
97
+ .needle = map,
51
98
+ };
52
if (cmd_type_len & E1000_TXD_CMD_EOP) {
53
if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
54
- if (cmd_type_len & E1000_TXD_CMD_VLE) {
55
- idx = (tx->first_olinfo_status >> 4) & 1;
56
- uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
57
- uint16_t vet = core->mac[VET] & 0xffff;
58
- net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
59
- }
60
+ idx = (tx->first_olinfo_status >> 4) & 1;
61
+ igb_tx_insert_vlan(core, queue_index, tx,
62
+ tx->ctx[idx].vlan_macip_lens >> 16,
63
+ !!(cmd_type_len & E1000_TXD_CMD_VLE));
64
+
99
+
65
if (igb_tx_pkt_send(core, tx, queue_index)) {
100
+ g_tree_foreach(tree->tree, iova_tree_find_address_iterator, &args);
66
igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
101
+ return args.result;
67
}
68
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
69
igb_update_rx_stats(core, rxi, size, total_size);
70
}
71
72
+static bool
73
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
74
+{
75
+ if (core->mac[MRQC] & 1) {
76
+ uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
77
+ /* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
78
+ return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
79
+ core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
80
+ core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
81
+ }
82
+
83
+ return e1000x_vlan_enabled(core->mac);
84
+}
102
+}
85
+
103
+
86
static inline void
104
const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova)
87
igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
88
{
105
{
89
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
106
const DMAMap map = { .iova = iova, .size = 0 };
90
91
ehdr = PKT_GET_ETH_HDR(filter_buf);
92
net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
93
-
94
- net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
95
- e1000x_vlan_enabled(core->mac),
96
- core->mac[VET] & 0xffff);
97
+ net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
98
99
queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
100
if (!queues) {
101
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
102
return orig_size;
103
}
104
105
- total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
106
- e1000x_fcs_len(core->mac);
107
-
108
for (i = 0; i < IGB_NUM_QUEUES; i++) {
109
if (!(queues & BIT(i)) ||
110
!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
111
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
112
113
igb_rx_ring_init(core, &rxr, i);
114
115
+ net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
116
+ igb_rx_strip_vlan(core, rxr.i),
117
+ core->mac[VET] & 0xffff);
118
+
119
+ total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
120
+ e1000x_fcs_len(core->mac);
121
+
122
if (!igb_has_rxbufs(core, rxr.i, total_size)) {
123
n |= E1000_ICS_RXO;
124
trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
125
--
107
--
126
2.7.4
108
2.7.4
109
110
diff view generated by jsdifflib
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
RSS for VFs is only enabled if VMOLR[n].RSSE is set.
3
This tree is able to look for a translated address from an IOVA address.
4
4
5
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
5
At first glance it is similar to util/iova-tree. However, SVQ working on
6
devices with limited IOVA space need more capabilities, like allocating
7
IOVA chunks or performing reverse translations (qemu addresses to iova).
8
9
The allocation capability, as "assign a free IOVA address to this chunk
10
of memory in qemu's address space" allows shadow virtqueue to create a
11
new address space that is not restricted by guest's addressable one, so
12
we can allocate shadow vqs vrings outside of it.
13
14
It duplicates the tree so it can search efficiently in both directions,
15
and it will signal overlap if iova or the translated address is present
16
in any tree.
17
18
Acked-by: Michael S. Tsirkin <mst@redhat.com>
19
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
20
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
21
---
8
hw/net/igb_core.c | 9 ++++++++-
22
hw/virtio/meson.build | 2 +-
9
1 file changed, 8 insertions(+), 1 deletion(-)
23
hw/virtio/vhost-iova-tree.c | 110 ++++++++++++++++++++++++++++++++++++++++++++
24
hw/virtio/vhost-iova-tree.h | 27 +++++++++++
25
3 files changed, 138 insertions(+), 1 deletion(-)
26
create mode 100644 hw/virtio/vhost-iova-tree.c
27
create mode 100644 hw/virtio/vhost-iova-tree.h
10
28
11
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
29
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
12
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
13
--- a/hw/net/igb_core.c
31
--- a/hw/virtio/meson.build
14
+++ b/hw/net/igb_core.c
32
+++ b/hw/virtio/meson.build
15
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
33
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
16
if (queues) {
34
17
igb_rss_parse_packet(core, core->rx_pkt,
35
virtio_ss = ss.source_set()
18
external_tx != NULL, rss_info);
36
virtio_ss.add(files('virtio.c'))
19
+ /* Sec 8.26.1: PQn = VFn + VQn*8 */
37
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
20
if (rss_info->queue & 1) {
38
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
21
- queues <<= 8;
39
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
22
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
40
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
23
+ if ((queues & BIT(i)) &&
41
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
24
+ (core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
42
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
25
+ queues |= BIT(i + IGB_NUM_VM_POOLS);
43
new file mode 100644
26
+ queues &= ~BIT(i);
44
index XXXXXXX..XXXXXXX
27
+ }
45
--- /dev/null
28
+ }
46
+++ b/hw/virtio/vhost-iova-tree.c
29
}
47
@@ -XXX,XX +XXX,XX @@
30
}
48
+/*
31
} else {
49
+ * vhost software live migration iova tree
50
+ *
51
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
52
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
53
+ *
54
+ * SPDX-License-Identifier: GPL-2.0-or-later
55
+ */
56
+
57
+#include "qemu/osdep.h"
58
+#include "qemu/iova-tree.h"
59
+#include "vhost-iova-tree.h"
60
+
61
+#define iova_min_addr qemu_real_host_page_size
62
+
63
+/**
64
+ * VhostIOVATree, able to:
65
+ * - Translate iova address
66
+ * - Reverse translate iova address (from translated to iova)
67
+ * - Allocate IOVA regions for translated range (linear operation)
68
+ */
69
+struct VhostIOVATree {
70
+ /* First addressable iova address in the device */
71
+ uint64_t iova_first;
72
+
73
+ /* Last addressable iova address in the device */
74
+ uint64_t iova_last;
75
+
76
+ /* IOVA address to qemu memory maps. */
77
+ IOVATree *iova_taddr_map;
78
+};
79
+
80
+/**
81
+ * Create a new IOVA tree
82
+ *
83
+ * Returns the new IOVA tree
84
+ */
85
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
86
+{
87
+ VhostIOVATree *tree = g_new(VhostIOVATree, 1);
88
+
89
+ /* Some devices do not like 0 addresses */
90
+ tree->iova_first = MAX(iova_first, iova_min_addr);
91
+ tree->iova_last = iova_last;
92
+
93
+ tree->iova_taddr_map = iova_tree_new();
94
+ return tree;
95
+}
96
+
97
+/**
98
+ * Delete an iova tree
99
+ */
100
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
101
+{
102
+ iova_tree_destroy(iova_tree->iova_taddr_map);
103
+ g_free(iova_tree);
104
+}
105
+
106
+/**
107
+ * Find the IOVA address stored from a memory address
108
+ *
109
+ * @tree: The iova tree
110
+ * @map: The map with the memory address
111
+ *
112
+ * Return the stored mapping, or NULL if not found.
113
+ */
114
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
115
+ const DMAMap *map)
116
+{
117
+ return iova_tree_find_iova(tree->iova_taddr_map, map);
118
+}
119
+
120
+/**
121
+ * Allocate a new mapping
122
+ *
123
+ * @tree: The iova tree
124
+ * @map: The iova map
125
+ *
126
+ * Returns:
127
+ * - IOVA_OK if the map fits in the container
128
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
129
+ * - IOVA_ERR_NOMEM if tree cannot allocate more space.
130
+ *
131
+ * It returns the assigned iova in map->iova if the return value is IOVA_OK.
132
+ */
133
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
134
+{
135
+ /* Some vhost devices do not like addr 0. Skip first page */
136
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
137
+
138
+ if (map->translated_addr + map->size < map->translated_addr ||
139
+ map->perm == IOMMU_NONE) {
140
+ return IOVA_ERR_INVALID;
141
+ }
142
+
143
+ /* Allocate a node in IOVA address */
144
+ return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
145
+ tree->iova_last);
146
+}
147
+
148
+/**
149
+ * Remove existing mappings from iova tree
150
+ *
151
+ * @iova_tree: The vhost iova tree
152
+ * @map: The map to remove
153
+ */
154
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
155
+{
156
+ iova_tree_remove(iova_tree->iova_taddr_map, map);
157
+}
158
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
159
new file mode 100644
160
index XXXXXXX..XXXXXXX
161
--- /dev/null
162
+++ b/hw/virtio/vhost-iova-tree.h
163
@@ -XXX,XX +XXX,XX @@
164
+/*
165
+ * vhost software live migration iova tree
166
+ *
167
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
168
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
169
+ *
170
+ * SPDX-License-Identifier: GPL-2.0-or-later
171
+ */
172
+
173
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
174
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
175
+
176
+#include "qemu/iova-tree.h"
177
+#include "exec/memory.h"
178
+
179
+typedef struct VhostIOVATree VhostIOVATree;
180
+
181
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
182
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
183
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
184
+
185
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
186
+ const DMAMap *map);
187
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
188
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
189
+
190
+#endif
32
--
191
--
33
2.7.4
192
2.7.4
193
194
diff view generated by jsdifflib
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Please note that loopback counters for VM to VM traffic are not
3
Use translations added in VhostIOVATree in SVQ.
4
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.
4
5
5
Only introduce usage here, not allocation and deallocation. As with
6
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
6
previous patches, we use the dead code paths of shadow_vqs_enabled to
7
avoid committing too many changes at once. These are impossible to take
8
at the moment.
9
10
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
13
---
9
hw/net/igb_core.c | 26 ++++++++++++++++++++++----
14
hw/virtio/vhost-shadow-virtqueue.c | 75 +++++++++++++++++++++--
10
1 file changed, 22 insertions(+), 4 deletions(-)
15
hw/virtio/vhost-shadow-virtqueue.h | 6 +-
11
16
hw/virtio/vhost-vdpa.c | 122 +++++++++++++++++++++++++++++++------
12
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
17
include/hw/virtio/vhost-vdpa.h | 3 +
18
4 files changed, 181 insertions(+), 25 deletions(-)
19
20
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
13
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/igb_core.c
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
15
+++ b/hw/net/igb_core.c
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
16
@@ -XXX,XX +XXX,XX @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int queue_index)
24
@@ -XXX,XX +XXX,XX @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
25
return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
17
}
26
}
18
27
19
static void
28
+/**
20
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
29
+ * Translate addresses between the qemu's virtual address and the SVQ IOVA
21
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
30
+ *
22
{
31
+ * @svq: Shadow VirtQueue
23
static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
32
+ * @addrs: Translated IOVA addresses
24
PTC1023, PTC1522 };
33
+ * @iovec: Source qemu's VA addresses
25
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
34
+ * @num: Length of iovec and minimum length of addrs
26
core->mac[GPTC] = core->mac[TPT];
35
+ */
27
core->mac[GOTCL] = core->mac[TOTL];
36
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
28
core->mac[GOTCH] = core->mac[TOTH];
37
+ void **addrs, const struct iovec *iovec,
29
+
38
+ size_t num)
30
+ if (core->mac[MRQC] & 1) {
39
+{
31
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
40
+ if (num == 0) {
32
+
41
+ return true;
33
+ core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
42
+ }
34
+ core->mac[PVFGPTC0 + (pool * 64)]++;
43
+
35
+ }
44
+ for (size_t i = 0; i < num; ++i) {
45
+ DMAMap needle = {
46
+ .translated_addr = (hwaddr)iovec[i].iov_base,
47
+ .size = iovec[i].iov_len,
48
+ };
49
+ size_t off;
50
+
51
+ const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
52
+ /*
53
+ * Map cannot be NULL since iova map contains all guest space and
54
+ * qemu already has a physical address mapped
55
+ */
56
+ if (unlikely(!map)) {
57
+ qemu_log_mask(LOG_GUEST_ERROR,
58
+ "Invalid address 0x%"HWADDR_PRIx" given by guest",
59
+ needle.translated_addr);
60
+ return false;
61
+ }
62
+
63
+ off = needle.translated_addr - map->translated_addr;
64
+ addrs[i] = (void *)(map->iova + off);
65
+
66
+ if (unlikely(int128_gt(int128_add(needle.translated_addr,
67
+ iovec[i].iov_len),
68
+ map->translated_addr + map->size))) {
69
+ qemu_log_mask(LOG_GUEST_ERROR,
70
+ "Guest buffer expands over iova range");
71
+ return false;
72
+ }
73
+ }
74
+
75
+ return true;
76
+}
77
+
78
static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
79
+ void * const *sg,
80
const struct iovec *iovec,
81
size_t num, bool more_descs, bool write)
82
{
83
@@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
84
} else {
85
descs[i].flags = flags;
86
}
87
- descs[i].addr = cpu_to_le64((hwaddr)iovec[n].iov_base);
88
+ descs[i].addr = cpu_to_le64((hwaddr)sg[n]);
89
descs[i].len = cpu_to_le32(iovec[n].iov_len);
90
91
last = i;
92
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
93
{
94
unsigned avail_idx;
95
vring_avail_t *avail = svq->vring.avail;
96
+ bool ok;
97
+ g_autofree void **sgs = g_new(void *, MAX(elem->out_num, elem->in_num));
98
99
*head = svq->free_head;
100
101
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
102
return false;
103
}
104
105
- vhost_vring_write_descs(svq, elem->out_sg, elem->out_num,
106
+ ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
107
+ if (unlikely(!ok)) {
108
+ return false;
109
+ }
110
+ vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
111
elem->in_num > 0, false);
112
- vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
113
+
114
+
115
+ ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
116
+ if (unlikely(!ok)) {
117
+ return false;
118
+ }
119
+
120
+ vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
121
122
/*
123
* Put the entry in the available array (but don't update avail->idx until
124
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
125
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
126
* shadow methods and file descriptors.
127
*
128
+ * @iova_tree: Tree to perform descriptors translations
129
+ *
130
* Returns the new virtqueue or NULL.
131
*
132
* In case of error, reason is reported through error_report.
133
*/
134
-VhostShadowVirtqueue *vhost_svq_new(void)
135
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
136
{
137
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
138
int r;
139
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
140
141
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
142
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
143
+ svq->iova_tree = iova_tree;
144
return g_steal_pointer(&svq);
145
146
err_init_hdev_call:
147
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
148
index XXXXXXX..XXXXXXX 100644
149
--- a/hw/virtio/vhost-shadow-virtqueue.h
150
+++ b/hw/virtio/vhost-shadow-virtqueue.h
151
@@ -XXX,XX +XXX,XX @@
152
#include "qemu/event_notifier.h"
153
#include "hw/virtio/virtio.h"
154
#include "standard-headers/linux/vhost_types.h"
155
+#include "hw/virtio/vhost-iova-tree.h"
156
157
/* Shadow virtqueue to relay notifications */
158
typedef struct VhostShadowVirtqueue {
159
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
160
/* Virtio device */
161
VirtIODevice *vdev;
162
163
+ /* IOVA mapping */
164
+ VhostIOVATree *iova_tree;
165
+
166
/* Map for use the guest's descriptors */
167
VirtQueueElement **ring_id_maps;
168
169
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
170
VirtQueue *vq);
171
void vhost_svq_stop(VhostShadowVirtqueue *svq);
172
173
-VhostShadowVirtqueue *vhost_svq_new(void);
174
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
175
176
void vhost_svq_free(gpointer vq);
177
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
178
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
179
index XXXXXXX..XXXXXXX 100644
180
--- a/hw/virtio/vhost-vdpa.c
181
+++ b/hw/virtio/vhost-vdpa.c
182
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
183
vaddr, section->readonly);
184
185
llsize = int128_sub(llend, int128_make64(iova));
186
+ if (v->shadow_vqs_enabled) {
187
+ DMAMap mem_region = {
188
+ .translated_addr = (hwaddr)vaddr,
189
+ .size = int128_get64(llsize) - 1,
190
+ .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
191
+ };
192
+
193
+ int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
194
+ if (unlikely(r != IOVA_OK)) {
195
+ error_report("Can't allocate a mapping (%d)", r);
196
+ goto fail;
197
+ }
198
+
199
+ iova = mem_region.iova;
200
+ }
201
202
vhost_vdpa_iotlb_batch_begin_once(v);
203
ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
204
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
205
206
llsize = int128_sub(llend, int128_make64(iova));
207
208
+ if (v->shadow_vqs_enabled) {
209
+ const DMAMap *result;
210
+ const void *vaddr = memory_region_get_ram_ptr(section->mr) +
211
+ section->offset_within_region +
212
+ (iova - section->offset_within_address_space);
213
+ DMAMap mem_region = {
214
+ .translated_addr = (hwaddr)vaddr,
215
+ .size = int128_get64(llsize) - 1,
216
+ };
217
+
218
+ result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
219
+ iova = result->iova;
220
+ vhost_iova_tree_remove(v->iova_tree, &mem_region);
221
+ }
222
vhost_vdpa_iotlb_batch_begin_once(v);
223
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
224
if (ret) {
225
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
226
227
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
228
for (unsigned n = 0; n < hdev->nvqs; ++n) {
229
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
230
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
231
232
if (unlikely(!svq)) {
233
error_setg(errp, "Cannot create svq %u", n);
234
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
235
/**
236
* Unmap a SVQ area in the device
237
*/
238
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
239
- hwaddr size)
240
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
241
+ const DMAMap *needle)
242
{
243
+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
244
+ hwaddr size;
245
int r;
246
247
- size = ROUND_UP(size, qemu_real_host_page_size);
248
- r = vhost_vdpa_dma_unmap(v, iova, size);
249
+ if (unlikely(!result)) {
250
+ error_report("Unable to find SVQ address to unmap");
251
+ return false;
252
+ }
253
+
254
+ size = ROUND_UP(result->size, qemu_real_host_page_size);
255
+ r = vhost_vdpa_dma_unmap(v, result->iova, size);
256
return r == 0;
36
}
257
}
37
258
38
static void
259
static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
39
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
260
const VhostShadowVirtqueue *svq)
40
net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
261
{
41
}
262
+ DMAMap needle = {};
42
if (igb_tx_pkt_send(core, tx, queue_index)) {
263
struct vhost_vdpa *v = dev->opaque;
43
- igb_on_tx_done_update_stats(core, tx->tx_pkt);
264
struct vhost_vring_addr svq_addr;
44
+ igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
265
- size_t device_size = vhost_svq_device_area_size(svq);
45
}
266
- size_t driver_size = vhost_svq_driver_area_size(svq);
46
}
267
bool ok;
47
268
48
@@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core,
269
vhost_svq_get_vring_addr(svq, &svq_addr);
270
271
- ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
272
+ needle.translated_addr = svq_addr.desc_user_addr;
273
+ ok = vhost_vdpa_svq_unmap_ring(v, &needle);
274
if (unlikely(!ok)) {
275
return false;
276
}
277
278
- return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
279
+ needle.translated_addr = svq_addr.used_user_addr;
280
+ return vhost_vdpa_svq_unmap_ring(v, &needle);
281
+}
282
+
283
+/**
284
+ * Map the SVQ area in the device
285
+ *
286
+ * @v: Vhost-vdpa device
287
+ * @needle: The area to search iova
288
+ * @errp: Error pointer
289
+ */
290
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
291
+ Error **errp)
292
+{
293
+ int r;
294
+
295
+ r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
296
+ if (unlikely(r != IOVA_OK)) {
297
+ error_setg(errp, "Cannot allocate iova (%d)", r);
298
+ return false;
299
+ }
300
+
301
+ r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
302
+ (void *)needle->translated_addr,
303
+ needle->perm == IOMMU_RO);
304
+ if (unlikely(r != 0)) {
305
+ error_setg_errno(errp, -r, "Cannot map region to device");
306
+ vhost_iova_tree_remove(v->iova_tree, needle);
307
+ }
308
+
309
+ return r == 0;
49
}
310
}
50
311
51
static void
312
/**
52
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
313
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
53
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
314
struct vhost_vring_addr *addr,
54
+ size_t data_size, size_t data_fcs_size)
315
Error **errp)
55
{
316
{
56
e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
317
+ DMAMap device_region, driver_region;
57
318
+ struct vhost_vring_addr svq_addr;
58
@@ -XXX,XX +XXX,XX @@ igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
319
struct vhost_vdpa *v = dev->opaque;
59
default:
320
size_t device_size = vhost_svq_device_area_size(svq);
60
break;
321
size_t driver_size = vhost_svq_driver_area_size(svq);
322
- int r;
323
+ size_t avail_offset;
324
+ bool ok;
325
326
ERRP_GUARD();
327
- vhost_svq_get_vring_addr(svq, addr);
328
+ vhost_svq_get_vring_addr(svq, &svq_addr);
329
330
- r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
331
- (void *)addr->desc_user_addr, true);
332
- if (unlikely(r != 0)) {
333
- error_setg_errno(errp, -r, "Cannot create vq driver region: ");
334
+ driver_region = (DMAMap) {
335
+ .translated_addr = svq_addr.desc_user_addr,
336
+ .size = driver_size - 1,
337
+ .perm = IOMMU_RO,
338
+ };
339
+ ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
340
+ if (unlikely(!ok)) {
341
+ error_prepend(errp, "Cannot create vq driver region: ");
342
return false;
61
}
343
}
62
+
344
+ addr->desc_user_addr = driver_region.iova;
63
+ if (core->mac[MRQC] & 1) {
345
+ avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
64
+ uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
346
+ addr->avail_user_addr = driver_region.iova + avail_offset;
65
+
347
66
+ core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
348
- r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
67
+ core->mac[PVFGPRC0 + (pool * 64)]++;
349
- (void *)addr->used_user_addr, false);
68
+ if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
350
- if (unlikely(r != 0)) {
69
+ core->mac[PVFMPRC0 + (pool * 64)]++;
351
- error_setg_errno(errp, -r, "Cannot create vq device region: ");
70
+ }
352
+ device_region = (DMAMap) {
71
+ }
353
+ .translated_addr = svq_addr.used_user_addr,
354
+ .size = device_size - 1,
355
+ .perm = IOMMU_RW,
356
+ };
357
+ ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
358
+ if (unlikely(!ok)) {
359
+ error_prepend(errp, "Cannot create vq device region: ");
360
+ vhost_vdpa_svq_unmap_ring(v, &driver_region);
361
}
362
+ addr->used_user_addr = device_region.iova;
363
364
- return r == 0;
365
+ return ok;
72
}
366
}
73
367
74
static inline bool
368
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
75
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
369
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
76
370
index XXXXXXX..XXXXXXX 100644
77
} while (desc_offset < total_size);
371
--- a/include/hw/virtio/vhost-vdpa.h
78
372
+++ b/include/hw/virtio/vhost-vdpa.h
79
- igb_update_rx_stats(core, size, total_size);
373
@@ -XXX,XX +XXX,XX @@
80
+ igb_update_rx_stats(core, rxi, size, total_size);
374
81
}
375
#include <gmodule.h>
82
376
83
static inline void
377
+#include "hw/virtio/vhost-iova-tree.h"
378
#include "hw/virtio/virtio.h"
379
#include "standard-headers/linux/vhost_types.h"
380
381
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
382
MemoryListener listener;
383
struct vhost_vdpa_iova_range iova_range;
384
bool shadow_vqs_enabled;
385
+ /* IOVA mapping used by the Shadow Virtqueue */
386
+ VhostIOVATree *iova_tree;
387
GPtrArray *shadow_vqs;
388
struct vhost_dev *dev;
389
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
84
--
390
--
85
2.7.4
391
2.7.4
392
393
diff view generated by jsdifflib
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
IGB uses RXDW ICR bit to indicate that rx descriptor has been written
3
This is needed to achieve migration, so the destination can restore its
4
back. This is the same as RXT0 bit in older HW.
4
index.
5
5
6
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
6
Setting base as last used idx, so destination will see as available all
7
the entries that the device did not use, including the in-flight
8
processing ones.
9
10
This is ok for networking, but other kinds of devices might have
11
problems with these retransmissions.
12
13
Acked-by: Michael S. Tsirkin <mst@redhat.com>
14
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
16
---
9
hw/net/e1000x_regs.h | 4 ++++
17
hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
10
hw/net/igb_core.c | 2 +-
18
1 file changed, 17 insertions(+)
11
2 files changed, 5 insertions(+), 1 deletion(-)
12
19
13
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
20
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
14
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/net/e1000x_regs.h
22
--- a/hw/virtio/vhost-vdpa.c
16
+++ b/hw/net/e1000x_regs.h
23
+++ b/hw/virtio/vhost-vdpa.c
17
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
18
#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */
25
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
19
#define E1000_ICR_RXO 0x00000040 /* rx overrun */
26
struct vhost_vring_state *ring)
20
#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */
27
{
21
+#define E1000_ICR_RXDW 0x00000080 /* rx desc written back */
28
+ struct vhost_vdpa *v = dev->opaque;
22
#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */
29
int ret;
23
#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */
30
24
#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
31
+ if (v->shadow_vqs_enabled) {
25
@@ -XXX,XX +XXX,XX @@
32
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
26
#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
33
+ ring->index);
27
#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */
34
+
28
#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
35
+ /*
29
+#define E1000_ICS_RXDW E1000_ICR_RXDW /* rx desc written back */
36
+ * Setting base as last used idx, so destination will see as available
30
#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */
37
+ * all the entries that the device did not use, including the in-flight
31
#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
38
+ * processing ones.
32
#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
39
+ *
33
@@ -XXX,XX +XXX,XX @@
40
+ * TODO: This is ok for networking, but other kinds of devices might
34
#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
41
+ * have problems with these retransmissions.
35
#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */
42
+ */
36
#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
43
+ ring->num = svq->last_used_idx;
37
+#define E1000_IMS_RXDW E1000_ICR_RXDW /* rx desc written back */
44
+ return 0;
38
#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */
45
+ }
39
#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
46
+
40
#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
47
ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
41
@@ -XXX,XX +XXX,XX @@
48
trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
42
#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
49
return ret;
43
#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */
44
#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */
45
+#define E1000_IMC_RXDW E1000_ICR_RXDW /* rx desc written back */
46
#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */
47
#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
48
#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
49
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/hw/net/igb_core.c
52
+++ b/hw/net/igb_core.c
53
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
54
continue;
55
}
56
57
- n |= E1000_ICR_RXT0;
58
+ n |= E1000_ICR_RXDW;
59
60
igb_rx_fix_l4_csum(core, core->rx_pkt);
61
igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
62
--
50
--
63
2.7.4
51
2.7.4
52
53
diff view generated by jsdifflib
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
No segmentation should be performed if gso type is
3
Setting the log address would make the device start reporting invalid
4
VIRTIO_NET_HDR_GSO_NONE even if ECN bit is set.
4
dirty memory because the SVQ vrings are located in qemu's memory.
5
5
6
Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
6
Acked-by: Michael S. Tsirkin <mst@redhat.com>
7
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1544
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
9
---
11
hw/net/net_tx_pkt.c | 6 ++++--
10
hw/virtio/vhost-vdpa.c | 3 ++-
12
1 file changed, 4 insertions(+), 2 deletions(-)
11
1 file changed, 2 insertions(+), 1 deletion(-)
13
12
14
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
13
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/net_tx_pkt.c
15
--- a/hw/virtio/vhost-vdpa.c
17
+++ b/hw/net/net_tx_pkt.c
16
+++ b/hw/virtio/vhost-vdpa.c
18
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
17
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
18
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
19
struct vhost_log *log)
19
{
20
{
20
assert(pkt);
21
- if (vhost_vdpa_one_time_request(dev)) {
21
22
+ struct vhost_vdpa *v = dev->opaque;
22
+ uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
23
+ if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
23
+
24
return 0;
24
/*
25
* Since underlying infrastructure does not support IP datagrams longer
26
* than 64K we should drop such packets and don't even try to send
27
*/
28
- if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
29
+ if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
30
if (pkt->payload_len >
31
ETH_MAX_IP_DGRAM_LEN -
32
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
33
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
34
}
35
}
25
}
36
26
37
- if (offload || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
38
+ if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
39
if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
40
net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
41
pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
42
--
27
--
43
2.7.4
28
2.7.4
29
30
diff view generated by jsdifflib
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
3
SVQ is able to log the dirty bits by itself, so let's use it to not
4
block migration.
5
6
Also, ignore set and clear of VHOST_F_LOG_ALL on set_features if SVQ is
7
enabled. Even if the device supports it, the reports would be nonsense
8
because SVQ memory is in the qemu region.
9
10
The log region is still allocated. Future changes might skip that, but
11
this series is already long enough.
12
13
Acked-by: Michael S. Tsirkin <mst@redhat.com>
14
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
16
---
6
hw/net/igb_core.c | 41 ++++++++++++++++++++++++++++++++++++-----
17
hw/virtio/vhost-vdpa.c | 39 +++++++++++++++++++++++++++++++++++----
7
1 file changed, 36 insertions(+), 5 deletions(-)
18
include/hw/virtio/vhost-vdpa.h | 1 +
19
2 files changed, 36 insertions(+), 4 deletions(-)
8
20
9
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
21
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
10
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
11
--- a/hw/net/igb_core.c
23
--- a/hw/virtio/vhost-vdpa.c
12
+++ b/hw/net/igb_core.c
24
+++ b/hw/virtio/vhost-vdpa.c
13
@@ -XXX,XX +XXX,XX @@ igb_rx_l4_cso_enabled(IGBCore *core)
25
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
14
return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
26
return v->index != 0;
15
}
27
}
16
28
17
+static bool
29
+static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
18
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
30
+ uint64_t *features)
19
+{
31
+{
20
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
32
+ int ret;
21
+ bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
22
+ int max_ethernet_lpe_size =
23
+ core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
24
+ int max_ethernet_vlan_size = 1522;
25
+
33
+
26
+ return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
34
+ ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
35
+ trace_vhost_vdpa_get_features(dev, *features);
36
+ return ret;
27
+}
37
+}
28
+
38
+
29
static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
39
static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
30
- E1000E_RSSInfo *rss_info, bool *external_tx)
40
Error **errp)
31
+ size_t size, E1000E_RSSInfo *rss_info,
32
+ bool *external_tx)
33
{
41
{
34
static const int ta_shift[] = { 4, 3, 2, 0 };
42
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
35
uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
43
return 0;
36
uint16_t queues = 0;
44
}
37
+ uint16_t oversized = 0;
45
38
uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
46
- r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
39
bool accepted = false;
47
+ r = vhost_vdpa_get_dev_features(hdev, &dev_features);
40
int i;
48
if (r != 0) {
41
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
49
error_setg_errno(errp, -r, "Can't get vdpa device features");
42
}
50
return r;
43
51
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
44
queues &= core->mac[VFRE];
52
static int vhost_vdpa_set_features(struct vhost_dev *dev,
45
- igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
53
uint64_t features)
46
- if (rss_info->queue & 1) {
54
{
47
- queues <<= 8;
55
+ struct vhost_vdpa *v = dev->opaque;
48
+ if (queues) {
56
int ret;
49
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
57
50
+ if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
58
if (vhost_vdpa_one_time_request(dev)) {
51
+ oversized |= BIT(i);
59
return 0;
52
+ }
60
}
53
+ }
61
54
+ /* 8.19.37 increment ROC if packet is oversized for all queues */
62
+ if (v->shadow_vqs_enabled) {
55
+ if (oversized == queues) {
63
+ if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
56
+ trace_e1000x_rx_oversized(size);
64
+ /*
57
+ e1000x_inc_reg_if_not_full(core->mac, ROC);
65
+ * QEMU is just trying to enable or disable logging. SVQ handles
58
+ }
66
+ * this sepparately, so no need to forward this.
59
+ queues &= ~oversized;
67
+ */
68
+ v->acked_features = features;
69
+ return 0;
60
+ }
70
+ }
61
+
71
+
62
+ if (queues) {
72
+ v->acked_features = features;
63
+ igb_rss_parse_packet(core, core->rx_pkt,
73
+
64
+ external_tx != NULL, rss_info);
74
+ /* We must not ack _F_LOG if SVQ is enabled */
65
+ if (rss_info->queue & 1) {
75
+ features &= ~BIT_ULL(VHOST_F_LOG_ALL);
66
+ queues <<= 8;
76
+ }
67
+ }
77
+
68
}
78
trace_vhost_vdpa_set_features(dev, features);
69
} else {
79
ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
70
switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
80
if (ret) {
71
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
81
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
72
e1000x_vlan_enabled(core->mac),
82
static int vhost_vdpa_get_features(struct vhost_dev *dev,
73
core->mac[VET] & 0xffff);
83
uint64_t *features)
74
84
{
75
- queues = igb_receive_assign(core, ehdr, &rss_info, external_tx);
85
- int ret;
76
+ queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
86
+ struct vhost_vdpa *v = dev->opaque;
77
if (!queues) {
87
+ int ret = vhost_vdpa_get_dev_features(dev, features);
78
trace_e1000e_rx_flt_dropped();
88
+
79
return orig_size;
89
+ if (ret == 0 && v->shadow_vqs_enabled) {
90
+ /* Add SVQ logging capabilities */
91
+ *features |= BIT_ULL(VHOST_F_LOG_ALL);
92
+ }
93
94
- ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
95
- trace_vhost_vdpa_get_features(dev, *features);
96
return ret;
97
}
98
99
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
100
index XXXXXXX..XXXXXXX 100644
101
--- a/include/hw/virtio/vhost-vdpa.h
102
+++ b/include/hw/virtio/vhost-vdpa.h
103
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
104
bool iotlb_batch_begin_sent;
105
MemoryListener listener;
106
struct vhost_vdpa_iova_range iova_range;
107
+ uint64_t acked_features;
108
bool shadow_vqs_enabled;
109
/* IOVA mapping used by the Shadow Virtqueue */
110
VhostIOVATree *iova_tree;
80
--
111
--
81
2.7.4
112
2.7.4
113
114
diff view generated by jsdifflib