The following changes since commit 352998df1c53b366413690d95b35f76d0721ebed:

  Merge tag 'i2c-20220314' of https://github.com/philmd/qemu into staging (2022-03-14 14:39:33 +0000)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to 12a195fa343aae2ead1301ce04727bd0ae25eb15:

  vdpa: Expose VHOST_F_LOG_ALL on SVQ (2022-03-15 13:57:44 +0800)

----------------------------------------------------------------

Changes since V2:
- fix 32bit build errors

----------------------------------------------------------------
Eugenio Pérez (14):
      vhost: Add VhostShadowVirtqueue
      vhost: Add Shadow VirtQueue kick forwarding capabilities
      vhost: Add Shadow VirtQueue call forwarding capabilities
      vhost: Add vhost_svq_valid_features to shadow vq
      virtio: Add vhost_svq_get_vring_addr
      vdpa: adapt vhost_ops callbacks to svq
      vhost: Shadow virtqueue buffers forwarding
      util: Add iova_tree_alloc_map
      util: add iova_tree_find_iova
      vhost: Add VhostIOVATree
      vdpa: Add custom IOTLB translations to SVQ
      vdpa: Adapt vhost_vdpa_get_vring_base to SVQ
      vdpa: Never set log_base addr if SVQ is enabled
      vdpa: Expose VHOST_F_LOG_ALL on SVQ

Jason Wang (1):
      virtio-net: fix map leaking on error during receive

 hw/net/virtio-net.c                |   1 +
 hw/virtio/meson.build              |   2 +-
 hw/virtio/vhost-iova-tree.c        | 110 +++++++
 hw/virtio/vhost-iova-tree.h        |  27 ++
 hw/virtio/vhost-shadow-virtqueue.c | 636 +++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  87 +++++
 hw/virtio/vhost-vdpa.c             | 522 +++++++++++++++++++++++++++++-
 include/hw/virtio/vhost-vdpa.h     |   8 +
 include/qemu/iova-tree.h           |  38 ++-
 util/iova-tree.c                   | 170 ++++++++++
 10 files changed, 1584 insertions(+), 17 deletions(-)
 create mode 100644 hw/virtio/vhost-iova-tree.c
 create mode 100644 hw/virtio/vhost-iova-tree.h
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.h
Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
tries to fix the use after free of the sg by caching the virtqueue
elements in an array and unmapping them at once after receiving the
packets. But it forgot to unmap the cached elements on error, which
will lead to leaking of mapping and other unexpected results.

Fixing this by detaching the cached elements on error. This addresses
CVE-2022-26353.

Reported-by: Victor Tom <vv474172261@gmail.com>
Cc: qemu-stable@nongnu.org
Fixes: CVE-2022-26353
Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/net/virtio-net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
 
 err:
     for (j = 0; j < i; j++) {
+        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
         g_free(elems[j]);
     }
 
-- 
2.7.4
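After this change the receive error path unmaps each cached element before freeing it. For context, a sketch of the resulting loop (names as in virtio_net_receive_rcu):

    err:
        for (j = 0; j < i; j++) {
            /* Unmap the cached in-flight element before freeing it, so the
             * DMA mapping is not leaked when reception fails halfway. */
            virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
            g_free(elems[j]);
        }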
From: Eugenio Pérez <eperezma@redhat.com>

Vhost shadow virtqueue (SVQ) is an intermediate jump for virtqueue
notifications and buffers, allowing qemu to track them. While qemu is
forwarding the buffers and virtqueue changes, it is able to commit the
memory that is being dirtied, the same way regular qemu VirtIO devices
do.

This commit only exposes basic SVQ allocation and free. Next patches of
the series add functionality like notifications and buffers forwarding.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/meson.build              |  2 +-
 hw/virtio/vhost-shadow-virtqueue.c | 62 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h | 28 +++++++++++++++++
 3 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.h

diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
 
 virtio_ss = ss.source_set()
 virtio_ss.add(files('virtio.c'))
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c'))
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
 virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost shadow virtqueue
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
+
+#include "qemu/error-report.h"
+
+/**
+ * Creates vhost shadow virtqueue, and instructs the vhost device to use the
+ * shadow methods and file descriptors.
+ *
+ * Returns the new virtqueue or NULL.
+ *
+ * In case of error, reason is reported through error_report.
+ */
+VhostShadowVirtqueue *vhost_svq_new(void)
+{
+    g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
+    int r;
+
+    r = event_notifier_init(&svq->hdev_kick, 0);
+    if (r != 0) {
+        error_report("Couldn't create kick event notifier: %s (%d)",
+                     g_strerror(errno), errno);
+        goto err_init_hdev_kick;
+    }
+
+    r = event_notifier_init(&svq->hdev_call, 0);
+    if (r != 0) {
+        error_report("Couldn't create call event notifier: %s (%d)",
+                     g_strerror(errno), errno);
+        goto err_init_hdev_call;
+    }
+
+    return g_steal_pointer(&svq);
+
+err_init_hdev_call:
+    event_notifier_cleanup(&svq->hdev_kick);
+
+err_init_hdev_kick:
+    return NULL;
+}
+
+/**
+ * Free the resources of the shadow virtqueue.
+ *
+ * @pvq: gpointer to SVQ so it can be used by autofree functions.
+ */
+void vhost_svq_free(gpointer pvq)
+{
+    VhostShadowVirtqueue *vq = pvq;
+    event_notifier_cleanup(&vq->hdev_kick);
+    event_notifier_cleanup(&vq->hdev_call);
+    g_free(vq);
+}
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost shadow virtqueue
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef VHOST_SHADOW_VIRTQUEUE_H
+#define VHOST_SHADOW_VIRTQUEUE_H
+
+#include "qemu/event_notifier.h"
+
+/* Shadow virtqueue to relay notifications */
+typedef struct VhostShadowVirtqueue {
+    /* Shadow kick notifier, sent to vhost */
+    EventNotifier hdev_kick;
+    /* Shadow call notifier, sent to vhost */
+    EventNotifier hdev_call;
+} VhostShadowVirtqueue;
+
+VhostShadowVirtqueue *vhost_svq_new(void);
+
+void vhost_svq_free(gpointer vq);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
+
+#endif
-- 
2.7.4
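As a usage sketch (a hypothetical caller, not code from this patch), the G_DEFINE_AUTOPTR_CLEANUP_FUNC declaration in the header lets callers pair allocation and cleanup without explicit error paths:

    /* Hypothetical caller: the SVQ is freed automatically on scope exit
     * unless ownership is taken with g_steal_pointer(). */
    static VhostShadowVirtqueue *make_svq_or_null(void)
    {
        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();

        if (!svq) {
            return NULL; /* vhost_svq_new already reported the error */
        }
        return g_steal_pointer(&svq);
    }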
From: Eugenio Pérez <eperezma@redhat.com>

In this mode no buffer forwarding will be performed by the SVQ: Qemu
will just forward the guest's kicks to the device.

Host memory notifiers regions are left out for simplicity, and they will
not be addressed in this series.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 55 ++
 hw/virtio/vhost-shadow-virtqueue.h | 14 ++++
 hw/virtio/vhost-vdpa.c             | 144 ++++++++++++++++++++++++++++++++++++-
 include/hw/virtio/vhost-vdpa.h     | 4 ++
 4 files changed, 215 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "linux-headers/linux/vhost.h"
+
+/**
+ * Forward guest notifications.
+ *
+ * @n: guest kick event notifier, the one that guest set to notify svq.
+ */
+static void vhost_handle_guest_kick(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
+    event_notifier_test_and_clear(n);
+    event_notifier_set(&svq->hdev_kick);
+}
+
+/**
+ * Set a new file descriptor for the guest to kick the SVQ and notify for avail
+ *
+ * @svq: The svq
+ * @svq_kick_fd: The svq kick fd
+ *
+ * Note that the SVQ will never close the old file descriptor.
+ */
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
+{
+    EventNotifier *svq_kick = &svq->svq_kick;
+    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
+    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;
+
+    if (poll_stop) {
+        event_notifier_set_handler(svq_kick, NULL);
+    }
+
+    /*
+     * event_notifier_set_handler already checks for guest's notifications if
+     * they arrive at the new file descriptor in the switch, so there is no
+     * need to explicitly check for them.
+     */
+    if (poll_start) {
+        event_notifier_init_fd(svq_kick, svq_kick_fd);
+        event_notifier_set(svq_kick);
+        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
+    }
+}
+
+/**
+ * Stop the shadow virtqueue operation.
+ * @svq: Shadow Virtqueue
+ */
+void vhost_svq_stop(VhostShadowVirtqueue *svq)
+{
+    event_notifier_set_handler(&svq->svq_kick, NULL);
+}
 
 /**
  * Creates vhost shadow virtqueue, and instructs the vhost device to use the
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
         goto err_init_hdev_call;
     }
 
+    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
     return g_steal_pointer(&svq);
 
 err_init_hdev_call:
@@ -XXX,XX +XXX,XX @@ err_init_hdev_kick:
 void vhost_svq_free(gpointer pvq)
 {
     VhostShadowVirtqueue *vq = pvq;
+    vhost_svq_stop(vq);
     event_notifier_cleanup(&vq->hdev_kick);
     event_notifier_cleanup(&vq->hdev_call);
     g_free(vq);
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
     EventNotifier hdev_kick;
     /* Shadow call notifier, sent to vhost */
     EventNotifier hdev_call;
+
+    /*
+     * Borrowed virtqueue's guest to host notifier. To borrow it in this event
+     * notifier allows to recover the VhostShadowVirtqueue from the event loop
+     * easily. If we use the VirtQueue's one, we don't have an easy way to
+     * retrieve VhostShadowVirtqueue.
+     *
+     * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
+     */
+    EventNotifier svq_kick;
 } VhostShadowVirtqueue;
 
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
+
+void vhost_svq_stop(VhostShadowVirtqueue *svq);
+
 VhostShadowVirtqueue *vhost_svq_new(void);
 
 void vhost_svq_free(gpointer vq);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-backend.h"
 #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
 #include "trace.h"
 #include "qemu-common.h"
+#include "qapi/error.h"
 
 /*
  * Return one past the end of the end of section. Be careful with uint64_t
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
     return v->index != 0;
 }
 
+static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
+                               Error **errp)
+{
+    g_autoptr(GPtrArray) shadow_vqs = NULL;
+
+    if (!v->shadow_vqs_enabled) {
+        return 0;
+    }
+
+    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
+    for (unsigned n = 0; n < hdev->nvqs; ++n) {
+        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
+
+        if (unlikely(!svq)) {
+            error_setg(errp, "Cannot create svq %u", n);
+            return -1;
+        }
+        g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
+    }
+
+    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
+    return 0;
+}
+
 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
 {
     struct vhost_vdpa *v;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     dev->opaque =  opaque ;
     v->listener = vhost_vdpa_memory_listener;
     v->msg_type = VHOST_IOTLB_MSG_V2;
+    ret = vhost_vdpa_init_svq(dev, v, errp);
+    if (ret) {
+        goto err;
+    }
 
     vhost_vdpa_get_iova_range(v);
 
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
                                VIRTIO_CONFIG_S_DRIVER);
 
     return 0;
+
+err:
+    ram_block_discard_disable(false);
+    return ret;
 }
 
 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
 
 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int i;
 
+    if (v->shadow_vqs_enabled) {
+        /* FIXME SVQ is not compatible with host notifiers mr */
+        return;
+    }
+
     for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
         if (vhost_vdpa_host_notifier_init(dev, i)) {
             goto err;
@@ -XXX,XX +XXX,XX @@ err:
     return;
 }
 
+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    size_t idx;
+
+    if (!v->shadow_vqs) {
+        return;
+    }
+
+    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
+        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
+    }
+    g_ptr_array_free(v->shadow_vqs, true);
+}
+
 static int vhost_vdpa_cleanup(struct vhost_dev *dev)
 {
     struct vhost_vdpa *v;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
     trace_vhost_vdpa_cleanup(dev, v);
     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     memory_listener_unregister(&v->listener);
+    vhost_vdpa_svq_cleanup(dev);
 
     dev->opaque = NULL;
     ram_block_discard_disable(false);
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
     return ret;
 }
 
+static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
+{
+    if (!v->shadow_vqs_enabled) {
+        return;
+    }
+
+    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        vhost_svq_stop(svq);
+    }
+}
+
 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
     uint8_t status = 0;
 
+    vhost_vdpa_reset_svq(v);
+
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
     trace_vhost_vdpa_reset_device(dev, status);
     return ret;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
     return ret;
 }
 
+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+}
+
+/**
+ * Set the shadow virtqueue descriptors to the device
+ *
+ * @dev: The vhost device model
+ * @svq: The shadow virtqueue
+ * @idx: The index of the virtqueue in the vhost device
+ * @errp: Error
+ */
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
+                                 VhostShadowVirtqueue *svq, unsigned idx,
+                                 Error **errp)
+{
+    struct vhost_vring_file file = {
+        .index = dev->vq_index + idx,
+    };
+    const EventNotifier *event_notifier = &svq->hdev_kick;
+    int r;
+
+    file.fd = event_notifier_get_fd(event_notifier);
+    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device kick fd");
+    }
+
+    return r == 0;
+}
+
+static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    Error *err = NULL;
+    unsigned i;
+
+    if (!v->shadow_vqs) {
+        return true;
+    }
+
+    for (i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
+        if (unlikely(!ok)) {
+            error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 {
     struct vhost_vdpa *v = dev->opaque;
+    bool ok;
     trace_vhost_vdpa_dev_start(dev, started);
 
     if (started) {
         vhost_vdpa_host_notifiers_init(dev);
+        ok = vhost_vdpa_svqs_start(dev);
+        if (unlikely(!ok)) {
+            return -1;
+        }
         vhost_vdpa_set_vring_ready(dev);
     } else {
         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
-    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+    struct vhost_vdpa *v = dev->opaque;
+    int vdpa_idx = file->index - dev->vq_index;
+
+    if (v->shadow_vqs_enabled) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+        vhost_svq_set_svq_kick_fd(svq, file->fd);
+        return 0;
+    } else {
+        return vhost_vdpa_set_vring_dev_kick(dev, file);
+    }
 }
 
 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -XXX,XX +XXX,XX @@
 #ifndef HW_VIRTIO_VHOST_VDPA_H
 #define HW_VIRTIO_VHOST_VDPA_H
 
+#include <gmodule.h>
+
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
     bool iotlb_batch_begin_sent;
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
+    bool shadow_vqs_enabled;
+    GPtrArray *shadow_vqs;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
-- 
2.7.4
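The kick path added here is essentially an eventfd relay. A minimal standalone sketch of the same idea with raw eventfds (illustrative only; the patch uses QEMU's EventNotifier and event-loop handlers instead):

    #include <stdint.h>
    #include <unistd.h>

    /* Consume a pending guest notification and re-raise it to the device. */
    static void relay_kick(int guest_kick_fd, int device_kick_fd)
    {
        uint64_t counter;

        if (read(guest_kick_fd, &counter, sizeof(counter)) == sizeof(counter)) {
            counter = 1;
            (void)write(device_kick_fd, &counter, sizeof(counter));
        }
    }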
From: Eugenio Pérez <eperezma@redhat.com>

This will make qemu aware of the device used buffers, allowing it to
write the guest memory with its contents if needed.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 38 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h | 4 ++++
 hw/virtio/vhost-vdpa.c             | 31 +++++++++++++++++++++++++++++--
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(EventNotifier *n)
 }
 
 /**
+ * Forward vhost notifications
+ *
+ * @n: hdev call event notifier, the one that device set to notify svq.
+ */
+static void vhost_svq_handle_call(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
+                                             hdev_call);
+    event_notifier_test_and_clear(n);
+    event_notifier_set(&svq->svq_call);
+}
+
+/**
+ * Set the call notifier for the SVQ to call the guest
+ *
+ * @svq: Shadow virtqueue
+ * @call_fd: call notifier
+ *
+ * Called on BQL context.
+ */
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
+{
+    if (call_fd == VHOST_FILE_UNBIND) {
+        /*
+         * Fail event_notifier_set if called handling device call.
+         *
+         * SVQ still needs device notifications, since it needs to keep
+         * forwarding used buffers even with the unbind.
+         */
+        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
+    } else {
+        event_notifier_init_fd(&svq->svq_call, call_fd);
+    }
+}
+
+/**
  * Set a new file descriptor for the guest to kick the SVQ and notify for avail
  *
  * @svq: The svq
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
     }
 
     event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
+    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
     return g_steal_pointer(&svq);
 
 err_init_hdev_call:
@@ -XXX,XX +XXX,XX @@ void vhost_svq_free(gpointer pvq)
     VhostShadowVirtqueue *vq = pvq;
     vhost_svq_stop(vq);
     event_notifier_cleanup(&vq->hdev_kick);
+    event_notifier_set_handler(&vq->hdev_call, NULL);
     event_notifier_cleanup(&vq->hdev_call);
     g_free(vq);
 }
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
      * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
      */
     EventNotifier svq_kick;
+
+    /* Guest's call notifier, where the SVQ calls guest. */
+    EventNotifier svq_call;
 } VhostShadowVirtqueue;
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
     return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
 }
 
+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+}
+
 /**
  * Set the shadow virtqueue descriptors to the device
  *
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
  * @svq: The shadow virtqueue
  * @idx: The index of the virtqueue in the vhost device
  * @errp: Error
+ *
+ * Note that this function does not rewind kick file descriptor if cannot set
+ * call one.
  */
 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
     r = vhost_vdpa_set_vring_dev_kick(dev, &file);
     if (unlikely(r != 0)) {
         error_setg_errno(errp, -r, "Can't set device kick fd");
+        return false;
+    }
+
+    event_notifier = &svq->hdev_call;
+    file.fd = event_notifier_get_fd(event_notifier);
+    r = vhost_vdpa_set_vring_dev_call(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device call fd");
     }
 
     return r == 0;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
-    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        int vdpa_idx = file->index - dev->vq_index;
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+
+        vhost_svq_set_svq_call_fd(svq, file->fd);
+        return 0;
+    } else {
+        return vhost_vdpa_set_vring_dev_call(dev, file);
+    }
 }
 
 static int vhost_vdpa_get_features(struct vhost_dev *dev,
-- 
2.7.4
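Both forwarding handlers recover their VhostShadowVirtqueue from the bare EventNotifier pointer the event loop hands back. A self-contained sketch of that container_of pattern (generic names, not QEMU code):

    #include <stddef.h>

    struct owner {
        int other_state;
        int member;
    };

    /* Rebuild the enclosing structure pointer from a pointer to one field. */
    #define CONTAINER_OF(ptr, type, field) \
        ((type *)((char *)(ptr) - offsetof(type, field)))

    static struct owner *owner_of(int *member_ptr)
    {
        return CONTAINER_OF(member_ptr, struct owner, member);
    }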
From: Eugenio Pérez <eperezma@redhat.com>

This allows SVQ to negotiate features with the guest and the device. For
the device, SVQ is a driver. While this function bypasses all
non-transport features, it needs to disable the features that SVQ does
not support when forwarding buffers. This includes packed vq layout,
indirect descriptors or event idx.

Future changes can add support to offer more features to the guest,
since the use of VirtQueue gives this for free. This is left out at the
moment for simplicity.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 44 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h | 2 ++
 hw/virtio/vhost-vdpa.c             | 15 +++++++++++++
 3 files changed, 61 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 
 #include "qemu/error-report.h"
+#include "qapi/error.h"
 #include "qemu/main-loop.h"
 #include "linux-headers/linux/vhost.h"
 
 /**
+ * Validate the transport device features that both guests can use with the SVQ
+ * and SVQs can use with the device.
+ *
+ * @dev_features: The features
+ * @errp: Error pointer
+ */
+bool vhost_svq_valid_features(uint64_t features, Error **errp)
+{
+    bool ok = true;
+    uint64_t svq_features = features;
+
+    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
+         ++b) {
+        switch (b) {
+        case VIRTIO_F_ANY_LAYOUT:
+            continue;
+
+        case VIRTIO_F_ACCESS_PLATFORM:
+            /* SVQ trust in the host's IOMMU to translate addresses */
+        case VIRTIO_F_VERSION_1:
+            /* SVQ trust that the guest vring is little endian */
+            if (!(svq_features & BIT_ULL(b))) {
+                svq_features |= BIT_ULL(b);
+                ok = false;
+            }
+            continue;
+
+        default:
+            if (svq_features & BIT_ULL(b)) {
+                svq_features &= ~BIT_ULL(b);
+                ok = false;
+            }
+        }
+    }
+
+    if (!ok) {
+        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
+                         ", ok: 0x%"PRIx64, features, svq_features);
+    }
+    return ok;
+}
+
+/**
  * Forward guest notifications.
  *
  * @n: guest kick event notifier, the one that guest set to notify svq.
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
     EventNotifier svq_call;
 } VhostShadowVirtqueue;
 
+bool vhost_svq_valid_features(uint64_t features, Error **errp);
+
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
                                Error **errp)
 {
     g_autoptr(GPtrArray) shadow_vqs = NULL;
+    uint64_t dev_features, svq_features;
+    int r;
+    bool ok;
 
     if (!v->shadow_vqs_enabled) {
         return 0;
     }
 
+    r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
+    if (r != 0) {
+        error_setg_errno(errp, -r, "Can't get vdpa device features");
+        return r;
+    }
+
+    svq_features = dev_features;
+    ok = vhost_svq_valid_features(svq_features, errp);
+    if (unlikely(!ok)) {
+        return -1;
+    }
+
     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
     for (unsigned n = 0; n < hdev->nvqs; ++n) {
         g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
-- 
2.7.4
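Worked example (feature bit numbers per the virtio spec: VIRTIO_RING_F_INDIRECT_DESC = 28, VIRTIO_F_VERSION_1 = 32): if the device offers 0x110000000, i.e. bits 28 and 32, the loop clears bit 28 because SVQ cannot forward indirect descriptors, leaving svq_features = 0x100000000 and ok = false, so the error message reports both masks. Conversely, a device that does not offer VIRTIO_F_VERSION_1 also fails validation, since that bit is required and gets added rather than cleared.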
From: Eugenio Pérez <eperezma@redhat.com>

It reports the shadow virtqueue address from qemu virtual address space.

Since this will be different from the guest's vaddr, but the device can
access it, SVQ takes special care about its alignment & lack of garbage
data. It assumes that IOMMU will work in host_page_size ranges for that.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 29 +++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h | 9 +++++++++
 2 files changed, 38 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
 }
 
 /**
+ * Get the shadow vq vring address.
+ * @svq: Shadow virtqueue
+ * @addr: Destination to store address
+ */
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
+                              struct vhost_vring_addr *addr)
+{
+    addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
+    addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
+    addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
+}
+
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
+{
+    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
+    size_t avail_size = offsetof(vring_avail_t, ring) +
+                                             sizeof(uint16_t) * svq->vring.num;
+
+    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size);
+}
+
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
+{
+    size_t used_size = offsetof(vring_used_t, ring) +
+                                    sizeof(vring_used_elem_t) * svq->vring.num;
+    return ROUND_UP(used_size, qemu_real_host_page_size);
+}
+
+/**
  * Set a new file descriptor for the guest to kick the SVQ and notify for avail
  *
  * @svq: The svq
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@
 #define VHOST_SHADOW_VIRTQUEUE_H
 
 #include "qemu/event_notifier.h"
+#include "hw/virtio/virtio.h"
+#include "standard-headers/linux/vhost_types.h"
 
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
+    /* Shadow vring */
+    struct vring vring;
+
     /* Shadow kick notifier, sent to vhost */
     EventNotifier hdev_kick;
     /* Shadow call notifier, sent to vhost */
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
+                              struct vhost_vring_addr *addr);
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
 
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
-- 
2.7.4
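Worked example of the size helpers, assuming 4 KiB host pages and the split-ring layout (16-byte descriptors, a 4-byte header on both the avail and used rings, 2-byte avail entries, 8-byte used elements): with vring.num = 256, the driver area is ROUND_UP(16 * 256 + 4 + 2 * 256, 4096) = ROUND_UP(4612, 4096) = 8192 bytes, and the device area is ROUND_UP(4 + 8 * 256, 4096) = 4096 bytes. The page-granularity rounding keeps the device-writable used ring on pages separate from the driver area.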
From: Eugenio Pérez <eperezma@redhat.com>

First half of the buffers forwarding part, preparing vhost-vdpa
callbacks to SVQ to offer it. QEMU cannot enable it at this moment, so
this is effectively dead code, but it helps to reduce patch size.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-vdpa.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
     return ret;
 }
 
+static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
+                                         struct vhost_vring_state *ring)
+{
+    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+}
+
 static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                          struct vhost_vring_file *file)
 {
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
     return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
 }
 
+static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
+                                         struct vhost_vring_addr *addr)
+{
+    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
+                                    addr->desc_user_addr, addr->used_user_addr,
+                                    addr->avail_user_addr,
+                                    addr->log_guest_addr);
+
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+}
+
 /**
  * Set the shadow virtqueue descriptors to the device
  *
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                      struct vhost_vring_addr *addr)
 {
-    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
-                                    addr->desc_user_addr, addr->used_user_addr,
-                                    addr->avail_user_addr,
-                                    addr->log_guest_addr);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        /*
+         * Device vring addr was set at device start. SVQ base is handled by
+         * VirtQueue code.
+         */
+        return 0;
+    }
+
+    return vhost_vdpa_set_vring_dev_addr(dev, addr);
 }
 
 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                      struct vhost_vring_state *ring)
 {
-    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        /*
+         * Device vring base was set at device start. SVQ base is handled by
+         * VirtQueue code.
+         */
+        return 0;
+    }
+
+    return vhost_vdpa_set_dev_vring_base(dev, ring);
 }
 
 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
145
- if (s->timer) {
146
- timer_del(s->timer);
147
- }
148
-
149
- qemu_mutex_destroy(&s->timer_check_lock);
150
-
151
g_free(s->pri_indev);
152
g_free(s->sec_indev);
153
g_free(s->outdev);
154
--
2.7.4

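As background for the colo-compare change in the pair above: the patch
drops the QEMU_CLOCK_VIRTUAL timer and its mutex, and instead attaches a
GLib timeout source to the compare thread's own context, so the periodic
stale-packet check runs in the same thread as the comparison and needs no
locking. A minimal standalone sketch of that GLib pattern (the names and
the interval are illustrative, not the exact colo-compare code):

    static gboolean periodic_check(gpointer opaque)
    {
        /* ... do the periodic work on 'opaque' ... */
        return TRUE;                 /* keep the source installed */
    }

    /* In the worker thread, before running the loop: */
    GSource *src = g_timeout_source_new(3000 /* ms, illustrative */);
    g_source_set_callback(src, periodic_check, state, NULL);
    g_source_attach(src, worker_context);   /* not the default context */
    /* ... g_main_loop_run() ... */
    g_source_unref(src);                    /* after the loop exits */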
From: zhanghailiang <zhang.zhanghailiang@huawei.com>

We can call qemu_chr_fe_set_handlers() to add/remove the fd being watched
in 'context', which can be either the default main context or another
explicit context. But the original logic is not correct: we didn't remove
the right fd, because we called g_main_context_find_source_by_id(NULL, tag),
which always tries to find the GSource in the default context.

Fix it by passing the right context to g_main_context_find_source_by_id().

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Reviewed-by: Marc-André Lureau <marcandre.lureau@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 chardev/char-fd.c | 6 +++---
 chardev/char-io.c | 8 ++++----
 chardev/char-io.h | 2 +-
 chardev/char-pty.c | 2 +-
 chardev/char-socket.c | 4 ++--
 chardev/char-udp.c | 6 +++---
 chardev/char.c | 2 +-
 7 files changed, 15 insertions(+), 15 deletions(-)

From: Eugenio Pérez <eperezma@redhat.com>

Initial version of shadow virtqueue that actually forwards buffers. There
is no iommu support at the moment, and that will be addressed in future
patches of this series. Since all vhost-vdpa devices use forced IOMMU,
this means that SVQ is not usable at this point of the series on any
device.

For simplicity it only supports modern devices, which expect a vring in
little endian, with a split ring and no event idx or indirect
descriptors. Support for those will not be added in this series.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped-down functionality
and optimizations so it's easier to review.

However, forwarding buffers has some particular quirks: one of the most
unexpected is that a guest's buffer can spread across more than one
descriptor in SVQ. While this is handled gracefully by qemu's emulated
virtio devices, it may cause an unexpected SVQ queue-full condition. This
patch also solves that by checking for the condition at both guest kicks
and device calls. The code may be more elegant in the future if the SVQ
code runs in its own iocontext.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 352 ++++++++++++++++++++++++++++++++++++-
 hw/virtio/vhost-shadow-virtqueue.h | 26 +++
 hw/virtio/vhost-vdpa.c | 155 +++++++++++++++-
 3 files changed, 522 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c

diff --git a/chardev/char-fd.c b/chardev/char-fd.c
index XXXXXXX..XXXXXXX 100644
--- a/chardev/char-fd.c
+++ b/chardev/char-fd.c
@@ -XXX,XX +XXX,XX @@ static gboolean fd_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
38
@@ -XXX,XX +XXX,XX @@
31
ret = qio_channel_read(
39
#include "qemu/error-report.h"
32
chan, (gchar *)buf, len, NULL);
40
#include "qapi/error.h"
33
if (ret == 0) {
41
#include "qemu/main-loop.h"
34
- remove_fd_in_watch(chr);
42
+#include "qemu/log.h"
35
+ remove_fd_in_watch(chr, NULL);
43
+#include "qemu/memalign.h"
36
qemu_chr_be_event(chr, CHR_EVENT_CLOSED);
44
#include "linux-headers/linux/vhost.h"
37
return FALSE;
45
38
}
46
/**
39
@@ -XXX,XX +XXX,XX @@ static void fd_chr_update_read_handler(Chardev *chr,
47
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
48
}
49
50
/**
51
- * Forward guest notifications.
52
+ * Number of descriptors that the SVQ can make available from the guest.
53
+ *
54
+ * @svq: The svq
55
+ */
56
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
57
+{
58
+ return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
59
+}
60
+
61
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
62
+ const struct iovec *iovec, size_t num,
63
+ bool more_descs, bool write)
64
+{
65
+ uint16_t i = svq->free_head, last = svq->free_head;
66
+ unsigned n;
67
+ uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
68
+ vring_desc_t *descs = svq->vring.desc;
69
+
70
+ if (num == 0) {
71
+ return;
72
+ }
73
+
74
+ for (n = 0; n < num; n++) {
75
+ if (more_descs || (n + 1 < num)) {
76
+ descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
77
+ } else {
78
+ descs[i].flags = flags;
79
+ }
80
+ descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
81
+ descs[i].len = cpu_to_le32(iovec[n].iov_len);
82
+
83
+ last = i;
84
+ i = cpu_to_le16(descs[i].next);
85
+ }
86
+
87
+ svq->free_head = le16_to_cpu(descs[last].next);
88
+}
89
+
90
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
91
+ VirtQueueElement *elem, unsigned *head)
92
+{
93
+ unsigned avail_idx;
94
+ vring_avail_t *avail = svq->vring.avail;
95
+
96
+ *head = svq->free_head;
97
+
98
+ /* We need some descriptors here */
99
+ if (unlikely(!elem->out_num && !elem->in_num)) {
100
+ qemu_log_mask(LOG_GUEST_ERROR,
101
+ "Guest provided element with no descriptors");
102
+ return false;
103
+ }
104
+
105
+ vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
106
+ false);
107
+ vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
108
+
109
+ /*
110
+ * Put the entry in the available array (but don't update avail->idx until
111
+ * they do sync).
112
+ */
113
+ avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
114
+ avail->ring[avail_idx] = cpu_to_le16(*head);
115
+ svq->shadow_avail_idx++;
116
+
117
+ /* Update the avail index after write the descriptor */
118
+ smp_wmb();
119
+ avail->idx = cpu_to_le16(svq->shadow_avail_idx);
120
+
121
+ return true;
122
+}
123
+
124
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
125
+{
126
+ unsigned qemu_head;
127
+ bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
128
+ if (unlikely(!ok)) {
129
+ return false;
130
+ }
131
+
132
+ svq->ring_id_maps[qemu_head] = elem;
133
+ return true;
134
+}
135
+
136
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
137
+{
138
+ /*
139
+ * We need to expose the available array entries before checking the used
140
+ * flags
141
+ */
142
+ smp_mb();
143
+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
144
+ return;
145
+ }
146
+
147
+ event_notifier_set(&svq->hdev_kick);
148
+}
149
+
150
+/**
151
+ * Forward available buffers.
152
+ *
153
+ * @svq: Shadow VirtQueue
154
+ *
155
+ * Note that this function does not guarantee that all guest's available
156
+ * buffers are available to the device in SVQ avail ring. The guest may have
157
+ * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
158
+ * qemu vaddr.
159
+ *
160
+ * If that happens, guest's kick notifications will be disabled until the
161
+ * device uses some buffers.
162
+ */
163
+static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
164
+{
165
+ /* Clear event notifier */
166
+ event_notifier_test_and_clear(&svq->svq_kick);
167
+
168
+ /* Forward to the device as many available buffers as possible */
169
+ do {
170
+ virtio_queue_set_notification(svq->vq, false);
171
+
172
+ while (true) {
173
+ VirtQueueElement *elem;
174
+ bool ok;
175
+
176
+ if (svq->next_guest_avail_elem) {
177
+ elem = g_steal_pointer(&svq->next_guest_avail_elem);
178
+ } else {
179
+ elem = virtqueue_pop(svq->vq, sizeof(*elem));
180
+ }
181
+
182
+ if (!elem) {
183
+ break;
184
+ }
185
+
186
+ if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
187
+ /*
188
+ * This condition is possible since a contiguous buffer in GPA
189
+ * does not imply a contiguous buffer in qemu's VA
190
+ * scatter-gather segments. If that happens, the buffer exposed
191
+ * to the device needs to be a chain of descriptors at this
192
+ * moment.
193
+ *
194
+ * SVQ cannot hold more available buffers if we are here:
195
+ * queue the current guest descriptor and ignore further kicks
196
+ * until some elements are used.
197
+ */
198
+ svq->next_guest_avail_elem = elem;
199
+ return;
200
+ }
201
+
202
+ ok = vhost_svq_add(svq, elem);
203
+ if (unlikely(!ok)) {
204
+ /* VQ is broken, just return and ignore any other kicks */
205
+ return;
206
+ }
207
+ vhost_svq_kick(svq);
208
+ }
209
+
210
+ virtio_queue_set_notification(svq->vq, true);
211
+ } while (!virtio_queue_empty(svq->vq));
212
+}
213
+
214
+/**
215
+ * Handle guest's kick.
216
*
217
* @n: guest kick event notifier, the one that guest set to notify svq.
218
*/
219
-static void vhost_handle_guest_kick(EventNotifier *n)
220
+static void vhost_handle_guest_kick_notifier(EventNotifier *n)
40
{
221
{
41
FDChardev *s = FD_CHARDEV(chr);
222
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
42
223
event_notifier_test_and_clear(n);
43
- remove_fd_in_watch(chr);
224
- event_notifier_set(&svq->hdev_kick);
44
+ remove_fd_in_watch(chr, NULL);
225
+ vhost_handle_guest_kick(svq);
45
if (s->ioc_in) {
226
+}
46
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc_in,
227
+
47
fd_chr_read_poll,
228
+static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
48
@@ -XXX,XX +XXX,XX @@ static void char_fd_finalize(Object *obj)
229
+{
49
Chardev *chr = CHARDEV(obj);
230
+ if (svq->last_used_idx != svq->shadow_used_idx) {
50
FDChardev *s = FD_CHARDEV(obj);
231
+ return true;
51
232
+ }
52
- remove_fd_in_watch(chr);
233
+
53
+ remove_fd_in_watch(chr, NULL);
234
+ svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
54
if (s->ioc_in) {
235
+
55
object_unref(OBJECT(s->ioc_in));
236
+ return svq->last_used_idx != svq->shadow_used_idx;
56
}
57
diff --git a/chardev/char-io.c b/chardev/char-io.c
58
index XXXXXXX..XXXXXXX 100644
59
--- a/chardev/char-io.c
60
+++ b/chardev/char-io.c
61
@@ -XXX,XX +XXX,XX @@ guint io_add_watch_poll(Chardev *chr,
62
return tag;
63
}
237
}
64
238
65
-static void io_remove_watch_poll(guint tag)
239
/**
66
+static void io_remove_watch_poll(guint tag, GMainContext *context)
240
- * Forward vhost notifications
241
+ * Enable vhost device calls after disable them.
242
+ *
243
+ * @svq: The svq
244
+ *
245
+ * It returns false if there are pending used buffers from the vhost device,
246
+ * avoiding the possible races between SVQ checking for more work and enabling
247
+ * callbacks. True if SVQ used vring has no more pending buffers.
248
+ */
249
+static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
250
+{
251
+ svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
252
+ /* Make sure the flag is written before the read of used_idx */
253
+ smp_mb();
254
+ return !vhost_svq_more_used(svq);
255
+}
256
+
257
+static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
258
+{
259
+ svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
260
+}
261
+
262
+static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
263
+ uint32_t *len)
264
+{
265
+ vring_desc_t *descs = svq->vring.desc;
266
+ const vring_used_t *used = svq->vring.used;
267
+ vring_used_elem_t used_elem;
268
+ uint16_t last_used;
269
+
270
+ if (!vhost_svq_more_used(svq)) {
271
+ return NULL;
272
+ }
273
+
274
+ /* Only get used array entries after they have been exposed by dev */
275
+ smp_rmb();
276
+ last_used = svq->last_used_idx & (svq->vring.num - 1);
277
+ used_elem.id = le32_to_cpu(used->ring[last_used].id);
278
+ used_elem.len = le32_to_cpu(used->ring[last_used].len);
279
+
280
+ svq->last_used_idx++;
281
+ if (unlikely(used_elem.id >= svq->vring.num)) {
282
+ qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
283
+ svq->vdev->name, used_elem.id);
284
+ return NULL;
285
+ }
286
+
287
+ if (unlikely(!svq->ring_id_maps[used_elem.id])) {
288
+ qemu_log_mask(LOG_GUEST_ERROR,
289
+ "Device %s says index %u is used, but it was not available",
290
+ svq->vdev->name, used_elem.id);
291
+ return NULL;
292
+ }
293
+
294
+ descs[used_elem.id].next = svq->free_head;
295
+ svq->free_head = used_elem.id;
296
+
297
+ *len = used_elem.len;
298
+ return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
299
+}
300
+
301
+static void vhost_svq_flush(VhostShadowVirtqueue *svq,
302
+ bool check_for_avail_queue)
303
+{
304
+ VirtQueue *vq = svq->vq;
305
+
306
+ /* Forward as many used buffers as possible. */
307
+ do {
308
+ unsigned i = 0;
309
+
310
+ vhost_svq_disable_notification(svq);
311
+ while (true) {
312
+ uint32_t len;
313
+ g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
314
+ if (!elem) {
315
+ break;
316
+ }
317
+
318
+ if (unlikely(i >= svq->vring.num)) {
319
+ qemu_log_mask(LOG_GUEST_ERROR,
320
+ "More than %u used buffers obtained in a %u size SVQ",
321
+ i, svq->vring.num);
322
+ virtqueue_fill(vq, elem, len, i);
323
+ virtqueue_flush(vq, i);
324
+ return;
325
+ }
326
+ virtqueue_fill(vq, elem, len, i++);
327
+ }
328
+
329
+ virtqueue_flush(vq, i);
330
+ event_notifier_set(&svq->svq_call);
331
+
332
+ if (check_for_avail_queue && svq->next_guest_avail_elem) {
333
+ /*
334
+ * Avail ring was full when vhost_svq_flush was called, so it's a
335
+ * good moment to make more descriptors available if possible.
336
+ */
337
+ vhost_handle_guest_kick(svq);
338
+ }
339
+ } while (!vhost_svq_enable_notification(svq));
340
+}
341
+
342
+/**
343
+ * Forward used buffers.
344
*
345
* @n: hdev call event notifier, the one that device set to notify svq.
346
+ *
347
+ * Note that we are not making any buffers available in the loop, there is no
348
+ * way that it runs more than virtqueue size times.
349
*/
350
static void vhost_svq_handle_call(EventNotifier *n)
67
{
351
{
68
GSource *source;
352
VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
69
IOWatchPoll *iwp;
353
hdev_call);
70
354
event_notifier_test_and_clear(n);
71
g_return_if_fail(tag > 0);
355
- event_notifier_set(&svq->svq_call);
72
356
+ vhost_svq_flush(svq, true);
73
- source = g_main_context_find_source_by_id(NULL, tag);
74
+ source = g_main_context_find_source_by_id(context, tag);
75
g_return_if_fail(source != NULL);
76
77
iwp = io_watch_poll_from_source(source);
78
@@ -XXX,XX +XXX,XX @@ static void io_remove_watch_poll(guint tag)
79
g_source_destroy(&iwp->parent);
80
}
357
}
81
358
82
-void remove_fd_in_watch(Chardev *chr)
359
/**
83
+void remove_fd_in_watch(Chardev *chr, GMainContext *context)
360
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
84
{
361
if (poll_start) {
85
if (chr->fd_in_tag) {
362
event_notifier_init_fd(svq_kick, svq_kick_fd);
86
- io_remove_watch_poll(chr->fd_in_tag);
363
event_notifier_set(svq_kick);
87
+ io_remove_watch_poll(chr->fd_in_tag, context);
364
- event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
88
chr->fd_in_tag = 0;
365
+ event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
366
+ }
367
+}
368
+
369
+/**
370
+ * Start the shadow virtqueue operation.
371
+ *
372
+ * @svq: Shadow Virtqueue
373
+ * @vdev: VirtIO device
374
+ * @vq: Virtqueue to shadow
375
+ */
376
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
377
+ VirtQueue *vq)
378
+{
379
+ size_t desc_size, driver_size, device_size;
380
+
381
+ svq->next_guest_avail_elem = NULL;
382
+ svq->shadow_avail_idx = 0;
383
+ svq->shadow_used_idx = 0;
384
+ svq->last_used_idx = 0;
385
+ svq->vdev = vdev;
386
+ svq->vq = vq;
387
+
388
+ svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
389
+ driver_size = vhost_svq_driver_area_size(svq);
390
+ device_size = vhost_svq_device_area_size(svq);
391
+ svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
392
+ desc_size = sizeof(vring_desc_t) * svq->vring.num;
393
+ svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
394
+ memset(svq->vring.desc, 0, driver_size);
395
+ svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
396
+ memset(svq->vring.used, 0, device_size);
397
+ svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
398
+ for (unsigned i = 0; i < svq->vring.num - 1; i++) {
399
+ svq->vring.desc[i].next = cpu_to_le16(i + 1);
89
}
400
}
90
}
401
}
91
diff --git a/chardev/char-io.h b/chardev/char-io.h
402
403
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
404
void vhost_svq_stop(VhostShadowVirtqueue *svq)
405
{
406
event_notifier_set_handler(&svq->svq_kick, NULL);
407
+ g_autofree VirtQueueElement *next_avail_elem = NULL;
408
+
409
+ if (!svq->vq) {
410
+ return;
411
+ }
412
+
413
+ /* Send all pending used descriptors to guest */
414
+ vhost_svq_flush(svq, false);
415
+
416
+ for (unsigned i = 0; i < svq->vring.num; ++i) {
417
+ g_autofree VirtQueueElement *elem = NULL;
418
+ elem = g_steal_pointer(&svq->ring_id_maps[i]);
419
+ if (elem) {
420
+ virtqueue_detach_element(svq->vq, elem, 0);
421
+ }
422
+ }
423
+
424
+ next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
425
+ if (next_avail_elem) {
426
+ virtqueue_detach_element(svq->vq, next_avail_elem, 0);
427
+ }
428
+ svq->vq = NULL;
429
+ g_free(svq->ring_id_maps);
430
+ qemu_vfree(svq->vring.desc);
431
+ qemu_vfree(svq->vring.used);
432
}
433
434
/**
435
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
92
index XXXXXXX..XXXXXXX 100644
436
index XXXXXXX..XXXXXXX 100644
93
--- a/chardev/char-io.h
437
--- a/hw/virtio/vhost-shadow-virtqueue.h
94
+++ b/chardev/char-io.h
438
+++ b/hw/virtio/vhost-shadow-virtqueue.h
95
@@ -XXX,XX +XXX,XX @@ guint io_add_watch_poll(Chardev *chr,
439
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
96
gpointer user_data,
440
97
GMainContext *context);
441
/* Guest's call notifier, where the SVQ calls guest. */
98
442
EventNotifier svq_call;
99
-void remove_fd_in_watch(Chardev *chr);
443
+
100
+void remove_fd_in_watch(Chardev *chr, GMainContext *context);
444
+ /* Virtio queue shadowing */
101
445
+ VirtQueue *vq;
102
int io_channel_send(QIOChannel *ioc, const void *buf, size_t len);
446
+
103
447
+ /* Virtio device */
104
diff --git a/chardev/char-pty.c b/chardev/char-pty.c
448
+ VirtIODevice *vdev;
449
+
450
+ /* Map for use the guest's descriptors */
451
+ VirtQueueElement **ring_id_maps;
452
+
453
+ /* Next VirtQueue element that guest made available */
454
+ VirtQueueElement *next_guest_avail_elem;
455
+
456
+ /* Next head to expose to the device */
457
+ uint16_t shadow_avail_idx;
458
+
459
+ /* Next free descriptor */
460
+ uint16_t free_head;
461
+
462
+ /* Last seen used idx */
463
+ uint16_t shadow_used_idx;
464
+
465
+ /* Next head to consume from the device */
466
+ uint16_t last_used_idx;
467
} VhostShadowVirtqueue;
468
469
bool vhost_svq_valid_features(uint64_t features, Error **errp);
470
@@ -XXX,XX +XXX,XX @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
471
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
472
size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
473
474
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
475
+ VirtQueue *vq);
476
void vhost_svq_stop(VhostShadowVirtqueue *svq);
477
478
VhostShadowVirtqueue *vhost_svq_new(void);
479
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
105
index XXXXXXX..XXXXXXX 100644
480
index XXXXXXX..XXXXXXX 100644
106
--- a/chardev/char-pty.c
481
--- a/hw/virtio/vhost-vdpa.c
107
+++ b/chardev/char-pty.c
482
+++ b/hw/virtio/vhost-vdpa.c
108
@@ -XXX,XX +XXX,XX @@ static void pty_chr_state(Chardev *chr, int connected)
483
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
109
g_source_remove(s->open_tag);
484
* Note that this function does not rewind kick file descriptor if cannot set
110
s->open_tag = 0;
485
* call one.
486
*/
487
-static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
488
- VhostShadowVirtqueue *svq, unsigned idx,
489
- Error **errp)
490
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
491
+ VhostShadowVirtqueue *svq, unsigned idx,
492
+ Error **errp)
493
{
494
struct vhost_vring_file file = {
495
.index = dev->vq_index + idx,
496
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
497
r = vhost_vdpa_set_vring_dev_kick(dev, &file);
498
if (unlikely(r != 0)) {
499
error_setg_errno(errp, -r, "Can't set device kick fd");
500
- return false;
501
+ return r;
502
}
503
504
event_notifier = &svq->hdev_call;
505
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
506
error_setg_errno(errp, -r, "Can't set device call fd");
507
}
508
509
+ return r;
510
+}
511
+
512
+/**
513
+ * Unmap a SVQ area in the device
514
+ */
515
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
516
+ hwaddr size)
517
+{
518
+ int r;
519
+
520
+ size = ROUND_UP(size, qemu_real_host_page_size);
521
+ r = vhost_vdpa_dma_unmap(v, iova, size);
522
+ return r == 0;
523
+}
524
+
525
+static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
526
+ const VhostShadowVirtqueue *svq)
527
+{
528
+ struct vhost_vdpa *v = dev->opaque;
529
+ struct vhost_vring_addr svq_addr;
530
+ size_t device_size = vhost_svq_device_area_size(svq);
531
+ size_t driver_size = vhost_svq_driver_area_size(svq);
532
+ bool ok;
533
+
534
+ vhost_svq_get_vring_addr(svq, &svq_addr);
535
+
536
+ ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
537
+ if (unlikely(!ok)) {
538
+ return false;
539
+ }
540
+
541
+ return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
542
+}
543
+
544
+/**
545
+ * Map the shadow virtqueue rings in the device
546
+ *
547
+ * @dev: The vhost device
548
+ * @svq: The shadow virtqueue
549
+ * @addr: Assigned IOVA addresses
550
+ * @errp: Error pointer
551
+ */
552
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
553
+ const VhostShadowVirtqueue *svq,
554
+ struct vhost_vring_addr *addr,
555
+ Error **errp)
556
+{
557
+ struct vhost_vdpa *v = dev->opaque;
558
+ size_t device_size = vhost_svq_device_area_size(svq);
559
+ size_t driver_size = vhost_svq_driver_area_size(svq);
560
+ int r;
561
+
562
+ ERRP_GUARD();
563
+ vhost_svq_get_vring_addr(svq, addr);
564
+
565
+ r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
566
+ (void *)(uintptr_t)addr->desc_user_addr, true);
567
+ if (unlikely(r != 0)) {
568
+ error_setg_errno(errp, -r, "Cannot create vq driver region: ");
569
+ return false;
570
+ }
571
+
572
+ r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
573
+ (void *)(intptr_t)addr->used_user_addr, false);
574
+ if (unlikely(r != 0)) {
575
+ error_setg_errno(errp, -r, "Cannot create vq device region: ");
576
+ }
577
+
578
+ return r == 0;
579
+}
580
+
581
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
582
+ VhostShadowVirtqueue *svq, unsigned idx,
583
+ Error **errp)
584
+{
585
+ uint16_t vq_index = dev->vq_index + idx;
586
+ struct vhost_vring_state s = {
587
+ .index = vq_index,
588
+ };
589
+ int r;
590
+
591
+ r = vhost_vdpa_set_dev_vring_base(dev, &s);
592
+ if (unlikely(r)) {
593
+ error_setg_errno(errp, -r, "Cannot set vring base");
594
+ return false;
595
+ }
596
+
597
+ r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
598
return r == 0;
599
}
600
601
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
602
}
603
604
for (i = 0; i < v->shadow_vqs->len; ++i) {
605
+ VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
606
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
607
+ struct vhost_vring_addr addr = {
608
+ .index = i,
609
+ };
610
+ int r;
611
bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
612
if (unlikely(!ok)) {
613
- error_reportf_err(err, "Cannot setup SVQ %u: ", i);
614
+ goto err;
615
+ }
616
+
617
+ vhost_svq_start(svq, dev->vdev, vq);
618
+ ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
619
+ if (unlikely(!ok)) {
620
+ goto err_map;
621
+ }
622
+
623
+ /* Override vring GPA set by vhost subsystem */
624
+ r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
625
+ if (unlikely(r != 0)) {
626
+ error_setg_errno(&err, -r, "Cannot set device address");
627
+ goto err_set_addr;
628
+ }
629
+ }
630
+
631
+ return true;
632
+
633
+err_set_addr:
634
+ vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
635
+
636
+err_map:
637
+ vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
638
+
639
+err:
640
+ error_reportf_err(err, "Cannot setup SVQ %u: ", i);
641
+ for (unsigned j = 0; j < i; ++j) {
642
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
643
+ vhost_vdpa_svq_unmap_rings(dev, svq);
644
+ vhost_svq_stop(svq);
645
+ }
646
+
647
+ return false;
648
+}
649
+
650
+static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
651
+{
652
+ struct vhost_vdpa *v = dev->opaque;
653
+
654
+ if (!v->shadow_vqs) {
655
+ return true;
656
+ }
657
+
658
+ for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
659
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
660
+ bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
661
+ if (unlikely(!ok)) {
662
return false;
111
}
663
}
112
- remove_fd_in_watch(chr);
113
+ remove_fd_in_watch(chr, NULL);
114
s->connected = 0;
115
/* (re-)connect poll interval for idle guests: once per second.
116
* We check more frequently in case the guests sends data to
117
diff --git a/chardev/char-socket.c b/chardev/char-socket.c
118
index XXXXXXX..XXXXXXX 100644
119
--- a/chardev/char-socket.c
120
+++ b/chardev/char-socket.c
121
@@ -XXX,XX +XXX,XX @@ static void tcp_chr_free_connection(Chardev *chr)
122
}
664
}
123
665
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
124
tcp_set_msgfds(chr, NULL, 0);
666
}
125
- remove_fd_in_watch(chr);
667
vhost_vdpa_set_vring_ready(dev);
126
+ remove_fd_in_watch(chr, NULL);
668
} else {
127
object_unref(OBJECT(s->sioc));
669
+ ok = vhost_vdpa_svqs_stop(dev);
128
s->sioc = NULL;
670
+ if (unlikely(!ok)) {
129
object_unref(OBJECT(s->ioc));
671
+ return -1;
130
@@ -XXX,XX +XXX,XX @@ static void tcp_chr_update_read_handler(Chardev *chr,
672
+ }
131
return;
673
vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
132
}
674
}
133
675
134
- remove_fd_in_watch(chr);
135
+ remove_fd_in_watch(chr, NULL);
136
if (s->ioc) {
137
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
138
tcp_chr_read_poll,
139
diff --git a/chardev/char-udp.c b/chardev/char-udp.c
140
index XXXXXXX..XXXXXXX 100644
141
--- a/chardev/char-udp.c
142
+++ b/chardev/char-udp.c
143
@@ -XXX,XX +XXX,XX @@ static gboolean udp_chr_read(QIOChannel *chan, GIOCondition cond, void *opaque)
144
ret = qio_channel_read(
145
s->ioc, (char *)s->buf, sizeof(s->buf), NULL);
146
if (ret <= 0) {
147
- remove_fd_in_watch(chr);
148
+ remove_fd_in_watch(chr, NULL);
149
return FALSE;
150
}
151
s->bufcnt = ret;
152
@@ -XXX,XX +XXX,XX @@ static void udp_chr_update_read_handler(Chardev *chr,
153
{
154
UdpChardev *s = UDP_CHARDEV(chr);
155
156
- remove_fd_in_watch(chr);
157
+ remove_fd_in_watch(chr, NULL);
158
if (s->ioc) {
159
chr->fd_in_tag = io_add_watch_poll(chr, s->ioc,
160
udp_chr_read_poll,
161
@@ -XXX,XX +XXX,XX @@ static void char_udp_finalize(Object *obj)
162
Chardev *chr = CHARDEV(obj);
163
UdpChardev *s = UDP_CHARDEV(obj);
164
165
- remove_fd_in_watch(chr);
166
+ remove_fd_in_watch(chr, NULL);
167
if (s->ioc) {
168
object_unref(OBJECT(s->ioc));
169
}
170
diff --git a/chardev/char.c b/chardev/char.c
171
index XXXXXXX..XXXXXXX 100644
172
--- a/chardev/char.c
173
+++ b/chardev/char.c
174
@@ -XXX,XX +XXX,XX @@ void qemu_chr_fe_set_handlers(CharBackend *b,
175
cc = CHARDEV_GET_CLASS(s);
176
if (!opaque && !fd_can_read && !fd_read && !fd_event) {
177
fe_open = 0;
178
- remove_fd_in_watch(s);
179
+ remove_fd_in_watch(s, context);
180
} else {
181
fe_open = 1;
182
}
183
--
2.7.4

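The chardev fix in the pair above comes down to one rule: a watch tag is
only meaningful inside the GMainContext it was attached to. A minimal
sketch of the corrected lookup (variable names are illustrative):

    /* A NULL first argument means "the default context" and misses
     * sources attached to an explicit context, so pass the context
     * the tag was created in. */
    GSource *source = g_main_context_find_source_by_id(context, tag);
    if (source) {
        g_source_destroy(source);
    }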
From: "Dr. David Alan Gilbert" <dgilbert@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Fairly simple mechanical conversion of all fields.
3
This iova tree function allows it to look for a hole in allocated
4
4
regions and return a totally new translation for a given translated
5
TODO!!!!
5
address.
6
The problem is vmxnet3-ring size/cell_size/next are declared as size_t
6
7
but written as 32bit.
7
It's usage is mainly to allow devices to access qemu address space,
8
8
remapping guest's one into a new iova space where qemu can add chunks of
9
Signed-off-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
9
addresses.
10
Acked-by: Dmitry Fleytman <dmitry@daynix.com>
10
11
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
12
Reviewed-by: Peter Xu <peterx@redhat.com>
13
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
15
---
13
hw/net/vmxnet3.c | 272 ++++++++++++++++++-------------------------------------
16
include/qemu/iova-tree.h | 18 +++++++
14
1 file changed, 90 insertions(+), 182 deletions(-)
17
util/iova-tree.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++
15
18
2 files changed, 154 insertions(+)
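A hypothetical caller of iova_tree_alloc_map(), to make the flow above
concrete (the buffer and range names are invented; as in the surrounding
patches, DMAMap sizes are stored as length - 1):

    DMAMap map = {
        .translated_addr = (hwaddr)(uintptr_t)qemu_buf,  /* qemu VA */
        .size = buf_len - 1,
        .perm = IOMMU_RW,
    };
    int r = iova_tree_alloc_map(tree, &map, iova_first, iova_last);
    if (r == IOVA_OK) {
        /* map.iova now holds the iova the tree assigned to qemu_buf */
    }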
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/iova-tree.h
+++ b/include/qemu/iova-tree.h

diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmxstate_vmxnet3_mcast_list = {
24
@@ -XXX,XX +XXX,XX @@
21
}
25
#define IOVA_OK (0)
26
#define IOVA_ERR_INVALID (-1) /* Invalid parameters */
27
#define IOVA_ERR_OVERLAP (-2) /* IOVA range overlapped */
28
+#define IOVA_ERR_NOMEM (-3) /* Cannot allocate */
29
30
typedef struct IOVATree IOVATree;
31
typedef struct DMAMap {
32
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova);
33
void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
34
35
/**
36
+ * iova_tree_alloc_map:
37
+ *
38
+ * @tree: the iova tree to allocate from
39
+ * @map: the new map (as translated addr & size) to allocate in the iova region
40
+ * @iova_begin: the minimum address of the allocation
41
+ * @iova_end: the maximum addressable direction of the allocation
42
+ *
43
+ * Allocates a new region of a given size, between iova_min and iova_max.
44
+ *
45
+ * Return: Same as iova_tree_insert, but cannot overlap and can return error if
46
+ * iova tree is out of free contiguous range. The caller gets the assigned iova
47
+ * in map->iova.
48
+ */
49
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
50
+ hwaddr iova_end);
51
+
52
+/**
53
* iova_tree_destroy:
54
*
55
* @tree: the iova tree to destroy
56
diff --git a/util/iova-tree.c b/util/iova-tree.c
57
index XXXXXXX..XXXXXXX 100644
58
--- a/util/iova-tree.c
59
+++ b/util/iova-tree.c
60
@@ -XXX,XX +XXX,XX @@ struct IOVATree {
61
GTree *tree;
22
};
62
};
23
63
24
-static void vmxnet3_get_ring_from_file(QEMUFile *f, Vmxnet3Ring *r)
64
+/* Args to pass to iova_tree_alloc foreach function. */
25
-{
65
+struct IOVATreeAllocArgs {
26
- r->pa = qemu_get_be64(f);
66
+ /* Size of the desired allocation */
27
- r->size = qemu_get_be32(f);
67
+ size_t new_size;
28
- r->cell_size = qemu_get_be32(f);
68
+
29
- r->next = qemu_get_be32(f);
69
+ /* The minimum address allowed in the allocation */
30
- r->gen = qemu_get_byte(f);
70
+ hwaddr iova_begin;
31
-}
71
+
32
-
72
+ /* Map at the left of the hole, can be NULL if "this" is first one */
33
-static void vmxnet3_put_ring_to_file(QEMUFile *f, Vmxnet3Ring *r)
73
+ const DMAMap *prev;
34
-{
74
+
35
- qemu_put_be64(f, r->pa);
75
+ /* Map at the right of the hole, can be NULL if "prev" is the last one */
36
- qemu_put_be32(f, r->size);
76
+ const DMAMap *this;
37
- qemu_put_be32(f, r->cell_size);
77
+
38
- qemu_put_be32(f, r->next);
78
+ /* If found, we fill in the IOVA here */
39
- qemu_put_byte(f, r->gen);
79
+ hwaddr iova_result;
40
-}
80
+
41
-
81
+ /* Whether have we found a valid IOVA */
42
-static void vmxnet3_get_tx_stats_from_file(QEMUFile *f,
82
+ bool iova_found;
43
- struct UPT1_TxStats *tx_stat)
44
-{
45
- tx_stat->TSOPktsTxOK = qemu_get_be64(f);
46
- tx_stat->TSOBytesTxOK = qemu_get_be64(f);
47
- tx_stat->ucastPktsTxOK = qemu_get_be64(f);
48
- tx_stat->ucastBytesTxOK = qemu_get_be64(f);
49
- tx_stat->mcastPktsTxOK = qemu_get_be64(f);
50
- tx_stat->mcastBytesTxOK = qemu_get_be64(f);
51
- tx_stat->bcastPktsTxOK = qemu_get_be64(f);
52
- tx_stat->bcastBytesTxOK = qemu_get_be64(f);
53
- tx_stat->pktsTxError = qemu_get_be64(f);
54
- tx_stat->pktsTxDiscard = qemu_get_be64(f);
55
-}
56
-
57
-static void vmxnet3_put_tx_stats_to_file(QEMUFile *f,
58
- struct UPT1_TxStats *tx_stat)
59
-{
60
- qemu_put_be64(f, tx_stat->TSOPktsTxOK);
61
- qemu_put_be64(f, tx_stat->TSOBytesTxOK);
62
- qemu_put_be64(f, tx_stat->ucastPktsTxOK);
63
- qemu_put_be64(f, tx_stat->ucastBytesTxOK);
64
- qemu_put_be64(f, tx_stat->mcastPktsTxOK);
65
- qemu_put_be64(f, tx_stat->mcastBytesTxOK);
66
- qemu_put_be64(f, tx_stat->bcastPktsTxOK);
67
- qemu_put_be64(f, tx_stat->bcastBytesTxOK);
68
- qemu_put_be64(f, tx_stat->pktsTxError);
69
- qemu_put_be64(f, tx_stat->pktsTxDiscard);
70
-}
71
-
72
-static int vmxnet3_get_txq_descr(QEMUFile *f, void *pv, size_t size,
73
- VMStateField *field)
74
-{
75
- Vmxnet3TxqDescr *r = pv;
76
-
77
- vmxnet3_get_ring_from_file(f, &r->tx_ring);
78
- vmxnet3_get_ring_from_file(f, &r->comp_ring);
79
- r->intr_idx = qemu_get_byte(f);
80
- r->tx_stats_pa = qemu_get_be64(f);
81
-
82
- vmxnet3_get_tx_stats_from_file(f, &r->txq_stats);
83
-
84
- return 0;
85
-}
86
-
87
-static int vmxnet3_put_txq_descr(QEMUFile *f, void *pv, size_t size,
88
- VMStateField *field, QJSON *vmdesc)
89
-{
90
- Vmxnet3TxqDescr *r = pv;
91
-
92
- vmxnet3_put_ring_to_file(f, &r->tx_ring);
93
- vmxnet3_put_ring_to_file(f, &r->comp_ring);
94
- qemu_put_byte(f, r->intr_idx);
95
- qemu_put_be64(f, r->tx_stats_pa);
96
- vmxnet3_put_tx_stats_to_file(f, &r->txq_stats);
97
-
98
- return 0;
99
-}
100
-
101
-static const VMStateInfo txq_descr_info = {
102
- .name = "txq_descr",
103
- .get = vmxnet3_get_txq_descr,
104
- .put = vmxnet3_put_txq_descr
105
+static const VMStateDescription vmstate_vmxnet3_ring = {
106
+ .name = "vmxnet3-ring",
107
+ .version_id = 0,
108
+ .fields = (VMStateField[]) {
109
+ VMSTATE_UINT64(pa, Vmxnet3Ring),
110
+ VMSTATE_UINT32(size, Vmxnet3Ring),
111
+ VMSTATE_UINT32(cell_size, Vmxnet3Ring),
112
+ VMSTATE_UINT32(next, Vmxnet3Ring),
113
+ VMSTATE_UINT8(gen, Vmxnet3Ring),
114
+ VMSTATE_END_OF_LIST()
115
+ }
116
};
117
118
-static void vmxnet3_get_rx_stats_from_file(QEMUFile *f,
119
- struct UPT1_RxStats *rx_stat)
120
-{
121
- rx_stat->LROPktsRxOK = qemu_get_be64(f);
122
- rx_stat->LROBytesRxOK = qemu_get_be64(f);
123
- rx_stat->ucastPktsRxOK = qemu_get_be64(f);
124
- rx_stat->ucastBytesRxOK = qemu_get_be64(f);
125
- rx_stat->mcastPktsRxOK = qemu_get_be64(f);
126
- rx_stat->mcastBytesRxOK = qemu_get_be64(f);
127
- rx_stat->bcastPktsRxOK = qemu_get_be64(f);
128
- rx_stat->bcastBytesRxOK = qemu_get_be64(f);
129
- rx_stat->pktsRxOutOfBuf = qemu_get_be64(f);
130
- rx_stat->pktsRxError = qemu_get_be64(f);
131
-}
132
-
133
-static void vmxnet3_put_rx_stats_to_file(QEMUFile *f,
134
- struct UPT1_RxStats *rx_stat)
135
-{
136
- qemu_put_be64(f, rx_stat->LROPktsRxOK);
137
- qemu_put_be64(f, rx_stat->LROBytesRxOK);
138
- qemu_put_be64(f, rx_stat->ucastPktsRxOK);
139
- qemu_put_be64(f, rx_stat->ucastBytesRxOK);
140
- qemu_put_be64(f, rx_stat->mcastPktsRxOK);
141
- qemu_put_be64(f, rx_stat->mcastBytesRxOK);
142
- qemu_put_be64(f, rx_stat->bcastPktsRxOK);
143
- qemu_put_be64(f, rx_stat->bcastBytesRxOK);
144
- qemu_put_be64(f, rx_stat->pktsRxOutOfBuf);
145
- qemu_put_be64(f, rx_stat->pktsRxError);
146
-}
147
-
148
-static int vmxnet3_get_rxq_descr(QEMUFile *f, void *pv, size_t size,
149
- VMStateField *field)
150
-{
151
- Vmxnet3RxqDescr *r = pv;
152
- int i;
153
-
154
- for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
155
- vmxnet3_get_ring_from_file(f, &r->rx_ring[i]);
156
+static const VMStateDescription vmstate_vmxnet3_tx_stats = {
157
+ .name = "vmxnet3-tx-stats",
158
+ .version_id = 0,
159
+ .fields = (VMStateField[]) {
160
+ VMSTATE_UINT64(TSOPktsTxOK, struct UPT1_TxStats),
161
+ VMSTATE_UINT64(TSOBytesTxOK, struct UPT1_TxStats),
162
+ VMSTATE_UINT64(ucastPktsTxOK, struct UPT1_TxStats),
163
+ VMSTATE_UINT64(ucastBytesTxOK, struct UPT1_TxStats),
164
+ VMSTATE_UINT64(mcastPktsTxOK, struct UPT1_TxStats),
165
+ VMSTATE_UINT64(mcastBytesTxOK, struct UPT1_TxStats),
166
+ VMSTATE_UINT64(bcastPktsTxOK, struct UPT1_TxStats),
167
+ VMSTATE_UINT64(bcastBytesTxOK, struct UPT1_TxStats),
168
+ VMSTATE_UINT64(pktsTxError, struct UPT1_TxStats),
169
+ VMSTATE_UINT64(pktsTxDiscard, struct UPT1_TxStats),
170
+ VMSTATE_END_OF_LIST()
171
}
172
+};
83
+};
173
84
+
174
- vmxnet3_get_ring_from_file(f, &r->comp_ring);
85
+/**
175
- r->intr_idx = qemu_get_byte(f);
86
+ * Iterate args to the next hole
176
- r->rx_stats_pa = qemu_get_be64(f);
87
+ *
177
-
88
+ * @args: The alloc arguments
178
- vmxnet3_get_rx_stats_from_file(f, &r->rxq_stats);
89
+ * @next: The next mapping in the tree. Can be NULL to signal the last one
179
-
90
+ */
180
- return 0;
91
+static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args,
181
-}
92
+ const DMAMap *next)
182
-
93
+{
183
-static int vmxnet3_put_rxq_descr(QEMUFile *f, void *pv, size_t size,
94
+ args->prev = args->this;
184
- VMStateField *field, QJSON *vmdesc)
95
+ args->this = next;
185
-{
96
+}
186
- Vmxnet3RxqDescr *r = pv;
97
+
187
- int i;
98
static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
188
-
189
- for (i = 0; i < VMXNET3_RX_RINGS_PER_QUEUE; i++) {
190
- vmxnet3_put_ring_to_file(f, &r->rx_ring[i]);
191
+static const VMStateDescription vmstate_vmxnet3_txq_descr = {
192
+ .name = "vmxnet3-txq-descr",
193
+ .version_id = 0,
194
+ .fields = (VMStateField[]) {
195
+ VMSTATE_STRUCT(tx_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring,
196
+ Vmxnet3Ring),
197
+ VMSTATE_STRUCT(comp_ring, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_ring,
198
+ Vmxnet3Ring),
199
+ VMSTATE_UINT8(intr_idx, Vmxnet3TxqDescr),
200
+ VMSTATE_UINT64(tx_stats_pa, Vmxnet3TxqDescr),
201
+ VMSTATE_STRUCT(txq_stats, Vmxnet3TxqDescr, 0, vmstate_vmxnet3_tx_stats,
202
+ struct UPT1_TxStats),
203
+ VMSTATE_END_OF_LIST()
204
}
205
+};
206
207
- vmxnet3_put_ring_to_file(f, &r->comp_ring);
208
- qemu_put_byte(f, r->intr_idx);
209
- qemu_put_be64(f, r->rx_stats_pa);
210
- vmxnet3_put_rx_stats_to_file(f, &r->rxq_stats);
211
+static const VMStateDescription vmstate_vmxnet3_rx_stats = {
212
+ .name = "vmxnet3-rx-stats",
213
+ .version_id = 0,
214
+ .fields = (VMStateField[]) {
215
+ VMSTATE_UINT64(LROPktsRxOK, struct UPT1_RxStats),
216
+ VMSTATE_UINT64(LROBytesRxOK, struct UPT1_RxStats),
217
+ VMSTATE_UINT64(ucastPktsRxOK, struct UPT1_RxStats),
218
+ VMSTATE_UINT64(ucastBytesRxOK, struct UPT1_RxStats),
219
+ VMSTATE_UINT64(mcastPktsRxOK, struct UPT1_RxStats),
220
+ VMSTATE_UINT64(mcastBytesRxOK, struct UPT1_RxStats),
221
+ VMSTATE_UINT64(bcastPktsRxOK, struct UPT1_RxStats),
222
+ VMSTATE_UINT64(bcastBytesRxOK, struct UPT1_RxStats),
223
+ VMSTATE_UINT64(pktsRxOutOfBuf, struct UPT1_RxStats),
224
+ VMSTATE_UINT64(pktsRxError, struct UPT1_RxStats),
225
+ VMSTATE_END_OF_LIST()
226
+ }
227
+};
228
229
- return 0;
230
-}
231
+static const VMStateDescription vmstate_vmxnet3_rxq_descr = {
232
+ .name = "vmxnet3-rxq-descr",
233
+ .version_id = 0,
234
+ .fields = (VMStateField[]) {
235
+ VMSTATE_STRUCT_ARRAY(rx_ring, Vmxnet3RxqDescr,
236
+ VMXNET3_RX_RINGS_PER_QUEUE, 0,
237
+ vmstate_vmxnet3_ring, Vmxnet3Ring),
238
+ VMSTATE_STRUCT(comp_ring, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_ring,
239
+ Vmxnet3Ring),
240
+ VMSTATE_UINT8(intr_idx, Vmxnet3RxqDescr),
241
+ VMSTATE_UINT64(rx_stats_pa, Vmxnet3RxqDescr),
242
+ VMSTATE_STRUCT(rxq_stats, Vmxnet3RxqDescr, 0, vmstate_vmxnet3_rx_stats,
243
+ struct UPT1_RxStats),
244
+ VMSTATE_END_OF_LIST()
245
+ }
246
+};
247
248
static int vmxnet3_post_load(void *opaque, int version_id)
249
{
99
{
250
@@ -XXX,XX +XXX,XX @@ static int vmxnet3_post_load(void *opaque, int version_id)
100
const DMAMap *m1 = a, *m2 = b;
251
return 0;
101
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map)
102
return IOVA_OK;
252
}
103
}
253
104
254
-static const VMStateInfo rxq_descr_info = {
105
+/**
255
- .name = "rxq_descr",
106
+ * Try to find an unallocated IOVA range between prev and this elements.
256
- .get = vmxnet3_get_rxq_descr,
107
+ *
257
- .put = vmxnet3_put_rxq_descr
108
+ * @args: Arguments to allocation
258
-};
109
+ *
259
-
110
+ * Cases:
260
-static int vmxnet3_get_int_state(QEMUFile *f, void *pv, size_t size,
111
+ *
261
- VMStateField *field)
112
+ * (1) !prev, !this: No entries allocated, always succeed
262
-{
113
+ *
263
- Vmxnet3IntState *r = pv;
114
+ * (2) !prev, this: We're iterating at the 1st element.
264
-
115
+ *
265
- r->is_masked = qemu_get_byte(f);
116
+ * (3) prev, !this: We're iterating at the last element.
266
- r->is_pending = qemu_get_byte(f);
117
+ *
267
- r->is_asserted = qemu_get_byte(f);
118
+ * (4) prev, this: this is the most common case, we'll try to find a hole
268
-
119
+ * between "prev" and "this" mapping.
269
- return 0;
120
+ *
270
-}
121
+ * Note that this function assumes the last valid iova is HWADDR_MAX, but it
271
-
122
+ * searches linearly so it's easy to discard the result if it's not the case.
272
-static int vmxnet3_put_int_state(QEMUFile *f, void *pv, size_t size,
123
+ */
273
- VMStateField *field, QJSON *vmdesc)
124
+static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args)
274
-{
125
+{
275
- Vmxnet3IntState *r = pv;
126
+ const DMAMap *prev = args->prev, *this = args->this;
276
-
127
+ uint64_t hole_start, hole_last;
277
- qemu_put_byte(f, r->is_masked);
128
+
278
- qemu_put_byte(f, r->is_pending);
129
+ if (this && this->iova + this->size < args->iova_begin) {
279
- qemu_put_byte(f, r->is_asserted);
130
+ return;
280
-
131
+ }
281
- return 0;
132
+
282
-}
133
+ hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin);
283
-
134
+ hole_last = this ? this->iova : HWADDR_MAX;
284
-static const VMStateInfo int_state_info = {
135
+
285
- .name = "int_state",
136
+ if (hole_last - hole_start > args->new_size) {
286
- .get = vmxnet3_get_int_state,
137
+ args->iova_result = hole_start;
287
- .put = vmxnet3_put_int_state
138
+ args->iova_found = true;
288
+static const VMStateDescription vmstate_vmxnet3_int_state = {
139
+ }
289
+ .name = "vmxnet3-int-state",
140
+}
290
+ .version_id = 0,
141
+
291
+ .fields = (VMStateField[]) {
142
+/**
292
+ VMSTATE_BOOL(is_masked, Vmxnet3IntState),
143
+ * Foreach dma node in the tree, compare if there is a hole with its previous
293
+ VMSTATE_BOOL(is_pending, Vmxnet3IntState),
144
+ * node (or minimum iova address allowed) and the node.
294
+ VMSTATE_BOOL(is_asserted, Vmxnet3IntState),
145
+ *
295
+ VMSTATE_END_OF_LIST()
146
+ * @key: Node iterating
296
+ }
147
+ * @value: Node iterating
297
};
148
+ * @pargs: Struct to communicate with the outside world
298
149
+ *
299
static bool vmxnet3_vmstate_need_pcie_device(void *opaque)
150
+ * Return: false to keep iterating, true if needs break.
300
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_vmxnet3 = {
151
+ */
301
VMSTATE_UINT64(drv_shmem, VMXNET3State),
152
+static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value,
302
VMSTATE_UINT64(temp_shared_guest_driver_memory, VMXNET3State),
153
+ gpointer pargs)
303
154
+{
304
- VMSTATE_ARRAY(txq_descr, VMXNET3State,
155
+ struct IOVATreeAllocArgs *args = pargs;
305
- VMXNET3_DEVICE_MAX_TX_QUEUES, 0, txq_descr_info,
156
+ DMAMap *node = value;
306
+ VMSTATE_STRUCT_ARRAY(txq_descr, VMXNET3State,
157
+
307
+ VMXNET3_DEVICE_MAX_TX_QUEUES, 0, vmstate_vmxnet3_txq_descr,
158
+ assert(key == value);
308
Vmxnet3TxqDescr),
159
+
309
- VMSTATE_ARRAY(rxq_descr, VMXNET3State,
160
+ iova_tree_alloc_args_iterate(args, node);
310
- VMXNET3_DEVICE_MAX_RX_QUEUES, 0, rxq_descr_info,
161
+ iova_tree_alloc_map_in_hole(args);
311
+ VMSTATE_STRUCT_ARRAY(rxq_descr, VMXNET3State,
162
+ return args->iova_found;
312
+ VMXNET3_DEVICE_MAX_RX_QUEUES, 0, vmstate_vmxnet3_rxq_descr,
163
+}
313
Vmxnet3RxqDescr),
164
+
314
- VMSTATE_ARRAY(interrupt_states, VMXNET3State, VMXNET3_MAX_INTRS,
165
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
315
- 0, int_state_info, Vmxnet3IntState),
166
+ hwaddr iova_last)
316
+ VMSTATE_STRUCT_ARRAY(interrupt_states, VMXNET3State,
167
+{
317
+ VMXNET3_MAX_INTRS, 0, vmstate_vmxnet3_int_state,
168
+ struct IOVATreeAllocArgs args = {
318
+ Vmxnet3IntState),
169
+ .new_size = map->size,
319
170
+ .iova_begin = iova_begin,
320
VMSTATE_END_OF_LIST()
171
+ };
321
},
172
+
173
+ if (unlikely(iova_last < iova_begin)) {
174
+ return IOVA_ERR_INVALID;
175
+ }
176
+
177
+ /*
178
+ * Find a valid hole for the mapping
179
+ *
180
+ * Assuming low iova_begin, so no need to do a binary search to
181
+ * locate the first node.
182
+ *
183
+ * TODO: Replace all this with g_tree_node_first/next/last when available
184
+ * (from glib since 2.68). To do it with g_tree_foreach complicates the
185
+ * code a lot.
186
+ *
187
+ */
188
+ g_tree_foreach(tree->tree, iova_tree_alloc_traverse, &args);
189
+ if (!args.iova_found) {
190
+ /*
191
+ * Either tree is empty or the last hole is still not checked.
192
+ * g_tree_foreach does not compare (last, iova_last] range, so we check
193
+ * it here.
194
+ */
195
+ iova_tree_alloc_args_iterate(&args, NULL);
196
+ iova_tree_alloc_map_in_hole(&args);
197
+ }
198
+
199
+ if (!args.iova_found || args.iova_result + map->size > iova_last) {
200
+ return IOVA_ERR_NOMEM;
201
+ }
202
+
203
+ map->iova = args.iova_result;
204
+ return iova_tree_insert(tree, map);
205
+}
206
+
207
void iova_tree_destroy(IOVATree *tree)
208
{
209
g_tree_destroy(tree->tree);
322
--
2.7.4

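For readers following the vmxnet3 conversion that just ended: the VMState
framework replaces hand-written qemu_get_*/qemu_put_* pairs with
declarative field tables, and VMSTATE_STRUCT nests one description inside
another. A minimal sketch of the pattern (struct and field names are
illustrative, not the vmxnet3 ones):

    static const VMStateDescription vmstate_inner = {
        .name = "example-inner",
        .version_id = 0,
        .fields = (VMStateField[]) {
            VMSTATE_UINT64(pa, InnerState),   /* was qemu_put_be64() */
            VMSTATE_UINT32(size, InnerState), /* was qemu_put_be32() */
            VMSTATE_END_OF_LIST()
        }
    };

    static const VMStateDescription vmstate_outer = {
        .name = "example-outer",
        .version_id = 0,
        .fields = (VMStateField[]) {
            /* serializes 'inner' through the nested description */
            VMSTATE_STRUCT(inner, OuterState, 0, vmstate_inner, InnerState),
            VMSTATE_END_OF_LIST()
        }
    };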
From: zhanghailiang <zhang.zhanghailiang@huawei.com>

The 'primary_list' and 'secondary_list' members of struct Connection
are not allocated dynamically through g_queue_new(), but we free them
with g_queue_free(), which leads to a double-free bug.

Reviewed-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 net/colo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

From: Eugenio Pérez <eperezma@redhat.com>

This function does the reverse operation of iova_tree_find: it looks for
a mapping that matches a translated address, so we can translate back.

This has linear complexity instead of logarithmic, but it supports
overlapping HVAs. Future developments could reduce the complexity.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 include/qemu/iova-tree.h | 20 +++++++++++++++++++-
 util/iova-tree.c | 34 ++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)
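A hypothetical use of the reverse lookup described above: given a qemu
virtual address, find the mapping that covers it and derive the
corresponding iova (the names are invented; sizes are length - 1 as
elsewhere in the series):

    const DMAMap needle = {
        .translated_addr = (hwaddr)(uintptr_t)vaddr,
        .size = len - 1,
    };
    const DMAMap *found = iova_tree_find_iova(tree, &needle);
    if (found) {
        /* the offset into the mapping carries over to the iova space */
        hwaddr iova = found->iova +
                      (needle.translated_addr - found->translated_addr);
    }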
diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/iova-tree.h
+++ b/include/qemu/iova-tree.h

diff --git a/net/colo.c b/net/colo.c
index XXXXXXX..XXXXXXX 100644
--- a/net/colo.c
+++ b/net/colo.c
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque)
21
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
19
Connection *conn = opaque;
22
* @tree: the iova tree to search from
20
23
* @map: the mapping to search
21
g_queue_foreach(&conn->primary_list, packet_destroy, NULL);
24
*
22
- g_queue_free(&conn->primary_list);
25
- * Search for a mapping in the iova tree that overlaps with the
23
+ g_queue_clear(&conn->primary_list);
26
+ * Search for a mapping in the iova tree that iova overlaps with the
24
g_queue_foreach(&conn->secondary_list, packet_destroy, NULL);
27
* mapping range specified. Only the first found mapping will be
25
- g_queue_free(&conn->secondary_list);
28
* returned.
26
+ g_queue_clear(&conn->secondary_list);
29
*
27
g_slice_free(Connection, conn);
30
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
31
const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map);
32
33
/**
34
+ * iova_tree_find_iova:
35
+ *
36
+ * @tree: the iova tree to search from
37
+ * @map: the mapping to search
38
+ *
39
+ * Search for a mapping in the iova tree that translated_addr overlaps with the
40
+ * mapping range specified. Only the first found mapping will be
41
+ * returned.
42
+ *
43
+ * Return: DMAMap pointer if found, or NULL if not found. Note that
44
+ * the returned DMAMap pointer is maintained internally. User should
45
+ * only read the content but never modify or free the content. Also,
46
+ * user is responsible to make sure the pointer is valid (say, no
47
+ * concurrent deletion in progress).
48
+ */
49
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map);
50
+
51
+/**
52
* iova_tree_find_address:
53
*
54
* @tree: the iova tree to search from
55
diff --git a/util/iova-tree.c b/util/iova-tree.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/util/iova-tree.c
58
+++ b/util/iova-tree.c
59
@@ -XXX,XX +XXX,XX @@ struct IOVATreeAllocArgs {
60
bool iova_found;
61
};
62
63
+typedef struct IOVATreeFindIOVAArgs {
64
+ const DMAMap *needle;
65
+ const DMAMap *result;
66
+} IOVATreeFindIOVAArgs;
67
+
68
/**
69
* Iterate args to the next hole
70
*
71
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map)
72
return g_tree_lookup(tree->tree, map);
28
}
73
}
29
74
75
+static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
76
+ gpointer data)
77
+{
78
+ const DMAMap *map = key;
79
+ IOVATreeFindIOVAArgs *args = data;
80
+ const DMAMap *needle;
81
+
82
+ g_assert(key == value);
83
+
84
+ needle = args->needle;
85
+ if (map->translated_addr + map->size < needle->translated_addr ||
86
+ needle->translated_addr + needle->size < map->translated_addr) {
87
+ return false;
88
+ }
89
+
90
+ args->result = map;
91
+ return true;
92
+}
93
+
94
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map)
95
+{
96
+ IOVATreeFindIOVAArgs args = {
97
+ .needle = map,
98
+ };
99
+
100
+ g_tree_foreach(tree->tree, iova_tree_find_address_iterator, &args);
101
+ return args.result;
102
+}
103
+
104
const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova)
105
{
106
const DMAMap map = { .iova = iova, .size = 0 };
30
--
2.7.4

From: Eugenio Pérez <eperezma@redhat.com>

This tree is able to look up a translated address from an IOVA address.

At first glance it is similar to util/iova-tree. However, SVQ working on
devices with a limited IOVA space needs more capabilities, like
allocating IOVA chunks or performing reverse translations (qemu
addresses to iova).

The allocation capability, "assign a free IOVA address to this chunk of
memory in qemu's address space", allows the shadow virtqueue to create a
new address space that is not restricted by the guest's addressable one,
so we can allocate the shadow vqs' vrings outside of it.

It duplicates the tree so it can search efficiently in both directions,
and it will signal an overlap if either the iova or the translated
address is already present in either tree.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/meson.build | 2 +-
 hw/virtio/vhost-iova-tree.c | 110 ++++++++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-iova-tree.h | 27 +++++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 hw/virtio/vhost-iova-tree.c
 create mode 100644 hw/virtio/vhost-iova-tree.h

From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>

Rename colo_packet_compare() to colo_packet_compare_common() so that
tcp_compare, udp_compare and icmp_compare can reuse it.
Remove the minimum packet size check in icmp_compare, because we have
already checked this in parse_packet_early().

Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 net/colo-compare.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)
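The shape of the colo-compare refactor, reduced to its essentials: each
per-protocol comparator keeps its protocol-specific handling and
delegates the byte comparison to the shared helper. A simplified sketch
of the pattern (condensed from the diff below, not a verbatim excerpt):

    static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
    {
        int ret = colo_packet_compare_common(ppkt, spkt);

        if (ret) {
            trace_colo_compare_udp_miscompare("primary pkt size",
                                              ppkt->size);
        }
        return ret;
    }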
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build

diff --git a/net/colo-compare.c b/net/colo-compare.c
index XXXXXXX..XXXXXXX 100644
--- a/net/colo-compare.c
+++ b/net/colo-compare.c
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode)
33
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
19
* return: 0 means packet same
34
20
* > 0 || < 0 means packet different
35
virtio_ss = ss.source_set()
21
*/
36
virtio_ss.add(files('virtio.c'))
22
-static int colo_packet_compare(Packet *ppkt, Packet *spkt)
37
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
23
+static int colo_packet_compare_common(Packet *ppkt, Packet *spkt)
38
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
24
{
39
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
25
trace_colo_compare_ip_info(ppkt->size, inet_ntoa(ppkt->ip->ip_src),
40
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
26
inet_ntoa(ppkt->ip->ip_dst), spkt->size,
41
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
27
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare(Packet *ppkt, Packet *spkt)
42
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
28
if (ppkt->size == spkt->size) {
43
new file mode 100644
29
return memcmp(ppkt->data, spkt->data, spkt->size);
44
index XXXXXXX..XXXXXXX
30
} else {
45
--- /dev/null
31
+ trace_colo_compare_main("Net packet size are not the same");
46
+++ b/hw/virtio/vhost-iova-tree.c
32
return -1;
47
@@ -XXX,XX +XXX,XX @@
33
}
48
+/*
34
}
49
+ * vhost software live migration iova tree
35
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
50
+ *
36
int res;
51
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
37
52
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
38
trace_colo_compare_main("compare tcp");
53
+ *
54
+ * SPDX-License-Identifier: GPL-2.0-or-later
55
+ */
39
+
56
+
40
if (ppkt->size != spkt->size) {
57
+#include "qemu/osdep.h"
41
if (trace_event_get_state(TRACE_COLO_COMPARE_MISCOMPARE)) {
58
+#include "qemu/iova-tree.h"
42
trace_colo_compare_main("pkt size not same");
59
+#include "vhost-iova-tree.h"
43
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
44
int ret;
45
46
trace_colo_compare_main("compare udp");
47
- ret = colo_packet_compare(ppkt, spkt);
48
+
60
+
49
+ ret = colo_packet_compare_common(ppkt, spkt);
61
+#define iova_min_addr qemu_real_host_page_size
50
62
+
51
if (ret) {
63
+/**
52
trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
64
+ * VhostIOVATree, able to:
53
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
65
+ * - Translate iova address
54
*/
66
+ * - Reverse translate iova address (from translated to iova)
55
static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
67
+ * - Allocate IOVA regions for translated range (linear operation)
56
{
68
+ */
57
- int network_length;
69
+struct VhostIOVATree {
58
-
70
+ /* First addressable iova address in the device */
59
trace_colo_compare_main("compare icmp");
71
+ uint64_t iova_first;
60
- network_length = ppkt->ip->ip_hl * 4;
72
+
61
- if (ppkt->size != spkt->size ||
73
+ /* Last addressable iova address in the device */
62
- ppkt->size < network_length + ETH_HLEN) {
74
+ uint64_t iova_last;
63
- return -1;
75
+
64
- }
76
+ /* IOVA address to qemu memory maps. */
65
77
+ IOVATree *iova_taddr_map;
66
- if (colo_packet_compare(ppkt, spkt)) {
78
+};
67
+ if (colo_packet_compare_common(ppkt, spkt)) {
79
+
68
trace_colo_compare_icmp_miscompare("primary pkt size",
80
+/**
69
ppkt->size);
81
+ * Create a new IOVA tree
70
qemu_hexdump((char *)ppkt->data, stderr, "colo-compare",
82
+ *
71
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
83
+ * Returns the new IOVA tree
72
inet_ntoa(ppkt->ip->ip_dst), spkt->size,
84
+ */
73
inet_ntoa(spkt->ip->ip_src),
85
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
74
inet_ntoa(spkt->ip->ip_dst));
86
+{
75
- return colo_packet_compare(ppkt, spkt);
87
+ VhostIOVATree *tree = g_new(VhostIOVATree, 1);
76
+ return colo_packet_compare_common(ppkt, spkt);
88
+
77
}
89
+ /* Some devices do not like 0 addresses */
78
90
+ tree->iova_first = MAX(iova_first, iova_min_addr);
79
static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
91
+ tree->iova_last = iova_last;
92
+
93
+ tree->iova_taddr_map = iova_tree_new();
94
+ return tree;
95
+}
96
+
97
+/**
98
+ * Delete an iova tree
99
+ */
100
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
101
+{
102
+ iova_tree_destroy(iova_tree->iova_taddr_map);
103
+ g_free(iova_tree);
104
+}
105
+
106
+/**
107
+ * Find the IOVA address stored from a memory address
108
+ *
109
+ * @tree: The iova tree
110
+ * @map: The map with the memory address
111
+ *
112
+ * Return the stored mapping, or NULL if not found.
113
+ */
114
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
115
+ const DMAMap *map)
116
+{
117
+ return iova_tree_find_iova(tree->iova_taddr_map, map);
118
+}
119
+
120
+/**
121
+ * Allocate a new mapping
122
+ *
123
+ * @tree: The iova tree
124
+ * @map: The iova map
125
+ *
126
+ * Returns:
127
+ * - IOVA_OK if the map fits in the container
128
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
129
+ * - IOVA_ERR_NOMEM if tree cannot allocate more space.
130
+ *
131
+ * It returns the assigned iova in map->iova if the return value is IOVA_OK.
132
+ */
133
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
134
+{
135
+ /* Some vhost devices do not like addr 0. Skip first page */
136
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
137
+
138
+ if (map->translated_addr + map->size < map->translated_addr ||
139
+ map->perm == IOMMU_NONE) {
140
+ return IOVA_ERR_INVALID;
141
+ }
142
+
143
+ /* Allocate a node in IOVA address */
144
+ return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
145
+ tree->iova_last);
146
+}
147
+
148
+/**
149
+ * Remove existing mappings from iova tree
150
+ *
151
+ * @iova_tree: The vhost iova tree
152
+ * @map: The map to remove
153
+ */
154
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
155
+{
156
+ iova_tree_remove(iova_tree->iova_taddr_map, map);
157
+}
158
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
159
new file mode 100644
160
index XXXXXXX..XXXXXXX
161
--- /dev/null
162
+++ b/hw/virtio/vhost-iova-tree.h
163
@@ -XXX,XX +XXX,XX @@
164
+/*
165
+ * vhost software live migration iova tree
166
+ *
167
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
168
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
169
+ *
170
+ * SPDX-License-Identifier: GPL-2.0-or-later
171
+ */
172
+
173
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
174
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
175
+
176
+#include "qemu/iova-tree.h"
177
+#include "exec/memory.h"
178
+
179
+typedef struct VhostIOVATree VhostIOVATree;
180
+
181
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
182
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
183
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
184
+
185
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
186
+ const DMAMap *map);
187
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
188
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
189
+
190
+#endif
80
--
191
--
81
2.7.4
192
2.7.4
82
193
83
194
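
To make the VhostIOVATree API above concrete, here is a sketch of one mapping's life cycle as a caller might drive it. Everything used below is introduced by this patch, but the helper itself is hypothetical, only compiles inside the QEMU tree, and the [0, UINT64_MAX] device range is an assumption chosen for illustration.

    #include "qemu/osdep.h"
    #include "hw/virtio/vhost-iova-tree.h"

    /* Hypothetical example helper, not part of the patch. */
    static void vhost_iova_tree_example(void *host_buf, size_t len)
    {
        VhostIOVATree *tree = vhost_iova_tree_new(0, UINT64_MAX);
        DMAMap map = {
            .translated_addr = (hwaddr)(uintptr_t)host_buf,
            .size = len - 1,               /* inclusive-size convention */
            .perm = IOMMU_RW,
        };

        /* Forward step: allocate a free iova for this qemu VA chunk. */
        if (vhost_iova_tree_map_alloc(tree, &map) == IOVA_OK) {
            /* map.iova now holds the address to hand to the device. */

            /* Reverse step: qemu VA back to the stored mapping. */
            DMAMap needle = {
                .translated_addr = (hwaddr)(uintptr_t)host_buf,
                .size = 0,
            };
            const DMAMap *found = vhost_iova_tree_find_iova(tree, &needle);
            g_assert(found && found->iova == map.iova);

            vhost_iova_tree_remove(tree, &map);
        }
        vhost_iova_tree_delete(tree);
    }

Note that even with iova_first == 0, vhost_iova_tree_map_alloc() skips the first page, matching the "some devices do not like addr 0" comment in the patch.
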
1
From: Dmitry Fleytman <dmitry@daynix.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Make the VLAN stripping functions return the number of bytes
3
Use translations added in VhostIOVATree in SVQ.
4
copied to the given Ethernet header buffer.
4
5
5
Only introduce usage here, not allocation and deallocation. As with
6
This information should be used to re-compose
6
previous patches, we use the dead code paths of shadow_vqs_enabled to
7
the packet IOV after VLAN stripping.
7
avoid committing too many changes at once. These code paths are unreachable
8
8
at the moment.
9
Cc: qemu-stable@nongnu.org
9
10
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
10
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
11
Acked-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
---
13
include/net/eth.h | 4 ++--
14
hw/virtio/vhost-shadow-virtqueue.c | 86 +++++++++++++++++++++++---
14
net/eth.c | 25 ++++++++++++++-----------
15
hw/virtio/vhost-shadow-virtqueue.h | 6 +-
15
2 files changed, 16 insertions(+), 13 deletions(-)
16
hw/virtio/vhost-vdpa.c | 122 +++++++++++++++++++++++++++++++------
16
17
include/hw/virtio/vhost-vdpa.h | 3 +
17
diff --git a/include/net/eth.h b/include/net/eth.h
18
4 files changed, 187 insertions(+), 30 deletions(-)
19
20
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
18
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
19
--- a/include/net/eth.h
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
20
+++ b/include/net/eth.h
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
21
@@ -XXX,XX +XXX,XX @@ eth_get_pkt_tci(const void *p)
24
@@ -XXX,XX +XXX,XX @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
25
return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
26
}
27
28
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
29
+/**
30
+ * Translate addresses between the qemu's virtual address and the SVQ IOVA
31
+ *
32
+ * @svq: Shadow VirtQueue
33
+ * @addrs: Destination of the translated IOVA addresses
34
+ * @iovec: Source qemu's VA addresses
35
+ * @num: Length of iovec and minimum length of addrs
36
+ */
37
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
38
+ hwaddr *addrs, const struct iovec *iovec,
39
+ size_t num)
40
+{
41
+ if (num == 0) {
42
+ return true;
43
+ }
44
+
45
+ for (size_t i = 0; i < num; ++i) {
46
+ DMAMap needle = {
47
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
48
+ .size = iovec[i].iov_len,
49
+ };
50
+ Int128 needle_last, map_last;
51
+ size_t off;
52
+
53
+ const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
54
+ /*
55
+ * Map cannot be NULL since iova map contains all guest space and
56
+ * qemu already has a physical address mapped
57
+ */
58
+ if (unlikely(!map)) {
59
+ qemu_log_mask(LOG_GUEST_ERROR,
60
+ "Invalid address 0x%"HWADDR_PRIx" given by guest",
61
+ needle.translated_addr);
62
+ return false;
63
+ }
64
+
65
+ off = needle.translated_addr - map->translated_addr;
66
+ addrs[i] = map->iova + off;
67
+
68
+ needle_last = int128_add(int128_make64(needle.translated_addr),
69
+ int128_make64(iovec[i].iov_len));
70
+ map_last = int128_make64(map->translated_addr + map->size);
71
+ if (unlikely(int128_gt(needle_last, map_last))) {
72
+ qemu_log_mask(LOG_GUEST_ERROR,
73
+ "Guest buffer expands over iova range");
74
+ return false;
75
+ }
76
+ }
77
+
78
+ return true;
79
+}
80
+
81
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
82
const struct iovec *iovec, size_t num,
83
bool more_descs, bool write)
84
{
85
@@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
86
} else {
87
descs[i].flags = flags;
88
}
89
- descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
90
+ descs[i].addr = cpu_to_le64(sg[n]);
91
descs[i].len = cpu_to_le32(iovec[n].iov_len);
92
93
last = i;
94
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
95
{
96
unsigned avail_idx;
97
vring_avail_t *avail = svq->vring.avail;
98
+ bool ok;
99
+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
100
101
*head = svq->free_head;
102
103
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
104
return false;
22
}
105
}
106
107
- vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
108
- false);
109
- vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
110
+ ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
111
+ if (unlikely(!ok)) {
112
+ return false;
113
+ }
114
+ vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
115
+ elem->in_num > 0, false);
116
+
117
+
118
+ ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
119
+ if (unlikely(!ok)) {
120
+ return false;
121
+ }
122
+
123
+ vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
124
125
/*
126
* Put the entry in the available array (but don't update avail->idx until
127
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
128
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
129
struct vhost_vring_addr *addr)
130
{
131
- addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
132
- addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
133
- addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
134
+ addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
135
+ addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
136
+ addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
23
}
137
}
24
138
25
-bool
139
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
26
+size_t
140
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
27
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
141
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
28
uint8_t *new_ehdr_buf,
142
* shadow methods and file descriptors.
29
uint16_t *payload_offset, uint16_t *tci);
143
*
30
144
+ * @iova_tree: Tree to perform descriptors translations
31
-bool
145
+ *
32
+size_t
146
* Returns the new virtqueue or NULL.
33
eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
147
*
34
uint16_t vet, uint8_t *new_ehdr_buf,
148
* In case of error, reason is reported through error_report.
35
uint16_t *payload_offset, uint16_t *tci);
149
*/
36
diff --git a/net/eth.c b/net/eth.c
150
-VhostShadowVirtqueue *vhost_svq_new(void)
151
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
152
{
153
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
154
int r;
155
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
156
157
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
158
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
159
+ svq->iova_tree = iova_tree;
160
return g_steal_pointer(&svq);
161
162
err_init_hdev_call:
163
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
37
index XXXXXXX..XXXXXXX 100644
164
index XXXXXXX..XXXXXXX 100644
38
--- a/net/eth.c
165
--- a/hw/virtio/vhost-shadow-virtqueue.h
39
+++ b/net/eth.c
166
+++ b/hw/virtio/vhost-shadow-virtqueue.h
40
@@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, int iovcnt,
167
@@ -XXX,XX +XXX,XX @@
168
#include "qemu/event_notifier.h"
169
#include "hw/virtio/virtio.h"
170
#include "standard-headers/linux/vhost_types.h"
171
+#include "hw/virtio/vhost-iova-tree.h"
172
173
/* Shadow virtqueue to relay notifications */
174
typedef struct VhostShadowVirtqueue {
175
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
176
/* Virtio device */
177
VirtIODevice *vdev;
178
179
+ /* IOVA mapping */
180
+ VhostIOVATree *iova_tree;
181
+
182
/* Map for use the guest's descriptors */
183
VirtQueueElement **ring_id_maps;
184
185
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
186
VirtQueue *vq);
187
void vhost_svq_stop(VhostShadowVirtqueue *svq);
188
189
-VhostShadowVirtqueue *vhost_svq_new(void);
190
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
191
192
void vhost_svq_free(gpointer vq);
193
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
194
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
195
index XXXXXXX..XXXXXXX 100644
196
--- a/hw/virtio/vhost-vdpa.c
197
+++ b/hw/virtio/vhost-vdpa.c
198
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
199
vaddr, section->readonly);
200
201
llsize = int128_sub(llend, int128_make64(iova));
202
+ if (v->shadow_vqs_enabled) {
203
+ DMAMap mem_region = {
204
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
205
+ .size = int128_get64(llsize) - 1,
206
+ .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
207
+ };
208
+
209
+ int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
210
+ if (unlikely(r != IOVA_OK)) {
211
+ error_report("Can't allocate a mapping (%d)", r);
212
+ goto fail;
213
+ }
214
+
215
+ iova = mem_region.iova;
216
+ }
217
218
vhost_vdpa_iotlb_batch_begin_once(v);
219
ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
220
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
221
222
llsize = int128_sub(llend, int128_make64(iova));
223
224
+ if (v->shadow_vqs_enabled) {
225
+ const DMAMap *result;
226
+ const void *vaddr = memory_region_get_ram_ptr(section->mr) +
227
+ section->offset_within_region +
228
+ (iova - section->offset_within_address_space);
229
+ DMAMap mem_region = {
230
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
231
+ .size = int128_get64(llsize) - 1,
232
+ };
233
+
234
+ result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
235
+ iova = result->iova;
236
+ vhost_iova_tree_remove(v->iova_tree, &mem_region);
237
+ }
238
vhost_vdpa_iotlb_batch_begin_once(v);
239
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
240
if (ret) {
241
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
242
243
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
244
for (unsigned n = 0; n < hdev->nvqs; ++n) {
245
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
246
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
247
248
if (unlikely(!svq)) {
249
error_setg(errp, "Cannot create svq %u", n);
250
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
251
/**
252
* Unmap a SVQ area in the device
253
*/
254
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
255
- hwaddr size)
256
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
257
+ const DMAMap *needle)
258
{
259
+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
260
+ hwaddr size;
261
int r;
262
263
- size = ROUND_UP(size, qemu_real_host_page_size);
264
- r = vhost_vdpa_dma_unmap(v, iova, size);
265
+ if (unlikely(!result)) {
266
+ error_report("Unable to find SVQ address to unmap");
267
+ return false;
268
+ }
269
+
270
+ size = ROUND_UP(result->size, qemu_real_host_page_size);
271
+ r = vhost_vdpa_dma_unmap(v, result->iova, size);
272
return r == 0;
273
}
274
275
static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
276
const VhostShadowVirtqueue *svq)
277
{
278
+ DMAMap needle = {};
279
struct vhost_vdpa *v = dev->opaque;
280
struct vhost_vring_addr svq_addr;
281
- size_t device_size = vhost_svq_device_area_size(svq);
282
- size_t driver_size = vhost_svq_driver_area_size(svq);
283
bool ok;
284
285
vhost_svq_get_vring_addr(svq, &svq_addr);
286
287
- ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
288
+ needle.translated_addr = svq_addr.desc_user_addr;
289
+ ok = vhost_vdpa_svq_unmap_ring(v, &needle);
290
if (unlikely(!ok)) {
291
return false;
41
}
292
}
293
294
- return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
295
+ needle.translated_addr = svq_addr.used_user_addr;
296
+ return vhost_vdpa_svq_unmap_ring(v, &needle);
297
+}
298
+
299
+/**
300
+ * Map the SVQ area in the device
301
+ *
302
+ * @v: Vhost-vdpa device
303
+ * @needle: The area to search iova
304
+ * @errp: Error pointer
305
+ */
306
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
307
+ Error **errp)
308
+{
309
+ int r;
310
+
311
+ r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
312
+ if (unlikely(r != IOVA_OK)) {
313
+ error_setg(errp, "Cannot allocate iova (%d)", r);
314
+ return false;
315
+ }
316
+
317
+ r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
318
+ (void *)(uintptr_t)needle->translated_addr,
319
+ needle->perm == IOMMU_RO);
320
+ if (unlikely(r != 0)) {
321
+ error_setg_errno(errp, -r, "Cannot map region to device");
322
+ vhost_iova_tree_remove(v->iova_tree, needle);
323
+ }
324
+
325
+ return r == 0;
42
}
326
}
43
327
44
-bool
328
/**
45
+size_t
329
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
46
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
330
struct vhost_vring_addr *addr,
47
uint8_t *new_ehdr_buf,
331
Error **errp)
48
uint16_t *payload_offset, uint16_t *tci)
332
{
49
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
333
+ DMAMap device_region, driver_region;
50
new_ehdr, sizeof(*new_ehdr));
334
+ struct vhost_vring_addr svq_addr;
51
335
struct vhost_vdpa *v = dev->opaque;
52
if (copied < sizeof(*new_ehdr)) {
336
size_t device_size = vhost_svq_device_area_size(svq);
53
- return false;
337
size_t driver_size = vhost_svq_driver_area_size(svq);
54
+ return 0;
338
- int r;
339
+ size_t avail_offset;
340
+ bool ok;
341
342
ERRP_GUARD();
343
- vhost_svq_get_vring_addr(svq, addr);
344
+ vhost_svq_get_vring_addr(svq, &svq_addr);
345
346
- r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
347
- (void *)(uintptr_t)addr->desc_user_addr, true);
348
- if (unlikely(r != 0)) {
349
- error_setg_errno(errp, -r, "Cannot create vq driver region: ");
350
+ driver_region = (DMAMap) {
351
+ .translated_addr = svq_addr.desc_user_addr,
352
+ .size = driver_size - 1,
353
+ .perm = IOMMU_RO,
354
+ };
355
+ ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
356
+ if (unlikely(!ok)) {
357
+ error_prepend(errp, "Cannot create vq driver region: ");
358
return false;
55
}
359
}
56
360
+ addr->desc_user_addr = driver_region.iova;
57
switch (be16_to_cpu(new_ehdr->h_proto)) {
361
+ avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
58
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
362
+ addr->avail_user_addr = driver_region.iova + avail_offset;
59
&vlan_hdr, sizeof(vlan_hdr));
363
60
364
- r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
61
if (copied < sizeof(vlan_hdr)) {
365
- (void *)(intptr_t)addr->used_user_addr, false);
62
- return false;
366
- if (unlikely(r != 0)) {
63
+ return 0;
367
- error_setg_errno(errp, -r, "Cannot create vq device region: ");
64
}
368
+ device_region = (DMAMap) {
65
369
+ .translated_addr = svq_addr.used_user_addr,
66
new_ehdr->h_proto = vlan_hdr.h_proto;
370
+ .size = device_size - 1,
67
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
371
+ .perm = IOMMU_RW,
68
PKT_GET_VLAN_HDR(new_ehdr), sizeof(vlan_hdr));
372
+ };
69
373
+ ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
70
if (copied < sizeof(vlan_hdr)) {
374
+ if (unlikely(!ok)) {
71
- return false;
375
+ error_prepend(errp, "Cannot create vq device region: ");
72
+ return 0;
376
+ vhost_vdpa_svq_unmap_ring(v, &driver_region);
73
}
74
75
*payload_offset += sizeof(vlan_hdr);
76
+
77
+ return sizeof(struct eth_header) + sizeof(struct vlan_header);
78
+ } else {
79
+ return sizeof(struct eth_header);
80
}
81
- return true;
82
default:
83
- return false;
84
+ return 0;
85
}
377
}
378
+ addr->used_user_addr = device_region.iova;
379
380
- return r == 0;
381
+ return ok;
86
}
382
}
87
383
88
-bool
384
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
89
+size_t
385
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
90
eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
386
index XXXXXXX..XXXXXXX 100644
91
uint16_t vet, uint8_t *new_ehdr_buf,
387
--- a/include/hw/virtio/vhost-vdpa.h
92
uint16_t *payload_offset, uint16_t *tci)
388
+++ b/include/hw/virtio/vhost-vdpa.h
93
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
389
@@ -XXX,XX +XXX,XX @@
94
new_ehdr, sizeof(*new_ehdr));
390
95
391
#include <gmodule.h>
96
if (copied < sizeof(*new_ehdr)) {
392
97
- return false;
393
+#include "hw/virtio/vhost-iova-tree.h"
98
+ return 0;
394
#include "hw/virtio/virtio.h"
99
}
395
#include "standard-headers/linux/vhost_types.h"
100
396
101
if (be16_to_cpu(new_ehdr->h_proto) == vet) {
397
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
102
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
398
MemoryListener listener;
103
&vlan_hdr, sizeof(vlan_hdr));
399
struct vhost_vdpa_iova_range iova_range;
104
400
bool shadow_vqs_enabled;
105
if (copied < sizeof(vlan_hdr)) {
401
+ /* IOVA mapping used by the Shadow Virtqueue */
106
- return false;
402
+ VhostIOVATree *iova_tree;
107
+ return 0;
403
GPtrArray *shadow_vqs;
108
}
404
struct vhost_dev *dev;
109
405
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
110
new_ehdr->h_proto = vlan_hdr.h_proto;
111
112
*tci = be16_to_cpu(vlan_hdr.h_tci);
113
*payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
114
- return true;
115
+ return sizeof(struct eth_header);
116
}
117
118
- return false;
119
+ return 0;
120
}
121
122
void
123
--
406
--
124
2.7.4
407
2.7.4
125
408
126
409
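
The heart of vhost_svq_translate_addr() above is offset arithmetic plus an end-of-range check. A self-contained sketch of that single step, with a simplified DMAMap and plain uint64_t instead of Int128 (so it assumes the additions cannot wrap):

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct {
        uint64_t iova;
        uint64_t translated_addr;
        uint64_t size;                      /* inclusive: length - 1 */
    } DMAMap;

    /* Keep the same offset inside the map; the buffer must not run
     * past the end of the mapping. */
    static bool translate_one(const DMAMap *map, uint64_t vaddr,
                              uint64_t len, uint64_t *iova)
    {
        if (vaddr < map->translated_addr ||
            vaddr + len > map->translated_addr + map->size + 1) {
            return false;                   /* buffer spills past the map */
        }
        *iova = map->iova + (vaddr - map->translated_addr);
        return true;
    }

The real code performs the end comparison with Int128 precisely so that a map touching the top of the 64-bit space cannot make that addition overflow.
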
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We use g_queue_init() to init s->conn_list, so we should use g_queue_clear()
3
This is needed for migration, so the destination can restore its
4
instead of g_queue_free().
4
index.
5
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Set the base to the last used idx, so the destination will see as available all
7
Reviewed-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
7
the entries that the device did not use, including the in-flight
8
processing ones.
9
10
This is ok for networking, but other kinds of devices might have
11
problems with these retransmissions.
12
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
16
---
10
net/colo-compare.c | 2 +-
17
hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
11
1 file changed, 1 insertion(+), 1 deletion(-)
18
1 file changed, 17 insertions(+)
12
19
13
diff --git a/net/colo-compare.c b/net/colo-compare.c
20
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
14
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
15
--- a/net/colo-compare.c
22
--- a/hw/virtio/vhost-vdpa.c
16
+++ b/net/colo-compare.c
23
+++ b/hw/virtio/vhost-vdpa.c
17
@@ -XXX,XX +XXX,XX @@ static void colo_compare_finalize(Object *obj)
24
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
18
/* Release all unhandled packets after compare thread exited */
25
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
19
g_queue_foreach(&s->conn_list, colo_flush_packets, s);
26
struct vhost_vring_state *ring)
20
27
{
21
- g_queue_free(&s->conn_list);
28
+ struct vhost_vdpa *v = dev->opaque;
22
+ g_queue_clear(&s->conn_list);
29
int ret;
23
30
24
g_hash_table_destroy(s->connection_track_table);
31
+ if (v->shadow_vqs_enabled) {
25
g_free(s->pri_indev);
32
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
33
+ ring->index);
34
+
35
+ /*
36
+ * Setting base as last used idx, so destination will see as available
37
+ * all the entries that the device did not use, including the in-flight
38
+ * processing ones.
39
+ *
40
+ * TODO: This is ok for networking, but other kinds of devices might
41
+ * have problems with these retransmissions.
42
+ */
43
+ ring->num = svq->last_used_idx;
44
+ return 0;
45
+ }
46
+
47
ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
48
trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
49
return ret;
26
--
50
--
27
2.7.4
51
2.7.4
28
52
29
53
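
A concrete reading of the base arithmetic above: if the guest has made descriptors 0..9 available and the device has completed 7 of them, svq->last_used_idx is 7, so the migrated base is 7 and the destination re-exposes entries 7, 8 and 9 as available again. For a NIC those replays are harmless retransmissions, which is why the TODO only worries about other device classes.
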
1
From: Dmitry Fleytman <dmitry@daynix.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
In case of VLAN stripping, the ETH header is put into a
3
Setting the log address would make the device start reporting invalid
4
separate buffer, therefore the amount of data copied
4
dirty memory because the SVQ vrings are located in qemu's memory.
5
from the original IOV should be smaller.
6
5
7
Cc: qemu-stable@nongnu.org
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
7
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
9
---
11
hw/net/net_rx_pkt.c | 3 ++-
10
hw/virtio/vhost-vdpa.c | 3 ++-
12
1 file changed, 2 insertions(+), 1 deletion(-)
11
1 file changed, 2 insertions(+), 1 deletion(-)
13
12
14
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
13
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
15
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/net_rx_pkt.c
15
--- a/hw/virtio/vhost-vdpa.c
17
+++ b/hw/net/net_rx_pkt.c
16
+++ b/hw/virtio/vhost-vdpa.c
18
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
17
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
19
18
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
20
pkt->tot_len = iov_size(iov, iovcnt) - ploff + pkt->ehdr_buf_len;
19
struct vhost_log *log)
21
pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
20
{
22
- iov, iovcnt, ploff, pkt->tot_len);
21
- if (vhost_vdpa_one_time_request(dev)) {
23
+ iov, iovcnt, ploff,
22
+ struct vhost_vdpa *v = dev->opaque;
24
+ pkt->tot_len - pkt->ehdr_buf_len);
23
+ if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
25
} else {
24
return 0;
26
net_rx_pkt_iovec_realloc(pkt, iovcnt);
25
}
27
26
28
--
27
--
29
2.7.4
28
2.7.4
30
29
31
30
1
From: Dmitry Fleytman <dmitry@daynix.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
This patch fixes a problem introduced in commit eb700029.
3
SVQ is able to log the dirty bits by itself, so let's use it so we do not
4
block migration.
4
5
5
When net_rx_pkt_attach_iovec() calls eth_strip_vlan(),
6
Also, ignore set and clear of VHOST_F_LOG_ALL on set_features if SVQ is
6
this can result in pkt->ehdr_buf being overflowed, because
7
enabled. Even if the device supports it, the reports would be nonsense
7
ehdr_buf is only sizeof(struct eth_header) bytes large
8
because SVQ memory is in the qemu region.
8
but eth_strip_vlan() can write
9
sizeof(struct eth_header) + sizeof(struct vlan_header)
10
bytes into it.
11
9
12
Devices affected by this problem: vmxnet3.
10
The log region is still allocated. Future changes might skip that, but
11
this series is already long enough.
13
12
14
Cc: qemu-stable@nongnu.org
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
15
Reported-by: Peter Maydell <peter.maydell@linaro.org>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
16
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
---
16
---
19
hw/net/net_rx_pkt.c | 34 +++++++++++++++++-----------------
17
hw/virtio/vhost-vdpa.c | 39 +++++++++++++++++++++++++++++++++++----
20
1 file changed, 17 insertions(+), 17 deletions(-)
18
include/hw/virtio/vhost-vdpa.h | 1 +
19
2 files changed, 36 insertions(+), 4 deletions(-)
21
20
22
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
21
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
23
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/net/net_rx_pkt.c
23
--- a/hw/virtio/vhost-vdpa.c
25
+++ b/hw/net/net_rx_pkt.c
24
+++ b/hw/virtio/vhost-vdpa.c
26
@@ -XXX,XX +XXX,XX @@
25
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
27
26
return v->index != 0;
28
struct NetRxPkt {
27
}
29
struct virtio_net_hdr virt_hdr;
28
30
- uint8_t ehdr_buf[sizeof(struct eth_header)];
29
+static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
31
+ uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)];
30
+ uint64_t *features)
32
struct iovec *vec;
31
+{
33
uint16_t vec_len_total;
32
+ int ret;
34
uint16_t vec_len;
33
+
35
uint32_t tot_len;
34
+ ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
36
uint16_t tci;
35
+ trace_vhost_vdpa_get_features(dev, *features);
37
- bool vlan_stripped;
36
+ return ret;
38
+ size_t ehdr_buf_len;
37
+}
39
bool has_virt_hdr;
38
+
40
eth_pkt_types_e packet_type;
39
static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
41
40
Error **errp)
42
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
43
const struct iovec *iov, int iovcnt,
44
size_t ploff)
45
{
41
{
46
- if (pkt->vlan_stripped) {
42
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
47
+ if (pkt->ehdr_buf_len) {
43
return 0;
48
net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
49
50
pkt->vec[0].iov_base = pkt->ehdr_buf;
51
- pkt->vec[0].iov_len = sizeof(pkt->ehdr_buf);
52
-
53
- pkt->tot_len =
54
- iov_size(iov, iovcnt) - ploff + sizeof(struct eth_header);
55
+ pkt->vec[0].iov_len = pkt->ehdr_buf_len;
56
57
+ pkt->tot_len = iov_size(iov, iovcnt) - ploff + pkt->ehdr_buf_len;
58
pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
59
iov, iovcnt, ploff, pkt->tot_len);
60
} else {
61
@@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
62
uint16_t tci = 0;
63
uint16_t ploff = iovoff;
64
assert(pkt);
65
- pkt->vlan_stripped = false;
66
67
if (strip_vlan) {
68
- pkt->vlan_stripped = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf,
69
- &ploff, &tci);
70
+ pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf,
71
+ &ploff, &tci);
72
+ } else {
73
+ pkt->ehdr_buf_len = 0;
74
}
44
}
75
45
76
pkt->tci = tci;
46
- r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
77
@@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
47
+ r = vhost_vdpa_get_dev_features(hdev, &dev_features);
78
uint16_t tci = 0;
48
if (r != 0) {
79
uint16_t ploff = iovoff;
49
error_setg_errno(errp, -r, "Can't get vdpa device features");
80
assert(pkt);
50
return r;
81
- pkt->vlan_stripped = false;
51
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
82
52
static int vhost_vdpa_set_features(struct vhost_dev *dev,
83
if (strip_vlan) {
53
uint64_t features)
84
- pkt->vlan_stripped = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
54
{
85
- pkt->ehdr_buf,
55
+ struct vhost_vdpa *v = dev->opaque;
86
- &ploff, &tci);
56
int ret;
87
+ pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
57
88
+ pkt->ehdr_buf,
58
if (vhost_vdpa_one_time_request(dev)) {
89
+ &ploff, &tci);
59
return 0;
90
+ } else {
91
+ pkt->ehdr_buf_len = 0;
92
}
60
}
93
61
94
pkt->tci = tci;
62
+ if (v->shadow_vqs_enabled) {
95
@@ -XXX,XX +XXX,XX @@ void net_rx_pkt_dump(struct NetRxPkt *pkt)
63
+ if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
96
#ifdef NET_RX_PKT_DEBUG
64
+ /*
97
assert(pkt);
65
+ * QEMU is just trying to enable or disable logging. SVQ handles
98
66
+ * this sepparately, so no need to forward this.
99
- printf("RX PKT: tot_len: %d, vlan_stripped: %d, vlan_tag: %d\n",
67
+ */
100
- pkt->tot_len, pkt->vlan_stripped, pkt->tci);
68
+ v->acked_features = features;
101
+ printf("RX PKT: tot_len: %d, ehdr_buf_len: %lu, vlan_tag: %d\n",
69
+ return 0;
102
+ pkt->tot_len, pkt->ehdr_buf_len, pkt->tci);
70
+ }
103
#endif
71
+
72
+ v->acked_features = features;
73
+
74
+ /* We must not ack _F_LOG if SVQ is enabled */
75
+ features &= ~BIT_ULL(VHOST_F_LOG_ALL);
76
+ }
77
+
78
trace_vhost_vdpa_set_features(dev, features);
79
ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
80
if (ret) {
81
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
82
static int vhost_vdpa_get_features(struct vhost_dev *dev,
83
uint64_t *features)
84
{
85
- int ret;
86
+ struct vhost_vdpa *v = dev->opaque;
87
+ int ret = vhost_vdpa_get_dev_features(dev, features);
88
+
89
+ if (ret == 0 && v->shadow_vqs_enabled) {
90
+ /* Add SVQ logging capabilities */
91
+ *features |= BIT_ULL(VHOST_F_LOG_ALL);
92
+ }
93
94
- ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
95
- trace_vhost_vdpa_get_features(dev, *features);
96
return ret;
104
}
97
}
105
98
106
@@ -XXX,XX +XXX,XX @@ bool net_rx_pkt_is_vlan_stripped(struct NetRxPkt *pkt)
99
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
107
{
100
index XXXXXXX..XXXXXXX 100644
108
assert(pkt);
101
--- a/include/hw/virtio/vhost-vdpa.h
109
102
+++ b/include/hw/virtio/vhost-vdpa.h
110
- return pkt->vlan_stripped;
103
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
111
+ return pkt->ehdr_buf_len ? true : false;
104
bool iotlb_batch_begin_sent;
112
}
105
MemoryListener listener;
113
106
struct vhost_vdpa_iova_range iova_range;
114
bool net_rx_pkt_has_virt_hdr(struct NetRxPkt *pkt)
107
+ uint64_t acked_features;
108
bool shadow_vqs_enabled;
109
/* IOVA mapping used by the Shadow Virtqueue */
110
VhostIOVATree *iova_tree;
115
--
111
--
116
2.7.4
112
2.7.4
117
113
118
114
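
The acked_features ^ features test above is a single-bit toggle check. A standalone sketch of the idiom (VHOST_F_LOG_ALL is bit 26 in the vhost UAPI):

    #include <stdbool.h>
    #include <stdint.h>

    #define BIT_ULL(n)       (1ULL << (n))
    #define VHOST_F_LOG_ALL  26        /* from linux/vhost.h */

    /* True when the requested feature word differs from the previously
     * acked one only in the dirty-logging bit, i.e. the caller is just
     * toggling logging on or off. */
    static bool only_log_all_toggled(uint64_t acked, uint64_t requested)
    {
        return (acked ^ requested) == BIT_ULL(VHOST_F_LOG_ALL);
    }

When that is the case the patch records the new word in acked_features but forwards nothing, and in every other case it still masks _F_LOG_ALL out before it reaches the device, since SVQ does the dirty tracking itself.
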
Deleted patch
1
From: Dmitry Fleytman <dmitry@daynix.com>
2
1
3
In case of VLAN stripping, the ETH header is stored in a
4
separate chunk, and the IOV length should take this into
5
account.
6
7
This patch fixes checksum validation for RX packets
8
with VLAN header.
9
10
Devices affected by this problem: e1000e and vmxnet3.
11
12
Cc: qemu-stable@nongnu.org
13
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
hw/net/net_rx_pkt.c | 2 +-
17
1 file changed, 1 insertion(+), 1 deletion(-)
18
19
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/net_rx_pkt.c
22
+++ b/hw/net/net_rx_pkt.c
23
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
24
pkt->tot_len = iov_size(iov, iovcnt) - ploff + pkt->ehdr_buf_len;
25
pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
26
iov, iovcnt, ploff,
27
- pkt->tot_len - pkt->ehdr_buf_len);
28
+ pkt->tot_len - pkt->ehdr_buf_len) + 1;
29
} else {
30
net_rx_pkt_iovec_realloc(pkt, iovcnt);
31
32
--
33
2.7.4
34
35
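
The "+ 1" in the hunk above counts the prepended IOV element: with VLAN stripping, vec[0] holds the rebuilt Ethernet header, iov_copy() fills vec[1] onwards and returns only how many of those it wrote, so the total vector length is that return value plus one.
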
Deleted patch
1
From: Dmitry Fleytman <dmitry@daynix.com>
2
1
3
This is a refactoring commit that does not change behavior.
4
5
Signed-off-by: Dmitry Fleytman <dmitry@daynix.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
hw/net/net_rx_pkt.c | 9 +++++----
9
1 file changed, 5 insertions(+), 4 deletions(-)
10
11
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/hw/net/net_rx_pkt.c
14
+++ b/hw/net/net_rx_pkt.c
15
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
16
const struct iovec *iov, int iovcnt,
17
size_t ploff)
18
{
19
+ uint32_t pllen = iov_size(iov, iovcnt) - ploff;
20
+
21
if (pkt->ehdr_buf_len) {
22
net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
23
24
pkt->vec[0].iov_base = pkt->ehdr_buf;
25
pkt->vec[0].iov_len = pkt->ehdr_buf_len;
26
27
- pkt->tot_len = iov_size(iov, iovcnt) - ploff + pkt->ehdr_buf_len;
28
+ pkt->tot_len = pllen + pkt->ehdr_buf_len;
29
pkt->vec_len = iov_copy(pkt->vec + 1, pkt->vec_len_total - 1,
30
- iov, iovcnt, ploff,
31
- pkt->tot_len - pkt->ehdr_buf_len) + 1;
32
+ iov, iovcnt, ploff, pllen) + 1;
33
} else {
34
net_rx_pkt_iovec_realloc(pkt, iovcnt);
35
36
- pkt->tot_len = iov_size(iov, iovcnt) - ploff;
37
+ pkt->tot_len = pllen;
38
pkt->vec_len = iov_copy(pkt->vec, pkt->vec_len_total,
39
iov, iovcnt, ploff, pkt->tot_len);
40
}
41
--
42
2.7.4
43
44
Deleted patch
1
From: zhanghailiang <zhang.zhanghailiang@huawei.com>
2
1
3
We catch the below error report when trying to delete a compare object
4
via a QMP command:
5
chardev/char-io.c:91: io_watch_poll_finalize: Assertion `iwp->src == ((void *)0)' failed.
6
7
This is caused by failing to remove the right watched fd when
8
calling qemu_chr_fe_set_handlers();
9
10
Fix it by passing the worker_context parameter to qemu_chr_fe_set_handlers().
11
12
Signed-off-by: zhanghailiang <zhang.zhanghailiang@huawei.com>
13
Reviewed-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
net/colo-compare.c | 20 +++++++++++---------
17
1 file changed, 11 insertions(+), 9 deletions(-)
18
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
22
+++ b/net/colo-compare.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
24
/* compare thread, a thread for each NIC */
25
QemuThread thread;
26
27
+ GMainContext *worker_context;
28
GMainLoop *compare_loop;
29
} CompareState;
30
31
@@ -XXX,XX +XXX,XX @@ static gboolean check_old_packet_regular(void *opaque)
32
33
static void *colo_compare_thread(void *opaque)
34
{
35
- GMainContext *worker_context;
36
CompareState *s = opaque;
37
GSource *timeout_source;
38
39
- worker_context = g_main_context_new();
40
+ s->worker_context = g_main_context_new();
41
42
qemu_chr_fe_set_handlers(&s->chr_pri_in, compare_chr_can_read,
43
- compare_pri_chr_in, NULL, s, worker_context, true);
44
+ compare_pri_chr_in, NULL, s, s->worker_context, true);
45
qemu_chr_fe_set_handlers(&s->chr_sec_in, compare_chr_can_read,
46
- compare_sec_chr_in, NULL, s, worker_context, true);
47
+ compare_sec_chr_in, NULL, s, s->worker_context, true);
48
49
- s->compare_loop = g_main_loop_new(worker_context, FALSE);
50
+ s->compare_loop = g_main_loop_new(s->worker_context, FALSE);
51
52
/* To kick any packets that the secondary doesn't match */
53
timeout_source = g_timeout_source_new(REGULAR_PACKET_CHECK_MS);
54
g_source_set_callback(timeout_source,
55
(GSourceFunc)check_old_packet_regular, s, NULL);
56
- g_source_attach(timeout_source, worker_context);
57
+ g_source_attach(timeout_source, s->worker_context);
58
59
g_main_loop_run(s->compare_loop);
60
61
g_source_unref(timeout_source);
62
g_main_loop_unref(s->compare_loop);
63
- g_main_context_unref(worker_context);
64
+ g_main_context_unref(s->worker_context);
65
return NULL;
66
}
67
68
@@ -XXX,XX +XXX,XX @@ static void colo_compare_finalize(Object *obj)
69
{
70
CompareState *s = COLO_COMPARE(obj);
71
72
- qemu_chr_fe_deinit(&s->chr_pri_in);
73
- qemu_chr_fe_deinit(&s->chr_sec_in);
74
+ qemu_chr_fe_set_handlers(&s->chr_pri_in, NULL, NULL, NULL, NULL,
75
+ s->worker_context, true);
76
+ qemu_chr_fe_set_handlers(&s->chr_sec_in, NULL, NULL, NULL, NULL,
77
+ s->worker_context, true);
78
qemu_chr_fe_deinit(&s->chr_out);
79
80
g_main_loop_quit(s->compare_loop);
81
--
82
2.7.4
83
84