1
The following changes since commit e607bbee553cfe73072870cef458cfa4e78133e2:
1
The following changes since commit d48125de38f48a61d6423ef6a01156d6dff9ee2c:
2
2
3
Merge remote-tracking branch 'remotes/edgar/tags/edgar/xilinx-next-2018-01-26.for-upstream' into staging (2018-01-26 14:24:25 +0000)
3
Merge tag 'kraxel-20220719-pull-request' of https://gitlab.com/kraxel/qemu into staging (2022-07-19 17:40:36 +0100)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to bf4835a4d5338bb7424827715df22570a8adc67c:
9
for you to fetch changes up to 8bdab83b34efb0b598be4e5b98e4f466ca5f2f80:
10
10
11
MAINTAINERS: update Dmitry Fleytman email (2018-01-29 16:05:38 +0800)
11
net/colo.c: fix segmentation fault when packet is not parsed correctly (2022-07-20 16:58:08 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
Changes since V1:
16
- Fix build errors of vhost-vdpa when virtio-net is not set
17
15
----------------------------------------------------------------
18
----------------------------------------------------------------
16
Mao Zhongyi (2):
19
Eugenio Pérez (21):
17
colo: modified the payload compare function
20
vhost: move descriptor translation to vhost_svq_vring_write_descs
18
colo: compare the packet based on the tcp sequence number
21
virtio-net: Expose MAC_TABLE_ENTRIES
22
virtio-net: Expose ctrl virtqueue logic
23
vdpa: Avoid compiler to squash reads to used idx
24
vhost: Reorder vhost_svq_kick
25
vhost: Move vhost_svq_kick call to vhost_svq_add
26
vhost: Check for queue full at vhost_svq_add
27
vhost: Decouple vhost_svq_add from VirtQueueElement
28
vhost: Add SVQDescState
29
vhost: Track number of descs in SVQDescState
30
vhost: add vhost_svq_push_elem
31
vhost: Expose vhost_svq_add
32
vhost: add vhost_svq_poll
33
vhost: Add svq avail_handler callback
34
vdpa: Export vhost_vdpa_dma_map and unmap calls
35
vhost-net-vdpa: add stubs for when no virtio-net device is present
36
vdpa: manual forward CVQ buffers
37
vdpa: Buffer CVQ support on shadow virtqueue
38
vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs
39
vdpa: Add device migration blocker
40
vdpa: Add x-svq to NetdevVhostVDPAOptions
19
41
20
Philippe Mathieu-Daudé (1):
42
Zhang Chen (4):
21
MAINTAINERS: update Dmitry Fleytman email
43
softmmu/runstate.c: add RunStateTransition support from COLO to PRELAUNCH
44
net/colo: Fix a "double free" crash to clear the conn_list
45
net/colo.c: No need to track conn_list for filter-rewriter
46
net/colo.c: fix segmentation fault when packet is not parsed correctly
22
47
23
Thomas Huth (3):
48
hw/net/virtio-net.c | 85 +++++----
24
net: Allow hubports to connect to other netdevs
49
hw/virtio/vhost-shadow-virtqueue.c | 210 +++++++++++++++-------
25
net: Allow netdevs to be used with 'hostfwd_add' and 'hostfwd_remove'
50
hw/virtio/vhost-shadow-virtqueue.h | 52 +++++-
26
qemu-doc: Get rid of "vlan=X" example in the documentation
51
hw/virtio/vhost-vdpa.c | 26 ++-
27
52
include/hw/virtio/vhost-vdpa.h | 8 +
28
MAINTAINERS | 8 +-
53
include/hw/virtio/virtio-net.h | 7 +
29
hmp-commands.hx | 4 +-
54
net/colo-compare.c | 2 +-
30
net/colo-compare.c | 411 +++++++++++++++++++++++++++++++++--------------------
55
net/colo.c | 11 +-
31
net/colo.c | 9 ++
56
net/filter-rewriter.c | 2 +-
32
net/colo.h | 15 ++
57
net/meson.build | 3 +-
33
net/hub.c | 27 +++-
58
net/trace-events | 1 +
34
net/hub.h | 3 +-
59
net/vhost-vdpa-stub.c | 21 +++
35
net/net.c | 2 +-
60
net/vhost-vdpa.c | 357 +++++++++++++++++++++++++++++++++++--
36
net/slirp.c | 33 +++--
61
qapi/net.json | 9 +-
37
net/trace-events | 2 +-
62
softmmu/runstate.c | 1 +
38
qapi/net.json | 4 +-
63
15 files changed, 671 insertions(+), 124 deletions(-)
39
qemu-options.hx | 12 +-
64
create mode 100644 net/vhost-vdpa-stub.c
40
12 files changed, 347 insertions(+), 183 deletions(-)
41
65
42
66
67
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
It's done for both in and out descriptors so it's better placed here.
4
5
Acked-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 38 +++++++++++++++++++++++++++-----------
11
1 file changed, 27 insertions(+), 11 deletions(-)
12
13
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/virtio/vhost-shadow-virtqueue.c
16
+++ b/hw/virtio/vhost-shadow-virtqueue.c
17
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
18
return true;
19
}
20
21
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
22
- const struct iovec *iovec, size_t num,
23
- bool more_descs, bool write)
24
+/**
25
+ * Write descriptors to SVQ vring
26
+ *
27
+ * @svq: The shadow virtqueue
28
+ * @sg: Cache for hwaddr
29
+ * @iovec: The iovec from the guest
30
+ * @num: iovec length
31
+ * @more_descs: True if more descriptors come in the chain
32
+ * @write: True if they are writeable descriptors
33
+ *
34
+ * Return true if success, false otherwise and print error.
35
+ */
36
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
37
+ const struct iovec *iovec, size_t num,
38
+ bool more_descs, bool write)
39
{
40
uint16_t i = svq->free_head, last = svq->free_head;
41
unsigned n;
42
uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
43
vring_desc_t *descs = svq->vring.desc;
44
+ bool ok;
45
46
if (num == 0) {
47
- return;
48
+ return true;
49
+ }
50
+
51
+ ok = vhost_svq_translate_addr(svq, sg, iovec, num);
52
+ if (unlikely(!ok)) {
53
+ return false;
54
}
55
56
for (n = 0; n < num; n++) {
57
@@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
58
}
59
60
svq->free_head = le16_to_cpu(svq->desc_next[last]);
61
+ return true;
62
}
63
64
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
65
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
66
return false;
67
}
68
69
- ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
70
+ ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
71
+ elem->in_num > 0, false);
72
if (unlikely(!ok)) {
73
return false;
74
}
75
- vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
76
- elem->in_num > 0, false);
77
-
78
79
- ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
80
+ ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false,
81
+ true);
82
if (unlikely(!ok)) {
83
return false;
84
}
85
86
- vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
87
-
88
/*
89
* Put the entry in the available array (but don't update avail->idx until
90
* they do sync).
91
--
92
2.7.4
93
94
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
vhost-vdpa control virtqueue needs to know the maximum entries supported
4
by the virtio-net device, so we know if it is possible to apply the
5
filter.
6
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/net/virtio-net.c | 1 -
12
include/hw/virtio/virtio-net.h | 3 +++
13
2 files changed, 3 insertions(+), 1 deletion(-)
14
15
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/virtio-net.c
18
+++ b/hw/net/virtio-net.c
19
@@ -XXX,XX +XXX,XX @@
20
21
#define VIRTIO_NET_VM_VERSION 11
22
23
-#define MAC_TABLE_ENTRIES 64
24
#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
25
26
/* previously fixed value */
27
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/hw/virtio/virtio-net.h
30
+++ b/include/hw/virtio/virtio-net.h
31
@@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
32
* and latency. */
33
#define TX_BURST 256
34
35
+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */
36
+#define MAC_TABLE_ENTRIES 64
37
+
38
typedef struct virtio_net_conf
39
{
40
uint32_t txtimer;
41
--
42
2.7.4
43
44
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
gently asked by his automatic reply :)
3
This allows external vhost-net devices to modify the state of the
4
VirtIO device model once the vhost-vdpa device has acknowledged the
5
control commands.
4
6
5
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
10
---
8
MAINTAINERS | 8 ++++----
11
hw/net/virtio-net.c | 84 ++++++++++++++++++++++++------------------
9
1 file changed, 4 insertions(+), 4 deletions(-)
12
include/hw/virtio/virtio-net.h | 4 ++
13
2 files changed, 53 insertions(+), 35 deletions(-)
10
14
11
diff --git a/MAINTAINERS b/MAINTAINERS
15
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/MAINTAINERS
17
--- a/hw/net/virtio-net.c
14
+++ b/MAINTAINERS
18
+++ b/hw/net/virtio-net.c
15
@@ -XXX,XX +XXX,XX @@ F: hw/scsi/mfi.h
19
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
16
F: tests/megasas-test.c
20
return VIRTIO_NET_OK;
17
21
}
18
Network packet abstractions
22
19
-M: Dmitry Fleytman <dmitry@daynix.com>
23
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
20
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
24
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
21
S: Maintained
25
+ const struct iovec *in_sg, unsigned in_num,
22
F: include/net/eth.h
26
+ const struct iovec *out_sg,
23
F: net/eth.c
27
+ unsigned out_num)
24
@@ -XXX,XX +XXX,XX @@ F: hw/net/net_rx_pkt*
28
{
25
F: hw/net/net_tx_pkt*
29
VirtIONet *n = VIRTIO_NET(vdev);
26
30
struct virtio_net_ctrl_hdr ctrl;
27
Vmware
31
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
28
-M: Dmitry Fleytman <dmitry@daynix.com>
32
- VirtQueueElement *elem;
29
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
33
size_t s;
30
S: Maintained
34
struct iovec *iov, *iov2;
31
F: hw/net/vmxnet*
35
- unsigned int iov_cnt;
32
F: hw/scsi/vmw_pvscsi*
36
+
33
@@ -XXX,XX +XXX,XX @@ F: hw/mem/nvdimm.c
37
+ if (iov_size(in_sg, in_num) < sizeof(status) ||
34
F: include/hw/mem/nvdimm.h
38
+ iov_size(out_sg, out_num) < sizeof(ctrl)) {
35
39
+ virtio_error(vdev, "virtio-net ctrl missing headers");
36
e1000x
40
+ return 0;
37
-M: Dmitry Fleytman <dmitry@daynix.com>
41
+ }
38
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
42
+
39
S: Maintained
43
+ iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
40
F: hw/net/e1000x*
44
+ s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
41
45
+ iov_discard_front(&iov, &out_num, sizeof(ctrl));
42
e1000e
46
+ if (s != sizeof(ctrl)) {
43
-M: Dmitry Fleytman <dmitry@daynix.com>
47
+ status = VIRTIO_NET_ERR;
44
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
48
+ } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
45
S: Maintained
49
+ status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
46
F: hw/net/e1000e*
50
+ } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
51
+ status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
52
+ } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
53
+ status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
54
+ } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
55
+ status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
56
+ } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
57
+ status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
58
+ } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
59
+ status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
60
+ }
61
+
62
+ s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
63
+ assert(s == sizeof(status));
64
+
65
+ g_free(iov2);
66
+ return sizeof(status);
67
+}
68
+
69
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
70
+{
71
+ VirtQueueElement *elem;
72
73
for (;;) {
74
+ size_t written;
75
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
76
if (!elem) {
77
break;
78
}
79
- if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
80
- iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
81
- virtio_error(vdev, "virtio-net ctrl missing headers");
82
+
83
+ written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
84
+ elem->out_sg, elem->out_num);
85
+ if (written > 0) {
86
+ virtqueue_push(vq, elem, written);
87
+ virtio_notify(vdev, vq);
88
+ g_free(elem);
89
+ } else {
90
virtqueue_detach_element(vq, elem, 0);
91
g_free(elem);
92
break;
93
}
94
-
95
- iov_cnt = elem->out_num;
96
- iov2 = iov = g_memdup2(elem->out_sg,
97
- sizeof(struct iovec) * elem->out_num);
98
- s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
99
- iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
100
- if (s != sizeof(ctrl)) {
101
- status = VIRTIO_NET_ERR;
102
- } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
103
- status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
104
- } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
105
- status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
106
- } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
107
- status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
108
- } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
109
- status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
110
- } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
111
- status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
112
- } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
113
- status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
114
- }
115
-
116
- s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
117
- assert(s == sizeof(status));
118
-
119
- virtqueue_push(vq, elem, sizeof(status));
120
- virtio_notify(vdev, vq);
121
- g_free(iov2);
122
- g_free(elem);
123
}
124
}
125
126
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
127
index XXXXXXX..XXXXXXX 100644
128
--- a/include/hw/virtio/virtio-net.h
129
+++ b/include/hw/virtio/virtio-net.h
130
@@ -XXX,XX +XXX,XX @@ struct VirtIONet {
131
struct EBPFRSSContext ebpf_rss;
132
};
133
134
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
135
+ const struct iovec *in_sg, unsigned in_num,
136
+ const struct iovec *out_sg,
137
+ unsigned out_num);
138
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
139
const char *type);
47
140
48
--
141
--
49
2.7.4
142
2.7.4
50
143
51
144
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
In the next patch we will allow busypolling of this value. The compiler
4
may see a running path where shadow_used_idx, last_used_idx, and vring used
5
idx are not modified within the same thread busypolling, and squash the reads.
6
7
This was not an issue before since we always cleared device event
8
notifier before checking it, and that could act as memory barrier.
9
However, the busypoll needs something similar to kernel READ_ONCE.
10
11
Let's add it here, separated from the polling.
12
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
hw/virtio/vhost-shadow-virtqueue.c | 3 ++-
17
1 file changed, 2 insertions(+), 1 deletion(-)
18
19
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n)
24
25
static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
26
{
27
+ uint16_t *used_idx = &svq->vring.used->idx;
28
if (svq->last_used_idx != svq->shadow_used_idx) {
29
return true;
30
}
31
32
- svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
33
+ svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx);
34
35
return svq->last_used_idx != svq->shadow_used_idx;
36
}
37
--
38
2.7.4
39
40
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
Future code needs to call it from vhost_svq_add.
4
5
No functional change intended.
6
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++++++--------------
12
1 file changed, 14 insertions(+), 14 deletions(-)
13
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
19
return true;
20
}
21
22
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
23
+{
24
+ /*
25
+ * We need to expose the available array entries before checking the used
26
+ * flags
27
+ */
28
+ smp_mb();
29
+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
30
+ return;
31
+ }
32
+
33
+ event_notifier_set(&svq->hdev_kick);
34
+}
35
+
36
/**
37
* Add an element to a SVQ.
38
*
39
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
40
return true;
41
}
42
43
-static void vhost_svq_kick(VhostShadowVirtqueue *svq)
44
-{
45
- /*
46
- * We need to expose the available array entries before checking the used
47
- * flags
48
- */
49
- smp_mb();
50
- if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
51
- return;
52
- }
53
-
54
- event_notifier_set(&svq->hdev_kick);
55
-}
56
-
57
/**
58
* Forward available buffers.
59
*
60
--
61
2.7.4
62
63
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
The series needs to expose vhost_svq_add with full functionality,
4
including kick
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 2 +-
11
1 file changed, 1 insertion(+), 1 deletion(-)
12
13
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/virtio/vhost-shadow-virtqueue.c
16
+++ b/hw/virtio/vhost-shadow-virtqueue.c
17
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
18
}
19
20
svq->ring_id_maps[qemu_head] = elem;
21
+ vhost_svq_kick(svq);
22
return true;
23
}
24
25
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
26
/* VQ is broken, just return and ignore any other kicks */
27
return;
28
}
29
- vhost_svq_kick(svq);
30
}
31
32
virtio_queue_set_notification(svq->vq, true);
33
--
34
2.7.4
35
36
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
The series needs to expose vhost_svq_add with full functionality,
4
including checking for full queue.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 59 +++++++++++++++++++++-----------------
11
1 file changed, 33 insertions(+), 26 deletions(-)
12
13
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/virtio/vhost-shadow-virtqueue.c
16
+++ b/hw/virtio/vhost-shadow-virtqueue.c
17
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
18
* Add an element to a SVQ.
19
*
20
* The caller must check that there is enough slots for the new element. It
21
- * takes ownership of the element: In case of failure, it is free and the SVQ
22
- * is considered broken.
23
+ * takes ownership of the element: In case of failure not ENOSPC, it is free.
24
+ *
25
+ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
26
*/
27
-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
28
+static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
29
{
30
unsigned qemu_head;
31
- bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
32
+ unsigned ndescs = elem->in_num + elem->out_num;
33
+ bool ok;
34
+
35
+ if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
36
+ return -ENOSPC;
37
+ }
38
+
39
+ ok = vhost_svq_add_split(svq, elem, &qemu_head);
40
if (unlikely(!ok)) {
41
g_free(elem);
42
- return false;
43
+ return -EINVAL;
44
}
45
46
svq->ring_id_maps[qemu_head] = elem;
47
vhost_svq_kick(svq);
48
- return true;
49
+ return 0;
50
}
51
52
/**
53
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
54
55
while (true) {
56
VirtQueueElement *elem;
57
- bool ok;
58
+ int r;
59
60
if (svq->next_guest_avail_elem) {
61
elem = g_steal_pointer(&svq->next_guest_avail_elem);
62
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
63
break;
64
}
65
66
- if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
67
- /*
68
- * This condition is possible since a contiguous buffer in GPA
69
- * does not imply a contiguous buffer in qemu's VA
70
- * scatter-gather segments. If that happens, the buffer exposed
71
- * to the device needs to be a chain of descriptors at this
72
- * moment.
73
- *
74
- * SVQ cannot hold more available buffers if we are here:
75
- * queue the current guest descriptor and ignore further kicks
76
- * until some elements are used.
77
- */
78
- svq->next_guest_avail_elem = elem;
79
- return;
80
- }
81
-
82
- ok = vhost_svq_add(svq, elem);
83
- if (unlikely(!ok)) {
84
- /* VQ is broken, just return and ignore any other kicks */
85
+ r = vhost_svq_add(svq, elem);
86
+ if (unlikely(r != 0)) {
87
+ if (r == -ENOSPC) {
88
+ /*
89
+ * This condition is possible since a contiguous buffer in
90
+ * GPA does not imply a contiguous buffer in qemu's VA
91
+ * scatter-gather segments. If that happens, the buffer
92
+ * exposed to the device needs to be a chain of descriptors
93
+ * at this moment.
94
+ *
95
+ * SVQ cannot hold more available buffers if we are here:
96
+ * queue the current guest descriptor and ignore kicks
97
+ * until some elements are used.
98
+ */
99
+ svq->next_guest_avail_elem = elem;
100
+ }
101
+
102
+ /* VQ is full or broken, just return and ignore kicks */
103
return;
104
}
105
}
106
--
107
2.7.4
108
109
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
VirtQueueElement comes from the guest, but we want SVQ to be able
4
to modify the element presented to the device without the guest's
5
knowledge.
6
7
To do so, make SVQ accept sg buffers directly, instead of using
8
VirtQueueElement.
9
10
Add vhost_svq_add_element to maintain element convenience.
11
12
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Acked-by: Jason Wang <jasowang@redhat.com>
14
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
---
17
hw/virtio/vhost-shadow-virtqueue.c | 33 ++++++++++++++++++++++-----------
18
1 file changed, 22 insertions(+), 11 deletions(-)
19
20
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
24
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
25
}
26
27
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
28
- VirtQueueElement *elem, unsigned *head)
29
+ const struct iovec *out_sg, size_t out_num,
30
+ const struct iovec *in_sg, size_t in_num,
31
+ unsigned *head)
32
{
33
unsigned avail_idx;
34
vring_avail_t *avail = svq->vring.avail;
35
bool ok;
36
- g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
37
+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
38
39
*head = svq->free_head;
40
41
/* We need some descriptors here */
42
- if (unlikely(!elem->out_num && !elem->in_num)) {
43
+ if (unlikely(!out_num && !in_num)) {
44
qemu_log_mask(LOG_GUEST_ERROR,
45
"Guest provided element with no descriptors");
46
return false;
47
}
48
49
- ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
50
- elem->in_num > 0, false);
51
+ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
52
+ false);
53
if (unlikely(!ok)) {
54
return false;
55
}
56
57
- ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false,
58
- true);
59
+ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
60
if (unlikely(!ok)) {
61
return false;
62
}
63
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
64
*
65
* Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
66
*/
67
-static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
68
+static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
69
+ size_t out_num, const struct iovec *in_sg,
70
+ size_t in_num, VirtQueueElement *elem)
71
{
72
unsigned qemu_head;
73
- unsigned ndescs = elem->in_num + elem->out_num;
74
+ unsigned ndescs = in_num + out_num;
75
bool ok;
76
77
if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
78
return -ENOSPC;
79
}
80
81
- ok = vhost_svq_add_split(svq, elem, &qemu_head);
82
+ ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
83
if (unlikely(!ok)) {
84
g_free(elem);
85
return -EINVAL;
86
@@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
87
return 0;
88
}
89
90
+/* Convenience wrapper to add a guest's element to SVQ */
91
+static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
92
+ VirtQueueElement *elem)
93
+{
94
+ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
95
+ elem->in_num, elem);
96
+}
97
+
98
/**
99
* Forward available buffers.
100
*
101
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
102
break;
103
}
104
105
- r = vhost_svq_add(svq, elem);
106
+ r = vhost_svq_add_element(svq, elem);
107
if (unlikely(r != 0)) {
108
if (r == -ENOSPC) {
109
/*
110
--
111
2.7.4
112
113
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This will allow SVQ to add context to the different queue elements.
4
5
This patch only stores the actual element; no functional change intended.
6
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++--------
12
hw/virtio/vhost-shadow-virtqueue.h | 8 ++++++--
13
2 files changed, 14 insertions(+), 10 deletions(-)
14
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
19
@@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
20
return -EINVAL;
21
}
22
23
- svq->ring_id_maps[qemu_head] = elem;
24
+ svq->desc_state[qemu_head].elem = elem;
25
vhost_svq_kick(svq);
26
return 0;
27
}
28
@@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
29
return NULL;
30
}
31
32
- if (unlikely(!svq->ring_id_maps[used_elem.id])) {
33
+ if (unlikely(!svq->desc_state[used_elem.id].elem)) {
34
qemu_log_mask(LOG_GUEST_ERROR,
35
"Device %s says index %u is used, but it was not available",
36
svq->vdev->name, used_elem.id);
37
return NULL;
38
}
39
40
- num = svq->ring_id_maps[used_elem.id]->in_num +
41
- svq->ring_id_maps[used_elem.id]->out_num;
42
+ num = svq->desc_state[used_elem.id].elem->in_num +
43
+ svq->desc_state[used_elem.id].elem->out_num;
44
last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
45
svq->desc_next[last_used_chain] = svq->free_head;
46
svq->free_head = used_elem.id;
47
48
*len = used_elem.len;
49
- return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
50
+ return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
51
}
52
53
static void vhost_svq_flush(VhostShadowVirtqueue *svq,
54
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
55
memset(svq->vring.desc, 0, driver_size);
56
svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
57
memset(svq->vring.used, 0, device_size);
58
- svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
59
+ svq->desc_state = g_new0(SVQDescState, svq->vring.num);
60
svq->desc_next = g_new0(uint16_t, svq->vring.num);
61
for (unsigned i = 0; i < svq->vring.num - 1; i++) {
62
svq->desc_next[i] = cpu_to_le16(i + 1);
63
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
64
65
for (unsigned i = 0; i < svq->vring.num; ++i) {
66
g_autofree VirtQueueElement *elem = NULL;
67
- elem = g_steal_pointer(&svq->ring_id_maps[i]);
68
+ elem = g_steal_pointer(&svq->desc_state[i].elem);
69
if (elem) {
70
virtqueue_detach_element(svq->vq, elem, 0);
71
}
72
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
73
}
74
svq->vq = NULL;
75
g_free(svq->desc_next);
76
- g_free(svq->ring_id_maps);
77
+ g_free(svq->desc_state);
78
qemu_vfree(svq->vring.desc);
79
qemu_vfree(svq->vring.used);
80
}
81
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/hw/virtio/vhost-shadow-virtqueue.h
84
+++ b/hw/virtio/vhost-shadow-virtqueue.h
85
@@ -XXX,XX +XXX,XX @@
86
#include "standard-headers/linux/vhost_types.h"
87
#include "hw/virtio/vhost-iova-tree.h"
88
89
+typedef struct SVQDescState {
90
+ VirtQueueElement *elem;
91
+} SVQDescState;
92
+
93
/* Shadow virtqueue to relay notifications */
94
typedef struct VhostShadowVirtqueue {
95
/* Shadow vring */
96
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
97
/* IOVA mapping */
98
VhostIOVATree *iova_tree;
99
100
- /* Map for use the guest's descriptors */
101
- VirtQueueElement **ring_id_maps;
102
+ /* SVQ vring descriptors state */
103
+ SVQDescState *desc_state;
104
105
/* Next VirtQueue element that guest made available */
106
VirtQueueElement *next_guest_avail_elem;
107
--
108
2.7.4
109
110
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
A guest's buffer contiguous on GPA may need multiple descriptors on
4
qemu's VA, so SVQ should track its length separately.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 4 ++--
11
hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++
12
2 files changed, 8 insertions(+), 2 deletions(-)
13
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
19
}
20
21
svq->desc_state[qemu_head].elem = elem;
22
+ svq->desc_state[qemu_head].ndescs = ndescs;
23
vhost_svq_kick(svq);
24
return 0;
25
}
26
@@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
27
return NULL;
28
}
29
30
- num = svq->desc_state[used_elem.id].elem->in_num +
31
- svq->desc_state[used_elem.id].elem->out_num;
32
+ num = svq->desc_state[used_elem.id].ndescs;
33
last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
34
svq->desc_next[last_used_chain] = svq->free_head;
35
svq->free_head = used_elem.id;
36
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
37
index XXXXXXX..XXXXXXX 100644
38
--- a/hw/virtio/vhost-shadow-virtqueue.h
39
+++ b/hw/virtio/vhost-shadow-virtqueue.h
40
@@ -XXX,XX +XXX,XX @@
41
42
typedef struct SVQDescState {
43
VirtQueueElement *elem;
44
+
45
+ /*
46
+ * Number of descriptors exposed to the device. May or may not match
47
+ * guest's
48
+ */
49
+ unsigned int ndescs;
50
} SVQDescState;
51
52
/* Shadow virtqueue to relay notifications */
53
--
54
2.7.4
55
56
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This function allows external SVQ users to return guest's available
4
buffers.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++++++++++
11
hw/virtio/vhost-shadow-virtqueue.h | 3 +++
12
2 files changed, 19 insertions(+)
13
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
19
return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
20
}
21
22
+/**
23
+ * Push an element to SVQ, returning it to the guest.
24
+ */
25
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
26
+ const VirtQueueElement *elem, uint32_t len)
27
+{
28
+ virtqueue_push(svq->vq, elem, len);
29
+ if (svq->next_guest_avail_elem) {
30
+ /*
31
+ * Avail ring was full when vhost_svq_flush was called, so it's a
32
+ * good moment to make more descriptors available if possible.
33
+ */
34
+ vhost_handle_guest_kick(svq);
35
+ }
36
+}
37
+
38
static void vhost_svq_flush(VhostShadowVirtqueue *svq,
39
bool check_for_avail_queue)
40
{
41
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/hw/virtio/vhost-shadow-virtqueue.h
44
+++ b/hw/virtio/vhost-shadow-virtqueue.h
45
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
46
47
bool vhost_svq_valid_features(uint64_t features, Error **errp);
48
49
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
50
+ const VirtQueueElement *elem, uint32_t len);
51
+
52
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
53
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
54
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
55
--
56
2.7.4
57
58
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This allows external parts of SVQ to forward custom buffers to the
4
device.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 6 +++---
11
hw/virtio/vhost-shadow-virtqueue.h | 3 +++
12
2 files changed, 6 insertions(+), 3 deletions(-)
13
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
19
*
20
* Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
21
*/
22
-static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
23
- size_t out_num, const struct iovec *in_sg,
24
- size_t in_num, VirtQueueElement *elem)
25
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
26
+ size_t out_num, const struct iovec *in_sg, size_t in_num,
27
+ VirtQueueElement *elem)
28
{
29
unsigned qemu_head;
30
unsigned ndescs = in_num + out_num;
31
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/virtio/vhost-shadow-virtqueue.h
34
+++ b/hw/virtio/vhost-shadow-virtqueue.h
35
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp);
36
37
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
38
const VirtQueueElement *elem, uint32_t len);
39
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
40
+ size_t out_num, const struct iovec *in_sg, size_t in_num,
41
+ VirtQueueElement *elem);
42
43
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
44
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
45
--
46
2.7.4
47
48
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
It allows the Shadow Control VirtQueue to wait for the device to use the
4
available buffers.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++++++++
11
hw/virtio/vhost-shadow-virtqueue.h | 1 +
12
2 files changed, 28 insertions(+)
13
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
19
}
20
21
/**
22
+ * Poll the SVQ for one device used buffer.
23
+ *
24
+ * This function races with main event loop SVQ polling, so extra
25
+ * synchronization is needed.
26
+ *
27
+ * Return the length written by the device.
28
+ */
29
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
30
+{
31
+ int64_t start_us = g_get_monotonic_time();
32
+ do {
33
+ uint32_t len;
34
+ VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
35
+ if (elem) {
36
+ return len;
37
+ }
38
+
39
+ if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
40
+ return 0;
41
+ }
42
+
43
+ /* Make sure we read new used_idx */
44
+ smp_rmb();
45
+ } while (true);
46
+}
47
+
48
+/**
49
* Forward used buffers.
50
*
51
* @n: hdev call event notifier, the one that device set to notify svq.
52
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/hw/virtio/vhost-shadow-virtqueue.h
55
+++ b/hw/virtio/vhost-shadow-virtqueue.h
56
@@ -XXX,XX +XXX,XX @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
57
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
58
size_t out_num, const struct iovec *in_sg, size_t in_num,
59
VirtQueueElement *elem);
60
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
61
62
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
63
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
64
--
65
2.7.4
66
67
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This allows external handlers to be aware of new buffers that the guest
4
places in the virtqueue.
5
6
When this callback is defined the ownership of the guest's virtqueue
7
element is transferred to the callback. This means that if the user
8
wants to forward the descriptor it needs to manually inject it. The
9
callback is also free to process the command by itself and use the
10
element with svq_push.
11
12
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
hw/virtio/vhost-shadow-virtqueue.c | 14 ++++++++++++--
17
hw/virtio/vhost-shadow-virtqueue.h | 31 ++++++++++++++++++++++++++++++-
18
hw/virtio/vhost-vdpa.c | 3 ++-
19
3 files changed, 44 insertions(+), 4 deletions(-)
20
21
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/virtio/vhost-shadow-virtqueue.c
24
+++ b/hw/virtio/vhost-shadow-virtqueue.c
25
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
26
break;
27
}
28
29
- r = vhost_svq_add_element(svq, elem);
30
+ if (svq->ops) {
31
+ r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
32
+ } else {
33
+ r = vhost_svq_add_element(svq, elem);
34
+ }
35
if (unlikely(r != 0)) {
36
if (r == -ENOSPC) {
37
/*
38
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
39
* shadow methods and file descriptors.
40
*
41
* @iova_tree: Tree to perform descriptors translations
42
+ * @ops: SVQ owner callbacks
43
+ * @ops_opaque: ops opaque pointer
44
*
45
* Returns the new virtqueue or NULL.
46
*
47
* In case of error, reason is reported through error_report.
48
*/
49
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
50
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
51
+ const VhostShadowVirtqueueOps *ops,
52
+ void *ops_opaque)
53
{
54
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
55
int r;
56
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
57
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
58
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
59
svq->iova_tree = iova_tree;
60
+ svq->ops = ops;
61
+ svq->ops_opaque = ops_opaque;
62
return g_steal_pointer(&svq);
63
64
err_init_hdev_call:
65
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
66
index XXXXXXX..XXXXXXX 100644
67
--- a/hw/virtio/vhost-shadow-virtqueue.h
68
+++ b/hw/virtio/vhost-shadow-virtqueue.h
69
@@ -XXX,XX +XXX,XX @@ typedef struct SVQDescState {
70
unsigned int ndescs;
71
} SVQDescState;
72
73
+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
74
+
75
+/**
76
+ * Callback to handle an avail buffer.
77
+ *
78
+ * @svq: Shadow virtqueue
79
+ * @elem: Element placed in the queue by the guest
80
+ * @vq_callback_opaque: Opaque
81
+ *
82
+ * Returns 0 if the vq is running as expected.
83
+ *
84
+ * Note that ownership of elem is transferred to the callback.
85
+ */
86
+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
87
+ VirtQueueElement *elem,
88
+ void *vq_callback_opaque);
89
+
90
+typedef struct VhostShadowVirtqueueOps {
91
+ VirtQueueAvailCallback avail_handler;
92
+} VhostShadowVirtqueueOps;
93
+
94
/* Shadow virtqueue to relay notifications */
95
typedef struct VhostShadowVirtqueue {
96
/* Shadow vring */
97
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
98
*/
99
uint16_t *desc_next;
100
101
+ /* Caller callbacks */
102
+ const VhostShadowVirtqueueOps *ops;
103
+
104
+ /* Caller callbacks opaque */
105
+ void *ops_opaque;
106
+
107
/* Next head to expose to the device */
108
uint16_t shadow_avail_idx;
109
110
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
111
VirtQueue *vq);
112
void vhost_svq_stop(VhostShadowVirtqueue *svq);
113
114
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
115
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
116
+ const VhostShadowVirtqueueOps *ops,
117
+ void *ops_opaque);
118
119
void vhost_svq_free(gpointer vq);
120
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
121
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/hw/virtio/vhost-vdpa.c
124
+++ b/hw/virtio/vhost-vdpa.c
125
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
126
127
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
128
for (unsigned n = 0; n < hdev->nvqs; ++n) {
129
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
130
+ g_autoptr(VhostShadowVirtqueue) svq;
131
132
+ svq = vhost_svq_new(v->iova_tree, NULL, NULL);
133
if (unlikely(!svq)) {
134
error_setg(errp, "Cannot create svq %u", n);
135
return -1;
136
--
137
2.7.4
138
139
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from
4
the guest that could set a different state in qemu device model and vdpa
5
device.
6
7
To do so, it needs to be able to map these new buffers to the device.
8
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Jason Wang <jasowang@redhat.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/virtio/vhost-vdpa.c | 7 +++----
15
include/hw/virtio/vhost-vdpa.h | 4 ++++
16
2 files changed, 7 insertions(+), 4 deletions(-)
17
18
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/virtio/vhost-vdpa.c
21
+++ b/hw/virtio/vhost-vdpa.c
22
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
23
return false;
24
}
25
26
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
27
- void *vaddr, bool readonly)
28
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
29
+ void *vaddr, bool readonly)
30
{
31
struct vhost_msg_v2 msg = {};
32
int fd = v->device_fd;
33
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
34
return ret;
35
}
36
37
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
38
- hwaddr size)
39
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
40
{
41
struct vhost_msg_v2 msg = {};
42
int fd = v->device_fd;
43
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/include/hw/virtio/vhost-vdpa.h
46
+++ b/include/hw/virtio/vhost-vdpa.h
47
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
48
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
49
} VhostVDPA;
50
51
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
52
+ void *vaddr, bool readonly);
53
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
54
+
55
#endif
56
--
57
2.7.4
58
59
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
net/vhost-vdpa.c will need functions that are declared in
4
vhost-shadow-virtqueue.c, which in turn needs functions from virtio-net.c.
5
6
Copy the vhost-vdpa-stub.c code so
7
only the constructor net_init_vhost_vdpa needs to be defined.
8
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
net/meson.build | 3 ++-
13
net/vhost-vdpa-stub.c | 21 +++++++++++++++++++++
14
2 files changed, 23 insertions(+), 1 deletion(-)
15
create mode 100644 net/vhost-vdpa-stub.c
16
17
diff --git a/net/meson.build b/net/meson.build
18
index XXXXXXX..XXXXXXX 100644
19
--- a/net/meson.build
20
+++ b/net/meson.build
21
@@ -XXX,XX +XXX,XX @@ endif
22
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix))
23
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c'))
24
if have_vhost_net_vdpa
25
- softmmu_ss.add(files('vhost-vdpa.c'))
26
+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c'))
27
+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c'))
28
endif
29
30
vmnet_files = files(
31
diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c
32
new file mode 100644
33
index XXXXXXX..XXXXXXX
34
--- /dev/null
35
+++ b/net/vhost-vdpa-stub.c
36
@@ -XXX,XX +XXX,XX @@
37
+/*
38
+ * vhost-vdpa-stub.c
39
+ *
40
+ * Copyright (c) 2022 Red Hat, Inc.
41
+ *
42
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
43
+ * See the COPYING file in the top-level directory.
44
+ *
45
+ */
46
+
47
+#include "qemu/osdep.h"
48
+#include "clients.h"
49
+#include "net/vhost-vdpa.h"
50
+#include "qapi/error.h"
51
+
52
+int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
53
+ NetClientState *peer, Error **errp)
54
+{
55
+ error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
56
+ return -1;
57
+}
58
--
59
2.7.4
60
61
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The vlan concept is marked as deprecated, so we should not use
3
Do a simple forwarding of CVQ buffers, the same work SVQ could do but
4
this for examples in the documentation anymore.
4
through callbacks. No functional change intended.
5
5
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
---
9
qemu-options.hx | 4 ++--
10
hw/virtio/vhost-vdpa.c | 3 ++-
10
1 file changed, 2 insertions(+), 2 deletions(-)
11
include/hw/virtio/vhost-vdpa.h | 3 +++
12
net/vhost-vdpa.c | 58 ++++++++++++++++++++++++++++++++++++++++++
13
3 files changed, 63 insertions(+), 1 deletion(-)
11
14
12
diff --git a/qemu-options.hx b/qemu-options.hx
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/qemu-options.hx
17
--- a/hw/virtio/vhost-vdpa.c
15
+++ b/qemu-options.hx
18
+++ b/hw/virtio/vhost-vdpa.c
16
@@ -XXX,XX +XXX,XX @@ qemu-system-i386 linux.img -net nic -net tap
19
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
17
#launch a QEMU instance with two NICs, each one connected
20
for (unsigned n = 0; n < hdev->nvqs; ++n) {
18
#to a TAP device
21
g_autoptr(VhostShadowVirtqueue) svq;
19
qemu-system-i386 linux.img \
22
20
- -net nic,vlan=0 -net tap,vlan=0,ifname=tap0 \
23
- svq = vhost_svq_new(v->iova_tree, NULL, NULL);
21
- -net nic,vlan=1 -net tap,vlan=1,ifname=tap1
24
+ svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
22
+ -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \
25
+ v->shadow_vq_ops_opaque);
23
+ -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1
26
if (unlikely(!svq)) {
24
@end example
27
error_setg(errp, "Cannot create svq %u", n);
25
28
return -1;
26
@example
29
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/hw/virtio/vhost-vdpa.h
32
+++ b/include/hw/virtio/vhost-vdpa.h
33
@@ -XXX,XX +XXX,XX @@
34
#include <gmodule.h>
35
36
#include "hw/virtio/vhost-iova-tree.h"
37
+#include "hw/virtio/vhost-shadow-virtqueue.h"
38
#include "hw/virtio/virtio.h"
39
#include "standard-headers/linux/vhost_types.h"
40
41
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
42
/* IOVA mapping used by the Shadow Virtqueue */
43
VhostIOVATree *iova_tree;
44
GPtrArray *shadow_vqs;
45
+ const VhostShadowVirtqueueOps *shadow_vq_ops;
46
+ void *shadow_vq_ops_opaque;
47
struct vhost_dev *dev;
48
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
49
} VhostVDPA;
50
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/net/vhost-vdpa.c
53
+++ b/net/vhost-vdpa.c
54
@@ -XXX,XX +XXX,XX @@
55
56
#include "qemu/osdep.h"
57
#include "clients.h"
58
+#include "hw/virtio/virtio-net.h"
59
#include "net/vhost_net.h"
60
#include "net/vhost-vdpa.h"
61
#include "hw/virtio/vhost-vdpa.h"
62
#include "qemu/config-file.h"
63
#include "qemu/error-report.h"
64
+#include "qemu/log.h"
65
+#include "qemu/memalign.h"
66
#include "qemu/option.h"
67
#include "qapi/error.h"
68
#include <linux/vhost.h>
69
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_vhost_vdpa_info = {
70
.check_peer_type = vhost_vdpa_check_peer_type,
71
};
72
73
+/**
74
+ * Forward buffer for the moment.
75
+ */
76
+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
77
+ VirtQueueElement *elem,
78
+ void *opaque)
79
+{
80
+ unsigned int n = elem->out_num + elem->in_num;
81
+ g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
82
+ size_t in_len, dev_written;
83
+ virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
84
+ int r;
85
+
86
+ memcpy(dev_buffers, elem->out_sg, elem->out_num);
87
+ memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
88
+
89
+ r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
90
+ elem->in_num, elem);
91
+ if (unlikely(r != 0)) {
92
+ if (unlikely(r == -ENOSPC)) {
93
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
94
+ __func__);
95
+ }
96
+ goto out;
97
+ }
98
+
99
+ /*
100
+ * We can poll here since we've had BQL from the time we sent the
101
+ * descriptor. Also, we need to take the answer before SVQ pulls by itself,
102
+ * when BQL is released
103
+ */
104
+ dev_written = vhost_svq_poll(svq);
105
+ if (unlikely(dev_written < sizeof(status))) {
106
+ error_report("Insufficient written data (%zu)", dev_written);
107
+ }
108
+
109
+out:
110
+ in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
111
+ sizeof(status));
112
+ if (unlikely(in_len < sizeof(status))) {
113
+ error_report("Bad device CVQ written length");
114
+ }
115
+ vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
116
+ g_free(elem);
117
+ return r;
118
+}
119
+
120
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
121
+ .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
122
+};
123
+
124
static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
125
const char *device,
126
const char *name,
127
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
128
129
s->vhost_vdpa.device_fd = vdpa_device_fd;
130
s->vhost_vdpa.index = queue_pair_index;
131
+ if (!is_datapath) {
132
+ s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
133
+ s->vhost_vdpa.shadow_vq_ops_opaque = s;
134
+ }
135
ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
136
if (ret) {
137
qemu_del_net_client(nc);
27
--
138
--
28
2.7.4
139
2.7.4
29
140
30
141
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
It does not make much sense to limit these commands to the legacy 'vlan'
3
Introduce the control virtqueue support for vDPA shadow virtqueue. This
4
concept only, they should work with the modern netdevs, too. So now
4
is needed for advanced networking features like rx filtering.
5
it is possible to use this command with one, two or three parameters.
5
6
6
Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
7
With one parameter, the command installs a hostfwd rule on the default
7
TOCTOU with the guest's or device's memory every time there is a device
8
"user" network:
8
model change. Otherwise, the guest could change the memory content in
9
hostfwd_add tcp:...
9
the time between qemu and the device read it.
10
10
11
With two parameters, the command installs a hostfwd rule on a netdev
11
To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
12
(that's the new way of using this command):
12
implemented. If the virtio-net driver changes MAC the virtio-net device
13
hostfwd_add netdev_id tcp:...
13
model will be updated with the new one, and a rx filtering change event
14
14
will be raised.
15
With three parameters, the command installs a rule on a 'vlan' (aka hub):
15
16
hostfwd_add hub_id name tcp:...
16
More cvq commands could be added here straightforwardly but they have
17
17
not been tested.
18
Same applies to the hostfwd_remove command now.
18
19
19
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
20
Signed-off-by: Thomas Huth <thuth@redhat.com>
20
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
---
22
---
23
hmp-commands.hx | 4 ++--
23
net/vhost-vdpa.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
24
net/slirp.c | 33 +++++++++++++++++++++++----------
24
1 file changed, 205 insertions(+), 8 deletions(-)
25
2 files changed, 25 insertions(+), 12 deletions(-)
25
26
26
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
27
diff --git a/hmp-commands.hx b/hmp-commands.hx
28
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
29
--- a/hmp-commands.hx
28
--- a/net/vhost-vdpa.c
30
+++ b/hmp-commands.hx
29
+++ b/net/vhost-vdpa.c
31
@@ -XXX,XX +XXX,XX @@ ETEXI
30
@@ -XXX,XX +XXX,XX @@ typedef struct VhostVDPAState {
32
{
31
NetClientState nc;
33
.name = "hostfwd_add",
32
struct vhost_vdpa vhost_vdpa;
34
.args_type = "arg1:s,arg2:s?,arg3:s?",
33
VHostNetState *vhost_net;
35
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
34
+
36
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
35
+ /* Control commands shadow buffers */
37
.help = "redirect TCP or UDP connections from host to guest (requires -net user)",
36
+ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
38
.cmd = hmp_hostfwd_add,
37
bool started;
39
},
38
} VhostVDPAState;
40
@@ -XXX,XX +XXX,XX @@ ETEXI
39
41
{
40
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_cleanup(NetClientState *nc)
42
.name = "hostfwd_remove",
41
{
43
.args_type = "arg1:s,arg2:s?,arg3:s?",
42
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
44
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport",
43
45
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport",
44
+ qemu_vfree(s->cvq_cmd_out_buffer);
46
.help = "remove host-to-guest TCP or UDP redirection",
45
+ qemu_vfree(s->cvq_cmd_in_buffer);
47
.cmd = hmp_hostfwd_remove,
46
if (s->vhost_net) {
48
},
47
vhost_net_cleanup(s->vhost_net);
49
diff --git a/net/slirp.c b/net/slirp.c
48
g_free(s->vhost_net);
50
index XXXXXXX..XXXXXXX 100644
49
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_vhost_vdpa_info = {
51
--- a/net/slirp.c
50
.check_peer_type = vhost_vdpa_check_peer_type,
52
+++ b/net/slirp.c
51
};
53
@@ -XXX,XX +XXX,XX @@ error:
52
54
return -1;
53
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
54
+{
55
+ VhostIOVATree *tree = v->iova_tree;
56
+ DMAMap needle = {
57
+ /*
58
+ * No need to specify size or to look for more translations since
59
+ * this contiguous chunk was allocated by us.
60
+ */
61
+ .translated_addr = (hwaddr)(uintptr_t)addr,
62
+ };
63
+ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
64
+ int r;
65
+
66
+ if (unlikely(!map)) {
67
+ error_report("Cannot locate expected map");
68
+ return;
69
+ }
70
+
71
+ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
72
+ if (unlikely(r != 0)) {
73
+ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
74
+ }
75
+
76
+ vhost_iova_tree_remove(tree, map);
77
+}
78
+
79
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
80
+{
81
+ /*
82
+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
83
+ * In buffer is always 1 byte, so it should fit here
84
+ */
85
+ return sizeof(struct virtio_net_ctrl_hdr) +
86
+ 2 * sizeof(struct virtio_net_ctrl_mac) +
87
+ MAC_TABLE_ENTRIES * ETH_ALEN;
88
+}
89
+
90
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
91
+{
92
+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
93
+}
94
+
95
+/** Copy and map a guest buffer. */
96
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
97
+ const struct iovec *out_data,
98
+ size_t out_num, size_t data_len, void *buf,
99
+ size_t *written, bool write)
100
+{
101
+ DMAMap map = {};
102
+ int r;
103
+
104
+ if (unlikely(!data_len)) {
105
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
106
+ __func__, write ? "in" : "out");
107
+ return false;
108
+ }
109
+
110
+ *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
111
+ map.translated_addr = (hwaddr)(uintptr_t)buf;
112
+ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
113
+ map.perm = write ? IOMMU_RW : IOMMU_RO,
114
+ r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
115
+ if (unlikely(r != IOVA_OK)) {
116
+ error_report("Cannot map injected element");
117
+ return false;
118
+ }
119
+
120
+ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
121
+ !write);
122
+ if (unlikely(r < 0)) {
123
+ goto dma_map_err;
124
+ }
125
+
126
+ return true;
127
+
128
+dma_map_err:
129
+ vhost_iova_tree_remove(v->iova_tree, &map);
130
+ return false;
131
+}
132
+
133
/**
134
- * Forward buffer for the moment.
135
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
136
+ *
137
+ * @iov: [0] is the out buffer, [1] is the in one
138
+ */
139
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
140
+ VirtQueueElement *elem,
141
+ struct iovec *iov)
142
+{
143
+ size_t in_copied;
144
+ bool ok;
145
+
146
+ iov[0].iov_base = s->cvq_cmd_out_buffer;
147
+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
148
+ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
149
+ &iov[0].iov_len, false);
150
+ if (unlikely(!ok)) {
151
+ return false;
152
+ }
153
+
154
+ iov[1].iov_base = s->cvq_cmd_in_buffer;
155
+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
156
+ sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
157
+ &in_copied, true);
158
+ if (unlikely(!ok)) {
159
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
160
+ return false;
161
+ }
162
+
163
+ iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
164
+ return true;
165
+}
166
+
167
+/**
168
+ * Do not forward commands not supported by SVQ. Otherwise, the device could
169
+ * accept it and qemu would not know how to update the device model.
170
+ */
171
+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
172
+ size_t out_num)
173
+{
174
+ struct virtio_net_ctrl_hdr ctrl;
175
+ size_t n;
176
+
177
+ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
178
+ if (unlikely(n < sizeof(ctrl))) {
179
+ qemu_log_mask(LOG_GUEST_ERROR,
180
+ "%s: invalid legnth of out buffer %zu\n", __func__, n);
181
+ return false;
182
+ }
183
+
184
+ switch (ctrl.class) {
185
+ case VIRTIO_NET_CTRL_MAC:
186
+ switch (ctrl.cmd) {
187
+ case VIRTIO_NET_CTRL_MAC_ADDR_SET:
188
+ return true;
189
+ default:
190
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
191
+ __func__, ctrl.cmd);
192
+ };
193
+ break;
194
+ default:
195
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
196
+ __func__, ctrl.class);
197
+ };
198
+
199
+ return false;
200
+}
201
+
202
+/**
203
+ * Validate and copy control virtqueue commands.
204
+ *
205
+ * Following QEMU guidelines, we offer a copy of the buffers to the device to
206
+ * prevent TOCTOU bugs.
207
*/
208
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
209
VirtQueueElement *elem,
210
void *opaque)
211
{
212
- unsigned int n = elem->out_num + elem->in_num;
213
- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
214
+ VhostVDPAState *s = opaque;
215
size_t in_len, dev_written;
216
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
217
- int r;
218
+ /* out and in buffers sent to the device */
219
+ struct iovec dev_buffers[2] = {
220
+ { .iov_base = s->cvq_cmd_out_buffer },
221
+ { .iov_base = s->cvq_cmd_in_buffer },
222
+ };
223
+ /* in buffer used for device model */
224
+ const struct iovec in = {
225
+ .iov_base = &status,
226
+ .iov_len = sizeof(status),
227
+ };
228
+ int r = -EINVAL;
229
+ bool ok;
230
+
231
+ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
232
+ if (unlikely(!ok)) {
233
+ goto out;
234
+ }
235
236
- memcpy(dev_buffers, elem->out_sg, elem->out_num);
237
- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
238
+ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
239
+ if (unlikely(!ok)) {
240
+ goto out;
241
+ }
242
243
- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
244
- elem->in_num, elem);
245
+ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
246
if (unlikely(r != 0)) {
247
if (unlikely(r == -ENOSPC)) {
248
qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
249
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
250
dev_written = vhost_svq_poll(svq);
251
if (unlikely(dev_written < sizeof(status))) {
252
error_report("Insufficient written data (%zu)", dev_written);
253
+ goto out;
254
+ }
255
+
256
+ memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
257
+ if (status != VIRTIO_NET_OK) {
258
+ goto out;
259
+ }
260
+
261
+ status = VIRTIO_NET_ERR;
262
+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
263
+ if (status != VIRTIO_NET_OK) {
264
+ error_report("Bad CVQ processing in model");
265
}
266
267
out:
268
@@ -XXX,XX +XXX,XX @@ out:
269
}
270
vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
271
g_free(elem);
272
+ if (dev_buffers[0].iov_base) {
273
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
274
+ }
275
+ if (dev_buffers[1].iov_base) {
276
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
277
+ }
278
return r;
55
}
279
}
56
280
57
-static SlirpState *slirp_lookup(Monitor *mon, const char *vlan,
281
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
58
- const char *stack)
282
s->vhost_vdpa.device_fd = vdpa_device_fd;
59
+static SlirpState *slirp_lookup(Monitor *mon, const char *hub_id,
283
s->vhost_vdpa.index = queue_pair_index;
60
+ const char *name)
284
if (!is_datapath) {
61
{
285
+ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
62
-
286
+ vhost_vdpa_net_cvq_cmd_page_len());
63
- if (vlan) {
287
+ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
64
+ if (name) {
288
+ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
65
NetClientState *nc;
289
+ vhost_vdpa_net_cvq_cmd_page_len());
66
- nc = net_hub_find_client_by_name(strtol(vlan, NULL, 0), stack);
290
+ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
67
- if (!nc) {
291
+
68
- monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
292
s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
69
- return NULL;
293
s->vhost_vdpa.shadow_vq_ops_opaque = s;
70
+ if (hub_id) {
294
}
71
+ nc = net_hub_find_client_by_name(strtol(hub_id, NULL, 0), name);
72
+ if (!nc) {
73
+ monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
74
+ return NULL;
75
+ }
76
+ } else {
77
+ nc = qemu_find_netdev(name);
78
+ if (!nc) {
79
+ monitor_printf(mon, "unrecognized netdev id '%s'\n", name);
80
+ return NULL;
81
+ }
82
}
83
if (strcmp(nc->model, "user")) {
84
monitor_printf(mon, "invalid device specified\n");
85
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict)
86
const char *arg2 = qdict_get_try_str(qdict, "arg2");
87
const char *arg3 = qdict_get_try_str(qdict, "arg3");
88
89
- if (arg2) {
90
+ if (arg3) {
91
s = slirp_lookup(mon, arg1, arg2);
92
src_str = arg3;
93
+ } else if (arg2) {
94
+ s = slirp_lookup(mon, NULL, arg1);
95
+ src_str = arg2;
96
} else {
97
s = slirp_lookup(mon, NULL, NULL);
98
src_str = arg1;
99
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_add(Monitor *mon, const QDict *qdict)
100
const char *arg2 = qdict_get_try_str(qdict, "arg2");
101
const char *arg3 = qdict_get_try_str(qdict, "arg3");
102
103
- if (arg2) {
104
+ if (arg3) {
105
s = slirp_lookup(mon, arg1, arg2);
106
redir_str = arg3;
107
+ } else if (arg2) {
108
+ s = slirp_lookup(mon, NULL, arg1);
109
+ redir_str = arg2;
110
} else {
111
s = slirp_lookup(mon, NULL, NULL);
112
redir_str = arg1;
113
--
295
--
114
2.7.4
296
2.7.4
115
297
116
298
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Packet sizes are sometimes different, for example when the network is busy.
3
Knowing the device features is needed for CVQ SVQ, so SVQ knows if it
4
Even with the same payload size, the TCP protocol cannot
4
can handle all commands or not. Extract from
5
guarantee that the same packet is sent in the same way,
5
vhost_vdpa_get_max_queue_pairs so we can reuse it.
6
6
7
like that:
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
We send this payload:
8
Acked-by: Jason Wang <jasowang@redhat.com>
9
------------------------------
9
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
10
| header |1|2|3|4|5|6|7|8|9|0|
11
------------------------------
12
13
primary:
14
ppkt1:
15
----------------
16
| header |1|2|3|
17
----------------
18
ppkt2:
19
------------------------
20
| header |4|5|6|7|8|9|0|
21
------------------------
22
23
secondary:
24
spkt1:
25
------------------------------
26
| header |1|2|3|4|5|6|7|8|9|0|
27
------------------------------
28
29
In the original method, ppkt1 and ppkt2 each differ in size from
30
spkt1, so they cannot be compared and a checkpoint is triggered.
31
32
I have tested FTP get of 200M and 1G files many times, and found that
33
the performance was less than 1% of native.
34
35
Now I reconstructed the comparison of TCP packets based on the
36
TCP sequence number. First of all, ppkt1 and spkt1 have the same
37
starting sequence number, so they can compare, even though their
38
length is different. The smaller payload length (that of ppkt1) is then
39
used as the comparison length. If the payloads are the same, send
40
out ppkt1 and record the offset (the length of ppkt1's payload)
41
in spkt1. In the next comparison, ppkt2 and spkt1 can be compared
42
from the recorded position of spkt1.
43
44
like that:
45
----------------
46
| header |1|2|3| ppkt1
47
---------|-----|
48
| |
49
---------v-----v--------------
50
| header |1|2|3|4|5|6|7|8|9|0| spkt1
51
---------------|\------------|
52
| \offset |
53
---------v-------------v
54
| header |4|5|6|7|8|9|0| ppkt2
55
------------------------
56
57
In this way, the performance can reach 20% of native in my repeated
58
tests.
59
60
Cc: Zhang Chen <zhangckid@gmail.com>
61
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>
62
Cc: Jason Wang <jasowang@redhat.com>
63
64
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
65
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
66
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
67
Reviewed-by: Zhang Chen <zhangckid@gmail.com>
68
Tested-by: Zhang Chen <zhangckid@gmail.com>
69
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
70
---
11
---
71
net/colo-compare.c | 343 +++++++++++++++++++++++++++++++++++------------------
12
net/vhost-vdpa.c | 30 ++++++++++++++++++++----------
72
net/colo.c | 9 ++
13
1 file changed, 20 insertions(+), 10 deletions(-)
73
net/colo.h | 15 +++
74
net/trace-events | 2 +-
75
4 files changed, 250 insertions(+), 119 deletions(-)
76
14
77
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
78
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
79
--- a/net/colo-compare.c
17
--- a/net/vhost-vdpa.c
80
+++ b/net/colo-compare.c
18
+++ b/net/vhost-vdpa.c
81
@@ -XXX,XX +XXX,XX @@
19
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
82
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
20
return nc;
83
#define MAX_QUEUE_SIZE 1024
84
85
+#define COLO_COMPARE_FREE_PRIMARY 0x01
86
+#define COLO_COMPARE_FREE_SECONDARY 0x02
87
+
88
/* TODO: Should be configurable */
89
#define REGULAR_PACKET_CHECK_MS 3000
90
91
@@ -XXX,XX +XXX,XX @@ static gint seq_sorter(Packet *a, Packet *b, gpointer data)
92
return ntohl(atcp->th_seq) - ntohl(btcp->th_seq);
93
}
21
}
94
22
95
+static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
23
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
24
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
96
+{
25
+{
97
+ Packet *pkt = data;
26
+ int ret = ioctl(fd, VHOST_GET_FEATURES, features);
98
+ struct tcphdr *tcphd;
27
+ if (unlikely(ret < 0)) {
99
+
28
+ error_setg_errno(errp, errno,
100
+ tcphd = (struct tcphdr *)pkt->transport_header;
29
+ "Fail to query features from vhost-vDPA device");
101
+
30
+ }
102
+ pkt->tcp_seq = ntohl(tcphd->th_seq);
31
+ return ret;
103
+ pkt->tcp_ack = ntohl(tcphd->th_ack);
104
+ *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack;
105
+ pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
106
+ + (tcphd->th_off << 2) - pkt->vnet_hdr_len;
107
+ pkt->payload_size = pkt->size - pkt->header_size;
108
+ pkt->seq_end = pkt->tcp_seq + pkt->payload_size;
109
+ pkt->flags = tcphd->th_flags;
110
+}
32
+}
111
+
33
+
112
/*
34
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
113
* Return 1 on success, if return 0 means the
35
+ int *has_cvq, Error **errp)
114
* packet will be dropped
115
*/
116
-static int colo_insert_packet(GQueue *queue, Packet *pkt)
117
+static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
118
{
36
{
119
if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) {
37
unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
120
if (pkt->ip->ip_p == IPPROTO_TCP) {
38
g_autofree struct vhost_vdpa_config *config = NULL;
121
+ fill_pkt_tcp_info(pkt, max_ack);
39
__virtio16 *max_queue_pairs;
122
g_queue_insert_sorted(queue,
40
- uint64_t features;
123
pkt,
41
int ret;
124
(GCompareDataFunc)seq_sorter,
42
125
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
43
- ret = ioctl(fd, VHOST_GET_FEATURES, &features);
44
- if (ret) {
45
- error_setg(errp, "Fail to query features from vhost-vDPA device");
46
- return ret;
47
- }
48
-
49
if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
50
*has_cvq = 1;
51
} else {
52
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
53
NetClientState *peer, Error **errp)
54
{
55
const NetdevVhostVDPAOptions *opts;
56
+ uint64_t features;
57
int vdpa_device_fd;
58
g_autofree NetClientState **ncs = NULL;
59
NetClientState *nc;
60
- int queue_pairs, i, has_cvq = 0;
61
+ int queue_pairs, r, i, has_cvq = 0;
62
63
assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
64
opts = &netdev->u.vhost_vdpa;
65
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
66
return -errno;
126
}
67
}
127
68
128
if (mode == PRIMARY_IN) {
69
- queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
129
- if (!colo_insert_packet(&conn->primary_list, pkt)) {
70
+ r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
130
+ if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
71
+ if (unlikely(r < 0)) {
131
error_report("colo compare primary queue size too big,"
72
+ return r;
132
"drop packet");
133
}
134
} else {
135
- if (!colo_insert_packet(&conn->secondary_list, pkt)) {
136
+ if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
137
error_report("colo compare secondary queue size too big,"
138
"drop packet");
139
}
140
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
141
return 0;
142
}
143
144
+static inline bool after(uint32_t seq1, uint32_t seq2)
145
+{
146
+ return (int32_t)(seq1 - seq2) > 0;
147
+}
148
+
149
+static void colo_release_primary_pkt(CompareState *s, Packet *pkt)
150
+{
151
+ int ret;
152
+ ret = compare_chr_send(s,
153
+ pkt->data,
154
+ pkt->size,
155
+ pkt->vnet_hdr_len);
156
+ if (ret < 0) {
157
+ error_report("colo send primary packet failed");
158
+ }
159
+ trace_colo_compare_main("packet same and release packet");
160
+ packet_destroy(pkt, NULL);
161
+}
162
+
163
/*
164
* The IP packets sent by primary and secondary
165
* will be compared in here
166
@@ -XXX,XX +XXX,XX @@ static int colo_compare_packet_payload(Packet *ppkt,
167
}
168
169
/*
170
- * Called from the compare thread on the primary
171
- * for compare tcp packet
172
- * compare_tcp copied from Dr. David Alan Gilbert's branch
173
- */
174
-static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
175
+ * return true means that the payload is consist and
176
+ * need to make the next comparison, false means do
177
+ * the checkpoint
178
+*/
179
+static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
180
+ int8_t *mark, uint32_t max_ack)
181
{
182
- struct tcphdr *ptcp, *stcp;
183
- int res;
184
+ *mark = 0;
185
+
186
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
187
+ if (colo_compare_packet_payload(ppkt, spkt,
188
+ ppkt->header_size, spkt->header_size,
189
+ ppkt->payload_size)) {
190
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
191
+ return true;
192
+ }
193
+ }
194
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
195
+ if (colo_compare_packet_payload(ppkt, spkt,
196
+ ppkt->header_size, spkt->header_size,
197
+ ppkt->payload_size)) {
198
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
199
+ return true;
200
+ }
201
+ }
73
+ }
202
+
74
+
203
+ /* one part of secondary packet payload still need to be compared */
75
+ queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
204
+ if (!after(ppkt->seq_end, spkt->seq_end)) {
76
&has_cvq, errp);
205
+ if (colo_compare_packet_payload(ppkt, spkt,
77
if (queue_pairs < 0) {
206
+ ppkt->header_size + ppkt->offset,
78
qemu_close(vdpa_device_fd);
207
+ spkt->header_size + spkt->offset,
208
+ ppkt->payload_size - ppkt->offset)) {
209
+ if (!after(ppkt->tcp_ack, max_ack)) {
210
+ *mark = COLO_COMPARE_FREE_PRIMARY;
211
+ spkt->offset += ppkt->payload_size - ppkt->offset;
212
+ return true;
213
+ } else {
214
+ /* secondary guest hasn't ack the data, don't send
215
+ * out this packet
216
+ */
217
+ return false;
218
+ }
219
+ }
220
+ } else {
221
+ /* primary packet is longer than secondary packet, compare
222
+ * the same part and mark the primary packet offset
223
+ */
224
+ if (colo_compare_packet_payload(ppkt, spkt,
225
+ ppkt->header_size + ppkt->offset,
226
+ spkt->header_size + spkt->offset,
227
+ spkt->payload_size - spkt->offset)) {
228
+ *mark = COLO_COMPARE_FREE_SECONDARY;
229
+ ppkt->offset += spkt->payload_size - spkt->offset;
230
+ return true;
231
+ }
232
+ }
233
234
- trace_colo_compare_main("compare tcp");
235
+ return false;
236
+}
237
238
- ptcp = (struct tcphdr *)ppkt->transport_header;
239
- stcp = (struct tcphdr *)spkt->transport_header;
240
+static void colo_compare_tcp(CompareState *s, Connection *conn)
241
+{
242
+ Packet *ppkt = NULL, *spkt = NULL;
243
+ int8_t mark;
244
245
/*
246
- * The 'identification' field in the IP header is *very* random
247
- * it almost never matches. Fudge this by ignoring differences in
248
- * unfragmented packets; they'll normally sort themselves out if different
249
- * anyway, and it should recover at the TCP level.
250
- * An alternative would be to get both the primary and secondary to rewrite
251
- * somehow; but that would need some sync traffic to sync the state
252
- */
253
- if (ntohs(ppkt->ip->ip_off) & IP_DF) {
254
- spkt->ip->ip_id = ppkt->ip->ip_id;
255
- /* and the sum will be different if the IDs were different */
256
- spkt->ip->ip_sum = ppkt->ip->ip_sum;
257
+ * If ppkt and spkt have the same payload, but ppkt's ACK
258
+ * is greater than spkt's ACK, in this case we can not
259
+ * send the ppkt because it will cause the secondary guest
260
+ * to miss sending some data in the next. Therefore, we
261
+ * record the maximum ACK in the current queue at both
262
+ * primary side and secondary side. Only when the ack is
263
+ * less than the smaller of the two maximum ack, then we
264
+ * can ensure that the packet's payload is acknowledged by
265
+ * primary and secondary.
266
+ */
267
+ uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack;
268
+
269
+pri:
270
+ if (g_queue_is_empty(&conn->primary_list)) {
271
+ return;
272
}
273
+ ppkt = g_queue_pop_head(&conn->primary_list);
274
+sec:
275
+ if (g_queue_is_empty(&conn->secondary_list)) {
276
+ g_queue_push_head(&conn->primary_list, ppkt);
277
+ return;
278
+ }
279
+ spkt = g_queue_pop_head(&conn->secondary_list);
280
281
- /*
282
- * Check tcp header length for tcp option field.
283
- * th_off > 5 means this tcp packet have options field.
284
- * The tcp options maybe always different.
285
- * for example:
286
- * From RFC 7323.
287
- * TCP Timestamps option (TSopt):
288
- * Kind: 8
289
- *
290
- * Length: 10 bytes
291
- *
292
- * +-------+-------+---------------------+---------------------+
293
- * |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)|
294
- * +-------+-------+---------------------+---------------------+
295
- * 1 1 4 4
296
- *
297
- * In this case the primary guest's timestamp always different with
298
- * the secondary guest's timestamp. COLO just focus on payload,
299
- * so we just need skip this field.
300
- */
301
+ if (ppkt->tcp_seq == ppkt->seq_end) {
302
+ colo_release_primary_pkt(s, ppkt);
303
+ ppkt = NULL;
304
+ }
305
306
- ptrdiff_t ptcp_offset, stcp_offset;
307
+ if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) {
308
+ trace_colo_compare_main("pri: this packet has compared");
309
+ colo_release_primary_pkt(s, ppkt);
310
+ ppkt = NULL;
311
+ }
312
313
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
314
- + (ptcp->th_off << 2) - ppkt->vnet_hdr_len;
315
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
316
- + (stcp->th_off << 2) - spkt->vnet_hdr_len;
317
- if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) {
318
- res = colo_compare_packet_payload(ppkt, spkt,
319
- ptcp_offset, stcp_offset,
320
- ppkt->size - ptcp_offset);
321
+ if (spkt->tcp_seq == spkt->seq_end) {
322
+ packet_destroy(spkt, NULL);
323
+ if (!ppkt) {
324
+ goto pri;
325
+ } else {
326
+ goto sec;
327
+ }
328
} else {
329
- trace_colo_compare_main("TCP: payload size of packets are different");
330
- res = -1;
331
+ if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) {
332
+ trace_colo_compare_main("sec: this packet has compared");
333
+ packet_destroy(spkt, NULL);
334
+ if (!ppkt) {
335
+ goto pri;
336
+ } else {
337
+ goto sec;
338
+ }
339
+ }
340
+ if (!ppkt) {
341
+ g_queue_push_head(&conn->secondary_list, spkt);
342
+ goto pri;
343
+ }
344
}
345
346
- if (res != 0 &&
347
- trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
348
- char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
349
-
350
- strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
351
- strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
352
- strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
353
- strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
354
-
355
- trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
356
- pri_ip_dst, spkt->size,
357
- sec_ip_src, sec_ip_dst);
358
-
359
- trace_colo_compare_tcp_info("pri tcp packet",
360
- ntohl(ptcp->th_seq),
361
- ntohl(ptcp->th_ack),
362
- res, ptcp->th_flags,
363
- ppkt->size);
364
-
365
- trace_colo_compare_tcp_info("sec tcp packet",
366
- ntohl(stcp->th_seq),
367
- ntohl(stcp->th_ack),
368
- res, stcp->th_flags,
369
- spkt->size);
370
+ if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) {
371
+ trace_colo_compare_tcp_info("pri",
372
+ ppkt->tcp_seq, ppkt->tcp_ack,
373
+ ppkt->header_size, ppkt->payload_size,
374
+ ppkt->offset, ppkt->flags);
375
+
376
+ trace_colo_compare_tcp_info("sec",
377
+ spkt->tcp_seq, spkt->tcp_ack,
378
+ spkt->header_size, spkt->payload_size,
379
+ spkt->offset, spkt->flags);
380
+
381
+ if (mark == COLO_COMPARE_FREE_PRIMARY) {
382
+ conn->compare_seq = ppkt->seq_end;
383
+ colo_release_primary_pkt(s, ppkt);
384
+ g_queue_push_head(&conn->secondary_list, spkt);
385
+ goto pri;
386
+ }
387
+ if (mark == COLO_COMPARE_FREE_SECONDARY) {
388
+ conn->compare_seq = spkt->seq_end;
389
+ packet_destroy(spkt, NULL);
390
+ goto sec;
391
+ }
392
+ if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) {
393
+ conn->compare_seq = ppkt->seq_end;
394
+ colo_release_primary_pkt(s, ppkt);
395
+ packet_destroy(spkt, NULL);
396
+ goto pri;
397
+ }
398
+ } else {
399
+ g_queue_push_head(&conn->primary_list, ppkt);
400
+ g_queue_push_head(&conn->secondary_list, spkt);
401
402
qemu_hexdump((char *)ppkt->data, stderr,
403
"colo-compare ppkt", ppkt->size);
404
qemu_hexdump((char *)spkt->data, stderr,
405
"colo-compare spkt", spkt->size);
406
- }
407
408
- return res;
409
+ /*
410
+ * colo_compare_inconsistent_notify();
411
+ * TODO: notice to checkpoint();
412
+ */
413
+ }
414
}
415
416
+
417
/*
418
* Called from the compare thread on the primary
419
* for compare udp packet
420
@@ -XXX,XX +XXX,XX @@ static void colo_old_packet_check(void *opaque)
421
(GCompareFunc)colo_old_packet_check_one_conn);
422
}
423
424
-/*
425
- * Called from the compare thread on the primary
426
- * for compare packet with secondary list of the
427
- * specified connection when a new packet was
428
- * queued to it.
429
- */
430
-static void colo_compare_connection(void *opaque, void *user_data)
431
+static void colo_compare_packet(CompareState *s, Connection *conn,
432
+ int (*HandlePacket)(Packet *spkt,
433
+ Packet *ppkt))
434
{
435
- CompareState *s = user_data;
436
- Connection *conn = opaque;
437
Packet *pkt = NULL;
438
GList *result = NULL;
439
- int ret;
440
441
while (!g_queue_is_empty(&conn->primary_list) &&
442
!g_queue_is_empty(&conn->secondary_list)) {
443
pkt = g_queue_pop_head(&conn->primary_list);
444
- switch (conn->ip_proto) {
445
- case IPPROTO_TCP:
446
- result = g_queue_find_custom(&conn->secondary_list,
447
- pkt, (GCompareFunc)colo_packet_compare_tcp);
448
- break;
449
- case IPPROTO_UDP:
450
- result = g_queue_find_custom(&conn->secondary_list,
451
- pkt, (GCompareFunc)colo_packet_compare_udp);
452
- break;
453
- case IPPROTO_ICMP:
454
- result = g_queue_find_custom(&conn->secondary_list,
455
- pkt, (GCompareFunc)colo_packet_compare_icmp);
456
- break;
457
- default:
458
- result = g_queue_find_custom(&conn->secondary_list,
459
- pkt, (GCompareFunc)colo_packet_compare_other);
460
- break;
461
- }
462
+ result = g_queue_find_custom(&conn->secondary_list,
463
+ pkt, (GCompareFunc)HandlePacket);
464
465
if (result) {
466
- ret = compare_chr_send(s,
467
- pkt->data,
468
- pkt->size,
469
- pkt->vnet_hdr_len);
470
- if (ret < 0) {
471
- error_report("colo_send_primary_packet failed");
472
- }
473
- trace_colo_compare_main("packet same and release packet");
474
+ colo_release_primary_pkt(s, pkt);
475
g_queue_remove(&conn->secondary_list, result->data);
476
- packet_destroy(pkt, NULL);
477
} else {
478
/*
479
* If one packet arrive late, the secondary_list or
480
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
481
}
482
}
483
484
+/*
485
+ * Called from the compare thread on the primary
486
+ * for compare packet with secondary list of the
487
+ * specified connection when a new packet was
488
+ * queued to it.
489
+ */
490
+static void colo_compare_connection(void *opaque, void *user_data)
491
+{
492
+ CompareState *s = user_data;
493
+ Connection *conn = opaque;
494
+
495
+ switch (conn->ip_proto) {
496
+ case IPPROTO_TCP:
497
+ colo_compare_tcp(s, conn);
498
+ break;
499
+ case IPPROTO_UDP:
500
+ colo_compare_packet(s, conn, colo_packet_compare_udp);
501
+ break;
502
+ case IPPROTO_ICMP:
503
+ colo_compare_packet(s, conn, colo_packet_compare_icmp);
504
+ break;
505
+ default:
506
+ colo_compare_packet(s, conn, colo_packet_compare_other);
507
+ break;
508
+ }
509
+}
510
+
511
static int compare_chr_send(CompareState *s,
512
const uint8_t *buf,
513
uint32_t size,
514
diff --git a/net/colo.c b/net/colo.c
515
index XXXXXXX..XXXXXXX 100644
516
--- a/net/colo.c
517
+++ b/net/colo.c
518
@@ -XXX,XX +XXX,XX @@ Connection *connection_new(ConnectionKey *key)
519
conn->processing = false;
520
conn->offset = 0;
521
conn->syn_flag = 0;
522
+ conn->pack = 0;
523
+ conn->sack = 0;
524
g_queue_init(&conn->primary_list);
525
g_queue_init(&conn->secondary_list);
526
527
@@ -XXX,XX +XXX,XX @@ Packet *packet_new(const void *data, int size, int vnet_hdr_len)
528
pkt->size = size;
529
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
530
pkt->vnet_hdr_len = vnet_hdr_len;
531
+ pkt->tcp_seq = 0;
532
+ pkt->tcp_ack = 0;
533
+ pkt->seq_end = 0;
534
+ pkt->header_size = 0;
535
+ pkt->payload_size = 0;
536
+ pkt->offset = 0;
537
+ pkt->flags = 0;
538
539
return pkt;
540
}
541
diff --git a/net/colo.h b/net/colo.h
542
index XXXXXXX..XXXXXXX 100644
543
--- a/net/colo.h
544
+++ b/net/colo.h
545
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
546
int64_t creation_ms;
547
/* Get vnet_hdr_len from filter */
548
uint32_t vnet_hdr_len;
549
+ uint32_t tcp_seq; /* sequence number */
550
+ uint32_t tcp_ack; /* acknowledgement number */
551
+ /* the sequence number of the last byte of the packet */
552
+ uint32_t seq_end;
553
+ uint8_t header_size; /* the header length */
554
+ uint16_t payload_size; /* the payload length */
555
+ /* record the payload offset(the length that has been compared) */
556
+ uint16_t offset;
557
+ uint8_t flags; /* Flags(aka Control bits) */
558
} Packet;
559
560
typedef struct ConnectionKey {
561
@@ -XXX,XX +XXX,XX @@ typedef struct Connection {
562
/* flag to enqueue unprocessed_connections */
563
bool processing;
564
uint8_t ip_proto;
565
+ /* record the sequence number that has been compared */
566
+ uint32_t compare_seq;
567
+ /* the maximum of acknowledgement number in primary_list queue */
568
+ uint32_t pack;
569
+ /* the maximum of acknowledgement number in secondary_list queue */
570
+ uint32_t sack;
571
/* offset = secondary_seq - primary_seq */
572
tcp_seq offset;
573
/*
574
diff --git a/net/trace-events b/net/trace-events
575
index XXXXXXX..XXXXXXX 100644
576
--- a/net/trace-events
577
+++ b/net/trace-events
578
@@ -XXX,XX +XXX,XX @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
579
colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
580
colo_old_packet_check_found(int64_t old_time) "%" PRId64
581
colo_compare_miscompare(void) ""
582
-colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int res, uint32_t flag, int size) "side: %s seq/ack= %u/%u res= %d flags= 0x%x pkt_size: %d\n"
583
+colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int hdlen, int pdlen, int offset, int flags) "%s: seq/ack= %u/%u hdlen= %d pdlen= %d offset= %d flags=%d\n"
584
585
# net/filter-rewriter.c
586
colo_filter_rewriter_debug(void) ""
587
--
79
--
588
2.7.4
80
2.7.4
589
81
590
82
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Modified the function colo_packet_compare_common to prepare for the
3
Since the vhost-vdpa device is exposing _F_LOG, add a migration blocker if
4
tcp packet comparison in the next patch.
4
it uses CVQ.
5
5
6
Cc: Zhang Chen <zhangckid@gmail.com>
6
However, qemu is able to migrate simple devices with no CVQ as long as
7
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>
7
they use SVQ. To allow it, add a placeholder error to vhost_vdpa, and
8
Cc: Jason Wang <jasowang@redhat.com>
8
only add it to vhost_dev when used. The vhost_dev machinery places the migration
9
blocker if needed.
9
10
10
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
11
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
11
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
12
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
13
Reviewed-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
14
---
16
net/colo-compare.c | 88 +++++++++++++++++++++++++++---------------------------
15
hw/virtio/vhost-vdpa.c | 15 +++++++++++++++
17
1 file changed, 44 insertions(+), 44 deletions(-)
16
include/hw/virtio/vhost-vdpa.h | 1 +
17
2 files changed, 16 insertions(+)
18
18
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
19
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
20
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
21
--- a/hw/virtio/vhost-vdpa.c
22
+++ b/net/colo-compare.c
22
+++ b/hw/virtio/vhost-vdpa.c
23
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
23
@@ -XXX,XX +XXX,XX @@
24
* return: 0 means packet same
24
#include "hw/virtio/vhost-shadow-virtqueue.h"
25
* > 0 || < 0 means packet different
25
#include "hw/virtio/vhost-vdpa.h"
26
*/
26
#include "exec/address-spaces.h"
27
-static int colo_packet_compare_common(Packet *ppkt,
27
+#include "migration/blocker.h"
28
- Packet *spkt,
28
#include "qemu/cutils.h"
29
- int poffset,
29
#include "qemu/main-loop.h"
30
- int soffset)
30
#include "cpu.h"
31
+static int colo_compare_packet_payload(Packet *ppkt,
31
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
32
+ Packet *spkt,
32
return true;
33
+ uint16_t poffset,
33
}
34
+ uint16_t soffset,
34
35
+ uint16_t len)
35
+ if (v->migration_blocker) {
36
+ int r = migrate_add_blocker(v->migration_blocker, &err);
37
+ if (unlikely(r < 0)) {
38
+ return false;
39
+ }
40
+ }
36
+
41
+
37
{
42
for (i = 0; i < v->shadow_vqs->len; ++i) {
38
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
43
VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
39
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
44
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
40
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt,
45
@@ -XXX,XX +XXX,XX @@ err:
41
sec_ip_src, sec_ip_dst);
46
vhost_svq_stop(svq);
42
}
47
}
43
48
44
- poffset = ppkt->vnet_hdr_len + poffset;
49
+ if (v->migration_blocker) {
45
- soffset = ppkt->vnet_hdr_len + soffset;
50
+ migrate_del_blocker(v->migration_blocker);
46
-
51
+ }
47
- if (ppkt->size - poffset == spkt->size - soffset) {
52
+
48
- return memcmp(ppkt->data + poffset,
53
return false;
49
- spkt->data + soffset,
50
- spkt->size - soffset);
51
- } else {
52
- trace_colo_compare_main("Net packet size are not the same");
53
- return -1;
54
- }
55
+ return memcmp(ppkt->data + poffset, spkt->data + soffset, len);
56
}
54
}
57
55
58
/*
56
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
59
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
57
}
60
* the secondary guest's timestamp. COLO just focus on payload,
61
* so we just need skip this field.
62
*/
63
- if (ptcp->th_off > 5) {
64
- ptrdiff_t ptcp_offset, stcp_offset;
65
66
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
67
- + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
68
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
69
- + (stcp->th_off * 4) - spkt->vnet_hdr_len;
70
+ ptrdiff_t ptcp_offset, stcp_offset;
71
72
- /*
73
- * When network is busy, some tcp options(like sack) will unpredictable
74
- * occur in primary side or secondary side. it will make packet size
75
- * not same, but the two packet's payload is identical. colo just
76
- * care about packet payload, so we skip the option field.
77
- */
78
- res = colo_packet_compare_common(ppkt, spkt, ptcp_offset, stcp_offset);
79
- } else if (ptcp->th_sum == stcp->th_sum) {
80
- res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN, ETH_HLEN);
81
+ ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
82
+ + (ptcp->th_off << 2) - ppkt->vnet_hdr_len;
83
+ stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
84
+ + (stcp->th_off << 2) - spkt->vnet_hdr_len;
85
+ if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) {
86
+ res = colo_compare_packet_payload(ppkt, spkt,
87
+ ptcp_offset, stcp_offset,
88
+ ppkt->size - ptcp_offset);
89
} else {
90
+ trace_colo_compare_main("TCP: payload size of packets are different");
91
res = -1;
92
}
58
}
93
59
94
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
60
+ if (v->migration_blocker) {
95
*/
61
+ migrate_del_blocker(v->migration_blocker);
96
static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
97
{
98
- int ret;
99
- int network_header_length = ppkt->ip->ip_hl * 4;
100
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
101
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
102
103
trace_colo_compare_main("compare udp");
104
105
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
106
* other field like TOS,TTL,IP Checksum. we only need to compare
107
* the ip payload here.
108
*/
109
- ret = colo_packet_compare_common(ppkt, spkt,
110
- network_header_length + ETH_HLEN,
111
- network_header_length + ETH_HLEN);
112
-
113
- if (ret) {
114
+ if (ppkt->size != spkt->size) {
115
+ trace_colo_compare_main("UDP: payload size of packets are different");
116
+ return -1;
117
+ }
62
+ }
118
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
63
return true;
119
+ ppkt->size - offset)) {
120
trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
121
trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
122
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
123
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
124
qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
125
spkt->size);
126
}
127
+ return -1;
128
+ } else {
129
+ return 0;
130
}
131
-
132
- return ret;
133
}
64
}
134
65
135
/*
66
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
136
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
67
index XXXXXXX..XXXXXXX 100644
137
*/
68
--- a/include/hw/virtio/vhost-vdpa.h
138
static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
69
+++ b/include/hw/virtio/vhost-vdpa.h
139
{
70
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
140
- int network_header_length = ppkt->ip->ip_hl * 4;
71
bool shadow_vqs_enabled;
141
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
72
/* IOVA mapping used by the Shadow Virtqueue */
142
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
73
VhostIOVATree *iova_tree;
143
74
+ Error *migration_blocker;
144
trace_colo_compare_main("compare icmp");
75
GPtrArray *shadow_vqs;
145
76
const VhostShadowVirtqueueOps *shadow_vq_ops;
146
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
77
void *shadow_vq_ops_opaque;
147
* other field like TOS,TTL,IP Checksum. we only need to compare
148
* the ip payload here.
149
*/
150
- if (colo_packet_compare_common(ppkt, spkt,
151
- network_header_length + ETH_HLEN,
152
- network_header_length + ETH_HLEN)) {
153
+ if (ppkt->size != spkt->size) {
154
+ trace_colo_compare_main("ICMP: payload size of packets are different");
155
+ return -1;
156
+ }
157
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
158
+ ppkt->size - offset)) {
159
trace_colo_compare_icmp_miscompare("primary pkt size",
160
ppkt->size);
161
trace_colo_compare_icmp_miscompare("Secondary pkt size",
162
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
163
*/
164
static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
165
{
166
+ uint16_t offset = ppkt->vnet_hdr_len;
167
+
168
trace_colo_compare_main("compare other");
169
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
170
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
171
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
172
sec_ip_src, sec_ip_dst);
173
}
174
175
- return colo_packet_compare_common(ppkt, spkt, 0, 0);
176
+ if (ppkt->size != spkt->size) {
177
+ trace_colo_compare_main("Other: payload size of packets are different");
178
+ return -1;
179
+ }
180
+ return colo_compare_packet_payload(ppkt, spkt, offset, offset,
181
+ ppkt->size - offset);
182
}
183
184
static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
185
--
78
--
186
2.7.4
79
2.7.4
187
80
188
81
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
QEMU can emulate hubs to connect NICs and netdevs. This is currently
3
Finally offering the possibility to enable SVQ from the command line.
4
primarily used for the mis-named 'vlan' feature of the networking
5
subsystem. Now the 'vlan' feature has been marked as deprecated, since
6
its name is rather confusing and the users often rather mis-configure
7
their network when trying to use it. But while the 'vlan' parameter
8
should be removed at one point in time, the basic idea of emulating
9
a hub in QEMU is still good: It's useful for bundling up the output of
10
multiple NICs into one single l2tp netdev for example.
11
4
12
Now to be able to use the hubport feature without 'vlan's, there is one
5
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
missing piece: The possibility to connect a hubport to a netdev, too.
6
Acked-by: Markus Armbruster <armbru@redhat.com>
14
This patch adds this possibility by introducing a new "netdev=..."
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
15
parameter to the hubports.
16
17
To bundle up the output of multiple NICs into one socket netdev, you can
18
now run QEMU with these parameters for example:
19
20
qemu-system-ppc64 ... -netdev socket,id=s1,connect=:11122 \
21
-netdev hubport,hubid=1,id=h1,netdev=s1 \
22
-netdev hubport,hubid=1,id=h2 -device e1000,netdev=h2 \
23
-netdev hubport,hubid=1,id=h3 -device virtio-net-pci,netdev=h3
24
25
For using the socket netdev, you have got to start another QEMU as the
26
receiving side first, for example with network dumping enabled:
27
28
qemu-system-x86_64 -M isapc -netdev socket,id=s0,listen=:11122 \
29
-device ne2k_isa,netdev=s0 \
30
-object filter-dump,id=f1,netdev=s0,file=/tmp/dump.dat
31
32
After the ppc64 guest tried to boot from both NICs, you can see in the
33
dump file (using Wireshark, for example), that the output of both NICs
34
(the e1000 and the virtio-net-pci) has been successfully transferred
35
via the socket netdev in this case.
36
37
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
38
Signed-off-by: Thomas Huth <thuth@redhat.com>
39
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
40
---
9
---
41
net/hub.c | 27 +++++++++++++++++++++------
10
net/vhost-vdpa.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
42
net/hub.h | 3 ++-
11
qapi/net.json | 9 ++++++-
43
net/net.c | 2 +-
12
2 files changed, 77 insertions(+), 4 deletions(-)
44
qapi/net.json | 4 +++-
45
qemu-options.hx | 8 +++++---
46
5 files changed, 32 insertions(+), 12 deletions(-)
47
13
48
diff --git a/net/hub.c b/net/hub.c
14
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
49
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
50
--- a/net/hub.c
16
--- a/net/vhost-vdpa.c
51
+++ b/net/hub.c
17
+++ b/net/vhost-vdpa.c
52
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ const int vdpa_feature_bits[] = {
53
*/
19
VHOST_INVALID_FEATURE_BIT
54
55
#include "qemu/osdep.h"
56
+#include "qapi/error.h"
57
#include "monitor/monitor.h"
58
#include "net/net.h"
59
#include "clients.h"
60
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_hub_port_info = {
61
.cleanup = net_hub_port_cleanup,
62
};
20
};
63
21
64
-static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
22
+/** Supported device specific feature bits with SVQ */
65
+static NetHubPort *net_hub_port_new(NetHub *hub, const char *name,
23
+static const uint64_t vdpa_svq_device_features =
66
+ NetClientState *hubpeer)
24
+ BIT_ULL(VIRTIO_NET_F_CSUM) |
25
+ BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
26
+ BIT_ULL(VIRTIO_NET_F_MTU) |
27
+ BIT_ULL(VIRTIO_NET_F_MAC) |
28
+ BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
29
+ BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
30
+ BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
31
+ BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
32
+ BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
33
+ BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
34
+ BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
35
+ BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
36
+ BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
37
+ BIT_ULL(VIRTIO_NET_F_STATUS) |
38
+ BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
39
+ BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
40
+ BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
41
+ BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
42
+ BIT_ULL(VIRTIO_NET_F_STANDBY);
43
+
44
VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
67
{
45
{
46
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
47
@@ -XXX,XX +XXX,XX @@ err_init:
48
static void vhost_vdpa_cleanup(NetClientState *nc)
49
{
50
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
51
+ struct vhost_dev *dev = &s->vhost_net->dev;
52
53
qemu_vfree(s->cvq_cmd_out_buffer);
54
qemu_vfree(s->cvq_cmd_in_buffer);
55
+ if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
56
+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
57
+ }
58
if (s->vhost_net) {
59
vhost_net_cleanup(s->vhost_net);
60
g_free(s->vhost_net);
61
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
62
int vdpa_device_fd,
63
int queue_pair_index,
64
int nvqs,
65
- bool is_datapath)
66
+ bool is_datapath,
67
+ bool svq,
68
+ VhostIOVATree *iova_tree)
69
{
70
NetClientState *nc = NULL;
71
VhostVDPAState *s;
72
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
73
74
s->vhost_vdpa.device_fd = vdpa_device_fd;
75
s->vhost_vdpa.index = queue_pair_index;
76
+ s->vhost_vdpa.shadow_vqs_enabled = svq;
77
+ s->vhost_vdpa.iova_tree = iova_tree;
78
if (!is_datapath) {
79
s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
80
vhost_vdpa_net_cvq_cmd_page_len());
81
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
82
83
s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
84
s->vhost_vdpa.shadow_vq_ops_opaque = s;
85
+ error_setg(&s->vhost_vdpa.migration_blocker,
86
+ "Migration disabled: vhost-vdpa uses CVQ.");
87
}
88
ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
89
if (ret) {
90
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
91
return nc;
92
}
93
94
+static int vhost_vdpa_get_iova_range(int fd,
95
+ struct vhost_vdpa_iova_range *iova_range)
96
+{
97
+ int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
98
+
99
+ return ret < 0 ? -errno : 0;
100
+}
101
+
102
static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
103
{
104
int ret = ioctl(fd, VHOST_GET_FEATURES, features);
105
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
106
uint64_t features;
107
int vdpa_device_fd;
108
g_autofree NetClientState **ncs = NULL;
109
+ g_autoptr(VhostIOVATree) iova_tree = NULL;
68
NetClientState *nc;
110
NetClientState *nc;
69
NetHubPort *port;
111
int queue_pairs, r, i, has_cvq = 0;
70
@@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
112
71
name = default_name;
113
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
114
return queue_pairs;
72
}
115
}
73
116
74
- nc = qemu_new_net_client(&net_hub_port_info, NULL, "hub", name);
117
+ if (opts->x_svq) {
75
+ nc = qemu_new_net_client(&net_hub_port_info, hubpeer, "hub", name);
118
+ struct vhost_vdpa_iova_range iova_range;
76
port = DO_UPCAST(NetHubPort, nc, nc);
119
+
77
port->id = id;
120
+ uint64_t invalid_dev_features =
78
port->hub = hub;
121
+ features & ~vdpa_svq_device_features &
79
@@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
122
+ /* Transport are all accepted at this point */
80
123
+ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
81
/**
124
+ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
82
* Create a port on a given hub
125
+
83
+ * @hub_id: Number of the hub
126
+ if (invalid_dev_features) {
84
* @name: Net client name or NULL for default name.
127
+ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
85
+ * @hubpeer: Peer to use (if "netdev=id" has been specified)
128
+ invalid_dev_features);
86
*
129
+ goto err_svq;
87
* If there is no existing hub with the given id then a new hub is created.
130
+ }
88
*/
131
+
89
-NetClientState *net_hub_add_port(int hub_id, const char *name)
132
+ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
90
+NetClientState *net_hub_add_port(int hub_id, const char *name,
133
+ iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
91
+ NetClientState *hubpeer)
134
+ }
92
{
135
+
93
NetHub *hub;
136
ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
94
NetHubPort *port;
137
95
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_add_port(int hub_id, const char *name)
138
for (i = 0; i < queue_pairs; i++) {
96
hub = net_hub_new(hub_id);
139
ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
140
- vdpa_device_fd, i, 2, true);
141
+ vdpa_device_fd, i, 2, true, opts->x_svq,
142
+ iova_tree);
143
if (!ncs[i])
144
goto err;
97
}
145
}
98
146
99
- port = net_hub_port_new(hub, name);
147
if (has_cvq) {
100
+ port = net_hub_port_new(hub, name, hubpeer);
148
nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
101
return &port->nc;
149
- vdpa_device_fd, i, 1, false);
102
}
150
+ vdpa_device_fd, i, 1, false,
103
151
+ opts->x_svq, iova_tree);
104
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_port_find(int hub_id)
152
if (!nc)
153
goto err;
154
}
155
156
+ /* iova_tree ownership belongs to last NetClientState */
157
+ g_steal_pointer(&iova_tree);
158
return 0;
159
160
err:
161
@@ -XXX,XX +XXX,XX @@ err:
162
qemu_del_net_client(ncs[i]);
105
}
163
}
106
}
164
}
107
108
- nc = net_hub_add_port(hub_id, NULL);
109
+ nc = net_hub_add_port(hub_id, NULL, NULL);
110
return nc;
111
}
112
113
@@ -XXX,XX +XXX,XX @@ int net_init_hubport(const Netdev *netdev, const char *name,
114
NetClientState *peer, Error **errp)
115
{
116
const NetdevHubPortOptions *hubport;
117
+ NetClientState *hubpeer = NULL;
118
119
assert(netdev->type == NET_CLIENT_DRIVER_HUBPORT);
120
assert(!peer);
121
hubport = &netdev->u.hubport;
122
123
- net_hub_add_port(hubport->hubid, name);
124
+ if (hubport->has_netdev) {
125
+ hubpeer = qemu_find_netdev(hubport->netdev);
126
+ if (!hubpeer) {
127
+ error_setg(errp, "netdev '%s' not found", hubport->netdev);
128
+ return -1;
129
+ }
130
+ }
131
+
165
+
132
+ net_hub_add_port(hubport->hubid, name, hubpeer);
166
+err_svq:
133
+
167
qemu_close(vdpa_device_fd);
134
return 0;
168
135
}
169
return -1;
136
137
diff --git a/net/hub.h b/net/hub.h
138
index XXXXXXX..XXXXXXX 100644
139
--- a/net/hub.h
140
+++ b/net/hub.h
141
@@ -XXX,XX +XXX,XX @@
142
143
#include "qemu-common.h"
144
145
-NetClientState *net_hub_add_port(int hub_id, const char *name);
146
+NetClientState *net_hub_add_port(int hub_id, const char *name,
147
+ NetClientState *hubpeer);
148
NetClientState *net_hub_find_client_by_name(int hub_id, const char *name);
149
void net_hub_info(Monitor *mon);
150
void net_hub_check_clients(void);
151
diff --git a/net/net.c b/net/net.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/net/net.c
154
+++ b/net/net.c
155
@@ -XXX,XX +XXX,XX @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
156
/* Do not add to a vlan if it's a nic with a netdev= parameter. */
157
if (netdev->type != NET_CLIENT_DRIVER_NIC ||
158
!opts->u.nic.has_netdev) {
159
- peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL);
160
+ peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL, NULL);
161
}
162
163
if (net->has_vlan && !vlan_warned) {
164
diff --git a/qapi/net.json b/qapi/net.json
170
diff --git a/qapi/net.json b/qapi/net.json
165
index XXXXXXX..XXXXXXX 100644
171
index XXXXXXX..XXXXXXX 100644
166
--- a/qapi/net.json
172
--- a/qapi/net.json
167
+++ b/qapi/net.json
173
+++ b/qapi/net.json
168
@@ -XXX,XX +XXX,XX @@
174
@@ -XXX,XX +XXX,XX @@
169
# Connect two or more net clients through a software hub.
175
# @queues: number of queues to be created for multiqueue vhost-vdpa
176
# (default: 1)
170
#
177
#
171
# @hubid: hub identifier number
178
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
172
+# @netdev: used to connect hub to a netdev instead of a device (since 2.12)
179
+# (default: false)
173
#
180
+#
174
# Since: 1.2
181
+# Features:
182
+# @unstable: Member @x-svq is experimental.
183
+#
184
# Since: 5.1
175
##
185
##
176
{ 'struct': 'NetdevHubPortOptions',
186
{ 'struct': 'NetdevVhostVDPAOptions',
177
'data': {
187
'data': {
178
- 'hubid': 'int32' } }
188
'*vhostdev': 'str',
179
+ 'hubid': 'int32',
189
- '*queues': 'int' } }
180
+ '*netdev': 'str' } }
190
+ '*queues': 'int',
191
+ '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } }
181
192
182
##
193
##
183
# @NetdevNetmapOptions:
194
# @NetdevVmnetHostOptions:
184
diff --git a/qemu-options.hx b/qemu-options.hx
185
index XXXXXXX..XXXXXXX 100644
186
--- a/qemu-options.hx
187
+++ b/qemu-options.hx
188
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
189
#endif
190
"-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
191
" configure a vhost-user network, backed by a chardev 'dev'\n"
192
- "-netdev hubport,id=str,hubid=n\n"
193
+ "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
194
" configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL)
195
DEF("net", HAS_ARG, QEMU_OPTION_net,
196
"-net nic[,vlan=n][,netdev=nd][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
197
@@ -XXX,XX +XXX,XX @@ vde_switch -F -sock /tmp/myswitch
198
qemu-system-i386 linux.img -net nic -net vde,sock=/tmp/myswitch
199
@end example
200
201
-@item -netdev hubport,id=@var{id},hubid=@var{hubid}
202
+@item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
203
204
Create a hub port on QEMU "vlan" @var{hubid}.
205
206
The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
207
netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the
208
-required hub automatically.
209
+required hub automatically. Alternatively, you can also connect the hubport
210
+to another netdev with ID @var{nd} by using the @option{netdev=@var{nd}}
211
+option.
212
213
@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
214
215
--
195
--
216
2.7.4
196
2.7.4
217
197
218
198
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <chen.zhang@intel.com>
1
2
3
If the checkpoint occurs when the guest finishes restarting
4
but has not started running, the runstate_set() may reject
5
the transition from COLO to PRELAUNCH with the crash log:
6
7
{"timestamp": {"seconds": 1593484591, "microseconds": 26605},\
8
"event": "RESET", "data": {"guest": true, "reason": "guest-reset"}}
9
qemu-system-x86_64: invalid runstate transition: 'colo' -> 'prelaunch'
10
11
Long-term testing says that it's pretty safe.
12
13
Signed-off-by: Like Xu <like.xu@linux.intel.com>
14
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
15
Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
softmmu/runstate.c | 1 +
19
1 file changed, 1 insertion(+)
20
21
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/softmmu/runstate.c
24
+++ b/softmmu/runstate.c
25
@@ -XXX,XX +XXX,XX @@ static const RunStateTransition runstate_transitions_def[] = {
26
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
27
28
{ RUN_STATE_COLO, RUN_STATE_RUNNING },
29
+ { RUN_STATE_COLO, RUN_STATE_PRELAUNCH },
30
{ RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
31
32
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
33
--
34
2.7.4
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <chen.zhang@intel.com>
1
2
3
We notice the QEMU may crash when the guest has too many
4
incoming network connections with the following log:
5
6
15197@1593578622.668573:colo_proxy_main : colo proxy connection hashtable full, clear it
7
free(): invalid pointer
8
[1] 15195 abort (core dumped) qemu-system-x86_64 ....
9
10
This is because we create the s->connection_track_table with
11
g_hash_table_new_full() which is defined as:
12
13
GHashTable * g_hash_table_new_full (GHashFunc hash_func,
14
GEqualFunc key_equal_func,
15
GDestroyNotify key_destroy_func,
16
GDestroyNotify value_destroy_func);
17
18
The fourth parameter connection_destroy() will be called to free the
19
memory allocated for all 'Connection' values in the hashtable when
20
we call g_hash_table_remove_all() in the connection_hashtable_reset().
21
22
But both connection_track_table and conn_list refer to the same
23
conn instance, which triggers a double free when conn_list is cleared. So this
24
patch removes the free action on the hash table side to avoid double-freeing the
25
conn.
26
27
Signed-off-by: Like Xu <like.xu@linux.intel.com>
28
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
29
Signed-off-by: Jason Wang <jasowang@redhat.com>
30
---
31
net/colo-compare.c | 2 +-
32
net/filter-rewriter.c | 2 +-
33
2 files changed, 2 insertions(+), 2 deletions(-)
34
35
diff --git a/net/colo-compare.c b/net/colo-compare.c
36
index XXXXXXX..XXXXXXX 100644
37
--- a/net/colo-compare.c
38
+++ b/net/colo-compare.c
39
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
40
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
41
connection_key_equal,
42
g_free,
43
- connection_destroy);
44
+ NULL);
45
46
colo_compare_iothread(s);
47
48
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
49
index XXXXXXX..XXXXXXX 100644
50
--- a/net/filter-rewriter.c
51
+++ b/net/filter-rewriter.c
52
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
53
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
54
connection_key_equal,
55
g_free,
56
- connection_destroy);
57
+ NULL);
58
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
59
}
60
61
--
62
2.7.4
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <chen.zhang@intel.com>
1
2
3
Filter-rewriter does not need to track connections in conn_list.
4
This patch fixes the glib g_queue_is_empty assertion when the COLO guest
5
keeps a lot of network connections.
6
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
net/colo.c | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
13
14
diff --git a/net/colo.c b/net/colo.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/net/colo.c
17
+++ b/net/colo.c
18
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
19
/*
20
* clear the conn_list
21
*/
22
- while (!g_queue_is_empty(conn_list)) {
23
+ while (conn_list && !g_queue_is_empty(conn_list)) {
24
connection_destroy(g_queue_pop_head(conn_list));
25
}
26
}
27
--
28
2.7.4
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <chen.zhang@intel.com>
1
2
3
When COLO uses only one vnet_hdr_support parameter between
4
filter-redirector and filter-mirror (or colo-compare), COLO will crash
5
with a segmentation fault. The backtrace is as follows:
6
7
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
8
0x0000555555cb200b in eth_get_l2_hdr_length (p=0x0)
9
at /home/tao/project/COLO/colo-qemu/include/net/eth.h:296
10
296 uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
11
(gdb) bt
12
0 0x0000555555cb200b in eth_get_l2_hdr_length (p=0x0)
13
at /home/tao/project/COLO/colo-qemu/include/net/eth.h:296
14
1 0x0000555555cb22b4 in parse_packet_early (pkt=0x555556a44840) at
15
net/colo.c:49
16
2 0x0000555555cb2b91 in is_tcp_packet (pkt=0x555556a44840) at
17
net/filter-rewriter.c:63
18
19
So a wrong vnet_hdr_len will cause pkt->data to become NULL. Add a check to
20
raise an error and add trace-events to track vnet_hdr_len.
21
22
Signed-off-by: Tao Xu <tao3.xu@intel.com>
23
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
24
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
25
Signed-off-by: Jason Wang <jasowang@redhat.com>
26
---
27
net/colo.c | 9 ++++++++-
28
net/trace-events | 1 +
29
2 files changed, 9 insertions(+), 1 deletion(-)
30
31
diff --git a/net/colo.c b/net/colo.c
32
index XXXXXXX..XXXXXXX 100644
33
--- a/net/colo.c
34
+++ b/net/colo.c
35
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
36
static const uint8_t vlan[] = {0x81, 0x00};
37
uint8_t *data = pkt->data + pkt->vnet_hdr_len;
38
uint16_t l3_proto;
39
- ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
40
+ ssize_t l2hdr_len;
41
+
42
+ if (data == NULL) {
43
+ trace_colo_proxy_main_vnet_info("This packet is not parsed correctly, "
44
+ "pkt->vnet_hdr_len", pkt->vnet_hdr_len);
45
+ return 1;
46
+ }
47
+ l2hdr_len = eth_get_l2_hdr_length(data);
48
49
if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
50
trace_colo_proxy_main("pkt->size < ETH_HLEN");
51
diff --git a/net/trace-events b/net/trace-events
52
index XXXXXXX..XXXXXXX 100644
53
--- a/net/trace-events
54
+++ b/net/trace-events
55
@@ -XXX,XX +XXX,XX @@ vhost_user_event(const char *chr, int event) "chr: %s got event: %d"
56
57
# colo.c
58
colo_proxy_main(const char *chr) ": %s"
59
+colo_proxy_main_vnet_info(const char *sta, int size) ": %s = %d"
60
61
# colo-compare.c
62
colo_compare_main(const char *chr) ": %s"
63
--
64
2.7.4
diff view generated by jsdifflib