1
The following changes since commit 6632f6ff96f0537fc34cdc00c760656fc62e23c5:
1
The following changes since commit d48125de38f48a61d6423ef6a01156d6dff9ee2c:
2
2
3
Merge remote-tracking branch 'remotes/famz/tags/block-and-testing-pull-request' into staging (2017-07-17 11:46:36 +0100)
3
Merge tag 'kraxel-20220719-pull-request' of https://gitlab.com/kraxel/qemu into staging (2022-07-19 17:40:36 +0100)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 189ae6bb5ce1f5a322f8691d00fe942ba43dd601:
9
for you to fetch changes up to 8bdab83b34efb0b598be4e5b98e4f466ca5f2f80:
10
10
11
virtio-net: fix offload ctrl endian (2017-07-17 20:13:56 +0800)
11
net/colo.c: fix segmentation fault when packet is not parsed correctly (2022-07-20 16:58:08 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
- fix virtio-net ctrl offload endian
15
Changes since V1:
16
- vnet header support for various COLO netfilters and compare thread
16
- Fix build errors of vhost-vdpa when virtio-net is not set
17
17
18
----------------------------------------------------------------
18
----------------------------------------------------------------
19
Jason Wang (1):
19
Eugenio Pérez (21):
20
virtio-net: fix offload ctrl endian
20
vhost: move descriptor translation to vhost_svq_vring_write_descs
21
virtio-net: Expose MAC_TABLE_ENTRIES
22
virtio-net: Expose ctrl virtqueue logic
23
vdpa: Avoid compiler to squash reads to used idx
24
vhost: Reorder vhost_svq_kick
25
vhost: Move vhost_svq_kick call to vhost_svq_add
26
vhost: Check for queue full at vhost_svq_add
27
vhost: Decouple vhost_svq_add from VirtQueueElement
28
vhost: Add SVQDescState
29
vhost: Track number of descs in SVQDescState
30
vhost: add vhost_svq_push_elem
31
vhost: Expose vhost_svq_add
32
vhost: add vhost_svq_poll
33
vhost: Add svq avail_handler callback
34
vdpa: Export vhost_vdpa_dma_map and unmap calls
35
vhost-net-vdpa: add stubs for when no virtio-net device is present
36
vdpa: manual forward CVQ buffers
37
vdpa: Buffer CVQ support on shadow virtqueue
38
vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs
39
vdpa: Add device migration blocker
40
vdpa: Add x-svq to NetdevVhostVDPAOptions
21
41
22
Michal Privoznik (1):
42
Zhang Chen (4):
23
virtion-net: Prefer is_power_of_2()
43
softmmu/runstate.c: add RunStateTransition support form COLO to PRELAUNCH
44
net/colo: Fix a "double free" crash to clear the conn_list
45
net/colo.c: No need to track conn_list for filter-rewriter
46
net/colo.c: fix segmentation fault when packet is not parsed correctly
24
47
25
Zhang Chen (12):
48
hw/net/virtio-net.c | 85 +++++----
26
net: Add vnet_hdr_len arguments in NetClientState
49
hw/virtio/vhost-shadow-virtqueue.c | 210 +++++++++++++++-------
27
net/net.c: Add vnet_hdr support in SocketReadState
50
hw/virtio/vhost-shadow-virtqueue.h | 52 +++++-
28
net/filter-mirror.c: Introduce parameter for filter_send()
51
hw/virtio/vhost-vdpa.c | 26 ++-
29
net/filter-mirror.c: Make filter mirror support vnet support.
52
include/hw/virtio/vhost-vdpa.h | 8 +
30
net/filter-mirror.c: Add new option to enable vnet support for filter-redirector
53
include/hw/virtio/virtio-net.h | 7 +
31
net/colo.c: Make vnet_hdr_len as packet property
54
net/colo-compare.c | 2 +-
32
net/colo-compare.c: Introduce parameter for compare_chr_send()
55
net/colo.c | 11 +-
33
net/colo-compare.c: Make colo-compare support vnet_hdr_len
56
net/filter-rewriter.c | 2 +-
34
net/colo.c: Add vnet packet parse feature in colo-proxy
57
net/meson.build | 3 +-
35
net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare
58
net/trace-events | 1 +
36
net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len
59
net/vhost-vdpa-stub.c | 21 +++
37
docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header
60
net/vhost-vdpa.c | 357 +++++++++++++++++++++++++++++++++++--
38
61
qapi/net.json | 9 +-
39
docs/colo-proxy.txt | 26 ++++++++++++++++
62
softmmu/runstate.c | 1 +
40
hw/net/virtio-net.c | 4 ++-
63
15 files changed, 671 insertions(+), 124 deletions(-)
41
include/net/net.h | 10 ++++--
64
create mode 100644 net/vhost-vdpa-stub.c
42
net/colo-compare.c | 84 ++++++++++++++++++++++++++++++++++++++++++---------
43
net/colo.c | 9 +++---
44
net/colo.h | 4 ++-
45
net/filter-mirror.c | 75 +++++++++++++++++++++++++++++++++++++++++----
46
net/filter-rewriter.c | 37 ++++++++++++++++++++++-
47
net/net.c | 37 ++++++++++++++++++++---
48
net/socket.c | 8 ++---
49
qemu-options.hx | 19 ++++++------
50
11 files changed, 265 insertions(+), 48 deletions(-)
51
65
52
66
67
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
This patch changes the compare_chr_send() parameter from CharBackend to CompareState,
3
It's done for both in and out descriptors so it's better placed here.
4
so we can get more information like vnet_hdr (we use it to support packets with vnet_header).
5
4
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
Acked-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
---
9
net/colo-compare.c | 14 +++++++-------
10
hw/virtio/vhost-shadow-virtqueue.c | 38 +++++++++++++++++++++++++++-----------
10
1 file changed, 7 insertions(+), 7 deletions(-)
11
1 file changed, 27 insertions(+), 11 deletions(-)
11
12
12
diff --git a/net/colo-compare.c b/net/colo-compare.c
13
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/net/colo-compare.c
15
--- a/hw/virtio/vhost-shadow-virtqueue.c
15
+++ b/net/colo-compare.c
16
+++ b/hw/virtio/vhost-shadow-virtqueue.c
16
@@ -XXX,XX +XXX,XX @@ enum {
17
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
17
SECONDARY_IN,
18
return true;
18
};
19
}
19
20
20
-static int compare_chr_send(CharBackend *out,
21
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
21
+static int compare_chr_send(CompareState *s,
22
- const struct iovec *iovec, size_t num,
22
const uint8_t *buf,
23
- bool more_descs, bool write)
23
uint32_t size);
24
+/**
24
25
+ * Write descriptors to SVQ vring
25
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
26
+ *
26
}
27
+ * @svq: The shadow virtqueue
27
28
+ * @sg: Cache for hwaddr
28
if (result) {
29
+ * @iovec: The iovec from the guest
29
- ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size);
30
+ * @num: iovec length
30
+ ret = compare_chr_send(s, pkt->data, pkt->size);
31
+ * @more_descs: True if more descriptors come in the chain
31
if (ret < 0) {
32
+ * @write: True if they are writeable descriptors
32
error_report("colo_send_primary_packet failed");
33
+ *
33
}
34
+ * Return true if success, false otherwise and print error.
34
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
35
+ */
36
+static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
37
+ const struct iovec *iovec, size_t num,
38
+ bool more_descs, bool write)
39
{
40
uint16_t i = svq->free_head, last = svq->free_head;
41
unsigned n;
42
uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
43
vring_desc_t *descs = svq->vring.desc;
44
+ bool ok;
45
46
if (num == 0) {
47
- return;
48
+ return true;
49
+ }
50
+
51
+ ok = vhost_svq_translate_addr(svq, sg, iovec, num);
52
+ if (unlikely(!ok)) {
53
+ return false;
35
}
54
}
55
56
for (n = 0; n < num; n++) {
57
@@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
58
}
59
60
svq->free_head = le16_to_cpu(svq->desc_next[last]);
61
+ return true;
36
}
62
}
37
63
38
-static int compare_chr_send(CharBackend *out,
64
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
39
+static int compare_chr_send(CompareState *s,
65
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
40
const uint8_t *buf,
66
return false;
41
uint32_t size)
42
{
43
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CharBackend *out,
44
return 0;
45
}
67
}
46
68
47
- ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
69
- ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
48
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
70
+ ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
49
if (ret != sizeof(len)) {
71
+ elem->in_num > 0, false);
50
goto err;
72
if (unlikely(!ok)) {
73
return false;
51
}
74
}
52
75
- vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
53
- ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
76
- elem->in_num > 0, false);
54
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
77
-
55
if (ret != size) {
78
56
goto err;
79
- ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
80
+ ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false,
81
+ true);
82
if (unlikely(!ok)) {
83
return false;
57
}
84
}
58
@@ -XXX,XX +XXX,XX @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs)
85
59
86
- vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
60
if (packet_enqueue(s, PRIMARY_IN)) {
87
-
61
trace_colo_compare_main("primary: unsupported packet in");
88
/*
62
- compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len);
89
* Put the entry in the available array (but don't update avail->idx until
63
+ compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
90
* they do sync).
64
} else {
65
/* compare connection */
66
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
67
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
68
69
while (!g_queue_is_empty(&conn->primary_list)) {
70
pkt = g_queue_pop_head(&conn->primary_list);
71
- compare_chr_send(&s->chr_out, pkt->data, pkt->size);
72
+ compare_chr_send(s, pkt->data, pkt->size);
73
packet_destroy(pkt, NULL);
74
}
75
while (!g_queue_is_empty(&conn->secondary_list)) {
76
--
91
--
77
2.7.4
92
2.7.4
78
93
79
94
diff view generated by jsdifflib
1
Spec said offloads should be le64, so use virtio_ldq_p() to guarantee
1
From: Eugenio Pérez <eperezma@redhat.com>
2
valid endian.
3
2
4
Fixes: 644c98587d4c ("virtio-net: dynamic network offloads configuration")
3
vhost-vdpa control virtqueue needs to know the maximum entries supported
5
Cc: qemu-stable@nongnu.org
4
by the virtio-net device, so we know if it is possible to apply the
6
Cc: Dmitry Fleytman <dfleytma@redhat.com>
5
filter.
6
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
10
---
9
hw/net/virtio-net.c | 2 ++
11
hw/net/virtio-net.c | 1 -
10
1 file changed, 2 insertions(+)
12
include/hw/virtio/virtio-net.h | 3 +++
13
2 files changed, 3 insertions(+), 1 deletion(-)
11
14
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
15
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
17
--- a/hw/net/virtio-net.c
15
+++ b/hw/net/virtio-net.c
18
+++ b/hw/net/virtio-net.c
16
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
19
@@ -XXX,XX +XXX,XX @@
17
if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
20
18
uint64_t supported_offloads;
21
#define VIRTIO_NET_VM_VERSION 11
19
22
20
+ offloads = virtio_ldq_p(vdev, &offloads);
23
-#define MAC_TABLE_ENTRIES 64
24
#define MAX_VLAN (1 << 12) /* Per 802.1Q definition */
25
26
/* previously fixed value */
27
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
28
index XXXXXXX..XXXXXXX 100644
29
--- a/include/hw/virtio/virtio-net.h
30
+++ b/include/hw/virtio/virtio-net.h
31
@@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET)
32
* and latency. */
33
#define TX_BURST 256
34
35
+/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */
36
+#define MAC_TABLE_ENTRIES 64
21
+
37
+
22
if (!n->has_vnet_hdr) {
38
typedef struct virtio_net_conf
23
return VIRTIO_NET_ERR;
39
{
24
}
40
uint32_t txtimer;
25
--
41
--
26
2.7.4
42
2.7.4
27
43
28
44
diff view generated by jsdifflib
1
From: Michal Privoznik <mprivozn@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We have a function that checks if given number is power of two.
3
This allows external vhost-net devices to modify the state of the
4
We should prefer it instead of expanding the check on our own.
4
VirtIO device model once the vhost-vdpa device has acknowledged the
5
control commands.
5
6
6
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
10
---
9
hw/net/virtio-net.c | 2 +-
11
hw/net/virtio-net.c | 84 ++++++++++++++++++++++++------------------
10
1 file changed, 1 insertion(+), 1 deletion(-)
12
include/hw/virtio/virtio-net.h | 4 ++
13
2 files changed, 53 insertions(+), 35 deletions(-)
11
14
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
15
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
17
--- a/hw/net/virtio-net.c
15
+++ b/hw/net/virtio-net.c
18
+++ b/hw/net/virtio-net.c
16
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
19
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd,
17
*/
20
return VIRTIO_NET_OK;
18
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
21
}
19
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
22
20
- (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
23
-static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
21
+ !is_power_of_2(n->net_conf.rx_queue_size)) {
24
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
22
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
25
+ const struct iovec *in_sg, unsigned in_num,
23
"must be a power of 2 between %d and %d.",
26
+ const struct iovec *out_sg,
24
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
27
+ unsigned out_num)
28
{
29
VirtIONet *n = VIRTIO_NET(vdev);
30
struct virtio_net_ctrl_hdr ctrl;
31
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
32
- VirtQueueElement *elem;
33
size_t s;
34
struct iovec *iov, *iov2;
35
- unsigned int iov_cnt;
36
+
37
+ if (iov_size(in_sg, in_num) < sizeof(status) ||
38
+ iov_size(out_sg, out_num) < sizeof(ctrl)) {
39
+ virtio_error(vdev, "virtio-net ctrl missing headers");
40
+ return 0;
41
+ }
42
+
43
+ iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num);
44
+ s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl));
45
+ iov_discard_front(&iov, &out_num, sizeof(ctrl));
46
+ if (s != sizeof(ctrl)) {
47
+ status = VIRTIO_NET_ERR;
48
+ } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
49
+ status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num);
50
+ } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
51
+ status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num);
52
+ } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
53
+ status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num);
54
+ } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
55
+ status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num);
56
+ } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
57
+ status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num);
58
+ } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
59
+ status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num);
60
+ }
61
+
62
+ s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status));
63
+ assert(s == sizeof(status));
64
+
65
+ g_free(iov2);
66
+ return sizeof(status);
67
+}
68
+
69
+static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq)
70
+{
71
+ VirtQueueElement *elem;
72
73
for (;;) {
74
+ size_t written;
75
elem = virtqueue_pop(vq, sizeof(VirtQueueElement));
76
if (!elem) {
77
break;
78
}
79
- if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) ||
80
- iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) {
81
- virtio_error(vdev, "virtio-net ctrl missing headers");
82
+
83
+ written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num,
84
+ elem->out_sg, elem->out_num);
85
+ if (written > 0) {
86
+ virtqueue_push(vq, elem, written);
87
+ virtio_notify(vdev, vq);
88
+ g_free(elem);
89
+ } else {
90
virtqueue_detach_element(vq, elem, 0);
91
g_free(elem);
92
break;
93
}
94
-
95
- iov_cnt = elem->out_num;
96
- iov2 = iov = g_memdup2(elem->out_sg,
97
- sizeof(struct iovec) * elem->out_num);
98
- s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl));
99
- iov_discard_front(&iov, &iov_cnt, sizeof(ctrl));
100
- if (s != sizeof(ctrl)) {
101
- status = VIRTIO_NET_ERR;
102
- } else if (ctrl.class == VIRTIO_NET_CTRL_RX) {
103
- status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt);
104
- } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) {
105
- status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt);
106
- } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) {
107
- status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt);
108
- } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) {
109
- status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt);
110
- } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) {
111
- status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt);
112
- } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) {
113
- status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt);
114
- }
115
-
116
- s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status));
117
- assert(s == sizeof(status));
118
-
119
- virtqueue_push(vq, elem, sizeof(status));
120
- virtio_notify(vdev, vq);
121
- g_free(iov2);
122
- g_free(elem);
123
}
124
}
125
126
diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h
127
index XXXXXXX..XXXXXXX 100644
128
--- a/include/hw/virtio/virtio-net.h
129
+++ b/include/hw/virtio/virtio-net.h
130
@@ -XXX,XX +XXX,XX @@ struct VirtIONet {
131
struct EBPFRSSContext ebpf_rss;
132
};
133
134
+size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev,
135
+ const struct iovec *in_sg, unsigned in_num,
136
+ const struct iovec *out_sg,
137
+ unsigned out_num);
138
void virtio_net_set_netclient_name(VirtIONet *n, const char *name,
139
const char *type);
140
25
--
141
--
26
2.7.4
142
2.7.4
27
143
28
144
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
In the next patch we will allow busypolling of this value. The compiler
4
has a running path where shadow_used_idx, last_used_idx, and vring used
5
idx are not modified within the same thread busypolling.
6
7
This was not an issue before since we always cleared device event
8
notifier before checking it, and that could act as memory barrier.
9
However, the busypoll needs something similar to kernel READ_ONCE.
10
11
Let's add it here, separated from the polling.
12
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
hw/virtio/vhost-shadow-virtqueue.c | 3 ++-
17
1 file changed, 2 insertions(+), 1 deletion(-)
18
19
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n)
24
25
static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
26
{
27
+ uint16_t *used_idx = &svq->vring.used->idx;
28
if (svq->last_used_idx != svq->shadow_used_idx) {
29
return true;
30
}
31
32
- svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
33
+ svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx);
34
35
return svq->last_used_idx != svq->shadow_used_idx;
36
}
37
--
38
2.7.4
39
40
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-redirector, default is disabled.
3
Future code needs to call it from vhost_svq_add.
4
If you use the virtio-net-pci net driver or another driver that needs vnet_hdr, please enable it.
5
Because colo-compare or other modules need the vnet_hdr_len to parse
6
packets, we add this new option to send the len to others.
7
You can use it for example:
8
-object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support
9
4
10
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
No functional change intended.
6
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
10
---
13
net/filter-mirror.c | 23 +++++++++++++++++++++++
11
hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++++++--------------
14
qemu-options.hx | 6 +++---
12
1 file changed, 14 insertions(+), 14 deletions(-)
15
2 files changed, 26 insertions(+), 3 deletions(-)
16
13
17
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/net/filter-mirror.c
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
20
+++ b/net/filter-mirror.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
21
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
18
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
22
s->outdev = g_strdup(value);
19
return true;
23
}
20
}
24
21
25
+static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp)
22
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
26
+{
23
+{
27
+ MirrorState *s = FILTER_REDIRECTOR(obj);
24
+ /*
25
+ * We need to expose the available array entries before checking the used
26
+ * flags
27
+ */
28
+ smp_mb();
29
+ if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
30
+ return;
31
+ }
28
+
32
+
29
+ return s->vnet_hdr;
33
+ event_notifier_set(&svq->hdev_kick);
30
+}
34
+}
31
+
35
+
32
+static void filter_redirector_set_vnet_hdr(Object *obj,
36
/**
33
+ bool value,
37
* Add an element to a SVQ.
34
+ Error **errp)
38
*
35
+{
39
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
36
+ MirrorState *s = FILTER_REDIRECTOR(obj);
40
return true;
37
+
38
+ s->vnet_hdr = value;
39
+}
40
+
41
static void filter_mirror_init(Object *obj)
42
{
43
MirrorState *s = FILTER_MIRROR(obj);
44
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_init(Object *obj)
45
46
static void filter_redirector_init(Object *obj)
47
{
48
+ MirrorState *s = FILTER_REDIRECTOR(obj);
49
+
50
object_property_add_str(obj, "indev", filter_redirector_get_indev,
51
filter_redirector_set_indev, NULL);
52
object_property_add_str(obj, "outdev", filter_redirector_get_outdev,
53
filter_redirector_set_outdev, NULL);
54
+
55
+ s->vnet_hdr = false;
56
+ object_property_add_bool(obj, "vnet_hdr_support",
57
+ filter_redirector_get_vnet_hdr,
58
+ filter_redirector_set_vnet_hdr, NULL);
59
}
41
}
60
42
61
static void filter_mirror_fini(Object *obj)
43
-static void vhost_svq_kick(VhostShadowVirtqueue *svq)
62
diff --git a/qemu-options.hx b/qemu-options.hx
44
-{
63
index XXXXXXX..XXXXXXX 100644
45
- /*
64
--- a/qemu-options.hx
46
- * We need to expose the available array entries before checking the used
65
+++ b/qemu-options.hx
47
- * flags
66
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
48
- */
67
49
- smp_mb();
68
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
50
- if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
69
51
- return;
70
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
52
- }
71
-outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
53
-
72
+@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
54
- event_notifier_set(&svq->hdev_kick);
73
55
-}
74
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
56
-
75
-@var{chardevid},and redirect indev's packet to filter.
57
/**
76
+@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag,
58
* Forward available buffers.
77
+filter-redirector will redirect packet with vnet_hdr_len.
59
*
78
Create a filter-redirector we need to differ outdev id from indev id, id can not
79
be the same. we can just use indev or outdev, but at least one of indev or outdev
80
need to be specified.
81
--
60
--
82
2.7.4
61
2.7.4
83
62
84
63
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
The series needs to expose vhost_svq_add with full functionality,
4
including kick
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 2 +-
11
1 file changed, 1 insertion(+), 1 deletion(-)
12
13
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/virtio/vhost-shadow-virtqueue.c
16
+++ b/hw/virtio/vhost-shadow-virtqueue.c
17
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
18
}
19
20
svq->ring_id_maps[qemu_head] = elem;
21
+ vhost_svq_kick(svq);
22
return true;
23
}
24
25
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
26
/* VQ is broken, just return and ignore any other kicks */
27
return;
28
}
29
- vhost_svq_kick(svq);
30
}
31
32
virtio_queue_set_notification(svq->vq, true);
33
--
34
2.7.4
35
36
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
The series needs to expose vhost_svq_add with full functionality,
4
including checking for full queue.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 59 +++++++++++++++++++++-----------------
11
1 file changed, 33 insertions(+), 26 deletions(-)
12
13
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/virtio/vhost-shadow-virtqueue.c
16
+++ b/hw/virtio/vhost-shadow-virtqueue.c
17
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
18
* Add an element to a SVQ.
19
*
20
* The caller must check that there is enough slots for the new element. It
21
- * takes ownership of the element: In case of failure, it is free and the SVQ
22
- * is considered broken.
23
+ * takes ownership of the element: In case of failure not ENOSPC, it is free.
24
+ *
25
+ * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
26
*/
27
-static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
28
+static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
29
{
30
unsigned qemu_head;
31
- bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
32
+ unsigned ndescs = elem->in_num + elem->out_num;
33
+ bool ok;
34
+
35
+ if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
36
+ return -ENOSPC;
37
+ }
38
+
39
+ ok = vhost_svq_add_split(svq, elem, &qemu_head);
40
if (unlikely(!ok)) {
41
g_free(elem);
42
- return false;
43
+ return -EINVAL;
44
}
45
46
svq->ring_id_maps[qemu_head] = elem;
47
vhost_svq_kick(svq);
48
- return true;
49
+ return 0;
50
}
51
52
/**
53
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
54
55
while (true) {
56
VirtQueueElement *elem;
57
- bool ok;
58
+ int r;
59
60
if (svq->next_guest_avail_elem) {
61
elem = g_steal_pointer(&svq->next_guest_avail_elem);
62
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
63
break;
64
}
65
66
- if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
67
- /*
68
- * This condition is possible since a contiguous buffer in GPA
69
- * does not imply a contiguous buffer in qemu's VA
70
- * scatter-gather segments. If that happens, the buffer exposed
71
- * to the device needs to be a chain of descriptors at this
72
- * moment.
73
- *
74
- * SVQ cannot hold more available buffers if we are here:
75
- * queue the current guest descriptor and ignore further kicks
76
- * until some elements are used.
77
- */
78
- svq->next_guest_avail_elem = elem;
79
- return;
80
- }
81
-
82
- ok = vhost_svq_add(svq, elem);
83
- if (unlikely(!ok)) {
84
- /* VQ is broken, just return and ignore any other kicks */
85
+ r = vhost_svq_add(svq, elem);
86
+ if (unlikely(r != 0)) {
87
+ if (r == -ENOSPC) {
88
+ /*
89
+ * This condition is possible since a contiguous buffer in
90
+ * GPA does not imply a contiguous buffer in qemu's VA
91
+ * scatter-gather segments. If that happens, the buffer
92
+ * exposed to the device needs to be a chain of descriptors
93
+ * at this moment.
94
+ *
95
+ * SVQ cannot hold more available buffers if we are here:
96
+ * queue the current guest descriptor and ignore kicks
97
+ * until some elements are used.
98
+ */
99
+ svq->next_guest_avail_elem = elem;
100
+ }
101
+
102
+ /* VQ is full or broken, just return and ignore kicks */
103
return;
104
}
105
}
106
--
107
2.7.4
108
109
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-mirror, default is disabled.
3
VirtQueueElement comes from the guest, but we're heading SVQ to be able
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
to modify the element presented to the device without the guest's
5
You can use it for example:
5
knowledge.
6
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
7
6
8
If it has vnet_hdr_support flag, we will change the sending packet format from
7
To do so, make SVQ accept sg buffers directly, instead of using
9
struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];}.
8
VirtQueueElement.
10
This lets other modules (like colo-compare) know how to parse net packets correctly.
11
9
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
10
Add vhost_svq_add_element to maintain element convenience.
11
12
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Acked-by: Jason Wang <jasowang@redhat.com>
14
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
16
---
15
net/filter-mirror.c | 42 +++++++++++++++++++++++++++++++++++++++++-
17
hw/virtio/vhost-shadow-virtqueue.c | 33 ++++++++++++++++++++++-----------
16
qemu-options.hx | 5 ++---
18
1 file changed, 22 insertions(+), 11 deletions(-)
17
2 files changed, 43 insertions(+), 4 deletions(-)
18
19
19
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
20
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
20
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
21
--- a/net/filter-mirror.c
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/net/filter-mirror.c
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
24
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
24
CharBackend chr_in;
25
}
25
CharBackend chr_out;
26
26
SocketReadState rs;
27
static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
27
+ bool vnet_hdr;
28
- VirtQueueElement *elem, unsigned *head)
28
} MirrorState;
29
+ const struct iovec *out_sg, size_t out_num,
29
30
+ const struct iovec *in_sg, size_t in_num,
30
static int filter_send(MirrorState *s,
31
+ unsigned *head)
31
const struct iovec *iov,
32
int iovcnt)
33
{
32
{
34
+ NetFilterState *nf = NETFILTER(s);
33
unsigned avail_idx;
35
int ret = 0;
34
vring_avail_t *avail = svq->vring.avail;
36
ssize_t size = 0;
35
bool ok;
37
uint32_t len = 0;
36
- g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
38
@@ -XXX,XX +XXX,XX @@ static int filter_send(MirrorState *s,
37
+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num));
39
goto err;
38
39
*head = svq->free_head;
40
41
/* We need some descriptors here */
42
- if (unlikely(!elem->out_num && !elem->in_num)) {
43
+ if (unlikely(!out_num && !in_num)) {
44
qemu_log_mask(LOG_GUEST_ERROR,
45
"Guest provided element with no descriptors");
46
return false;
40
}
47
}
41
48
42
+ if (s->vnet_hdr) {
49
- ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
43
+ /*
50
- elem->in_num > 0, false);
44
+ * If vnet_hdr = on, we send vnet header len to make other
51
+ ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0,
45
+ * module(like colo-compare) know how to parse net
52
+ false);
46
+ * packet correctly.
53
if (unlikely(!ok)) {
47
+ */
54
return false;
48
+ ssize_t vnet_hdr_len;
49
+
50
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
51
+
52
+ len = htonl(vnet_hdr_len);
53
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
54
+ if (ret != sizeof(len)) {
55
+ goto err;
56
+ }
57
+ }
58
+
59
buf = g_malloc(size);
60
iov_to_buf(iov, iovcnt, 0, buf, size);
61
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
62
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
63
}
64
}
55
}
65
56
66
- net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
57
- ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false,
67
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
58
- true);
68
59
+ ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true);
69
if (s->indev) {
60
if (unlikely(!ok)) {
70
chr = qemu_chr_find(s->indev);
61
return false;
71
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_set_outdev(Object *obj,
72
}
62
}
63
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
64
*
65
* Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
66
*/
67
-static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
68
+static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
69
+ size_t out_num, const struct iovec *in_sg,
70
+ size_t in_num, VirtQueueElement *elem)
71
{
72
unsigned qemu_head;
73
- unsigned ndescs = elem->in_num + elem->out_num;
74
+ unsigned ndescs = in_num + out_num;
75
bool ok;
76
77
if (unlikely(ndescs > vhost_svq_available_slots(svq))) {
78
return -ENOSPC;
79
}
80
81
- ok = vhost_svq_add_split(svq, elem, &qemu_head);
82
+ ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head);
83
if (unlikely(!ok)) {
84
g_free(elem);
85
return -EINVAL;
86
@@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
87
return 0;
73
}
88
}
74
89
75
+static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp)
90
+/* Convenience wrapper to add a guest's element to SVQ */
91
+static int vhost_svq_add_element(VhostShadowVirtqueue *svq,
92
+ VirtQueueElement *elem)
76
+{
93
+{
77
+ MirrorState *s = FILTER_MIRROR(obj);
94
+ return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg,
78
+
95
+ elem->in_num, elem);
79
+ return s->vnet_hdr;
80
+}
96
+}
81
+
97
+
82
+static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp)
98
/**
83
+{
99
* Forward available buffers.
84
+ MirrorState *s = FILTER_MIRROR(obj);
100
*
85
+
101
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
86
+ s->vnet_hdr = value;
102
break;
87
+}
103
}
88
+
104
89
static char *filter_redirector_get_outdev(Object *obj, Error **errp)
105
- r = vhost_svq_add(svq, elem);
90
{
106
+ r = vhost_svq_add_element(svq, elem);
91
MirrorState *s = FILTER_REDIRECTOR(obj);
107
if (unlikely(r != 0)) {
92
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
108
if (r == -ENOSPC) {
93
109
/*
94
static void filter_mirror_init(Object *obj)
95
{
96
+ MirrorState *s = FILTER_MIRROR(obj);
97
+
98
object_property_add_str(obj, "outdev", filter_mirror_get_outdev,
99
filter_mirror_set_outdev, NULL);
100
+
101
+ s->vnet_hdr = false;
102
+ object_property_add_bool(obj, "vnet_hdr_support",
103
+ filter_mirror_get_vnet_hdr,
104
+ filter_mirror_set_vnet_hdr, NULL);
105
}
106
107
static void filter_redirector_init(Object *obj)
108
diff --git a/qemu-options.hx b/qemu-options.hx
109
index XXXXXXX..XXXXXXX 100644
110
--- a/qemu-options.hx
111
+++ b/qemu-options.hx
112
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
113
@option{tx}: the filter is attached to the transmit queue of the netdev,
114
where it will receive packets sent by the netdev.
115
116
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
117
+@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
118
119
-filter-mirror on netdev @var{netdevid},mirror net packet to chardev
120
-@var{chardevid}
121
+filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
122
123
@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
124
outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
125
--
110
--
126
2.7.4
111
2.7.4
127
112
128
113
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This will allow SVQ to add context to the different queue elements.
4
5
This patch only stores the actual element; no functional change intended.
6
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++--------
12
hw/virtio/vhost-shadow-virtqueue.h | 8 ++++++--
13
2 files changed, 14 insertions(+), 10 deletions(-)
14
15
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/virtio/vhost-shadow-virtqueue.c
18
+++ b/hw/virtio/vhost-shadow-virtqueue.c
19
@@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
20
return -EINVAL;
21
}
22
23
- svq->ring_id_maps[qemu_head] = elem;
24
+ svq->desc_state[qemu_head].elem = elem;
25
vhost_svq_kick(svq);
26
return 0;
27
}
28
@@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
29
return NULL;
30
}
31
32
- if (unlikely(!svq->ring_id_maps[used_elem.id])) {
33
+ if (unlikely(!svq->desc_state[used_elem.id].elem)) {
34
qemu_log_mask(LOG_GUEST_ERROR,
35
"Device %s says index %u is used, but it was not available",
36
svq->vdev->name, used_elem.id);
37
return NULL;
38
}
39
40
- num = svq->ring_id_maps[used_elem.id]->in_num +
41
- svq->ring_id_maps[used_elem.id]->out_num;
42
+ num = svq->desc_state[used_elem.id].elem->in_num +
43
+ svq->desc_state[used_elem.id].elem->out_num;
44
last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
45
svq->desc_next[last_used_chain] = svq->free_head;
46
svq->free_head = used_elem.id;
47
48
*len = used_elem.len;
49
- return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
50
+ return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
51
}
52
53
static void vhost_svq_flush(VhostShadowVirtqueue *svq,
54
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
55
memset(svq->vring.desc, 0, driver_size);
56
svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size);
57
memset(svq->vring.used, 0, device_size);
58
- svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
59
+ svq->desc_state = g_new0(SVQDescState, svq->vring.num);
60
svq->desc_next = g_new0(uint16_t, svq->vring.num);
61
for (unsigned i = 0; i < svq->vring.num - 1; i++) {
62
svq->desc_next[i] = cpu_to_le16(i + 1);
63
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
64
65
for (unsigned i = 0; i < svq->vring.num; ++i) {
66
g_autofree VirtQueueElement *elem = NULL;
67
- elem = g_steal_pointer(&svq->ring_id_maps[i]);
68
+ elem = g_steal_pointer(&svq->desc_state[i].elem);
69
if (elem) {
70
virtqueue_detach_element(svq->vq, elem, 0);
71
}
72
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
73
}
74
svq->vq = NULL;
75
g_free(svq->desc_next);
76
- g_free(svq->ring_id_maps);
77
+ g_free(svq->desc_state);
78
qemu_vfree(svq->vring.desc);
79
qemu_vfree(svq->vring.used);
80
}
81
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
82
index XXXXXXX..XXXXXXX 100644
83
--- a/hw/virtio/vhost-shadow-virtqueue.h
84
+++ b/hw/virtio/vhost-shadow-virtqueue.h
85
@@ -XXX,XX +XXX,XX @@
86
#include "standard-headers/linux/vhost_types.h"
87
#include "hw/virtio/vhost-iova-tree.h"
88
89
+typedef struct SVQDescState {
90
+ VirtQueueElement *elem;
91
+} SVQDescState;
92
+
93
/* Shadow virtqueue to relay notifications */
94
typedef struct VhostShadowVirtqueue {
95
/* Shadow vring */
96
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
97
/* IOVA mapping */
98
VhostIOVATree *iova_tree;
99
100
- /* Map for use the guest's descriptors */
101
- VirtQueueElement **ring_id_maps;
102
+ /* SVQ vring descriptors state */
103
+ SVQDescState *desc_state;
104
105
/* Next VirtQueue element that guest made available */
106
VirtQueueElement *next_guest_avail_elem;
107
--
108
2.7.4
109
110
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
This patch changes the filter_send() parameter from CharBackend to MirrorState,
3
A guest's buffer contiguous on GPA may need multiple descriptors on
4
so we can get more information like vnet_hdr (we use it to support packets with vnet_header).
4
qemu's VA, so SVQ should track its length separately.
5
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
---
9
net/filter-mirror.c | 10 +++++-----
10
hw/virtio/vhost-shadow-virtqueue.c | 4 ++--
10
1 file changed, 5 insertions(+), 5 deletions(-)
11
hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++
12
2 files changed, 8 insertions(+), 2 deletions(-)
11
13
12
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/net/filter-mirror.c
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
15
+++ b/net/filter-mirror.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
18
@@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
17
SocketReadState rs;
18
} MirrorState;
19
20
-static int filter_send(CharBackend *chr_out,
21
+static int filter_send(MirrorState *s,
22
const struct iovec *iov,
23
int iovcnt)
24
{
25
@@ -XXX,XX +XXX,XX @@ static int filter_send(CharBackend *chr_out,
26
}
19
}
27
20
28
len = htonl(size);
21
svq->desc_state[qemu_head].elem = elem;
29
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len));
22
+ svq->desc_state[qemu_head].ndescs = ndescs;
30
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
23
vhost_svq_kick(svq);
31
if (ret != sizeof(len)) {
24
return 0;
32
goto err;
25
}
26
@@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
27
return NULL;
33
}
28
}
34
29
35
buf = g_malloc(size);
30
- num = svq->desc_state[used_elem.id].elem->in_num +
36
iov_to_buf(iov, iovcnt, 0, buf, size);
31
- svq->desc_state[used_elem.id].elem->out_num;
37
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size);
32
+ num = svq->desc_state[used_elem.id].ndescs;
38
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
33
last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id);
39
g_free(buf);
34
svq->desc_next[last_used_chain] = svq->free_head;
40
if (ret != size) {
35
svq->free_head = used_elem.id;
41
goto err;
36
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
42
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
37
index XXXXXXX..XXXXXXX 100644
43
MirrorState *s = FILTER_MIRROR(nf);
38
--- a/hw/virtio/vhost-shadow-virtqueue.h
44
int ret;
39
+++ b/hw/virtio/vhost-shadow-virtqueue.h
45
40
@@ -XXX,XX +XXX,XX @@
46
- ret = filter_send(&s->chr_out, iov, iovcnt);
41
47
+ ret = filter_send(s, iov, iovcnt);
42
typedef struct SVQDescState {
48
if (ret) {
43
VirtQueueElement *elem;
49
error_report("filter mirror send failed(%s)", strerror(-ret));
44
+
50
}
45
+ /*
51
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf,
46
+ * Number of descriptors exposed to the device. May or may not match
52
int ret;
47
+ * guest's
53
48
+ */
54
if (qemu_chr_fe_backend_connected(&s->chr_out)) {
49
+ unsigned int ndescs;
55
- ret = filter_send(&s->chr_out, iov, iovcnt);
50
} SVQDescState;
56
+ ret = filter_send(s, iov, iovcnt);
51
57
if (ret) {
52
/* Shadow virtqueue to relay notifications */
58
error_report("filter redirector send failed(%s)", strerror(-ret));
59
}
60
--
53
--
61
2.7.4
54
2.7.4
62
55
63
56
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This function allows external SVQ users to return guest's available
4
buffers.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++++++++++
11
hw/virtio/vhost-shadow-virtqueue.h | 3 +++
12
2 files changed, 19 insertions(+)
13
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
19
return g_steal_pointer(&svq->desc_state[used_elem.id].elem);
20
}
21
22
+/**
23
+ * Push an element to SVQ, returning it to the guest.
24
+ */
25
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
26
+ const VirtQueueElement *elem, uint32_t len)
27
+{
28
+ virtqueue_push(svq->vq, elem, len);
29
+ if (svq->next_guest_avail_elem) {
30
+ /*
31
+ * Avail ring was full when vhost_svq_flush was called, so it's a
32
+ * good moment to make more descriptors available if possible.
33
+ */
34
+ vhost_handle_guest_kick(svq);
35
+ }
36
+}
37
+
38
static void vhost_svq_flush(VhostShadowVirtqueue *svq,
39
bool check_for_avail_queue)
40
{
41
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/hw/virtio/vhost-shadow-virtqueue.h
44
+++ b/hw/virtio/vhost-shadow-virtqueue.h
45
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
46
47
bool vhost_svq_valid_features(uint64_t features, Error **errp);
48
49
+void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
50
+ const VirtQueueElement *elem, uint32_t len);
51
+
52
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
53
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
54
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
55
--
56
2.7.4
57
58
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This allows external parts of SVQ to forward custom buffers to the
4
device.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 6 +++---
11
hw/virtio/vhost-shadow-virtqueue.h | 3 +++
12
2 files changed, 6 insertions(+), 3 deletions(-)
13
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq)
19
*
20
* Return -EINVAL if element is invalid, -ENOSPC if dev queue is full
21
*/
22
-static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
23
- size_t out_num, const struct iovec *in_sg,
24
- size_t in_num, VirtQueueElement *elem)
25
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
26
+ size_t out_num, const struct iovec *in_sg, size_t in_num,
27
+ VirtQueueElement *elem)
28
{
29
unsigned qemu_head;
30
unsigned ndescs = in_num + out_num;
31
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
32
index XXXXXXX..XXXXXXX 100644
33
--- a/hw/virtio/vhost-shadow-virtqueue.h
34
+++ b/hw/virtio/vhost-shadow-virtqueue.h
35
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp);
36
37
void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
38
const VirtQueueElement *elem, uint32_t len);
39
+int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
40
+ size_t out_num, const struct iovec *in_sg, size_t in_num,
41
+ VirtQueueElement *elem);
42
43
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
44
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
45
--
46
2.7.4
47
48
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
It allows the Shadow Control VirtQueue to wait for the device to use the
4
available buffers.
5
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++++++++
11
hw/virtio/vhost-shadow-virtqueue.h | 1 +
12
2 files changed, 28 insertions(+)
13
14
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/virtio/vhost-shadow-virtqueue.c
17
+++ b/hw/virtio/vhost-shadow-virtqueue.c
18
@@ -XXX,XX +XXX,XX @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq,
19
}
20
21
/**
22
+ * Poll the SVQ for one device used buffer.
23
+ *
24
+ * This function race with main event loop SVQ polling, so extra
25
+ * synchronization is needed.
26
+ *
27
+ * Return the length written by the device.
28
+ */
29
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq)
30
+{
31
+ int64_t start_us = g_get_monotonic_time();
32
+ do {
33
+ uint32_t len;
34
+ VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
35
+ if (elem) {
36
+ return len;
37
+ }
38
+
39
+ if (unlikely(g_get_monotonic_time() - start_us > 10e6)) {
40
+ return 0;
41
+ }
42
+
43
+ /* Make sure we read new used_idx */
44
+ smp_rmb();
45
+ } while (true);
46
+}
47
+
48
+/**
49
* Forward used buffers.
50
*
51
* @n: hdev call event notifier, the one that device set to notify svq.
52
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/hw/virtio/vhost-shadow-virtqueue.h
55
+++ b/hw/virtio/vhost-shadow-virtqueue.h
56
@@ -XXX,XX +XXX,XX @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq,
57
int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg,
58
size_t out_num, const struct iovec *in_sg, size_t in_num,
59
VirtQueueElement *elem);
60
+size_t vhost_svq_poll(VhostShadowVirtqueue *svq);
61
62
void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
63
void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
64
--
65
2.7.4
66
67
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
This allows external handlers to be aware of new buffers that the guest
4
places in the virtqueue.
5
6
When this callback is defined the ownership of the guest's virtqueue
7
element is transferred to the callback. This means that if the user
8
wants to forward the descriptor it needs to manually inject it. The
9
callback is also free to process the command by itself and use the
10
element with svq_push.
11
12
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
13
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
hw/virtio/vhost-shadow-virtqueue.c | 14 ++++++++++++--
17
hw/virtio/vhost-shadow-virtqueue.h | 31 ++++++++++++++++++++++++++++++-
18
hw/virtio/vhost-vdpa.c | 3 ++-
19
3 files changed, 44 insertions(+), 4 deletions(-)
20
21
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/virtio/vhost-shadow-virtqueue.c
24
+++ b/hw/virtio/vhost-shadow-virtqueue.c
25
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
26
break;
27
}
28
29
- r = vhost_svq_add_element(svq, elem);
30
+ if (svq->ops) {
31
+ r = svq->ops->avail_handler(svq, elem, svq->ops_opaque);
32
+ } else {
33
+ r = vhost_svq_add_element(svq, elem);
34
+ }
35
if (unlikely(r != 0)) {
36
if (r == -ENOSPC) {
37
/*
38
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
39
* shadow methods and file descriptors.
40
*
41
* @iova_tree: Tree to perform descriptors translations
42
+ * @ops: SVQ owner callbacks
43
+ * @ops_opaque: ops opaque pointer
44
*
45
* Returns the new virtqueue or NULL.
46
*
47
* In case of error, reason is reported through error_report.
48
*/
49
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
50
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
51
+ const VhostShadowVirtqueueOps *ops,
52
+ void *ops_opaque)
53
{
54
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
55
int r;
56
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
57
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
58
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
59
svq->iova_tree = iova_tree;
60
+ svq->ops = ops;
61
+ svq->ops_opaque = ops_opaque;
62
return g_steal_pointer(&svq);
63
64
err_init_hdev_call:
65
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
66
index XXXXXXX..XXXXXXX 100644
67
--- a/hw/virtio/vhost-shadow-virtqueue.h
68
+++ b/hw/virtio/vhost-shadow-virtqueue.h
69
@@ -XXX,XX +XXX,XX @@ typedef struct SVQDescState {
70
unsigned int ndescs;
71
} SVQDescState;
72
73
+typedef struct VhostShadowVirtqueue VhostShadowVirtqueue;
74
+
75
+/**
76
+ * Callback to handle an avail buffer.
77
+ *
78
+ * @svq: Shadow virtqueue
79
+ * @elem: Element placed in the queue by the guest
80
+ * @vq_callback_opaque: Opaque
81
+ *
82
+ * Returns 0 if the vq is running as expected.
83
+ *
84
+ * Note that ownership of elem is transferred to the callback.
85
+ */
86
+typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq,
87
+ VirtQueueElement *elem,
88
+ void *vq_callback_opaque);
89
+
90
+typedef struct VhostShadowVirtqueueOps {
91
+ VirtQueueAvailCallback avail_handler;
92
+} VhostShadowVirtqueueOps;
93
+
94
/* Shadow virtqueue to relay notifications */
95
typedef struct VhostShadowVirtqueue {
96
/* Shadow vring */
97
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
98
*/
99
uint16_t *desc_next;
100
101
+ /* Caller callbacks */
102
+ const VhostShadowVirtqueueOps *ops;
103
+
104
+ /* Caller callbacks opaque */
105
+ void *ops_opaque;
106
+
107
/* Next head to expose to the device */
108
uint16_t shadow_avail_idx;
109
110
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
111
VirtQueue *vq);
112
void vhost_svq_stop(VhostShadowVirtqueue *svq);
113
114
-VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
115
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree,
116
+ const VhostShadowVirtqueueOps *ops,
117
+ void *ops_opaque);
118
119
void vhost_svq_free(gpointer vq);
120
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
121
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
122
index XXXXXXX..XXXXXXX 100644
123
--- a/hw/virtio/vhost-vdpa.c
124
+++ b/hw/virtio/vhost-vdpa.c
125
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
126
127
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
128
for (unsigned n = 0; n < hdev->nvqs; ++n) {
129
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
130
+ g_autoptr(VhostShadowVirtqueue) svq;
131
132
+ svq = vhost_svq_new(v->iova_tree, NULL, NULL);
133
if (unlikely(!svq)) {
134
error_setg(errp, "Cannot create svq %u", n);
135
return -1;
136
--
137
2.7.4
138
139
diff view generated by jsdifflib
New patch
1
From: Eugenio Pérez <eperezma@redhat.com>
1
2
3
Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from
4
the guest that could set a different state in qemu device model and vdpa
5
device.
6
7
To do so, it needs to be able to map these new buffers to the device.
8
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
10
Acked-by: Jason Wang <jasowang@redhat.com>
11
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/virtio/vhost-vdpa.c | 7 +++----
15
include/hw/virtio/vhost-vdpa.h | 4 ++++
16
2 files changed, 7 insertions(+), 4 deletions(-)
17
18
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/virtio/vhost-vdpa.c
21
+++ b/hw/virtio/vhost-vdpa.c
22
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section,
23
return false;
24
}
25
26
-static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
27
- void *vaddr, bool readonly)
28
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
29
+ void *vaddr, bool readonly)
30
{
31
struct vhost_msg_v2 msg = {};
32
int fd = v->device_fd;
33
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
34
return ret;
35
}
36
37
-static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova,
38
- hwaddr size)
39
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size)
40
{
41
struct vhost_msg_v2 msg = {};
42
int fd = v->device_fd;
43
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
44
index XXXXXXX..XXXXXXX 100644
45
--- a/include/hw/virtio/vhost-vdpa.h
46
+++ b/include/hw/virtio/vhost-vdpa.h
47
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
48
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
49
} VhostVDPA;
50
51
+int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size,
52
+ void *vaddr, bool readonly);
53
+int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size);
54
+
55
#endif
56
--
57
2.7.4
58
59
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
3
net/vhost-vdpa.c will need functions that are declared in
4
vhost-shadow-virtqueue.c, that needs functions of virtio-net.c.
5
6
Copy the vhost-vdpa-stub.c code so
7
only the constructor net_init_vhost_vdpa needs to be defined.
8
9
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
11
---
6
docs/colo-proxy.txt | 26 ++++++++++++++++++++++++++
12
net/meson.build | 3 ++-
7
1 file changed, 26 insertions(+)
13
net/vhost-vdpa-stub.c | 21 +++++++++++++++++++++
14
2 files changed, 23 insertions(+), 1 deletion(-)
15
create mode 100644 net/vhost-vdpa-stub.c
8
16
9
diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
17
diff --git a/net/meson.build b/net/meson.build
10
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
11
--- a/docs/colo-proxy.txt
19
--- a/net/meson.build
12
+++ b/docs/colo-proxy.txt
20
+++ b/net/meson.build
13
@@ -XXX,XX +XXX,XX @@ Secondary(ip:3.3.3.8):
21
@@ -XXX,XX +XXX,XX @@ endif
14
-chardev socket,id=red1,host=3.3.3.3,port=9004
22
softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix))
15
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
23
softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c'))
16
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
24
if have_vhost_net_vdpa
17
+-object filter-rewriter,id=f3,netdev=hn0,queue=all
25
- softmmu_ss.add(files('vhost-vdpa.c'))
26
+ softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c'))
27
+ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c'))
28
endif
29
30
vmnet_files = files(
31
diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c
32
new file mode 100644
33
index XXXXXXX..XXXXXXX
34
--- /dev/null
35
+++ b/net/vhost-vdpa-stub.c
36
@@ -XXX,XX +XXX,XX @@
37
+/*
38
+ * vhost-vdpa-stub.c
39
+ *
40
+ * Copyright (c) 2022 Red Hat, Inc.
41
+ *
42
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
43
+ * See the COPYING file in the top-level directory.
44
+ *
45
+ */
18
+
46
+
19
+If you want to use virtio-net-pci or other driver with vnet_header:
47
+#include "qemu/osdep.h"
48
+#include "clients.h"
49
+#include "net/vhost-vdpa.h"
50
+#include "qapi/error.h"
20
+
51
+
21
+Primary(ip:3.3.3.3):
52
+int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
22
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
53
+ NetClientState *peer, Error **errp)
23
+-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
54
+{
24
+-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
55
+ error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*");
25
+-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
56
+ return -1;
26
+-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
57
+}
27
+-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
28
+-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
29
+-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
30
+-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
31
+-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support
32
+-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support
33
+-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
34
+
35
+Secondary(ip:3.3.3.8):
36
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
37
+-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
38
+-chardev socket,id=red0,host=3.3.3.3,port=9003
39
+-chardev socket,id=red1,host=3.3.3.3,port=9004
40
+-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support
41
+-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support
42
+-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support
43
44
Note:
45
a.COLO-proxy must work with COLO-frame and Block-replication.
46
--
58
--
47
2.7.4
59
2.7.4
48
60
49
61
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-rewriter, default is disabled.
3
Do a simple forwarding of CVQ buffers, the same work SVQ could do but
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
through callbacks. No functional change intended.
5
You can use it for example:
6
-object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support
7
5
8
We get the vnet_hdr_len from NetClientState, which lets us
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
9
parse net packet correctly.
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
10
11
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
9
---
14
net/filter-rewriter.c | 37 ++++++++++++++++++++++++++++++++++++-
10
hw/virtio/vhost-vdpa.c | 3 ++-
15
qemu-options.hx | 4 ++--
11
include/hw/virtio/vhost-vdpa.h | 3 +++
16
2 files changed, 38 insertions(+), 3 deletions(-)
12
net/vhost-vdpa.c | 58 ++++++++++++++++++++++++++++++++++++++++++
13
3 files changed, 63 insertions(+), 1 deletion(-)
17
14
18
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
15
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
19
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
20
--- a/net/filter-rewriter.c
17
--- a/hw/virtio/vhost-vdpa.c
21
+++ b/net/filter-rewriter.c
18
+++ b/hw/virtio/vhost-vdpa.c
19
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
20
for (unsigned n = 0; n < hdev->nvqs; ++n) {
21
g_autoptr(VhostShadowVirtqueue) svq;
22
23
- svq = vhost_svq_new(v->iova_tree, NULL, NULL);
24
+ svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops,
25
+ v->shadow_vq_ops_opaque);
26
if (unlikely(!svq)) {
27
error_setg(errp, "Cannot create svq %u", n);
28
return -1;
29
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/include/hw/virtio/vhost-vdpa.h
32
+++ b/include/hw/virtio/vhost-vdpa.h
22
@@ -XXX,XX +XXX,XX @@
33
@@ -XXX,XX +XXX,XX @@
23
#include "qemu-common.h"
34
#include <gmodule.h>
35
36
#include "hw/virtio/vhost-iova-tree.h"
37
+#include "hw/virtio/vhost-shadow-virtqueue.h"
38
#include "hw/virtio/virtio.h"
39
#include "standard-headers/linux/vhost_types.h"
40
41
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
42
/* IOVA mapping used by the Shadow Virtqueue */
43
VhostIOVATree *iova_tree;
44
GPtrArray *shadow_vqs;
45
+ const VhostShadowVirtqueueOps *shadow_vq_ops;
46
+ void *shadow_vq_ops_opaque;
47
struct vhost_dev *dev;
48
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
49
} VhostVDPA;
50
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/net/vhost-vdpa.c
53
+++ b/net/vhost-vdpa.c
54
@@ -XXX,XX +XXX,XX @@
55
56
#include "qemu/osdep.h"
57
#include "clients.h"
58
+#include "hw/virtio/virtio-net.h"
59
#include "net/vhost_net.h"
60
#include "net/vhost-vdpa.h"
61
#include "hw/virtio/vhost-vdpa.h"
62
#include "qemu/config-file.h"
63
#include "qemu/error-report.h"
64
+#include "qemu/log.h"
65
+#include "qemu/memalign.h"
66
#include "qemu/option.h"
24
#include "qapi/error.h"
67
#include "qapi/error.h"
25
#include "qapi/qmp/qerror.h"
68
#include <linux/vhost.h>
26
+#include "qemu/error-report.h"
69
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_vhost_vdpa_info = {
27
#include "qapi-visit.h"
70
.check_peer_type = vhost_vdpa_check_peer_type,
28
#include "qom/object.h"
71
};
29
#include "qemu/main-loop.h"
72
30
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
73
+/**
31
NetQueue *incoming_queue;
74
+ * Forward buffer for the moment.
32
/* hashtable to save connection */
75
+ */
33
GHashTable *connection_track_table;
76
+static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
34
+ bool vnet_hdr;
77
+ VirtQueueElement *elem,
35
} RewriterState;
78
+ void *opaque)
36
79
+{
37
static void filter_rewriter_flush(NetFilterState *nf)
80
+ unsigned int n = elem->out_num + elem->in_num;
38
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
81
+ g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
39
ConnectionKey key;
82
+ size_t in_len, dev_written;
40
Packet *pkt;
83
+ virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
41
ssize_t size = iov_size(iov, iovcnt);
84
+ int r;
42
+ ssize_t vnet_hdr_len = 0;
43
char *buf = g_malloc0(size);
44
45
iov_to_buf(iov, iovcnt, 0, buf, size);
46
- pkt = packet_new(buf, size, 0);
47
+
85
+
48
+ if (s->vnet_hdr) {
86
+ memcpy(dev_buffers, elem->out_sg, elem->out_num);
49
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
87
+ memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
88
+
89
+ r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
90
+ elem->in_num, elem);
91
+ if (unlikely(r != 0)) {
92
+ if (unlikely(r == -ENOSPC)) {
93
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
94
+ __func__);
95
+ }
96
+ goto out;
50
+ }
97
+ }
51
+
98
+
52
+ pkt = packet_new(buf, size, vnet_hdr_len);
99
+ /*
53
g_free(buf);
100
+ * We can poll here since we've had BQL from the time we sent the
54
101
+ * descriptor. Also, we need to take the answer before SVQ pulls by itself,
55
/*
102
+ * when BQL is released
56
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
103
+ */
57
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
104
+ dev_written = vhost_svq_poll(svq);
58
}
105
+ if (unlikely(dev_written < sizeof(status))) {
59
106
+ error_report("Insufficient written data (%zu)", dev_written);
60
+static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
107
+ }
61
+{
62
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
63
+
108
+
64
+ return s->vnet_hdr;
109
+out:
110
+ in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status,
111
+ sizeof(status));
112
+ if (unlikely(in_len < sizeof(status))) {
113
+ error_report("Bad device CVQ written length");
114
+ }
115
+ vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
116
+ g_free(elem);
117
+ return r;
65
+}
118
+}
66
+
119
+
67
+static void filter_rewriter_set_vnet_hdr(Object *obj,
120
+static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = {
68
+ bool value,
121
+ .avail_handler = vhost_vdpa_net_handle_ctrl_avail,
69
+ Error **errp)
122
+};
70
+{
71
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
72
+
123
+
73
+ s->vnet_hdr = value;
124
static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
74
+}
125
const char *device,
75
+
126
const char *name,
76
+static void filter_rewriter_init(Object *obj)
127
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
77
+{
128
78
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
129
s->vhost_vdpa.device_fd = vdpa_device_fd;
79
+
130
s->vhost_vdpa.index = queue_pair_index;
80
+ s->vnet_hdr = false;
131
+ if (!is_datapath) {
81
+ object_property_add_bool(obj, "vnet_hdr_support",
132
+ s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
82
+ filter_rewriter_get_vnet_hdr,
133
+ s->vhost_vdpa.shadow_vq_ops_opaque = s;
83
+ filter_rewriter_set_vnet_hdr, NULL);
134
+ }
84
+}
135
ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
85
+
136
if (ret) {
86
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
137
qemu_del_net_client(nc);
87
{
88
NetFilterClass *nfc = NETFILTER_CLASS(oc);
89
@@ -XXX,XX +XXX,XX @@ static const TypeInfo colo_rewriter_info = {
90
.name = TYPE_FILTER_REWRITER,
91
.parent = TYPE_NETFILTER,
92
.class_init = colo_rewriter_class_init,
93
+ .instance_init = filter_rewriter_init,
94
.instance_size = sizeof(RewriterState),
95
};
96
97
diff --git a/qemu-options.hx b/qemu-options.hx
98
index XXXXXXX..XXXXXXX 100644
99
--- a/qemu-options.hx
100
+++ b/qemu-options.hx
101
@@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
102
be the same. we can just use indev or outdev, but at least one of indev or outdev
103
need to be specified.
104
105
-@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid}[,queue=@var{all|rx|tx}]
106
+@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
107
108
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
109
secondary from primary to keep secondary tcp connection,and rewrite
110
tcp packet to primary from secondary make tcp packet can be handled by
111
-client.
112
+client. If it has the vnet_hdr_support flag, we can parse packets with a vnet header.
113
114
usage:
115
colo secondary:
116
--
138
--
117
2.7.4
139
2.7.4
118
140
119
141
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
Add a vnet_hdr_len field to NetClientState
3
Introduce the control virtqueue support for vDPA shadow virtqueue. This
4
so that other modules can get the real vnet_hdr_len easily.
4
is needed for advanced networking features like rx filtering.
5
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid
7
TOCTOU with the guest's or device's memory every time there is a device
8
model change. Otherwise, the guest could change the memory content in
9
the time between qemu and the device read it.
10
11
To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is
12
implemented. If the virtio-net driver changes MAC the virtio-net device
13
model will be updated with the new one, and a rx filtering change event
14
will be raised.
15
16
More cvq commands could be added here straightforwardly but they have
17
not been tested.
18
19
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
20
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
22
---
9
include/net/net.h | 1 +
23
net/vhost-vdpa.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
10
net/net.c | 1 +
24
1 file changed, 205 insertions(+), 8 deletions(-)
11
2 files changed, 2 insertions(+)
25
12
26
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
13
diff --git a/include/net/net.h b/include/net/net.h
14
index XXXXXXX..XXXXXXX 100644
27
index XXXXXXX..XXXXXXX 100644
15
--- a/include/net/net.h
28
--- a/net/vhost-vdpa.c
16
+++ b/include/net/net.h
29
+++ b/net/vhost-vdpa.c
17
@@ -XXX,XX +XXX,XX @@ struct NetClientState {
30
@@ -XXX,XX +XXX,XX @@ typedef struct VhostVDPAState {
18
unsigned int queue_index;
31
NetClientState nc;
19
unsigned rxfilter_notify_enabled:1;
32
struct vhost_vdpa vhost_vdpa;
20
int vring_enable;
33
VHostNetState *vhost_net;
21
+ int vnet_hdr_len;
34
+
22
QTAILQ_HEAD(NetFilterHead, NetFilterState) filters;
35
+ /* Control commands shadow buffers */
36
+ void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer;
37
bool started;
38
} VhostVDPAState;
39
40
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_cleanup(NetClientState *nc)
41
{
42
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
43
44
+ qemu_vfree(s->cvq_cmd_out_buffer);
45
+ qemu_vfree(s->cvq_cmd_in_buffer);
46
if (s->vhost_net) {
47
vhost_net_cleanup(s->vhost_net);
48
g_free(s->vhost_net);
49
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_vhost_vdpa_info = {
50
.check_peer_type = vhost_vdpa_check_peer_type,
23
};
51
};
24
52
25
diff --git a/net/net.c b/net/net.c
53
+static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr)
26
index XXXXXXX..XXXXXXX 100644
54
+{
27
--- a/net/net.c
55
+ VhostIOVATree *tree = v->iova_tree;
28
+++ b/net/net.c
56
+ DMAMap needle = {
29
@@ -XXX,XX +XXX,XX @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
57
+ /*
30
return;
58
+ * No need to specify size or to look for more translations since
59
+ * this contiguous chunk was allocated by us.
60
+ */
61
+ .translated_addr = (hwaddr)(uintptr_t)addr,
62
+ };
63
+ const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle);
64
+ int r;
65
+
66
+ if (unlikely(!map)) {
67
+ error_report("Cannot locate expected map");
68
+ return;
69
+ }
70
+
71
+ r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1);
72
+ if (unlikely(r != 0)) {
73
+ error_report("Device cannot unmap: %s(%d)", g_strerror(r), r);
74
+ }
75
+
76
+ vhost_iova_tree_remove(tree, map);
77
+}
78
+
79
+static size_t vhost_vdpa_net_cvq_cmd_len(void)
80
+{
81
+ /*
82
+ * MAC_TABLE_SET is the ctrl command that produces the longer out buffer.
83
+ * In buffer is always 1 byte, so it should fit here
84
+ */
85
+ return sizeof(struct virtio_net_ctrl_hdr) +
86
+ 2 * sizeof(struct virtio_net_ctrl_mac) +
87
+ MAC_TABLE_ENTRIES * ETH_ALEN;
88
+}
89
+
90
+static size_t vhost_vdpa_net_cvq_cmd_page_len(void)
91
+{
92
+ return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size());
93
+}
94
+
95
+/** Copy and map a guest buffer. */
96
+static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v,
97
+ const struct iovec *out_data,
98
+ size_t out_num, size_t data_len, void *buf,
99
+ size_t *written, bool write)
100
+{
101
+ DMAMap map = {};
102
+ int r;
103
+
104
+ if (unlikely(!data_len)) {
105
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n",
106
+ __func__, write ? "in" : "out");
107
+ return false;
108
+ }
109
+
110
+ *written = iov_to_buf(out_data, out_num, 0, buf, data_len);
111
+ map.translated_addr = (hwaddr)(uintptr_t)buf;
112
+ map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1;
113
+ map.perm = write ? IOMMU_RW : IOMMU_RO,
114
+ r = vhost_iova_tree_map_alloc(v->iova_tree, &map);
115
+ if (unlikely(r != IOVA_OK)) {
116
+ error_report("Cannot map injected element");
117
+ return false;
118
+ }
119
+
120
+ r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf,
121
+ !write);
122
+ if (unlikely(r < 0)) {
123
+ goto dma_map_err;
124
+ }
125
+
126
+ return true;
127
+
128
+dma_map_err:
129
+ vhost_iova_tree_remove(v->iova_tree, &map);
130
+ return false;
131
+}
132
+
133
/**
134
- * Forward buffer for the moment.
135
+ * Copy the guest element into a dedicated buffer suitable to be sent to NIC
136
+ *
137
+ * @iov: [0] is the out buffer, [1] is the in one
138
+ */
139
+static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s,
140
+ VirtQueueElement *elem,
141
+ struct iovec *iov)
142
+{
143
+ size_t in_copied;
144
+ bool ok;
145
+
146
+ iov[0].iov_base = s->cvq_cmd_out_buffer;
147
+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num,
148
+ vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base,
149
+ &iov[0].iov_len, false);
150
+ if (unlikely(!ok)) {
151
+ return false;
152
+ }
153
+
154
+ iov[1].iov_base = s->cvq_cmd_in_buffer;
155
+ ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0,
156
+ sizeof(virtio_net_ctrl_ack), iov[1].iov_base,
157
+ &in_copied, true);
158
+ if (unlikely(!ok)) {
159
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer);
160
+ return false;
161
+ }
162
+
163
+ iov[1].iov_len = sizeof(virtio_net_ctrl_ack);
164
+ return true;
165
+}
166
+
167
+/**
168
+ * Do not forward commands not supported by SVQ. Otherwise, the device could
169
+ * accept it and qemu would not know how to update the device model.
170
+ */
171
+static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out,
172
+ size_t out_num)
173
+{
174
+ struct virtio_net_ctrl_hdr ctrl;
175
+ size_t n;
176
+
177
+ n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl));
178
+ if (unlikely(n < sizeof(ctrl))) {
179
+ qemu_log_mask(LOG_GUEST_ERROR,
180
+ "%s: invalid legnth of out buffer %zu\n", __func__, n);
181
+ return false;
182
+ }
183
+
184
+ switch (ctrl.class) {
185
+ case VIRTIO_NET_CTRL_MAC:
186
+ switch (ctrl.cmd) {
187
+ case VIRTIO_NET_CTRL_MAC_ADDR_SET:
188
+ return true;
189
+ default:
190
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n",
191
+ __func__, ctrl.cmd);
192
+ };
193
+ break;
194
+ default:
195
+ qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n",
196
+ __func__, ctrl.class);
197
+ };
198
+
199
+ return false;
200
+}
201
+
202
+/**
203
+ * Validate and copy control virtqueue commands.
204
+ *
205
+ * Following QEMU guidelines, we offer a copy of the buffers to the device to
206
+ * prevent TOCTOU bugs.
207
*/
208
static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
209
VirtQueueElement *elem,
210
void *opaque)
211
{
212
- unsigned int n = elem->out_num + elem->in_num;
213
- g_autofree struct iovec *dev_buffers = g_new(struct iovec, n);
214
+ VhostVDPAState *s = opaque;
215
size_t in_len, dev_written;
216
virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
217
- int r;
218
+ /* out and in buffers sent to the device */
219
+ struct iovec dev_buffers[2] = {
220
+ { .iov_base = s->cvq_cmd_out_buffer },
221
+ { .iov_base = s->cvq_cmd_in_buffer },
222
+ };
223
+ /* in buffer used for device model */
224
+ const struct iovec in = {
225
+ .iov_base = &status,
226
+ .iov_len = sizeof(status),
227
+ };
228
+ int r = -EINVAL;
229
+ bool ok;
230
+
231
+ ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers);
232
+ if (unlikely(!ok)) {
233
+ goto out;
234
+ }
235
236
- memcpy(dev_buffers, elem->out_sg, elem->out_num);
237
- memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num);
238
+ ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1);
239
+ if (unlikely(!ok)) {
240
+ goto out;
241
+ }
242
243
- r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1],
244
- elem->in_num, elem);
245
+ r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem);
246
if (unlikely(r != 0)) {
247
if (unlikely(r == -ENOSPC)) {
248
qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n",
249
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq,
250
dev_written = vhost_svq_poll(svq);
251
if (unlikely(dev_written < sizeof(status))) {
252
error_report("Insufficient written data (%zu)", dev_written);
253
+ goto out;
254
+ }
255
+
256
+ memcpy(&status, dev_buffers[1].iov_base, sizeof(status));
257
+ if (status != VIRTIO_NET_OK) {
258
+ goto out;
259
+ }
260
+
261
+ status = VIRTIO_NET_ERR;
262
+ virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1);
263
+ if (status != VIRTIO_NET_OK) {
264
+ error_report("Bad CVQ processing in model");
31
}
265
}
32
266
33
+ nc->vnet_hdr_len = len;
267
out:
34
nc->info->set_vnet_hdr_len(nc, len);
268
@@ -XXX,XX +XXX,XX @@ out:
269
}
270
vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status)));
271
g_free(elem);
272
+ if (dev_buffers[0].iov_base) {
273
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base);
274
+ }
275
+ if (dev_buffers[1].iov_base) {
276
+ vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base);
277
+ }
278
return r;
35
}
279
}
36
280
281
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
282
s->vhost_vdpa.device_fd = vdpa_device_fd;
283
s->vhost_vdpa.index = queue_pair_index;
284
if (!is_datapath) {
285
+ s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
286
+ vhost_vdpa_net_cvq_cmd_page_len());
287
+ memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
288
+ s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(),
289
+ vhost_vdpa_net_cvq_cmd_page_len());
290
+ memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len());
291
+
292
s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
293
s->vhost_vdpa.shadow_vq_ops_opaque = s;
294
}
37
--
295
--
38
2.7.4
296
2.7.4
39
297
40
298
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
COLO-Proxy just focuses on the packet payload, so we skip the vnet header.
3
Knowing the device features is needed for CVQ SVQ, so SVQ knows if it
4
can handle all commands or not. Extract from
5
vhost_vdpa_get_max_queue_pairs so we can reuse it.
4
6
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
8
Acked-by: Jason Wang <jasowang@redhat.com>
9
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
11
---
8
net/colo-compare.c | 8 ++++++--
12
net/vhost-vdpa.c | 30 ++++++++++++++++++++----------
9
1 file changed, 6 insertions(+), 2 deletions(-)
13
1 file changed, 20 insertions(+), 10 deletions(-)
10
14
11
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo-compare.c
17
--- a/net/vhost-vdpa.c
14
+++ b/net/colo-compare.c
18
+++ b/net/vhost-vdpa.c
15
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset)
19
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
16
sec_ip_src, sec_ip_dst);
20
return nc;
21
}
22
23
-static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp)
24
+static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
25
+{
26
+ int ret = ioctl(fd, VHOST_GET_FEATURES, features);
27
+ if (unlikely(ret < 0)) {
28
+ error_setg_errno(errp, errno,
29
+ "Fail to query features from vhost-vDPA device");
30
+ }
31
+ return ret;
32
+}
33
+
34
+static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features,
35
+ int *has_cvq, Error **errp)
36
{
37
unsigned long config_size = offsetof(struct vhost_vdpa_config, buf);
38
g_autofree struct vhost_vdpa_config *config = NULL;
39
__virtio16 *max_queue_pairs;
40
- uint64_t features;
41
int ret;
42
43
- ret = ioctl(fd, VHOST_GET_FEATURES, &features);
44
- if (ret) {
45
- error_setg(errp, "Fail to query features from vhost-vDPA device");
46
- return ret;
47
- }
48
-
49
if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) {
50
*has_cvq = 1;
51
} else {
52
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
53
NetClientState *peer, Error **errp)
54
{
55
const NetdevVhostVDPAOptions *opts;
56
+ uint64_t features;
57
int vdpa_device_fd;
58
g_autofree NetClientState **ncs = NULL;
59
NetClientState *nc;
60
- int queue_pairs, i, has_cvq = 0;
61
+ int queue_pairs, r, i, has_cvq = 0;
62
63
assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA);
64
opts = &netdev->u.vhost_vdpa;
65
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
66
return -errno;
17
}
67
}
18
68
19
+ offset = ppkt->vnet_hdr_len + offset;
69
- queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd,
70
+ r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp);
71
+ if (unlikely(r < 0)) {
72
+ return r;
73
+ }
20
+
74
+
21
if (ppkt->size == spkt->size) {
75
+ queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features,
22
- return memcmp(ppkt->data + offset, spkt->data + offset,
76
&has_cvq, errp);
23
+ return memcmp(ppkt->data + offset,
77
if (queue_pairs < 0) {
24
+ spkt->data + offset,
78
qemu_close(vdpa_device_fd);
25
spkt->size - offset);
26
} else {
27
trace_colo_compare_main("Net packet size are not the same");
28
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
29
*/
30
if (ptcp->th_off > 5) {
31
ptrdiff_t tcp_offset;
32
+
33
tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
34
- + (ptcp->th_off * 4);
35
+ + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
36
res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
37
} else if (ptcp->th_sum == stcp->th_sum) {
38
res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
39
--
79
--
40
2.7.4
80
2.7.4
41
81
42
82
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add the vnet_hdr_support option for colo-compare, default is disabled.
3
Since the vhost-vdpa device is exposing _F_LOG, add a migration blocker if
4
If you use virtio-net-pci or other driver needs vnet_hdr, please enable it.
4
it uses CVQ.
5
You can use it for example:
6
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
7
5
8
COLO-compare can get vnet header length from filter,
6
However, qemu is able to migrate simple devices with no CVQ as long as
9
Add vnet_hdr_len to struct packet and output packet with
7
they use SVQ. To allow it, add a placeholder error to vhost_vdpa, and
10
the vnet_hdr_len.
8
only add it to vhost_dev when used. The vhost_dev machinery places the migration
9
blocker if needed.
11
10
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
11
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
12
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
14
---
15
net/colo-compare.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------
15
hw/virtio/vhost-vdpa.c | 15 +++++++++++++++
16
qemu-options.hx | 4 ++--
16
include/hw/virtio/vhost-vdpa.h | 1 +
17
2 files changed, 55 insertions(+), 9 deletions(-)
17
2 files changed, 16 insertions(+)
18
18
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
19
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
20
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
21
--- a/hw/virtio/vhost-vdpa.c
22
+++ b/net/colo-compare.c
22
+++ b/hw/virtio/vhost-vdpa.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
23
@@ -XXX,XX +XXX,XX @@
24
CharBackend chr_out;
24
#include "hw/virtio/vhost-shadow-virtqueue.h"
25
SocketReadState pri_rs;
25
#include "hw/virtio/vhost-vdpa.h"
26
SocketReadState sec_rs;
26
#include "exec/address-spaces.h"
27
+ bool vnet_hdr;
27
+#include "migration/blocker.h"
28
28
#include "qemu/cutils.h"
29
/* connection list: the connections belonged to this NIC could be found
29
#include "qemu/main-loop.h"
30
* in this list.
30
#include "cpu.h"
31
@@ -XXX,XX +XXX,XX @@ enum {
31
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
32
32
return true;
33
static int compare_chr_send(CompareState *s,
34
const uint8_t *buf,
35
- uint32_t size);
36
+ uint32_t size,
37
+ uint32_t vnet_hdr_len);
38
39
static gint seq_sorter(Packet *a, Packet *b, gpointer data)
40
{
41
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
42
}
43
44
if (result) {
45
- ret = compare_chr_send(s, pkt->data, pkt->size);
46
+ ret = compare_chr_send(s,
47
+ pkt->data,
48
+ pkt->size,
49
+ pkt->vnet_hdr_len);
50
if (ret < 0) {
51
error_report("colo_send_primary_packet failed");
52
}
53
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
54
55
static int compare_chr_send(CompareState *s,
56
const uint8_t *buf,
57
- uint32_t size)
58
+ uint32_t size,
59
+ uint32_t vnet_hdr_len)
60
{
61
int ret = 0;
62
uint32_t len = htonl(size);
63
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CompareState *s,
64
goto err;
65
}
33
}
66
34
67
+ if (s->vnet_hdr) {
35
+ if (v->migration_blocker) {
68
+ /*
36
+ int r = migrate_add_blocker(v->migration_blocker, &err);
69
+ * We send vnet header len make other module(like filter-redirector)
37
+ if (unlikely(r < 0)) {
70
+ * know how to parse net packet correctly.
38
+ return false;
71
+ */
72
+ len = htonl(vnet_hdr_len);
73
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
74
+ if (ret != sizeof(len)) {
75
+ goto err;
76
+ }
39
+ }
77
+ }
40
+ }
78
+
41
+
79
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
42
for (i = 0; i < v->shadow_vqs->len; ++i) {
80
if (ret != size) {
43
VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
81
goto err;
44
VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
82
@@ -XXX,XX +XXX,XX @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp)
45
@@ -XXX,XX +XXX,XX @@ err:
83
s->outdev = g_strdup(value);
46
vhost_svq_stop(svq);
47
}
48
49
+ if (v->migration_blocker) {
50
+ migrate_del_blocker(v->migration_blocker);
51
+ }
52
+
53
return false;
84
}
54
}
85
55
86
+static bool compare_get_vnet_hdr(Object *obj, Error **errp)
56
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
87
+{
57
}
88
+ CompareState *s = COLO_COMPARE(obj);
89
+
90
+ return s->vnet_hdr;
91
+}
92
+
93
+static void compare_set_vnet_hdr(Object *obj,
94
+ bool value,
95
+ Error **errp)
96
+{
97
+ CompareState *s = COLO_COMPARE(obj);
98
+
99
+ s->vnet_hdr = value;
100
+}
101
+
102
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
103
{
104
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
105
106
if (packet_enqueue(s, PRIMARY_IN)) {
107
trace_colo_compare_main("primary: unsupported packet in");
108
- compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
109
+ compare_chr_send(s,
110
+ pri_rs->buf,
111
+ pri_rs->packet_len,
112
+ pri_rs->vnet_hdr_len);
113
} else {
114
/* compare connection */
115
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
116
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
117
return;
118
}
58
}
119
59
120
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
60
+ if (v->migration_blocker) {
121
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
61
+ migrate_del_blocker(v->migration_blocker);
122
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
62
+ }
123
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
63
return true;
124
125
g_queue_init(&s->conn_list);
126
127
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
128
129
while (!g_queue_is_empty(&conn->primary_list)) {
130
pkt = g_queue_pop_head(&conn->primary_list);
131
- compare_chr_send(s, pkt->data, pkt->size);
132
+ compare_chr_send(s,
133
+ pkt->data,
134
+ pkt->size,
135
+ pkt->vnet_hdr_len);
136
packet_destroy(pkt, NULL);
137
}
138
while (!g_queue_is_empty(&conn->secondary_list)) {
139
@@ -XXX,XX +XXX,XX @@ static void colo_compare_class_init(ObjectClass *oc, void *data)
140
141
static void colo_compare_init(Object *obj)
142
{
143
+ CompareState *s = COLO_COMPARE(obj);
144
+
145
object_property_add_str(obj, "primary_in",
146
compare_get_pri_indev, compare_set_pri_indev,
147
NULL);
148
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
149
object_property_add_str(obj, "outdev",
150
compare_get_outdev, compare_set_outdev,
151
NULL);
152
+
153
+ s->vnet_hdr = false;
154
+ object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
155
+ compare_set_vnet_hdr, NULL);
156
}
64
}
157
65
158
static void colo_compare_finalize(Object *obj)
66
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
159
diff --git a/qemu-options.hx b/qemu-options.hx
160
index XXXXXXX..XXXXXXX 100644
67
index XXXXXXX..XXXXXXX 100644
161
--- a/qemu-options.hx
68
--- a/include/hw/virtio/vhost-vdpa.h
162
+++ b/qemu-options.hx
69
+++ b/include/hw/virtio/vhost-vdpa.h
163
@@ -XXX,XX +XXX,XX @@ Dump the network traffic on netdev @var{dev} to the file specified by
70
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
164
The file format is libpcap, so it can be analyzed with tools such as tcpdump
71
bool shadow_vqs_enabled;
165
or Wireshark.
72
/* IOVA mapping used by the Shadow Virtqueue */
166
73
VhostIOVATree *iova_tree;
167
-@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},
74
+ Error *migration_blocker;
168
-outdev=@var{chardevid}
75
GPtrArray *shadow_vqs;
169
+@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support]
76
const VhostShadowVirtqueueOps *shadow_vq_ops;
170
77
void *shadow_vq_ops_opaque;
171
Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with
172
secondary packet. If the packets are same, we will output primary
173
packet to outdev@var{chardevid}, else we will notify colo-frame
174
do checkpoint and send primary packet to outdev@var{chardevid}.
175
+if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len.
176
177
we must use it with the help of filter-mirror and filter-redirector.
178
179
--
78
--
180
2.7.4
79
2.7.4
181
80
182
81
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We add a flag to decide whether net_fill_rstate() needs to read
3
Finally offering the possibility to enable SVQ from the command line.
4
the vnet_hdr_len or not.
5
4
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
7
Suggested-by: Jason Wang <jasowang@redhat.com>
6
Acked-by: Markus Armbruster <armbru@redhat.com>
7
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
9
---
10
include/net/net.h | 9 +++++++--
10
net/vhost-vdpa.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
11
net/colo-compare.c | 4 ++--
11
qapi/net.json | 9 ++++++-
12
net/filter-mirror.c | 2 +-
12
2 files changed, 77 insertions(+), 4 deletions(-)
13
net/net.c | 36 ++++++++++++++++++++++++++++++++----
14
net/socket.c | 8 ++++----
15
5 files changed, 46 insertions(+), 13 deletions(-)
16
13
17
diff --git a/include/net/net.h b/include/net/net.h
14
diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/include/net/net.h
16
--- a/net/vhost-vdpa.c
20
+++ b/include/net/net.h
17
+++ b/net/vhost-vdpa.c
21
@@ -XXX,XX +XXX,XX @@ typedef struct NICState {
18
@@ -XXX,XX +XXX,XX @@ const int vdpa_feature_bits[] = {
22
} NICState;
19
VHOST_INVALID_FEATURE_BIT
23
24
struct SocketReadState {
25
- int state; /* 0 = getting length, 1 = getting data */
26
+ /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */
27
+ int state;
28
+ /* This flag decide whether to read the vnet_hdr_len field */
29
+ bool vnet_hdr;
30
uint32_t index;
31
uint32_t packet_len;
32
+ uint32_t vnet_hdr_len;
33
uint8_t buf[NET_BUFSIZE];
34
SocketReadStateFinalize *finalize;
35
};
20
};
36
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
21
37
void print_net_client(Monitor *mon, NetClientState *nc);
22
+/** Supported device specific feature bits with SVQ */
38
void hmp_info_network(Monitor *mon, const QDict *qdict);
23
+static const uint64_t vdpa_svq_device_features =
39
void net_socket_rs_init(SocketReadState *rs,
24
+ BIT_ULL(VIRTIO_NET_F_CSUM) |
40
- SocketReadStateFinalize *finalize);
25
+ BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |
41
+ SocketReadStateFinalize *finalize,
26
+ BIT_ULL(VIRTIO_NET_F_MTU) |
42
+ bool vnet_hdr);
27
+ BIT_ULL(VIRTIO_NET_F_MAC) |
43
28
+ BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) |
44
/* NIC info */
29
+ BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |
45
30
+ BIT_ULL(VIRTIO_NET_F_GUEST_ECN) |
46
diff --git a/net/colo-compare.c b/net/colo-compare.c
31
+ BIT_ULL(VIRTIO_NET_F_GUEST_UFO) |
47
index XXXXXXX..XXXXXXX 100644
32
+ BIT_ULL(VIRTIO_NET_F_HOST_TSO4) |
48
--- a/net/colo-compare.c
33
+ BIT_ULL(VIRTIO_NET_F_HOST_TSO6) |
49
+++ b/net/colo-compare.c
34
+ BIT_ULL(VIRTIO_NET_F_HOST_ECN) |
50
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
35
+ BIT_ULL(VIRTIO_NET_F_HOST_UFO) |
51
return;
36
+ BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) |
37
+ BIT_ULL(VIRTIO_NET_F_STATUS) |
38
+ BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |
39
+ BIT_ULL(VIRTIO_F_ANY_LAYOUT) |
40
+ BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |
41
+ BIT_ULL(VIRTIO_NET_F_RSC_EXT) |
42
+ BIT_ULL(VIRTIO_NET_F_STANDBY);
43
+
44
VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc)
45
{
46
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
47
@@ -XXX,XX +XXX,XX @@ err_init:
48
static void vhost_vdpa_cleanup(NetClientState *nc)
49
{
50
VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc);
51
+ struct vhost_dev *dev = &s->vhost_net->dev;
52
53
qemu_vfree(s->cvq_cmd_out_buffer);
54
qemu_vfree(s->cvq_cmd_in_buffer);
55
+ if (dev->vq_index + dev->nvqs == dev->vq_index_end) {
56
+ g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete);
57
+ }
58
if (s->vhost_net) {
59
vhost_net_cleanup(s->vhost_net);
60
g_free(s->vhost_net);
61
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
62
int vdpa_device_fd,
63
int queue_pair_index,
64
int nvqs,
65
- bool is_datapath)
66
+ bool is_datapath,
67
+ bool svq,
68
+ VhostIOVATree *iova_tree)
69
{
70
NetClientState *nc = NULL;
71
VhostVDPAState *s;
72
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
73
74
s->vhost_vdpa.device_fd = vdpa_device_fd;
75
s->vhost_vdpa.index = queue_pair_index;
76
+ s->vhost_vdpa.shadow_vqs_enabled = svq;
77
+ s->vhost_vdpa.iova_tree = iova_tree;
78
if (!is_datapath) {
79
s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(),
80
vhost_vdpa_net_cvq_cmd_page_len());
81
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
82
83
s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops;
84
s->vhost_vdpa.shadow_vq_ops_opaque = s;
85
+ error_setg(&s->vhost_vdpa.migration_blocker,
86
+ "Migration disabled: vhost-vdpa uses CVQ.");
52
}
87
}
53
88
ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs);
54
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize);
89
if (ret) {
55
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize);
90
@@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer,
56
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
91
return nc;
57
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
92
}
58
93
59
g_queue_init(&s->conn_list);
94
+static int vhost_vdpa_get_iova_range(int fd,
60
95
+ struct vhost_vdpa_iova_range *iova_range)
61
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
96
+{
62
index XXXXXXX..XXXXXXX 100644
97
+ int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range);
63
--- a/net/filter-mirror.c
98
+
64
+++ b/net/filter-mirror.c
99
+ return ret < 0 ? -errno : 0;
65
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
100
+}
101
+
102
static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp)
103
{
104
int ret = ioctl(fd, VHOST_GET_FEATURES, features);
105
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
106
uint64_t features;
107
int vdpa_device_fd;
108
g_autofree NetClientState **ncs = NULL;
109
+ g_autoptr(VhostIOVATree) iova_tree = NULL;
110
NetClientState *nc;
111
int queue_pairs, r, i, has_cvq = 0;
112
113
@@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name,
114
return queue_pairs;
115
}
116
117
+ if (opts->x_svq) {
118
+ struct vhost_vdpa_iova_range iova_range;
119
+
120
+ uint64_t invalid_dev_features =
121
+ features & ~vdpa_svq_device_features &
122
+ /* Transport are all accepted at this point */
123
+ ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START,
124
+ VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START);
125
+
126
+ if (invalid_dev_features) {
127
+ error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64,
128
+ invalid_dev_features);
129
+ goto err_svq;
130
+ }
131
+
132
+ vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range);
133
+ iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last);
134
+ }
135
+
136
ncs = g_malloc0(sizeof(*ncs) * queue_pairs);
137
138
for (i = 0; i < queue_pairs; i++) {
139
ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
140
- vdpa_device_fd, i, 2, true);
141
+ vdpa_device_fd, i, 2, true, opts->x_svq,
142
+ iova_tree);
143
if (!ncs[i])
144
goto err;
145
}
146
147
if (has_cvq) {
148
nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name,
149
- vdpa_device_fd, i, 1, false);
150
+ vdpa_device_fd, i, 1, false,
151
+ opts->x_svq, iova_tree);
152
if (!nc)
153
goto err;
154
}
155
156
+ /* iova_tree ownership belongs to last NetClientState */
157
+ g_steal_pointer(&iova_tree);
158
return 0;
159
160
err:
161
@@ -XXX,XX +XXX,XX @@ err:
162
qemu_del_net_client(ncs[i]);
66
}
163
}
67
}
164
}
68
165
+
69
- net_socket_rs_init(&s->rs, redirector_rs_finalize);
166
+err_svq:
70
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
167
qemu_close(vdpa_device_fd);
71
168
72
if (s->indev) {
169
return -1;
73
chr = qemu_chr_find(s->indev);
170
diff --git a/qapi/net.json b/qapi/net.json
74
diff --git a/net/net.c b/net/net.c
75
index XXXXXXX..XXXXXXX 100644
171
index XXXXXXX..XXXXXXX 100644
76
--- a/net/net.c
172
--- a/qapi/net.json
77
+++ b/net/net.c
173
+++ b/qapi/net.json
78
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_net_opts = {
174
@@ -XXX,XX +XXX,XX @@
79
};
175
# @queues: number of queues to be created for multiqueue vhost-vdpa
80
176
# (default: 1)
81
void net_socket_rs_init(SocketReadState *rs,
177
#
82
- SocketReadStateFinalize *finalize)
178
+# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1)
83
+ SocketReadStateFinalize *finalize,
179
+# (default: false)
84
+ bool vnet_hdr)
180
+#
85
{
181
+# Features:
86
rs->state = 0;
182
+# @unstable: Member @x-svq is experimental.
87
+ rs->vnet_hdr = vnet_hdr;
183
+#
88
rs->index = 0;
184
# Since: 5.1
89
rs->packet_len = 0;
185
##
90
+ rs->vnet_hdr_len = 0;
186
{ 'struct': 'NetdevVhostVDPAOptions',
91
memset(rs->buf, 0, sizeof(rs->buf));
187
'data': {
92
rs->finalize = finalize;
188
'*vhostdev': 'str',
93
}
189
- '*queues': 'int' } }
94
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
190
+ '*queues': 'int',
95
unsigned int l;
191
+ '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } }
96
192
97
while (size > 0) {
193
##
98
- /* reassemble a packet from the network */
194
# @NetdevVmnetHostOptions:
99
- switch (rs->state) { /* 0 = getting length, 1 = getting data */
100
+ /* Reassemble a packet from the network.
101
+ * 0 = getting length.
102
+ * 1 = getting vnet header length.
103
+ * 2 = getting data.
104
+ */
105
+ switch (rs->state) {
106
case 0:
107
l = 4 - rs->index;
108
if (l > size) {
109
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
110
/* got length */
111
rs->packet_len = ntohl(*(uint32_t *)rs->buf);
112
rs->index = 0;
113
- rs->state = 1;
114
+ if (rs->vnet_hdr) {
115
+ rs->state = 1;
116
+ } else {
117
+ rs->state = 2;
118
+ rs->vnet_hdr_len = 0;
119
+ }
120
}
121
break;
122
case 1:
123
+ l = 4 - rs->index;
124
+ if (l > size) {
125
+ l = size;
126
+ }
127
+ memcpy(rs->buf + rs->index, buf, l);
128
+ buf += l;
129
+ size -= l;
130
+ rs->index += l;
131
+ if (rs->index == 4) {
132
+ /* got vnet header length */
133
+ rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf);
134
+ rs->index = 0;
135
+ rs->state = 2;
136
+ }
137
+ break;
138
+ case 2:
139
l = rs->packet_len - rs->index;
140
if (l > size) {
141
l = size;
142
diff --git a/net/socket.c b/net/socket.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/net/socket.c
145
+++ b/net/socket.c
146
@@ -XXX,XX +XXX,XX @@ static void net_socket_send(void *opaque)
147
closesocket(s->fd);
148
149
s->fd = -1;
150
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
151
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
152
s->nc.link_down = true;
153
memset(s->nc.info_str, 0, sizeof(s->nc.info_str));
154
155
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
156
s->fd = fd;
157
s->listen_fd = -1;
158
s->send_fn = net_socket_send_dgram;
159
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
160
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
161
net_socket_read_poll(s, true);
162
163
/* mcast: save bound address as dst */
164
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
165
166
s->fd = fd;
167
s->listen_fd = -1;
168
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
169
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
170
171
/* Disable Nagle algorithm on TCP sockets to reduce latency */
172
socket_set_nodelay(fd);
173
@@ -XXX,XX +XXX,XX @@ static int net_socket_listen_init(NetClientState *peer,
174
s->fd = -1;
175
s->listen_fd = fd;
176
s->nc.link_down = true;
177
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
178
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
179
180
qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
181
return 0;
182
--
195
--
183
2.7.4
196
2.7.4
184
197
185
198
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <chen.zhang@intel.com>
1
2
3
If the checkpoint occurs when the guest finishes restarting
4
but has not started running, the runstate_set() may reject
5
the transition from COLO to PRELAUNCH with the crash log:
6
7
{"timestamp": {"seconds": 1593484591, "microseconds": 26605},\
8
"event": "RESET", "data": {"guest": true, "reason": "guest-reset"}}
9
qemu-system-x86_64: invalid runstate transition: 'colo' -> 'prelaunch'
10
11
Long-term testing says that it's pretty safe.
12
13
Signed-off-by: Like Xu <like.xu@linux.intel.com>
14
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
15
Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
softmmu/runstate.c | 1 +
19
1 file changed, 1 insertion(+)
20
21
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/softmmu/runstate.c
24
+++ b/softmmu/runstate.c
25
@@ -XXX,XX +XXX,XX @@ static const RunStateTransition runstate_transitions_def[] = {
26
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
27
28
{ RUN_STATE_COLO, RUN_STATE_RUNNING },
29
+ { RUN_STATE_COLO, RUN_STATE_PRELAUNCH },
30
{ RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
31
32
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
33
--
34
2.7.4
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <chen.zhang@intel.com>
2
2
3
We can use this property to flush and send packets with vnet_hdr_len.
3
We noticed that QEMU may crash when the guest has too many
4
incoming network connections with the following log:
4
5
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
15197@1593578622.668573:colo_proxy_main : colo proxy connection hashtable full, clear it
7
free(): invalid pointer
8
[1] 15195 abort (core dumped) qemu-system-x86_64 ....
9
10
This is because we create the s->connection_track_table with
11
g_hash_table_new_full() which is defined as:
12
13
GHashTable * g_hash_table_new_full (GHashFunc hash_func,
14
GEqualFunc key_equal_func,
15
GDestroyNotify key_destroy_func,
16
GDestroyNotify value_destroy_func);
17
18
The fourth parameter connection_destroy() will be called to free the
19
memory allocated for all 'Connection' values in the hashtable when
20
we call g_hash_table_remove_all() in the connection_hashtable_reset().
21
22
But both connection_track_table and conn_list refer to the same
23
conn instance. This triggers a double free when conn_list is cleared. So this
24
patch removes the free action on the hash table side to avoid double-freeing the
25
conn.
26
27
Signed-off-by: Like Xu <like.xu@linux.intel.com>
28
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
29
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
30
---
8
net/colo-compare.c | 8 ++++++--
31
net/colo-compare.c | 2 +-
9
net/colo.c | 3 ++-
10
net/colo.h | 4 +++-
11
net/filter-rewriter.c | 2 +-
32
net/filter-rewriter.c | 2 +-
12
4 files changed, 12 insertions(+), 5 deletions(-)
33
2 files changed, 2 insertions(+), 2 deletions(-)
13
34
14
diff --git a/net/colo-compare.c b/net/colo-compare.c
35
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
16
--- a/net/colo-compare.c
37
--- a/net/colo-compare.c
17
+++ b/net/colo-compare.c
38
+++ b/net/colo-compare.c
18
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode)
39
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
19
Connection *conn;
40
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
20
41
connection_key_equal,
21
if (mode == PRIMARY_IN) {
42
g_free,
22
- pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
43
- connection_destroy);
23
+ pkt = packet_new(s->pri_rs.buf,
44
+ NULL);
24
+ s->pri_rs.packet_len,
45
25
+ s->pri_rs.vnet_hdr_len);
46
colo_compare_iothread(s);
26
} else {
47
27
- pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
28
+ pkt = packet_new(s->sec_rs.buf,
29
+ s->sec_rs.packet_len,
30
+ s->sec_rs.vnet_hdr_len);
31
}
32
33
if (parse_packet_early(pkt)) {
34
diff --git a/net/colo.c b/net/colo.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/net/colo.c
37
+++ b/net/colo.c
38
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque)
39
g_slice_free(Connection, conn);
40
}
41
42
-Packet *packet_new(const void *data, int size)
43
+Packet *packet_new(const void *data, int size, int vnet_hdr_len)
44
{
45
Packet *pkt = g_slice_new(Packet);
46
47
pkt->data = g_memdup(data, size);
48
pkt->size = size;
49
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
50
+ pkt->vnet_hdr_len = vnet_hdr_len;
51
52
return pkt;
53
}
54
diff --git a/net/colo.h b/net/colo.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/net/colo.h
57
+++ b/net/colo.h
58
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
59
int size;
60
/* Time of packet creation, in wall clock ms */
61
int64_t creation_ms;
62
+ /* Get vnet_hdr_len from filter */
63
+ uint32_t vnet_hdr_len;
64
} Packet;
65
66
typedef struct ConnectionKey {
67
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
68
ConnectionKey *key,
69
GQueue *conn_list);
70
void connection_hashtable_reset(GHashTable *connection_track_table);
71
-Packet *packet_new(const void *data, int size);
72
+Packet *packet_new(const void *data, int size, int vnet_hdr_len);
73
void packet_destroy(void *opaque, void *user_data);
74
75
#endif /* QEMU_COLO_PROXY_H */
76
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
48
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
77
index XXXXXXX..XXXXXXX 100644
49
index XXXXXXX..XXXXXXX 100644
78
--- a/net/filter-rewriter.c
50
--- a/net/filter-rewriter.c
79
+++ b/net/filter-rewriter.c
51
+++ b/net/filter-rewriter.c
80
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
52
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
81
char *buf = g_malloc0(size);
53
s->connection_track_table = g_hash_table_new_full(connection_key_hash,
82
54
connection_key_equal,
83
iov_to_buf(iov, iovcnt, 0, buf, size);
55
g_free,
84
- pkt = packet_new(buf, size);
56
- connection_destroy);
85
+ pkt = packet_new(buf, size, 0);
57
+ NULL);
86
g_free(buf);
58
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
87
59
}
88
/*
60
89
--
61
--
90
2.7.4
62
2.7.4
91
92
diff view generated by jsdifflib
New patch
1
From: Zhang Chen <chen.zhang@intel.com>
1
2
3
Filter-rewriter does not need to track connections in conn_list.
4
This patch fixes the glib g_queue_is_empty assertion when the COLO guest
5
keeps a lot of network connections.
6
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
net/colo.c | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
13
14
diff --git a/net/colo.c b/net/colo.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/net/colo.c
17
+++ b/net/colo.c
18
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
19
/*
20
* clear the conn_list
21
*/
22
- while (!g_queue_is_empty(conn_list)) {
23
+ while (conn_list && !g_queue_is_empty(conn_list)) {
24
connection_destroy(g_queue_pop_head(conn_list));
25
}
26
}
27
--
28
2.7.4
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Zhang Chen <chen.zhang@intel.com>
2
2
3
Make colo-compare and filter-rewriter able to parse vnet packets.
3
When COLO uses only one vnet_hdr_support parameter between
4
filter-redirector and filter-mirror (or colo-compare), COLO will crash
5
with a segmentation fault. The backtrace is as follows:
4
6
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault.
8
0x0000555555cb200b in eth_get_l2_hdr_length (p=0x0)
9
at /home/tao/project/COLO/colo-qemu/include/net/eth.h:296
10
296 uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto);
11
(gdb) bt
12
0 0x0000555555cb200b in eth_get_l2_hdr_length (p=0x0)
13
at /home/tao/project/COLO/colo-qemu/include/net/eth.h:296
14
1 0x0000555555cb22b4 in parse_packet_early (pkt=0x555556a44840) at
15
net/colo.c:49
16
2 0x0000555555cb2b91 in is_tcp_packet (pkt=0x555556a44840) at
17
net/filter-rewriter.c:63
18
19
So a wrong vnet_hdr_len will cause pkt->data to become NULL. Add a check to
20
raise an error and add a trace event to track vnet_hdr_len.
21
22
Signed-off-by: Tao Xu <tao3.xu@intel.com>
23
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
24
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
25
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
26
---
8
net/colo.c | 6 +++---
27
net/colo.c | 9 ++++++++-
9
1 file changed, 3 insertions(+), 3 deletions(-)
28
net/trace-events | 1 +
29
2 files changed, 9 insertions(+), 1 deletion(-)
10
30
11
diff --git a/net/colo.c b/net/colo.c
31
diff --git a/net/colo.c b/net/colo.c
12
index XXXXXXX..XXXXXXX 100644
32
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo.c
33
--- a/net/colo.c
14
+++ b/net/colo.c
34
+++ b/net/colo.c
15
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
35
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
16
{
17
int network_length;
18
static const uint8_t vlan[] = {0x81, 0x00};
36
static const uint8_t vlan[] = {0x81, 0x00};
19
- uint8_t *data = pkt->data;
37
uint8_t *data = pkt->data + pkt->vnet_hdr_len;
20
+ uint8_t *data = pkt->data + pkt->vnet_hdr_len;
21
uint16_t l3_proto;
38
uint16_t l3_proto;
22
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
39
- ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
23
40
+ ssize_t l2hdr_len;
24
- if (pkt->size < ETH_HLEN) {
41
+
25
+ if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
42
+ if (data == NULL) {
43
+ trace_colo_proxy_main_vnet_info("This packet is not parsed correctly, "
44
+ "pkt->vnet_hdr_len", pkt->vnet_hdr_len);
45
+ return 1;
46
+ }
47
+ l2hdr_len = eth_get_l2_hdr_length(data);
48
49
if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
26
trace_colo_proxy_main("pkt->size < ETH_HLEN");
50
trace_colo_proxy_main("pkt->size < ETH_HLEN");
27
return 1;
51
diff --git a/net/trace-events b/net/trace-events
28
}
52
index XXXXXXX..XXXXXXX 100644
29
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
53
--- a/net/trace-events
30
}
54
+++ b/net/trace-events
31
55
@@ -XXX,XX +XXX,XX @@ vhost_user_event(const char *chr, int event) "chr: %s got event: %d"
32
network_length = pkt->ip->ip_hl * 4;
56
33
- if (pkt->size < l2hdr_len + network_length) {
57
# colo.c
34
+ if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) {
58
colo_proxy_main(const char *chr) ": %s"
35
trace_colo_proxy_main("pkt->size < network_header + network_length");
59
+colo_proxy_main_vnet_info(const char *sta, int size) ": %s = %d"
36
return 1;
60
37
}
61
# colo-compare.c
62
colo_compare_main(const char *chr) ": %s"
38
--
63
--
39
2.7.4
64
2.7.4
40
41
diff view generated by jsdifflib