1 | The following changes since commit 6632f6ff96f0537fc34cdc00c760656fc62e23c5: | 1 | The following changes since commit d48125de38f48a61d6423ef6a01156d6dff9ee2c: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/famz/tags/block-and-testing-pull-request' into staging (2017-07-17 11:46:36 +0100) | 3 | Merge tag 'kraxel-20220719-pull-request' of https://gitlab.com/kraxel/qemu into staging (2022-07-19 17:40:36 +0100) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the git repository at: |
6 | 6 | ||
7 | https://github.com/jasowang/qemu.git tags/net-pull-request | 7 | https://github.com/jasowang/qemu.git tags/net-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to 189ae6bb5ce1f5a322f8691d00fe942ba43dd601: | 9 | for you to fetch changes up to 8bdab83b34efb0b598be4e5b98e4f466ca5f2f80: |
10 | 10 | ||
11 | virtio-net: fix offload ctrl endian (2017-07-17 20:13:56 +0800) | 11 | net/colo.c: fix segmentation fault when packet is not parsed correctly (2022-07-20 16:58:08 +0800) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | 14 | ||
15 | - fix virtio-net ctrl offload endian | 15 | Changes since V1: |
16 | - vnet header support for various COLO netfilters and compare thread | 16 | - Fix build errors of vhost-vdpa when virtio-net is not set |
17 | 17 | ||
18 | ---------------------------------------------------------------- | 18 | ---------------------------------------------------------------- |
19 | Jason Wang (1): | 19 | Eugenio Pérez (21): |
20 | virtio-net: fix offload ctrl endian | 20 | vhost: move descriptor translation to vhost_svq_vring_write_descs |
21 | virtio-net: Expose MAC_TABLE_ENTRIES | ||
22 | virtio-net: Expose ctrl virtqueue logic | ||
23 | vdpa: Avoid compiler to squash reads to used idx | ||
24 | vhost: Reorder vhost_svq_kick | ||
25 | vhost: Move vhost_svq_kick call to vhost_svq_add | ||
26 | vhost: Check for queue full at vhost_svq_add | ||
27 | vhost: Decouple vhost_svq_add from VirtQueueElement | ||
28 | vhost: Add SVQDescState | ||
29 | vhost: Track number of descs in SVQDescState | ||
30 | vhost: add vhost_svq_push_elem | ||
31 | vhost: Expose vhost_svq_add | ||
32 | vhost: add vhost_svq_poll | ||
33 | vhost: Add svq avail_handler callback | ||
34 | vdpa: Export vhost_vdpa_dma_map and unmap calls | ||
35 | vhost-net-vdpa: add stubs for when no virtio-net device is present | ||
36 | vdpa: manual forward CVQ buffers | ||
37 | vdpa: Buffer CVQ support on shadow virtqueue | ||
38 | vdpa: Extract get features part from vhost_vdpa_get_max_queue_pairs | ||
39 | vdpa: Add device migration blocker | ||
40 | vdpa: Add x-svq to NetdevVhostVDPAOptions | ||
21 | 41 | ||
22 | Michal Privoznik (1): | 42 | Zhang Chen (4): |
23 | virtion-net: Prefer is_power_of_2() | 43 | softmmu/runstate.c: add RunStateTransition support form COLO to PRELAUNCH |
44 | net/colo: Fix a "double free" crash to clear the conn_list | ||
45 | net/colo.c: No need to track conn_list for filter-rewriter | ||
46 | net/colo.c: fix segmentation fault when packet is not parsed correctly | ||
24 | 47 | ||
25 | Zhang Chen (12): | 48 | hw/net/virtio-net.c | 85 +++++---- |
26 | net: Add vnet_hdr_len arguments in NetClientState | 49 | hw/virtio/vhost-shadow-virtqueue.c | 210 +++++++++++++++------- |
27 | net/net.c: Add vnet_hdr support in SocketReadState | 50 | hw/virtio/vhost-shadow-virtqueue.h | 52 +++++- |
28 | net/filter-mirror.c: Introduce parameter for filter_send() | 51 | hw/virtio/vhost-vdpa.c | 26 ++- |
29 | net/filter-mirror.c: Make filter mirror support vnet support. | 52 | include/hw/virtio/vhost-vdpa.h | 8 + |
30 | net/filter-mirror.c: Add new option to enable vnet support for filter-redirector | 53 | include/hw/virtio/virtio-net.h | 7 + |
31 | net/colo.c: Make vnet_hdr_len as packet property | 54 | net/colo-compare.c | 2 +- |
32 | net/colo-compare.c: Introduce parameter for compare_chr_send() | 55 | net/colo.c | 11 +- |
33 | net/colo-compare.c: Make colo-compare support vnet_hdr_len | 56 | net/filter-rewriter.c | 2 +- |
34 | net/colo.c: Add vnet packet parse feature in colo-proxy | 57 | net/meson.build | 3 +- |
35 | net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare | 58 | net/trace-events | 1 + |
36 | net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len | 59 | net/vhost-vdpa-stub.c | 21 +++ |
37 | docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header | 60 | net/vhost-vdpa.c | 357 +++++++++++++++++++++++++++++++++++-- |
38 | 61 | qapi/net.json | 9 +- | |
39 | docs/colo-proxy.txt | 26 ++++++++++++++++ | 62 | softmmu/runstate.c | 1 + |
40 | hw/net/virtio-net.c | 4 ++- | 63 | 15 files changed, 671 insertions(+), 124 deletions(-) |
41 | include/net/net.h | 10 ++++-- | 64 | create mode 100644 net/vhost-vdpa-stub.c |
42 | net/colo-compare.c | 84 ++++++++++++++++++++++++++++++++++++++++++--------- | ||
43 | net/colo.c | 9 +++--- | ||
44 | net/colo.h | 4 ++- | ||
45 | net/filter-mirror.c | 75 +++++++++++++++++++++++++++++++++++++++++---- | ||
46 | net/filter-rewriter.c | 37 ++++++++++++++++++++++- | ||
47 | net/net.c | 37 ++++++++++++++++++++--- | ||
48 | net/socket.c | 8 ++--- | ||
49 | qemu-options.hx | 19 ++++++------ | ||
50 | 11 files changed, 265 insertions(+), 48 deletions(-) | ||
51 | 65 | ||
52 | 66 | ||
67 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch changes the compare_chr_send() parameter from CharBackend to CompareState, | 3 | It's done for both in and out descriptors so it's better placed here. |
4 | so we can get more information like vnet_hdr (we use it to support packets with vnet_header). | ||
5 | 4 | ||
6 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 5 | Acked-by: Jason Wang <jasowang@redhat.com> |
6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 8 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
8 | --- | 9 | --- |
9 | net/colo-compare.c | 14 +++++++------- | 10 | hw/virtio/vhost-shadow-virtqueue.c | 38 +++++++++++++++++++++++++++----------- |
10 | 1 file changed, 7 insertions(+), 7 deletions(-) | 11 | 1 file changed, 27 insertions(+), 11 deletions(-) |
11 | 12 | ||
12 | diff --git a/net/colo-compare.c b/net/colo-compare.c | 13 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c |
13 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/net/colo-compare.c | 15 | --- a/hw/virtio/vhost-shadow-virtqueue.c |
15 | +++ b/net/colo-compare.c | 16 | +++ b/hw/virtio/vhost-shadow-virtqueue.c |
16 | @@ -XXX,XX +XXX,XX @@ enum { | 17 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, |
17 | SECONDARY_IN, | 18 | return true; |
18 | }; | 19 | } |
19 | 20 | ||
20 | -static int compare_chr_send(CharBackend *out, | 21 | -static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, |
21 | +static int compare_chr_send(CompareState *s, | 22 | - const struct iovec *iovec, size_t num, |
22 | const uint8_t *buf, | 23 | - bool more_descs, bool write) |
23 | uint32_t size); | 24 | +/** |
24 | 25 | + * Write descriptors to SVQ vring | |
25 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data) | 26 | + * |
26 | } | 27 | + * @svq: The shadow virtqueue |
27 | 28 | + * @sg: Cache for hwaddr | |
28 | if (result) { | 29 | + * @iovec: The iovec from the guest |
29 | - ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size); | 30 | + * @num: iovec length |
30 | + ret = compare_chr_send(s, pkt->data, pkt->size); | 31 | + * @more_descs: True if more descriptors come in the chain |
31 | if (ret < 0) { | 32 | + * @write: True if they are writeable descriptors |
32 | error_report("colo_send_primary_packet failed"); | 33 | + * |
33 | } | 34 | + * Return true if success, false otherwise and print error. |
34 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data) | 35 | + */ |
36 | +static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, | ||
37 | + const struct iovec *iovec, size_t num, | ||
38 | + bool more_descs, bool write) | ||
39 | { | ||
40 | uint16_t i = svq->free_head, last = svq->free_head; | ||
41 | unsigned n; | ||
42 | uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0; | ||
43 | vring_desc_t *descs = svq->vring.desc; | ||
44 | + bool ok; | ||
45 | |||
46 | if (num == 0) { | ||
47 | - return; | ||
48 | + return true; | ||
49 | + } | ||
50 | + | ||
51 | + ok = vhost_svq_translate_addr(svq, sg, iovec, num); | ||
52 | + if (unlikely(!ok)) { | ||
53 | + return false; | ||
35 | } | 54 | } |
55 | |||
56 | for (n = 0; n < num; n++) { | ||
57 | @@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, | ||
58 | } | ||
59 | |||
60 | svq->free_head = le16_to_cpu(svq->desc_next[last]); | ||
61 | + return true; | ||
36 | } | 62 | } |
37 | 63 | ||
38 | -static int compare_chr_send(CharBackend *out, | 64 | static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, |
39 | +static int compare_chr_send(CompareState *s, | 65 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, |
40 | const uint8_t *buf, | 66 | return false; |
41 | uint32_t size) | ||
42 | { | ||
43 | @@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CharBackend *out, | ||
44 | return 0; | ||
45 | } | 67 | } |
46 | 68 | ||
47 | - ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len)); | 69 | - ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num); |
48 | + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); | 70 | + ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, |
49 | if (ret != sizeof(len)) { | 71 | + elem->in_num > 0, false); |
50 | goto err; | 72 | if (unlikely(!ok)) { |
73 | return false; | ||
51 | } | 74 | } |
52 | 75 | - vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, | |
53 | - ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size); | 76 | - elem->in_num > 0, false); |
54 | + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); | 77 | - |
55 | if (ret != size) { | 78 | |
56 | goto err; | 79 | - ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num); |
80 | + ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, | ||
81 | + true); | ||
82 | if (unlikely(!ok)) { | ||
83 | return false; | ||
57 | } | 84 | } |
58 | @@ -XXX,XX +XXX,XX @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs) | 85 | |
59 | 86 | - vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true); | |
60 | if (packet_enqueue(s, PRIMARY_IN)) { | 87 | - |
61 | trace_colo_compare_main("primary: unsupported packet in"); | 88 | /* |
62 | - compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len); | 89 | * Put the entry in the available array (but don't update avail->idx until |
63 | + compare_chr_send(s, pri_rs->buf, pri_rs->packet_len); | 90 | * they do sync). |
64 | } else { | ||
65 | /* compare connection */ | ||
66 | g_queue_foreach(&s->conn_list, colo_compare_connection, s); | ||
67 | @@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data) | ||
68 | |||
69 | while (!g_queue_is_empty(&conn->primary_list)) { | ||
70 | pkt = g_queue_pop_head(&conn->primary_list); | ||
71 | - compare_chr_send(&s->chr_out, pkt->data, pkt->size); | ||
72 | + compare_chr_send(s, pkt->data, pkt->size); | ||
73 | packet_destroy(pkt, NULL); | ||
74 | } | ||
75 | while (!g_queue_is_empty(&conn->secondary_list)) { | ||
76 | -- | 91 | -- |
77 | 2.7.4 | 92 | 2.7.4 |
78 | 93 | ||
79 | 94 | diff view generated by jsdifflib |
1 | Spec said offloads should be le64, so use virtio_ldq_p() to guarantee | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | valid endian. | ||
3 | 2 | ||
4 | Fixes: 644c98587d4c ("virtio-net: dynamic network offloads configuration") | 3 | vhost-vdpa control virtqueue needs to know the maximum entries supported |
5 | Cc: qemu-stable@nongnu.org | 4 | by the virtio-net device, so we know if it is possible to apply the |
6 | Cc: Dmitry Fleytman <dfleytma@redhat.com> | 5 | filter. |
6 | |||
7 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
8 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 9 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
8 | --- | 10 | --- |
9 | hw/net/virtio-net.c | 2 ++ | 11 | hw/net/virtio-net.c | 1 - |
10 | 1 file changed, 2 insertions(+) | 12 | include/hw/virtio/virtio-net.h | 3 +++ |
13 | 2 files changed, 3 insertions(+), 1 deletion(-) | ||
11 | 14 | ||
12 | diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c | 15 | diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c |
13 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/hw/net/virtio-net.c | 17 | --- a/hw/net/virtio-net.c |
15 | +++ b/hw/net/virtio-net.c | 18 | +++ b/hw/net/virtio-net.c |
16 | @@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd, | 19 | @@ -XXX,XX +XXX,XX @@ |
17 | if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) { | 20 | |
18 | uint64_t supported_offloads; | 21 | #define VIRTIO_NET_VM_VERSION 11 |
19 | 22 | ||
20 | + offloads = virtio_ldq_p(vdev, &offloads); | 23 | -#define MAC_TABLE_ENTRIES 64 |
24 | #define MAX_VLAN (1 << 12) /* Per 802.1Q definition */ | ||
25 | |||
26 | /* previously fixed value */ | ||
27 | diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h | ||
28 | index XXXXXXX..XXXXXXX 100644 | ||
29 | --- a/include/hw/virtio/virtio-net.h | ||
30 | +++ b/include/hw/virtio/virtio-net.h | ||
31 | @@ -XXX,XX +XXX,XX @@ OBJECT_DECLARE_SIMPLE_TYPE(VirtIONet, VIRTIO_NET) | ||
32 | * and latency. */ | ||
33 | #define TX_BURST 256 | ||
34 | |||
35 | +/* Maximum VIRTIO_NET_CTRL_MAC_TABLE_SET unicast + multicast entries. */ | ||
36 | +#define MAC_TABLE_ENTRIES 64 | ||
21 | + | 37 | + |
22 | if (!n->has_vnet_hdr) { | 38 | typedef struct virtio_net_conf |
23 | return VIRTIO_NET_ERR; | 39 | { |
24 | } | 40 | uint32_t txtimer; |
25 | -- | 41 | -- |
26 | 2.7.4 | 42 | 2.7.4 |
27 | 43 | ||
28 | 44 | diff view generated by jsdifflib |
1 | From: Michal Privoznik <mprivozn@redhat.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We have a function that checks if given number is power of two. | 3 | This allows external vhost-net devices to modify the state of the |
4 | We should prefer it instead of expanding the check on our own. | 4 | VirtIO device model once the vhost-vdpa device has acknowledged the |
5 | control commands. | ||
5 | 6 | ||
6 | Signed-off-by: Michal Privoznik <mprivozn@redhat.com> | 7 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> |
8 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 9 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
8 | --- | 10 | --- |
9 | hw/net/virtio-net.c | 2 +- | 11 | hw/net/virtio-net.c | 84 ++++++++++++++++++++++++------------------ |
10 | 1 file changed, 1 insertion(+), 1 deletion(-) | 12 | include/hw/virtio/virtio-net.h | 4 ++ |
13 | 2 files changed, 53 insertions(+), 35 deletions(-) | ||
11 | 14 | ||
12 | diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c | 15 | diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c |
13 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/hw/net/virtio-net.c | 17 | --- a/hw/net/virtio-net.c |
15 | +++ b/hw/net/virtio-net.c | 18 | +++ b/hw/net/virtio-net.c |
16 | @@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp) | 19 | @@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_mq(VirtIONet *n, uint8_t cmd, |
17 | */ | 20 | return VIRTIO_NET_OK; |
18 | if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE || | 21 | } |
19 | n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE || | 22 | |
20 | - (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) { | 23 | -static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) |
21 | + !is_power_of_2(n->net_conf.rx_queue_size)) { | 24 | +size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, |
22 | error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), " | 25 | + const struct iovec *in_sg, unsigned in_num, |
23 | "must be a power of 2 between %d and %d.", | 26 | + const struct iovec *out_sg, |
24 | n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE, | 27 | + unsigned out_num) |
28 | { | ||
29 | VirtIONet *n = VIRTIO_NET(vdev); | ||
30 | struct virtio_net_ctrl_hdr ctrl; | ||
31 | virtio_net_ctrl_ack status = VIRTIO_NET_ERR; | ||
32 | - VirtQueueElement *elem; | ||
33 | size_t s; | ||
34 | struct iovec *iov, *iov2; | ||
35 | - unsigned int iov_cnt; | ||
36 | + | ||
37 | + if (iov_size(in_sg, in_num) < sizeof(status) || | ||
38 | + iov_size(out_sg, out_num) < sizeof(ctrl)) { | ||
39 | + virtio_error(vdev, "virtio-net ctrl missing headers"); | ||
40 | + return 0; | ||
41 | + } | ||
42 | + | ||
43 | + iov2 = iov = g_memdup2(out_sg, sizeof(struct iovec) * out_num); | ||
44 | + s = iov_to_buf(iov, out_num, 0, &ctrl, sizeof(ctrl)); | ||
45 | + iov_discard_front(&iov, &out_num, sizeof(ctrl)); | ||
46 | + if (s != sizeof(ctrl)) { | ||
47 | + status = VIRTIO_NET_ERR; | ||
48 | + } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { | ||
49 | + status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, out_num); | ||
50 | + } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { | ||
51 | + status = virtio_net_handle_mac(n, ctrl.cmd, iov, out_num); | ||
52 | + } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { | ||
53 | + status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, out_num); | ||
54 | + } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { | ||
55 | + status = virtio_net_handle_announce(n, ctrl.cmd, iov, out_num); | ||
56 | + } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { | ||
57 | + status = virtio_net_handle_mq(n, ctrl.cmd, iov, out_num); | ||
58 | + } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { | ||
59 | + status = virtio_net_handle_offloads(n, ctrl.cmd, iov, out_num); | ||
60 | + } | ||
61 | + | ||
62 | + s = iov_from_buf(in_sg, in_num, 0, &status, sizeof(status)); | ||
63 | + assert(s == sizeof(status)); | ||
64 | + | ||
65 | + g_free(iov2); | ||
66 | + return sizeof(status); | ||
67 | +} | ||
68 | + | ||
69 | +static void virtio_net_handle_ctrl(VirtIODevice *vdev, VirtQueue *vq) | ||
70 | +{ | ||
71 | + VirtQueueElement *elem; | ||
72 | |||
73 | for (;;) { | ||
74 | + size_t written; | ||
75 | elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); | ||
76 | if (!elem) { | ||
77 | break; | ||
78 | } | ||
79 | - if (iov_size(elem->in_sg, elem->in_num) < sizeof(status) || | ||
80 | - iov_size(elem->out_sg, elem->out_num) < sizeof(ctrl)) { | ||
81 | - virtio_error(vdev, "virtio-net ctrl missing headers"); | ||
82 | + | ||
83 | + written = virtio_net_handle_ctrl_iov(vdev, elem->in_sg, elem->in_num, | ||
84 | + elem->out_sg, elem->out_num); | ||
85 | + if (written > 0) { | ||
86 | + virtqueue_push(vq, elem, written); | ||
87 | + virtio_notify(vdev, vq); | ||
88 | + g_free(elem); | ||
89 | + } else { | ||
90 | virtqueue_detach_element(vq, elem, 0); | ||
91 | g_free(elem); | ||
92 | break; | ||
93 | } | ||
94 | - | ||
95 | - iov_cnt = elem->out_num; | ||
96 | - iov2 = iov = g_memdup2(elem->out_sg, | ||
97 | - sizeof(struct iovec) * elem->out_num); | ||
98 | - s = iov_to_buf(iov, iov_cnt, 0, &ctrl, sizeof(ctrl)); | ||
99 | - iov_discard_front(&iov, &iov_cnt, sizeof(ctrl)); | ||
100 | - if (s != sizeof(ctrl)) { | ||
101 | - status = VIRTIO_NET_ERR; | ||
102 | - } else if (ctrl.class == VIRTIO_NET_CTRL_RX) { | ||
103 | - status = virtio_net_handle_rx_mode(n, ctrl.cmd, iov, iov_cnt); | ||
104 | - } else if (ctrl.class == VIRTIO_NET_CTRL_MAC) { | ||
105 | - status = virtio_net_handle_mac(n, ctrl.cmd, iov, iov_cnt); | ||
106 | - } else if (ctrl.class == VIRTIO_NET_CTRL_VLAN) { | ||
107 | - status = virtio_net_handle_vlan_table(n, ctrl.cmd, iov, iov_cnt); | ||
108 | - } else if (ctrl.class == VIRTIO_NET_CTRL_ANNOUNCE) { | ||
109 | - status = virtio_net_handle_announce(n, ctrl.cmd, iov, iov_cnt); | ||
110 | - } else if (ctrl.class == VIRTIO_NET_CTRL_MQ) { | ||
111 | - status = virtio_net_handle_mq(n, ctrl.cmd, iov, iov_cnt); | ||
112 | - } else if (ctrl.class == VIRTIO_NET_CTRL_GUEST_OFFLOADS) { | ||
113 | - status = virtio_net_handle_offloads(n, ctrl.cmd, iov, iov_cnt); | ||
114 | - } | ||
115 | - | ||
116 | - s = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, sizeof(status)); | ||
117 | - assert(s == sizeof(status)); | ||
118 | - | ||
119 | - virtqueue_push(vq, elem, sizeof(status)); | ||
120 | - virtio_notify(vdev, vq); | ||
121 | - g_free(iov2); | ||
122 | - g_free(elem); | ||
123 | } | ||
124 | } | ||
125 | |||
126 | diff --git a/include/hw/virtio/virtio-net.h b/include/hw/virtio/virtio-net.h | ||
127 | index XXXXXXX..XXXXXXX 100644 | ||
128 | --- a/include/hw/virtio/virtio-net.h | ||
129 | +++ b/include/hw/virtio/virtio-net.h | ||
130 | @@ -XXX,XX +XXX,XX @@ struct VirtIONet { | ||
131 | struct EBPFRSSContext ebpf_rss; | ||
132 | }; | ||
133 | |||
134 | +size_t virtio_net_handle_ctrl_iov(VirtIODevice *vdev, | ||
135 | + const struct iovec *in_sg, unsigned in_num, | ||
136 | + const struct iovec *out_sg, | ||
137 | + unsigned out_num); | ||
138 | void virtio_net_set_netclient_name(VirtIONet *n, const char *name, | ||
139 | const char *type); | ||
140 | |||
25 | -- | 141 | -- |
26 | 2.7.4 | 142 | 2.7.4 |
27 | 143 | ||
28 | 144 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | In the next patch we will allow busypolling of this value. The compiler | ||
4 | has a running path where shadow_used_idx, last_used_idx, and vring used | ||
5 | idx are not modified within the same thread busypolling. | ||
6 | |||
7 | This was not an issue before since we always cleared device event | ||
8 | notifier before checking it, and that could act as memory barrier. | ||
9 | However, the busypoll needs something similar to kernel READ_ONCE. | ||
10 | |||
11 | Let's add it here, separated from the polling. | ||
12 | |||
13 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
14 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
15 | --- | ||
16 | hw/virtio/vhost-shadow-virtqueue.c | 3 ++- | ||
17 | 1 file changed, 2 insertions(+), 1 deletion(-) | ||
18 | |||
19 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/hw/virtio/vhost-shadow-virtqueue.c | ||
22 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | ||
23 | @@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick_notifier(EventNotifier *n) | ||
24 | |||
25 | static bool vhost_svq_more_used(VhostShadowVirtqueue *svq) | ||
26 | { | ||
27 | + uint16_t *used_idx = &svq->vring.used->idx; | ||
28 | if (svq->last_used_idx != svq->shadow_used_idx) { | ||
29 | return true; | ||
30 | } | ||
31 | |||
32 | - svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx); | ||
33 | + svq->shadow_used_idx = cpu_to_le16(*(volatile uint16_t *)used_idx); | ||
34 | |||
35 | return svq->last_used_idx != svq->shadow_used_idx; | ||
36 | } | ||
37 | -- | ||
38 | 2.7.4 | ||
39 | |||
40 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We add the vnet_hdr_support option for filter-redirector, default is disabled. | 3 | Future code needs to call it from vhost_svq_add. |
4 | If you use the virtio-net-pci net driver or another driver that needs vnet_hdr, please enable it. | ||
5 | Because colo-compare and other modules need the vnet_hdr_len to parse | ||
6 | packets, we add this new option to send the length to them. | ||
7 | You can use it for example: | ||
8 | -object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support | ||
9 | 4 | ||
10 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 5 | No functional change intended. |
6 | |||
7 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
8 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
11 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 9 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
12 | --- | 10 | --- |
13 | net/filter-mirror.c | 23 +++++++++++++++++++++++ | 11 | hw/virtio/vhost-shadow-virtqueue.c | 28 ++++++++++++++-------------- |
14 | qemu-options.hx | 6 +++--- | 12 | 1 file changed, 14 insertions(+), 14 deletions(-) |
15 | 2 files changed, 26 insertions(+), 3 deletions(-) | ||
16 | 13 | ||
17 | diff --git a/net/filter-mirror.c b/net/filter-mirror.c | 14 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c |
18 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/net/filter-mirror.c | 16 | --- a/hw/virtio/vhost-shadow-virtqueue.c |
20 | +++ b/net/filter-mirror.c | 17 | +++ b/hw/virtio/vhost-shadow-virtqueue.c |
21 | @@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj, | 18 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, |
22 | s->outdev = g_strdup(value); | 19 | return true; |
23 | } | 20 | } |
24 | 21 | ||
25 | +static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp) | 22 | +static void vhost_svq_kick(VhostShadowVirtqueue *svq) |
26 | +{ | 23 | +{ |
27 | + MirrorState *s = FILTER_REDIRECTOR(obj); | 24 | + /* |
25 | + * We need to expose the available array entries before checking the used | ||
26 | + * flags | ||
27 | + */ | ||
28 | + smp_mb(); | ||
29 | + if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { | ||
30 | + return; | ||
31 | + } | ||
28 | + | 32 | + |
29 | + return s->vnet_hdr; | 33 | + event_notifier_set(&svq->hdev_kick); |
30 | +} | 34 | +} |
31 | + | 35 | + |
32 | +static void filter_redirector_set_vnet_hdr(Object *obj, | 36 | /** |
33 | + bool value, | 37 | * Add an element to a SVQ. |
34 | + Error **errp) | 38 | * |
35 | +{ | 39 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) |
36 | + MirrorState *s = FILTER_REDIRECTOR(obj); | 40 | return true; |
37 | + | ||
38 | + s->vnet_hdr = value; | ||
39 | +} | ||
40 | + | ||
41 | static void filter_mirror_init(Object *obj) | ||
42 | { | ||
43 | MirrorState *s = FILTER_MIRROR(obj); | ||
44 | @@ -XXX,XX +XXX,XX @@ static void filter_mirror_init(Object *obj) | ||
45 | |||
46 | static void filter_redirector_init(Object *obj) | ||
47 | { | ||
48 | + MirrorState *s = FILTER_REDIRECTOR(obj); | ||
49 | + | ||
50 | object_property_add_str(obj, "indev", filter_redirector_get_indev, | ||
51 | filter_redirector_set_indev, NULL); | ||
52 | object_property_add_str(obj, "outdev", filter_redirector_get_outdev, | ||
53 | filter_redirector_set_outdev, NULL); | ||
54 | + | ||
55 | + s->vnet_hdr = false; | ||
56 | + object_property_add_bool(obj, "vnet_hdr_support", | ||
57 | + filter_redirector_get_vnet_hdr, | ||
58 | + filter_redirector_set_vnet_hdr, NULL); | ||
59 | } | 41 | } |
60 | 42 | ||
61 | static void filter_mirror_fini(Object *obj) | 43 | -static void vhost_svq_kick(VhostShadowVirtqueue *svq) |
62 | diff --git a/qemu-options.hx b/qemu-options.hx | 44 | -{ |
63 | index XXXXXXX..XXXXXXX 100644 | 45 | - /* |
64 | --- a/qemu-options.hx | 46 | - * We need to expose the available array entries before checking the used |
65 | +++ b/qemu-options.hx | 47 | - * flags |
66 | @@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter. | 48 | - */ |
67 | 49 | - smp_mb(); | |
68 | filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len. | 50 | - if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) { |
69 | 51 | - return; | |
70 | -@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid}, | 52 | - } |
71 | -outdev=@var{chardevid}[,queue=@var{all|rx|tx}] | 53 | - |
72 | +@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support] | 54 | - event_notifier_set(&svq->hdev_kick); |
73 | 55 | -} | |
74 | filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev | 56 | - |
75 | -@var{chardevid},and redirect indev's packet to filter. | 57 | /** |
76 | +@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag, | 58 | * Forward available buffers. |
77 | +filter-redirector will redirect packet with vnet_hdr_len. | 59 | * |
78 | Create a filter-redirector we need to differ outdev id from indev id, id can not | ||
79 | be the same. we can just use indev or outdev, but at least one of indev or outdev | ||
80 | need to be specified. | ||
81 | -- | 60 | -- |
82 | 2.7.4 | 61 | 2.7.4 |
83 | 62 | ||
84 | 63 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | The series needs to expose vhost_svq_add with full functionality, | ||
4 | including kick | ||
5 | |||
6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/virtio/vhost-shadow-virtqueue.c | 2 +- | ||
11 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
12 | |||
13 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/hw/virtio/vhost-shadow-virtqueue.c | ||
16 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) | ||
18 | } | ||
19 | |||
20 | svq->ring_id_maps[qemu_head] = elem; | ||
21 | + vhost_svq_kick(svq); | ||
22 | return true; | ||
23 | } | ||
24 | |||
25 | @@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) | ||
26 | /* VQ is broken, just return and ignore any other kicks */ | ||
27 | return; | ||
28 | } | ||
29 | - vhost_svq_kick(svq); | ||
30 | } | ||
31 | |||
32 | virtio_queue_set_notification(svq->vq, true); | ||
33 | -- | ||
34 | 2.7.4 | ||
35 | |||
36 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | The series needs to expose vhost_svq_add with full functionality, | ||
4 | including checking for full queue. | ||
5 | |||
6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/virtio/vhost-shadow-virtqueue.c | 59 +++++++++++++++++++++----------------- | ||
11 | 1 file changed, 33 insertions(+), 26 deletions(-) | ||
12 | |||
13 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/hw/virtio/vhost-shadow-virtqueue.c | ||
16 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) | ||
18 | * Add an element to a SVQ. | ||
19 | * | ||
20 | * The caller must check that there is enough slots for the new element. It | ||
21 | - * takes ownership of the element: In case of failure, it is free and the SVQ | ||
22 | - * is considered broken. | ||
23 | + * takes ownership of the element: In case of failure not ENOSPC, it is free. | ||
24 | + * | ||
25 | + * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full | ||
26 | */ | ||
27 | -static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) | ||
28 | +static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) | ||
29 | { | ||
30 | unsigned qemu_head; | ||
31 | - bool ok = vhost_svq_add_split(svq, elem, &qemu_head); | ||
32 | + unsigned ndescs = elem->in_num + elem->out_num; | ||
33 | + bool ok; | ||
34 | + | ||
35 | + if (unlikely(ndescs > vhost_svq_available_slots(svq))) { | ||
36 | + return -ENOSPC; | ||
37 | + } | ||
38 | + | ||
39 | + ok = vhost_svq_add_split(svq, elem, &qemu_head); | ||
40 | if (unlikely(!ok)) { | ||
41 | g_free(elem); | ||
42 | - return false; | ||
43 | + return -EINVAL; | ||
44 | } | ||
45 | |||
46 | svq->ring_id_maps[qemu_head] = elem; | ||
47 | vhost_svq_kick(svq); | ||
48 | - return true; | ||
49 | + return 0; | ||
50 | } | ||
51 | |||
52 | /** | ||
53 | @@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) | ||
54 | |||
55 | while (true) { | ||
56 | VirtQueueElement *elem; | ||
57 | - bool ok; | ||
58 | + int r; | ||
59 | |||
60 | if (svq->next_guest_avail_elem) { | ||
61 | elem = g_steal_pointer(&svq->next_guest_avail_elem); | ||
62 | @@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) | ||
63 | break; | ||
64 | } | ||
65 | |||
66 | - if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) { | ||
67 | - /* | ||
68 | - * This condition is possible since a contiguous buffer in GPA | ||
69 | - * does not imply a contiguous buffer in qemu's VA | ||
70 | - * scatter-gather segments. If that happens, the buffer exposed | ||
71 | - * to the device needs to be a chain of descriptors at this | ||
72 | - * moment. | ||
73 | - * | ||
74 | - * SVQ cannot hold more available buffers if we are here: | ||
75 | - * queue the current guest descriptor and ignore further kicks | ||
76 | - * until some elements are used. | ||
77 | - */ | ||
78 | - svq->next_guest_avail_elem = elem; | ||
79 | - return; | ||
80 | - } | ||
81 | - | ||
82 | - ok = vhost_svq_add(svq, elem); | ||
83 | - if (unlikely(!ok)) { | ||
84 | - /* VQ is broken, just return and ignore any other kicks */ | ||
85 | + r = vhost_svq_add(svq, elem); | ||
86 | + if (unlikely(r != 0)) { | ||
87 | + if (r == -ENOSPC) { | ||
88 | + /* | ||
89 | + * This condition is possible since a contiguous buffer in | ||
90 | + * GPA does not imply a contiguous buffer in qemu's VA | ||
91 | + * scatter-gather segments. If that happens, the buffer | ||
92 | + * exposed to the device needs to be a chain of descriptors | ||
93 | + * at this moment. | ||
94 | + * | ||
95 | + * SVQ cannot hold more available buffers if we are here: | ||
96 | + * queue the current guest descriptor and ignore kicks | ||
97 | + * until some elements are used. | ||
98 | + */ | ||
99 | + svq->next_guest_avail_elem = elem; | ||
100 | + } | ||
101 | + | ||
102 | + /* VQ is full or broken, just return and ignore kicks */ | ||
103 | return; | ||
104 | } | ||
105 | } | ||
106 | -- | ||
107 | 2.7.4 | ||
108 | |||
109 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We add the vnet_hdr_support option for filter-mirror, default is disabled. | 3 | VirtQueueElement comes from the guest, but we're heading SVQ to be able |
4 | If you use virtio-net-pci or other driver needs vnet_hdr, please enable it. | 4 | to modify the element presented to the device without the guest's |
5 | You can use it for example: | 5 | knowledge. |
6 | -object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support | ||
7 | 6 | ||
8 | If it has vnet_hdr_support flag, we will change the sending packet format from | 7 | To do so, make SVQ accept sg buffers directly, instead of using |
9 | struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];}. | 8 | VirtQueueElement. |
10 | make other module(like colo-compare) know how to parse net packet correctly. | ||
11 | 9 | ||
12 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 10 | Add vhost_svq_add_element to maintain element convenience. |
11 | |||
12 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
13 | Acked-by: Jason Wang <jasowang@redhat.com> | ||
14 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
13 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 15 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
14 | --- | 16 | --- |
15 | net/filter-mirror.c | 42 +++++++++++++++++++++++++++++++++++++++++- | 17 | hw/virtio/vhost-shadow-virtqueue.c | 33 ++++++++++++++++++++++----------- |
16 | qemu-options.hx | 5 ++--- | 18 | 1 file changed, 22 insertions(+), 11 deletions(-) |
17 | 2 files changed, 43 insertions(+), 4 deletions(-) | ||
18 | 19 | ||
19 | diff --git a/net/filter-mirror.c b/net/filter-mirror.c | 20 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c |
20 | index XXXXXXX..XXXXXXX 100644 | 21 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/net/filter-mirror.c | 22 | --- a/hw/virtio/vhost-shadow-virtqueue.c |
22 | +++ b/net/filter-mirror.c | 23 | +++ b/hw/virtio/vhost-shadow-virtqueue.c |
23 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorState { | 24 | @@ -XXX,XX +XXX,XX @@ static bool vhost_svq_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg, |
24 | CharBackend chr_in; | 25 | } |
25 | CharBackend chr_out; | 26 | |
26 | SocketReadState rs; | 27 | static bool vhost_svq_add_split(VhostShadowVirtqueue *svq, |
27 | + bool vnet_hdr; | 28 | - VirtQueueElement *elem, unsigned *head) |
28 | } MirrorState; | 29 | + const struct iovec *out_sg, size_t out_num, |
29 | 30 | + const struct iovec *in_sg, size_t in_num, | |
30 | static int filter_send(MirrorState *s, | 31 | + unsigned *head) |
31 | const struct iovec *iov, | ||
32 | int iovcnt) | ||
33 | { | 32 | { |
34 | + NetFilterState *nf = NETFILTER(s); | 33 | unsigned avail_idx; |
35 | int ret = 0; | 34 | vring_avail_t *avail = svq->vring.avail; |
36 | ssize_t size = 0; | 35 | bool ok; |
37 | uint32_t len = 0; | 36 | - g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num)); |
38 | @@ -XXX,XX +XXX,XX @@ static int filter_send(MirrorState *s, | 37 | + g_autofree hwaddr *sgs = g_new(hwaddr, MAX(out_num, in_num)); |
39 | goto err; | 38 | |
39 | *head = svq->free_head; | ||
40 | |||
41 | /* We need some descriptors here */ | ||
42 | - if (unlikely(!elem->out_num && !elem->in_num)) { | ||
43 | + if (unlikely(!out_num && !in_num)) { | ||
44 | qemu_log_mask(LOG_GUEST_ERROR, | ||
45 | "Guest provided element with no descriptors"); | ||
46 | return false; | ||
40 | } | 47 | } |
41 | 48 | ||
42 | + if (s->vnet_hdr) { | 49 | - ok = vhost_svq_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num, |
43 | + /* | 50 | - elem->in_num > 0, false); |
44 | + * If vnet_hdr = on, we send vnet header len to make other | 51 | + ok = vhost_svq_vring_write_descs(svq, sgs, out_sg, out_num, in_num > 0, |
45 | + * module(like colo-compare) know how to parse net | 52 | + false); |
46 | + * packet correctly. | 53 | if (unlikely(!ok)) { |
47 | + */ | 54 | return false; |
48 | + ssize_t vnet_hdr_len; | ||
49 | + | ||
50 | + vnet_hdr_len = nf->netdev->vnet_hdr_len; | ||
51 | + | ||
52 | + len = htonl(vnet_hdr_len); | ||
53 | + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); | ||
54 | + if (ret != sizeof(len)) { | ||
55 | + goto err; | ||
56 | + } | ||
57 | + } | ||
58 | + | ||
59 | buf = g_malloc(size); | ||
60 | iov_to_buf(iov, iovcnt, 0, buf, size); | ||
61 | ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); | ||
62 | @@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp) | ||
63 | } | ||
64 | } | 55 | } |
65 | 56 | ||
66 | - net_socket_rs_init(&s->rs, redirector_rs_finalize, false); | 57 | - ok = vhost_svq_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, |
67 | + net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr); | 58 | - true); |
68 | 59 | + ok = vhost_svq_vring_write_descs(svq, sgs, in_sg, in_num, false, true); | |
69 | if (s->indev) { | 60 | if (unlikely(!ok)) { |
70 | chr = qemu_chr_find(s->indev); | 61 | return false; |
71 | @@ -XXX,XX +XXX,XX @@ static void filter_mirror_set_outdev(Object *obj, | ||
72 | } | 62 | } |
63 | @@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) | ||
64 | * | ||
65 | * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full | ||
66 | */ | ||
67 | -static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) | ||
68 | +static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, | ||
69 | + size_t out_num, const struct iovec *in_sg, | ||
70 | + size_t in_num, VirtQueueElement *elem) | ||
71 | { | ||
72 | unsigned qemu_head; | ||
73 | - unsigned ndescs = elem->in_num + elem->out_num; | ||
74 | + unsigned ndescs = in_num + out_num; | ||
75 | bool ok; | ||
76 | |||
77 | if (unlikely(ndescs > vhost_svq_available_slots(svq))) { | ||
78 | return -ENOSPC; | ||
79 | } | ||
80 | |||
81 | - ok = vhost_svq_add_split(svq, elem, &qemu_head); | ||
82 | + ok = vhost_svq_add_split(svq, out_sg, out_num, in_sg, in_num, &qemu_head); | ||
83 | if (unlikely(!ok)) { | ||
84 | g_free(elem); | ||
85 | return -EINVAL; | ||
86 | @@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem) | ||
87 | return 0; | ||
73 | } | 88 | } |
74 | 89 | ||
75 | +static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp) | 90 | +/* Convenience wrapper to add a guest's element to SVQ */ |
91 | +static int vhost_svq_add_element(VhostShadowVirtqueue *svq, | ||
92 | + VirtQueueElement *elem) | ||
76 | +{ | 93 | +{ |
77 | + MirrorState *s = FILTER_MIRROR(obj); | 94 | + return vhost_svq_add(svq, elem->out_sg, elem->out_num, elem->in_sg, |
78 | + | 95 | + elem->in_num, elem); |
79 | + return s->vnet_hdr; | ||
80 | +} | 96 | +} |
81 | + | 97 | + |
82 | +static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp) | 98 | /** |
83 | +{ | 99 | * Forward available buffers. |
84 | + MirrorState *s = FILTER_MIRROR(obj); | 100 | * |
85 | + | 101 | @@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) |
86 | + s->vnet_hdr = value; | 102 | break; |
87 | +} | 103 | } |
88 | + | 104 | |
89 | static char *filter_redirector_get_outdev(Object *obj, Error **errp) | 105 | - r = vhost_svq_add(svq, elem); |
90 | { | 106 | + r = vhost_svq_add_element(svq, elem); |
91 | MirrorState *s = FILTER_REDIRECTOR(obj); | 107 | if (unlikely(r != 0)) { |
92 | @@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj, | 108 | if (r == -ENOSPC) { |
93 | 109 | /* | |
94 | static void filter_mirror_init(Object *obj) | ||
95 | { | ||
96 | + MirrorState *s = FILTER_MIRROR(obj); | ||
97 | + | ||
98 | object_property_add_str(obj, "outdev", filter_mirror_get_outdev, | ||
99 | filter_mirror_set_outdev, NULL); | ||
100 | + | ||
101 | + s->vnet_hdr = false; | ||
102 | + object_property_add_bool(obj, "vnet_hdr_support", | ||
103 | + filter_mirror_get_vnet_hdr, | ||
104 | + filter_mirror_set_vnet_hdr, NULL); | ||
105 | } | ||
106 | |||
107 | static void filter_redirector_init(Object *obj) | ||
108 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
109 | index XXXXXXX..XXXXXXX 100644 | ||
110 | --- a/qemu-options.hx | ||
111 | +++ b/qemu-options.hx | ||
112 | @@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter. | ||
113 | @option{tx}: the filter is attached to the transmit queue of the netdev, | ||
114 | where it will receive packets sent by the netdev. | ||
115 | |||
116 | -@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}] | ||
117 | +@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support] | ||
118 | |||
119 | -filter-mirror on netdev @var{netdevid},mirror net packet to chardev | ||
120 | -@var{chardevid} | ||
121 | +filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len. | ||
122 | |||
123 | @item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid}, | ||
124 | outdev=@var{chardevid}[,queue=@var{all|rx|tx}] | ||
125 | -- | 110 | -- |
126 | 2.7.4 | 111 | 2.7.4 |
127 | 112 | ||
128 | 113 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | This will allow SVQ to add context to the different queue elements. | ||
4 | |||
5 | This patch only stores the actual element, no functional change intended. | ||
6 | |||
7 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
8 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
9 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
10 | --- | ||
11 | hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++-------- | ||
12 | hw/virtio/vhost-shadow-virtqueue.h | 8 ++++++-- | ||
13 | 2 files changed, 14 insertions(+), 10 deletions(-) | ||
14 | |||
15 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/virtio/vhost-shadow-virtqueue.c | ||
18 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, | ||
20 | return -EINVAL; | ||
21 | } | ||
22 | |||
23 | - svq->ring_id_maps[qemu_head] = elem; | ||
24 | + svq->desc_state[qemu_head].elem = elem; | ||
25 | vhost_svq_kick(svq); | ||
26 | return 0; | ||
27 | } | ||
28 | @@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, | ||
29 | return NULL; | ||
30 | } | ||
31 | |||
32 | - if (unlikely(!svq->ring_id_maps[used_elem.id])) { | ||
33 | + if (unlikely(!svq->desc_state[used_elem.id].elem)) { | ||
34 | qemu_log_mask(LOG_GUEST_ERROR, | ||
35 | "Device %s says index %u is used, but it was not available", | ||
36 | svq->vdev->name, used_elem.id); | ||
37 | return NULL; | ||
38 | } | ||
39 | |||
40 | - num = svq->ring_id_maps[used_elem.id]->in_num + | ||
41 | - svq->ring_id_maps[used_elem.id]->out_num; | ||
42 | + num = svq->desc_state[used_elem.id].elem->in_num + | ||
43 | + svq->desc_state[used_elem.id].elem->out_num; | ||
44 | last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); | ||
45 | svq->desc_next[last_used_chain] = svq->free_head; | ||
46 | svq->free_head = used_elem.id; | ||
47 | |||
48 | *len = used_elem.len; | ||
49 | - return g_steal_pointer(&svq->ring_id_maps[used_elem.id]); | ||
50 | + return g_steal_pointer(&svq->desc_state[used_elem.id].elem); | ||
51 | } | ||
52 | |||
53 | static void vhost_svq_flush(VhostShadowVirtqueue *svq, | ||
54 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, | ||
55 | memset(svq->vring.desc, 0, driver_size); | ||
56 | svq->vring.used = qemu_memalign(qemu_real_host_page_size(), device_size); | ||
57 | memset(svq->vring.used, 0, device_size); | ||
58 | - svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num); | ||
59 | + svq->desc_state = g_new0(SVQDescState, svq->vring.num); | ||
60 | svq->desc_next = g_new0(uint16_t, svq->vring.num); | ||
61 | for (unsigned i = 0; i < svq->vring.num - 1; i++) { | ||
62 | svq->desc_next[i] = cpu_to_le16(i + 1); | ||
63 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) | ||
64 | |||
65 | for (unsigned i = 0; i < svq->vring.num; ++i) { | ||
66 | g_autofree VirtQueueElement *elem = NULL; | ||
67 | - elem = g_steal_pointer(&svq->ring_id_maps[i]); | ||
68 | + elem = g_steal_pointer(&svq->desc_state[i].elem); | ||
69 | if (elem) { | ||
70 | virtqueue_detach_element(svq->vq, elem, 0); | ||
71 | } | ||
72 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) | ||
73 | } | ||
74 | svq->vq = NULL; | ||
75 | g_free(svq->desc_next); | ||
76 | - g_free(svq->ring_id_maps); | ||
77 | + g_free(svq->desc_state); | ||
78 | qemu_vfree(svq->vring.desc); | ||
79 | qemu_vfree(svq->vring.used); | ||
80 | } | ||
81 | diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h | ||
82 | index XXXXXXX..XXXXXXX 100644 | ||
83 | --- a/hw/virtio/vhost-shadow-virtqueue.h | ||
84 | +++ b/hw/virtio/vhost-shadow-virtqueue.h | ||
85 | @@ -XXX,XX +XXX,XX @@ | ||
86 | #include "standard-headers/linux/vhost_types.h" | ||
87 | #include "hw/virtio/vhost-iova-tree.h" | ||
88 | |||
89 | +typedef struct SVQDescState { | ||
90 | + VirtQueueElement *elem; | ||
91 | +} SVQDescState; | ||
92 | + | ||
93 | /* Shadow virtqueue to relay notifications */ | ||
94 | typedef struct VhostShadowVirtqueue { | ||
95 | /* Shadow vring */ | ||
96 | @@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue { | ||
97 | /* IOVA mapping */ | ||
98 | VhostIOVATree *iova_tree; | ||
99 | |||
100 | - /* Map for use the guest's descriptors */ | ||
101 | - VirtQueueElement **ring_id_maps; | ||
102 | + /* SVQ vring descriptors state */ | ||
103 | + SVQDescState *desc_state; | ||
104 | |||
105 | /* Next VirtQueue element that guest made available */ | ||
106 | VirtQueueElement *next_guest_avail_elem; | ||
107 | -- | ||
108 | 2.7.4 | ||
109 | |||
110 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | This patch change the filter_send() parameter from CharBackend to MirrorState, | 3 | A guest's buffer contiguous on GPA may need multiple descriptors on |
4 | we can get more information like vnet_hdr(We use it to support packet with vnet_header). | 4 | qemu's VA, so SVQ should track its length separately. |
5 | 5 | ||
6 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> |
7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 8 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
8 | --- | 9 | --- |
9 | net/filter-mirror.c | 10 +++++----- | 10 | hw/virtio/vhost-shadow-virtqueue.c | 4 ++-- |
10 | 1 file changed, 5 insertions(+), 5 deletions(-) | 11 | hw/virtio/vhost-shadow-virtqueue.h | 6 ++++++ |
12 | 2 files changed, 8 insertions(+), 2 deletions(-) | ||
11 | 13 | ||
12 | diff --git a/net/filter-mirror.c b/net/filter-mirror.c | 14 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c |
13 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/net/filter-mirror.c | 16 | --- a/hw/virtio/vhost-shadow-virtqueue.c |
15 | +++ b/net/filter-mirror.c | 17 | +++ b/hw/virtio/vhost-shadow-virtqueue.c |
16 | @@ -XXX,XX +XXX,XX @@ typedef struct MirrorState { | 18 | @@ -XXX,XX +XXX,XX @@ static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, |
17 | SocketReadState rs; | ||
18 | } MirrorState; | ||
19 | |||
20 | -static int filter_send(CharBackend *chr_out, | ||
21 | +static int filter_send(MirrorState *s, | ||
22 | const struct iovec *iov, | ||
23 | int iovcnt) | ||
24 | { | ||
25 | @@ -XXX,XX +XXX,XX @@ static int filter_send(CharBackend *chr_out, | ||
26 | } | 19 | } |
27 | 20 | ||
28 | len = htonl(size); | 21 | svq->desc_state[qemu_head].elem = elem; |
29 | - ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len)); | 22 | + svq->desc_state[qemu_head].ndescs = ndescs; |
30 | + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); | 23 | vhost_svq_kick(svq); |
31 | if (ret != sizeof(len)) { | 24 | return 0; |
32 | goto err; | 25 | } |
26 | @@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, | ||
27 | return NULL; | ||
33 | } | 28 | } |
34 | 29 | ||
35 | buf = g_malloc(size); | 30 | - num = svq->desc_state[used_elem.id].elem->in_num + |
36 | iov_to_buf(iov, iovcnt, 0, buf, size); | 31 | - svq->desc_state[used_elem.id].elem->out_num; |
37 | - ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size); | 32 | + num = svq->desc_state[used_elem.id].ndescs; |
38 | + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); | 33 | last_used_chain = vhost_svq_last_desc_of_chain(svq, num, used_elem.id); |
39 | g_free(buf); | 34 | svq->desc_next[last_used_chain] = svq->free_head; |
40 | if (ret != size) { | 35 | svq->free_head = used_elem.id; |
41 | goto err; | 36 | diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h |
42 | @@ -XXX,XX +XXX,XX @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf, | 37 | index XXXXXXX..XXXXXXX 100644 |
43 | MirrorState *s = FILTER_MIRROR(nf); | 38 | --- a/hw/virtio/vhost-shadow-virtqueue.h |
44 | int ret; | 39 | +++ b/hw/virtio/vhost-shadow-virtqueue.h |
45 | 40 | @@ -XXX,XX +XXX,XX @@ | |
46 | - ret = filter_send(&s->chr_out, iov, iovcnt); | 41 | |
47 | + ret = filter_send(s, iov, iovcnt); | 42 | typedef struct SVQDescState { |
48 | if (ret) { | 43 | VirtQueueElement *elem; |
49 | error_report("filter mirror send failed(%s)", strerror(-ret)); | 44 | + |
50 | } | 45 | + /* |
51 | @@ -XXX,XX +XXX,XX @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf, | 46 | + * Number of descriptors exposed to the device. May or may not match |
52 | int ret; | 47 | + * guest's |
53 | 48 | + */ | |
54 | if (qemu_chr_fe_backend_connected(&s->chr_out)) { | 49 | + unsigned int ndescs; |
55 | - ret = filter_send(&s->chr_out, iov, iovcnt); | 50 | } SVQDescState; |
56 | + ret = filter_send(s, iov, iovcnt); | 51 | |
57 | if (ret) { | 52 | /* Shadow virtqueue to relay notifications */ |
58 | error_report("filter redirector send failed(%s)", strerror(-ret)); | ||
59 | } | ||
60 | -- | 53 | -- |
61 | 2.7.4 | 54 | 2.7.4 |
62 | 55 | ||
63 | 56 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | This function allows external SVQ users to return guest's available | ||
4 | buffers. | ||
5 | |||
6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/virtio/vhost-shadow-virtqueue.c | 16 ++++++++++++++++ | ||
11 | hw/virtio/vhost-shadow-virtqueue.h | 3 +++ | ||
12 | 2 files changed, 19 insertions(+) | ||
13 | |||
14 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/virtio/vhost-shadow-virtqueue.c | ||
17 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq, | ||
19 | return g_steal_pointer(&svq->desc_state[used_elem.id].elem); | ||
20 | } | ||
21 | |||
22 | +/** | ||
23 | + * Push an element to SVQ, returning it to the guest. | ||
24 | + */ | ||
25 | +void vhost_svq_push_elem(VhostShadowVirtqueue *svq, | ||
26 | + const VirtQueueElement *elem, uint32_t len) | ||
27 | +{ | ||
28 | + virtqueue_push(svq->vq, elem, len); | ||
29 | + if (svq->next_guest_avail_elem) { | ||
30 | + /* | ||
31 | + * Avail ring was full when vhost_svq_flush was called, so it's a | ||
32 | + * good moment to make more descriptors available if possible. | ||
33 | + */ | ||
34 | + vhost_handle_guest_kick(svq); | ||
35 | + } | ||
36 | +} | ||
37 | + | ||
38 | static void vhost_svq_flush(VhostShadowVirtqueue *svq, | ||
39 | bool check_for_avail_queue) | ||
40 | { | ||
41 | diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/hw/virtio/vhost-shadow-virtqueue.h | ||
44 | +++ b/hw/virtio/vhost-shadow-virtqueue.h | ||
45 | @@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue { | ||
46 | |||
47 | bool vhost_svq_valid_features(uint64_t features, Error **errp); | ||
48 | |||
49 | +void vhost_svq_push_elem(VhostShadowVirtqueue *svq, | ||
50 | + const VirtQueueElement *elem, uint32_t len); | ||
51 | + | ||
52 | void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); | ||
53 | void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); | ||
54 | void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq, | ||
55 | -- | ||
56 | 2.7.4 | ||
57 | |||
58 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | This allows external parts of SVQ to forward custom buffers to the | ||
4 | device. | ||
5 | |||
6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/virtio/vhost-shadow-virtqueue.c | 6 +++--- | ||
11 | hw/virtio/vhost-shadow-virtqueue.h | 3 +++ | ||
12 | 2 files changed, 6 insertions(+), 3 deletions(-) | ||
13 | |||
14 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/virtio/vhost-shadow-virtqueue.c | ||
17 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void vhost_svq_kick(VhostShadowVirtqueue *svq) | ||
19 | * | ||
20 | * Return -EINVAL if element is invalid, -ENOSPC if dev queue is full | ||
21 | */ | ||
22 | -static int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, | ||
23 | - size_t out_num, const struct iovec *in_sg, | ||
24 | - size_t in_num, VirtQueueElement *elem) | ||
25 | +int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, | ||
26 | + size_t out_num, const struct iovec *in_sg, size_t in_num, | ||
27 | + VirtQueueElement *elem) | ||
28 | { | ||
29 | unsigned qemu_head; | ||
30 | unsigned ndescs = in_num + out_num; | ||
31 | diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h | ||
32 | index XXXXXXX..XXXXXXX 100644 | ||
33 | --- a/hw/virtio/vhost-shadow-virtqueue.h | ||
34 | +++ b/hw/virtio/vhost-shadow-virtqueue.h | ||
35 | @@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp); | ||
36 | |||
37 | void vhost_svq_push_elem(VhostShadowVirtqueue *svq, | ||
38 | const VirtQueueElement *elem, uint32_t len); | ||
39 | +int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, | ||
40 | + size_t out_num, const struct iovec *in_sg, size_t in_num, | ||
41 | + VirtQueueElement *elem); | ||
42 | |||
43 | void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); | ||
44 | void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); | ||
45 | -- | ||
46 | 2.7.4 | ||
47 | |||
48 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | It allows the Shadow Control VirtQueue to wait for the device to use the | ||
4 | available buffers. | ||
5 | |||
6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/virtio/vhost-shadow-virtqueue.c | 27 +++++++++++++++++++++++++++ | ||
11 | hw/virtio/vhost-shadow-virtqueue.h | 1 + | ||
12 | 2 files changed, 28 insertions(+) | ||
13 | |||
14 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/virtio/vhost-shadow-virtqueue.c | ||
17 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | ||
18 | @@ -XXX,XX +XXX,XX @@ static void vhost_svq_flush(VhostShadowVirtqueue *svq, | ||
19 | } | ||
20 | |||
21 | /** | ||
22 | + * Poll the SVQ for one device used buffer. | ||
23 | + * | ||
24 | + * This function race with main event loop SVQ polling, so extra | ||
25 | + * synchronization is needed. | ||
26 | + * | ||
27 | + * Return the length written by the device. | ||
28 | + */ | ||
29 | +size_t vhost_svq_poll(VhostShadowVirtqueue *svq) | ||
30 | +{ | ||
31 | + int64_t start_us = g_get_monotonic_time(); | ||
32 | + do { | ||
33 | + uint32_t len; | ||
34 | + VirtQueueElement *elem = vhost_svq_get_buf(svq, &len); | ||
35 | + if (elem) { | ||
36 | + return len; | ||
37 | + } | ||
38 | + | ||
39 | + if (unlikely(g_get_monotonic_time() - start_us > 10e6)) { | ||
40 | + return 0; | ||
41 | + } | ||
42 | + | ||
43 | + /* Make sure we read new used_idx */ | ||
44 | + smp_rmb(); | ||
45 | + } while (true); | ||
46 | +} | ||
47 | + | ||
48 | +/** | ||
49 | * Forward used buffers. | ||
50 | * | ||
51 | * @n: hdev call event notifier, the one that device set to notify svq. | ||
52 | diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/hw/virtio/vhost-shadow-virtqueue.h | ||
55 | +++ b/hw/virtio/vhost-shadow-virtqueue.h | ||
56 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_push_elem(VhostShadowVirtqueue *svq, | ||
57 | int vhost_svq_add(VhostShadowVirtqueue *svq, const struct iovec *out_sg, | ||
58 | size_t out_num, const struct iovec *in_sg, size_t in_num, | ||
59 | VirtQueueElement *elem); | ||
60 | +size_t vhost_svq_poll(VhostShadowVirtqueue *svq); | ||
61 | |||
62 | void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd); | ||
63 | void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd); | ||
64 | -- | ||
65 | 2.7.4 | ||
66 | |||
67 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | This allows external handlers to be aware of new buffers that the guest | ||
4 | places in the virtqueue. | ||
5 | |||
6 | When this callback is defined the ownership of the guest's virtqueue | ||
7 | element is transferred to the callback. This means that if the user | ||
8 | wants to forward the descriptor it needs to manually inject it. The | ||
9 | callback is also free to process the command by itself and use the | ||
10 | element with svq_push. | ||
11 | |||
12 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
13 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
14 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
15 | --- | ||
16 | hw/virtio/vhost-shadow-virtqueue.c | 14 ++++++++++++-- | ||
17 | hw/virtio/vhost-shadow-virtqueue.h | 31 ++++++++++++++++++++++++++++++- | ||
18 | hw/virtio/vhost-vdpa.c | 3 ++- | ||
19 | 3 files changed, 44 insertions(+), 4 deletions(-) | ||
20 | |||
21 | diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/hw/virtio/vhost-shadow-virtqueue.c | ||
24 | +++ b/hw/virtio/vhost-shadow-virtqueue.c | ||
25 | @@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq) | ||
26 | break; | ||
27 | } | ||
28 | |||
29 | - r = vhost_svq_add_element(svq, elem); | ||
30 | + if (svq->ops) { | ||
31 | + r = svq->ops->avail_handler(svq, elem, svq->ops_opaque); | ||
32 | + } else { | ||
33 | + r = vhost_svq_add_element(svq, elem); | ||
34 | + } | ||
35 | if (unlikely(r != 0)) { | ||
36 | if (r == -ENOSPC) { | ||
37 | /* | ||
38 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq) | ||
39 | * shadow methods and file descriptors. | ||
40 | * | ||
41 | * @iova_tree: Tree to perform descriptors translations | ||
42 | + * @ops: SVQ owner callbacks | ||
43 | + * @ops_opaque: ops opaque pointer | ||
44 | * | ||
45 | * Returns the new virtqueue or NULL. | ||
46 | * | ||
47 | * In case of error, reason is reported through error_report. | ||
48 | */ | ||
49 | -VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) | ||
50 | +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, | ||
51 | + const VhostShadowVirtqueueOps *ops, | ||
52 | + void *ops_opaque) | ||
53 | { | ||
54 | g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1); | ||
55 | int r; | ||
56 | @@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree) | ||
57 | event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND); | ||
58 | event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call); | ||
59 | svq->iova_tree = iova_tree; | ||
60 | + svq->ops = ops; | ||
61 | + svq->ops_opaque = ops_opaque; | ||
62 | return g_steal_pointer(&svq); | ||
63 | |||
64 | err_init_hdev_call: | ||
65 | diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h | ||
66 | index XXXXXXX..XXXXXXX 100644 | ||
67 | --- a/hw/virtio/vhost-shadow-virtqueue.h | ||
68 | +++ b/hw/virtio/vhost-shadow-virtqueue.h | ||
69 | @@ -XXX,XX +XXX,XX @@ typedef struct SVQDescState { | ||
70 | unsigned int ndescs; | ||
71 | } SVQDescState; | ||
72 | |||
73 | +typedef struct VhostShadowVirtqueue VhostShadowVirtqueue; | ||
74 | + | ||
75 | +/** | ||
76 | + * Callback to handle an avail buffer. | ||
77 | + * | ||
78 | + * @svq: Shadow virtqueue | ||
79 | + * @elem: Element placed in the queue by the guest | ||
80 | + * @vq_callback_opaque: Opaque | ||
81 | + * | ||
82 | + * Returns 0 if the vq is running as expected. | ||
83 | + * | ||
84 | + * Note that ownership of elem is transferred to the callback. | ||
85 | + */ | ||
86 | +typedef int (*VirtQueueAvailCallback)(VhostShadowVirtqueue *svq, | ||
87 | + VirtQueueElement *elem, | ||
88 | + void *vq_callback_opaque); | ||
89 | + | ||
90 | +typedef struct VhostShadowVirtqueueOps { | ||
91 | + VirtQueueAvailCallback avail_handler; | ||
92 | +} VhostShadowVirtqueueOps; | ||
93 | + | ||
94 | /* Shadow virtqueue to relay notifications */ | ||
95 | typedef struct VhostShadowVirtqueue { | ||
96 | /* Shadow vring */ | ||
97 | @@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue { | ||
98 | */ | ||
99 | uint16_t *desc_next; | ||
100 | |||
101 | + /* Caller callbacks */ | ||
102 | + const VhostShadowVirtqueueOps *ops; | ||
103 | + | ||
104 | + /* Caller callbacks opaque */ | ||
105 | + void *ops_opaque; | ||
106 | + | ||
107 | /* Next head to expose to the device */ | ||
108 | uint16_t shadow_avail_idx; | ||
109 | |||
110 | @@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev, | ||
111 | VirtQueue *vq); | ||
112 | void vhost_svq_stop(VhostShadowVirtqueue *svq); | ||
113 | |||
114 | -VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree); | ||
115 | +VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree, | ||
116 | + const VhostShadowVirtqueueOps *ops, | ||
117 | + void *ops_opaque); | ||
118 | |||
119 | void vhost_svq_free(gpointer vq); | ||
120 | G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free); | ||
121 | diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c | ||
122 | index XXXXXXX..XXXXXXX 100644 | ||
123 | --- a/hw/virtio/vhost-vdpa.c | ||
124 | +++ b/hw/virtio/vhost-vdpa.c | ||
125 | @@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, | ||
126 | |||
127 | shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free); | ||
128 | for (unsigned n = 0; n < hdev->nvqs; ++n) { | ||
129 | - g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree); | ||
130 | + g_autoptr(VhostShadowVirtqueue) svq; | ||
131 | |||
132 | + svq = vhost_svq_new(v->iova_tree, NULL, NULL); | ||
133 | if (unlikely(!svq)) { | ||
134 | error_setg(errp, "Cannot create svq %u", n); | ||
135 | return -1; | ||
136 | -- | ||
137 | 2.7.4 | ||
138 | |||
139 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Eugenio Pérez <eperezma@redhat.com> | ||
1 | 2 | ||
3 | Shadow CVQ will copy buffers on qemu VA, so we avoid TOCTOU attacks from | ||
4 | the guest that could set a different state in qemu device model and vdpa | ||
5 | device. | ||
6 | |||
7 | To do so, it needs to be able to map these new buffers to the device. | ||
8 | |||
9 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
10 | Acked-by: Jason Wang <jasowang@redhat.com> | ||
11 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
12 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
13 | --- | ||
14 | hw/virtio/vhost-vdpa.c | 7 +++---- | ||
15 | include/hw/virtio/vhost-vdpa.h | 4 ++++ | ||
16 | 2 files changed, 7 insertions(+), 4 deletions(-) | ||
17 | |||
18 | diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/hw/virtio/vhost-vdpa.c | ||
21 | +++ b/hw/virtio/vhost-vdpa.c | ||
22 | @@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, | ||
23 | return false; | ||
24 | } | ||
25 | |||
26 | -static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, | ||
27 | - void *vaddr, bool readonly) | ||
28 | +int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, | ||
29 | + void *vaddr, bool readonly) | ||
30 | { | ||
31 | struct vhost_msg_v2 msg = {}; | ||
32 | int fd = v->device_fd; | ||
33 | @@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, | ||
34 | return ret; | ||
35 | } | ||
36 | |||
37 | -static int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, | ||
38 | - hwaddr size) | ||
39 | +int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size) | ||
40 | { | ||
41 | struct vhost_msg_v2 msg = {}; | ||
42 | int fd = v->device_fd; | ||
43 | diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/include/hw/virtio/vhost-vdpa.h | ||
46 | +++ b/include/hw/virtio/vhost-vdpa.h | ||
47 | @@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa { | ||
48 | VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; | ||
49 | } VhostVDPA; | ||
50 | |||
51 | +int vhost_vdpa_dma_map(struct vhost_vdpa *v, hwaddr iova, hwaddr size, | ||
52 | + void *vaddr, bool readonly); | ||
53 | +int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, hwaddr iova, hwaddr size); | ||
54 | + | ||
55 | #endif | ||
56 | -- | ||
57 | 2.7.4 | ||
58 | |||
59 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 3 | net/vhost-vdpa.c will need functions that are declared in |
4 | vhost-shadow-virtqueue.c, which in turn needs functions from virtio-net.c. | ||
5 | |||
6 | Copy the vhost-vdpa-stub.c code so | ||
7 | only the constructor net_init_vhost_vdpa needs to be defined. | ||
8 | |||
9 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
4 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 10 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
5 | --- | 11 | --- |
6 | docs/colo-proxy.txt | 26 ++++++++++++++++++++++++++ | 12 | net/meson.build | 3 ++- |
7 | 1 file changed, 26 insertions(+) | 13 | net/vhost-vdpa-stub.c | 21 +++++++++++++++++++++ |
14 | 2 files changed, 23 insertions(+), 1 deletion(-) | ||
15 | create mode 100644 net/vhost-vdpa-stub.c | ||
8 | 16 | ||
9 | diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt | 17 | diff --git a/net/meson.build b/net/meson.build |
10 | index XXXXXXX..XXXXXXX 100644 | 18 | index XXXXXXX..XXXXXXX 100644 |
11 | --- a/docs/colo-proxy.txt | 19 | --- a/net/meson.build |
12 | +++ b/docs/colo-proxy.txt | 20 | +++ b/net/meson.build |
13 | @@ -XXX,XX +XXX,XX @@ Secondary(ip:3.3.3.8): | 21 | @@ -XXX,XX +XXX,XX @@ endif |
14 | -chardev socket,id=red1,host=3.3.3.3,port=9004 | 22 | softmmu_ss.add(when: 'CONFIG_POSIX', if_true: files(tap_posix)) |
15 | -object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0 | 23 | softmmu_ss.add(when: 'CONFIG_WIN32', if_true: files('tap-win32.c')) |
16 | -object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1 | 24 | if have_vhost_net_vdpa |
17 | +-object filter-rewriter,id=f3,netdev=hn0,queue=all | 25 | - softmmu_ss.add(files('vhost-vdpa.c')) |
26 | + softmmu_ss.add(when: 'CONFIG_VIRTIO_NET', if_true: files('vhost-vdpa.c'), if_false: files('vhost-vdpa-stub.c')) | ||
27 | + softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-vdpa-stub.c')) | ||
28 | endif | ||
29 | |||
30 | vmnet_files = files( | ||
31 | diff --git a/net/vhost-vdpa-stub.c b/net/vhost-vdpa-stub.c | ||
32 | new file mode 100644 | ||
33 | index XXXXXXX..XXXXXXX | ||
34 | --- /dev/null | ||
35 | +++ b/net/vhost-vdpa-stub.c | ||
36 | @@ -XXX,XX +XXX,XX @@ | ||
37 | +/* | ||
38 | + * vhost-vdpa-stub.c | ||
39 | + * | ||
40 | + * Copyright (c) 2022 Red Hat, Inc. | ||
41 | + * | ||
42 | + * This work is licensed under the terms of the GNU GPL, version 2 or later. | ||
43 | + * See the COPYING file in the top-level directory. | ||
44 | + * | ||
45 | + */ | ||
18 | + | 46 | + |
19 | +If you want to use virtio-net-pci or other driver with vnet_header: | 47 | +#include "qemu/osdep.h" |
48 | +#include "clients.h" | ||
49 | +#include "net/vhost-vdpa.h" | ||
50 | +#include "qapi/error.h" | ||
20 | + | 51 | + |
21 | +Primary(ip:3.3.3.3): | 52 | +int net_init_vhost_vdpa(const Netdev *netdev, const char *name, |
22 | +-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown | 53 | + NetClientState *peer, Error **errp) |
23 | +-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66 | 54 | +{ |
24 | +-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait | 55 | + error_setg(errp, "vhost-vdpa requires frontend driver virtio-net-*"); |
25 | +-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait | 56 | + return -1; |
26 | +-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait | 57 | +} |
27 | +-chardev socket,id=compare0-0,host=3.3.3.3,port=9001 | ||
28 | +-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait | ||
29 | +-chardev socket,id=compare_out0,host=3.3.3.3,port=9005 | ||
30 | +-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support | ||
31 | +-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support | ||
32 | +-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support | ||
33 | +-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support | ||
34 | + | ||
35 | +Secondary(ip:3.3.3.8): | ||
36 | +-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown | ||
37 | +-device e1000,netdev=hn0,mac=52:a4:00:12:78:66 | ||
38 | +-chardev socket,id=red0,host=3.3.3.3,port=9003 | ||
39 | +-chardev socket,id=red1,host=3.3.3.3,port=9004 | ||
40 | +-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support | ||
41 | +-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support | ||
42 | +-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support | ||
43 | |||
44 | Note: | ||
45 | a.COLO-proxy must work with COLO-frame and Block-replication. | ||
46 | -- | 58 | -- |
47 | 2.7.4 | 59 | 2.7.4 |
48 | 60 | ||
49 | 61 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We add the vnet_hdr_support option for filter-rewriter, default is disabled. | 3 | Do a simple forwarding of CVQ buffers, the same work SVQ could do but |
4 | If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it. | 4 | through callbacks. No functional change intended. |
5 | You can use it for example: | ||
6 | -object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support | ||
7 | 5 | ||
8 | We get the vnet_hdr_len from NetClientState, which lets us | 6 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> |
9 | parse network packets correctly. | 7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> |
10 | |||
11 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | ||
12 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 8 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
13 | --- | 9 | --- |
14 | net/filter-rewriter.c | 37 ++++++++++++++++++++++++++++++++++++- | 10 | hw/virtio/vhost-vdpa.c | 3 ++- |
15 | qemu-options.hx | 4 ++-- | 11 | include/hw/virtio/vhost-vdpa.h | 3 +++ |
16 | 2 files changed, 38 insertions(+), 3 deletions(-) | 12 | net/vhost-vdpa.c | 58 ++++++++++++++++++++++++++++++++++++++++++ |
13 | 3 files changed, 63 insertions(+), 1 deletion(-) | ||
17 | 14 | ||
18 | diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c | 15 | diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c |
19 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
20 | --- a/net/filter-rewriter.c | 17 | --- a/hw/virtio/vhost-vdpa.c |
21 | +++ b/net/filter-rewriter.c | 18 | +++ b/hw/virtio/vhost-vdpa.c |
19 | @@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v, | ||
20 | for (unsigned n = 0; n < hdev->nvqs; ++n) { | ||
21 | g_autoptr(VhostShadowVirtqueue) svq; | ||
22 | |||
23 | - svq = vhost_svq_new(v->iova_tree, NULL, NULL); | ||
24 | + svq = vhost_svq_new(v->iova_tree, v->shadow_vq_ops, | ||
25 | + v->shadow_vq_ops_opaque); | ||
26 | if (unlikely(!svq)) { | ||
27 | error_setg(errp, "Cannot create svq %u", n); | ||
28 | return -1; | ||
29 | diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/include/hw/virtio/vhost-vdpa.h | ||
32 | +++ b/include/hw/virtio/vhost-vdpa.h | ||
22 | @@ -XXX,XX +XXX,XX @@ | 33 | @@ -XXX,XX +XXX,XX @@ |
23 | #include "qemu-common.h" | 34 | #include <gmodule.h> |
35 | |||
36 | #include "hw/virtio/vhost-iova-tree.h" | ||
37 | +#include "hw/virtio/vhost-shadow-virtqueue.h" | ||
38 | #include "hw/virtio/virtio.h" | ||
39 | #include "standard-headers/linux/vhost_types.h" | ||
40 | |||
41 | @@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa { | ||
42 | /* IOVA mapping used by the Shadow Virtqueue */ | ||
43 | VhostIOVATree *iova_tree; | ||
44 | GPtrArray *shadow_vqs; | ||
45 | + const VhostShadowVirtqueueOps *shadow_vq_ops; | ||
46 | + void *shadow_vq_ops_opaque; | ||
47 | struct vhost_dev *dev; | ||
48 | VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; | ||
49 | } VhostVDPA; | ||
50 | diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/net/vhost-vdpa.c | ||
53 | +++ b/net/vhost-vdpa.c | ||
54 | @@ -XXX,XX +XXX,XX @@ | ||
55 | |||
56 | #include "qemu/osdep.h" | ||
57 | #include "clients.h" | ||
58 | +#include "hw/virtio/virtio-net.h" | ||
59 | #include "net/vhost_net.h" | ||
60 | #include "net/vhost-vdpa.h" | ||
61 | #include "hw/virtio/vhost-vdpa.h" | ||
62 | #include "qemu/config-file.h" | ||
63 | #include "qemu/error-report.h" | ||
64 | +#include "qemu/log.h" | ||
65 | +#include "qemu/memalign.h" | ||
66 | #include "qemu/option.h" | ||
24 | #include "qapi/error.h" | 67 | #include "qapi/error.h" |
25 | #include "qapi/qmp/qerror.h" | 68 | #include <linux/vhost.h> |
26 | +#include "qemu/error-report.h" | 69 | @@ -XXX,XX +XXX,XX @@ static NetClientInfo net_vhost_vdpa_info = { |
27 | #include "qapi-visit.h" | 70 | .check_peer_type = vhost_vdpa_check_peer_type, |
28 | #include "qom/object.h" | 71 | }; |
29 | #include "qemu/main-loop.h" | 72 | |
30 | @@ -XXX,XX +XXX,XX @@ typedef struct RewriterState { | 73 | +/** |
31 | NetQueue *incoming_queue; | 74 | + * Forward buffer for the moment. |
32 | /* hashtable to save connection */ | 75 | + */ |
33 | GHashTable *connection_track_table; | 76 | +static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, |
34 | + bool vnet_hdr; | 77 | + VirtQueueElement *elem, |
35 | } RewriterState; | 78 | + void *opaque) |
36 | 79 | +{ | |
37 | static void filter_rewriter_flush(NetFilterState *nf) | 80 | + unsigned int n = elem->out_num + elem->in_num; |
38 | @@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, | 81 | + g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); |
39 | ConnectionKey key; | 82 | + size_t in_len, dev_written; |
40 | Packet *pkt; | 83 | + virtio_net_ctrl_ack status = VIRTIO_NET_ERR; |
41 | ssize_t size = iov_size(iov, iovcnt); | 84 | + int r; |
42 | + ssize_t vnet_hdr_len = 0; | ||
43 | char *buf = g_malloc0(size); | ||
44 | |||
45 | iov_to_buf(iov, iovcnt, 0, buf, size); | ||
46 | - pkt = packet_new(buf, size, 0); | ||
47 | + | 85 | + |
48 | + if (s->vnet_hdr) { | 86 | + memcpy(dev_buffers, elem->out_sg, elem->out_num); |
49 | + vnet_hdr_len = nf->netdev->vnet_hdr_len; | 87 | + memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); |
88 | + | ||
89 | + r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1], | ||
90 | + elem->in_num, elem); | ||
91 | + if (unlikely(r != 0)) { | ||
92 | + if (unlikely(r == -ENOSPC)) { | ||
93 | + qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", | ||
94 | + __func__); | ||
95 | + } | ||
96 | + goto out; | ||
50 | + } | 97 | + } |
51 | + | 98 | + |
52 | + pkt = packet_new(buf, size, vnet_hdr_len); | 99 | + /* |
53 | g_free(buf); | 100 | + * We can poll here since we've had BQL from the time we sent the |
54 | 101 | + * descriptor. Also, we need to take the answer before SVQ pulls by itself, | |
55 | /* | 102 | + * when BQL is released |
56 | @@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp) | 103 | + */ |
57 | s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); | 104 | + dev_written = vhost_svq_poll(svq); |
58 | } | 105 | + if (unlikely(dev_written < sizeof(status))) { |
59 | 106 | + error_report("Insufficient written data (%zu)", dev_written); | |
60 | +static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp) | 107 | + } |
61 | +{ | ||
62 | + RewriterState *s = FILTER_COLO_REWRITER(obj); | ||
63 | + | 108 | + |
64 | + return s->vnet_hdr; | 109 | +out: |
110 | + in_len = iov_from_buf(elem->in_sg, elem->in_num, 0, &status, | ||
111 | + sizeof(status)); | ||
112 | + if (unlikely(in_len < sizeof(status))) { | ||
113 | + error_report("Bad device CVQ written length"); | ||
114 | + } | ||
115 | + vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); | ||
116 | + g_free(elem); | ||
117 | + return r; | ||
65 | +} | 118 | +} |
66 | + | 119 | + |
67 | +static void filter_rewriter_set_vnet_hdr(Object *obj, | 120 | +static const VhostShadowVirtqueueOps vhost_vdpa_net_svq_ops = { |
68 | + bool value, | 121 | + .avail_handler = vhost_vdpa_net_handle_ctrl_avail, |
69 | + Error **errp) | 122 | +}; |
70 | +{ | ||
71 | + RewriterState *s = FILTER_COLO_REWRITER(obj); | ||
72 | + | 123 | + |
73 | + s->vnet_hdr = value; | 124 | static NetClientState *net_vhost_vdpa_init(NetClientState *peer, |
74 | +} | 125 | const char *device, |
75 | + | 126 | const char *name, |
76 | +static void filter_rewriter_init(Object *obj) | 127 | @@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, |
77 | +{ | 128 | |
78 | + RewriterState *s = FILTER_COLO_REWRITER(obj); | 129 | s->vhost_vdpa.device_fd = vdpa_device_fd; |
79 | + | 130 | s->vhost_vdpa.index = queue_pair_index; |
80 | + s->vnet_hdr = false; | 131 | + if (!is_datapath) { |
81 | + object_property_add_bool(obj, "vnet_hdr_support", | 132 | + s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; |
82 | + filter_rewriter_get_vnet_hdr, | 133 | + s->vhost_vdpa.shadow_vq_ops_opaque = s; |
83 | + filter_rewriter_set_vnet_hdr, NULL); | 134 | + } |
84 | +} | 135 | ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); |
85 | + | 136 | if (ret) { |
86 | static void colo_rewriter_class_init(ObjectClass *oc, void *data) | 137 | qemu_del_net_client(nc); |
87 | { | ||
88 | NetFilterClass *nfc = NETFILTER_CLASS(oc); | ||
89 | @@ -XXX,XX +XXX,XX @@ static const TypeInfo colo_rewriter_info = { | ||
90 | .name = TYPE_FILTER_REWRITER, | ||
91 | .parent = TYPE_NETFILTER, | ||
92 | .class_init = colo_rewriter_class_init, | ||
93 | + .instance_init = filter_rewriter_init, | ||
94 | .instance_size = sizeof(RewriterState), | ||
95 | }; | ||
96 | |||
97 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
98 | index XXXXXXX..XXXXXXX 100644 | ||
99 | --- a/qemu-options.hx | ||
100 | +++ b/qemu-options.hx | ||
101 | @@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not | ||
102 | be the same. we can just use indev or outdev, but at least one of indev or outdev | ||
103 | need to be specified. | ||
104 | |||
105 | -@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid}[,queue=@var{all|rx|tx}] | ||
106 | +@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support] | ||
107 | |||
108 | Filter-rewriter is a part of COLO project.It will rewrite tcp packet to | ||
109 | secondary from primary to keep secondary tcp connection,and rewrite | ||
110 | tcp packet to primary from secondary make tcp packet can be handled by | ||
111 | -client. | ||
112 | +client.if it has the vnet_hdr_support flag, we can parse packet with vnet header. | ||
113 | |||
114 | usage: | ||
115 | colo secondary: | ||
116 | -- | 138 | -- |
117 | 2.7.4 | 139 | 2.7.4 |
118 | 140 | ||
119 | 141 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | Add a vnet_hdr_len field to NetClientState | 3 | Introduce the control virtqueue support for vDPA shadow virtqueue. This |
4 | so that other modules can easily get the real vnet_hdr_len. | 4 | is needed for advanced networking features like rx filtering. |
5 | 5 | ||
6 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 6 | Virtio-net control VQ copies the descriptors to qemu's VA, so we avoid |
7 | TOCTOU with the guest's or device's memory every time there is a device | ||
8 | model change. Otherwise, the guest could change the memory content in | ||
9 | the time between qemu reading it and the device reading it. | ||
10 | |||
11 | To demonstrate command handling, VIRTIO_NET_F_CTRL_MACADDR is | ||
12 | implemented. If the virtio-net driver changes MAC the virtio-net device | ||
13 | model will be updated with the new one, and a rx filtering change event | ||
14 | will be raised. | ||
15 | |||
16 | More cvq commands could be added here straightforwardly but they have | ||
17 | not been tested. | ||
18 | |||
19 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> | ||
20 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 21 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
8 | --- | 22 | --- |
9 | include/net/net.h | 1 + | 23 | net/vhost-vdpa.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
10 | net/net.c | 1 + | 24 | 1 file changed, 205 insertions(+), 8 deletions(-) |
11 | 2 files changed, 2 insertions(+) | 25 | |
12 | 26 | diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c | |
13 | diff --git a/include/net/net.h b/include/net/net.h | ||
14 | index XXXXXXX..XXXXXXX 100644 | 27 | index XXXXXXX..XXXXXXX 100644 |
15 | --- a/include/net/net.h | 28 | --- a/net/vhost-vdpa.c |
16 | +++ b/include/net/net.h | 29 | +++ b/net/vhost-vdpa.c |
17 | @@ -XXX,XX +XXX,XX @@ struct NetClientState { | 30 | @@ -XXX,XX +XXX,XX @@ typedef struct VhostVDPAState { |
18 | unsigned int queue_index; | 31 | NetClientState nc; |
19 | unsigned rxfilter_notify_enabled:1; | 32 | struct vhost_vdpa vhost_vdpa; |
20 | int vring_enable; | 33 | VHostNetState *vhost_net; |
21 | + int vnet_hdr_len; | 34 | + |
22 | QTAILQ_HEAD(NetFilterHead, NetFilterState) filters; | 35 | + /* Control commands shadow buffers */ |
36 | + void *cvq_cmd_out_buffer, *cvq_cmd_in_buffer; | ||
37 | bool started; | ||
38 | } VhostVDPAState; | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_cleanup(NetClientState *nc) | ||
41 | { | ||
42 | VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); | ||
43 | |||
44 | + qemu_vfree(s->cvq_cmd_out_buffer); | ||
45 | + qemu_vfree(s->cvq_cmd_in_buffer); | ||
46 | if (s->vhost_net) { | ||
47 | vhost_net_cleanup(s->vhost_net); | ||
48 | g_free(s->vhost_net); | ||
49 | @@ -XXX,XX +XXX,XX @@ static NetClientInfo net_vhost_vdpa_info = { | ||
50 | .check_peer_type = vhost_vdpa_check_peer_type, | ||
23 | }; | 51 | }; |
24 | 52 | ||
25 | diff --git a/net/net.c b/net/net.c | 53 | +static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) |
26 | index XXXXXXX..XXXXXXX 100644 | 54 | +{ |
27 | --- a/net/net.c | 55 | + VhostIOVATree *tree = v->iova_tree; |
28 | +++ b/net/net.c | 56 | + DMAMap needle = { |
29 | @@ -XXX,XX +XXX,XX @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len) | 57 | + /* |
30 | return; | 58 | + * No need to specify size or to look for more translations since |
59 | + * this contiguous chunk was allocated by us. | ||
60 | + */ | ||
61 | + .translated_addr = (hwaddr)(uintptr_t)addr, | ||
62 | + }; | ||
63 | + const DMAMap *map = vhost_iova_tree_find_iova(tree, &needle); | ||
64 | + int r; | ||
65 | + | ||
66 | + if (unlikely(!map)) { | ||
67 | + error_report("Cannot locate expected map"); | ||
68 | + return; | ||
69 | + } | ||
70 | + | ||
71 | + r = vhost_vdpa_dma_unmap(v, map->iova, map->size + 1); | ||
72 | + if (unlikely(r != 0)) { | ||
73 | + error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); | ||
74 | + } | ||
75 | + | ||
76 | + vhost_iova_tree_remove(tree, map); | ||
77 | +} | ||
78 | + | ||
79 | +static size_t vhost_vdpa_net_cvq_cmd_len(void) | ||
80 | +{ | ||
81 | + /* | ||
82 | + * MAC_TABLE_SET is the ctrl command that produces the longer out buffer. | ||
83 | + * In buffer is always 1 byte, so it should fit here | ||
84 | + */ | ||
85 | + return sizeof(struct virtio_net_ctrl_hdr) + | ||
86 | + 2 * sizeof(struct virtio_net_ctrl_mac) + | ||
87 | + MAC_TABLE_ENTRIES * ETH_ALEN; | ||
88 | +} | ||
89 | + | ||
90 | +static size_t vhost_vdpa_net_cvq_cmd_page_len(void) | ||
91 | +{ | ||
92 | + return ROUND_UP(vhost_vdpa_net_cvq_cmd_len(), qemu_real_host_page_size()); | ||
93 | +} | ||
94 | + | ||
95 | +/** Copy and map a guest buffer. */ | ||
96 | +static bool vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, | ||
97 | + const struct iovec *out_data, | ||
98 | + size_t out_num, size_t data_len, void *buf, | ||
99 | + size_t *written, bool write) | ||
100 | +{ | ||
101 | + DMAMap map = {}; | ||
102 | + int r; | ||
103 | + | ||
104 | + if (unlikely(!data_len)) { | ||
105 | + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid legnth of %s buffer\n", | ||
106 | + __func__, write ? "in" : "out"); | ||
107 | + return false; | ||
108 | + } | ||
109 | + | ||
110 | + *written = iov_to_buf(out_data, out_num, 0, buf, data_len); | ||
111 | + map.translated_addr = (hwaddr)(uintptr_t)buf; | ||
112 | + map.size = vhost_vdpa_net_cvq_cmd_page_len() - 1; | ||
113 | + map.perm = write ? IOMMU_RW : IOMMU_RO, | ||
114 | + r = vhost_iova_tree_map_alloc(v->iova_tree, &map); | ||
115 | + if (unlikely(r != IOVA_OK)) { | ||
116 | + error_report("Cannot map injected element"); | ||
117 | + return false; | ||
118 | + } | ||
119 | + | ||
120 | + r = vhost_vdpa_dma_map(v, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, | ||
121 | + !write); | ||
122 | + if (unlikely(r < 0)) { | ||
123 | + goto dma_map_err; | ||
124 | + } | ||
125 | + | ||
126 | + return true; | ||
127 | + | ||
128 | +dma_map_err: | ||
129 | + vhost_iova_tree_remove(v->iova_tree, &map); | ||
130 | + return false; | ||
131 | +} | ||
132 | + | ||
133 | /** | ||
134 | - * Forward buffer for the moment. | ||
135 | + * Copy the guest element into a dedicated buffer suitable to be sent to NIC | ||
136 | + * | ||
137 | + * @iov: [0] is the out buffer, [1] is the in one | ||
138 | + */ | ||
139 | +static bool vhost_vdpa_net_cvq_map_elem(VhostVDPAState *s, | ||
140 | + VirtQueueElement *elem, | ||
141 | + struct iovec *iov) | ||
142 | +{ | ||
143 | + size_t in_copied; | ||
144 | + bool ok; | ||
145 | + | ||
146 | + iov[0].iov_base = s->cvq_cmd_out_buffer; | ||
147 | + ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, elem->out_sg, elem->out_num, | ||
148 | + vhost_vdpa_net_cvq_cmd_len(), iov[0].iov_base, | ||
149 | + &iov[0].iov_len, false); | ||
150 | + if (unlikely(!ok)) { | ||
151 | + return false; | ||
152 | + } | ||
153 | + | ||
154 | + iov[1].iov_base = s->cvq_cmd_in_buffer; | ||
155 | + ok = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, NULL, 0, | ||
156 | + sizeof(virtio_net_ctrl_ack), iov[1].iov_base, | ||
157 | + &in_copied, true); | ||
158 | + if (unlikely(!ok)) { | ||
159 | + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer); | ||
160 | + return false; | ||
161 | + } | ||
162 | + | ||
163 | + iov[1].iov_len = sizeof(virtio_net_ctrl_ack); | ||
164 | + return true; | ||
165 | +} | ||
166 | + | ||
167 | +/** | ||
168 | + * Do not forward commands not supported by SVQ. Otherwise, the device could | ||
169 | + * accept it and qemu would not know how to update the device model. | ||
170 | + */ | ||
171 | +static bool vhost_vdpa_net_cvq_validate_cmd(const struct iovec *out, | ||
172 | + size_t out_num) | ||
173 | +{ | ||
174 | + struct virtio_net_ctrl_hdr ctrl; | ||
175 | + size_t n; | ||
176 | + | ||
177 | + n = iov_to_buf(out, out_num, 0, &ctrl, sizeof(ctrl)); | ||
178 | + if (unlikely(n < sizeof(ctrl))) { | ||
179 | + qemu_log_mask(LOG_GUEST_ERROR, | ||
180 | + "%s: invalid legnth of out buffer %zu\n", __func__, n); | ||
181 | + return false; | ||
182 | + } | ||
183 | + | ||
184 | + switch (ctrl.class) { | ||
185 | + case VIRTIO_NET_CTRL_MAC: | ||
186 | + switch (ctrl.cmd) { | ||
187 | + case VIRTIO_NET_CTRL_MAC_ADDR_SET: | ||
188 | + return true; | ||
189 | + default: | ||
190 | + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid mac cmd %u\n", | ||
191 | + __func__, ctrl.cmd); | ||
192 | + }; | ||
193 | + break; | ||
194 | + default: | ||
195 | + qemu_log_mask(LOG_GUEST_ERROR, "%s: invalid control class %u\n", | ||
196 | + __func__, ctrl.class); | ||
197 | + }; | ||
198 | + | ||
199 | + return false; | ||
200 | +} | ||
201 | + | ||
202 | +/** | ||
203 | + * Validate and copy control virtqueue commands. | ||
204 | + * | ||
205 | + * Following QEMU guidelines, we offer a copy of the buffers to the device to | ||
206 | + * prevent TOCTOU bugs. | ||
207 | */ | ||
208 | static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, | ||
209 | VirtQueueElement *elem, | ||
210 | void *opaque) | ||
211 | { | ||
212 | - unsigned int n = elem->out_num + elem->in_num; | ||
213 | - g_autofree struct iovec *dev_buffers = g_new(struct iovec, n); | ||
214 | + VhostVDPAState *s = opaque; | ||
215 | size_t in_len, dev_written; | ||
216 | virtio_net_ctrl_ack status = VIRTIO_NET_ERR; | ||
217 | - int r; | ||
218 | + /* out and in buffers sent to the device */ | ||
219 | + struct iovec dev_buffers[2] = { | ||
220 | + { .iov_base = s->cvq_cmd_out_buffer }, | ||
221 | + { .iov_base = s->cvq_cmd_in_buffer }, | ||
222 | + }; | ||
223 | + /* in buffer used for device model */ | ||
224 | + const struct iovec in = { | ||
225 | + .iov_base = &status, | ||
226 | + .iov_len = sizeof(status), | ||
227 | + }; | ||
228 | + int r = -EINVAL; | ||
229 | + bool ok; | ||
230 | + | ||
231 | + ok = vhost_vdpa_net_cvq_map_elem(s, elem, dev_buffers); | ||
232 | + if (unlikely(!ok)) { | ||
233 | + goto out; | ||
234 | + } | ||
235 | |||
236 | - memcpy(dev_buffers, elem->out_sg, elem->out_num); | ||
237 | - memcpy(dev_buffers + elem->out_num, elem->in_sg, elem->in_num); | ||
238 | + ok = vhost_vdpa_net_cvq_validate_cmd(&dev_buffers[0], 1); | ||
239 | + if (unlikely(!ok)) { | ||
240 | + goto out; | ||
241 | + } | ||
242 | |||
243 | - r = vhost_svq_add(svq, &dev_buffers[0], elem->out_num, &dev_buffers[1], | ||
244 | - elem->in_num, elem); | ||
245 | + r = vhost_svq_add(svq, &dev_buffers[0], 1, &dev_buffers[1], 1, elem); | ||
246 | if (unlikely(r != 0)) { | ||
247 | if (unlikely(r == -ENOSPC)) { | ||
248 | qemu_log_mask(LOG_GUEST_ERROR, "%s: No space on device queue\n", | ||
249 | @@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, | ||
250 | dev_written = vhost_svq_poll(svq); | ||
251 | if (unlikely(dev_written < sizeof(status))) { | ||
252 | error_report("Insufficient written data (%zu)", dev_written); | ||
253 | + goto out; | ||
254 | + } | ||
255 | + | ||
256 | + memcpy(&status, dev_buffers[1].iov_base, sizeof(status)); | ||
257 | + if (status != VIRTIO_NET_OK) { | ||
258 | + goto out; | ||
259 | + } | ||
260 | + | ||
261 | + status = VIRTIO_NET_ERR; | ||
262 | + virtio_net_handle_ctrl_iov(svq->vdev, &in, 1, dev_buffers, 1); | ||
263 | + if (status != VIRTIO_NET_OK) { | ||
264 | + error_report("Bad CVQ processing in model"); | ||
31 | } | 265 | } |
32 | 266 | ||
33 | + nc->vnet_hdr_len = len; | 267 | out: |
34 | nc->info->set_vnet_hdr_len(nc, len); | 268 | @@ -XXX,XX +XXX,XX @@ out: |
269 | } | ||
270 | vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); | ||
271 | g_free(elem); | ||
272 | + if (dev_buffers[0].iov_base) { | ||
273 | + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[0].iov_base); | ||
274 | + } | ||
275 | + if (dev_buffers[1].iov_base) { | ||
276 | + vhost_vdpa_cvq_unmap_buf(&s->vhost_vdpa, dev_buffers[1].iov_base); | ||
277 | + } | ||
278 | return r; | ||
35 | } | 279 | } |
36 | 280 | ||
281 | @@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, | ||
282 | s->vhost_vdpa.device_fd = vdpa_device_fd; | ||
283 | s->vhost_vdpa.index = queue_pair_index; | ||
284 | if (!is_datapath) { | ||
285 | + s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), | ||
286 | + vhost_vdpa_net_cvq_cmd_page_len()); | ||
287 | + memset(s->cvq_cmd_out_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); | ||
288 | + s->cvq_cmd_in_buffer = qemu_memalign(qemu_real_host_page_size(), | ||
289 | + vhost_vdpa_net_cvq_cmd_page_len()); | ||
290 | + memset(s->cvq_cmd_in_buffer, 0, vhost_vdpa_net_cvq_cmd_page_len()); | ||
291 | + | ||
292 | s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; | ||
293 | s->vhost_vdpa.shadow_vq_ops_opaque = s; | ||
294 | } | ||
37 | -- | 295 | -- |
38 | 2.7.4 | 296 | 2.7.4 |
39 | 297 | ||
40 | 298 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | COLO-Proxy just focus on packet payload, so we skip vnet header. | 3 | Knowing the device features is needed for CVQ SVQ, so SVQ knows whether it |
4 | can handle all commands or not. Extract the feature query from | ||
5 | vhost_vdpa_get_max_queue_pairs so we can reuse it. | ||
4 | 6 | ||
5 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 7 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> |
8 | Acked-by: Jason Wang <jasowang@redhat.com> | ||
9 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
6 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 10 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
7 | --- | 11 | --- |
8 | net/colo-compare.c | 8 ++++++-- | 12 | net/vhost-vdpa.c | 30 ++++++++++++++++++++---------- |
9 | 1 file changed, 6 insertions(+), 2 deletions(-) | 13 | 1 file changed, 20 insertions(+), 10 deletions(-) |
10 | 14 | ||
11 | diff --git a/net/colo-compare.c b/net/colo-compare.c | 15 | diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c |
12 | index XXXXXXX..XXXXXXX 100644 | 16 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/net/colo-compare.c | 17 | --- a/net/vhost-vdpa.c |
14 | +++ b/net/colo-compare.c | 18 | +++ b/net/vhost-vdpa.c |
15 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset) | 19 | @@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, |
16 | sec_ip_src, sec_ip_dst); | 20 | return nc; |
21 | } | ||
22 | |||
23 | -static int vhost_vdpa_get_max_queue_pairs(int fd, int *has_cvq, Error **errp) | ||
24 | +static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) | ||
25 | +{ | ||
26 | + int ret = ioctl(fd, VHOST_GET_FEATURES, features); | ||
27 | + if (unlikely(ret < 0)) { | ||
28 | + error_setg_errno(errp, errno, | ||
29 | + "Fail to query features from vhost-vDPA device"); | ||
30 | + } | ||
31 | + return ret; | ||
32 | +} | ||
33 | + | ||
34 | +static int vhost_vdpa_get_max_queue_pairs(int fd, uint64_t features, | ||
35 | + int *has_cvq, Error **errp) | ||
36 | { | ||
37 | unsigned long config_size = offsetof(struct vhost_vdpa_config, buf); | ||
38 | g_autofree struct vhost_vdpa_config *config = NULL; | ||
39 | __virtio16 *max_queue_pairs; | ||
40 | - uint64_t features; | ||
41 | int ret; | ||
42 | |||
43 | - ret = ioctl(fd, VHOST_GET_FEATURES, &features); | ||
44 | - if (ret) { | ||
45 | - error_setg(errp, "Fail to query features from vhost-vDPA device"); | ||
46 | - return ret; | ||
47 | - } | ||
48 | - | ||
49 | if (features & (1 << VIRTIO_NET_F_CTRL_VQ)) { | ||
50 | *has_cvq = 1; | ||
51 | } else { | ||
52 | @@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, | ||
53 | NetClientState *peer, Error **errp) | ||
54 | { | ||
55 | const NetdevVhostVDPAOptions *opts; | ||
56 | + uint64_t features; | ||
57 | int vdpa_device_fd; | ||
58 | g_autofree NetClientState **ncs = NULL; | ||
59 | NetClientState *nc; | ||
60 | - int queue_pairs, i, has_cvq = 0; | ||
61 | + int queue_pairs, r, i, has_cvq = 0; | ||
62 | |||
63 | assert(netdev->type == NET_CLIENT_DRIVER_VHOST_VDPA); | ||
64 | opts = &netdev->u.vhost_vdpa; | ||
65 | @@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, | ||
66 | return -errno; | ||
17 | } | 67 | } |
18 | 68 | ||
19 | + offset = ppkt->vnet_hdr_len + offset; | 69 | - queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, |
70 | + r = vhost_vdpa_get_features(vdpa_device_fd, &features, errp); | ||
71 | + if (unlikely(r < 0)) { | ||
72 | + return r; | ||
73 | + } | ||
20 | + | 74 | + |
21 | if (ppkt->size == spkt->size) { | 75 | + queue_pairs = vhost_vdpa_get_max_queue_pairs(vdpa_device_fd, features, |
22 | - return memcmp(ppkt->data + offset, spkt->data + offset, | 76 | &has_cvq, errp); |
23 | + return memcmp(ppkt->data + offset, | 77 | if (queue_pairs < 0) { |
24 | + spkt->data + offset, | 78 | qemu_close(vdpa_device_fd); |
25 | spkt->size - offset); | ||
26 | } else { | ||
27 | trace_colo_compare_main("Net packet size are not the same"); | ||
28 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) | ||
29 | */ | ||
30 | if (ptcp->th_off > 5) { | ||
31 | ptrdiff_t tcp_offset; | ||
32 | + | ||
33 | tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data | ||
34 | - + (ptcp->th_off * 4); | ||
35 | + + (ptcp->th_off * 4) - ppkt->vnet_hdr_len; | ||
36 | res = colo_packet_compare_common(ppkt, spkt, tcp_offset); | ||
37 | } else if (ptcp->th_sum == stcp->th_sum) { | ||
38 | res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN); | ||
39 | -- | 79 | -- |
40 | 2.7.4 | 80 | 2.7.4 |
41 | 81 | ||
42 | 82 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We add the vnet_hdr_support option for colo-compare, default is disabled. | 3 | Since the vhost-vdpa device is exposing _F_LOG, add a migration blocker if |
4 | If you use virtio-net-pci or other driver needs vnet_hdr, please enable it. | 4 | it uses CVQ. |
5 | You can use it for example: | ||
6 | -object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support | ||
7 | 5 | ||
8 | COLO-compare can get vnet header length from filter, | 6 | However, qemu is able to migrate simple devices with no CVQ as long as |
9 | Add vnet_hdr_len to struct packet and output packet with | 7 | they use SVQ. To allow it, add a placeholder error to vhost_vdpa, and |
10 | the vnet_hdr_len. | 8 | only add it to vhost_dev when used. The vhost_dev machinery places the migration |
9 | blocker if needed. | ||
11 | 10 | ||
12 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 11 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> |
12 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
13 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 13 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
14 | --- | 14 | --- |
15 | net/colo-compare.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++------- | 15 | hw/virtio/vhost-vdpa.c | 15 +++++++++++++++ |
16 | qemu-options.hx | 4 ++-- | 16 | include/hw/virtio/vhost-vdpa.h | 1 + |
17 | 2 files changed, 55 insertions(+), 9 deletions(-) | 17 | 2 files changed, 16 insertions(+) |
18 | 18 | ||
19 | diff --git a/net/colo-compare.c b/net/colo-compare.c | 19 | diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c |
20 | index XXXXXXX..XXXXXXX 100644 | 20 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/net/colo-compare.c | 21 | --- a/hw/virtio/vhost-vdpa.c |
22 | +++ b/net/colo-compare.c | 22 | +++ b/hw/virtio/vhost-vdpa.c |
23 | @@ -XXX,XX +XXX,XX @@ typedef struct CompareState { | 23 | @@ -XXX,XX +XXX,XX @@ |
24 | CharBackend chr_out; | 24 | #include "hw/virtio/vhost-shadow-virtqueue.h" |
25 | SocketReadState pri_rs; | 25 | #include "hw/virtio/vhost-vdpa.h" |
26 | SocketReadState sec_rs; | 26 | #include "exec/address-spaces.h" |
27 | + bool vnet_hdr; | 27 | +#include "migration/blocker.h" |
28 | 28 | #include "qemu/cutils.h" | |
29 | /* connection list: the connections belonged to this NIC could be found | 29 | #include "qemu/main-loop.h" |
30 | * in this list. | 30 | #include "cpu.h" |
31 | @@ -XXX,XX +XXX,XX @@ enum { | 31 | @@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) |
32 | 32 | return true; | |
33 | static int compare_chr_send(CompareState *s, | ||
34 | const uint8_t *buf, | ||
35 | - uint32_t size); | ||
36 | + uint32_t size, | ||
37 | + uint32_t vnet_hdr_len); | ||
38 | |||
39 | static gint seq_sorter(Packet *a, Packet *b, gpointer data) | ||
40 | { | ||
41 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data) | ||
42 | } | ||
43 | |||
44 | if (result) { | ||
45 | - ret = compare_chr_send(s, pkt->data, pkt->size); | ||
46 | + ret = compare_chr_send(s, | ||
47 | + pkt->data, | ||
48 | + pkt->size, | ||
49 | + pkt->vnet_hdr_len); | ||
50 | if (ret < 0) { | ||
51 | error_report("colo_send_primary_packet failed"); | ||
52 | } | ||
53 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data) | ||
54 | |||
55 | static int compare_chr_send(CompareState *s, | ||
56 | const uint8_t *buf, | ||
57 | - uint32_t size) | ||
58 | + uint32_t size, | ||
59 | + uint32_t vnet_hdr_len) | ||
60 | { | ||
61 | int ret = 0; | ||
62 | uint32_t len = htonl(size); | ||
63 | @@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CompareState *s, | ||
64 | goto err; | ||
65 | } | 33 | } |
66 | 34 | ||
67 | + if (s->vnet_hdr) { | 35 | + if (v->migration_blocker) { |
68 | + /* | 36 | + int r = migrate_add_blocker(v->migration_blocker, &err); |
69 | + * We send vnet header len make other module(like filter-redirector) | 37 | + if (unlikely(r < 0)) { |
70 | + * know how to parse net packet correctly. | 38 | + return false; |
71 | + */ | ||
72 | + len = htonl(vnet_hdr_len); | ||
73 | + ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len)); | ||
74 | + if (ret != sizeof(len)) { | ||
75 | + goto err; | ||
76 | + } | 39 | + } |
77 | + } | 40 | + } |
78 | + | 41 | + |
79 | ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size); | 42 | for (i = 0; i < v->shadow_vqs->len; ++i) { |
80 | if (ret != size) { | 43 | VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i); |
81 | goto err; | 44 | VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i); |
82 | @@ -XXX,XX +XXX,XX @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp) | 45 | @@ -XXX,XX +XXX,XX @@ err: |
83 | s->outdev = g_strdup(value); | 46 | vhost_svq_stop(svq); |
47 | } | ||
48 | |||
49 | + if (v->migration_blocker) { | ||
50 | + migrate_del_blocker(v->migration_blocker); | ||
51 | + } | ||
52 | + | ||
53 | return false; | ||
84 | } | 54 | } |
85 | 55 | ||
86 | +static bool compare_get_vnet_hdr(Object *obj, Error **errp) | 56 | @@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev) |
87 | +{ | 57 | } |
88 | + CompareState *s = COLO_COMPARE(obj); | ||
89 | + | ||
90 | + return s->vnet_hdr; | ||
91 | +} | ||
92 | + | ||
93 | +static void compare_set_vnet_hdr(Object *obj, | ||
94 | + bool value, | ||
95 | + Error **errp) | ||
96 | +{ | ||
97 | + CompareState *s = COLO_COMPARE(obj); | ||
98 | + | ||
99 | + s->vnet_hdr = value; | ||
100 | +} | ||
101 | + | ||
102 | static void compare_pri_rs_finalize(SocketReadState *pri_rs) | ||
103 | { | ||
104 | CompareState *s = container_of(pri_rs, CompareState, pri_rs); | ||
105 | |||
106 | if (packet_enqueue(s, PRIMARY_IN)) { | ||
107 | trace_colo_compare_main("primary: unsupported packet in"); | ||
108 | - compare_chr_send(s, pri_rs->buf, pri_rs->packet_len); | ||
109 | + compare_chr_send(s, | ||
110 | + pri_rs->buf, | ||
111 | + pri_rs->packet_len, | ||
112 | + pri_rs->vnet_hdr_len); | ||
113 | } else { | ||
114 | /* compare connection */ | ||
115 | g_queue_foreach(&s->conn_list, colo_compare_connection, s); | ||
116 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) | ||
117 | return; | ||
118 | } | 58 | } |
119 | 59 | ||
120 | - net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false); | 60 | + if (v->migration_blocker) { |
121 | - net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false); | 61 | + migrate_del_blocker(v->migration_blocker); |
122 | + net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr); | 62 | + } |
123 | + net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr); | 63 | return true; |
124 | |||
125 | g_queue_init(&s->conn_list); | ||
126 | |||
127 | @@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data) | ||
128 | |||
129 | while (!g_queue_is_empty(&conn->primary_list)) { | ||
130 | pkt = g_queue_pop_head(&conn->primary_list); | ||
131 | - compare_chr_send(s, pkt->data, pkt->size); | ||
132 | + compare_chr_send(s, | ||
133 | + pkt->data, | ||
134 | + pkt->size, | ||
135 | + pkt->vnet_hdr_len); | ||
136 | packet_destroy(pkt, NULL); | ||
137 | } | ||
138 | while (!g_queue_is_empty(&conn->secondary_list)) { | ||
139 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_class_init(ObjectClass *oc, void *data) | ||
140 | |||
141 | static void colo_compare_init(Object *obj) | ||
142 | { | ||
143 | + CompareState *s = COLO_COMPARE(obj); | ||
144 | + | ||
145 | object_property_add_str(obj, "primary_in", | ||
146 | compare_get_pri_indev, compare_set_pri_indev, | ||
147 | NULL); | ||
148 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj) | ||
149 | object_property_add_str(obj, "outdev", | ||
150 | compare_get_outdev, compare_set_outdev, | ||
151 | NULL); | ||
152 | + | ||
153 | + s->vnet_hdr = false; | ||
154 | + object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr, | ||
155 | + compare_set_vnet_hdr, NULL); | ||
156 | } | 64 | } |
157 | 65 | ||
158 | static void colo_compare_finalize(Object *obj) | 66 | diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h |
159 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
160 | index XXXXXXX..XXXXXXX 100644 | 67 | index XXXXXXX..XXXXXXX 100644 |
161 | --- a/qemu-options.hx | 68 | --- a/include/hw/virtio/vhost-vdpa.h |
162 | +++ b/qemu-options.hx | 69 | +++ b/include/hw/virtio/vhost-vdpa.h |
163 | @@ -XXX,XX +XXX,XX @@ Dump the network traffic on netdev @var{dev} to the file specified by | 70 | @@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa { |
164 | The file format is libpcap, so it can be analyzed with tools such as tcpdump | 71 | bool shadow_vqs_enabled; |
165 | or Wireshark. | 72 | /* IOVA mapping used by the Shadow Virtqueue */ |
166 | 73 | VhostIOVATree *iova_tree; | |
167 | -@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid}, | 74 | + Error *migration_blocker; |
168 | -outdev=@var{chardevid} | 75 | GPtrArray *shadow_vqs; |
169 | +@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support] | 76 | const VhostShadowVirtqueueOps *shadow_vq_ops; |
170 | 77 | void *shadow_vq_ops_opaque; | |
171 | Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with | ||
172 | secondary packet. If the packets are same, we will output primary | ||
173 | packet to outdev@var{chardevid}, else we will notify colo-frame | ||
174 | do checkpoint and send primary packet to outdev@var{chardevid}. | ||
175 | +if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len. | ||
176 | |||
177 | we must use it with the help of filter-mirror and filter-redirector. | ||
178 | |||
179 | -- | 78 | -- |
180 | 2.7.4 | 79 | 2.7.4 |
181 | 80 | ||
182 | 81 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Eugenio Pérez <eperezma@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | We add a flag to decide whether net_fill_rstate() need read | 3 | Finally offering the possibility to enable SVQ from the command line. |
4 | the vnet_hdr_len or not. | ||
5 | 4 | ||
6 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 5 | Signed-off-by: Eugenio Pérez <eperezma@redhat.com> |
7 | Suggested-by: Jason Wang <jasowang@redhat.com> | 6 | Acked-by: Markus Armbruster <armbru@redhat.com> |
7 | Reviewed-by: Michael S. Tsirkin <mst@redhat.com> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 8 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
9 | --- | 9 | --- |
10 | include/net/net.h | 9 +++++++-- | 10 | net/vhost-vdpa.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- |
11 | net/colo-compare.c | 4 ++-- | 11 | qapi/net.json | 9 ++++++- |
12 | net/filter-mirror.c | 2 +- | 12 | 2 files changed, 77 insertions(+), 4 deletions(-) |
13 | net/net.c | 36 ++++++++++++++++++++++++++++++++---- | ||
14 | net/socket.c | 8 ++++---- | ||
15 | 5 files changed, 46 insertions(+), 13 deletions(-) | ||
16 | 13 | ||
17 | diff --git a/include/net/net.h b/include/net/net.h | 14 | diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c |
18 | index XXXXXXX..XXXXXXX 100644 | 15 | index XXXXXXX..XXXXXXX 100644 |
19 | --- a/include/net/net.h | 16 | --- a/net/vhost-vdpa.c |
20 | +++ b/include/net/net.h | 17 | +++ b/net/vhost-vdpa.c |
21 | @@ -XXX,XX +XXX,XX @@ typedef struct NICState { | 18 | @@ -XXX,XX +XXX,XX @@ const int vdpa_feature_bits[] = { |
22 | } NICState; | 19 | VHOST_INVALID_FEATURE_BIT |
23 | |||
24 | struct SocketReadState { | ||
25 | - int state; /* 0 = getting length, 1 = getting data */ | ||
26 | + /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */ | ||
27 | + int state; | ||
28 | + /* This flag decide whether to read the vnet_hdr_len field */ | ||
29 | + bool vnet_hdr; | ||
30 | uint32_t index; | ||
31 | uint32_t packet_len; | ||
32 | + uint32_t vnet_hdr_len; | ||
33 | uint8_t buf[NET_BUFSIZE]; | ||
34 | SocketReadStateFinalize *finalize; | ||
35 | }; | 20 | }; |
36 | @@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender, | 21 | |
37 | void print_net_client(Monitor *mon, NetClientState *nc); | 22 | +/** Supported device specific feature bits with SVQ */ |
38 | void hmp_info_network(Monitor *mon, const QDict *qdict); | 23 | +static const uint64_t vdpa_svq_device_features = |
39 | void net_socket_rs_init(SocketReadState *rs, | 24 | + BIT_ULL(VIRTIO_NET_F_CSUM) | |
40 | - SocketReadStateFinalize *finalize); | 25 | + BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) | |
41 | + SocketReadStateFinalize *finalize, | 26 | + BIT_ULL(VIRTIO_NET_F_MTU) | |
42 | + bool vnet_hdr); | 27 | + BIT_ULL(VIRTIO_NET_F_MAC) | |
43 | 28 | + BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | | |
44 | /* NIC info */ | 29 | + BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) | |
45 | 30 | + BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | | |
46 | diff --git a/net/colo-compare.c b/net/colo-compare.c | 31 | + BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | |
47 | index XXXXXXX..XXXXXXX 100644 | 32 | + BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | |
48 | --- a/net/colo-compare.c | 33 | + BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | |
49 | +++ b/net/colo-compare.c | 34 | + BIT_ULL(VIRTIO_NET_F_HOST_ECN) | |
50 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) | 35 | + BIT_ULL(VIRTIO_NET_F_HOST_UFO) | |
51 | return; | 36 | + BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | |
37 | + BIT_ULL(VIRTIO_NET_F_STATUS) | | ||
38 | + BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | | ||
39 | + BIT_ULL(VIRTIO_F_ANY_LAYOUT) | | ||
40 | + BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | | ||
41 | + BIT_ULL(VIRTIO_NET_F_RSC_EXT) | | ||
42 | + BIT_ULL(VIRTIO_NET_F_STANDBY); | ||
43 | + | ||
44 | VHostNetState *vhost_vdpa_get_vhost_net(NetClientState *nc) | ||
45 | { | ||
46 | VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); | ||
47 | @@ -XXX,XX +XXX,XX @@ err_init: | ||
48 | static void vhost_vdpa_cleanup(NetClientState *nc) | ||
49 | { | ||
50 | VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); | ||
51 | + struct vhost_dev *dev = &s->vhost_net->dev; | ||
52 | |||
53 | qemu_vfree(s->cvq_cmd_out_buffer); | ||
54 | qemu_vfree(s->cvq_cmd_in_buffer); | ||
55 | + if (dev->vq_index + dev->nvqs == dev->vq_index_end) { | ||
56 | + g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); | ||
57 | + } | ||
58 | if (s->vhost_net) { | ||
59 | vhost_net_cleanup(s->vhost_net); | ||
60 | g_free(s->vhost_net); | ||
61 | @@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, | ||
62 | int vdpa_device_fd, | ||
63 | int queue_pair_index, | ||
64 | int nvqs, | ||
65 | - bool is_datapath) | ||
66 | + bool is_datapath, | ||
67 | + bool svq, | ||
68 | + VhostIOVATree *iova_tree) | ||
69 | { | ||
70 | NetClientState *nc = NULL; | ||
71 | VhostVDPAState *s; | ||
72 | @@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, | ||
73 | |||
74 | s->vhost_vdpa.device_fd = vdpa_device_fd; | ||
75 | s->vhost_vdpa.index = queue_pair_index; | ||
76 | + s->vhost_vdpa.shadow_vqs_enabled = svq; | ||
77 | + s->vhost_vdpa.iova_tree = iova_tree; | ||
78 | if (!is_datapath) { | ||
79 | s->cvq_cmd_out_buffer = qemu_memalign(qemu_real_host_page_size(), | ||
80 | vhost_vdpa_net_cvq_cmd_page_len()); | ||
81 | @@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, | ||
82 | |||
83 | s->vhost_vdpa.shadow_vq_ops = &vhost_vdpa_net_svq_ops; | ||
84 | s->vhost_vdpa.shadow_vq_ops_opaque = s; | ||
85 | + error_setg(&s->vhost_vdpa.migration_blocker, | ||
86 | + "Migration disabled: vhost-vdpa uses CVQ."); | ||
52 | } | 87 | } |
53 | 88 | ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); | |
54 | - net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize); | 89 | if (ret) { |
55 | - net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize); | 90 | @@ -XXX,XX +XXX,XX @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, |
56 | + net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false); | 91 | return nc; |
57 | + net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false); | 92 | } |
58 | 93 | ||
59 | g_queue_init(&s->conn_list); | 94 | +static int vhost_vdpa_get_iova_range(int fd, |
60 | 95 | + struct vhost_vdpa_iova_range *iova_range) | |
61 | diff --git a/net/filter-mirror.c b/net/filter-mirror.c | 96 | +{ |
62 | index XXXXXXX..XXXXXXX 100644 | 97 | + int ret = ioctl(fd, VHOST_VDPA_GET_IOVA_RANGE, iova_range); |
63 | --- a/net/filter-mirror.c | 98 | + |
64 | +++ b/net/filter-mirror.c | 99 | + return ret < 0 ? -errno : 0; |
65 | @@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp) | 100 | +} |
101 | + | ||
102 | static int vhost_vdpa_get_features(int fd, uint64_t *features, Error **errp) | ||
103 | { | ||
104 | int ret = ioctl(fd, VHOST_GET_FEATURES, features); | ||
105 | @@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, | ||
106 | uint64_t features; | ||
107 | int vdpa_device_fd; | ||
108 | g_autofree NetClientState **ncs = NULL; | ||
109 | + g_autoptr(VhostIOVATree) iova_tree = NULL; | ||
110 | NetClientState *nc; | ||
111 | int queue_pairs, r, i, has_cvq = 0; | ||
112 | |||
113 | @@ -XXX,XX +XXX,XX @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, | ||
114 | return queue_pairs; | ||
115 | } | ||
116 | |||
117 | + if (opts->x_svq) { | ||
118 | + struct vhost_vdpa_iova_range iova_range; | ||
119 | + | ||
120 | + uint64_t invalid_dev_features = | ||
121 | + features & ~vdpa_svq_device_features & | ||
122 | + /* Transport are all accepted at this point */ | ||
123 | + ~MAKE_64BIT_MASK(VIRTIO_TRANSPORT_F_START, | ||
124 | + VIRTIO_TRANSPORT_F_END - VIRTIO_TRANSPORT_F_START); | ||
125 | + | ||
126 | + if (invalid_dev_features) { | ||
127 | + error_setg(errp, "vdpa svq does not work with features 0x%" PRIx64, | ||
128 | + invalid_dev_features); | ||
129 | + goto err_svq; | ||
130 | + } | ||
131 | + | ||
132 | + vhost_vdpa_get_iova_range(vdpa_device_fd, &iova_range); | ||
133 | + iova_tree = vhost_iova_tree_new(iova_range.first, iova_range.last); | ||
134 | + } | ||
135 | + | ||
136 | ncs = g_malloc0(sizeof(*ncs) * queue_pairs); | ||
137 | |||
138 | for (i = 0; i < queue_pairs; i++) { | ||
139 | ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, | ||
140 | - vdpa_device_fd, i, 2, true); | ||
141 | + vdpa_device_fd, i, 2, true, opts->x_svq, | ||
142 | + iova_tree); | ||
143 | if (!ncs[i]) | ||
144 | goto err; | ||
145 | } | ||
146 | |||
147 | if (has_cvq) { | ||
148 | nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, | ||
149 | - vdpa_device_fd, i, 1, false); | ||
150 | + vdpa_device_fd, i, 1, false, | ||
151 | + opts->x_svq, iova_tree); | ||
152 | if (!nc) | ||
153 | goto err; | ||
154 | } | ||
155 | |||
156 | + /* iova_tree ownership belongs to last NetClientState */ | ||
157 | + g_steal_pointer(&iova_tree); | ||
158 | return 0; | ||
159 | |||
160 | err: | ||
161 | @@ -XXX,XX +XXX,XX @@ err: | ||
162 | qemu_del_net_client(ncs[i]); | ||
66 | } | 163 | } |
67 | } | 164 | } |
68 | 165 | + | |
69 | - net_socket_rs_init(&s->rs, redirector_rs_finalize); | 166 | +err_svq: |
70 | + net_socket_rs_init(&s->rs, redirector_rs_finalize, false); | 167 | qemu_close(vdpa_device_fd); |
71 | 168 | ||
72 | if (s->indev) { | 169 | return -1; |
73 | chr = qemu_chr_find(s->indev); | 170 | diff --git a/qapi/net.json b/qapi/net.json |
74 | diff --git a/net/net.c b/net/net.c | ||
75 | index XXXXXXX..XXXXXXX 100644 | 171 | index XXXXXXX..XXXXXXX 100644 |
76 | --- a/net/net.c | 172 | --- a/qapi/net.json |
77 | +++ b/net/net.c | 173 | +++ b/qapi/net.json |
78 | @@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_net_opts = { | 174 | @@ -XXX,XX +XXX,XX @@ |
79 | }; | 175 | # @queues: number of queues to be created for multiqueue vhost-vdpa |
80 | 176 | # (default: 1) | |
81 | void net_socket_rs_init(SocketReadState *rs, | 177 | # |
82 | - SocketReadStateFinalize *finalize) | 178 | +# @x-svq: Start device with (experimental) shadow virtqueue. (Since 7.1) |
83 | + SocketReadStateFinalize *finalize, | 179 | +# (default: false) |
84 | + bool vnet_hdr) | 180 | +# |
85 | { | 181 | +# Features: |
86 | rs->state = 0; | 182 | +# @unstable: Member @x-svq is experimental. |
87 | + rs->vnet_hdr = vnet_hdr; | 183 | +# |
88 | rs->index = 0; | 184 | # Since: 5.1 |
89 | rs->packet_len = 0; | 185 | ## |
90 | + rs->vnet_hdr_len = 0; | 186 | { 'struct': 'NetdevVhostVDPAOptions', |
91 | memset(rs->buf, 0, sizeof(rs->buf)); | 187 | 'data': { |
92 | rs->finalize = finalize; | 188 | '*vhostdev': 'str', |
93 | } | 189 | - '*queues': 'int' } } |
94 | @@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size) | 190 | + '*queues': 'int', |
95 | unsigned int l; | 191 | + '*x-svq': {'type': 'bool', 'features' : [ 'unstable'] } } } |
96 | 192 | ||
97 | while (size > 0) { | 193 | ## |
98 | - /* reassemble a packet from the network */ | 194 | # @NetdevVmnetHostOptions: |
99 | - switch (rs->state) { /* 0 = getting length, 1 = getting data */ | ||
100 | + /* Reassemble a packet from the network. | ||
101 | + * 0 = getting length. | ||
102 | + * 1 = getting vnet header length. | ||
103 | + * 2 = getting data. | ||
104 | + */ | ||
105 | + switch (rs->state) { | ||
106 | case 0: | ||
107 | l = 4 - rs->index; | ||
108 | if (l > size) { | ||
109 | @@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size) | ||
110 | /* got length */ | ||
111 | rs->packet_len = ntohl(*(uint32_t *)rs->buf); | ||
112 | rs->index = 0; | ||
113 | - rs->state = 1; | ||
114 | + if (rs->vnet_hdr) { | ||
115 | + rs->state = 1; | ||
116 | + } else { | ||
117 | + rs->state = 2; | ||
118 | + rs->vnet_hdr_len = 0; | ||
119 | + } | ||
120 | } | ||
121 | break; | ||
122 | case 1: | ||
123 | + l = 4 - rs->index; | ||
124 | + if (l > size) { | ||
125 | + l = size; | ||
126 | + } | ||
127 | + memcpy(rs->buf + rs->index, buf, l); | ||
128 | + buf += l; | ||
129 | + size -= l; | ||
130 | + rs->index += l; | ||
131 | + if (rs->index == 4) { | ||
132 | + /* got vnet header length */ | ||
133 | + rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf); | ||
134 | + rs->index = 0; | ||
135 | + rs->state = 2; | ||
136 | + } | ||
137 | + break; | ||
138 | + case 2: | ||
139 | l = rs->packet_len - rs->index; | ||
140 | if (l > size) { | ||
141 | l = size; | ||
142 | diff --git a/net/socket.c b/net/socket.c | ||
143 | index XXXXXXX..XXXXXXX 100644 | ||
144 | --- a/net/socket.c | ||
145 | +++ b/net/socket.c | ||
146 | @@ -XXX,XX +XXX,XX @@ static void net_socket_send(void *opaque) | ||
147 | closesocket(s->fd); | ||
148 | |||
149 | s->fd = -1; | ||
150 | - net_socket_rs_init(&s->rs, net_socket_rs_finalize); | ||
151 | + net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); | ||
152 | s->nc.link_down = true; | ||
153 | memset(s->nc.info_str, 0, sizeof(s->nc.info_str)); | ||
154 | |||
155 | @@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer, | ||
156 | s->fd = fd; | ||
157 | s->listen_fd = -1; | ||
158 | s->send_fn = net_socket_send_dgram; | ||
159 | - net_socket_rs_init(&s->rs, net_socket_rs_finalize); | ||
160 | + net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); | ||
161 | net_socket_read_poll(s, true); | ||
162 | |||
163 | /* mcast: save bound address as dst */ | ||
164 | @@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer, | ||
165 | |||
166 | s->fd = fd; | ||
167 | s->listen_fd = -1; | ||
168 | - net_socket_rs_init(&s->rs, net_socket_rs_finalize); | ||
169 | + net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); | ||
170 | |||
171 | /* Disable Nagle algorithm on TCP sockets to reduce latency */ | ||
172 | socket_set_nodelay(fd); | ||
173 | @@ -XXX,XX +XXX,XX @@ static int net_socket_listen_init(NetClientState *peer, | ||
174 | s->fd = -1; | ||
175 | s->listen_fd = fd; | ||
176 | s->nc.link_down = true; | ||
177 | - net_socket_rs_init(&s->rs, net_socket_rs_finalize); | ||
178 | + net_socket_rs_init(&s->rs, net_socket_rs_finalize, false); | ||
179 | |||
180 | qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s); | ||
181 | return 0; | ||
182 | -- | 195 | -- |
183 | 2.7.4 | 196 | 2.7.4 |
184 | 197 | ||
185 | 198 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Zhang Chen <chen.zhang@intel.com> | ||
1 | 2 | ||
3 | If the checkpoint occurs when the guest finishes restarting | ||
4 | but has not started running, the runstate_set() may reject | ||
5 | the transition from COLO to PRELAUNCH with the crash log: | ||
6 | |||
7 | {"timestamp": {"seconds": 1593484591, "microseconds": 26605},\ | ||
8 | "event": "RESET", "data": {"guest": true, "reason": "guest-reset"}} | ||
9 | qemu-system-x86_64: invalid runstate transition: 'colo' -> 'prelaunch' | ||
10 | |||
11 | Long-term testing says that it's pretty safe. | ||
12 | |||
13 | Signed-off-by: Like Xu <like.xu@linux.intel.com> | ||
14 | Signed-off-by: Zhang Chen <chen.zhang@intel.com> | ||
15 | Acked-by: Dr. David Alan Gilbert <dgilbert@redhat.com> | ||
16 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
17 | --- | ||
18 | softmmu/runstate.c | 1 + | ||
19 | 1 file changed, 1 insertion(+) | ||
20 | |||
21 | diff --git a/softmmu/runstate.c b/softmmu/runstate.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/softmmu/runstate.c | ||
24 | +++ b/softmmu/runstate.c | ||
25 | @@ -XXX,XX +XXX,XX @@ static const RunStateTransition runstate_transitions_def[] = { | ||
26 | { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH }, | ||
27 | |||
28 | { RUN_STATE_COLO, RUN_STATE_RUNNING }, | ||
29 | + { RUN_STATE_COLO, RUN_STATE_PRELAUNCH }, | ||
30 | { RUN_STATE_COLO, RUN_STATE_SHUTDOWN}, | ||
31 | |||
32 | { RUN_STATE_RUNNING, RUN_STATE_DEBUG }, | ||
33 | -- | ||
34 | 2.7.4 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Zhang Chen <chen.zhang@intel.com> |
---|---|---|---|
2 | 2 | ||
3 | We can use this property to flush and send packets with vnet_hdr_len. | 3 | We noticed that QEMU may crash when the guest has too many |
4 | incoming network connections with the following log: | ||
4 | 5 | ||
5 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 6 | 15197@1593578622.668573:colo_proxy_main : colo proxy connection hashtable full, clear it |
7 | free(): invalid pointer | ||
8 | [1] 15195 abort (core dumped) qemu-system-x86_64 .... | ||
9 | |||
10 | This is because we create the s->connection_track_table with | ||
11 | g_hash_table_new_full() which is defined as: | ||
12 | |||
13 | GHashTable * g_hash_table_new_full (GHashFunc hash_func, | ||
14 | GEqualFunc key_equal_func, | ||
15 | GDestroyNotify key_destroy_func, | ||
16 | GDestroyNotify value_destroy_func); | ||
17 | |||
18 | The fourth parameter connection_destroy() will be called to free the | ||
19 | memory allocated for all 'Connection' values in the hashtable when | ||
20 | we call g_hash_table_remove_all() in the connection_hashtable_reset(). | ||
21 | |||
22 | But both connection_track_table and conn_list refer to the same | ||
23 | conn instance, which triggers a double free when conn_list is cleared. So this | ||
24 | patch removes the free action on the hash table side to avoid double-freeing the | ||
25 | conn. | ||
26 | |||
27 | Signed-off-by: Like Xu <like.xu@linux.intel.com> | ||
28 | Signed-off-by: Zhang Chen <chen.zhang@intel.com> | ||
6 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 29 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
7 | --- | 30 | --- |
8 | net/colo-compare.c | 8 ++++++-- | 31 | net/colo-compare.c | 2 +- |
9 | net/colo.c | 3 ++- | ||
10 | net/colo.h | 4 +++- | ||
11 | net/filter-rewriter.c | 2 +- | 32 | net/filter-rewriter.c | 2 +- |
12 | 4 files changed, 12 insertions(+), 5 deletions(-) | 33 | 2 files changed, 2 insertions(+), 2 deletions(-) |
13 | 34 | ||
14 | diff --git a/net/colo-compare.c b/net/colo-compare.c | 35 | diff --git a/net/colo-compare.c b/net/colo-compare.c |
15 | index XXXXXXX..XXXXXXX 100644 | 36 | index XXXXXXX..XXXXXXX 100644 |
16 | --- a/net/colo-compare.c | 37 | --- a/net/colo-compare.c |
17 | +++ b/net/colo-compare.c | 38 | +++ b/net/colo-compare.c |
18 | @@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode) | 39 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) |
19 | Connection *conn; | 40 | s->connection_track_table = g_hash_table_new_full(connection_key_hash, |
20 | 41 | connection_key_equal, | |
21 | if (mode == PRIMARY_IN) { | 42 | g_free, |
22 | - pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len); | 43 | - connection_destroy); |
23 | + pkt = packet_new(s->pri_rs.buf, | 44 | + NULL); |
24 | + s->pri_rs.packet_len, | 45 | |
25 | + s->pri_rs.vnet_hdr_len); | 46 | colo_compare_iothread(s); |
26 | } else { | 47 | |
27 | - pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len); | ||
28 | + pkt = packet_new(s->sec_rs.buf, | ||
29 | + s->sec_rs.packet_len, | ||
30 | + s->sec_rs.vnet_hdr_len); | ||
31 | } | ||
32 | |||
33 | if (parse_packet_early(pkt)) { | ||
34 | diff --git a/net/colo.c b/net/colo.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/net/colo.c | ||
37 | +++ b/net/colo.c | ||
38 | @@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque) | ||
39 | g_slice_free(Connection, conn); | ||
40 | } | ||
41 | |||
42 | -Packet *packet_new(const void *data, int size) | ||
43 | +Packet *packet_new(const void *data, int size, int vnet_hdr_len) | ||
44 | { | ||
45 | Packet *pkt = g_slice_new(Packet); | ||
46 | |||
47 | pkt->data = g_memdup(data, size); | ||
48 | pkt->size = size; | ||
49 | pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); | ||
50 | + pkt->vnet_hdr_len = vnet_hdr_len; | ||
51 | |||
52 | return pkt; | ||
53 | } | ||
54 | diff --git a/net/colo.h b/net/colo.h | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/net/colo.h | ||
57 | +++ b/net/colo.h | ||
58 | @@ -XXX,XX +XXX,XX @@ typedef struct Packet { | ||
59 | int size; | ||
60 | /* Time of packet creation, in wall clock ms */ | ||
61 | int64_t creation_ms; | ||
62 | + /* Get vnet_hdr_len from filter */ | ||
63 | + uint32_t vnet_hdr_len; | ||
64 | } Packet; | ||
65 | |||
66 | typedef struct ConnectionKey { | ||
67 | @@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table, | ||
68 | ConnectionKey *key, | ||
69 | GQueue *conn_list); | ||
70 | void connection_hashtable_reset(GHashTable *connection_track_table); | ||
71 | -Packet *packet_new(const void *data, int size); | ||
72 | +Packet *packet_new(const void *data, int size, int vnet_hdr_len); | ||
73 | void packet_destroy(void *opaque, void *user_data); | ||
74 | |||
75 | #endif /* QEMU_COLO_PROXY_H */ | ||
76 | diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c | 48 | diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c |
77 | index XXXXXXX..XXXXXXX 100644 | 49 | index XXXXXXX..XXXXXXX 100644 |
78 | --- a/net/filter-rewriter.c | 50 | --- a/net/filter-rewriter.c |
79 | +++ b/net/filter-rewriter.c | 51 | +++ b/net/filter-rewriter.c |
80 | @@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf, | 52 | @@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp) |
81 | char *buf = g_malloc0(size); | 53 | s->connection_track_table = g_hash_table_new_full(connection_key_hash, |
82 | 54 | connection_key_equal, | |
83 | iov_to_buf(iov, iovcnt, 0, buf, size); | 55 | g_free, |
84 | - pkt = packet_new(buf, size); | 56 | - connection_destroy); |
85 | + pkt = packet_new(buf, size, 0); | 57 | + NULL); |
86 | g_free(buf); | 58 | s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf); |
87 | 59 | } | |
88 | /* | 60 | |
89 | -- | 61 | -- |
90 | 2.7.4 | 62 | 2.7.4 |
91 | |||
92 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Zhang Chen <chen.zhang@intel.com> | ||
1 | 2 | ||
3 | Filter-rewriter does not need to track connections in conn_list. | ||
4 | This patch fixes the glib g_queue_is_empty assertion when the COLO guest | ||
5 | keeps a lot of network connections. | ||
6 | |||
7 | Signed-off-by: Zhang Chen <chen.zhang@intel.com> | ||
8 | Reviewed-by: Li Zhijian <lizhijian@fujitsu.com> | ||
9 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
10 | --- | ||
11 | net/colo.c | 2 +- | ||
12 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
13 | |||
14 | diff --git a/net/colo.c b/net/colo.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/net/colo.c | ||
17 | +++ b/net/colo.c | ||
18 | @@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table, | ||
19 | /* | ||
20 | * clear the conn_list | ||
21 | */ | ||
22 | - while (!g_queue_is_empty(conn_list)) { | ||
23 | + while (conn_list && !g_queue_is_empty(conn_list)) { | ||
24 | connection_destroy(g_queue_pop_head(conn_list)); | ||
25 | } | ||
26 | } | ||
27 | -- | ||
28 | 2.7.4 | diff view generated by jsdifflib |
1 | From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 1 | From: Zhang Chen <chen.zhang@intel.com> |
---|---|---|---|
2 | 2 | ||
3 | Make colo-compare and filter-rewriter able to parse vnet packets. | 3 | When COLO uses only one vnet_hdr_support parameter between |
4 | filter-redirector and filter-mirror (or colo-compare), COLO will crash | ||
5 | with a segmentation fault. Backtrace as follows: | ||
4 | 6 | ||
5 | Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com> | 7 | Thread 1 "qemu-system-x86" received signal SIGSEGV, Segmentation fault. |
8 | 0x0000555555cb200b in eth_get_l2_hdr_length (p=0x0) | ||
9 | at /home/tao/project/COLO/colo-qemu/include/net/eth.h:296 | ||
10 | 296 uint16_t proto = be16_to_cpu(PKT_GET_ETH_HDR(p)->h_proto); | ||
11 | (gdb) bt | ||
12 | 0 0x0000555555cb200b in eth_get_l2_hdr_length (p=0x0) | ||
13 | at /home/tao/project/COLO/colo-qemu/include/net/eth.h:296 | ||
14 | 1 0x0000555555cb22b4 in parse_packet_early (pkt=0x555556a44840) at | ||
15 | net/colo.c:49 | ||
16 | 2 0x0000555555cb2b91 in is_tcp_packet (pkt=0x555556a44840) at | ||
17 | net/filter-rewriter.c:63 | ||
18 | |||
19 | So a wrong vnet_hdr_len will cause pkt->data to become NULL. Add a check to | ||
20 | raise an error and add trace-events to track vnet_hdr_len. | ||
21 | |||
22 | Signed-off-by: Tao Xu <tao3.xu@intel.com> | ||
23 | Signed-off-by: Zhang Chen <chen.zhang@intel.com> | ||
24 | Reviewed-by: Li Zhijian <lizhijian@fujitsu.com> | ||
6 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 25 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
7 | --- | 26 | --- |
8 | net/colo.c | 6 +++--- | 27 | net/colo.c | 9 ++++++++- |
9 | 1 file changed, 3 insertions(+), 3 deletions(-) | 28 | net/trace-events | 1 + |
29 | 2 files changed, 9 insertions(+), 1 deletion(-) | ||
10 | 30 | ||
11 | diff --git a/net/colo.c b/net/colo.c | 31 | diff --git a/net/colo.c b/net/colo.c |
12 | index XXXXXXX..XXXXXXX 100644 | 32 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/net/colo.c | 33 | --- a/net/colo.c |
14 | +++ b/net/colo.c | 34 | +++ b/net/colo.c |
15 | @@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt) | 35 | @@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt) |
16 | { | ||
17 | int network_length; | ||
18 | static const uint8_t vlan[] = {0x81, 0x00}; | 36 | static const uint8_t vlan[] = {0x81, 0x00}; |
19 | - uint8_t *data = pkt->data; | 37 | uint8_t *data = pkt->data + pkt->vnet_hdr_len; |
20 | + uint8_t *data = pkt->data + pkt->vnet_hdr_len; | ||
21 | uint16_t l3_proto; | 38 | uint16_t l3_proto; |
22 | ssize_t l2hdr_len = eth_get_l2_hdr_length(data); | 39 | - ssize_t l2hdr_len = eth_get_l2_hdr_length(data); |
23 | 40 | + ssize_t l2hdr_len; | |
24 | - if (pkt->size < ETH_HLEN) { | 41 | + |
25 | + if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) { | 42 | + if (data == NULL) { |
43 | + trace_colo_proxy_main_vnet_info("This packet is not parsed correctly, " | ||
44 | + "pkt->vnet_hdr_len", pkt->vnet_hdr_len); | ||
45 | + return 1; | ||
46 | + } | ||
47 | + l2hdr_len = eth_get_l2_hdr_length(data); | ||
48 | |||
49 | if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) { | ||
26 | trace_colo_proxy_main("pkt->size < ETH_HLEN"); | 50 | trace_colo_proxy_main("pkt->size < ETH_HLEN"); |
27 | return 1; | 51 | diff --git a/net/trace-events b/net/trace-events |
28 | } | 52 | index XXXXXXX..XXXXXXX 100644 |
29 | @@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt) | 53 | --- a/net/trace-events |
30 | } | 54 | +++ b/net/trace-events |
31 | 55 | @@ -XXX,XX +XXX,XX @@ vhost_user_event(const char *chr, int event) "chr: %s got event: %d" | |
32 | network_length = pkt->ip->ip_hl * 4; | 56 | |
33 | - if (pkt->size < l2hdr_len + network_length) { | 57 | # colo.c |
34 | + if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) { | 58 | colo_proxy_main(const char *chr) ": %s" |
35 | trace_colo_proxy_main("pkt->size < network_header + network_length"); | 59 | +colo_proxy_main_vnet_info(const char *sta, int size) ": %s = %d" |
36 | return 1; | 60 | |
37 | } | 61 | # colo-compare.c |
62 | colo_compare_main(const char *chr) ": %s" | ||
38 | -- | 63 | -- |
39 | 2.7.4 | 64 | 2.7.4 |
40 | |||
41 | diff view generated by jsdifflib |