1
The following changes since commit 6632f6ff96f0537fc34cdc00c760656fc62e23c5:
1
The following changes since commit bdee969c0e65d4d509932b1d70e3a3b2ffbff6d5:
2
2
3
Merge remote-tracking branch 'remotes/famz/tags/block-and-testing-pull-request' into staging (2017-07-17 11:46:36 +0100)
3
Merge remote-tracking branch 'remotes/bonzini-gitlab/tags/for-upstream' into staging (2021-03-19 18:01:17 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 189ae6bb5ce1f5a322f8691d00fe942ba43dd601:
9
for you to fetch changes up to c7274b5ef43614dd133daec1e2018f71d8744088:
10
10
11
virtio-net: fix offload ctrl endian (2017-07-17 20:13:56 +0800)
11
net/eth: Add an assert() and invert if() statement to simplify code (2021-03-22 17:34:31 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
- fix virtio-net ctrl offload endian
15
----------------------------------------------------------------
16
- vnet header support for various COLO netfilters and compare thread
16
Bin Meng (4):
17
net: eth: Add a helper to pad a short Ethernet frame
18
net: Add a 'do_not_pad' to NetClientState
19
net: Pad short frames to minimum size before sending from SLiRP/TAP
20
hw/net: virtio-net: Initialize nc->do_not_pad to true
17
21
18
----------------------------------------------------------------
22
Lukas Straub (2):
19
Jason Wang (1):
23
net/colo-compare.c: Fix memory leak for non-tcp packet
20
virtio-net: fix offload ctrl endian
24
net/colo-compare.c: Optimize removal of secondary packet
21
25
22
Michal Privoznik (1):
26
Philippe Mathieu-Daudé (7):
23
virtio-net: Prefer is_power_of_2()
27
net/eth: Use correct in6_address offset in _eth_get_rss_ex_dst_addr()
28
net/eth: Simplify _eth_get_rss_ex_dst_addr()
29
net/eth: Better describe _eth_get_rss_ex_dst_addr's offset argument
30
net/eth: Check size earlier in _eth_get_rss_ex_dst_addr()
31
net/eth: Check iovec has enough data earlier
32
net/eth: Read ip6_ext_hdr_routing buffer before accessing it
33
net/eth: Add an assert() and invert if() statement to simplify code
24
34
25
Zhang Chen (12):
35
MAINTAINERS | 1 +
26
net: Add vnet_hdr_len arguments in NetClientState
36
hw/net/virtio-net.c | 4 +++
27
net/net.c: Add vnet_hdr support in SocketReadState
37
include/net/eth.h | 17 ++++++++++++
28
net/filter-mirror.c: Introduce parameter for filter_send()
38
include/net/net.h | 1 +
29
net/filter-mirror.c: Make filter mirror support vnet support.
39
net/colo-compare.c | 3 ++-
30
net/filter-mirror.c: Add new option to enable vnet support for filter-redirector
40
net/eth.c | 61 +++++++++++++++++++++++++++---------------
31
net/colo.c: Make vnet_hdr_len as packet property
41
net/slirp.c | 10 +++++++
32
net/colo-compare.c: Introduce parameter for compare_chr_send()
42
net/tap-win32.c | 10 +++++++
33
net/colo-compare.c: Make colo-compare support vnet_hdr_len
43
net/tap.c | 10 +++++++
34
net/colo.c: Add vnet packet parse feature in colo-proxy
44
tests/qtest/fuzz-e1000e-test.c | 53 ++++++++++++++++++++++++++++++++++++
35
net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare
45
tests/qtest/meson.build | 1 +
36
net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len
46
11 files changed, 148 insertions(+), 23 deletions(-)
37
docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header
47
create mode 100644 tests/qtest/fuzz-e1000e-test.c
38
39
docs/colo-proxy.txt | 26 ++++++++++++++++
40
hw/net/virtio-net.c | 4 ++-
41
include/net/net.h | 10 ++++--
42
net/colo-compare.c | 84 ++++++++++++++++++++++++++++++++++++++++++---------
43
net/colo.c | 9 +++---
44
net/colo.h | 4 ++-
45
net/filter-mirror.c | 75 +++++++++++++++++++++++++++++++++++++++++----
46
net/filter-rewriter.c | 37 ++++++++++++++++++++++-
47
net/net.c | 37 ++++++++++++++++++++---
48
net/socket.c | 8 ++---
49
qemu-options.hx | 19 ++++++------
50
11 files changed, 265 insertions(+), 48 deletions(-)
51
48
52
49
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Bin Meng <bmeng.cn@gmail.com>
2
2
3
We add the vnet_hdr_support option for filter-rewriter, default is disabled.
3
Add a helper to pad a short Ethernet frame to the minimum required
4
If you use virtio-net-pci or other driver needs vnet_hdr, please enable it.
4
length, which can be used by backends' code.
5
You can use it for example:
6
-object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support
7
5
8
We get the vnet_hdr_len from NetClientState that make us
6
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
9
parse net packet correctly.
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
11
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
9
---
14
net/filter-rewriter.c | 37 ++++++++++++++++++++++++++++++++++++-
10
include/net/eth.h | 17 +++++++++++++++++
15
qemu-options.hx | 4 ++--
11
net/eth.c | 17 +++++++++++++++++
16
2 files changed, 38 insertions(+), 3 deletions(-)
12
2 files changed, 34 insertions(+)
17
13
18
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
14
diff --git a/include/net/eth.h b/include/net/eth.h
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/net/filter-rewriter.c
16
--- a/include/net/eth.h
21
+++ b/net/filter-rewriter.c
17
+++ b/include/net/eth.h
22
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@
23
#include "qemu-common.h"
19
24
#include "qapi/error.h"
20
#define ETH_ALEN 6
25
#include "qapi/qmp/qerror.h"
21
#define ETH_HLEN 14
26
+#include "qemu/error-report.h"
22
+#define ETH_ZLEN 60 /* Min. octets in frame without FCS */
27
#include "qapi-visit.h"
23
28
#include "qom/object.h"
24
struct eth_header {
29
#include "qemu/main-loop.h"
25
uint8_t h_dest[ETH_ALEN]; /* destination eth addr */
30
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
26
@@ -XXX,XX +XXX,XX @@ bool
31
NetQueue *incoming_queue;
27
eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
32
/* hashtable to save connection */
28
size_t ip6hdr_off, eth_ip6_hdr_info *info);
33
GHashTable *connection_track_table;
29
34
+ bool vnet_hdr;
30
+/**
35
} RewriterState;
31
+ * eth_pad_short_frame - pad a short frame to the minimum Ethernet frame length
36
32
+ *
37
static void filter_rewriter_flush(NetFilterState *nf)
33
+ * If the Ethernet frame size is shorter than 60 bytes, it will be padded to
38
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
34
+ * 60 bytes at the address @padded_pkt.
39
ConnectionKey key;
35
+ *
40
Packet *pkt;
36
+ * @padded_pkt: buffer address to hold the padded frame
41
ssize_t size = iov_size(iov, iovcnt);
37
+ * @padded_buflen: pointer holding length of @padded_pkt. If the frame is
42
+ ssize_t vnet_hdr_len = 0;
38
+ * padded, the length will be updated to the padded one.
43
char *buf = g_malloc0(size);
39
+ * @pkt: address to hold the original Ethernet frame
44
40
+ * @pkt_size: size of the original Ethernet frame
45
iov_to_buf(iov, iovcnt, 0, buf, size);
41
+ * @return true if the frame is padded, otherwise false
46
- pkt = packet_new(buf, size, 0);
42
+ */
43
+bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
44
+ const void *pkt, size_t pkt_size);
47
+
45
+
48
+ if (s->vnet_hdr) {
46
#endif
49
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
47
diff --git a/net/eth.c b/net/eth.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/net/eth.c
50
+++ b/net/eth.c
51
@@ -XXX,XX +XXX,XX @@ bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
52
info->l4proto = ext_hdr.ip6r_nxt;
53
return true;
54
}
55
+
56
+bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
57
+ const void *pkt, size_t pkt_size)
58
+{
59
+ assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
60
+
61
+ if (pkt_size >= ETH_ZLEN) {
62
+ return false;
50
+ }
63
+ }
51
+
64
+
52
+ pkt = packet_new(buf, size, vnet_hdr_len);
65
+ /* pad to minimum Ethernet frame length */
53
g_free(buf);
66
+ memcpy(padded_pkt, pkt, pkt_size);
54
67
+ memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
55
/*
68
+ *padded_buflen = ETH_ZLEN;
56
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
57
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
58
}
59
60
+static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
61
+{
62
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
63
+
69
+
64
+ return s->vnet_hdr;
70
+ return true;
65
+}
71
+}
66
+
67
+static void filter_rewriter_set_vnet_hdr(Object *obj,
68
+ bool value,
69
+ Error **errp)
70
+{
71
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
72
+
73
+ s->vnet_hdr = value;
74
+}
75
+
76
+static void filter_rewriter_init(Object *obj)
77
+{
78
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
79
+
80
+ s->vnet_hdr = false;
81
+ object_property_add_bool(obj, "vnet_hdr_support",
82
+ filter_rewriter_get_vnet_hdr,
83
+ filter_rewriter_set_vnet_hdr, NULL);
84
+}
85
+
86
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
87
{
88
NetFilterClass *nfc = NETFILTER_CLASS(oc);
89
@@ -XXX,XX +XXX,XX @@ static const TypeInfo colo_rewriter_info = {
90
.name = TYPE_FILTER_REWRITER,
91
.parent = TYPE_NETFILTER,
92
.class_init = colo_rewriter_class_init,
93
+ .instance_init = filter_rewriter_init,
94
.instance_size = sizeof(RewriterState),
95
};
96
97
diff --git a/qemu-options.hx b/qemu-options.hx
98
index XXXXXXX..XXXXXXX 100644
99
--- a/qemu-options.hx
100
+++ b/qemu-options.hx
101
@@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
102
be the same. we can just use indev or outdev, but at least one of indev or outdev
103
need to be specified.
104
105
-@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid}[,queue=@var{all|rx|tx}]
106
+@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support]
107
108
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
109
secondary from primary to keep secondary tcp connection,and rewrite
110
tcp packet to primary from secondary make tcp packet can be handled by
111
-client.
112
+client.if it has the vnet_hdr_support flag, we can parse packet with vnet header.
113
114
usage:
115
colo secondary:
116
--
72
--
117
2.7.4
73
2.7.4
118
74
119
75
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Bin Meng <bmeng.cn@gmail.com>
2
2
3
Add vnet_hdr_len arguments in NetClientState
3
This adds a flag in NetClientState, so that a net client can tell
4
that make other module get real vnet_hdr_len easily.
4
its peer that the packets do not need to be padded to the minimum
5
size of an Ethernet frame (60 bytes) before sending to it.
5
6
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
10
---
9
include/net/net.h | 1 +
11
include/net/net.h | 1 +
10
net/net.c | 1 +
12
1 file changed, 1 insertion(+)
11
2 files changed, 2 insertions(+)
12
13
13
diff --git a/include/net/net.h b/include/net/net.h
14
diff --git a/include/net/net.h b/include/net/net.h
14
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
15
--- a/include/net/net.h
16
--- a/include/net/net.h
16
+++ b/include/net/net.h
17
+++ b/include/net/net.h
17
@@ -XXX,XX +XXX,XX @@ struct NetClientState {
18
@@ -XXX,XX +XXX,XX @@ struct NetClientState {
18
unsigned int queue_index;
19
unsigned rxfilter_notify_enabled:1;
20
int vring_enable;
19
int vring_enable;
21
+ int vnet_hdr_len;
20
int vnet_hdr_len;
22
QTAILQ_HEAD(NetFilterHead, NetFilterState) filters;
21
bool is_netdev;
22
+ bool do_not_pad; /* do not pad to the minimum ethernet frame length */
23
QTAILQ_HEAD(, NetFilterState) filters;
23
};
24
};
24
25
diff --git a/net/net.c b/net/net.c
26
index XXXXXXX..XXXXXXX 100644
27
--- a/net/net.c
28
+++ b/net/net.c
29
@@ -XXX,XX +XXX,XX @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
30
return;
31
}
32
33
+ nc->vnet_hdr_len = len;
34
nc->info->set_vnet_hdr_len(nc, len);
35
}
36
25
37
--
26
--
38
2.7.4
27
2.7.4
39
28
40
29
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
2
1
3
We add a flag to decide whether net_fill_rstate() needs to read
4
the vnet_hdr_len or not.
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Suggested-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
include/net/net.h | 9 +++++++--
11
net/colo-compare.c | 4 ++--
12
net/filter-mirror.c | 2 +-
13
net/net.c | 36 ++++++++++++++++++++++++++++++++----
14
net/socket.c | 8 ++++----
15
5 files changed, 46 insertions(+), 13 deletions(-)
16
17
diff --git a/include/net/net.h b/include/net/net.h
18
index XXXXXXX..XXXXXXX 100644
19
--- a/include/net/net.h
20
+++ b/include/net/net.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct NICState {
22
} NICState;
23
24
struct SocketReadState {
25
- int state; /* 0 = getting length, 1 = getting data */
26
+ /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */
27
+ int state;
28
+ /* This flag decide whether to read the vnet_hdr_len field */
29
+ bool vnet_hdr;
30
uint32_t index;
31
uint32_t packet_len;
32
+ uint32_t vnet_hdr_len;
33
uint8_t buf[NET_BUFSIZE];
34
SocketReadStateFinalize *finalize;
35
};
36
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
37
void print_net_client(Monitor *mon, NetClientState *nc);
38
void hmp_info_network(Monitor *mon, const QDict *qdict);
39
void net_socket_rs_init(SocketReadState *rs,
40
- SocketReadStateFinalize *finalize);
41
+ SocketReadStateFinalize *finalize,
42
+ bool vnet_hdr);
43
44
/* NIC info */
45
46
diff --git a/net/colo-compare.c b/net/colo-compare.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/net/colo-compare.c
49
+++ b/net/colo-compare.c
50
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
51
return;
52
}
53
54
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize);
55
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize);
56
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
57
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
58
59
g_queue_init(&s->conn_list);
60
61
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/net/filter-mirror.c
64
+++ b/net/filter-mirror.c
65
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
66
}
67
}
68
69
- net_socket_rs_init(&s->rs, redirector_rs_finalize);
70
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
71
72
if (s->indev) {
73
chr = qemu_chr_find(s->indev);
74
diff --git a/net/net.c b/net/net.c
75
index XXXXXXX..XXXXXXX 100644
76
--- a/net/net.c
77
+++ b/net/net.c
78
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_net_opts = {
79
};
80
81
void net_socket_rs_init(SocketReadState *rs,
82
- SocketReadStateFinalize *finalize)
83
+ SocketReadStateFinalize *finalize,
84
+ bool vnet_hdr)
85
{
86
rs->state = 0;
87
+ rs->vnet_hdr = vnet_hdr;
88
rs->index = 0;
89
rs->packet_len = 0;
90
+ rs->vnet_hdr_len = 0;
91
memset(rs->buf, 0, sizeof(rs->buf));
92
rs->finalize = finalize;
93
}
94
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
95
unsigned int l;
96
97
while (size > 0) {
98
- /* reassemble a packet from the network */
99
- switch (rs->state) { /* 0 = getting length, 1 = getting data */
100
+ /* Reassemble a packet from the network.
101
+ * 0 = getting length.
102
+ * 1 = getting vnet header length.
103
+ * 2 = getting data.
104
+ */
105
+ switch (rs->state) {
106
case 0:
107
l = 4 - rs->index;
108
if (l > size) {
109
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
110
/* got length */
111
rs->packet_len = ntohl(*(uint32_t *)rs->buf);
112
rs->index = 0;
113
- rs->state = 1;
114
+ if (rs->vnet_hdr) {
115
+ rs->state = 1;
116
+ } else {
117
+ rs->state = 2;
118
+ rs->vnet_hdr_len = 0;
119
+ }
120
}
121
break;
122
case 1:
123
+ l = 4 - rs->index;
124
+ if (l > size) {
125
+ l = size;
126
+ }
127
+ memcpy(rs->buf + rs->index, buf, l);
128
+ buf += l;
129
+ size -= l;
130
+ rs->index += l;
131
+ if (rs->index == 4) {
132
+ /* got vnet header length */
133
+ rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf);
134
+ rs->index = 0;
135
+ rs->state = 2;
136
+ }
137
+ break;
138
+ case 2:
139
l = rs->packet_len - rs->index;
140
if (l > size) {
141
l = size;
142
diff --git a/net/socket.c b/net/socket.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/net/socket.c
145
+++ b/net/socket.c
146
@@ -XXX,XX +XXX,XX @@ static void net_socket_send(void *opaque)
147
closesocket(s->fd);
148
149
s->fd = -1;
150
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
151
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
152
s->nc.link_down = true;
153
memset(s->nc.info_str, 0, sizeof(s->nc.info_str));
154
155
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
156
s->fd = fd;
157
s->listen_fd = -1;
158
s->send_fn = net_socket_send_dgram;
159
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
160
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
161
net_socket_read_poll(s, true);
162
163
/* mcast: save bound address as dst */
164
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
165
166
s->fd = fd;
167
s->listen_fd = -1;
168
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
169
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
170
171
/* Disable Nagle algorithm on TCP sockets to reduce latency */
172
socket_set_nodelay(fd);
173
@@ -XXX,XX +XXX,XX @@ static int net_socket_listen_init(NetClientState *peer,
174
s->fd = -1;
175
s->listen_fd = fd;
176
s->nc.link_down = true;
177
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
178
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
179
180
qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
181
return 0;
182
--
183
2.7.4
184
185
diff view generated by jsdifflib
1
Spec said offloads should be le64, so use virtio_ldq_p() to guarantee
1
From: Bin Meng <bmeng.cn@gmail.com>
2
valid endian.
3
2
4
Fixes: 644c98587d4c ("virtio-net: dynamic network offloads configuration")
3
The minimum Ethernet frame length is 60 bytes. For short frames with
5
Cc: qemu-stable@nongnu.org
4
smaller length like ARP packets (only 42 bytes), on a real world NIC
6
Cc: Dmitry Fleytman <dfleytma@redhat.com>
5
it can choose either padding its length to the minimum required 60
6
bytes, or sending it out directly to the wire. Such behavior can be
7
hardcoded or controlled by a register bit. Similarly on the receive
8
path, NICs can choose either dropping such short frames directly or
9
handing them over to software to handle.
10
11
On the other hand, for the network backends like SLiRP/TAP, they
12
don't expose a way to control the short frame behavior. As of today
13
they just send/receive data from/to the other end connected to them,
14
which means any sized packet is acceptable. So they can send and
15
receive short frames without any problem. It is observed that ARP
16
packets sent from SLiRP/TAP are 42 bytes, and SLiRP/TAP just send
17
these ARP packets to the other end which might be a NIC model that
18
does not allow short frames to pass through.
19
20
To provide better compatibility, for packets sent from QEMU network
21
backends like SLiRP/TAP, we change to pad short frames before sending
22
it out to the other end, if the other end does not forbid it via the
23
nc->do_not_pad flag. This ensures a backend as an Ethernet sender
24
does not violate the spec. But with this change, the behavior of
25
dropping short frames from SLiRP/TAP interfaces in the NIC model
26
cannot be emulated because it always receives a packet that is spec
27
compliant. The capability of sending short frames from NIC models is
28
still supported and short frames can still pass through SLiRP/TAP.
29
30
This commit should be able to fix the issue as reported with some
31
NIC models before, that ARP requests get dropped, preventing the
32
guest from becoming visible on the network. It was worked around in
33
these NIC models on the receive path, that when a short frame is
34
received, it is padded up to 60 bytes.
35
36
The following 2 commits seem to be the ones that worked around this issue
37
in e1000 and vmxnet3 before, and should probably be reverted.
38
39
commit 78aeb23eded2 ("e1000: Pad short frames to minimum size (60 bytes)")
40
commit 40a87c6c9b11 ("vmxnet3: Pad short frames to minimum size (60 bytes)")
41
42
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
43
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
44
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
45
---
9
hw/net/virtio-net.c | 2 ++
46
net/slirp.c | 10 ++++++++++
10
1 file changed, 2 insertions(+)
47
net/tap-win32.c | 10 ++++++++++
48
net/tap.c | 10 ++++++++++
49
3 files changed, 30 insertions(+)
11
50
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
51
diff --git a/net/slirp.c b/net/slirp.c
13
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
53
--- a/net/slirp.c
15
+++ b/hw/net/virtio-net.c
54
+++ b/net/slirp.c
16
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
55
@@ -XXX,XX +XXX,XX @@
17
if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
56
#include <pwd.h>
18
uint64_t supported_offloads;
57
#include <sys/wait.h>
19
58
#endif
20
+ offloads = virtio_ldq_p(vdev, &offloads);
59
+#include "net/eth.h"
60
#include "net/net.h"
61
#include "clients.h"
62
#include "hub.h"
63
@@ -XXX,XX +XXX,XX @@ static ssize_t net_slirp_send_packet(const void *pkt, size_t pkt_len,
64
void *opaque)
65
{
66
SlirpState *s = opaque;
67
+ uint8_t min_pkt[ETH_ZLEN];
68
+ size_t min_pktsz = sizeof(min_pkt);
21
+
69
+
22
if (!n->has_vnet_hdr) {
70
+ if (!s->nc.peer->do_not_pad) {
23
return VIRTIO_NET_ERR;
71
+ if (eth_pad_short_frame(min_pkt, &min_pktsz, pkt, pkt_len)) {
72
+ pkt = min_pkt;
73
+ pkt_len = min_pktsz;
74
+ }
75
+ }
76
77
return qemu_send_packet(&s->nc, pkt, pkt_len);
78
}
79
diff --git a/net/tap-win32.c b/net/tap-win32.c
80
index XXXXXXX..XXXXXXX 100644
81
--- a/net/tap-win32.c
82
+++ b/net/tap-win32.c
83
@@ -XXX,XX +XXX,XX @@
84
85
#include "qemu-common.h"
86
#include "clients.h" /* net_init_tap */
87
+#include "net/eth.h"
88
#include "net/net.h"
89
#include "net/tap.h" /* tap_has_ufo, ... */
90
#include "qemu/error-report.h"
91
@@ -XXX,XX +XXX,XX @@ static void tap_win32_send(void *opaque)
92
uint8_t *buf;
93
int max_size = 4096;
94
int size;
95
+ uint8_t min_pkt[ETH_ZLEN];
96
+ size_t min_pktsz = sizeof(min_pkt);
97
98
size = tap_win32_read(s->handle, &buf, max_size);
99
if (size > 0) {
100
+ if (!s->nc.peer->do_not_pad) {
101
+ if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
102
+ buf = min_pkt;
103
+ size = min_pktsz;
104
+ }
105
+ }
106
+
107
qemu_send_packet(&s->nc, buf, size);
108
tap_win32_free_buffer(s->handle, buf);
109
}
110
diff --git a/net/tap.c b/net/tap.c
111
index XXXXXXX..XXXXXXX 100644
112
--- a/net/tap.c
113
+++ b/net/tap.c
114
@@ -XXX,XX +XXX,XX @@
115
#include <sys/socket.h>
116
#include <net/if.h>
117
118
+#include "net/eth.h"
119
#include "net/net.h"
120
#include "clients.h"
121
#include "monitor/monitor.h"
122
@@ -XXX,XX +XXX,XX @@ static void tap_send(void *opaque)
123
124
while (true) {
125
uint8_t *buf = s->buf;
126
+ uint8_t min_pkt[ETH_ZLEN];
127
+ size_t min_pktsz = sizeof(min_pkt);
128
129
size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
130
if (size <= 0) {
131
@@ -XXX,XX +XXX,XX @@ static void tap_send(void *opaque)
132
size -= s->host_vnet_hdr_len;
24
}
133
}
134
135
+ if (!s->nc.peer->do_not_pad) {
136
+ if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
137
+ buf = min_pkt;
138
+ size = min_pktsz;
139
+ }
140
+ }
141
+
142
size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
143
if (size == 0) {
144
tap_read_poll(s, false);
25
--
145
--
26
2.7.4
146
2.7.4
27
147
28
148
diff view generated by jsdifflib
1
From: Michal Privoznik <mprivozn@redhat.com>
1
From: Bin Meng <bmeng.cn@gmail.com>
2
2
3
We have a function that checks if given number is power of two.
3
For virtio-net, there is no need to pad the Ethernet frame size to
4
We should prefer it instead of expanding the check on our own.
4
60 bytes before sending to it.
5
5
6
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
6
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
8
---
9
hw/net/virtio-net.c | 2 +-
9
hw/net/virtio-net.c | 4 ++++
10
1 file changed, 1 insertion(+), 1 deletion(-)
10
1 file changed, 4 insertions(+)
11
11
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
14
--- a/hw/net/virtio-net.c
15
+++ b/hw/net/virtio-net.c
15
+++ b/hw/net/virtio-net.c
16
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
16
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
17
*/
17
object_get_typename(OBJECT(dev)), dev->id, n);
18
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
18
}
19
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
19
20
- (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
20
+ for (i = 0; i < n->max_queues; i++) {
21
+ !is_power_of_2(n->net_conf.rx_queue_size)) {
21
+ n->nic->ncs[i].do_not_pad = true;
22
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
22
+ }
23
"must be a power of 2 between %d and %d.",
23
+
24
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
24
peer_test_vnet_hdr(n);
25
if (peer_has_vnet_hdr(n)) {
26
for (i = 0; i < n->max_queues; i++) {
25
--
27
--
26
2.7.4
28
2.7.4
27
29
28
30
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Lukas Straub <lukasstraub2@web.de>
2
2
3
We add the vnet_hdr_support option for colo-compare, default is disabled.
3
In addition to removing the packet from the secondary queue,
4
If you use virtio-net-pci or other driver needs vnet_hdr, please enable it.
4
we also need to free it.
5
You can use it for example:
6
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
7
5
8
COLO-compare can get vnet header length from filter,
6
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
9
Add vnet_hdr_len to struct packet and output packet with
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
10
the vnet_hdr_len.
8
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
11
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
10
---
15
net/colo-compare.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------
11
net/colo-compare.c | 1 +
16
qemu-options.hx | 4 ++--
12
1 file changed, 1 insertion(+)
17
2 files changed, 55 insertions(+), 9 deletions(-)
18
13
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
14
diff --git a/net/colo-compare.c b/net/colo-compare.c
20
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
16
--- a/net/colo-compare.c
22
+++ b/net/colo-compare.c
17
+++ b/net/colo-compare.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
18
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
24
CharBackend chr_out;
25
SocketReadState pri_rs;
26
SocketReadState sec_rs;
27
+ bool vnet_hdr;
28
29
/* connection list: the connections belonged to this NIC could be found
30
* in this list.
31
@@ -XXX,XX +XXX,XX @@ enum {
32
33
static int compare_chr_send(CompareState *s,
34
const uint8_t *buf,
35
- uint32_t size);
36
+ uint32_t size,
37
+ uint32_t vnet_hdr_len);
38
39
static gint seq_sorter(Packet *a, Packet *b, gpointer data)
40
{
41
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
42
}
43
19
44
if (result) {
20
if (result) {
45
- ret = compare_chr_send(s, pkt->data, pkt->size);
21
colo_release_primary_pkt(s, pkt);
46
+ ret = compare_chr_send(s,
22
+ packet_destroy(result->data, NULL);
47
+ pkt->data,
23
g_queue_remove(&conn->secondary_list, result->data);
48
+ pkt->size,
24
} else {
49
+ pkt->vnet_hdr_len);
25
/*
50
if (ret < 0) {
51
error_report("colo_send_primary_packet failed");
52
}
53
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
54
55
static int compare_chr_send(CompareState *s,
56
const uint8_t *buf,
57
- uint32_t size)
58
+ uint32_t size,
59
+ uint32_t vnet_hdr_len)
60
{
61
int ret = 0;
62
uint32_t len = htonl(size);
63
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CompareState *s,
64
goto err;
65
}
66
67
+ if (s->vnet_hdr) {
68
+ /*
69
+ * We send vnet header len make other module(like filter-redirector)
70
+ * know how to parse net packet correctly.
71
+ */
72
+ len = htonl(vnet_hdr_len);
73
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
74
+ if (ret != sizeof(len)) {
75
+ goto err;
76
+ }
77
+ }
78
+
79
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
80
if (ret != size) {
81
goto err;
82
@@ -XXX,XX +XXX,XX @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp)
83
s->outdev = g_strdup(value);
84
}
85
86
+static bool compare_get_vnet_hdr(Object *obj, Error **errp)
87
+{
88
+ CompareState *s = COLO_COMPARE(obj);
89
+
90
+ return s->vnet_hdr;
91
+}
92
+
93
+static void compare_set_vnet_hdr(Object *obj,
94
+ bool value,
95
+ Error **errp)
96
+{
97
+ CompareState *s = COLO_COMPARE(obj);
98
+
99
+ s->vnet_hdr = value;
100
+}
101
+
102
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
103
{
104
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
105
106
if (packet_enqueue(s, PRIMARY_IN)) {
107
trace_colo_compare_main("primary: unsupported packet in");
108
- compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
109
+ compare_chr_send(s,
110
+ pri_rs->buf,
111
+ pri_rs->packet_len,
112
+ pri_rs->vnet_hdr_len);
113
} else {
114
/* compare connection */
115
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
116
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
117
return;
118
}
119
120
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
121
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
122
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
123
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
124
125
g_queue_init(&s->conn_list);
126
127
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
128
129
while (!g_queue_is_empty(&conn->primary_list)) {
130
pkt = g_queue_pop_head(&conn->primary_list);
131
- compare_chr_send(s, pkt->data, pkt->size);
132
+ compare_chr_send(s,
133
+ pkt->data,
134
+ pkt->size,
135
+ pkt->vnet_hdr_len);
136
packet_destroy(pkt, NULL);
137
}
138
while (!g_queue_is_empty(&conn->secondary_list)) {
139
@@ -XXX,XX +XXX,XX @@ static void colo_compare_class_init(ObjectClass *oc, void *data)
140
141
static void colo_compare_init(Object *obj)
142
{
143
+ CompareState *s = COLO_COMPARE(obj);
144
+
145
object_property_add_str(obj, "primary_in",
146
compare_get_pri_indev, compare_set_pri_indev,
147
NULL);
148
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
149
object_property_add_str(obj, "outdev",
150
compare_get_outdev, compare_set_outdev,
151
NULL);
152
+
153
+ s->vnet_hdr = false;
154
+ object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
155
+ compare_set_vnet_hdr, NULL);
156
}
157
158
static void colo_compare_finalize(Object *obj)
159
diff --git a/qemu-options.hx b/qemu-options.hx
160
index XXXXXXX..XXXXXXX 100644
161
--- a/qemu-options.hx
162
+++ b/qemu-options.hx
163
@@ -XXX,XX +XXX,XX @@ Dump the network traffic on netdev @var{dev} to the file specified by
164
The file format is libpcap, so it can be analyzed with tools such as tcpdump
165
or Wireshark.
166
167
-@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},
168
-outdev=@var{chardevid}
169
+@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support]
170
171
Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with
172
secondary packet. If the packets are same, we will output primary
173
packet to outdev@var{chardevid}, else we will notify colo-frame
174
do checkpoint and send primary packet to outdev@var{chardevid}.
175
+if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len.
176
177
we must use it with the help of filter-mirror and filter-redirector.
178
179
--
26
--
180
2.7.4
27
2.7.4
181
28
182
29
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Lukas Straub <lukasstraub2@web.de>
2
2
3
COLO-Proxy just focuses on the packet payload, so we skip the vnet header.
3
g_queue_remove needs to look up the list entry first, but we
4
already have it as result and can remove it directly with
5
g_queue_delete_link.
4
6
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
11
---
8
net/colo-compare.c | 8 ++++++--
12
net/colo-compare.c | 2 +-
9
1 file changed, 6 insertions(+), 2 deletions(-)
13
1 file changed, 1 insertion(+), 1 deletion(-)
10
14
11
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
diff --git a/net/colo-compare.c b/net/colo-compare.c
12
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo-compare.c
17
--- a/net/colo-compare.c
14
+++ b/net/colo-compare.c
18
+++ b/net/colo-compare.c
15
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset)
19
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
16
sec_ip_src, sec_ip_dst);
20
if (result) {
17
}
21
colo_release_primary_pkt(s, pkt);
18
22
packet_destroy(result->data, NULL);
19
+ offset = ppkt->vnet_hdr_len + offset;
23
- g_queue_remove(&conn->secondary_list, result->data);
20
+
24
+ g_queue_delete_link(&conn->secondary_list, result);
21
if (ppkt->size == spkt->size) {
25
} else {
22
- return memcmp(ppkt->data + offset, spkt->data + offset,
26
/*
23
+ return memcmp(ppkt->data + offset,
27
* If one packet arrive late, the secondary_list or
24
+ spkt->data + offset,
25
spkt->size - offset);
26
} else {
27
trace_colo_compare_main("Net packet size are not the same");
28
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
29
*/
30
if (ptcp->th_off > 5) {
31
ptrdiff_t tcp_offset;
32
+
33
tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
34
- + (ptcp->th_off * 4);
35
+ + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
36
res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
37
} else if (ptcp->th_sum == stcp->th_sum) {
38
res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
39
--
28
--
40
2.7.4
29
2.7.4
41
30
42
31
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
3
The in6_address comes after the ip6_ext_hdr_routing header,
4
not after the ip6_ext_hdr one. Fix the offset.
5
6
Cc: qemu-stable@nongnu.org
7
Reported-by: Stefano Garzarella <sgarzare@redhat.com>
8
Fixes: eb700029c78 ("net_pkt: Extend packet abstraction as required by e1000e functionality")
9
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
10
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
13
---
6
docs/colo-proxy.txt | 26 ++++++++++++++++++++++++++
14
net/eth.c | 2 +-
7
1 file changed, 26 insertions(+)
15
1 file changed, 1 insertion(+), 1 deletion(-)
8
16
9
diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
17
diff --git a/net/eth.c b/net/eth.c
10
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
11
--- a/docs/colo-proxy.txt
19
--- a/net/eth.c
12
+++ b/docs/colo-proxy.txt
20
+++ b/net/eth.c
13
@@ -XXX,XX +XXX,XX @@ Secondary(ip:3.3.3.8):
21
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
14
-chardev socket,id=red1,host=3.3.3.3,port=9004
22
}
15
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
23
16
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
24
bytes_read = iov_to_buf(pkt, pkt_frags,
17
+-object filter-rewriter,id=f3,netdev=hn0,queue=all
25
- rthdr_offset + sizeof(*ext_hdr),
18
+
26
+ rthdr_offset + sizeof(*rthdr),
19
+If you want to use virtio-net-pci or other driver with vnet_header:
27
dst_addr, sizeof(*dst_addr));
20
+
28
21
+Primary(ip:3.3.3.3):
29
return bytes_read == sizeof(*dst_addr);
22
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
23
+-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
24
+-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
25
+-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
26
+-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
27
+-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
28
+-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
29
+-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
30
+-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
31
+-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support
32
+-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support
33
+-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
34
+
35
+Secondary(ip:3.3.3.8):
36
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,down script=/etc/qemu-ifdown
37
+-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
38
+-chardev socket,id=red0,host=3.3.3.3,port=9003
39
+-chardev socket,id=red1,host=3.3.3.3,port=9004
40
+-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support
41
+-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support
42
+-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support
43
44
Note:
45
a.COLO-proxy must work with COLO-frame and Block-replication.
46
--
30
--
47
2.7.4
31
2.7.4
48
32
49
33
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
This patch changes the compare_chr_send() parameter from CharBackend to CompareState,
3
The length field is already contained in the ip6_ext_hdr structure.
4
so we can get more information like vnet_hdr (we use it to support packets with vnet_header).
4
Check it directly in eth_parse_ipv6_hdr() before calling
5
_eth_get_rss_ex_dst_addr(), which gets a bit simplified.
5
6
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
8
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
11
---
9
net/colo-compare.c | 14 +++++++-------
12
net/eth.c | 14 +++++++-------
10
1 file changed, 7 insertions(+), 7 deletions(-)
13
1 file changed, 7 insertions(+), 7 deletions(-)
11
14
12
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
diff --git a/net/eth.c b/net/eth.c
13
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
14
--- a/net/colo-compare.c
17
--- a/net/eth.c
15
+++ b/net/colo-compare.c
18
+++ b/net/eth.c
16
@@ -XXX,XX +XXX,XX @@ enum {
19
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
17
SECONDARY_IN,
20
{
18
};
21
struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
19
22
20
-static int compare_chr_send(CharBackend *out,
23
- if ((rthdr->rtype == 2) &&
21
+static int compare_chr_send(CompareState *s,
24
- (rthdr->len == sizeof(struct in6_address) / 8) &&
22
const uint8_t *buf,
25
- (rthdr->segleft == 1)) {
23
uint32_t size);
26
+ if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
24
27
25
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
28
size_t input_size = iov_size(pkt, pkt_frags);
29
size_t bytes_read;
30
@@ -XXX,XX +XXX,XX @@ bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
26
}
31
}
27
32
28
if (result) {
33
if (curr_ext_hdr_type == IP6_ROUTING) {
29
- ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size);
34
- info->rss_ex_dst_valid =
30
+ ret = compare_chr_send(s, pkt->data, pkt->size);
35
- _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
31
if (ret < 0) {
36
- ip6hdr_off + info->full_hdr_len,
32
error_report("colo_send_primary_packet failed");
37
- &ext_hdr, &info->rss_ex_dst);
33
}
38
+ if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
34
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
39
+ info->rss_ex_dst_valid =
35
}
40
+ _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
36
}
41
+ ip6hdr_off + info->full_hdr_len,
37
42
+ &ext_hdr, &info->rss_ex_dst);
38
-static int compare_chr_send(CharBackend *out,
43
+ }
39
+static int compare_chr_send(CompareState *s,
44
} else if (curr_ext_hdr_type == IP6_DESTINATON) {
40
const uint8_t *buf,
45
info->rss_ex_src_valid =
41
uint32_t size)
46
_eth_get_rss_ex_src_addr(pkt, pkt_frags,
42
{
43
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CharBackend *out,
44
return 0;
45
}
46
47
- ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
48
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
49
if (ret != sizeof(len)) {
50
goto err;
51
}
52
53
- ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
54
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
55
if (ret != size) {
56
goto err;
57
}
58
@@ -XXX,XX +XXX,XX @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs)
59
60
if (packet_enqueue(s, PRIMARY_IN)) {
61
trace_colo_compare_main("primary: unsupported packet in");
62
- compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len);
63
+ compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
64
} else {
65
/* compare connection */
66
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
67
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
68
69
while (!g_queue_is_empty(&conn->primary_list)) {
70
pkt = g_queue_pop_head(&conn->primary_list);
71
- compare_chr_send(&s->chr_out, pkt->data, pkt->size);
72
+ compare_chr_send(s, pkt->data, pkt->size);
73
packet_destroy(pkt, NULL);
74
}
75
while (!g_queue_is_empty(&conn->secondary_list)) {
76
--
47
--
77
2.7.4
48
2.7.4
78
49
79
50
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
Make colo-compare and filter-rewriter able to parse vnet packets.
3
The 'offset' argument represents the offset to the ip6_ext_hdr
4
header, rename it as 'ext_hdr_offset'.
4
5
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
7
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
10
---
8
net/colo.c | 6 +++---
11
net/eth.c | 6 +++---
9
1 file changed, 3 insertions(+), 3 deletions(-)
12
1 file changed, 3 insertions(+), 3 deletions(-)
10
13
11
diff --git a/net/colo.c b/net/colo.c
14
diff --git a/net/eth.c b/net/eth.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo.c
16
--- a/net/eth.c
14
+++ b/net/colo.c
17
+++ b/net/eth.c
15
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
18
@@ -XXX,XX +XXX,XX @@ eth_is_ip6_extension_header_type(uint8_t hdr_type)
19
20
static bool
21
_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
22
- size_t rthdr_offset,
23
+ size_t ext_hdr_offset,
24
struct ip6_ext_hdr *ext_hdr,
25
struct in6_address *dst_addr)
16
{
26
{
17
int network_length;
27
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
18
static const uint8_t vlan[] = {0x81, 0x00};
28
size_t input_size = iov_size(pkt, pkt_frags);
19
- uint8_t *data = pkt->data;
29
size_t bytes_read;
20
+ uint8_t *data = pkt->data + pkt->vnet_hdr_len;
30
21
uint16_t l3_proto;
31
- if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
22
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
32
+ if (input_size < ext_hdr_offset + sizeof(*ext_hdr)) {
23
33
return false;
24
- if (pkt->size < ETH_HLEN) {
34
}
25
+ if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
35
26
trace_colo_proxy_main("pkt->size < ETH_HLEN");
36
bytes_read = iov_to_buf(pkt, pkt_frags,
27
return 1;
37
- rthdr_offset + sizeof(*rthdr),
28
}
38
+ ext_hdr_offset + sizeof(*rthdr),
29
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
39
dst_addr, sizeof(*dst_addr));
30
}
40
31
41
return bytes_read == sizeof(*dst_addr);
32
network_length = pkt->ip->ip_hl * 4;
33
- if (pkt->size < l2hdr_len + network_length) {
34
+ if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) {
35
trace_colo_proxy_main("pkt->size < network_header + network_length");
36
return 1;
37
}
38
--
42
--
39
2.7.4
43
2.7.4
40
44
41
45
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
We can use this property to flush and send packets with vnet_hdr_len.
3
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
4
4
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
7
---
8
net/colo-compare.c | 8 ++++++--
8
net/eth.c | 14 ++++++--------
9
net/colo.c | 3 ++-
9
1 file changed, 6 insertions(+), 8 deletions(-)
10
net/colo.h | 4 +++-
11
net/filter-rewriter.c | 2 +-
12
4 files changed, 12 insertions(+), 5 deletions(-)
13
10
14
diff --git a/net/colo-compare.c b/net/colo-compare.c
11
diff --git a/net/eth.c b/net/eth.c
15
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
16
--- a/net/colo-compare.c
13
--- a/net/eth.c
17
+++ b/net/colo-compare.c
14
+++ b/net/eth.c
18
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode)
15
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
19
Connection *conn;
16
struct in6_address *dst_addr)
20
21
if (mode == PRIMARY_IN) {
22
- pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
23
+ pkt = packet_new(s->pri_rs.buf,
24
+ s->pri_rs.packet_len,
25
+ s->pri_rs.vnet_hdr_len);
26
} else {
27
- pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
28
+ pkt = packet_new(s->sec_rs.buf,
29
+ s->sec_rs.packet_len,
30
+ s->sec_rs.vnet_hdr_len);
31
}
32
33
if (parse_packet_early(pkt)) {
34
diff --git a/net/colo.c b/net/colo.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/net/colo.c
37
+++ b/net/colo.c
38
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque)
39
g_slice_free(Connection, conn);
40
}
41
42
-Packet *packet_new(const void *data, int size)
43
+Packet *packet_new(const void *data, int size, int vnet_hdr_len)
44
{
17
{
45
Packet *pkt = g_slice_new(Packet);
18
struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
46
19
+ size_t input_size = iov_size(pkt, pkt_frags);
47
pkt->data = g_memdup(data, size);
20
+ size_t bytes_read;
48
pkt->size = size;
21
49
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
22
- if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
50
+ pkt->vnet_hdr_len = vnet_hdr_len;
23
-
51
24
- size_t input_size = iov_size(pkt, pkt_frags);
52
return pkt;
25
- size_t bytes_read;
53
}
26
-
54
diff --git a/net/colo.h b/net/colo.h
27
- if (input_size < ext_hdr_offset + sizeof(*ext_hdr)) {
55
index XXXXXXX..XXXXXXX 100644
28
- return false;
56
--- a/net/colo.h
29
- }
57
+++ b/net/colo.h
30
+ if (input_size < ext_hdr_offset + sizeof(*ext_hdr)) {
58
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
31
+ return false;
59
int size;
32
+ }
60
/* Time of packet creation, in wall clock ms */
33
61
int64_t creation_ms;
34
+ if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
62
+ /* Get vnet_hdr_len from filter */
35
bytes_read = iov_to_buf(pkt, pkt_frags,
63
+ uint32_t vnet_hdr_len;
36
ext_hdr_offset + sizeof(*rthdr),
64
} Packet;
37
dst_addr, sizeof(*dst_addr));
65
66
typedef struct ConnectionKey {
67
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
68
ConnectionKey *key,
69
GQueue *conn_list);
70
void connection_hashtable_reset(GHashTable *connection_track_table);
71
-Packet *packet_new(const void *data, int size);
72
+Packet *packet_new(const void *data, int size, int vnet_hdr_len);
73
void packet_destroy(void *opaque, void *user_data);
74
75
#endif /* QEMU_COLO_PROXY_H */
76
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/net/filter-rewriter.c
79
+++ b/net/filter-rewriter.c
80
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
81
char *buf = g_malloc0(size);
82
83
iov_to_buf(iov, iovcnt, 0, buf, size);
84
- pkt = packet_new(buf, size);
85
+ pkt = packet_new(buf, size, 0);
86
g_free(buf);
87
88
/*
89
--
38
--
90
2.7.4
39
2.7.4
91
40
92
41
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
This patch changes the filter_send() parameter from CharBackend to MirrorState,
3
We want to check fields from ip6_ext_hdr_routing structure
4
so we can get more information like vnet_hdr (we use it to support packets with vnet_header).
4
and if correct read the full in6_address. Let's directly check
5
if our iovec contains enough data for everything, else return
6
early.
5
7
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
8
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
9
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
12
---
9
net/filter-mirror.c | 10 +++++-----
13
net/eth.c | 2 +-
10
1 file changed, 5 insertions(+), 5 deletions(-)
14
1 file changed, 1 insertion(+), 1 deletion(-)
11
15
12
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
16
diff --git a/net/eth.c b/net/eth.c
13
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
14
--- a/net/filter-mirror.c
18
--- a/net/eth.c
15
+++ b/net/filter-mirror.c
19
+++ b/net/eth.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
20
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
17
SocketReadState rs;
21
size_t input_size = iov_size(pkt, pkt_frags);
18
} MirrorState;
22
size_t bytes_read;
19
23
20
-static int filter_send(CharBackend *chr_out,
24
- if (input_size < ext_hdr_offset + sizeof(*ext_hdr)) {
21
+static int filter_send(MirrorState *s,
25
+ if (input_size < ext_hdr_offset + sizeof(*rthdr) + sizeof(*dst_addr)) {
22
const struct iovec *iov,
26
return false;
23
int iovcnt)
24
{
25
@@ -XXX,XX +XXX,XX @@ static int filter_send(CharBackend *chr_out,
26
}
27
}
27
28
28
len = htonl(size);
29
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len));
30
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
31
if (ret != sizeof(len)) {
32
goto err;
33
}
34
35
buf = g_malloc(size);
36
iov_to_buf(iov, iovcnt, 0, buf, size);
37
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size);
38
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
39
g_free(buf);
40
if (ret != size) {
41
goto err;
42
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
43
MirrorState *s = FILTER_MIRROR(nf);
44
int ret;
45
46
- ret = filter_send(&s->chr_out, iov, iovcnt);
47
+ ret = filter_send(s, iov, iovcnt);
48
if (ret) {
49
error_report("filter mirror send failed(%s)", strerror(-ret));
50
}
51
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf,
52
int ret;
53
54
if (qemu_chr_fe_backend_connected(&s->chr_out)) {
55
- ret = filter_send(&s->chr_out, iov, iovcnt);
56
+ ret = filter_send(s, iov, iovcnt);
57
if (ret) {
58
error_report("filter redirector send failed(%s)", strerror(-ret));
59
}
60
--
29
--
61
2.7.4
30
2.7.4
62
31
63
32
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-mirror, default is disabled.
3
We can't know the caller read enough data in the memory pointed
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
by ext_hdr to cast it as an ip6_ext_hdr_routing.
5
You can use it for example:
5
Declare rt_hdr on the stack and fill it again from the iovec.
6
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
6
7
7
Since we already checked there is enough data in the iovec buffer,
8
If it has the vnet_hdr_support flag, we will change the sending packet format from
8
simply add an assert() call to consume the bytes_read variable.
9
struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];}.
9
10
This lets other modules (like colo-compare) know how to parse net packets correctly.
10
This fix a 2 bytes buffer overrun in eth_parse_ipv6_hdr() reported
11
11
by QEMU fuzzer:
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
12
13
$ cat << EOF | ./qemu-system-i386 -M pc-q35-5.0 \
14
-accel qtest -monitor none \
15
-serial none -nographic -qtest stdio
16
outl 0xcf8 0x80001010
17
outl 0xcfc 0xe1020000
18
outl 0xcf8 0x80001004
19
outw 0xcfc 0x7
20
write 0x25 0x1 0x86
21
write 0x26 0x1 0xdd
22
write 0x4f 0x1 0x2b
23
write 0xe1020030 0x4 0x190002e1
24
write 0xe102003a 0x2 0x0807
25
write 0xe1020048 0x4 0x12077cdd
26
write 0xe1020400 0x4 0xba077cdd
27
write 0xe1020420 0x4 0x190002e1
28
write 0xe1020428 0x4 0x3509d807
29
write 0xe1020438 0x1 0xe2
30
EOF
31
=================================================================
32
==2859770==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffdef904902 at pc 0x561ceefa78de bp 0x7ffdef904820 sp 0x7ffdef904818
33
READ of size 1 at 0x7ffdef904902 thread T0
34
#0 0x561ceefa78dd in _eth_get_rss_ex_dst_addr net/eth.c:410:17
35
#1 0x561ceefa41fb in eth_parse_ipv6_hdr net/eth.c:532:17
36
#2 0x561cef7de639 in net_tx_pkt_parse_headers hw/net/net_tx_pkt.c:228:14
37
#3 0x561cef7dbef4 in net_tx_pkt_parse hw/net/net_tx_pkt.c:273:9
38
#4 0x561ceec29f22 in e1000e_process_tx_desc hw/net/e1000e_core.c:730:29
39
#5 0x561ceec28eac in e1000e_start_xmit hw/net/e1000e_core.c:927:9
40
#6 0x561ceec1baab in e1000e_set_tdt hw/net/e1000e_core.c:2444:9
41
#7 0x561ceebf300e in e1000e_core_write hw/net/e1000e_core.c:3256:9
42
#8 0x561cef3cd4cd in e1000e_mmio_write hw/net/e1000e.c:110:5
43
44
Address 0x7ffdef904902 is located in stack of thread T0 at offset 34 in frame
45
#0 0x561ceefa320f in eth_parse_ipv6_hdr net/eth.c:486
46
47
This frame has 1 object(s):
48
[32, 34) 'ext_hdr' (line 487) <== Memory access at offset 34 overflows this variable
49
HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
50
(longjmp and C++ exceptions *are* supported)
51
SUMMARY: AddressSanitizer: stack-buffer-overflow net/eth.c:410:17 in _eth_get_rss_ex_dst_addr
52
Shadow bytes around the buggy address:
53
0x10003df188d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
54
0x10003df188e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
55
0x10003df188f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
56
0x10003df18900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
57
0x10003df18910: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1
58
=>0x10003df18920:[02]f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
59
0x10003df18930: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
60
0x10003df18940: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
61
0x10003df18950: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
62
0x10003df18960: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
63
0x10003df18970: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
64
Shadow byte legend (one shadow byte represents 8 application bytes):
65
Addressable: 00
66
Partially addressable: 01 02 03 04 05 06 07
67
Stack left redzone: f1
68
Stack right redzone: f3
69
==2859770==ABORTING
70
71
Add the corresponding qtest case with the fuzzer reproducer.
72
73
FWIW GCC 11 similarly reported:
74
75
net/eth.c: In function 'eth_parse_ipv6_hdr':
76
net/eth.c:410:15: error: array subscript 'struct ip6_ext_hdr_routing[0]' is partly outside array bounds of 'struct ip6_ext_hdr[1]' [-Werror=array-bounds]
77
410 | if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
78
| ~~~~~^~~~~~~
79
net/eth.c:485:24: note: while referencing 'ext_hdr'
80
485 | struct ip6_ext_hdr ext_hdr;
81
| ^~~~~~~
82
net/eth.c:410:38: error: array subscript 'struct ip6_ext_hdr_routing[0]' is partly outside array bounds of 'struct ip6_ext_hdr[1]' [-Werror=array-bounds]
83
410 | if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
84
| ~~~~~^~~~~~~~~
85
net/eth.c:485:24: note: while referencing 'ext_hdr'
86
485 | struct ip6_ext_hdr ext_hdr;
87
| ^~~~~~~
88
89
Cc: qemu-stable@nongnu.org
90
Buglink: https://bugs.launchpad.net/qemu/+bug/1879531
91
Reported-by: Alexander Bulekov <alxndr@bu.edu>
92
Reported-by: Miroslav Rezanina <mrezanin@redhat.com>
93
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
94
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
95
Fixes: eb700029c78 ("net_pkt: Extend packet abstraction as required by e1000e functionality")
96
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
97
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
98
---
15
net/filter-mirror.c | 42 +++++++++++++++++++++++++++++++++++++++++-
99
MAINTAINERS | 1 +
16
qemu-options.hx | 5 ++---
100
net/eth.c | 13 +++++++----
17
2 files changed, 43 insertions(+), 4 deletions(-)
101
tests/qtest/fuzz-e1000e-test.c | 53 ++++++++++++++++++++++++++++++++++++++++++
18
102
tests/qtest/meson.build | 1 +
19
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
103
4 files changed, 63 insertions(+), 5 deletions(-)
104
create mode 100644 tests/qtest/fuzz-e1000e-test.c
105
106
diff --git a/MAINTAINERS b/MAINTAINERS
20
index XXXXXXX..XXXXXXX 100644
107
index XXXXXXX..XXXXXXX 100644
21
--- a/net/filter-mirror.c
108
--- a/MAINTAINERS
22
+++ b/net/filter-mirror.c
109
+++ b/MAINTAINERS
23
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
110
@@ -XXX,XX +XXX,XX @@ e1000e
24
CharBackend chr_in;
111
M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
25
CharBackend chr_out;
112
S: Maintained
26
SocketReadState rs;
113
F: hw/net/e1000e*
27
+ bool vnet_hdr;
114
+F: tests/qtest/fuzz-e1000e-test.c
28
} MirrorState;
115
29
116
eepro100
30
static int filter_send(MirrorState *s,
117
M: Stefan Weil <sw@weilnetz.de>
31
const struct iovec *iov,
118
diff --git a/net/eth.c b/net/eth.c
32
int iovcnt)
119
index XXXXXXX..XXXXXXX 100644
120
--- a/net/eth.c
121
+++ b/net/eth.c
122
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
123
struct ip6_ext_hdr *ext_hdr,
124
struct in6_address *dst_addr)
33
{
125
{
34
+ NetFilterState *nf = NETFILTER(s);
126
- struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
35
int ret = 0;
127
+ struct ip6_ext_hdr_routing rt_hdr;
36
ssize_t size = 0;
128
size_t input_size = iov_size(pkt, pkt_frags);
37
uint32_t len = 0;
129
size_t bytes_read;
38
@@ -XXX,XX +XXX,XX @@ static int filter_send(MirrorState *s,
130
39
goto err;
131
- if (input_size < ext_hdr_offset + sizeof(*rthdr) + sizeof(*dst_addr)) {
132
+ if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
133
return false;
40
}
134
}
41
135
42
+ if (s->vnet_hdr) {
136
- if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
43
+ /*
137
- bytes_read = iov_to_buf(pkt, pkt_frags,
44
+ * If vnet_hdr = on, we send vnet header len to make other
138
- ext_hdr_offset + sizeof(*rthdr),
45
+ * module(like colo-compare) know how to parse net
139
+ bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
46
+ * packet correctly.
140
+ &rt_hdr, sizeof(rt_hdr));
47
+ */
141
+ assert(bytes_read == sizeof(rt_hdr));
48
+ ssize_t vnet_hdr_len;
142
+
49
+
143
+ if ((rt_hdr.rtype == 2) && (rt_hdr.segleft == 1)) {
50
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
144
+ bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
51
+
145
dst_addr, sizeof(*dst_addr));
52
+ len = htonl(vnet_hdr_len);
146
53
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
147
return bytes_read == sizeof(*dst_addr);
54
+ if (ret != sizeof(len)) {
148
diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c
55
+ goto err;
149
new file mode 100644
56
+ }
150
index XXXXXXX..XXXXXXX
151
--- /dev/null
152
+++ b/tests/qtest/fuzz-e1000e-test.c
153
@@ -XXX,XX +XXX,XX @@
154
+/*
155
+ * QTest testcase for e1000e device generated by fuzzer
156
+ *
157
+ * Copyright (c) 2021 Red Hat, Inc.
158
+ *
159
+ * SPDX-License-Identifier: GPL-2.0-or-later
160
+ */
161
+
162
+#include "qemu/osdep.h"
163
+
164
+#include "libqos/libqtest.h"
165
+
166
+/*
167
+ * https://bugs.launchpad.net/qemu/+bug/1879531
168
+ */
169
+static void test_lp1879531_eth_get_rss_ex_dst_addr(void)
170
+{
171
+ QTestState *s;
172
+
173
+ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0");
174
+
175
+ qtest_outl(s, 0xcf8, 0x80001010);
176
+ qtest_outl(s, 0xcfc, 0xe1020000);
177
+ qtest_outl(s, 0xcf8, 0x80001004);
178
+ qtest_outw(s, 0xcfc, 0x7);
179
+ qtest_writeb(s, 0x25, 0x86);
180
+ qtest_writeb(s, 0x26, 0xdd);
181
+ qtest_writeb(s, 0x4f, 0x2b);
182
+
183
+ qtest_writel(s, 0xe1020030, 0x190002e1);
184
+ qtest_writew(s, 0xe102003a, 0x0807);
185
+ qtest_writel(s, 0xe1020048, 0x12077cdd);
186
+ qtest_writel(s, 0xe1020400, 0xba077cdd);
187
+ qtest_writel(s, 0xe1020420, 0x190002e1);
188
+ qtest_writel(s, 0xe1020428, 0x3509d807);
189
+ qtest_writeb(s, 0xe1020438, 0xe2);
190
+ qtest_writeb(s, 0x4f, 0x2b);
191
+ qtest_quit(s);
192
+}
193
+
194
+int main(int argc, char **argv)
195
+{
196
+ const char *arch = qtest_get_arch();
197
+
198
+ g_test_init(&argc, &argv, NULL);
199
+
200
+ if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
201
+ qtest_add_func("fuzz/test_lp1879531_eth_get_rss_ex_dst_addr",
202
+ test_lp1879531_eth_get_rss_ex_dst_addr);
57
+ }
203
+ }
58
+
204
+
59
buf = g_malloc(size);
205
+ return g_test_run();
60
iov_to_buf(iov, iovcnt, 0, buf, size);
61
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
62
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
63
}
64
}
65
66
- net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
67
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
68
69
if (s->indev) {
70
chr = qemu_chr_find(s->indev);
71
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_set_outdev(Object *obj,
72
}
73
}
74
75
+static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp)
76
+{
77
+ MirrorState *s = FILTER_MIRROR(obj);
78
+
79
+ return s->vnet_hdr;
80
+}
206
+}
81
+
207
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
82
+static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp)
83
+{
84
+ MirrorState *s = FILTER_MIRROR(obj);
85
+
86
+ s->vnet_hdr = value;
87
+}
88
+
89
static char *filter_redirector_get_outdev(Object *obj, Error **errp)
90
{
91
MirrorState *s = FILTER_REDIRECTOR(obj);
92
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
93
94
static void filter_mirror_init(Object *obj)
95
{
96
+ MirrorState *s = FILTER_MIRROR(obj);
97
+
98
object_property_add_str(obj, "outdev", filter_mirror_get_outdev,
99
filter_mirror_set_outdev, NULL);
100
+
101
+ s->vnet_hdr = false;
102
+ object_property_add_bool(obj, "vnet_hdr_support",
103
+ filter_mirror_get_vnet_hdr,
104
+ filter_mirror_set_vnet_hdr, NULL);
105
}
106
107
static void filter_redirector_init(Object *obj)
108
diff --git a/qemu-options.hx b/qemu-options.hx
109
index XXXXXXX..XXXXXXX 100644
208
index XXXXXXX..XXXXXXX 100644
110
--- a/qemu-options.hx
209
--- a/tests/qtest/meson.build
111
+++ b/qemu-options.hx
210
+++ b/tests/qtest/meson.build
112
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
211
@@ -XXX,XX +XXX,XX @@ qtests_i386 = \
113
@option{tx}: the filter is attached to the transmit queue of the netdev,
212
(config_all_devices.has_key('CONFIG_TPM_TIS_ISA') ? ['tpm-tis-test'] : []) + \
114
where it will receive packets sent by the netdev.
213
(config_all_devices.has_key('CONFIG_TPM_TIS_ISA') ? ['tpm-tis-swtpm-test'] : []) + \
115
214
(config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \
116
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
215
+ (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? ['fuzz-e1000e-test'] : []) + \
117
+@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
216
qtests_pci + \
118
217
['fdc-test',
119
-filter-mirror on netdev @var{netdevid},mirror net packet to chardev
218
'ide-test',
120
-@var{chardevid}
121
+filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
122
123
@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
124
outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
125
--
219
--
126
2.7.4
220
2.7.4
127
221
128
222
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-redirector; it is disabled by default.
3
To simplify the function body, invert the if() statement, returning
4
If you use the virtio-net-pci driver or another driver that needs vnet_hdr, please enable it.
4
earlier.
5
Because colo-compare and other modules need the vnet_hdr_len to parse
5
Since we already checked there is enough data in the iovec buffer,
6
packet, we add this new option to send the len to others.
6
simply add an assert() call to consume the bytes_read variable.
7
You can use it for example:
8
-object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support
9
7
10
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
8
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
9
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
12
---
13
net/filter-mirror.c | 23 +++++++++++++++++++++++
13
net/eth.c | 13 ++++++-------
14
qemu-options.hx | 6 +++---
14
1 file changed, 6 insertions(+), 7 deletions(-)
15
2 files changed, 26 insertions(+), 3 deletions(-)
16
15
17
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
16
diff --git a/net/eth.c b/net/eth.c
18
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
19
--- a/net/filter-mirror.c
18
--- a/net/eth.c
20
+++ b/net/filter-mirror.c
19
+++ b/net/eth.c
21
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
20
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
22
s->outdev = g_strdup(value);
21
bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
22
&rt_hdr, sizeof(rt_hdr));
23
assert(bytes_read == sizeof(rt_hdr));
24
-
25
- if ((rt_hdr.rtype == 2) && (rt_hdr.segleft == 1)) {
26
- bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
27
- dst_addr, sizeof(*dst_addr));
28
-
29
- return bytes_read == sizeof(*dst_addr);
30
+ if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
31
+ return false;
32
}
33
+ bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
34
+ dst_addr, sizeof(*dst_addr));
35
+ assert(bytes_read == sizeof(*dst_addr));
36
37
- return false;
38
+ return true;
23
}
39
}
24
40
25
+static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp)
41
static bool
26
+{
27
+ MirrorState *s = FILTER_REDIRECTOR(obj);
28
+
29
+ return s->vnet_hdr;
30
+}
31
+
32
+static void filter_redirector_set_vnet_hdr(Object *obj,
33
+ bool value,
34
+ Error **errp)
35
+{
36
+ MirrorState *s = FILTER_REDIRECTOR(obj);
37
+
38
+ s->vnet_hdr = value;
39
+}
40
+
41
static void filter_mirror_init(Object *obj)
42
{
43
MirrorState *s = FILTER_MIRROR(obj);
44
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_init(Object *obj)
45
46
static void filter_redirector_init(Object *obj)
47
{
48
+ MirrorState *s = FILTER_REDIRECTOR(obj);
49
+
50
object_property_add_str(obj, "indev", filter_redirector_get_indev,
51
filter_redirector_set_indev, NULL);
52
object_property_add_str(obj, "outdev", filter_redirector_get_outdev,
53
filter_redirector_set_outdev, NULL);
54
+
55
+ s->vnet_hdr = false;
56
+ object_property_add_bool(obj, "vnet_hdr_support",
57
+ filter_redirector_get_vnet_hdr,
58
+ filter_redirector_set_vnet_hdr, NULL);
59
}
60
61
static void filter_mirror_fini(Object *obj)
62
diff --git a/qemu-options.hx b/qemu-options.hx
63
index XXXXXXX..XXXXXXX 100644
64
--- a/qemu-options.hx
65
+++ b/qemu-options.hx
66
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
67
68
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
69
70
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
71
-outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
72
+@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
73
74
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
75
-@var{chardevid},and redirect indev's packet to filter.
76
+@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag,
77
+filter-redirector will redirect packet with vnet_hdr_len.
78
Create a filter-redirector we need to differ outdev id from indev id, id can not
79
be the same. we can just use indev or outdev, but at least one of indev or outdev
80
need to be specified.
81
--
42
--
82
2.7.4
43
2.7.4
83
44
84
45
diff view generated by jsdifflib