1
The following changes since commit e607bbee553cfe73072870cef458cfa4e78133e2:
1
The following changes since commit bdee969c0e65d4d509932b1d70e3a3b2ffbff6d5:
2
2
3
Merge remote-tracking branch 'remotes/edgar/tags/edgar/xilinx-next-2018-01-26.for-upstream' into staging (2018-01-26 14:24:25 +0000)
3
Merge remote-tracking branch 'remotes/bonzini-gitlab/tags/for-upstream' into staging (2021-03-19 18:01:17 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to bf4835a4d5338bb7424827715df22570a8adc67c:
9
for you to fetch changes up to c7274b5ef43614dd133daec1e2018f71d8744088:
10
10
11
MAINTAINERS: update Dmitry Fleytman email (2018-01-29 16:05:38 +0800)
11
net/eth: Add an assert() and invert if() statement to simplify code (2021-03-22 17:34:31 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
----------------------------------------------------------------
15
----------------------------------------------------------------
16
Mao Zhongyi (2):
16
Bin Meng (4):
17
colo: modified the payload compare function
17
net: eth: Add a helper to pad a short Ethernet frame
18
colo: compare the packet based on the tcp sequence number
18
net: Add a 'do_not_pad" to NetClientState
19
net: Pad short frames to minimum size before sending from SLiRP/TAP
20
hw/net: virtio-net: Initialize nc->do_not_pad to true
19
21
20
Philippe Mathieu-Daudé (1):
22
Lukas Straub (2):
21
MAINTAINERS: update Dmitry Fleytman email
23
net/colo-compare.c: Fix memory leak for non-tcp packet
24
net/colo-compare.c: Optimize removal of secondary packet
22
25
23
Thomas Huth (3):
26
Philippe Mathieu-Daudé (7):
24
net: Allow hubports to connect to other netdevs
27
net/eth: Use correct in6_address offset in _eth_get_rss_ex_dst_addr()
25
net: Allow netdevs to be used with 'hostfwd_add' and 'hostfwd_remove'
28
net/eth: Simplify _eth_get_rss_ex_dst_addr()
26
qemu-doc: Get rid of "vlan=X" example in the documentation
29
net/eth: Better describe _eth_get_rss_ex_dst_addr's offset argument
30
net/eth: Check size earlier in _eth_get_rss_ex_dst_addr()
31
net/eth: Check iovec has enough data earlier
32
net/eth: Read ip6_ext_hdr_routing buffer before accessing it
33
net/eth: Add an assert() and invert if() statement to simplify code
27
34
28
MAINTAINERS | 8 +-
35
MAINTAINERS | 1 +
29
hmp-commands.hx | 4 +-
36
hw/net/virtio-net.c | 4 +++
30
net/colo-compare.c | 411 +++++++++++++++++++++++++++++++++--------------------
37
include/net/eth.h | 17 ++++++++++++
31
net/colo.c | 9 ++
38
include/net/net.h | 1 +
32
net/colo.h | 15 ++
39
net/colo-compare.c | 3 ++-
33
net/hub.c | 27 +++-
40
net/eth.c | 61 +++++++++++++++++++++++++++---------------
34
net/hub.h | 3 +-
41
net/slirp.c | 10 +++++++
35
net/net.c | 2 +-
42
net/tap-win32.c | 10 +++++++
36
net/slirp.c | 33 +++--
43
net/tap.c | 10 +++++++
37
net/trace-events | 2 +-
44
tests/qtest/fuzz-e1000e-test.c | 53 ++++++++++++++++++++++++++++++++++++
38
qapi/net.json | 4 +-
45
tests/qtest/meson.build | 1 +
39
qemu-options.hx | 12 +-
46
11 files changed, 148 insertions(+), 23 deletions(-)
40
12 files changed, 347 insertions(+), 183 deletions(-)
47
create mode 100644 tests/qtest/fuzz-e1000e-test.c
41
48
42
49
diff view generated by jsdifflib
New patch
1
From: Bin Meng <bmeng.cn@gmail.com>
1
2
3
Add a helper to pad a short Ethernet frame to the minimum required
4
length, which can be used by backends' code.
5
6
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
include/net/eth.h | 17 +++++++++++++++++
11
net/eth.c | 17 +++++++++++++++++
12
2 files changed, 34 insertions(+)
13
14
diff --git a/include/net/eth.h b/include/net/eth.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/net/eth.h
17
+++ b/include/net/eth.h
18
@@ -XXX,XX +XXX,XX @@
19
20
#define ETH_ALEN 6
21
#define ETH_HLEN 14
22
+#define ETH_ZLEN 60 /* Min. octets in frame without FCS */
23
24
struct eth_header {
25
uint8_t h_dest[ETH_ALEN]; /* destination eth addr */
26
@@ -XXX,XX +XXX,XX @@ bool
27
eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
28
size_t ip6hdr_off, eth_ip6_hdr_info *info);
29
30
+/**
31
+ * eth_pad_short_frame - pad a short frame to the minimum Ethernet frame length
32
+ *
33
+ * If the Ethernet frame size is shorter than 60 bytes, it will be padded to
34
+ * 60 bytes at the address @padded_pkt.
35
+ *
36
+ * @padded_pkt: buffer address to hold the padded frame
37
+ * @padded_buflen: pointer holding length of @padded_pkt. If the frame is
38
+ * padded, the length will be updated to the padded one.
39
+ * @pkt: address of the original Ethernet frame
40
+ * @pkt_size: size of the original Ethernet frame
41
+ * @return true if the frame is padded, otherwise false
42
+ */
43
+bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
44
+ const void *pkt, size_t pkt_size);
45
+
46
#endif
47
diff --git a/net/eth.c b/net/eth.c
48
index XXXXXXX..XXXXXXX 100644
49
--- a/net/eth.c
50
+++ b/net/eth.c
51
@@ -XXX,XX +XXX,XX @@ bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
52
info->l4proto = ext_hdr.ip6r_nxt;
53
return true;
54
}
55
+
56
+bool eth_pad_short_frame(uint8_t *padded_pkt, size_t *padded_buflen,
57
+ const void *pkt, size_t pkt_size)
58
+{
59
+ assert(padded_buflen && *padded_buflen >= ETH_ZLEN);
60
+
61
+ if (pkt_size >= ETH_ZLEN) {
62
+ return false;
63
+ }
64
+
65
+ /* pad to minimum Ethernet frame length */
66
+ memcpy(padded_pkt, pkt, pkt_size);
67
+ memset(&padded_pkt[pkt_size], 0, ETH_ZLEN - pkt_size);
68
+ *padded_buflen = ETH_ZLEN;
69
+
70
+ return true;
71
+}
72
--
73
2.7.4
74
75
diff view generated by jsdifflib
New patch
1
From: Bin Meng <bmeng.cn@gmail.com>
1
2
3
This adds a flag in NetClientState, so that a net client can tell
4
its peer that the packets do not need to be padded to the minimum
5
size of an Ethernet frame (60 bytes) before sending to it.
6
7
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
include/net/net.h | 1 +
12
1 file changed, 1 insertion(+)
13
14
diff --git a/include/net/net.h b/include/net/net.h
15
index XXXXXXX..XXXXXXX 100644
16
--- a/include/net/net.h
17
+++ b/include/net/net.h
18
@@ -XXX,XX +XXX,XX @@ struct NetClientState {
19
int vring_enable;
20
int vnet_hdr_len;
21
bool is_netdev;
22
+ bool do_not_pad; /* do not pad to the minimum ethernet frame length */
23
QTAILQ_HEAD(, NetFilterState) filters;
24
};
25
26
--
27
2.7.4
28
29
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Bin Meng <bmeng.cn@gmail.com>
2
2
3
It does not make much sense to limit these commands to the legacy 'vlan'
3
The minimum Ethernet frame length is 60 bytes. For short frames with
4
concept only, they should work with the modern netdevs, too. So now
4
smaller length like ARP packets (only 42 bytes), on a real world NIC
5
it is possible to use this command with one, two or three parameters.
5
it can choose either padding its length to the minimum required 60
6
bytes, or sending it out directly to the wire. Such behavior can be
7
hardcoded or controlled by a register bit. Similarly on the receive
8
path, NICs can choose either dropping such short frames directly or
9
handing them over to software to handle.
6
10
7
With one parameter, the command installs a hostfwd rule on the default
11
On the other hand, for the network backends like SLiRP/TAP, they
8
"user" network:
12
don't expose a way to control the short frame behavior. As of today
9
hostfwd_add tcp:...
13
they just send/receive data from/to the other end connected to them,
14
which means any sized packet is acceptable. So they can send and
15
receive short frames without any problem. It is observed that ARP
16
packets sent from SLiRP/TAP are 42 bytes, and SLiRP/TAP just send
17
these ARP packets to the other end which might be a NIC model that
18
does not allow short frames to pass through.
10
19
11
With two parameters, the command installs a hostfwd rule on a netdev
20
To provide better compatibility, for packets sent from QEMU network
12
(that's the new way of using this command):
21
backends like SLiRP/TAP, we change to pad short frames before sending
13
hostfwd_add netdev_id tcp:...
22
them out to the other end, if the other end does not forbid it via the
23
nc->do_not_pad flag. This ensures a backend as an Ethernet sender
24
does not violate the spec. But with this change, the behavior of
25
dropping short frames from SLiRP/TAP interfaces in the NIC model
26
cannot be emulated because it always receives a packet that is spec
27
compliant. The capability of sending short frames from NIC models is
28
still supported and short frames can still pass through SLiRP/TAP.
14
29
15
With three parameters, the command installs a rule on a 'vlan' (aka hub):
30
This commit should be able to fix the issue as reported with some
16
hostfwd_add hub_id name tcp:...
31
NIC models before, that ARP requests get dropped, preventing the
32
guest from becoming visible on the network. It was worked around in
33
these NIC models on the receive path, that when a short frame is
34
received, it is padded up to 60 bytes.
17
35
18
Same applies to the hostfwd_remove command now.
36
The following 2 commits seem to be the ones that worked around this issue
37
in e1000 and vmxnet3 before, and should probably be reverted.
19
38
20
Signed-off-by: Thomas Huth <thuth@redhat.com>
39
commit 78aeb23eded2 ("e1000: Pad short frames to minimum size (60 bytes)")
40
commit 40a87c6c9b11 ("vmxnet3: Pad short frames to minimum size (60 bytes)")
41
42
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
43
Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
44
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
---
45
---
23
hmp-commands.hx | 4 ++--
46
net/slirp.c | 10 ++++++++++
24
net/slirp.c | 33 +++++++++++++++++++++++----------
47
net/tap-win32.c | 10 ++++++++++
25
2 files changed, 25 insertions(+), 12 deletions(-)
48
net/tap.c | 10 ++++++++++
49
3 files changed, 30 insertions(+)
26
50
27
diff --git a/hmp-commands.hx b/hmp-commands.hx
28
index XXXXXXX..XXXXXXX 100644
29
--- a/hmp-commands.hx
30
+++ b/hmp-commands.hx
31
@@ -XXX,XX +XXX,XX @@ ETEXI
32
{
33
.name = "hostfwd_add",
34
.args_type = "arg1:s,arg2:s?,arg3:s?",
35
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
36
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
37
.help = "redirect TCP or UDP connections from host to guest (requires -net user)",
38
.cmd = hmp_hostfwd_add,
39
},
40
@@ -XXX,XX +XXX,XX @@ ETEXI
41
{
42
.name = "hostfwd_remove",
43
.args_type = "arg1:s,arg2:s?,arg3:s?",
44
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport",
45
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport",
46
.help = "remove host-to-guest TCP or UDP redirection",
47
.cmd = hmp_hostfwd_remove,
48
},
49
diff --git a/net/slirp.c b/net/slirp.c
51
diff --git a/net/slirp.c b/net/slirp.c
50
index XXXXXXX..XXXXXXX 100644
52
index XXXXXXX..XXXXXXX 100644
51
--- a/net/slirp.c
53
--- a/net/slirp.c
52
+++ b/net/slirp.c
54
+++ b/net/slirp.c
53
@@ -XXX,XX +XXX,XX @@ error:
55
@@ -XXX,XX +XXX,XX @@
54
return -1;
56
#include <pwd.h>
57
#include <sys/wait.h>
58
#endif
59
+#include "net/eth.h"
60
#include "net/net.h"
61
#include "clients.h"
62
#include "hub.h"
63
@@ -XXX,XX +XXX,XX @@ static ssize_t net_slirp_send_packet(const void *pkt, size_t pkt_len,
64
void *opaque)
65
{
66
SlirpState *s = opaque;
67
+ uint8_t min_pkt[ETH_ZLEN];
68
+ size_t min_pktsz = sizeof(min_pkt);
69
+
70
+ if (!s->nc.peer->do_not_pad) {
71
+ if (eth_pad_short_frame(min_pkt, &min_pktsz, pkt, pkt_len)) {
72
+ pkt = min_pkt;
73
+ pkt_len = min_pktsz;
74
+ }
75
+ }
76
77
return qemu_send_packet(&s->nc, pkt, pkt_len);
55
}
78
}
56
79
diff --git a/net/tap-win32.c b/net/tap-win32.c
57
-static SlirpState *slirp_lookup(Monitor *mon, const char *vlan,
80
index XXXXXXX..XXXXXXX 100644
58
- const char *stack)
81
--- a/net/tap-win32.c
59
+static SlirpState *slirp_lookup(Monitor *mon, const char *hub_id,
82
+++ b/net/tap-win32.c
60
+ const char *name)
83
@@ -XXX,XX +XXX,XX @@
61
{
84
62
-
85
#include "qemu-common.h"
63
- if (vlan) {
86
#include "clients.h" /* net_init_tap */
64
+ if (name) {
87
+#include "net/eth.h"
65
NetClientState *nc;
88
#include "net/net.h"
66
- nc = net_hub_find_client_by_name(strtol(vlan, NULL, 0), stack);
89
#include "net/tap.h" /* tap_has_ufo, ... */
67
- if (!nc) {
90
#include "qemu/error-report.h"
68
- monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
91
@@ -XXX,XX +XXX,XX @@ static void tap_win32_send(void *opaque)
69
- return NULL;
92
uint8_t *buf;
70
+ if (hub_id) {
93
int max_size = 4096;
71
+ nc = net_hub_find_client_by_name(strtol(hub_id, NULL, 0), name);
94
int size;
72
+ if (!nc) {
95
+ uint8_t min_pkt[ETH_ZLEN];
73
+ monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
96
+ size_t min_pktsz = sizeof(min_pkt);
74
+ return NULL;
97
98
size = tap_win32_read(s->handle, &buf, max_size);
99
if (size > 0) {
100
+ if (!s->nc.peer->do_not_pad) {
101
+ if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
102
+ buf = min_pkt;
103
+ size = min_pktsz;
75
+ }
104
+ }
76
+ } else {
105
+ }
77
+ nc = qemu_find_netdev(name);
106
+
78
+ if (!nc) {
107
qemu_send_packet(&s->nc, buf, size);
79
+ monitor_printf(mon, "unrecognized netdev id '%s'\n", name);
108
tap_win32_free_buffer(s->handle, buf);
80
+ return NULL;
109
}
110
diff --git a/net/tap.c b/net/tap.c
111
index XXXXXXX..XXXXXXX 100644
112
--- a/net/tap.c
113
+++ b/net/tap.c
114
@@ -XXX,XX +XXX,XX @@
115
#include <sys/socket.h>
116
#include <net/if.h>
117
118
+#include "net/eth.h"
119
#include "net/net.h"
120
#include "clients.h"
121
#include "monitor/monitor.h"
122
@@ -XXX,XX +XXX,XX @@ static void tap_send(void *opaque)
123
124
while (true) {
125
uint8_t *buf = s->buf;
126
+ uint8_t min_pkt[ETH_ZLEN];
127
+ size_t min_pktsz = sizeof(min_pkt);
128
129
size = tap_read_packet(s->fd, s->buf, sizeof(s->buf));
130
if (size <= 0) {
131
@@ -XXX,XX +XXX,XX @@ static void tap_send(void *opaque)
132
size -= s->host_vnet_hdr_len;
133
}
134
135
+ if (!s->nc.peer->do_not_pad) {
136
+ if (eth_pad_short_frame(min_pkt, &min_pktsz, buf, size)) {
137
+ buf = min_pkt;
138
+ size = min_pktsz;
81
+ }
139
+ }
82
}
140
+ }
83
if (strcmp(nc->model, "user")) {
141
+
84
monitor_printf(mon, "invalid device specified\n");
142
size = qemu_send_packet_async(&s->nc, buf, size, tap_send_completed);
85
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict)
143
if (size == 0) {
86
const char *arg2 = qdict_get_try_str(qdict, "arg2");
144
tap_read_poll(s, false);
87
const char *arg3 = qdict_get_try_str(qdict, "arg3");
88
89
- if (arg2) {
90
+ if (arg3) {
91
s = slirp_lookup(mon, arg1, arg2);
92
src_str = arg3;
93
+ } else if (arg2) {
94
+ s = slirp_lookup(mon, NULL, arg1);
95
+ src_str = arg2;
96
} else {
97
s = slirp_lookup(mon, NULL, NULL);
98
src_str = arg1;
99
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_add(Monitor *mon, const QDict *qdict)
100
const char *arg2 = qdict_get_try_str(qdict, "arg2");
101
const char *arg3 = qdict_get_try_str(qdict, "arg3");
102
103
- if (arg2) {
104
+ if (arg3) {
105
s = slirp_lookup(mon, arg1, arg2);
106
redir_str = arg3;
107
+ } else if (arg2) {
108
+ s = slirp_lookup(mon, NULL, arg1);
109
+ redir_str = arg2;
110
} else {
111
s = slirp_lookup(mon, NULL, NULL);
112
redir_str = arg1;
113
--
145
--
114
2.7.4
146
2.7.4
115
147
116
148
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Bin Meng <bmeng.cn@gmail.com>
2
2
3
QEMU can emulate hubs to connect NICs and netdevs. This is currently
3
For virtio-net, there is no need to pad the Ethernet frame size to
4
primarily used for the mis-named 'vlan' feature of the networking
4
60 bytes before sending to it.
5
subsystem. Now the 'vlan' feature has been marked as deprecated, since
6
its name is rather confusing and the users often rather mis-configure
7
their network when trying to use it. But while the 'vlan' parameter
8
should be removed at one point in time, the basic idea of emulating
9
a hub in QEMU is still good: It's useful for bundling up the output of
10
multiple NICs into one single l2tp netdev for example.
11
5
12
Now to be able to use the hubport feature without 'vlan's, there is one
6
Signed-off-by: Bin Meng <bmeng.cn@gmail.com>
13
missing piece: The possibility to connect a hubport to a netdev, too.
14
This patch adds this possibility by introducing a new "netdev=..."
15
parameter to the hubports.
16
17
To bundle up the output of multiple NICs into one socket netdev, you can
18
now run QEMU with these parameters for example:
19
20
qemu-system-ppc64 ... -netdev socket,id=s1,connect=:11122 \
21
-netdev hubport,hubid=1,id=h1,netdev=s1 \
22
-netdev hubport,hubid=1,id=h2 -device e1000,netdev=h2 \
23
-netdev hubport,hubid=1,id=h3 -device virtio-net-pci,netdev=h3
24
25
For using the socket netdev, you have got to start another QEMU as the
26
receiving side first, for example with network dumping enabled:
27
28
qemu-system-x86_64 -M isapc -netdev socket,id=s0,listen=:11122 \
29
-device ne2k_isa,netdev=s0 \
30
-object filter-dump,id=f1,netdev=s0,file=/tmp/dump.dat
31
32
After the ppc64 guest tried to boot from both NICs, you can see in the
33
dump file (using Wireshark, for example), that the output of both NICs
34
(the e1000 and the virtio-net-pci) has been successfully transferred
35
via the socket netdev in this case.
36
37
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
38
Signed-off-by: Thomas Huth <thuth@redhat.com>
39
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
40
---
8
---
41
net/hub.c | 27 +++++++++++++++++++++------
9
hw/net/virtio-net.c | 4 ++++
42
net/hub.h | 3 ++-
10
1 file changed, 4 insertions(+)
43
net/net.c | 2 +-
44
qapi/net.json | 4 +++-
45
qemu-options.hx | 8 +++++---
46
5 files changed, 32 insertions(+), 12 deletions(-)
47
11
48
diff --git a/net/hub.c b/net/hub.c
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
49
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
50
--- a/net/hub.c
14
--- a/hw/net/virtio-net.c
51
+++ b/net/hub.c
15
+++ b/hw/net/virtio-net.c
52
@@ -XXX,XX +XXX,XX @@
16
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
53
*/
17
object_get_typename(OBJECT(dev)), dev->id, n);
54
55
#include "qemu/osdep.h"
56
+#include "qapi/error.h"
57
#include "monitor/monitor.h"
58
#include "net/net.h"
59
#include "clients.h"
60
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_hub_port_info = {
61
.cleanup = net_hub_port_cleanup,
62
};
63
64
-static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
65
+static NetHubPort *net_hub_port_new(NetHub *hub, const char *name,
66
+ NetClientState *hubpeer)
67
{
68
NetClientState *nc;
69
NetHubPort *port;
70
@@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
71
name = default_name;
72
}
18
}
73
19
74
- nc = qemu_new_net_client(&net_hub_port_info, NULL, "hub", name);
20
+ for (i = 0; i < n->max_queues; i++) {
75
+ nc = qemu_new_net_client(&net_hub_port_info, hubpeer, "hub", name);
21
+ n->nic->ncs[i].do_not_pad = true;
76
port = DO_UPCAST(NetHubPort, nc, nc);
77
port->id = id;
78
port->hub = hub;
79
@@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
80
81
/**
82
* Create a port on a given hub
83
+ * @hub_id: Number of the hub
84
* @name: Net client name or NULL for default name.
85
+ * @hubpeer: Peer to use (if "netdev=id" has been specified)
86
*
87
* If there is no existing hub with the given id then a new hub is created.
88
*/
89
-NetClientState *net_hub_add_port(int hub_id, const char *name)
90
+NetClientState *net_hub_add_port(int hub_id, const char *name,
91
+ NetClientState *hubpeer)
92
{
93
NetHub *hub;
94
NetHubPort *port;
95
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_add_port(int hub_id, const char *name)
96
hub = net_hub_new(hub_id);
97
}
98
99
- port = net_hub_port_new(hub, name);
100
+ port = net_hub_port_new(hub, name, hubpeer);
101
return &port->nc;
102
}
103
104
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_port_find(int hub_id)
105
}
106
}
107
108
- nc = net_hub_add_port(hub_id, NULL);
109
+ nc = net_hub_add_port(hub_id, NULL, NULL);
110
return nc;
111
}
112
113
@@ -XXX,XX +XXX,XX @@ int net_init_hubport(const Netdev *netdev, const char *name,
114
NetClientState *peer, Error **errp)
115
{
116
const NetdevHubPortOptions *hubport;
117
+ NetClientState *hubpeer = NULL;
118
119
assert(netdev->type == NET_CLIENT_DRIVER_HUBPORT);
120
assert(!peer);
121
hubport = &netdev->u.hubport;
122
123
- net_hub_add_port(hubport->hubid, name);
124
+ if (hubport->has_netdev) {
125
+ hubpeer = qemu_find_netdev(hubport->netdev);
126
+ if (!hubpeer) {
127
+ error_setg(errp, "netdev '%s' not found", hubport->netdev);
128
+ return -1;
129
+ }
130
+ }
22
+ }
131
+
23
+
132
+ net_hub_add_port(hubport->hubid, name, hubpeer);
24
peer_test_vnet_hdr(n);
133
+
25
if (peer_has_vnet_hdr(n)) {
134
return 0;
26
for (i = 0; i < n->max_queues; i++) {
135
}
136
137
diff --git a/net/hub.h b/net/hub.h
138
index XXXXXXX..XXXXXXX 100644
139
--- a/net/hub.h
140
+++ b/net/hub.h
141
@@ -XXX,XX +XXX,XX @@
142
143
#include "qemu-common.h"
144
145
-NetClientState *net_hub_add_port(int hub_id, const char *name);
146
+NetClientState *net_hub_add_port(int hub_id, const char *name,
147
+ NetClientState *hubpeer);
148
NetClientState *net_hub_find_client_by_name(int hub_id, const char *name);
149
void net_hub_info(Monitor *mon);
150
void net_hub_check_clients(void);
151
diff --git a/net/net.c b/net/net.c
152
index XXXXXXX..XXXXXXX 100644
153
--- a/net/net.c
154
+++ b/net/net.c
155
@@ -XXX,XX +XXX,XX @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
156
/* Do not add to a vlan if it's a nic with a netdev= parameter. */
157
if (netdev->type != NET_CLIENT_DRIVER_NIC ||
158
!opts->u.nic.has_netdev) {
159
- peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL);
160
+ peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL, NULL);
161
}
162
163
if (net->has_vlan && !vlan_warned) {
164
diff --git a/qapi/net.json b/qapi/net.json
165
index XXXXXXX..XXXXXXX 100644
166
--- a/qapi/net.json
167
+++ b/qapi/net.json
168
@@ -XXX,XX +XXX,XX @@
169
# Connect two or more net clients through a software hub.
170
#
171
# @hubid: hub identifier number
172
+# @netdev: used to connect hub to a netdev instead of a device (since 2.12)
173
#
174
# Since: 1.2
175
##
176
{ 'struct': 'NetdevHubPortOptions',
177
'data': {
178
- 'hubid': 'int32' } }
179
+ 'hubid': 'int32',
180
+ '*netdev': 'str' } }
181
182
##
183
# @NetdevNetmapOptions:
184
diff --git a/qemu-options.hx b/qemu-options.hx
185
index XXXXXXX..XXXXXXX 100644
186
--- a/qemu-options.hx
187
+++ b/qemu-options.hx
188
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
189
#endif
190
"-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
191
" configure a vhost-user network, backed by a chardev 'dev'\n"
192
- "-netdev hubport,id=str,hubid=n\n"
193
+ "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
194
" configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL)
195
DEF("net", HAS_ARG, QEMU_OPTION_net,
196
"-net nic[,vlan=n][,netdev=nd][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
197
@@ -XXX,XX +XXX,XX @@ vde_switch -F -sock /tmp/myswitch
198
qemu-system-i386 linux.img -net nic -net vde,sock=/tmp/myswitch
199
@end example
200
201
-@item -netdev hubport,id=@var{id},hubid=@var{hubid}
202
+@item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
203
204
Create a hub port on QEMU "vlan" @var{hubid}.
205
206
The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
207
netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the
208
-required hub automatically.
209
+required hub automatically. Alternatively, you can also connect the hubport
210
+to another netdev with ID @var{nd} by using the @option{netdev=@var{nd}}
211
+option.
212
213
@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
214
215
--
27
--
216
2.7.4
28
2.7.4
217
29
218
30
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
From: Lukas Straub <lukasstraub2@web.de>
2
2
3
Packet size some time different or when network is busy.
3
In addition to removing the packet from the secondary queue,
4
Based on same payload size, but TCP protocol can not
4
we also need to free it.
5
guarantee send the same one packet in the same way,
6
5
7
like that:
6
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
8
We send this payload:
7
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
9
------------------------------
8
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
10
| header |1|2|3|4|5|6|7|8|9|0|
11
------------------------------
12
13
primary:
14
ppkt1:
15
----------------
16
| header |1|2|3|
17
----------------
18
ppkt2:
19
------------------------
20
| header |4|5|6|7|8|9|0|
21
------------------------
22
23
secondary:
24
spkt1:
25
------------------------------
26
| header |1|2|3|4|5|6|7|8|9|0|
27
------------------------------
28
29
In the original method, ppkt1 and ppkt2 are different in size and
30
spkt1, so they can't compare and trigger the checkpoint.
31
32
I have tested FTP get 200M and 1G file many times, I found that
33
the performance was less than 1% of the native.
34
35
Now I reconstructed the comparison of TCP packets based on the
36
TCP sequence number. first of all, ppkt1 and spkt1 have the same
37
starting sequence number, so they can compare, even though their
38
length is different. And then ppkt1 with a smaller payload length
39
is used as the comparison length, if the payload is same, send
40
out the ppkt1 and record the offset(the length of ppkt1 payload)
41
in spkt1. The next comparison, ppkt2 and spkt1 can be compared
42
from the recorded position of spkt1.
43
44
like that:
45
----------------
46
| header |1|2|3| ppkt1
47
---------|-----|
48
| |
49
---------v-----v--------------
50
| header |1|2|3|4|5|6|7|8|9|0| spkt1
51
---------------|\------------|
52
| \offset |
53
---------v-------------v
54
| header |4|5|6|7|8|9|0| ppkt2
55
------------------------
56
57
In this way, the performance can reach 20% of native in my multiple
58
tests.
59
60
Cc: Zhang Chen <zhangckid@gmail.com>
61
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>
62
Cc: Jason Wang <jasowang@redhat.com>
63
64
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
65
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
66
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
67
Reviewed-by: Zhang Chen <zhangckid@gmail.com>
68
Tested-by: Zhang Chen <zhangckid@gmail.com>
69
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
70
---
10
---
71
net/colo-compare.c | 343 +++++++++++++++++++++++++++++++++++------------------
11
net/colo-compare.c | 1 +
72
net/colo.c | 9 ++
12
1 file changed, 1 insertion(+)
73
net/colo.h | 15 +++
74
net/trace-events | 2 +-
75
4 files changed, 250 insertions(+), 119 deletions(-)
76
13
77
diff --git a/net/colo-compare.c b/net/colo-compare.c
14
diff --git a/net/colo-compare.c b/net/colo-compare.c
78
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
79
--- a/net/colo-compare.c
16
--- a/net/colo-compare.c
80
+++ b/net/colo-compare.c
17
+++ b/net/colo-compare.c
81
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
82
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
83
#define MAX_QUEUE_SIZE 1024
84
85
+#define COLO_COMPARE_FREE_PRIMARY 0x01
86
+#define COLO_COMPARE_FREE_SECONDARY 0x02
87
+
88
/* TODO: Should be configurable */
89
#define REGULAR_PACKET_CHECK_MS 3000
90
91
@@ -XXX,XX +XXX,XX @@ static gint seq_sorter(Packet *a, Packet *b, gpointer data)
92
return ntohl(atcp->th_seq) - ntohl(btcp->th_seq);
93
}
94
95
+static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
96
+{
97
+ Packet *pkt = data;
98
+ struct tcphdr *tcphd;
99
+
100
+ tcphd = (struct tcphdr *)pkt->transport_header;
101
+
102
+ pkt->tcp_seq = ntohl(tcphd->th_seq);
103
+ pkt->tcp_ack = ntohl(tcphd->th_ack);
104
+ *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack;
105
+ pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
106
+ + (tcphd->th_off << 2) - pkt->vnet_hdr_len;
107
+ pkt->payload_size = pkt->size - pkt->header_size;
108
+ pkt->seq_end = pkt->tcp_seq + pkt->payload_size;
109
+ pkt->flags = tcphd->th_flags;
110
+}
111
+
112
/*
113
* Return 1 on success, if return 0 means the
114
* packet will be dropped
115
*/
116
-static int colo_insert_packet(GQueue *queue, Packet *pkt)
117
+static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
118
{
119
if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) {
120
if (pkt->ip->ip_p == IPPROTO_TCP) {
121
+ fill_pkt_tcp_info(pkt, max_ack);
122
g_queue_insert_sorted(queue,
123
pkt,
124
(GCompareDataFunc)seq_sorter,
125
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
126
}
127
128
if (mode == PRIMARY_IN) {
129
- if (!colo_insert_packet(&conn->primary_list, pkt)) {
130
+ if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
131
error_report("colo compare primary queue size too big,"
132
"drop packet");
133
}
134
} else {
135
- if (!colo_insert_packet(&conn->secondary_list, pkt)) {
136
+ if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
137
error_report("colo compare secondary queue size too big,"
138
"drop packet");
139
}
140
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
141
return 0;
142
}
143
144
+static inline bool after(uint32_t seq1, uint32_t seq2)
145
+{
146
+ return (int32_t)(seq1 - seq2) > 0;
147
+}
148
+
149
+static void colo_release_primary_pkt(CompareState *s, Packet *pkt)
150
+{
151
+ int ret;
152
+ ret = compare_chr_send(s,
153
+ pkt->data,
154
+ pkt->size,
155
+ pkt->vnet_hdr_len);
156
+ if (ret < 0) {
157
+ error_report("colo send primary packet failed");
158
+ }
159
+ trace_colo_compare_main("packet same and release packet");
160
+ packet_destroy(pkt, NULL);
161
+}
162
+
163
/*
164
* The IP packets sent by primary and secondary
165
* will be compared in here
166
@@ -XXX,XX +XXX,XX @@ static int colo_compare_packet_payload(Packet *ppkt,
167
}
168
169
/*
170
- * Called from the compare thread on the primary
171
- * for compare tcp packet
172
- * compare_tcp copied from Dr. David Alan Gilbert's branch
173
- */
174
-static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
175
+ * return true means that the payload is consistent and
176
+ * need to make the next comparison, false means do
177
+ * the checkpoint
178
+*/
179
+static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
180
+ int8_t *mark, uint32_t max_ack)
181
{
182
- struct tcphdr *ptcp, *stcp;
183
- int res;
184
+ *mark = 0;
185
+
186
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
187
+ if (colo_compare_packet_payload(ppkt, spkt,
188
+ ppkt->header_size, spkt->header_size,
189
+ ppkt->payload_size)) {
190
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
191
+ return true;
192
+ }
193
+ }
194
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
195
+ if (colo_compare_packet_payload(ppkt, spkt,
196
+ ppkt->header_size, spkt->header_size,
197
+ ppkt->payload_size)) {
198
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
199
+ return true;
200
+ }
201
+ }
202
+
203
+ /* one part of secondary packet payload still need to be compared */
204
+ if (!after(ppkt->seq_end, spkt->seq_end)) {
205
+ if (colo_compare_packet_payload(ppkt, spkt,
206
+ ppkt->header_size + ppkt->offset,
207
+ spkt->header_size + spkt->offset,
208
+ ppkt->payload_size - ppkt->offset)) {
209
+ if (!after(ppkt->tcp_ack, max_ack)) {
210
+ *mark = COLO_COMPARE_FREE_PRIMARY;
211
+ spkt->offset += ppkt->payload_size - ppkt->offset;
212
+ return true;
213
+ } else {
214
+ /* secondary guest hasn't ack the data, don't send
215
+ * out this packet
216
+ */
217
+ return false;
218
+ }
219
+ }
220
+ } else {
221
+ /* primary packet is longer than secondary packet, compare
222
+ * the same part and mark the primary packet offset
223
+ */
224
+ if (colo_compare_packet_payload(ppkt, spkt,
225
+ ppkt->header_size + ppkt->offset,
226
+ spkt->header_size + spkt->offset,
227
+ spkt->payload_size - spkt->offset)) {
228
+ *mark = COLO_COMPARE_FREE_SECONDARY;
229
+ ppkt->offset += spkt->payload_size - spkt->offset;
230
+ return true;
231
+ }
232
+ }
233
234
- trace_colo_compare_main("compare tcp");
235
+ return false;
236
+}
237
238
- ptcp = (struct tcphdr *)ppkt->transport_header;
239
- stcp = (struct tcphdr *)spkt->transport_header;
240
+static void colo_compare_tcp(CompareState *s, Connection *conn)
241
+{
242
+ Packet *ppkt = NULL, *spkt = NULL;
243
+ int8_t mark;
244
245
/*
246
- * The 'identification' field in the IP header is *very* random
247
- * it almost never matches. Fudge this by ignoring differences in
248
- * unfragmented packets; they'll normally sort themselves out if different
249
- * anyway, and it should recover at the TCP level.
250
- * An alternative would be to get both the primary and secondary to rewrite
251
- * somehow; but that would need some sync traffic to sync the state
252
- */
253
- if (ntohs(ppkt->ip->ip_off) & IP_DF) {
254
- spkt->ip->ip_id = ppkt->ip->ip_id;
255
- /* and the sum will be different if the IDs were different */
256
- spkt->ip->ip_sum = ppkt->ip->ip_sum;
257
+ * If ppkt and spkt have the same payload, but ppkt's ACK
258
+ * is greater than spkt's ACK, in this case we can not
259
+ * send the ppkt because it will cause the secondary guest
260
+ * to miss sending some data in the next. Therefore, we
261
+ * record the maximum ACK in the current queue at both
262
+ * primary side and secondary side. Only when the ack is
263
+ * less than the smaller of the two maximum ack, then we
264
+ * can ensure that the packet's payload is acknowledged by
265
+ * primary and secondary.
266
+ */
267
+ uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack;
268
+
269
+pri:
270
+ if (g_queue_is_empty(&conn->primary_list)) {
271
+ return;
272
}
273
+ ppkt = g_queue_pop_head(&conn->primary_list);
274
+sec:
275
+ if (g_queue_is_empty(&conn->secondary_list)) {
276
+ g_queue_push_head(&conn->primary_list, ppkt);
277
+ return;
278
+ }
279
+ spkt = g_queue_pop_head(&conn->secondary_list);
280
281
- /*
282
- * Check tcp header length for tcp option field.
283
- * th_off > 5 means this tcp packet have options field.
284
- * The tcp options maybe always different.
285
- * for example:
286
- * From RFC 7323.
287
- * TCP Timestamps option (TSopt):
288
- * Kind: 8
289
- *
290
- * Length: 10 bytes
291
- *
292
- * +-------+-------+---------------------+---------------------+
293
- * |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)|
294
- * +-------+-------+---------------------+---------------------+
295
- * 1 1 4 4
296
- *
297
- * In this case the primary guest's timestamp always different with
298
- * the secondary guest's timestamp. COLO just focus on payload,
299
- * so we just need skip this field.
300
- */
301
+ if (ppkt->tcp_seq == ppkt->seq_end) {
302
+ colo_release_primary_pkt(s, ppkt);
303
+ ppkt = NULL;
304
+ }
305
306
- ptrdiff_t ptcp_offset, stcp_offset;
307
+ if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) {
308
+ trace_colo_compare_main("pri: this packet has compared");
309
+ colo_release_primary_pkt(s, ppkt);
310
+ ppkt = NULL;
311
+ }
312
313
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
314
- + (ptcp->th_off << 2) - ppkt->vnet_hdr_len;
315
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
316
- + (stcp->th_off << 2) - spkt->vnet_hdr_len;
317
- if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) {
318
- res = colo_compare_packet_payload(ppkt, spkt,
319
- ptcp_offset, stcp_offset,
320
- ppkt->size - ptcp_offset);
321
+ if (spkt->tcp_seq == spkt->seq_end) {
322
+ packet_destroy(spkt, NULL);
323
+ if (!ppkt) {
324
+ goto pri;
325
+ } else {
326
+ goto sec;
327
+ }
328
} else {
329
- trace_colo_compare_main("TCP: payload size of packets are different");
330
- res = -1;
331
+ if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) {
332
+ trace_colo_compare_main("sec: this packet has compared");
333
+ packet_destroy(spkt, NULL);
334
+ if (!ppkt) {
335
+ goto pri;
336
+ } else {
337
+ goto sec;
338
+ }
339
+ }
340
+ if (!ppkt) {
341
+ g_queue_push_head(&conn->secondary_list, spkt);
342
+ goto pri;
343
+ }
344
}
345
346
- if (res != 0 &&
347
- trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
348
- char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
349
-
350
- strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
351
- strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
352
- strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
353
- strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
354
-
355
- trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
356
- pri_ip_dst, spkt->size,
357
- sec_ip_src, sec_ip_dst);
358
-
359
- trace_colo_compare_tcp_info("pri tcp packet",
360
- ntohl(ptcp->th_seq),
361
- ntohl(ptcp->th_ack),
362
- res, ptcp->th_flags,
363
- ppkt->size);
364
-
365
- trace_colo_compare_tcp_info("sec tcp packet",
366
- ntohl(stcp->th_seq),
367
- ntohl(stcp->th_ack),
368
- res, stcp->th_flags,
369
- spkt->size);
370
+ if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) {
371
+ trace_colo_compare_tcp_info("pri",
372
+ ppkt->tcp_seq, ppkt->tcp_ack,
373
+ ppkt->header_size, ppkt->payload_size,
374
+ ppkt->offset, ppkt->flags);
375
+
376
+ trace_colo_compare_tcp_info("sec",
377
+ spkt->tcp_seq, spkt->tcp_ack,
378
+ spkt->header_size, spkt->payload_size,
379
+ spkt->offset, spkt->flags);
380
+
381
+ if (mark == COLO_COMPARE_FREE_PRIMARY) {
382
+ conn->compare_seq = ppkt->seq_end;
383
+ colo_release_primary_pkt(s, ppkt);
384
+ g_queue_push_head(&conn->secondary_list, spkt);
385
+ goto pri;
386
+ }
387
+ if (mark == COLO_COMPARE_FREE_SECONDARY) {
388
+ conn->compare_seq = spkt->seq_end;
389
+ packet_destroy(spkt, NULL);
390
+ goto sec;
391
+ }
392
+ if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) {
393
+ conn->compare_seq = ppkt->seq_end;
394
+ colo_release_primary_pkt(s, ppkt);
395
+ packet_destroy(spkt, NULL);
396
+ goto pri;
397
+ }
398
+ } else {
399
+ g_queue_push_head(&conn->primary_list, ppkt);
400
+ g_queue_push_head(&conn->secondary_list, spkt);
401
402
qemu_hexdump((char *)ppkt->data, stderr,
403
"colo-compare ppkt", ppkt->size);
404
qemu_hexdump((char *)spkt->data, stderr,
405
"colo-compare spkt", spkt->size);
406
- }
407
408
- return res;
409
+ /*
410
+ * colo_compare_inconsistent_notify();
411
+ * TODO: notice to checkpoint();
412
+ */
413
+ }
414
}
415
416
+
417
/*
418
* Called from the compare thread on the primary
419
* for compare udp packet
420
@@ -XXX,XX +XXX,XX @@ static void colo_old_packet_check(void *opaque)
421
(GCompareFunc)colo_old_packet_check_one_conn);
422
}
423
424
-/*
425
- * Called from the compare thread on the primary
426
- * for compare packet with secondary list of the
427
- * specified connection when a new packet was
428
- * queued to it.
429
- */
430
-static void colo_compare_connection(void *opaque, void *user_data)
431
+static void colo_compare_packet(CompareState *s, Connection *conn,
432
+ int (*HandlePacket)(Packet *spkt,
433
+ Packet *ppkt))
434
{
435
- CompareState *s = user_data;
436
- Connection *conn = opaque;
437
Packet *pkt = NULL;
438
GList *result = NULL;
439
- int ret;
440
441
while (!g_queue_is_empty(&conn->primary_list) &&
442
!g_queue_is_empty(&conn->secondary_list)) {
443
pkt = g_queue_pop_head(&conn->primary_list);
444
- switch (conn->ip_proto) {
445
- case IPPROTO_TCP:
446
- result = g_queue_find_custom(&conn->secondary_list,
447
- pkt, (GCompareFunc)colo_packet_compare_tcp);
448
- break;
449
- case IPPROTO_UDP:
450
- result = g_queue_find_custom(&conn->secondary_list,
451
- pkt, (GCompareFunc)colo_packet_compare_udp);
452
- break;
453
- case IPPROTO_ICMP:
454
- result = g_queue_find_custom(&conn->secondary_list,
455
- pkt, (GCompareFunc)colo_packet_compare_icmp);
456
- break;
457
- default:
458
- result = g_queue_find_custom(&conn->secondary_list,
459
- pkt, (GCompareFunc)colo_packet_compare_other);
460
- break;
461
- }
462
+ result = g_queue_find_custom(&conn->secondary_list,
463
+ pkt, (GCompareFunc)HandlePacket);
464
19
465
if (result) {
20
if (result) {
466
- ret = compare_chr_send(s,
21
colo_release_primary_pkt(s, pkt);
467
- pkt->data,
22
+ packet_destroy(result->data, NULL);
468
- pkt->size,
469
- pkt->vnet_hdr_len);
470
- if (ret < 0) {
471
- error_report("colo_send_primary_packet failed");
472
- }
473
- trace_colo_compare_main("packet same and release packet");
474
+ colo_release_primary_pkt(s, pkt);
475
g_queue_remove(&conn->secondary_list, result->data);
23
g_queue_remove(&conn->secondary_list, result->data);
476
- packet_destroy(pkt, NULL);
477
} else {
24
} else {
478
/*
25
/*
479
* If one packet arrive late, the secondary_list or
480
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
481
}
482
}
483
484
+/*
485
+ * Called from the compare thread on the primary
486
+ * for compare packet with secondary list of the
487
+ * specified connection when a new packet was
488
+ * queued to it.
489
+ */
490
+static void colo_compare_connection(void *opaque, void *user_data)
491
+{
492
+ CompareState *s = user_data;
493
+ Connection *conn = opaque;
494
+
495
+ switch (conn->ip_proto) {
496
+ case IPPROTO_TCP:
497
+ colo_compare_tcp(s, conn);
498
+ break;
499
+ case IPPROTO_UDP:
500
+ colo_compare_packet(s, conn, colo_packet_compare_udp);
501
+ break;
502
+ case IPPROTO_ICMP:
503
+ colo_compare_packet(s, conn, colo_packet_compare_icmp);
504
+ break;
505
+ default:
506
+ colo_compare_packet(s, conn, colo_packet_compare_other);
507
+ break;
508
+ }
509
+}
510
+
511
static int compare_chr_send(CompareState *s,
512
const uint8_t *buf,
513
uint32_t size,
514
diff --git a/net/colo.c b/net/colo.c
515
index XXXXXXX..XXXXXXX 100644
516
--- a/net/colo.c
517
+++ b/net/colo.c
518
@@ -XXX,XX +XXX,XX @@ Connection *connection_new(ConnectionKey *key)
519
conn->processing = false;
520
conn->offset = 0;
521
conn->syn_flag = 0;
522
+ conn->pack = 0;
523
+ conn->sack = 0;
524
g_queue_init(&conn->primary_list);
525
g_queue_init(&conn->secondary_list);
526
527
@@ -XXX,XX +XXX,XX @@ Packet *packet_new(const void *data, int size, int vnet_hdr_len)
528
pkt->size = size;
529
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
530
pkt->vnet_hdr_len = vnet_hdr_len;
531
+ pkt->tcp_seq = 0;
532
+ pkt->tcp_ack = 0;
533
+ pkt->seq_end = 0;
534
+ pkt->header_size = 0;
535
+ pkt->payload_size = 0;
536
+ pkt->offset = 0;
537
+ pkt->flags = 0;
538
539
return pkt;
540
}
541
diff --git a/net/colo.h b/net/colo.h
542
index XXXXXXX..XXXXXXX 100644
543
--- a/net/colo.h
544
+++ b/net/colo.h
545
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
546
int64_t creation_ms;
547
/* Get vnet_hdr_len from filter */
548
uint32_t vnet_hdr_len;
549
+ uint32_t tcp_seq; /* sequence number */
550
+ uint32_t tcp_ack; /* acknowledgement number */
551
+ /* the sequence number of the last byte of the packet */
552
+ uint32_t seq_end;
553
+ uint8_t header_size; /* the header length */
554
+ uint16_t payload_size; /* the payload length */
555
+ /* record the payload offset(the length that has been compared) */
556
+ uint16_t offset;
557
+ uint8_t flags; /* Flags(aka Control bits) */
558
} Packet;
559
560
typedef struct ConnectionKey {
561
@@ -XXX,XX +XXX,XX @@ typedef struct Connection {
562
/* flag to enqueue unprocessed_connections */
563
bool processing;
564
uint8_t ip_proto;
565
+ /* record the sequence number that has been compared */
566
+ uint32_t compare_seq;
567
+ /* the maximum of acknowledgement number in primary_list queue */
568
+ uint32_t pack;
569
+ /* the maximum of acknowledgement number in secondary_list queue */
570
+ uint32_t sack;
571
/* offset = secondary_seq - primary_seq */
572
tcp_seq offset;
573
/*
574
diff --git a/net/trace-events b/net/trace-events
575
index XXXXXXX..XXXXXXX 100644
576
--- a/net/trace-events
577
+++ b/net/trace-events
578
@@ -XXX,XX +XXX,XX @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
579
colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
580
colo_old_packet_check_found(int64_t old_time) "%" PRId64
581
colo_compare_miscompare(void) ""
582
-colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int res, uint32_t flag, int size) "side: %s seq/ack= %u/%u res= %d flags= 0x%x pkt_size: %d\n"
583
+colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int hdlen, int pdlen, int offset, int flags) "%s: seq/ack= %u/%u hdlen= %d pdlen= %d offset= %d flags=%d\n"
584
585
# net/filter-rewriter.c
586
colo_filter_rewriter_debug(void) ""
587
--
26
--
588
2.7.4
27
2.7.4
589
28
590
29
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
From: Lukas Straub <lukasstraub2@web.de>
2
2
3
Modified the function colo_packet_compare_common to prepare for the
3
g_queue_remove needs to look up the list entry first, but we
4
tcp packet comparison in the next patch.
4
already have it as result and can remove it directly with
5
g_queue_delete_link.
5
6
6
Cc: Zhang Chen <zhangckid@gmail.com>
7
Signed-off-by: Lukas Straub <lukasstraub2@web.de>
7
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>
8
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
Cc: Jason Wang <jasowang@redhat.com>
9
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
9
10
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
11
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
12
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
13
Reviewed-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
11
---
16
net/colo-compare.c | 88 +++++++++++++++++++++++++++---------------------------
12
net/colo-compare.c | 2 +-
17
1 file changed, 44 insertions(+), 44 deletions(-)
13
1 file changed, 1 insertion(+), 1 deletion(-)
18
14
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
15
diff --git a/net/colo-compare.c b/net/colo-compare.c
20
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
17
--- a/net/colo-compare.c
22
+++ b/net/colo-compare.c
18
+++ b/net/colo-compare.c
23
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
19
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
24
* return: 0 means packet same
20
if (result) {
25
* > 0 || < 0 means packet different
21
colo_release_primary_pkt(s, pkt);
26
*/
22
packet_destroy(result->data, NULL);
27
-static int colo_packet_compare_common(Packet *ppkt,
23
- g_queue_remove(&conn->secondary_list, result->data);
28
- Packet *spkt,
24
+ g_queue_delete_link(&conn->secondary_list, result);
29
- int poffset,
25
} else {
30
- int soffset)
26
/*
31
+static int colo_compare_packet_payload(Packet *ppkt,
27
* If one packet arrive late, the secondary_list or
32
+ Packet *spkt,
33
+ uint16_t poffset,
34
+ uint16_t soffset,
35
+ uint16_t len)
36
+
37
{
38
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
39
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
40
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt,
41
sec_ip_src, sec_ip_dst);
42
}
43
44
- poffset = ppkt->vnet_hdr_len + poffset;
45
- soffset = ppkt->vnet_hdr_len + soffset;
46
-
47
- if (ppkt->size - poffset == spkt->size - soffset) {
48
- return memcmp(ppkt->data + poffset,
49
- spkt->data + soffset,
50
- spkt->size - soffset);
51
- } else {
52
- trace_colo_compare_main("Net packet size are not the same");
53
- return -1;
54
- }
55
+ return memcmp(ppkt->data + poffset, spkt->data + soffset, len);
56
}
57
58
/*
59
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
60
* the secondary guest's timestamp. COLO just focus on payload,
61
* so we just need skip this field.
62
*/
63
- if (ptcp->th_off > 5) {
64
- ptrdiff_t ptcp_offset, stcp_offset;
65
66
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
67
- + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
68
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
69
- + (stcp->th_off * 4) - spkt->vnet_hdr_len;
70
+ ptrdiff_t ptcp_offset, stcp_offset;
71
72
- /*
73
- * When network is busy, some tcp options(like sack) will unpredictable
74
- * occur in primary side or secondary side. it will make packet size
75
- * not same, but the two packet's payload is identical. colo just
76
- * care about packet payload, so we skip the option field.
77
- */
78
- res = colo_packet_compare_common(ppkt, spkt, ptcp_offset, stcp_offset);
79
- } else if (ptcp->th_sum == stcp->th_sum) {
80
- res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN, ETH_HLEN);
81
+ ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
82
+ + (ptcp->th_off << 2) - ppkt->vnet_hdr_len;
83
+ stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
84
+ + (stcp->th_off << 2) - spkt->vnet_hdr_len;
85
+ if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) {
86
+ res = colo_compare_packet_payload(ppkt, spkt,
87
+ ptcp_offset, stcp_offset,
88
+ ppkt->size - ptcp_offset);
89
} else {
90
+ trace_colo_compare_main("TCP: payload size of packets are different");
91
res = -1;
92
}
93
94
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
95
*/
96
static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
97
{
98
- int ret;
99
- int network_header_length = ppkt->ip->ip_hl * 4;
100
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
101
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
102
103
trace_colo_compare_main("compare udp");
104
105
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
106
* other field like TOS,TTL,IP Checksum. we only need to compare
107
* the ip payload here.
108
*/
109
- ret = colo_packet_compare_common(ppkt, spkt,
110
- network_header_length + ETH_HLEN,
111
- network_header_length + ETH_HLEN);
112
-
113
- if (ret) {
114
+ if (ppkt->size != spkt->size) {
115
+ trace_colo_compare_main("UDP: payload size of packets are different");
116
+ return -1;
117
+ }
118
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
119
+ ppkt->size - offset)) {
120
trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
121
trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
122
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
123
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
124
qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
125
spkt->size);
126
}
127
+ return -1;
128
+ } else {
129
+ return 0;
130
}
131
-
132
- return ret;
133
}
134
135
/*
136
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
137
*/
138
static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
139
{
140
- int network_header_length = ppkt->ip->ip_hl * 4;
141
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
142
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
143
144
trace_colo_compare_main("compare icmp");
145
146
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
147
* other field like TOS,TTL,IP Checksum. we only need to compare
148
* the ip payload here.
149
*/
150
- if (colo_packet_compare_common(ppkt, spkt,
151
- network_header_length + ETH_HLEN,
152
- network_header_length + ETH_HLEN)) {
153
+ if (ppkt->size != spkt->size) {
154
+ trace_colo_compare_main("ICMP: payload size of packets are different");
155
+ return -1;
156
+ }
157
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
158
+ ppkt->size - offset)) {
159
trace_colo_compare_icmp_miscompare("primary pkt size",
160
ppkt->size);
161
trace_colo_compare_icmp_miscompare("Secondary pkt size",
162
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
163
*/
164
static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
165
{
166
+ uint16_t offset = ppkt->vnet_hdr_len;
167
+
168
trace_colo_compare_main("compare other");
169
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
170
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
171
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
172
sec_ip_src, sec_ip_dst);
173
}
174
175
- return colo_packet_compare_common(ppkt, spkt, 0, 0);
176
+ if (ppkt->size != spkt->size) {
177
+ trace_colo_compare_main("Other: payload size of packets are different");
178
+ return -1;
179
+ }
180
+ return colo_compare_packet_payload(ppkt, spkt, offset, offset,
181
+ ppkt->size - offset);
182
}
183
184
static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
185
--
28
--
186
2.7.4
29
2.7.4
187
30
188
31
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
The in6_address comes after the ip6_ext_hdr_routing header,
4
not after the ip6_ext_hdr one. Fix the offset.
5
6
Cc: qemu-stable@nongnu.org
7
Reported-by: Stefano Garzarella <sgarzare@redhat.com>
8
Fixes: eb700029c78 ("net_pkt: Extend packet abstraction as required by e1000e functionality")
9
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
10
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
11
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
net/eth.c | 2 +-
15
1 file changed, 1 insertion(+), 1 deletion(-)
16
17
diff --git a/net/eth.c b/net/eth.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/net/eth.c
20
+++ b/net/eth.c
21
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
22
}
23
24
bytes_read = iov_to_buf(pkt, pkt_frags,
25
- rthdr_offset + sizeof(*ext_hdr),
26
+ rthdr_offset + sizeof(*rthdr),
27
dst_addr, sizeof(*dst_addr));
28
29
return bytes_read == sizeof(*dst_addr);
30
--
31
2.7.4
32
33
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
The length field is already contained in the ip6_ext_hdr structure.
4
Check it directly in eth_parse_ipv6_hdr() before calling
5
_eth_get_rss_ex_dst_addr(), which gets a bit simplified.
6
7
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
8
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
9
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
net/eth.c | 14 +++++++-------
13
1 file changed, 7 insertions(+), 7 deletions(-)
14
15
diff --git a/net/eth.c b/net/eth.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/net/eth.c
18
+++ b/net/eth.c
19
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
20
{
21
struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
22
23
- if ((rthdr->rtype == 2) &&
24
- (rthdr->len == sizeof(struct in6_address) / 8) &&
25
- (rthdr->segleft == 1)) {
26
+ if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
27
28
size_t input_size = iov_size(pkt, pkt_frags);
29
size_t bytes_read;
30
@@ -XXX,XX +XXX,XX @@ bool eth_parse_ipv6_hdr(const struct iovec *pkt, int pkt_frags,
31
}
32
33
if (curr_ext_hdr_type == IP6_ROUTING) {
34
- info->rss_ex_dst_valid =
35
- _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
36
- ip6hdr_off + info->full_hdr_len,
37
- &ext_hdr, &info->rss_ex_dst);
38
+ if (ext_hdr.ip6r_len == sizeof(struct in6_address) / 8) {
39
+ info->rss_ex_dst_valid =
40
+ _eth_get_rss_ex_dst_addr(pkt, pkt_frags,
41
+ ip6hdr_off + info->full_hdr_len,
42
+ &ext_hdr, &info->rss_ex_dst);
43
+ }
44
} else if (curr_ext_hdr_type == IP6_DESTINATON) {
45
info->rss_ex_src_valid =
46
_eth_get_rss_ex_src_addr(pkt, pkt_frags,
47
--
48
2.7.4
49
50
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
The 'offset' argument represents the offset to the ip6_ext_hdr
4
header, rename it as 'ext_hdr_offset'.
5
6
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
7
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
8
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
net/eth.c | 6 +++---
12
1 file changed, 3 insertions(+), 3 deletions(-)
13
14
diff --git a/net/eth.c b/net/eth.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/net/eth.c
17
+++ b/net/eth.c
18
@@ -XXX,XX +XXX,XX @@ eth_is_ip6_extension_header_type(uint8_t hdr_type)
19
20
static bool
21
_eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
22
- size_t rthdr_offset,
23
+ size_t ext_hdr_offset,
24
struct ip6_ext_hdr *ext_hdr,
25
struct in6_address *dst_addr)
26
{
27
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
28
size_t input_size = iov_size(pkt, pkt_frags);
29
size_t bytes_read;
30
31
- if (input_size < rthdr_offset + sizeof(*ext_hdr)) {
32
+ if (input_size < ext_hdr_offset + sizeof(*ext_hdr)) {
33
return false;
34
}
35
36
bytes_read = iov_to_buf(pkt, pkt_frags,
37
- rthdr_offset + sizeof(*rthdr),
38
+ ext_hdr_offset + sizeof(*rthdr),
39
dst_addr, sizeof(*dst_addr));
40
41
return bytes_read == sizeof(*dst_addr);
42
--
43
2.7.4
44
45
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
4
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
5
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
net/eth.c | 14 ++++++--------
9
1 file changed, 6 insertions(+), 8 deletions(-)
10
11
diff --git a/net/eth.c b/net/eth.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/net/eth.c
14
+++ b/net/eth.c
15
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
16
struct in6_address *dst_addr)
17
{
18
struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
19
+ size_t input_size = iov_size(pkt, pkt_frags);
20
+ size_t bytes_read;
21
22
- if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
23
-
24
- size_t input_size = iov_size(pkt, pkt_frags);
25
- size_t bytes_read;
26
-
27
- if (input_size < ext_hdr_offset + sizeof(*ext_hdr)) {
28
- return false;
29
- }
30
+ if (input_size < ext_hdr_offset + sizeof(*ext_hdr)) {
31
+ return false;
32
+ }
33
34
+ if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
35
bytes_read = iov_to_buf(pkt, pkt_frags,
36
ext_hdr_offset + sizeof(*rthdr),
37
dst_addr, sizeof(*dst_addr));
38
--
39
2.7.4
40
41
diff view generated by jsdifflib
New patch
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
2
3
We want to check fields from ip6_ext_hdr_routing structure
4
and if correct read the full in6_address. Let's directly check
5
if our iovec contains enough data for everything, else return
6
early.
7
8
Suggested-by: Stefano Garzarella <sgarzare@redhat.com>
9
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
13
net/eth.c | 2 +-
14
1 file changed, 1 insertion(+), 1 deletion(-)
15
16
diff --git a/net/eth.c b/net/eth.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/net/eth.c
19
+++ b/net/eth.c
20
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
21
size_t input_size = iov_size(pkt, pkt_frags);
22
size_t bytes_read;
23
24
- if (input_size < ext_hdr_offset + sizeof(*ext_hdr)) {
25
+ if (input_size < ext_hdr_offset + sizeof(*rthdr) + sizeof(*dst_addr)) {
26
return false;
27
}
28
29
--
30
2.7.4
31
32
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
gently asked by his automatic reply :)
3
We can't know the caller read enough data in the memory pointed
4
4
by ext_hdr to cast it as an ip6_ext_hdr_routing.
5
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
5
Declare rt_hdr on the stack and fill it again from the iovec.
6
7
Since we already checked there is enough data in the iovec buffer,
8
simply add an assert() call to consume the bytes_read variable.
9
10
This fixes a 2-byte buffer overrun in eth_parse_ipv6_hdr() reported
11
by QEMU fuzzer:
12
13
$ cat << EOF | ./qemu-system-i386 -M pc-q35-5.0 \
14
-accel qtest -monitor none \
15
-serial none -nographic -qtest stdio
16
outl 0xcf8 0x80001010
17
outl 0xcfc 0xe1020000
18
outl 0xcf8 0x80001004
19
outw 0xcfc 0x7
20
write 0x25 0x1 0x86
21
write 0x26 0x1 0xdd
22
write 0x4f 0x1 0x2b
23
write 0xe1020030 0x4 0x190002e1
24
write 0xe102003a 0x2 0x0807
25
write 0xe1020048 0x4 0x12077cdd
26
write 0xe1020400 0x4 0xba077cdd
27
write 0xe1020420 0x4 0x190002e1
28
write 0xe1020428 0x4 0x3509d807
29
write 0xe1020438 0x1 0xe2
30
EOF
31
=================================================================
32
==2859770==ERROR: AddressSanitizer: stack-buffer-overflow on address 0x7ffdef904902 at pc 0x561ceefa78de bp 0x7ffdef904820 sp 0x7ffdef904818
33
READ of size 1 at 0x7ffdef904902 thread T0
34
#0 0x561ceefa78dd in _eth_get_rss_ex_dst_addr net/eth.c:410:17
35
#1 0x561ceefa41fb in eth_parse_ipv6_hdr net/eth.c:532:17
36
#2 0x561cef7de639 in net_tx_pkt_parse_headers hw/net/net_tx_pkt.c:228:14
37
#3 0x561cef7dbef4 in net_tx_pkt_parse hw/net/net_tx_pkt.c:273:9
38
#4 0x561ceec29f22 in e1000e_process_tx_desc hw/net/e1000e_core.c:730:29
39
#5 0x561ceec28eac in e1000e_start_xmit hw/net/e1000e_core.c:927:9
40
#6 0x561ceec1baab in e1000e_set_tdt hw/net/e1000e_core.c:2444:9
41
#7 0x561ceebf300e in e1000e_core_write hw/net/e1000e_core.c:3256:9
42
#8 0x561cef3cd4cd in e1000e_mmio_write hw/net/e1000e.c:110:5
43
44
Address 0x7ffdef904902 is located in stack of thread T0 at offset 34 in frame
45
#0 0x561ceefa320f in eth_parse_ipv6_hdr net/eth.c:486
46
47
This frame has 1 object(s):
48
[32, 34) 'ext_hdr' (line 487) <== Memory access at offset 34 overflows this variable
49
HINT: this may be a false positive if your program uses some custom stack unwind mechanism, swapcontext or vfork
50
(longjmp and C++ exceptions *are* supported)
51
SUMMARY: AddressSanitizer: stack-buffer-overflow net/eth.c:410:17 in _eth_get_rss_ex_dst_addr
52
Shadow bytes around the buggy address:
53
0x10003df188d0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
54
0x10003df188e0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
55
0x10003df188f0: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
56
0x10003df18900: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
57
0x10003df18910: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1
58
=>0x10003df18920:[02]f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
59
0x10003df18930: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
60
0x10003df18940: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
61
0x10003df18950: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
62
0x10003df18960: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
63
0x10003df18970: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
64
Shadow byte legend (one shadow byte represents 8 application bytes):
65
Addressable: 00
66
Partially addressable: 01 02 03 04 05 06 07
67
Stack left redzone: f1
68
Stack right redzone: f3
69
==2859770==ABORTING
70
71
Add the corresponding qtest case with the fuzzer reproducer.
72
73
FWIW GCC 11 similarly reported:
74
75
net/eth.c: In function 'eth_parse_ipv6_hdr':
76
net/eth.c:410:15: error: array subscript 'struct ip6_ext_hdr_routing[0]' is partly outside array bounds of 'struct ip6_ext_hdr[1]' [-Werror=array-bounds]
77
410 | if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
78
| ~~~~~^~~~~~~
79
net/eth.c:485:24: note: while referencing 'ext_hdr'
80
485 | struct ip6_ext_hdr ext_hdr;
81
| ^~~~~~~
82
net/eth.c:410:38: error: array subscript 'struct ip6_ext_hdr_routing[0]' is partly outside array bounds of 'struct ip6_ext_hdr[1]' [-Werror=array-bounds]
83
410 | if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
84
| ~~~~~^~~~~~~~~
85
net/eth.c:485:24: note: while referencing 'ext_hdr'
86
485 | struct ip6_ext_hdr ext_hdr;
87
| ^~~~~~~
88
89
Cc: qemu-stable@nongnu.org
90
Buglink: https://bugs.launchpad.net/qemu/+bug/1879531
91
Reported-by: Alexander Bulekov <alxndr@bu.edu>
92
Reported-by: Miroslav Rezanina <mrezanin@redhat.com>
93
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
94
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
95
Fixes: eb700029c78 ("net_pkt: Extend packet abstraction as required by e1000e functionality")
96
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
97
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
98
---
8
MAINTAINERS | 8 ++++----
99
MAINTAINERS | 1 +
9
1 file changed, 4 insertions(+), 4 deletions(-)
100
net/eth.c | 13 +++++++----
101
tests/qtest/fuzz-e1000e-test.c | 53 ++++++++++++++++++++++++++++++++++++++++++
102
tests/qtest/meson.build | 1 +
103
4 files changed, 63 insertions(+), 5 deletions(-)
104
create mode 100644 tests/qtest/fuzz-e1000e-test.c
10
105
11
diff --git a/MAINTAINERS b/MAINTAINERS
106
diff --git a/MAINTAINERS b/MAINTAINERS
12
index XXXXXXX..XXXXXXX 100644
107
index XXXXXXX..XXXXXXX 100644
13
--- a/MAINTAINERS
108
--- a/MAINTAINERS
14
+++ b/MAINTAINERS
109
+++ b/MAINTAINERS
15
@@ -XXX,XX +XXX,XX @@ F: hw/scsi/mfi.h
110
@@ -XXX,XX +XXX,XX @@ e1000e
16
F: tests/megasas-test.c
111
M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
17
18
Network packet abstractions
19
-M: Dmitry Fleytman <dmitry@daynix.com>
20
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
21
S: Maintained
22
F: include/net/eth.h
23
F: net/eth.c
24
@@ -XXX,XX +XXX,XX @@ F: hw/net/net_rx_pkt*
25
F: hw/net/net_tx_pkt*
26
27
Vmware
28
-M: Dmitry Fleytman <dmitry@daynix.com>
29
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
30
S: Maintained
31
F: hw/net/vmxnet*
32
F: hw/scsi/vmw_pvscsi*
33
@@ -XXX,XX +XXX,XX @@ F: hw/mem/nvdimm.c
34
F: include/hw/mem/nvdimm.h
35
36
e1000x
37
-M: Dmitry Fleytman <dmitry@daynix.com>
38
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
39
S: Maintained
40
F: hw/net/e1000x*
41
42
e1000e
43
-M: Dmitry Fleytman <dmitry@daynix.com>
44
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
45
S: Maintained
112
S: Maintained
46
F: hw/net/e1000e*
113
F: hw/net/e1000e*
47
114
+F: tests/qtest/fuzz-e1000e-test.c
115
116
eepro100
117
M: Stefan Weil <sw@weilnetz.de>
118
diff --git a/net/eth.c b/net/eth.c
119
index XXXXXXX..XXXXXXX 100644
120
--- a/net/eth.c
121
+++ b/net/eth.c
122
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
123
struct ip6_ext_hdr *ext_hdr,
124
struct in6_address *dst_addr)
125
{
126
- struct ip6_ext_hdr_routing *rthdr = (struct ip6_ext_hdr_routing *) ext_hdr;
127
+ struct ip6_ext_hdr_routing rt_hdr;
128
size_t input_size = iov_size(pkt, pkt_frags);
129
size_t bytes_read;
130
131
- if (input_size < ext_hdr_offset + sizeof(*rthdr) + sizeof(*dst_addr)) {
132
+ if (input_size < ext_hdr_offset + sizeof(rt_hdr) + sizeof(*dst_addr)) {
133
return false;
134
}
135
136
- if ((rthdr->rtype == 2) && (rthdr->segleft == 1)) {
137
- bytes_read = iov_to_buf(pkt, pkt_frags,
138
- ext_hdr_offset + sizeof(*rthdr),
139
+ bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
140
+ &rt_hdr, sizeof(rt_hdr));
141
+ assert(bytes_read == sizeof(rt_hdr));
142
+
143
+ if ((rt_hdr.rtype == 2) && (rt_hdr.segleft == 1)) {
144
+ bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
145
dst_addr, sizeof(*dst_addr));
146
147
return bytes_read == sizeof(*dst_addr);
148
diff --git a/tests/qtest/fuzz-e1000e-test.c b/tests/qtest/fuzz-e1000e-test.c
149
new file mode 100644
150
index XXXXXXX..XXXXXXX
151
--- /dev/null
152
+++ b/tests/qtest/fuzz-e1000e-test.c
153
@@ -XXX,XX +XXX,XX @@
154
+/*
155
+ * QTest testcase for e1000e device generated by fuzzer
156
+ *
157
+ * Copyright (c) 2021 Red Hat, Inc.
158
+ *
159
+ * SPDX-License-Identifier: GPL-2.0-or-later
160
+ */
161
+
162
+#include "qemu/osdep.h"
163
+
164
+#include "libqos/libqtest.h"
165
+
166
+/*
167
+ * https://bugs.launchpad.net/qemu/+bug/1879531
168
+ */
169
+static void test_lp1879531_eth_get_rss_ex_dst_addr(void)
170
+{
171
+ QTestState *s;
172
+
173
+ s = qtest_init("-nographic -monitor none -serial none -M pc-q35-5.0");
174
+
175
+ qtest_outl(s, 0xcf8, 0x80001010);
176
+ qtest_outl(s, 0xcfc, 0xe1020000);
177
+ qtest_outl(s, 0xcf8, 0x80001004);
178
+ qtest_outw(s, 0xcfc, 0x7);
179
+ qtest_writeb(s, 0x25, 0x86);
180
+ qtest_writeb(s, 0x26, 0xdd);
181
+ qtest_writeb(s, 0x4f, 0x2b);
182
+
183
+ qtest_writel(s, 0xe1020030, 0x190002e1);
184
+ qtest_writew(s, 0xe102003a, 0x0807);
185
+ qtest_writel(s, 0xe1020048, 0x12077cdd);
186
+ qtest_writel(s, 0xe1020400, 0xba077cdd);
187
+ qtest_writel(s, 0xe1020420, 0x190002e1);
188
+ qtest_writel(s, 0xe1020428, 0x3509d807);
189
+ qtest_writeb(s, 0xe1020438, 0xe2);
190
+ qtest_writeb(s, 0x4f, 0x2b);
191
+ qtest_quit(s);
192
+}
193
+
194
+int main(int argc, char **argv)
195
+{
196
+ const char *arch = qtest_get_arch();
197
+
198
+ g_test_init(&argc, &argv, NULL);
199
+
200
+ if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) {
201
+ qtest_add_func("fuzz/test_lp1879531_eth_get_rss_ex_dst_addr",
202
+ test_lp1879531_eth_get_rss_ex_dst_addr);
203
+ }
204
+
205
+ return g_test_run();
206
+}
207
diff --git a/tests/qtest/meson.build b/tests/qtest/meson.build
208
index XXXXXXX..XXXXXXX 100644
209
--- a/tests/qtest/meson.build
210
+++ b/tests/qtest/meson.build
211
@@ -XXX,XX +XXX,XX @@ qtests_i386 = \
212
(config_all_devices.has_key('CONFIG_TPM_TIS_ISA') ? ['tpm-tis-test'] : []) + \
213
(config_all_devices.has_key('CONFIG_TPM_TIS_ISA') ? ['tpm-tis-swtpm-test'] : []) + \
214
(config_all_devices.has_key('CONFIG_RTL8139_PCI') ? ['rtl8139-test'] : []) + \
215
+ (config_all_devices.has_key('CONFIG_E1000E_PCI_EXPRESS') ? ['fuzz-e1000e-test'] : []) + \
216
qtests_pci + \
217
['fdc-test',
218
'ide-test',
48
--
219
--
49
2.7.4
220
2.7.4
50
221
51
222
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
The vlan concept is marked as deprecated, so we should not use
3
To simplify the function body, invert the if() statement, returning
4
this for examples in the documentation anymore.
4
earlier.
5
Since we already checked there is enough data in the iovec buffer,
6
simply add an assert() call to consume the bytes_read variable.
5
7
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
8
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
9
Reviewed-by: Miroslav Rezanina <mrezanin@redhat.com>
10
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
12
---
9
qemu-options.hx | 4 ++--
13
net/eth.c | 13 ++++++-------
10
1 file changed, 2 insertions(+), 2 deletions(-)
14
1 file changed, 6 insertions(+), 7 deletions(-)
11
15
12
diff --git a/qemu-options.hx b/qemu-options.hx
16
diff --git a/net/eth.c b/net/eth.c
13
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
14
--- a/qemu-options.hx
18
--- a/net/eth.c
15
+++ b/qemu-options.hx
19
+++ b/net/eth.c
16
@@ -XXX,XX +XXX,XX @@ qemu-system-i386 linux.img -net nic -net tap
20
@@ -XXX,XX +XXX,XX @@ _eth_get_rss_ex_dst_addr(const struct iovec *pkt, int pkt_frags,
17
#launch a QEMU instance with two NICs, each one connected
21
bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset,
18
#to a TAP device
22
&rt_hdr, sizeof(rt_hdr));
19
qemu-system-i386 linux.img \
23
assert(bytes_read == sizeof(rt_hdr));
20
- -net nic,vlan=0 -net tap,vlan=0,ifname=tap0 \
24
-
21
- -net nic,vlan=1 -net tap,vlan=1,ifname=tap1
25
- if ((rt_hdr.rtype == 2) && (rt_hdr.segleft == 1)) {
22
+ -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \
26
- bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
23
+ -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1
27
- dst_addr, sizeof(*dst_addr));
24
@end example
28
-
25
29
- return bytes_read == sizeof(*dst_addr);
26
@example
30
+ if ((rt_hdr.rtype != 2) || (rt_hdr.segleft != 1)) {
31
+ return false;
32
}
33
+ bytes_read = iov_to_buf(pkt, pkt_frags, ext_hdr_offset + sizeof(rt_hdr),
34
+ dst_addr, sizeof(*dst_addr));
35
+ assert(bytes_read == sizeof(*dst_addr));
36
37
- return false;
38
+ return true;
39
}
40
41
static bool
27
--
42
--
28
2.7.4
43
2.7.4
29
44
30
45
diff view generated by jsdifflib