1
The following changes since commit e607bbee553cfe73072870cef458cfa4e78133e2:
1
The following changes since commit 48033ad678ae2def43bf0d543a2c4c3d2a93feaf:
2
2
3
Merge remote-tracking branch 'remotes/edgar/tags/edgar/xilinx-next-2018-01-26.for-upstream' into staging (2018-01-26 14:24:25 +0000)
3
Merge remote-tracking branch 'remotes/vsementsov/tags/pull-nbd-2022-02-09-v2' into staging (2022-02-12 22:04:07 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to bf4835a4d5338bb7424827715df22570a8adc67c:
9
for you to fetch changes up to 9d6267b240c114d1a3cd314a08fd6e1339d34b83:
10
10
11
MAINTAINERS: update Dmitry Fleytman email (2018-01-29 16:05:38 +0800)
11
net/eth: Don't consider ESP to be an IPv6 option header (2022-02-14 11:50:44 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
----------------------------------------------------------------
15
----------------------------------------------------------------
16
Mao Zhongyi (2):
16
Nick Hudson (1):
17
colo: modified the payload compare function
17
hw/net: e1000e: Clear ICR on read when using non MSI-X interrupts
18
colo: compare the packet based on the tcp sequence number
18
19
Peter Foley (2):
20
net/tap: Set return code on failure
21
net: Fix uninitialized data usage
19
22
20
Philippe Mathieu-Daudé (1):
23
Philippe Mathieu-Daudé (1):
21
MAINTAINERS: update Dmitry Fleytman email
24
hw/net/vmxnet3: Log guest-triggerable errors using LOG_GUEST_ERROR
22
25
23
Thomas Huth (3):
26
Rao Lei (1):
24
net: Allow hubports to connect to other netdevs
27
net/filter: Optimize filter_send to coroutine
25
net: Allow netdevs to be used with 'hostfwd_add' and 'hostfwd_remove'
26
qemu-doc: Get rid of "vlan=X" example in the documentation
27
28
28
MAINTAINERS | 8 +-
29
Thomas Jansen (1):
29
hmp-commands.hx | 4 +-
30
net/eth: Don't consider ESP to be an IPv6 option header
30
net/colo-compare.c | 411 +++++++++++++++++++++++++++++++++--------------------
31
31
net/colo.c | 9 ++
32
Zhang Chen (2):
32
net/colo.h | 15 ++
33
net/colo-compare.c: Optimize compare order for performance
33
net/hub.c | 27 +++-
34
net/colo-compare.c: Update the default value comments
34
net/hub.h | 3 +-
35
35
net/net.c | 2 +-
36
hw/net/e1000e_core.c | 5 ++++
36
net/slirp.c | 33 +++--
37
hw/net/trace-events | 1 +
37
net/trace-events | 2 +-
38
hw/net/vmxnet3.c | 4 +++-
38
qapi/net.json | 4 +-
39
net/colo-compare.c | 28 +++++++++++-----------
39
qemu-options.hx | 12 +-
40
net/eth.c | 1 -
40
12 files changed, 347 insertions(+), 183 deletions(-)
41
net/filter-mirror.c | 66 +++++++++++++++++++++++++++++++++++++++++-----------
42
net/tap-linux.c | 1 +
43
net/tap.c | 1 +
44
8 files changed, 78 insertions(+), 29 deletions(-)
41
45
42
46
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
2
2
3
gently asked by his automatic reply :)
3
The "Interrupt Cause" register (VMXNET3_REG_ICR) is read-only.
4
Write accesses are ignored. Log them as LOG_GUEST_ERROR
5
instead of aborting:
4
6
5
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
[R +0.239743] writeq 0xe0002031 0x46291a5a55460800
8
ERROR:hw/net/vmxnet3.c:1819:vmxnet3_io_bar1_write: code should not be reached
9
Thread 1 "qemu-system-i38" received signal SIGABRT, Aborted.
10
(gdb) bt
11
#3 0x74c397d3 in __GI_abort () at abort.c:79
12
#4 0x76d3cd4c in g_assertion_message (domain=<optimized out>, file=<optimized out>, line=<optimized out>, func=<optimized out>, message=<optimized out>) at ../glib/gtestutils.c:3223
13
#5 0x76d9d45f in g_assertion_message_expr
14
(domain=0x0, file=0x59fc2e53 "hw/net/vmxnet3.c", line=1819, func=0x59fc11e0 <__func__.vmxnet3_io_bar1_write> "vmxnet3_io_bar1_write", expr=<optimized out>)
15
at ../glib/gtestutils.c:3249
16
#6 0x57e80a3a in vmxnet3_io_bar1_write (opaque=0x62814100, addr=56, val=70, size=4) at hw/net/vmxnet3.c:1819
17
#7 0x58c2d894 in memory_region_write_accessor (mr=0x62816b90, addr=56, value=0x7fff9450, size=4, shift=0, mask=4294967295, attrs=...) at softmmu/memory.c:492
18
#8 0x58c2d1d2 in access_with_adjusted_size (addr=56, value=0x7fff9450, size=1, access_size_min=4, access_size_max=4, access_fn=
19
0x58c2d290 <memory_region_write_accessor>, mr=0x62816b90, attrs=...) at softmmu/memory.c:554
20
#9 0x58c2bae7 in memory_region_dispatch_write (mr=0x62816b90, addr=56, data=70, op=MO_8, attrs=...) at softmmu/memory.c:1504
21
#10 0x58bfd034 in flatview_write_continue (fv=0x606000181700, addr=0xe0002038, attrs=..., ptr=0x7fffb9e0, len=1, addr1=56, l=1, mr=0x62816b90)
22
at softmmu/physmem.c:2782
23
#11 0x58beba00 in flatview_write (fv=0x606000181700, addr=0xe0002031, attrs=..., buf=0x7fffb9e0, len=8) at softmmu/physmem.c:2822
24
#12 0x58beb589 in address_space_write (as=0x608000015f20, addr=0xe0002031, attrs=..., buf=0x7fffb9e0, len=8) at softmmu/physmem.c:2914
25
26
Reported-by: Dike <dike199774@qq.com>
27
Reported-by: Duhao <504224090@qq.com>
28
BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=2032932
29
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
30
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
31
---
8
MAINTAINERS | 8 ++++----
32
hw/net/vmxnet3.c | 4 +++-
9
1 file changed, 4 insertions(+), 4 deletions(-)
33
1 file changed, 3 insertions(+), 1 deletion(-)
10
34
11
diff --git a/MAINTAINERS b/MAINTAINERS
35
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
12
index XXXXXXX..XXXXXXX 100644
36
index XXXXXXX..XXXXXXX 100644
13
--- a/MAINTAINERS
37
--- a/hw/net/vmxnet3.c
14
+++ b/MAINTAINERS
38
+++ b/hw/net/vmxnet3.c
15
@@ -XXX,XX +XXX,XX @@ F: hw/scsi/mfi.h
39
@@ -XXX,XX +XXX,XX @@ vmxnet3_io_bar1_write(void *opaque,
16
F: tests/megasas-test.c
40
case VMXNET3_REG_ICR:
17
41
VMW_CBPRN("Write BAR1 [VMXNET3_REG_ICR] = %" PRIx64 ", size %d",
18
Network packet abstractions
42
val, size);
19
-M: Dmitry Fleytman <dmitry@daynix.com>
43
- g_assert_not_reached();
20
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
44
+ qemu_log_mask(LOG_GUEST_ERROR,
21
S: Maintained
45
+ "%s: write to read-only register VMXNET3_REG_ICR\n",
22
F: include/net/eth.h
46
+ TYPE_VMXNET3);
23
F: net/eth.c
47
break;
24
@@ -XXX,XX +XXX,XX @@ F: hw/net/net_rx_pkt*
48
25
F: hw/net/net_tx_pkt*
49
/* Event Cause Register */
26
27
Vmware
28
-M: Dmitry Fleytman <dmitry@daynix.com>
29
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
30
S: Maintained
31
F: hw/net/vmxnet*
32
F: hw/scsi/vmw_pvscsi*
33
@@ -XXX,XX +XXX,XX @@ F: hw/mem/nvdimm.c
34
F: include/hw/mem/nvdimm.h
35
36
e1000x
37
-M: Dmitry Fleytman <dmitry@daynix.com>
38
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
39
S: Maintained
40
F: hw/net/e1000x*
41
42
e1000e
43
-M: Dmitry Fleytman <dmitry@daynix.com>
44
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
45
S: Maintained
46
F: hw/net/e1000e*
47
48
--
50
--
49
2.7.4
51
2.7.4
50
52
51
53
diff view generated by jsdifflib
New patch
1
From: Peter Foley <pefoley@google.com>
1
2
3
Match the other error handling in this function.
4
5
Fixes: e7b347d0bf6 ("net: detect errors from probing vnet hdr flag for TAP devices")
6
7
Reviewed-by: Patrick Venture <venture@google.com>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Peter Foley <pefoley@google.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
net/tap.c | 1 +
13
1 file changed, 1 insertion(+)
14
15
diff --git a/net/tap.c b/net/tap.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/net/tap.c
18
+++ b/net/tap.c
19
@@ -XXX,XX +XXX,XX @@ int net_init_tap(const Netdev *netdev, const char *name,
20
if (i == 0) {
21
vnet_hdr = tap_probe_vnet_hdr(fd, errp);
22
if (vnet_hdr < 0) {
23
+ ret = -1;
24
goto free_fail;
25
}
26
} else if (vnet_hdr != tap_probe_vnet_hdr(fd, NULL)) {
27
--
28
2.7.4
29
30
diff view generated by jsdifflib
New patch
1
From: Peter Foley <pefoley@google.com>
1
2
3
e.g.
4
1109 15:16:20.151506 Uninitialized bytes in ioctl_common_pre at offset 0 inside [0x7ffc516af9b8, 4)
5
1109 15:16:20.151659 ==588974==WARNING: MemorySanitizer: use-of-uninitialized-value
6
1109 15:16:20.312923 #0 0x5639b88acb21 in tap_probe_vnet_hdr_len third_party/qemu/net/tap-linux.c:183:9
7
1109 15:16:20.312952 #1 0x5639b88afd66 in net_tap_fd_init third_party/qemu/net/tap.c:409:9
8
1109 15:16:20.312954 #2 0x5639b88b2d1b in net_init_tap_one third_party/qemu/net/tap.c:681:19
9
1109 15:16:20.312956 #3 0x5639b88b16a8 in net_init_tap third_party/qemu/net/tap.c:912:13
10
1109 15:16:20.312957 #4 0x5639b8890175 in net_client_init1 third_party/qemu/net/net.c:1110:9
11
1109 15:16:20.312958 #5 0x5639b888f912 in net_client_init third_party/qemu/net/net.c:1208:15
12
1109 15:16:20.312960 #6 0x5639b8894aa5 in net_param_nic third_party/qemu/net/net.c:1588:11
13
1109 15:16:20.312961 #7 0x5639b900cd18 in qemu_opts_foreach third_party/qemu/util/qemu-option.c:1135:14
14
1109 15:16:20.312962 #8 0x5639b889393c in net_init_clients third_party/qemu/net/net.c:1612:9
15
1109 15:16:20.312964 #9 0x5639b717aaf3 in qemu_create_late_backends third_party/qemu/softmmu/vl.c:1962:5
16
1109 15:16:20.312965 #10 0x5639b717aaf3 in qemu_init third_party/qemu/softmmu/vl.c:3694:5
17
1109 15:16:20.312967 #11 0x5639b71083b8 in main third_party/qemu/softmmu/main.c:49:5
18
1109 15:16:20.312968 #12 0x7f464de1d8d2 in __libc_start_main (/usr/grte/v5/lib64/libc.so.6+0x628d2)
19
1109 15:16:20.312969 #13 0x5639b6bbd389 in _start /usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120
20
1109 15:16:20.312970
21
1109 15:16:20.312975 Uninitialized value was stored to memory at
22
1109 15:16:20.313393 #0 0x5639b88acbee in tap_probe_vnet_hdr_len third_party/qemu/net/tap-linux.c
23
1109 15:16:20.313396 #1 0x5639b88afd66 in net_tap_fd_init third_party/qemu/net/tap.c:409:9
24
1109 15:16:20.313398 #2 0x5639b88b2d1b in net_init_tap_one third_party/qemu/net/tap.c:681:19
25
1109 15:16:20.313399 #3 0x5639b88b16a8 in net_init_tap third_party/qemu/net/tap.c:912:13
26
1109 15:16:20.313400 #4 0x5639b8890175 in net_client_init1 third_party/qemu/net/net.c:1110:9
27
1109 15:16:20.313401 #5 0x5639b888f912 in net_client_init third_party/qemu/net/net.c:1208:15
28
1109 15:16:20.313403 #6 0x5639b8894aa5 in net_param_nic third_party/qemu/net/net.c:1588:11
29
1109 15:16:20.313404 #7 0x5639b900cd18 in qemu_opts_foreach third_party/qemu/util/qemu-option.c:1135:14
30
1109 15:16:20.313405 #8 0x5639b889393c in net_init_clients third_party/qemu/net/net.c:1612:9
31
1109 15:16:20.313407 #9 0x5639b717aaf3 in qemu_create_late_backends third_party/qemu/softmmu/vl.c:1962:5
32
1109 15:16:20.313408 #10 0x5639b717aaf3 in qemu_init third_party/qemu/softmmu/vl.c:3694:5
33
1109 15:16:20.313409 #11 0x5639b71083b8 in main third_party/qemu/softmmu/main.c:49:5
34
1109 15:16:20.313410 #12 0x7f464de1d8d2 in __libc_start_main (/usr/grte/v5/lib64/libc.so.6+0x628d2)
35
1109 15:16:20.313412 #13 0x5639b6bbd389 in _start /usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120
36
1109 15:16:20.313413
37
1109 15:16:20.313417 Uninitialized value was stored to memory at
38
1109 15:16:20.313791 #0 0x5639b88affbd in net_tap_fd_init third_party/qemu/net/tap.c:400:26
39
1109 15:16:20.313826 #1 0x5639b88b2d1b in net_init_tap_one third_party/qemu/net/tap.c:681:19
40
1109 15:16:20.313829 #2 0x5639b88b16a8 in net_init_tap third_party/qemu/net/tap.c:912:13
41
1109 15:16:20.313831 #3 0x5639b8890175 in net_client_init1 third_party/qemu/net/net.c:1110:9
42
1109 15:16:20.313836 #4 0x5639b888f912 in net_client_init third_party/qemu/net/net.c:1208:15
43
1109 15:16:20.313838 #5 0x5639b8894aa5 in net_param_nic third_party/qemu/net/net.c:1588:11
44
1109 15:16:20.313839 #6 0x5639b900cd18 in qemu_opts_foreach third_party/qemu/util/qemu-option.c:1135:14
45
1109 15:16:20.313841 #7 0x5639b889393c in net_init_clients third_party/qemu/net/net.c:1612:9
46
1109 15:16:20.313843 #8 0x5639b717aaf3 in qemu_create_late_backends third_party/qemu/softmmu/vl.c:1962:5
47
1109 15:16:20.313844 #9 0x5639b717aaf3 in qemu_init third_party/qemu/softmmu/vl.c:3694:5
48
1109 15:16:20.313845 #10 0x5639b71083b8 in main third_party/qemu/softmmu/main.c:49:5
49
1109 15:16:20.313846 #11 0x7f464de1d8d2 in __libc_start_main (/usr/grte/v5/lib64/libc.so.6+0x628d2)
50
1109 15:16:20.313847 #12 0x5639b6bbd389 in _start /usr/grte/v5/debug-src/src/csu/../sysdeps/x86_64/start.S:120
51
1109 15:16:20.313849
52
1109 15:16:20.313851 Uninitialized value was created by an allocation of 'ifr' in the stack frame of function 'tap_probe_vnet_hdr'
53
1109 15:16:20.313855 #0 0x5639b88ac680 in tap_probe_vnet_hdr third_party/qemu/net/tap-linux.c:151
54
1109 15:16:20.313856
55
1109 15:16:20.313878 SUMMARY: MemorySanitizer: use-of-uninitialized-value third_party/qemu/net/tap-linux.c:183:9 in tap_probe_vnet_hdr_len
56
57
Fixes: dc69004c7d8 ("net: move tap_probe_vnet_hdr() to tap-linux.c")
58
Reviewed-by: Hao Wu <wuhaotsh@google.com>
59
Reviewed-by: Patrick Venture <venture@google.com>
60
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
61
Signed-off-by: Peter Foley <pefoley@google.com>
62
Signed-off-by: Jason Wang <jasowang@redhat.com>
63
---
64
net/tap-linux.c | 1 +
65
1 file changed, 1 insertion(+)
66
67
diff --git a/net/tap-linux.c b/net/tap-linux.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/net/tap-linux.c
70
+++ b/net/tap-linux.c
71
@@ -XXX,XX +XXX,XX @@ void tap_set_sndbuf(int fd, const NetdevTapOptions *tap, Error **errp)
72
int tap_probe_vnet_hdr(int fd, Error **errp)
73
{
74
struct ifreq ifr;
75
+ memset(&ifr, 0, sizeof(ifr));
76
77
if (ioctl(fd, TUNGETIFF, &ifr) != 0) {
78
/* TUNGETIFF is available since kernel v2.6.27 */
79
--
80
2.7.4
81
82
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
From: Zhang Chen <chen.zhang@intel.com>
2
2
3
Packet sizes sometimes differ, e.g. when the network is busy.
3
COLO-compare uses the glib function g_queue_find_custom to dump
4
Based on same payload size, but TCP protocol can not
4
another VM's networking packet to compare. But this function always
5
guarantee to send the same packet in the same way,
5
starts searching from queue->head (here the newest packet), which will
6
reduce the success rate of comparison. So this patch reversed
7
the order of the queues for performance.
6
8
7
like that:
9
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
8
We send this payload:
10
Reported-by: leirao <lei.rao@intel.com>
9
------------------------------
10
| header |1|2|3|4|5|6|7|8|9|0|
11
------------------------------
12
13
primary:
14
ppkt1:
15
----------------
16
| header |1|2|3|
17
----------------
18
ppkt2:
19
------------------------
20
| header |4|5|6|7|8|9|0|
21
------------------------
22
23
secondary:
24
spkt1:
25
------------------------------
26
| header |1|2|3|4|5|6|7|8|9|0|
27
------------------------------
28
29
In the original method, ppkt1 and ppkt2 each differ in size from
30
spkt1, so they can't be compared and a checkpoint is triggered.
31
32
I have tested FTP get 200M and 1G file many times, I found that
33
the performance was less than 1% of the native.
34
35
Now I reconstructed the comparison of TCP packets based on the
36
TCP sequence number. First of all, ppkt1 and spkt1 have the same
37
starting sequence number, so they can compare, even though their
38
length is different. And then ppkt1 with a smaller payload length
39
is used as the comparison length, if the payload is same, send
40
out the ppkt1 and record the offset(the length of ppkt1 payload)
41
in spkt1. The next comparison, ppkt2 and spkt1 can be compared
42
from the recorded position of spkt1.
43
44
like that:
45
----------------
46
| header |1|2|3| ppkt1
47
---------|-----|
48
| |
49
---------v-----v--------------
50
| header |1|2|3|4|5|6|7|8|9|0| spkt1
51
---------------|\------------|
52
| \offset |
53
---------v-------------v
54
| header |4|5|6|7|8|9|0| ppkt2
55
------------------------
56
57
In this way, the performance can reach 20% of native in my multiple
58
tests.
59
60
Cc: Zhang Chen <zhangckid@gmail.com>
61
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>
62
Cc: Jason Wang <jasowang@redhat.com>
63
64
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
65
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
66
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
67
Reviewed-by: Zhang Chen <zhangckid@gmail.com>
68
Tested-by: Zhang Chen <zhangckid@gmail.com>
69
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
70
---
12
---
71
net/colo-compare.c | 343 +++++++++++++++++++++++++++++++++++------------------
13
net/colo-compare.c | 26 +++++++++++++-------------
72
net/colo.c | 9 ++
14
1 file changed, 13 insertions(+), 13 deletions(-)
73
net/colo.h | 15 +++
74
net/trace-events | 2 +-
75
4 files changed, 250 insertions(+), 119 deletions(-)
76
15
77
diff --git a/net/colo-compare.c b/net/colo-compare.c
16
diff --git a/net/colo-compare.c b/net/colo-compare.c
78
index XXXXXXX..XXXXXXX 100644
17
index XXXXXXX..XXXXXXX 100644
79
--- a/net/colo-compare.c
18
--- a/net/colo-compare.c
80
+++ b/net/colo-compare.c
19
+++ b/net/colo-compare.c
81
@@ -XXX,XX +XXX,XX @@
20
@@ -XXX,XX +XXX,XX @@ static void colo_compare_inconsistency_notify(CompareState *s)
82
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
21
/* Use restricted to colo_insert_packet() */
83
#define MAX_QUEUE_SIZE 1024
22
static gint seq_sorter(Packet *a, Packet *b, gpointer data)
84
23
{
85
+#define COLO_COMPARE_FREE_PRIMARY 0x01
24
- return a->tcp_seq - b->tcp_seq;
86
+#define COLO_COMPARE_FREE_SECONDARY 0x02
25
+ return b->tcp_seq - a->tcp_seq;
87
+
88
/* TODO: Should be configurable */
89
#define REGULAR_PACKET_CHECK_MS 3000
90
91
@@ -XXX,XX +XXX,XX @@ static gint seq_sorter(Packet *a, Packet *b, gpointer data)
92
return ntohl(atcp->th_seq) - ntohl(btcp->th_seq);
93
}
26
}
94
27
95
+static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
28
static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
96
+{
29
@@ -XXX,XX +XXX,XX @@ pri:
97
+ Packet *pkt = data;
30
if (g_queue_is_empty(&conn->primary_list)) {
98
+ struct tcphdr *tcphd;
31
return;
99
+
100
+ tcphd = (struct tcphdr *)pkt->transport_header;
101
+
102
+ pkt->tcp_seq = ntohl(tcphd->th_seq);
103
+ pkt->tcp_ack = ntohl(tcphd->th_ack);
104
+ *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack;
105
+ pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
106
+ + (tcphd->th_off << 2) - pkt->vnet_hdr_len;
107
+ pkt->payload_size = pkt->size - pkt->header_size;
108
+ pkt->seq_end = pkt->tcp_seq + pkt->payload_size;
109
+ pkt->flags = tcphd->th_flags;
110
+}
111
+
112
/*
113
* Return 1 on success, if return 0 means the
114
* packet will be dropped
115
*/
116
-static int colo_insert_packet(GQueue *queue, Packet *pkt)
117
+static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
118
{
119
if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) {
120
if (pkt->ip->ip_p == IPPROTO_TCP) {
121
+ fill_pkt_tcp_info(pkt, max_ack);
122
g_queue_insert_sorted(queue,
123
pkt,
124
(GCompareDataFunc)seq_sorter,
125
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
126
}
32
}
127
33
- ppkt = g_queue_pop_head(&conn->primary_list);
128
if (mode == PRIMARY_IN) {
34
+ ppkt = g_queue_pop_tail(&conn->primary_list);
129
- if (!colo_insert_packet(&conn->primary_list, pkt)) {
35
sec:
130
+ if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
36
if (g_queue_is_empty(&conn->secondary_list)) {
131
error_report("colo compare primary queue size too big,"
37
- g_queue_push_head(&conn->primary_list, ppkt);
132
"drop packet");
38
+ g_queue_push_tail(&conn->primary_list, ppkt);
39
return;
40
}
41
- spkt = g_queue_pop_head(&conn->secondary_list);
42
+ spkt = g_queue_pop_tail(&conn->secondary_list);
43
44
if (ppkt->tcp_seq == ppkt->seq_end) {
45
colo_release_primary_pkt(s, ppkt);
46
@@ -XXX,XX +XXX,XX @@ sec:
47
}
48
}
49
if (!ppkt) {
50
- g_queue_push_head(&conn->secondary_list, spkt);
51
+ g_queue_push_tail(&conn->secondary_list, spkt);
52
goto pri;
53
}
54
}
55
@@ -XXX,XX +XXX,XX @@ sec:
56
if (mark == COLO_COMPARE_FREE_PRIMARY) {
57
conn->compare_seq = ppkt->seq_end;
58
colo_release_primary_pkt(s, ppkt);
59
- g_queue_push_head(&conn->secondary_list, spkt);
60
+ g_queue_push_tail(&conn->secondary_list, spkt);
61
goto pri;
62
} else if (mark == COLO_COMPARE_FREE_SECONDARY) {
63
conn->compare_seq = spkt->seq_end;
64
@@ -XXX,XX +XXX,XX @@ sec:
65
goto pri;
133
}
66
}
134
} else {
67
} else {
135
- if (!colo_insert_packet(&conn->secondary_list, pkt)) {
68
- g_queue_push_head(&conn->primary_list, ppkt);
136
+ if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
69
- g_queue_push_head(&conn->secondary_list, spkt);
137
error_report("colo compare secondary queue size too big,"
70
+ g_queue_push_tail(&conn->primary_list, ppkt);
138
"drop packet");
71
+ g_queue_push_tail(&conn->secondary_list, spkt);
139
}
72
140
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
73
#ifdef DEBUG_COLO_PACKETS
141
return 0;
74
qemu_hexdump(stderr, "colo-compare ppkt", ppkt->data, ppkt->size);
142
}
75
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
143
144
+static inline bool after(uint32_t seq1, uint32_t seq2)
145
+{
146
+ return (int32_t)(seq1 - seq2) > 0;
147
+}
148
+
149
+static void colo_release_primary_pkt(CompareState *s, Packet *pkt)
150
+{
151
+ int ret;
152
+ ret = compare_chr_send(s,
153
+ pkt->data,
154
+ pkt->size,
155
+ pkt->vnet_hdr_len);
156
+ if (ret < 0) {
157
+ error_report("colo send primary packet failed");
158
+ }
159
+ trace_colo_compare_main("packet same and release packet");
160
+ packet_destroy(pkt, NULL);
161
+}
162
+
163
/*
164
* The IP packets sent by primary and secondary
165
* will be compared in here
166
@@ -XXX,XX +XXX,XX @@ static int colo_compare_packet_payload(Packet *ppkt,
167
}
168
169
/*
170
- * Called from the compare thread on the primary
171
- * for compare tcp packet
172
- * compare_tcp copied from Dr. David Alan Gilbert's branch
173
- */
174
-static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
175
+ * return true means that the payload is consist and
176
+ * need to make the next comparison, false means do
177
+ * the checkpoint
178
+*/
179
+static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
180
+ int8_t *mark, uint32_t max_ack)
181
{
182
- struct tcphdr *ptcp, *stcp;
183
- int res;
184
+ *mark = 0;
185
+
186
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
187
+ if (colo_compare_packet_payload(ppkt, spkt,
188
+ ppkt->header_size, spkt->header_size,
189
+ ppkt->payload_size)) {
190
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
191
+ return true;
192
+ }
193
+ }
194
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
195
+ if (colo_compare_packet_payload(ppkt, spkt,
196
+ ppkt->header_size, spkt->header_size,
197
+ ppkt->payload_size)) {
198
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
199
+ return true;
200
+ }
201
+ }
202
+
203
+ /* one part of secondary packet payload still need to be compared */
204
+ if (!after(ppkt->seq_end, spkt->seq_end)) {
205
+ if (colo_compare_packet_payload(ppkt, spkt,
206
+ ppkt->header_size + ppkt->offset,
207
+ spkt->header_size + spkt->offset,
208
+ ppkt->payload_size - ppkt->offset)) {
209
+ if (!after(ppkt->tcp_ack, max_ack)) {
210
+ *mark = COLO_COMPARE_FREE_PRIMARY;
211
+ spkt->offset += ppkt->payload_size - ppkt->offset;
212
+ return true;
213
+ } else {
214
+ /* secondary guest hasn't ack the data, don't send
215
+ * out this packet
216
+ */
217
+ return false;
218
+ }
219
+ }
220
+ } else {
221
+ /* primary packet is longer than secondary packet, compare
222
+ * the same part and mark the primary packet offset
223
+ */
224
+ if (colo_compare_packet_payload(ppkt, spkt,
225
+ ppkt->header_size + ppkt->offset,
226
+ spkt->header_size + spkt->offset,
227
+ spkt->payload_size - spkt->offset)) {
228
+ *mark = COLO_COMPARE_FREE_SECONDARY;
229
+ ppkt->offset += spkt->payload_size - spkt->offset;
230
+ return true;
231
+ }
232
+ }
233
234
- trace_colo_compare_main("compare tcp");
235
+ return false;
236
+}
237
238
- ptcp = (struct tcphdr *)ppkt->transport_header;
239
- stcp = (struct tcphdr *)spkt->transport_header;
240
+static void colo_compare_tcp(CompareState *s, Connection *conn)
241
+{
242
+ Packet *ppkt = NULL, *spkt = NULL;
243
+ int8_t mark;
244
245
/*
246
- * The 'identification' field in the IP header is *very* random
247
- * it almost never matches. Fudge this by ignoring differences in
248
- * unfragmented packets; they'll normally sort themselves out if different
249
- * anyway, and it should recover at the TCP level.
250
- * An alternative would be to get both the primary and secondary to rewrite
251
- * somehow; but that would need some sync traffic to sync the state
252
- */
253
- if (ntohs(ppkt->ip->ip_off) & IP_DF) {
254
- spkt->ip->ip_id = ppkt->ip->ip_id;
255
- /* and the sum will be different if the IDs were different */
256
- spkt->ip->ip_sum = ppkt->ip->ip_sum;
257
+ * If ppkt and spkt have the same payload, but ppkt's ACK
258
+ * is greater than spkt's ACK, in this case we can not
259
+ * send the ppkt because it will cause the secondary guest
260
+ * to miss sending some data in the next. Therefore, we
261
+ * record the maximum ACK in the current queue at both
262
+ * primary side and secondary side. Only when the ack is
263
+ * less than the smaller of the two maximum ack, then we
264
+ * can ensure that the packet's payload is acknowledged by
265
+ * primary and secondary.
266
+ */
267
+ uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack;
268
+
269
+pri:
270
+ if (g_queue_is_empty(&conn->primary_list)) {
271
+ return;
272
}
273
+ ppkt = g_queue_pop_head(&conn->primary_list);
274
+sec:
275
+ if (g_queue_is_empty(&conn->secondary_list)) {
276
+ g_queue_push_head(&conn->primary_list, ppkt);
277
+ return;
278
+ }
279
+ spkt = g_queue_pop_head(&conn->secondary_list);
280
281
- /*
282
- * Check tcp header length for tcp option field.
283
- * th_off > 5 means this tcp packet have options field.
284
- * The tcp options maybe always different.
285
- * for example:
286
- * From RFC 7323.
287
- * TCP Timestamps option (TSopt):
288
- * Kind: 8
289
- *
290
- * Length: 10 bytes
291
- *
292
- * +-------+-------+---------------------+---------------------+
293
- * |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)|
294
- * +-------+-------+---------------------+---------------------+
295
- * 1 1 4 4
296
- *
297
- * In this case the primary guest's timestamp always different with
298
- * the secondary guest's timestamp. COLO just focus on payload,
299
- * so we just need skip this field.
300
- */
301
+ if (ppkt->tcp_seq == ppkt->seq_end) {
302
+ colo_release_primary_pkt(s, ppkt);
303
+ ppkt = NULL;
304
+ }
305
306
- ptrdiff_t ptcp_offset, stcp_offset;
307
+ if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) {
308
+ trace_colo_compare_main("pri: this packet has compared");
309
+ colo_release_primary_pkt(s, ppkt);
310
+ ppkt = NULL;
311
+ }
312
313
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
314
- + (ptcp->th_off << 2) - ppkt->vnet_hdr_len;
315
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
316
- + (stcp->th_off << 2) - spkt->vnet_hdr_len;
317
- if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) {
318
- res = colo_compare_packet_payload(ppkt, spkt,
319
- ptcp_offset, stcp_offset,
320
- ppkt->size - ptcp_offset);
321
+ if (spkt->tcp_seq == spkt->seq_end) {
322
+ packet_destroy(spkt, NULL);
323
+ if (!ppkt) {
324
+ goto pri;
325
+ } else {
326
+ goto sec;
327
+ }
328
} else {
329
- trace_colo_compare_main("TCP: payload size of packets are different");
330
- res = -1;
331
+ if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) {
332
+ trace_colo_compare_main("sec: this packet has compared");
333
+ packet_destroy(spkt, NULL);
334
+ if (!ppkt) {
335
+ goto pri;
336
+ } else {
337
+ goto sec;
338
+ }
339
+ }
340
+ if (!ppkt) {
341
+ g_queue_push_head(&conn->secondary_list, spkt);
342
+ goto pri;
343
+ }
344
}
345
346
- if (res != 0 &&
347
- trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
348
- char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
349
-
350
- strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
351
- strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
352
- strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
353
- strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
354
-
355
- trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
356
- pri_ip_dst, spkt->size,
357
- sec_ip_src, sec_ip_dst);
358
-
359
- trace_colo_compare_tcp_info("pri tcp packet",
360
- ntohl(ptcp->th_seq),
361
- ntohl(ptcp->th_ack),
362
- res, ptcp->th_flags,
363
- ppkt->size);
364
-
365
- trace_colo_compare_tcp_info("sec tcp packet",
366
- ntohl(stcp->th_seq),
367
- ntohl(stcp->th_ack),
368
- res, stcp->th_flags,
369
- spkt->size);
370
+ if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) {
371
+ trace_colo_compare_tcp_info("pri",
372
+ ppkt->tcp_seq, ppkt->tcp_ack,
373
+ ppkt->header_size, ppkt->payload_size,
374
+ ppkt->offset, ppkt->flags);
375
+
376
+ trace_colo_compare_tcp_info("sec",
377
+ spkt->tcp_seq, spkt->tcp_ack,
378
+ spkt->header_size, spkt->payload_size,
379
+ spkt->offset, spkt->flags);
380
+
381
+ if (mark == COLO_COMPARE_FREE_PRIMARY) {
382
+ conn->compare_seq = ppkt->seq_end;
383
+ colo_release_primary_pkt(s, ppkt);
384
+ g_queue_push_head(&conn->secondary_list, spkt);
385
+ goto pri;
386
+ }
387
+ if (mark == COLO_COMPARE_FREE_SECONDARY) {
388
+ conn->compare_seq = spkt->seq_end;
389
+ packet_destroy(spkt, NULL);
390
+ goto sec;
391
+ }
392
+ if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) {
393
+ conn->compare_seq = ppkt->seq_end;
394
+ colo_release_primary_pkt(s, ppkt);
395
+ packet_destroy(spkt, NULL);
396
+ goto pri;
397
+ }
398
+ } else {
399
+ g_queue_push_head(&conn->primary_list, ppkt);
400
+ g_queue_push_head(&conn->secondary_list, spkt);
401
402
qemu_hexdump((char *)ppkt->data, stderr,
403
"colo-compare ppkt", ppkt->size);
404
qemu_hexdump((char *)spkt->data, stderr,
405
"colo-compare spkt", spkt->size);
406
- }
407
408
- return res;
409
+ /*
410
+ * colo_compare_inconsistent_notify();
411
+ * TODO: notice to checkpoint();
412
+ */
413
+ }
414
}
415
416
+
417
/*
418
* Called from the compare thread on the primary
419
* for compare udp packet
420
@@ -XXX,XX +XXX,XX @@ static void colo_old_packet_check(void *opaque)
421
(GCompareFunc)colo_old_packet_check_one_conn);
422
}
423
424
-/*
425
- * Called from the compare thread on the primary
426
- * for compare packet with secondary list of the
427
- * specified connection when a new packet was
428
- * queued to it.
429
- */
430
-static void colo_compare_connection(void *opaque, void *user_data)
431
+static void colo_compare_packet(CompareState *s, Connection *conn,
432
+ int (*HandlePacket)(Packet *spkt,
433
+ Packet *ppkt))
434
{
435
- CompareState *s = user_data;
436
- Connection *conn = opaque;
437
Packet *pkt = NULL;
438
GList *result = NULL;
439
- int ret;
440
76
441
while (!g_queue_is_empty(&conn->primary_list) &&
77
while (!g_queue_is_empty(&conn->primary_list) &&
442
!g_queue_is_empty(&conn->secondary_list)) {
78
!g_queue_is_empty(&conn->secondary_list)) {
443
pkt = g_queue_pop_head(&conn->primary_list);
79
- pkt = g_queue_pop_head(&conn->primary_list);
444
- switch (conn->ip_proto) {
80
+ pkt = g_queue_pop_tail(&conn->primary_list);
445
- case IPPROTO_TCP:
81
result = g_queue_find_custom(&conn->secondary_list,
446
- result = g_queue_find_custom(&conn->secondary_list,
82
pkt, (GCompareFunc)HandlePacket);
447
- pkt, (GCompareFunc)colo_packet_compare_tcp);
83
448
- break;
84
@@ -XXX,XX +XXX,XX @@ static void colo_compare_packet(CompareState *s, Connection *conn,
449
- case IPPROTO_UDP:
85
* timeout, it will trigger a checkpoint request.
450
- result = g_queue_find_custom(&conn->secondary_list,
86
*/
451
- pkt, (GCompareFunc)colo_packet_compare_udp);
87
trace_colo_compare_main("packet different");
452
- break;
88
- g_queue_push_head(&conn->primary_list, pkt);
453
- case IPPROTO_ICMP:
89
+ g_queue_push_tail(&conn->primary_list, pkt);
454
- result = g_queue_find_custom(&conn->secondary_list,
90
455
- pkt, (GCompareFunc)colo_packet_compare_icmp);
91
colo_compare_inconsistency_notify(s);
456
- break;
92
break;
457
- default:
93
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CompareState *s,
458
- result = g_queue_find_custom(&conn->secondary_list,
94
entry->buf = g_malloc(size);
459
- pkt, (GCompareFunc)colo_packet_compare_other);
95
memcpy(entry->buf, buf, size);
460
- break;
96
}
461
- }
97
- g_queue_push_head(&sendco->send_list, entry);
462
+ result = g_queue_find_custom(&conn->secondary_list,
98
+ g_queue_push_tail(&sendco->send_list, entry);
463
+ pkt, (GCompareFunc)HandlePacket);
99
464
100
if (sendco->done) {
465
if (result) {
101
sendco->co = qemu_coroutine_create(_compare_chr_send, sendco);
466
- ret = compare_chr_send(s,
102
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
467
- pkt->data,
103
Packet *pkt = NULL;
468
- pkt->size,
104
469
- pkt->vnet_hdr_len);
105
while (!g_queue_is_empty(&conn->primary_list)) {
470
- if (ret < 0) {
106
- pkt = g_queue_pop_head(&conn->primary_list);
471
- error_report("colo_send_primary_packet failed");
107
+ pkt = g_queue_pop_tail(&conn->primary_list);
472
- }
108
compare_chr_send(s,
473
- trace_colo_compare_main("packet same and release packet");
109
pkt->data,
474
+ colo_release_primary_pkt(s, pkt);
110
pkt->size,
475
g_queue_remove(&conn->secondary_list, result->data);
111
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
476
- packet_destroy(pkt, NULL);
112
packet_destroy_partial(pkt, NULL);
477
} else {
113
}
478
/*
114
while (!g_queue_is_empty(&conn->secondary_list)) {
479
* If one packet arrive late, the secondary_list or
115
- pkt = g_queue_pop_head(&conn->secondary_list);
480
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
116
+ pkt = g_queue_pop_tail(&conn->secondary_list);
117
packet_destroy(pkt, NULL);
481
}
118
}
482
}
119
}
483
484
+/*
485
+ * Called from the compare thread on the primary
486
+ * for compare packet with secondary list of the
487
+ * specified connection when a new packet was
488
+ * queued to it.
489
+ */
490
+static void colo_compare_connection(void *opaque, void *user_data)
491
+{
492
+ CompareState *s = user_data;
493
+ Connection *conn = opaque;
494
+
495
+ switch (conn->ip_proto) {
496
+ case IPPROTO_TCP:
497
+ colo_compare_tcp(s, conn);
498
+ break;
499
+ case IPPROTO_UDP:
500
+ colo_compare_packet(s, conn, colo_packet_compare_udp);
501
+ break;
502
+ case IPPROTO_ICMP:
503
+ colo_compare_packet(s, conn, colo_packet_compare_icmp);
504
+ break;
505
+ default:
506
+ colo_compare_packet(s, conn, colo_packet_compare_other);
507
+ break;
508
+ }
509
+}
510
+
511
static int compare_chr_send(CompareState *s,
512
const uint8_t *buf,
513
uint32_t size,
514
diff --git a/net/colo.c b/net/colo.c
515
index XXXXXXX..XXXXXXX 100644
516
--- a/net/colo.c
517
+++ b/net/colo.c
518
@@ -XXX,XX +XXX,XX @@ Connection *connection_new(ConnectionKey *key)
519
conn->processing = false;
520
conn->offset = 0;
521
conn->syn_flag = 0;
522
+ conn->pack = 0;
523
+ conn->sack = 0;
524
g_queue_init(&conn->primary_list);
525
g_queue_init(&conn->secondary_list);
526
527
@@ -XXX,XX +XXX,XX @@ Packet *packet_new(const void *data, int size, int vnet_hdr_len)
528
pkt->size = size;
529
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
530
pkt->vnet_hdr_len = vnet_hdr_len;
531
+ pkt->tcp_seq = 0;
532
+ pkt->tcp_ack = 0;
533
+ pkt->seq_end = 0;
534
+ pkt->header_size = 0;
535
+ pkt->payload_size = 0;
536
+ pkt->offset = 0;
537
+ pkt->flags = 0;
538
539
return pkt;
540
}
541
diff --git a/net/colo.h b/net/colo.h
542
index XXXXXXX..XXXXXXX 100644
543
--- a/net/colo.h
544
+++ b/net/colo.h
545
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
546
int64_t creation_ms;
547
/* Get vnet_hdr_len from filter */
548
uint32_t vnet_hdr_len;
549
+ uint32_t tcp_seq; /* sequence number */
550
+ uint32_t tcp_ack; /* acknowledgement number */
551
+ /* the sequence number of the last byte of the packet */
552
+ uint32_t seq_end;
553
+ uint8_t header_size; /* the header length */
554
+ uint16_t payload_size; /* the payload length */
555
+ /* record the payload offset(the length that has been compared) */
556
+ uint16_t offset;
557
+ uint8_t flags; /* Flags(aka Control bits) */
558
} Packet;
559
560
typedef struct ConnectionKey {
561
@@ -XXX,XX +XXX,XX @@ typedef struct Connection {
562
/* flag to enqueue unprocessed_connections */
563
bool processing;
564
uint8_t ip_proto;
565
+ /* record the sequence number that has been compared */
566
+ uint32_t compare_seq;
567
+ /* the maximum of acknowledgement number in primary_list queue */
568
+ uint32_t pack;
569
+ /* the maximum of acknowledgement number in secondary_list queue */
570
+ uint32_t sack;
571
/* offset = secondary_seq - primary_seq */
572
tcp_seq offset;
573
/*
574
diff --git a/net/trace-events b/net/trace-events
575
index XXXXXXX..XXXXXXX 100644
576
--- a/net/trace-events
577
+++ b/net/trace-events
578
@@ -XXX,XX +XXX,XX @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
579
colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
580
colo_old_packet_check_found(int64_t old_time) "%" PRId64
581
colo_compare_miscompare(void) ""
582
-colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int res, uint32_t flag, int size) "side: %s seq/ack= %u/%u res= %d flags= 0x%x pkt_size: %d\n"
583
+colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int hdlen, int pdlen, int offset, int flags) "%s: seq/ack= %u/%u hdlen= %d pdlen= %d offset= %d flags=%d\n"
584
585
# net/filter-rewriter.c
586
colo_filter_rewriter_debug(void) ""
587
--
120
--
588
2.7.4
121
2.7.4
589
122
590
123
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
From: Zhang Chen <chen.zhang@intel.com>
2
2
3
Modified the function colo_packet_compare_common to prepare for the
3
Make the comments consistent with the REGULAR_PACKET_CHECK_MS.
4
tcp packet comparison in the next patch.
5
4
6
Cc: Zhang Chen <zhangckid@gmail.com>
5
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
7
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>
8
Cc: Jason Wang <jasowang@redhat.com>
9
10
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
11
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
12
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
13
Reviewed-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
7
---
16
net/colo-compare.c | 88 +++++++++++++++++++++++++++---------------------------
8
net/colo-compare.c | 2 +-
17
1 file changed, 44 insertions(+), 44 deletions(-)
9
1 file changed, 1 insertion(+), 1 deletion(-)
18
10
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
11
diff --git a/net/colo-compare.c b/net/colo-compare.c
20
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
13
--- a/net/colo-compare.c
22
+++ b/net/colo-compare.c
14
+++ b/net/colo-compare.c
23
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
15
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
24
* return: 0 means packet same
25
* > 0 || < 0 means packet different
26
*/
27
-static int colo_packet_compare_common(Packet *ppkt,
28
- Packet *spkt,
29
- int poffset,
30
- int soffset)
31
+static int colo_compare_packet_payload(Packet *ppkt,
32
+ Packet *spkt,
33
+ uint16_t poffset,
34
+ uint16_t soffset,
35
+ uint16_t len)
36
+
37
{
38
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
39
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
40
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt,
41
sec_ip_src, sec_ip_dst);
42
}
16
}
43
17
44
- poffset = ppkt->vnet_hdr_len + poffset;
18
if (!s->expired_scan_cycle) {
45
- soffset = ppkt->vnet_hdr_len + soffset;
19
- /* Set default value to 3000 MS */
46
-
20
+ /* Set default value to 1000 MS */
47
- if (ppkt->size - poffset == spkt->size - soffset) {
21
s->expired_scan_cycle = REGULAR_PACKET_CHECK_MS;
48
- return memcmp(ppkt->data + poffset,
49
- spkt->data + soffset,
50
- spkt->size - soffset);
51
- } else {
52
- trace_colo_compare_main("Net packet size are not the same");
53
- return -1;
54
- }
55
+ return memcmp(ppkt->data + poffset, spkt->data + soffset, len);
56
}
57
58
/*
59
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
60
* the secondary guest's timestamp. COLO just focus on payload,
61
* so we just need skip this field.
62
*/
63
- if (ptcp->th_off > 5) {
64
- ptrdiff_t ptcp_offset, stcp_offset;
65
66
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
67
- + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
68
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
69
- + (stcp->th_off * 4) - spkt->vnet_hdr_len;
70
+ ptrdiff_t ptcp_offset, stcp_offset;
71
72
- /*
73
- * When network is busy, some tcp options(like sack) will unpredictable
74
- * occur in primary side or secondary side. it will make packet size
75
- * not same, but the two packet's payload is identical. colo just
76
- * care about packet payload, so we skip the option field.
77
- */
78
- res = colo_packet_compare_common(ppkt, spkt, ptcp_offset, stcp_offset);
79
- } else if (ptcp->th_sum == stcp->th_sum) {
80
- res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN, ETH_HLEN);
81
+ ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
82
+ + (ptcp->th_off << 2) - ppkt->vnet_hdr_len;
83
+ stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
84
+ + (stcp->th_off << 2) - spkt->vnet_hdr_len;
85
+ if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) {
86
+ res = colo_compare_packet_payload(ppkt, spkt,
87
+ ptcp_offset, stcp_offset,
88
+ ppkt->size - ptcp_offset);
89
} else {
90
+ trace_colo_compare_main("TCP: payload size of packets are different");
91
res = -1;
92
}
22
}
93
23
94
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
95
*/
96
static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
97
{
98
- int ret;
99
- int network_header_length = ppkt->ip->ip_hl * 4;
100
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
101
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
102
103
trace_colo_compare_main("compare udp");
104
105
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
106
* other field like TOS,TTL,IP Checksum. we only need to compare
107
* the ip payload here.
108
*/
109
- ret = colo_packet_compare_common(ppkt, spkt,
110
- network_header_length + ETH_HLEN,
111
- network_header_length + ETH_HLEN);
112
-
113
- if (ret) {
114
+ if (ppkt->size != spkt->size) {
115
+ trace_colo_compare_main("UDP: payload size of packets are different");
116
+ return -1;
117
+ }
118
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
119
+ ppkt->size - offset)) {
120
trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
121
trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
122
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
123
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
124
qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
125
spkt->size);
126
}
127
+ return -1;
128
+ } else {
129
+ return 0;
130
}
131
-
132
- return ret;
133
}
134
135
/*
136
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
137
*/
138
static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
139
{
140
- int network_header_length = ppkt->ip->ip_hl * 4;
141
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
142
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
143
144
trace_colo_compare_main("compare icmp");
145
146
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
147
* other field like TOS,TTL,IP Checksum. we only need to compare
148
* the ip payload here.
149
*/
150
- if (colo_packet_compare_common(ppkt, spkt,
151
- network_header_length + ETH_HLEN,
152
- network_header_length + ETH_HLEN)) {
153
+ if (ppkt->size != spkt->size) {
154
+ trace_colo_compare_main("ICMP: payload size of packets are different");
155
+ return -1;
156
+ }
157
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
158
+ ppkt->size - offset)) {
159
trace_colo_compare_icmp_miscompare("primary pkt size",
160
ppkt->size);
161
trace_colo_compare_icmp_miscompare("Secondary pkt size",
162
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
163
*/
164
static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
165
{
166
+ uint16_t offset = ppkt->vnet_hdr_len;
167
+
168
trace_colo_compare_main("compare other");
169
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
170
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
171
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
172
sec_ip_src, sec_ip_dst);
173
}
174
175
- return colo_packet_compare_common(ppkt, spkt, 0, 0);
176
+ if (ppkt->size != spkt->size) {
177
+ trace_colo_compare_main("Other: payload size of packets are different");
178
+ return -1;
179
+ }
180
+ return colo_compare_packet_payload(ppkt, spkt, offset, offset,
181
+ ppkt->size - offset);
182
}
183
184
static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
185
--
24
--
186
2.7.4
25
2.7.4
187
26
188
27
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Rao Lei <lei.rao@intel.com>
2
2
3
QEMU can emulate hubs to connect NICs and netdevs. This is currently
3
This patch is to improve the logic of QEMU main thread sleep code in
4
primarily used for the mis-named 'vlan' feature of the networking
4
qemu_chr_write_buffer() where it can be blocked and can't run other
5
subsystem. Now the 'vlan' feature has been marked as deprecated, since
5
coroutines during COLO IO stress test.
6
its name is rather confusing and the users often rather mis-configure
7
their network when trying to use it. But while the 'vlan' parameter
8
should be removed at one point in time, the basic idea of emulating
9
a hub in QEMU is still good: It's useful for bundling up the output of
10
multiple NICs into one single l2tp netdev for example.
11
6
12
Now to be able to use the hubport feature without 'vlan's, there is one
7
Our approach is to put filter_send() in a coroutine. In this way,
13
missing piece: The possibility to connect a hubport to a netdev, too.
8
filter_send() will call qemu_coroutine_yield() in qemu_co_sleep_ns(),
14
This patch adds this possibility by introducing a new "netdev=..."
9
so that it can be scheduled out and QEMU main thread has opportunity to
15
parameter to the hubports.
10
run other tasks.
16
11
17
To bundle up the output of multiple NICs into one socket netdev, you can
12
Signed-off-by: Lei Rao <lei.rao@intel.com>
18
now run QEMU with these parameters for example:
13
Signed-off-by: Zhang Chen <chen.zhang@intel.com>
19
14
Reviewed-by: Li Zhijian <lizhijian@fujitsu.com>
20
qemu-system-ppc64 ... -netdev socket,id=s1,connect=:11122 \
15
Reviewed-by: Zhang Chen <chen.zhang@intel.com>
21
-netdev hubport,hubid=1,id=h1,netdev=s1 \
22
-netdev hubport,hubid=1,id=h2 -device e1000,netdev=h2 \
23
-netdev hubport,hubid=1,id=h3 -device virtio-net-pci,netdev=h3
24
25
For using the socket netdev, you have got to start another QEMU as the
26
receiving side first, for example with network dumping enabled:
27
28
qemu-system-x86_64 -M isapc -netdev socket,id=s0,listen=:11122 \
29
-device ne2k_isa,netdev=s0 \
30
-object filter-dump,id=f1,netdev=s0,file=/tmp/dump.dat
31
32
After the ppc64 guest tried to boot from both NICs, you can see in the
33
dump file (using Wireshark, for example), that the output of both NICs
34
(the e1000 and the virtio-net-pci) has been successfully transferred
35
via the socket netdev in this case.
36
37
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
38
Signed-off-by: Thomas Huth <thuth@redhat.com>
39
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
40
---
17
---
41
net/hub.c | 27 +++++++++++++++++++++------
18
net/filter-mirror.c | 66 ++++++++++++++++++++++++++++++++++++++++++-----------
42
net/hub.h | 3 ++-
19
1 file changed, 53 insertions(+), 13 deletions(-)
43
net/net.c | 2 +-
44
qapi/net.json | 4 +++-
45
qemu-options.hx | 8 +++++---
46
5 files changed, 32 insertions(+), 12 deletions(-)
47
20
48
diff --git a/net/hub.c b/net/hub.c
21
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
49
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
50
--- a/net/hub.c
23
--- a/net/filter-mirror.c
51
+++ b/net/hub.c
24
+++ b/net/filter-mirror.c
52
@@ -XXX,XX +XXX,XX @@
25
@@ -XXX,XX +XXX,XX @@
53
*/
26
#include "chardev/char-fe.h"
54
27
#include "qemu/iov.h"
55
#include "qemu/osdep.h"
28
#include "qemu/sockets.h"
56
+#include "qapi/error.h"
29
+#include "block/aio-wait.h"
57
#include "monitor/monitor.h"
30
58
#include "net/net.h"
31
#define TYPE_FILTER_MIRROR "filter-mirror"
59
#include "clients.h"
32
typedef struct MirrorState MirrorState;
60
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_hub_port_info = {
33
@@ -XXX,XX +XXX,XX @@ struct MirrorState {
61
.cleanup = net_hub_port_cleanup,
34
bool vnet_hdr;
62
};
35
};
63
36
64
-static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
37
-static int filter_send(MirrorState *s,
65
+static NetHubPort *net_hub_port_new(NetHub *hub, const char *name,
38
- const struct iovec *iov,
66
+ NetClientState *hubpeer)
39
- int iovcnt)
40
+typedef struct FilterSendCo {
41
+ MirrorState *s;
42
+ char *buf;
43
+ ssize_t size;
44
+ bool done;
45
+ int ret;
46
+} FilterSendCo;
47
+
48
+static int _filter_send(MirrorState *s,
49
+ char *buf,
50
+ ssize_t size)
67
{
51
{
68
NetClientState *nc;
52
NetFilterState *nf = NETFILTER(s);
69
NetHubPort *port;
53
int ret = 0;
70
@@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
54
- ssize_t size = 0;
71
name = default_name;
55
uint32_t len = 0;
72
}
56
- char *buf;
73
57
-
74
- nc = qemu_new_net_client(&net_hub_port_info, NULL, "hub", name);
58
- size = iov_size(iov, iovcnt);
75
+ nc = qemu_new_net_client(&net_hub_port_info, hubpeer, "hub", name);
59
- if (!size) {
76
port = DO_UPCAST(NetHubPort, nc, nc);
60
- return 0;
77
port->id = id;
61
- }
78
port->hub = hub;
62
79
@@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
63
len = htonl(size);
80
64
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
81
/**
65
@@ -XXX,XX +XXX,XX @@ static int filter_send(MirrorState *s,
82
* Create a port on a given hub
83
+ * @hub_id: Number of the hub
84
* @name: Net client name or NULL for default name.
85
+ * @hubpeer: Peer to use (if "netdev=id" has been specified)
86
*
87
* If there is no existing hub with the given id then a new hub is created.
88
*/
89
-NetClientState *net_hub_add_port(int hub_id, const char *name)
90
+NetClientState *net_hub_add_port(int hub_id, const char *name,
91
+ NetClientState *hubpeer)
92
{
93
NetHub *hub;
94
NetHubPort *port;
95
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_add_port(int hub_id, const char *name)
96
hub = net_hub_new(hub_id);
97
}
98
99
- port = net_hub_port_new(hub, name);
100
+ port = net_hub_port_new(hub, name, hubpeer);
101
return &port->nc;
102
}
103
104
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_port_find(int hub_id)
105
}
66
}
106
}
67
}
107
68
108
- nc = net_hub_add_port(hub_id, NULL);
69
- buf = g_malloc(size);
109
+ nc = net_hub_add_port(hub_id, NULL, NULL);
70
- iov_to_buf(iov, iovcnt, 0, buf, size);
110
return nc;
71
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
72
- g_free(buf);
73
if (ret != size) {
74
goto err;
75
}
76
@@ -XXX,XX +XXX,XX @@ err:
77
return ret < 0 ? ret : -EIO;
111
}
78
}
112
79
113
@@ -XXX,XX +XXX,XX @@ int net_init_hubport(const Netdev *netdev, const char *name,
80
+static void coroutine_fn filter_send_co(void *opaque)
114
NetClientState *peer, Error **errp)
81
+{
115
{
82
+ FilterSendCo *data = opaque;
116
const NetdevHubPortOptions *hubport;
83
+
117
+ NetClientState *hubpeer = NULL;
84
+ data->ret = _filter_send(data->s, data->buf, data->size);
118
85
+ data->done = true;
119
assert(netdev->type == NET_CLIENT_DRIVER_HUBPORT);
86
+ g_free(data->buf);
120
assert(!peer);
87
+ aio_wait_kick();
121
hubport = &netdev->u.hubport;
88
+}
122
89
+
123
- net_hub_add_port(hubport->hubid, name);
90
+static int filter_send(MirrorState *s,
124
+ if (hubport->has_netdev) {
91
+ const struct iovec *iov,
125
+ hubpeer = qemu_find_netdev(hubport->netdev);
92
+ int iovcnt)
126
+ if (!hubpeer) {
93
+{
127
+ error_setg(errp, "netdev '%s' not found", hubport->netdev);
94
+ ssize_t size = iov_size(iov, iovcnt);
128
+ return -1;
95
+ char *buf = NULL;
129
+ }
96
+
97
+ if (!size) {
98
+ return 0;
130
+ }
99
+ }
131
+
100
+
132
+ net_hub_add_port(hubport->hubid, name, hubpeer);
101
+ buf = g_malloc(size);
102
+ iov_to_buf(iov, iovcnt, 0, buf, size);
133
+
103
+
134
return 0;
104
+ FilterSendCo data = {
135
}
105
+ .s = s,
136
106
+ .size = size,
137
diff --git a/net/hub.h b/net/hub.h
107
+ .buf = buf,
138
index XXXXXXX..XXXXXXX 100644
108
+ .ret = 0,
139
--- a/net/hub.h
109
+ };
140
+++ b/net/hub.h
110
+
141
@@ -XXX,XX +XXX,XX @@
111
+ Coroutine *co = qemu_coroutine_create(filter_send_co, &data);
142
112
+ qemu_coroutine_enter(co);
143
#include "qemu-common.h"
113
+
144
114
+ while (!data.done) {
145
-NetClientState *net_hub_add_port(int hub_id, const char *name);
115
+ aio_poll(qemu_get_aio_context(), true);
146
+NetClientState *net_hub_add_port(int hub_id, const char *name,
116
+ }
147
+ NetClientState *hubpeer);
117
+
148
NetClientState *net_hub_find_client_by_name(int hub_id, const char *name);
118
+ return data.ret;
149
void net_hub_info(Monitor *mon);
119
+}
150
void net_hub_check_clients(void);
120
+
151
diff --git a/net/net.c b/net/net.c
121
static void redirector_to_filter(NetFilterState *nf,
152
index XXXXXXX..XXXXXXX 100644
122
const uint8_t *buf,
153
--- a/net/net.c
123
int len)
154
+++ b/net/net.c
155
@@ -XXX,XX +XXX,XX @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
156
/* Do not add to a vlan if it's a nic with a netdev= parameter. */
157
if (netdev->type != NET_CLIENT_DRIVER_NIC ||
158
!opts->u.nic.has_netdev) {
159
- peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL);
160
+ peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL, NULL);
161
}
162
163
if (net->has_vlan && !vlan_warned) {
164
diff --git a/qapi/net.json b/qapi/net.json
165
index XXXXXXX..XXXXXXX 100644
166
--- a/qapi/net.json
167
+++ b/qapi/net.json
168
@@ -XXX,XX +XXX,XX @@
169
# Connect two or more net clients through a software hub.
170
#
171
# @hubid: hub identifier number
172
+# @netdev: used to connect hub to a netdev instead of a device (since 2.12)
173
#
174
# Since: 1.2
175
##
176
{ 'struct': 'NetdevHubPortOptions',
177
'data': {
178
- 'hubid': 'int32' } }
179
+ 'hubid': 'int32',
180
+ '*netdev': 'str' } }
181
182
##
183
# @NetdevNetmapOptions:
184
diff --git a/qemu-options.hx b/qemu-options.hx
185
index XXXXXXX..XXXXXXX 100644
186
--- a/qemu-options.hx
187
+++ b/qemu-options.hx
188
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
189
#endif
190
"-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
191
" configure a vhost-user network, backed by a chardev 'dev'\n"
192
- "-netdev hubport,id=str,hubid=n\n"
193
+ "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
194
" configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL)
195
DEF("net", HAS_ARG, QEMU_OPTION_net,
196
"-net nic[,vlan=n][,netdev=nd][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
197
@@ -XXX,XX +XXX,XX @@ vde_switch -F -sock /tmp/myswitch
198
qemu-system-i386 linux.img -net nic -net vde,sock=/tmp/myswitch
199
@end example
200
201
-@item -netdev hubport,id=@var{id},hubid=@var{hubid}
202
+@item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
203
204
Create a hub port on QEMU "vlan" @var{hubid}.
205
206
The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
207
netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the
208
-required hub automatically.
209
+required hub automatically. Alternatively, you can also connect the hubport
210
+to another netdev with ID @var{nd} by using the @option{netdev=@var{nd}}
211
+option.
212
213
@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
214
215
--
124
--
216
2.7.4
125
2.7.4
217
126
218
127
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Nick Hudson <skrll@netbsd.org>
2
2
3
The vlan concept is marked as deprecated, so we should not use
3
In section 7.4.3 of the 82574 datasheet it states that
4
this for examples in the documentation anymore.
5
4
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
5
"In systems that do not support MSI-X, reading the ICR
6
register clears it's bits..."
7
8
Some OSes rely on this.
9
10
Signed-off-by: Nick Hudson <skrll@netbsd.org>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
12
---
9
qemu-options.hx | 4 ++--
13
hw/net/e1000e_core.c | 5 +++++
10
1 file changed, 2 insertions(+), 2 deletions(-)
14
hw/net/trace-events | 1 +
15
2 files changed, 6 insertions(+)
11
16
12
diff --git a/qemu-options.hx b/qemu-options.hx
17
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/qemu-options.hx
19
--- a/hw/net/e1000e_core.c
15
+++ b/qemu-options.hx
20
+++ b/hw/net/e1000e_core.c
16
@@ -XXX,XX +XXX,XX @@ qemu-system-i386 linux.img -net nic -net tap
21
@@ -XXX,XX +XXX,XX @@ e1000e_mac_icr_read(E1000ECore *core, int index)
17
#launch a QEMU instance with two NICs, each one connected
22
core->mac[ICR] = 0;
18
#to a TAP device
23
}
19
qemu-system-i386 linux.img \
24
20
- -net nic,vlan=0 -net tap,vlan=0,ifname=tap0 \
25
+ if (!msix_enabled(core->owner)) {
21
- -net nic,vlan=1 -net tap,vlan=1,ifname=tap1
26
+ trace_e1000e_irq_icr_clear_nonmsix_icr_read();
22
+ -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \
27
+ core->mac[ICR] = 0;
23
+ -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1
28
+ }
24
@end example
29
+
25
30
if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
26
@example
31
(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
32
trace_e1000e_irq_icr_clear_iame();
33
diff --git a/hw/net/trace-events b/hw/net/trace-events
34
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/net/trace-events
36
+++ b/hw/net/trace-events
37
@@ -XXX,XX +XXX,XX @@ e1000e_irq_write_ics(uint32_t val) "Adding ICR bits 0x%x"
38
e1000e_irq_icr_process_iame(void) "Clearing IMS bits due to IAME"
39
e1000e_irq_read_ics(uint32_t ics) "Current ICS: 0x%x"
40
e1000e_irq_read_ims(uint32_t ims) "Current IMS: 0x%x"
41
+e1000e_irq_icr_clear_nonmsix_icr_read(void) "Clearing ICR on read due to non MSI-X int"
42
e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x"
43
e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x"
44
e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
27
--
45
--
28
2.7.4
46
2.7.4
29
47
30
48
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Thomas Jansen <mithi@mithi.net>
2
2
3
It does not make much sense to limit these commands to the legacy 'vlan'
3
The IPv6 option headers all have in common that they start with some
4
concept only, they should work with the modern netdevs, too. So now
4
common fields, in particular the type of the next header followed by the
5
it is possible to use this command with one, two or three parameters.
5
extension header length. This is used to traverse the list of the
6
options. The ESP header does not follow that format, which can break the
7
IPv6 option header traversal code in eth_parse_ipv6_hdr().
6
8
7
With one parameter, the command installs a hostfwd rule on the default
9
The effect of that is that network interfaces such as vmxnet3 that use
8
"user" network:
10
the following call chain
9
hostfwd_add tcp:...
11
eth_is_ip6_extension_header_type
12
eth_parse_ipv6_hdr
13
net_tx_pkt_parse_headers
14
net_tx_pkt_parse
15
vmxnet3_process_tx_queue
16
to send packets from the VM out to the host will drop packets of the
17
following structure:
18
Ethernet-Header(IPv6-Header(ESP(encrypted data)))
10
19
11
With two parameters, the command installs a hostfwd rule on a netdev
20
Note that not all types of network interfaces use the net_tx_pkt_parse
12
(that's the new way of using this command):
21
function though, leading to inconsistent behavior regarding sending
13
hostfwd_add netdev_id tcp:...
22
those packets. The e1000 network interface for example does not suffer
23
from this limitation.
14
24
15
With three parameters, the command installs a rule on a 'vlan' (aka hub):
25
By not considering ESP to be an IPv6 header we can allow sending those
16
hostfwd_add hub_id name tcp:...
26
packets out to the host on all types of network interfaces.
17
27
18
Same applies to the hostfwd_remove command now.
28
Fixes: 75020a702151 ("Common definitions for VMWARE devices")
19
29
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/149
20
Signed-off-by: Thomas Huth <thuth@redhat.com>
30
Buglink: https://bugs.launchpad.net/qemu/+bug/1758091
31
Signed-off-by: Thomas Jansen <mithi@mithi.net>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
32
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
---
33
---
23
hmp-commands.hx | 4 ++--
34
net/eth.c | 1 -
24
net/slirp.c | 33 +++++++++++++++++++++++----------
35
1 file changed, 1 deletion(-)
25
2 files changed, 25 insertions(+), 12 deletions(-)
26
36
27
diff --git a/hmp-commands.hx b/hmp-commands.hx
37
diff --git a/net/eth.c b/net/eth.c
28
index XXXXXXX..XXXXXXX 100644
38
index XXXXXXX..XXXXXXX 100644
29
--- a/hmp-commands.hx
39
--- a/net/eth.c
30
+++ b/hmp-commands.hx
40
+++ b/net/eth.c
31
@@ -XXX,XX +XXX,XX @@ ETEXI
41
@@ -XXX,XX +XXX,XX @@ eth_is_ip6_extension_header_type(uint8_t hdr_type)
32
{
42
case IP6_HOP_BY_HOP:
33
.name = "hostfwd_add",
43
case IP6_ROUTING:
34
.args_type = "arg1:s,arg2:s?,arg3:s?",
44
case IP6_FRAGMENT:
35
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
45
- case IP6_ESP:
36
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
46
case IP6_AUTHENTICATION:
37
.help = "redirect TCP or UDP connections from host to guest (requires -net user)",
47
case IP6_DESTINATON:
38
.cmd = hmp_hostfwd_add,
48
case IP6_MOBILITY:
39
},
40
@@ -XXX,XX +XXX,XX @@ ETEXI
41
{
42
.name = "hostfwd_remove",
43
.args_type = "arg1:s,arg2:s?,arg3:s?",
44
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport",
45
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport",
46
.help = "remove host-to-guest TCP or UDP redirection",
47
.cmd = hmp_hostfwd_remove,
48
},
49
diff --git a/net/slirp.c b/net/slirp.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/net/slirp.c
52
+++ b/net/slirp.c
53
@@ -XXX,XX +XXX,XX @@ error:
54
return -1;
55
}
56
57
-static SlirpState *slirp_lookup(Monitor *mon, const char *vlan,
58
- const char *stack)
59
+static SlirpState *slirp_lookup(Monitor *mon, const char *hub_id,
60
+ const char *name)
61
{
62
-
63
- if (vlan) {
64
+ if (name) {
65
NetClientState *nc;
66
- nc = net_hub_find_client_by_name(strtol(vlan, NULL, 0), stack);
67
- if (!nc) {
68
- monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
69
- return NULL;
70
+ if (hub_id) {
71
+ nc = net_hub_find_client_by_name(strtol(hub_id, NULL, 0), name);
72
+ if (!nc) {
73
+ monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
74
+ return NULL;
75
+ }
76
+ } else {
77
+ nc = qemu_find_netdev(name);
78
+ if (!nc) {
79
+ monitor_printf(mon, "unrecognized netdev id '%s'\n", name);
80
+ return NULL;
81
+ }
82
}
83
if (strcmp(nc->model, "user")) {
84
monitor_printf(mon, "invalid device specified\n");
85
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict)
86
const char *arg2 = qdict_get_try_str(qdict, "arg2");
87
const char *arg3 = qdict_get_try_str(qdict, "arg3");
88
89
- if (arg2) {
90
+ if (arg3) {
91
s = slirp_lookup(mon, arg1, arg2);
92
src_str = arg3;
93
+ } else if (arg2) {
94
+ s = slirp_lookup(mon, NULL, arg1);
95
+ src_str = arg2;
96
} else {
97
s = slirp_lookup(mon, NULL, NULL);
98
src_str = arg1;
99
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_add(Monitor *mon, const QDict *qdict)
100
const char *arg2 = qdict_get_try_str(qdict, "arg2");
101
const char *arg3 = qdict_get_try_str(qdict, "arg3");
102
103
- if (arg2) {
104
+ if (arg3) {
105
s = slirp_lookup(mon, arg1, arg2);
106
redir_str = arg3;
107
+ } else if (arg2) {
108
+ s = slirp_lookup(mon, NULL, arg1);
109
+ redir_str = arg2;
110
} else {
111
s = slirp_lookup(mon, NULL, NULL);
112
redir_str = arg1;
113
--
49
--
114
2.7.4
50
2.7.4
115
51
116
52
diff view generated by jsdifflib