1
The following changes since commit 6632f6ff96f0537fc34cdc00c760656fc62e23c5:
1
The following changes since commit 43ab9a5376c95c61ae898a222c4d04bdf60e239b:
2
2
3
Merge remote-tracking branch 'remotes/famz/tags/block-and-testing-pull-request' into staging (2017-07-17 11:46:36 +0100)
3
hw/i386/vmport: fix missing definitions with non-log trace backends (2017-12-21 22:52:28 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 189ae6bb5ce1f5a322f8691d00fe942ba43dd601:
9
for you to fetch changes up to 0065e915192cdf83c2700bb377e5323c2649476e:
10
10
11
virtio-net: fix offload ctrl endian (2017-07-17 20:13:56 +0800)
11
qemu-doc: Update the deprecation information of -tftp, -bootp, -redir and -smb (2017-12-22 10:06:05 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
- fix virtio-net ctrl offload endian
15
----------------------------------------------------------------
16
- vnet header support for various COLO netfilters and compare thread
16
Ed Swierk via Qemu-devel (2):
17
e1000, e1000e: Move per-packet TX offload flags out of context state
18
e1000: Separate TSO and non-TSO contexts, fixing UDP TX corruption
17
19
18
----------------------------------------------------------------
20
Mark Cave-Ayland (13):
19
Jason Wang (1):
21
net: move CRC32 calculation from compute_mcast_idx() into its own net_crc32() function
20
virtio-net: fix offload ctrl endian
22
net: introduce net_crc32_le() function
23
pcnet: switch pcnet over to use net_crc32_le()
24
eepro100: switch eepro100 e100_compute_mcast_idx() over to use net_crc32()
25
sunhme: switch sunhme over to use net_crc32_le()
26
sungem: fix multicast filter CRC calculation
27
eepro100: use inline net_crc32() and bitshift instead of compute_mcast_idx()
28
opencores_eth: use inline net_crc32() and bitshift instead of compute_mcast_idx()
29
lan9118: use inline net_crc32() and bitshift instead of compute_mcast_idx()
30
ftgmac100: use inline net_crc32() and bitshift instead of compute_mcast_idx()
31
ne2000: use inline net_crc32() and bitshift instead of compute_mcast_idx()
32
rtl8139: use inline net_crc32() and bitshift instead of compute_mcast_idx()
33
net: remove unused compute_mcast_idx() function
21
34
22
Michal Privoznik (1):
35
Thomas Huth (3):
23
virtion-net: Prefer is_power_of_2()
36
net: Remove the legacy "-net channel" parameter
37
qemu-doc: The "-net nic" option can be used with "netdev=...", too
38
qemu-doc: Update the deprecation information of -tftp, -bootp, -redir and -smb
24
39
25
Zhang Chen (12):
40
hw/net/e1000.c | 92 ++++++++++++++++++++++++++++----------------------
26
net: Add vnet_hdr_len arguments in NetClientState
41
hw/net/e1000e.c | 4 +--
27
net/net.c: Add vnet_hdr support in SocketReadState
42
hw/net/e1000e_core.c | 16 ++++-----
28
net/filter-mirror.c: Introduce parameter for filter_send()
43
hw/net/e1000e_core.h | 2 ++
29
net/filter-mirror.c: Make filter mirror support vnet support.
44
hw/net/e1000x_common.h | 2 --
30
net/filter-mirror.c: Add new option to enable vnet support for filter-redirector
45
hw/net/eepro100.c | 32 +++---------------
31
net/colo.c: Make vnet_hdr_len as packet property
46
hw/net/ftgmac100.c | 2 +-
32
net/colo-compare.c: Introduce parameter for compare_chr_send()
47
hw/net/lan9118.c | 3 +-
33
net/colo-compare.c: Make colo-compare support vnet_hdr_len
48
hw/net/ne2000.c | 4 ++-
34
net/colo.c: Add vnet packet parse feature in colo-proxy
49
hw/net/opencores_eth.c | 3 +-
35
net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare
50
hw/net/pcnet.c | 22 ++----------
36
net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len
51
hw/net/rtl8139.c | 2 +-
37
docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header
52
hw/net/sungem.c | 5 ++-
38
53
hw/net/sunhme.c | 25 +-------------
39
docs/colo-proxy.txt | 26 ++++++++++++++++
54
include/net/net.h | 5 ++-
40
hw/net/virtio-net.c | 4 ++-
55
include/net/slirp.h | 2 --
41
include/net/net.h | 10 ++++--
56
net/net.c | 40 +++++++++++++++-------
42
net/colo-compare.c | 84 ++++++++++++++++++++++++++++++++++++++++++---------
57
net/slirp.c | 34 -------------------
43
net/colo.c | 9 +++---
58
qemu-doc.texi | 38 +++++++++++----------
44
net/colo.h | 4 ++-
59
qemu-options.hx | 14 ++++----
45
net/filter-mirror.c | 75 +++++++++++++++++++++++++++++++++++++++++----
60
20 files changed, 144 insertions(+), 203 deletions(-)
46
net/filter-rewriter.c | 37 ++++++++++++++++++++++-
47
net/net.c | 37 ++++++++++++++++++++---
48
net/socket.c | 8 ++---
49
qemu-options.hx | 19 ++++++------
50
11 files changed, 265 insertions(+), 48 deletions(-)
51
61
52
62
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Ed Swierk via Qemu-devel <qemu-devel@nongnu.org>
2
2
3
We add the vnet_hdr_support option for filter-mirror; it is disabled by default.
3
sum_needed and cptse flags are received from the guest within each
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
transmit data descriptor. They are not part of the offload context;
5
You can use it for example:
5
instead, they determine how to apply a previously received context to
6
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
6
the packet being transmitted:
7
7
8
If the vnet_hdr_support flag is set, we will change the sending packet format from
8
- If cptse is set, perform both segmentation and checksum offload
9
struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];}.
9
using the parameters in the TSO context; otherwise just do checksum
10
This lets other modules (like colo-compare) know how to parse the net packet correctly.
10
offload. (Currently the e1000 device incorrectly stores only one
11
11
context, which will be fixed in a subsequent patch.)
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
12
13
- Depending on the bits set in sum_needed, possibly perform L4
14
checksum offload and/or IP checksum offload, using the parameters in
15
the appropriate context.
16
17
Move these flags out of struct e1000x_txd_props, which is otherwise
18
dedicated to storing values from a context descriptor, and into the
19
per-packet TX struct.
20
21
Signed-off-by: Ed Swierk <eswierk@skyportsystems.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
23
---
15
net/filter-mirror.c | 42 +++++++++++++++++++++++++++++++++++++++++-
24
hw/net/e1000.c | 30 ++++++++++++++++--------------
16
qemu-options.hx | 5 ++---
25
hw/net/e1000e.c | 4 ++--
17
2 files changed, 43 insertions(+), 4 deletions(-)
26
hw/net/e1000e_core.c | 16 ++++++++--------
18
27
hw/net/e1000e_core.h | 2 ++
19
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
28
hw/net/e1000x_common.h | 2 --
20
index XXXXXXX..XXXXXXX 100644
29
5 files changed, 28 insertions(+), 26 deletions(-)
21
--- a/net/filter-mirror.c
30
22
+++ b/net/filter-mirror.c
31
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
32
index XXXXXXX..XXXXXXX 100644
24
CharBackend chr_in;
33
--- a/hw/net/e1000.c
25
CharBackend chr_out;
34
+++ b/hw/net/e1000.c
26
SocketReadState rs;
35
@@ -XXX,XX +XXX,XX @@ typedef struct E1000State_st {
27
+ bool vnet_hdr;
36
unsigned char data[0x10000];
28
} MirrorState;
37
uint16_t size;
29
38
unsigned char vlan_needed;
30
static int filter_send(MirrorState *s,
39
+ unsigned char sum_needed;
31
const struct iovec *iov,
40
+ bool cptse;
32
int iovcnt)
41
e1000x_txd_props props;
42
uint16_t tso_frames;
43
} tx;
44
@@ -XXX,XX +XXX,XX @@ xmit_seg(E1000State *s)
45
unsigned int frames = s->tx.tso_frames, css, sofar;
46
struct e1000_tx *tp = &s->tx;
47
48
- if (tp->props.tse && tp->props.cptse) {
49
+ if (tp->props.tse && tp->cptse) {
50
css = tp->props.ipcss;
51
DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
52
frames, tp->size, css);
53
@@ -XXX,XX +XXX,XX @@ xmit_seg(E1000State *s)
54
}
55
} else /* UDP */
56
stw_be_p(tp->data+css+4, len);
57
- if (tp->props.sum_needed & E1000_TXD_POPTS_TXSM) {
58
+ if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
59
unsigned int phsum;
60
// add pseudo-header length before checksum calculation
61
void *sp = tp->data + tp->props.tucso;
62
@@ -XXX,XX +XXX,XX @@ xmit_seg(E1000State *s)
63
tp->tso_frames++;
64
}
65
66
- if (tp->props.sum_needed & E1000_TXD_POPTS_TXSM) {
67
+ if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
68
putsum(tp->data, tp->size, tp->props.tucso,
69
tp->props.tucss, tp->props.tucse);
70
}
71
- if (tp->props.sum_needed & E1000_TXD_POPTS_IXSM) {
72
+ if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
73
putsum(tp->data, tp->size, tp->props.ipcso,
74
tp->props.ipcss, tp->props.ipcse);
75
}
76
@@ -XXX,XX +XXX,XX @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
77
} else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
78
// data descriptor
79
if (tp->size == 0) {
80
- tp->props.sum_needed = le32_to_cpu(dp->upper.data) >> 8;
81
+ tp->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
82
}
83
- tp->props.cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
84
+ tp->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
85
} else {
86
// legacy descriptor
87
- tp->props.cptse = 0;
88
+ tp->cptse = 0;
89
}
90
91
if (e1000x_vlan_enabled(s->mac_reg) &&
92
e1000x_is_vlan_txd(txd_lower) &&
93
- (tp->props.cptse || txd_lower & E1000_TXD_CMD_EOP)) {
94
+ (tp->cptse || txd_lower & E1000_TXD_CMD_EOP)) {
95
tp->vlan_needed = 1;
96
stw_be_p(tp->vlan_header,
97
le16_to_cpu(s->mac_reg[VET]));
98
@@ -XXX,XX +XXX,XX @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
99
}
100
101
addr = le64_to_cpu(dp->buffer_addr);
102
- if (tp->props.tse && tp->props.cptse) {
103
+ if (tp->props.tse && tp->cptse) {
104
msh = tp->props.hdr_len + tp->props.mss;
105
do {
106
bytes = split_size;
107
@@ -XXX,XX +XXX,XX @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
108
}
109
split_size -= bytes;
110
} while (bytes && split_size);
111
- } else if (!tp->props.tse && tp->props.cptse) {
112
+ } else if (!tp->props.tse && tp->cptse) {
113
// context descriptor TSE is not set, while data descriptor TSE is set
114
DBGOUT(TXERR, "TCP segmentation error\n");
115
} else {
116
@@ -XXX,XX +XXX,XX @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
117
118
if (!(txd_lower & E1000_TXD_CMD_EOP))
119
return;
120
- if (!(tp->props.tse && tp->props.cptse && tp->size < tp->props.hdr_len)) {
121
+ if (!(tp->props.tse && tp->cptse && tp->size < tp->props.hdr_len)) {
122
xmit_seg(s);
123
}
124
tp->tso_frames = 0;
125
- tp->props.sum_needed = 0;
126
+ tp->sum_needed = 0;
127
tp->vlan_needed = 0;
128
tp->size = 0;
129
- tp->props.cptse = 0;
130
+ tp->cptse = 0;
131
}
132
133
static uint32_t
134
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_e1000 = {
135
VMSTATE_UINT16(tx.props.mss, E1000State),
136
VMSTATE_UINT16(tx.size, E1000State),
137
VMSTATE_UINT16(tx.tso_frames, E1000State),
138
- VMSTATE_UINT8(tx.props.sum_needed, E1000State),
139
+ VMSTATE_UINT8(tx.sum_needed, E1000State),
140
VMSTATE_INT8(tx.props.ip, E1000State),
141
VMSTATE_INT8(tx.props.tcp, E1000State),
142
VMSTATE_BUFFER(tx.header, E1000State),
143
diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c
144
index XXXXXXX..XXXXXXX 100644
145
--- a/hw/net/e1000e.c
146
+++ b/hw/net/e1000e.c
147
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription e1000e_vmstate_tx = {
148
.version_id = 1,
149
.minimum_version_id = 1,
150
.fields = (VMStateField[]) {
151
- VMSTATE_UINT8(props.sum_needed, struct e1000e_tx),
152
+ VMSTATE_UINT8(sum_needed, struct e1000e_tx),
153
VMSTATE_UINT8(props.ipcss, struct e1000e_tx),
154
VMSTATE_UINT8(props.ipcso, struct e1000e_tx),
155
VMSTATE_UINT16(props.ipcse, struct e1000e_tx),
156
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription e1000e_vmstate_tx = {
157
VMSTATE_INT8(props.ip, struct e1000e_tx),
158
VMSTATE_INT8(props.tcp, struct e1000e_tx),
159
VMSTATE_BOOL(props.tse, struct e1000e_tx),
160
- VMSTATE_BOOL(props.cptse, struct e1000e_tx),
161
+ VMSTATE_BOOL(cptse, struct e1000e_tx),
162
VMSTATE_BOOL(skip_cp, struct e1000e_tx),
163
VMSTATE_END_OF_LIST()
164
}
165
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
166
index XXXXXXX..XXXXXXX 100644
167
--- a/hw/net/e1000e_core.c
168
+++ b/hw/net/e1000e_core.c
169
@@ -XXX,XX +XXX,XX @@ e1000e_rss_parse_packet(E1000ECore *core,
170
static void
171
e1000e_setup_tx_offloads(E1000ECore *core, struct e1000e_tx *tx)
33
{
172
{
34
+ NetFilterState *nf = NETFILTER(s);
173
- if (tx->props.tse && tx->props.cptse) {
35
int ret = 0;
174
+ if (tx->props.tse && tx->cptse) {
36
ssize_t size = 0;
175
net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->props.mss);
37
uint32_t len = 0;
176
net_tx_pkt_update_ip_checksums(tx->tx_pkt);
38
@@ -XXX,XX +XXX,XX @@ static int filter_send(MirrorState *s,
177
e1000x_inc_reg_if_not_full(core->mac, TSCTC);
39
goto err;
178
return;
40
}
179
}
41
180
42
+ if (s->vnet_hdr) {
181
- if (tx->props.sum_needed & E1000_TXD_POPTS_TXSM) {
43
+ /*
182
+ if (tx->sum_needed & E1000_TXD_POPTS_TXSM) {
44
+ * If vnet_hdr = on, we send vnet header len to make other
183
net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0);
45
+ * module(like colo-compare) know how to parse net
184
}
46
+ * packet correctly.
185
47
+ */
186
- if (tx->props.sum_needed & E1000_TXD_POPTS_IXSM) {
48
+ ssize_t vnet_hdr_len;
187
+ if (tx->sum_needed & E1000_TXD_POPTS_IXSM) {
49
+
188
net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
50
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
51
+
52
+ len = htonl(vnet_hdr_len);
53
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
54
+ if (ret != sizeof(len)) {
55
+ goto err;
56
+ }
57
+ }
58
+
59
buf = g_malloc(size);
60
iov_to_buf(iov, iovcnt, 0, buf, size);
61
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
62
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
63
}
64
}
65
66
- net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
67
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
68
69
if (s->indev) {
70
chr = qemu_chr_find(s->indev);
71
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_set_outdev(Object *obj,
72
}
189
}
73
}
190
}
74
191
@@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core,
75
+static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp)
192
return;
76
+{
193
} else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
77
+ MirrorState *s = FILTER_MIRROR(obj);
194
/* data descriptor */
78
+
195
- tx->props.sum_needed = le32_to_cpu(dp->upper.data) >> 8;
79
+ return s->vnet_hdr;
196
- tx->props.cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
80
+}
197
+ tx->sum_needed = le32_to_cpu(dp->upper.data) >> 8;
81
+
198
+ tx->cptse = (txd_lower & E1000_TXD_CMD_TSE) ? 1 : 0;
82
+static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp)
199
e1000e_process_ts_option(core, dp);
83
+{
200
} else {
84
+ MirrorState *s = FILTER_MIRROR(obj);
201
/* legacy descriptor */
85
+
202
e1000e_process_ts_option(core, dp);
86
+ s->vnet_hdr = value;
203
- tx->props.cptse = 0;
87
+}
204
+ tx->cptse = 0;
88
+
205
}
89
static char *filter_redirector_get_outdev(Object *obj, Error **errp)
206
90
{
207
addr = le64_to_cpu(dp->buffer_addr);
91
MirrorState *s = FILTER_REDIRECTOR(obj);
208
@@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core,
92
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
209
tx->skip_cp = false;
93
210
net_tx_pkt_reset(tx->tx_pkt);
94
static void filter_mirror_init(Object *obj)
211
95
{
212
- tx->props.sum_needed = 0;
96
+ MirrorState *s = FILTER_MIRROR(obj);
213
- tx->props.cptse = 0;
97
+
214
+ tx->sum_needed = 0;
98
object_property_add_str(obj, "outdev", filter_mirror_get_outdev,
215
+ tx->cptse = 0;
99
filter_mirror_set_outdev, NULL);
216
}
100
+
101
+ s->vnet_hdr = false;
102
+ object_property_add_bool(obj, "vnet_hdr_support",
103
+ filter_mirror_get_vnet_hdr,
104
+ filter_mirror_set_vnet_hdr, NULL);
105
}
217
}
106
218
107
static void filter_redirector_init(Object *obj)
219
diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
108
diff --git a/qemu-options.hx b/qemu-options.hx
220
index XXXXXXX..XXXXXXX 100644
109
index XXXXXXX..XXXXXXX 100644
221
--- a/hw/net/e1000e_core.h
110
--- a/qemu-options.hx
222
+++ b/hw/net/e1000e_core.h
111
+++ b/qemu-options.hx
223
@@ -XXX,XX +XXX,XX @@ struct E1000Core {
112
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
224
e1000x_txd_props props;
113
@option{tx}: the filter is attached to the transmit queue of the netdev,
225
114
where it will receive packets sent by the netdev.
226
bool skip_cp;
115
227
+ unsigned char sum_needed;
116
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
228
+ bool cptse;
117
+@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
229
struct NetTxPkt *tx_pkt;
118
230
} tx[E1000E_NUM_QUEUES];
119
-filter-mirror on netdev @var{netdevid},mirror net packet to chardev
231
120
-@var{chardevid}
232
diff --git a/hw/net/e1000x_common.h b/hw/net/e1000x_common.h
121
+filter-mirror on netdev @var{netdevid}, mirror net packet to chardev @var{chardevid}. If it has the vnet_hdr_support flag, filter-mirror will mirror packets with vnet_hdr_len.
233
index XXXXXXX..XXXXXXX 100644
122
234
--- a/hw/net/e1000x_common.h
123
@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
235
+++ b/hw/net/e1000x_common.h
124
outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
236
@@ -XXX,XX +XXX,XX @@ void e1000x_update_regs_on_autoneg_done(uint32_t *mac, uint16_t *phy);
237
void e1000x_increase_size_stats(uint32_t *mac, const int *size_regs, int size);
238
239
typedef struct e1000x_txd_props {
240
- unsigned char sum_needed;
241
uint8_t ipcss;
242
uint8_t ipcso;
243
uint16_t ipcse;
244
@@ -XXX,XX +XXX,XX @@ typedef struct e1000x_txd_props {
245
int8_t ip;
246
int8_t tcp;
247
bool tse;
248
- bool cptse;
249
} e1000x_txd_props;
250
251
void e1000x_read_tx_ctx_descr(struct e1000_context_desc *d,
125
--
252
--
126
2.7.4
253
2.7.4
127
254
128
255
diff view generated by jsdifflib
New patch
1
1
From: Ed Swierk via Qemu-devel <qemu-devel@nongnu.org>
2
3
The device is supposed to maintain two distinct contexts for transmit
4
offloads: one has parameters for both segmentation and checksum
5
offload, the other only for checksum offload. The guest driver can
6
send two context descriptors, one for each context (the TSE flag
7
specifies which). Then the guest can refer to one or the other context
8
in subsequent transmit data descriptors, depending on what offloads it
9
wants applied to each packet.
10
11
Currently the e1000 device stores just one context, and misinterprets
12
the TSE flags in the context and data descriptors. This is often okay:
13
Linux happens to send a fresh context descriptor before every data
14
descriptor, so forgetting the other context doesn't matter. Windows
15
does rely on separate contexts for TSO vs. non-TSO packets, but for
16
mostly-TCP traffic the two contexts have identical TCP-specific
17
offload parameters so confusing them doesn't matter.
18
19
One case where this confusion matters is when a Windows guest sets up
20
a TSO context for TCP and a non-TSO context for UDP, and then
21
transmits both TCP and UDP traffic in parallel. The e1000 device
22
sometimes ends up using TCP-specific parameters while doing checksum
23
offload on a UDP datagram: it writes the checksum to offset 16 (the
24
correct location for a TCP checksum), stomping on two bytes of UDP
25
data, and leaving the wrong value in the actual UDP checksum field at
26
offset 6. (Even worse, the host network stack may then recompute the
27
UDP checksum, "correcting" it to match the corrupt data before sending
28
it out a physical interface.)
29
30
Correct this by tracking the TSO context independently of the non-TSO
31
context, and selecting the appropriate context based on the TSE flag
32
in each transmit data descriptor.
33
34
Signed-off-by: Ed Swierk <eswierk@skyportsystems.com>
35
Signed-off-by: Jason Wang <jasowang@redhat.com>
36
---
37
hw/net/e1000.c | 70 +++++++++++++++++++++++++++++++++-------------------------
38
1 file changed, 40 insertions(+), 30 deletions(-)
39
40
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
41
index XXXXXXX..XXXXXXX 100644
42
--- a/hw/net/e1000.c
43
+++ b/hw/net/e1000.c
44
@@ -XXX,XX +XXX,XX @@ typedef struct E1000State_st {
45
unsigned char sum_needed;
46
bool cptse;
47
e1000x_txd_props props;
48
+ e1000x_txd_props tso_props;
49
uint16_t tso_frames;
50
} tx;
51
52
@@ -XXX,XX +XXX,XX @@ xmit_seg(E1000State *s)
53
uint16_t len;
54
unsigned int frames = s->tx.tso_frames, css, sofar;
55
struct e1000_tx *tp = &s->tx;
56
+ struct e1000x_txd_props *props = tp->cptse ? &tp->tso_props : &tp->props;
57
58
- if (tp->props.tse && tp->cptse) {
59
- css = tp->props.ipcss;
60
+ if (tp->cptse) {
61
+ css = props->ipcss;
62
DBGOUT(TXSUM, "frames %d size %d ipcss %d\n",
63
frames, tp->size, css);
64
- if (tp->props.ip) { /* IPv4 */
65
+ if (props->ip) { /* IPv4 */
66
stw_be_p(tp->data+css+2, tp->size - css);
67
stw_be_p(tp->data+css+4,
68
lduw_be_p(tp->data + css + 4) + frames);
69
} else { /* IPv6 */
70
stw_be_p(tp->data+css+4, tp->size - css);
71
}
72
- css = tp->props.tucss;
73
+ css = props->tucss;
74
len = tp->size - css;
75
- DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", tp->props.tcp, css, len);
76
- if (tp->props.tcp) {
77
- sofar = frames * tp->props.mss;
78
+ DBGOUT(TXSUM, "tcp %d tucss %d len %d\n", props->tcp, css, len);
79
+ if (props->tcp) {
80
+ sofar = frames * props->mss;
81
stl_be_p(tp->data+css+4, ldl_be_p(tp->data+css+4)+sofar); /* seq */
82
- if (tp->props.paylen - sofar > tp->props.mss) {
83
+ if (props->paylen - sofar > props->mss) {
84
tp->data[css + 13] &= ~9; /* PSH, FIN */
85
} else if (frames) {
86
e1000x_inc_reg_if_not_full(s->mac_reg, TSCTC);
87
}
88
- } else /* UDP */
89
+ } else { /* UDP */
90
stw_be_p(tp->data+css+4, len);
91
+ }
92
if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
93
unsigned int phsum;
94
// add pseudo-header length before checksum calculation
95
- void *sp = tp->data + tp->props.tucso;
96
+ void *sp = tp->data + props->tucso;
97
98
phsum = lduw_be_p(sp) + len;
99
phsum = (phsum >> 16) + (phsum & 0xffff);
100
@@ -XXX,XX +XXX,XX @@ xmit_seg(E1000State *s)
101
}
102
103
if (tp->sum_needed & E1000_TXD_POPTS_TXSM) {
104
- putsum(tp->data, tp->size, tp->props.tucso,
105
- tp->props.tucss, tp->props.tucse);
106
+ putsum(tp->data, tp->size, props->tucso, props->tucss, props->tucse);
107
}
108
if (tp->sum_needed & E1000_TXD_POPTS_IXSM) {
109
- putsum(tp->data, tp->size, tp->props.ipcso,
110
- tp->props.ipcss, tp->props.ipcse);
111
+ putsum(tp->data, tp->size, props->ipcso, props->ipcss, props->ipcse);
112
}
113
if (tp->vlan_needed) {
114
memmove(tp->vlan, tp->data, 4);
115
@@ -XXX,XX +XXX,XX @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
116
117
s->mit_ide |= (txd_lower & E1000_TXD_CMD_IDE);
118
if (dtype == E1000_TXD_CMD_DEXT) { /* context descriptor */
119
- e1000x_read_tx_ctx_descr(xp, &tp->props);
120
- tp->tso_frames = 0;
121
- if (tp->props.tucso == 0) { /* this is probably wrong */
122
- DBGOUT(TXSUM, "TCP/UDP: cso 0!\n");
123
- tp->props.tucso = tp->props.tucss + (tp->props.tcp ? 16 : 6);
124
+ if (le32_to_cpu(xp->cmd_and_length) & E1000_TXD_CMD_TSE) {
125
+ e1000x_read_tx_ctx_descr(xp, &tp->tso_props);
126
+ tp->tso_frames = 0;
127
+ } else {
128
+ e1000x_read_tx_ctx_descr(xp, &tp->props);
129
}
130
return;
131
} else if (dtype == (E1000_TXD_CMD_DEXT | E1000_TXD_DTYP_D)) {
132
@@ -XXX,XX +XXX,XX @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
133
}
134
135
addr = le64_to_cpu(dp->buffer_addr);
136
- if (tp->props.tse && tp->cptse) {
137
- msh = tp->props.hdr_len + tp->props.mss;
138
+ if (tp->cptse) {
139
+ msh = tp->tso_props.hdr_len + tp->tso_props.mss;
140
do {
141
bytes = split_size;
142
if (tp->size + bytes > msh)
143
@@ -XXX,XX +XXX,XX @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
144
bytes = MIN(sizeof(tp->data) - tp->size, bytes);
145
pci_dma_read(d, addr, tp->data + tp->size, bytes);
146
sz = tp->size + bytes;
147
- if (sz >= tp->props.hdr_len && tp->size < tp->props.hdr_len) {
148
- memmove(tp->header, tp->data, tp->props.hdr_len);
149
+ if (sz >= tp->tso_props.hdr_len
150
+ && tp->size < tp->tso_props.hdr_len) {
151
+ memmove(tp->header, tp->data, tp->tso_props.hdr_len);
152
}
153
tp->size = sz;
154
addr += bytes;
155
if (sz == msh) {
156
xmit_seg(s);
157
- memmove(tp->data, tp->header, tp->props.hdr_len);
158
- tp->size = tp->props.hdr_len;
159
+ memmove(tp->data, tp->header, tp->tso_props.hdr_len);
160
+ tp->size = tp->tso_props.hdr_len;
161
}
162
split_size -= bytes;
163
} while (bytes && split_size);
164
- } else if (!tp->props.tse && tp->cptse) {
165
- // context descriptor TSE is not set, while data descriptor TSE is set
166
- DBGOUT(TXERR, "TCP segmentation error\n");
167
} else {
168
split_size = MIN(sizeof(tp->data) - tp->size, split_size);
169
pci_dma_read(d, addr, tp->data + tp->size, split_size);
170
@@ -XXX,XX +XXX,XX @@ process_tx_desc(E1000State *s, struct e1000_tx_desc *dp)
171
172
if (!(txd_lower & E1000_TXD_CMD_EOP))
173
return;
174
- if (!(tp->props.tse && tp->cptse && tp->size < tp->props.hdr_len)) {
175
+ if (!(tp->cptse && tp->size < tp->tso_props.hdr_len)) {
176
xmit_seg(s);
177
}
178
tp->tso_frames = 0;
179
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_e1000_full_mac_state = {
180
181
static const VMStateDescription vmstate_e1000 = {
182
.name = "e1000",
183
- .version_id = 2,
184
+ .version_id = 3,
185
.minimum_version_id = 1,
186
.pre_save = e1000_pre_save,
187
.post_load = e1000_post_load,
188
@@ -XXX,XX +XXX,XX @@ static const VMStateDescription vmstate_e1000 = {
189
VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, RA, 32),
190
VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, MTA, 128),
191
VMSTATE_UINT32_SUB_ARRAY(mac_reg, E1000State, VFTA, 128),
192
+ VMSTATE_UINT8_V(tx.tso_props.ipcss, E1000State, 3),
193
+ VMSTATE_UINT8_V(tx.tso_props.ipcso, E1000State, 3),
194
+ VMSTATE_UINT16_V(tx.tso_props.ipcse, E1000State, 3),
195
+ VMSTATE_UINT8_V(tx.tso_props.tucss, E1000State, 3),
196
+ VMSTATE_UINT8_V(tx.tso_props.tucso, E1000State, 3),
197
+ VMSTATE_UINT16_V(tx.tso_props.tucse, E1000State, 3),
198
+ VMSTATE_UINT32_V(tx.tso_props.paylen, E1000State, 3),
199
+ VMSTATE_UINT8_V(tx.tso_props.hdr_len, E1000State, 3),
200
+ VMSTATE_UINT16_V(tx.tso_props.mss, E1000State, 3),
201
+ VMSTATE_INT8_V(tx.tso_props.ip, E1000State, 3),
202
+ VMSTATE_INT8_V(tx.tso_props.tcp, E1000State, 3),
203
VMSTATE_END_OF_LIST()
204
},
205
.subsections = (const VMStateDescription*[]) {
206
--
207
2.7.4
208
209
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
Add a vnet_hdr_len field to NetClientState
3
Separate out the standard ethernet CRC32 calculation into a new net_crc32()
4
so that other modules can get the real vnet_hdr_len easily.
4
function, renaming the constant POLYNOMIAL to POLYNOMIAL_BE to make it clear
5
that this is a big-endian CRC32 calculation.
5
6
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
As part of the constant rename, remove the duplicate definition of POLYNOMIAL
8
from eepro100.c and use the new POLYNOMIAL_BE constant instead.
9
10
Once this is complete remove the existing CRC32 implementation from
11
compute_mcast_idx() and call the new net_crc32() function in its place.
12
13
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
14
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
16
---
9
include/net/net.h | 1 +
17
hw/net/eepro100.c | 4 +---
10
net/net.c | 1 +
18
include/net/net.h | 3 ++-
11
2 files changed, 2 insertions(+)
19
net/net.c | 16 +++++++++++-----
20
3 files changed, 14 insertions(+), 9 deletions(-)
12
21
22
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/net/eepro100.c
25
+++ b/hw/net/eepro100.c
26
@@ -XXX,XX +XXX,XX @@ static const uint16_t eepro100_mdi_mask[] = {
27
0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
28
};
29
30
-#define POLYNOMIAL 0x04c11db6
31
-
32
static E100PCIDeviceInfo *eepro100_get_class(EEPRO100State *s);
33
34
/* From FreeBSD (locally modified). */
35
@@ -XXX,XX +XXX,XX @@ static unsigned e100_compute_mcast_idx(const uint8_t *ep)
36
crc <<= 1;
37
b >>= 1;
38
if (carry) {
39
- crc = ((crc ^ POLYNOMIAL) | carry);
40
+ crc = ((crc ^ POLYNOMIAL_BE) | carry);
41
}
42
}
43
}
13
diff --git a/include/net/net.h b/include/net/net.h
44
diff --git a/include/net/net.h b/include/net/net.h
14
index XXXXXXX..XXXXXXX 100644
45
index XXXXXXX..XXXXXXX 100644
15
--- a/include/net/net.h
46
--- a/include/net/net.h
16
+++ b/include/net/net.h
47
+++ b/include/net/net.h
17
@@ -XXX,XX +XXX,XX @@ struct NetClientState {
48
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_port_find(int hub_id);
18
unsigned int queue_index;
49
19
unsigned rxfilter_notify_enabled:1;
50
void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd);
20
int vring_enable;
51
21
+ int vnet_hdr_len;
52
-#define POLYNOMIAL 0x04c11db6
22
QTAILQ_HEAD(NetFilterHead, NetFilterState) filters;
53
+#define POLYNOMIAL_BE 0x04c11db6
23
};
54
+uint32_t net_crc32(const uint8_t *p, int len);
24
55
unsigned compute_mcast_idx(const uint8_t *ep);
56
57
#define vmstate_offset_macaddr(_state, _field) \
25
diff --git a/net/net.c b/net/net.c
58
diff --git a/net/net.c b/net/net.c
26
index XXXXXXX..XXXXXXX 100644
59
index XXXXXXX..XXXXXXX 100644
27
--- a/net/net.c
60
--- a/net/net.c
28
+++ b/net/net.c
61
+++ b/net/net.c
29
@@ -XXX,XX +XXX,XX @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
62
@@ -XXX,XX +XXX,XX @@ int net_client_parse(QemuOptsList *opts_list, const char *optarg)
30
return;
63
64
/* From FreeBSD */
65
/* XXX: optimize */
66
-unsigned compute_mcast_idx(const uint8_t *ep)
67
+uint32_t net_crc32(const uint8_t *p, int len)
68
{
69
uint32_t crc;
70
int carry, i, j;
71
uint8_t b;
72
73
crc = 0xffffffff;
74
- for (i = 0; i < 6; i++) {
75
- b = *ep++;
76
+ for (i = 0; i < len; i++) {
77
+ b = *p++;
78
for (j = 0; j < 8; j++) {
79
carry = ((crc & 0x80000000L) ? 1 : 0) ^ (b & 0x01);
80
crc <<= 1;
81
b >>= 1;
82
if (carry) {
83
- crc = ((crc ^ POLYNOMIAL) | carry);
84
+ crc = ((crc ^ POLYNOMIAL_BE) | carry);
85
}
86
}
31
}
87
}
32
88
- return crc >> 26;
33
+ nc->vnet_hdr_len = len;
89
+
34
nc->info->set_vnet_hdr_len(nc, len);
90
+ return crc;
91
+}
92
+
93
+unsigned compute_mcast_idx(const uint8_t *ep)
94
+{
95
+ return net_crc32(ep, ETH_ALEN) >> 26;
35
}
96
}
36
97
98
QemuOptsList qemu_netdev_opts = {
37
--
99
--
38
2.7.4
100
2.7.4
39
101
40
102
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
We add a flag to decide whether net_fill_rstate() needs to read
3
This provides a standard ethernet CRC32 little-endian implementation.
4
the vnet_hdr_len or not.
5
4
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
5
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
7
Suggested-by: Jason Wang <jasowang@redhat.com>
6
Reviewed-by: Eric Blake <eblake@redhat.com>
7
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
9
---
10
include/net/net.h | 9 +++++++--
10
include/net/net.h | 2 ++
11
net/colo-compare.c | 4 ++--
11
net/net.c | 22 ++++++++++++++++++++++
12
net/filter-mirror.c | 2 +-
12
2 files changed, 24 insertions(+)
13
net/net.c | 36 ++++++++++++++++++++++++++++++++----
14
net/socket.c | 8 ++++----
15
5 files changed, 46 insertions(+), 13 deletions(-)
16
13
17
diff --git a/include/net/net.h b/include/net/net.h
14
diff --git a/include/net/net.h b/include/net/net.h
18
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
19
--- a/include/net/net.h
16
--- a/include/net/net.h
20
+++ b/include/net/net.h
17
+++ b/include/net/net.h
21
@@ -XXX,XX +XXX,XX @@ typedef struct NICState {
18
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_port_find(int hub_id);
22
} NICState;
19
void qdev_set_nic_properties(DeviceState *dev, NICInfo *nd);
23
20
24
struct SocketReadState {
21
#define POLYNOMIAL_BE 0x04c11db6
25
- int state; /* 0 = getting length, 1 = getting data */
22
+#define POLYNOMIAL_LE 0xedb88320
26
+ /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */
23
uint32_t net_crc32(const uint8_t *p, int len);
27
+ int state;
24
+uint32_t net_crc32_le(const uint8_t *p, int len);
28
+ /* This flag decide whether to read the vnet_hdr_len field */
25
unsigned compute_mcast_idx(const uint8_t *ep);
29
+ bool vnet_hdr;
26
30
uint32_t index;
27
#define vmstate_offset_macaddr(_state, _field) \
31
uint32_t packet_len;
32
+ uint32_t vnet_hdr_len;
33
uint8_t buf[NET_BUFSIZE];
34
SocketReadStateFinalize *finalize;
35
};
36
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
37
void print_net_client(Monitor *mon, NetClientState *nc);
38
void hmp_info_network(Monitor *mon, const QDict *qdict);
39
void net_socket_rs_init(SocketReadState *rs,
40
- SocketReadStateFinalize *finalize);
41
+ SocketReadStateFinalize *finalize,
42
+ bool vnet_hdr);
43
44
/* NIC info */
45
46
diff --git a/net/colo-compare.c b/net/colo-compare.c
47
index XXXXXXX..XXXXXXX 100644
48
--- a/net/colo-compare.c
49
+++ b/net/colo-compare.c
50
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
51
return;
52
}
53
54
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize);
55
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize);
56
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
57
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
58
59
g_queue_init(&s->conn_list);
60
61
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/net/filter-mirror.c
64
+++ b/net/filter-mirror.c
65
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
66
}
67
}
68
69
- net_socket_rs_init(&s->rs, redirector_rs_finalize);
70
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
71
72
if (s->indev) {
73
chr = qemu_chr_find(s->indev);
74
diff --git a/net/net.c b/net/net.c
28
diff --git a/net/net.c b/net/net.c
75
index XXXXXXX..XXXXXXX 100644
29
index XXXXXXX..XXXXXXX 100644
76
--- a/net/net.c
30
--- a/net/net.c
77
+++ b/net/net.c
31
+++ b/net/net.c
78
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_net_opts = {
32
@@ -XXX,XX +XXX,XX @@ uint32_t net_crc32(const uint8_t *p, int len)
79
};
33
return crc;
80
34
}
81
void net_socket_rs_init(SocketReadState *rs,
35
82
- SocketReadStateFinalize *finalize)
36
+uint32_t net_crc32_le(const uint8_t *p, int len)
83
+ SocketReadStateFinalize *finalize,
37
+{
84
+ bool vnet_hdr)
38
+ uint32_t crc;
39
+ int carry, i, j;
40
+ uint8_t b;
41
+
42
+ crc = 0xffffffff;
43
+ for (i = 0; i < len; i++) {
44
+ b = *p++;
45
+ for (j = 0; j < 8; j++) {
46
+ carry = (crc & 0x1) ^ (b & 0x01);
47
+ crc >>= 1;
48
+ b >>= 1;
49
+ if (carry) {
50
+ crc ^= POLYNOMIAL_LE;
51
+ }
52
+ }
53
+ }
54
+
55
+ return crc;
56
+}
57
+
58
unsigned compute_mcast_idx(const uint8_t *ep)
85
{
59
{
86
rs->state = 0;
60
return net_crc32(ep, ETH_ALEN) >> 26;
87
+ rs->vnet_hdr = vnet_hdr;
88
rs->index = 0;
89
rs->packet_len = 0;
90
+ rs->vnet_hdr_len = 0;
91
memset(rs->buf, 0, sizeof(rs->buf));
92
rs->finalize = finalize;
93
}
94
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
95
unsigned int l;
96
97
while (size > 0) {
98
- /* reassemble a packet from the network */
99
- switch (rs->state) { /* 0 = getting length, 1 = getting data */
100
+ /* Reassemble a packet from the network.
101
+ * 0 = getting length.
102
+ * 1 = getting vnet header length.
103
+ * 2 = getting data.
104
+ */
105
+ switch (rs->state) {
106
case 0:
107
l = 4 - rs->index;
108
if (l > size) {
109
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
110
/* got length */
111
rs->packet_len = ntohl(*(uint32_t *)rs->buf);
112
rs->index = 0;
113
- rs->state = 1;
114
+ if (rs->vnet_hdr) {
115
+ rs->state = 1;
116
+ } else {
117
+ rs->state = 2;
118
+ rs->vnet_hdr_len = 0;
119
+ }
120
}
121
break;
122
case 1:
123
+ l = 4 - rs->index;
124
+ if (l > size) {
125
+ l = size;
126
+ }
127
+ memcpy(rs->buf + rs->index, buf, l);
128
+ buf += l;
129
+ size -= l;
130
+ rs->index += l;
131
+ if (rs->index == 4) {
132
+ /* got vnet header length */
133
+ rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf);
134
+ rs->index = 0;
135
+ rs->state = 2;
136
+ }
137
+ break;
138
+ case 2:
139
l = rs->packet_len - rs->index;
140
if (l > size) {
141
l = size;
142
diff --git a/net/socket.c b/net/socket.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/net/socket.c
145
+++ b/net/socket.c
146
@@ -XXX,XX +XXX,XX @@ static void net_socket_send(void *opaque)
147
closesocket(s->fd);
148
149
s->fd = -1;
150
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
151
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
152
s->nc.link_down = true;
153
memset(s->nc.info_str, 0, sizeof(s->nc.info_str));
154
155
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
156
s->fd = fd;
157
s->listen_fd = -1;
158
s->send_fn = net_socket_send_dgram;
159
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
160
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
161
net_socket_read_poll(s, true);
162
163
/* mcast: save bound address as dst */
164
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
165
166
s->fd = fd;
167
s->listen_fd = -1;
168
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
169
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
170
171
/* Disable Nagle algorithm on TCP sockets to reduce latency */
172
socket_set_nodelay(fd);
173
@@ -XXX,XX +XXX,XX @@ static int net_socket_listen_init(NetClientState *peer,
174
s->fd = -1;
175
s->listen_fd = fd;
176
s->nc.link_down = true;
177
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
178
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
179
180
qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
181
return 0;
182
--
61
--
183
2.7.4
62
2.7.4
184
63
185
64
diff view generated by jsdifflib
New patch
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
1
2
3
Instead of lnc_mchash() using its own implementation, we can simply call
4
net_crc32_le() directly and apply the bit shift inline.
5
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
7
Reviewed-by: Eric Blake <eblake@redhat.com>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/net/pcnet.c | 22 ++--------------------
12
1 file changed, 2 insertions(+), 20 deletions(-)
13
14
diff --git a/hw/net/pcnet.c b/hw/net/pcnet.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/pcnet.c
17
+++ b/hw/net/pcnet.c
18
@@ -XXX,XX +XXX,XX @@
19
#include "qemu/osdep.h"
20
#include "hw/qdev.h"
21
#include "net/net.h"
22
+#include "net/eth.h"
23
#include "qemu/timer.h"
24
#include "qemu/sockets.h"
25
#include "sysemu/sysemu.h"
26
@@ -XXX,XX +XXX,XX @@ static inline void pcnet_rmd_store(PCNetState *s, struct pcnet_RMD *rmd,
27
be16_to_cpu(hdr->ether_type)); \
28
} while (0)
29
30
-#define MULTICAST_FILTER_LEN 8
31
-
32
-static inline uint32_t lnc_mchash(const uint8_t *ether_addr)
33
-{
34
-#define LNC_POLYNOMIAL 0xEDB88320UL
35
- uint32_t crc = 0xFFFFFFFF;
36
- int idx, bit;
37
- uint8_t data;
38
-
39
- for (idx = 0; idx < 6; idx++) {
40
- for (data = *ether_addr++, bit = 0; bit < MULTICAST_FILTER_LEN; bit++) {
41
- crc = (crc >> 1) ^ (((crc ^ data) & 1) ? LNC_POLYNOMIAL : 0);
42
- data >>= 1;
43
- }
44
- }
45
- return crc;
46
-#undef LNC_POLYNOMIAL
47
-}
48
-
49
#define CRC(crc, ch)     (crc = (crc >> 8) ^ crctab[(crc ^ (ch)) & 0xff])
50
51
/* generated using the AUTODIN II polynomial
52
@@ -XXX,XX +XXX,XX @@ static inline int ladr_match(PCNetState *s, const uint8_t *buf, int size)
53
s->csr[10] & 0xff, s->csr[10] >> 8,
54
s->csr[11] & 0xff, s->csr[11] >> 8
55
};
56
- int index = lnc_mchash(hdr->ether_dhost) >> 26;
57
+ int index = net_crc32_le(hdr->ether_dhost, ETH_ALEN) >> 26;
58
return !!(ladr[index >> 3] & (1 << (index & 7)));
59
}
60
return 0;
61
--
62
2.7.4
63
64
diff view generated by jsdifflib
New patch
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
1
2
3
Instead of e100_compute_mcast_idx() using its own implementation, we can
4
simply call net_crc32() directly and apply the bit shift inline.
5
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
7
Reviewed-by: Stefan Weil <sw@weilnetz.de>
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/net/eepro100.c | 28 ++++------------------------
12
1 file changed, 4 insertions(+), 24 deletions(-)
13
14
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/eepro100.c
17
+++ b/hw/net/eepro100.c
18
@@ -XXX,XX +XXX,XX @@
19
#include "hw/hw.h"
20
#include "hw/pci/pci.h"
21
#include "net/net.h"
22
+#include "net/eth.h"
23
#include "hw/nvram/eeprom93xx.h"
24
#include "sysemu/sysemu.h"
25
#include "sysemu/dma.h"
26
@@ -XXX,XX +XXX,XX @@ static const uint16_t eepro100_mdi_mask[] = {
27
28
static E100PCIDeviceInfo *eepro100_get_class(EEPRO100State *s);
29
30
-/* From FreeBSD (locally modified). */
31
-static unsigned e100_compute_mcast_idx(const uint8_t *ep)
32
-{
33
- uint32_t crc;
34
- int carry, i, j;
35
- uint8_t b;
36
-
37
- crc = 0xffffffff;
38
- for (i = 0; i < 6; i++) {
39
- b = *ep++;
40
- for (j = 0; j < 8; j++) {
41
- carry = ((crc & 0x80000000L) ? 1 : 0) ^ (b & 0x01);
42
- crc <<= 1;
43
- b >>= 1;
44
- if (carry) {
45
- crc = ((crc ^ POLYNOMIAL_BE) | carry);
46
- }
47
- }
48
- }
49
- return (crc & BITS(7, 2)) >> 2;
50
-}
51
-
52
/* Read a 16 bit control/status (CSR) register. */
53
static uint16_t e100_read_reg2(EEPRO100State *s, E100RegisterOffset addr)
54
{
55
@@ -XXX,XX +XXX,XX @@ static void set_multicast_list(EEPRO100State *s)
56
uint8_t multicast_addr[6];
57
pci_dma_read(&s->dev, s->cb_address + 10 + i, multicast_addr, 6);
58
TRACE(OTHER, logout("multicast entry %s\n", nic_dump(multicast_addr, 6)));
59
- unsigned mcast_idx = e100_compute_mcast_idx(multicast_addr);
60
+ unsigned mcast_idx = (net_crc32(multicast_addr, ETH_ALEN) &
61
+ BITS(7, 2)) >> 2;
62
assert(mcast_idx < 64);
63
s->mult[mcast_idx >> 3] |= (1 << (mcast_idx & 7));
64
}
65
@@ -XXX,XX +XXX,XX @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
66
if (s->configuration[21] & BIT(3)) {
67
/* Multicast all bit is set, receive all multicast frames. */
68
} else {
69
- unsigned mcast_idx = e100_compute_mcast_idx(buf);
70
+ unsigned mcast_idx = (net_crc32(buf, ETH_ALEN) & BITS(7, 2)) >> 2;
71
assert(mcast_idx < 64);
72
if (s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7))) {
73
/* Multicast frame is allowed in hash table. */
74
--
75
2.7.4
76
77
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
We add the vnet_hdr_support option for filter-rewriter, default is disabled.
3
Instead of sunhme_crc32_le() using its own implementation, we can simply call
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
net_crc32_le() directly and apply the bit shift inline.
5
You can use it for example:
6
-object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support
7
5
8
We get the vnet_hdr_len from NetClientState, which lets us
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
9
parse net packet correctly.
7
Reviewed-by: Eric Blake <eblake@redhat.com>
10
8
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
11
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
10
---
14
net/filter-rewriter.c | 37 ++++++++++++++++++++++++++++++++++++-
11
hw/net/sunhme.c | 25 +------------------------
15
qemu-options.hx | 4 ++--
12
1 file changed, 1 insertion(+), 24 deletions(-)
16
2 files changed, 38 insertions(+), 3 deletions(-)
17
13
18
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
14
diff --git a/hw/net/sunhme.c b/hw/net/sunhme.c
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/net/filter-rewriter.c
16
--- a/hw/net/sunhme.c
21
+++ b/net/filter-rewriter.c
17
+++ b/hw/net/sunhme.c
22
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ static inline void sunhme_set_rx_ring_nr(SunHMEState *s, int i)
23
#include "qemu-common.h"
19
s->erxregs[HME_ERXI_RING >> 2] = ring;
24
#include "qapi/error.h"
25
#include "qapi/qmp/qerror.h"
26
+#include "qemu/error-report.h"
27
#include "qapi-visit.h"
28
#include "qom/object.h"
29
#include "qemu/main-loop.h"
30
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
31
NetQueue *incoming_queue;
32
/* hashtable to save connection */
33
GHashTable *connection_track_table;
34
+ bool vnet_hdr;
35
} RewriterState;
36
37
static void filter_rewriter_flush(NetFilterState *nf)
38
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
39
ConnectionKey key;
40
Packet *pkt;
41
ssize_t size = iov_size(iov, iovcnt);
42
+ ssize_t vnet_hdr_len = 0;
43
char *buf = g_malloc0(size);
44
45
iov_to_buf(iov, iovcnt, 0, buf, size);
46
- pkt = packet_new(buf, size, 0);
47
+
48
+ if (s->vnet_hdr) {
49
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
50
+ }
51
+
52
+ pkt = packet_new(buf, size, vnet_hdr_len);
53
g_free(buf);
54
55
/*
56
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
57
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
58
}
20
}
59
21
60
+static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
22
-#define POLYNOMIAL_LE 0xedb88320
61
+{
23
-static uint32_t sunhme_crc32_le(const uint8_t *p, int len)
62
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
24
-{
63
+
25
- uint32_t crc;
64
+ return s->vnet_hdr;
26
- int carry, i, j;
65
+}
27
- uint8_t b;
66
+
28
-
67
+static void filter_rewriter_set_vnet_hdr(Object *obj,
29
- crc = 0xffffffff;
68
+ bool value,
30
- for (i = 0; i < len; i++) {
69
+ Error **errp)
31
- b = *p++;
70
+{
32
- for (j = 0; j < 8; j++) {
71
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
33
- carry = (crc & 0x1) ^ (b & 0x01);
72
+
34
- crc >>= 1;
73
+ s->vnet_hdr = value;
35
- b >>= 1;
74
+}
36
- if (carry) {
75
+
37
- crc = crc ^ POLYNOMIAL_LE;
76
+static void filter_rewriter_init(Object *obj)
38
- }
77
+{
39
- }
78
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
40
- }
79
+
41
-
80
+ s->vnet_hdr = false;
42
- return crc;
81
+ object_property_add_bool(obj, "vnet_hdr_support",
43
-}
82
+ filter_rewriter_get_vnet_hdr,
44
-
83
+ filter_rewriter_set_vnet_hdr, NULL);
45
#define MIN_BUF_SIZE 60
84
+}
46
85
+
47
static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf,
86
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
48
@@ -XXX,XX +XXX,XX @@ static ssize_t sunhme_receive(NetClientState *nc, const uint8_t *buf,
87
{
49
trace_sunhme_rx_filter_bcast_match();
88
NetFilterClass *nfc = NETFILTER_CLASS(oc);
50
} else if (s->macregs[HME_MACI_RXCFG >> 2] & HME_MAC_RXCFG_HENABLE) {
89
@@ -XXX,XX +XXX,XX @@ static const TypeInfo colo_rewriter_info = {
51
/* Didn't match local address, check hash filter */
90
.name = TYPE_FILTER_REWRITER,
52
- int mcast_idx = sunhme_crc32_le(buf, 6) >> 26;
91
.parent = TYPE_NETFILTER,
53
+ int mcast_idx = net_crc32_le(buf, ETH_ALEN) >> 26;
92
.class_init = colo_rewriter_class_init,
54
if (!(s->macregs[(HME_MACI_HASHTAB0 >> 2) - (mcast_idx >> 4)] &
93
+ .instance_init = filter_rewriter_init,
55
(1 << (mcast_idx & 0xf)))) {
94
.instance_size = sizeof(RewriterState),
56
/* Didn't match hash filter */
95
};
96
97
diff --git a/qemu-options.hx b/qemu-options.hx
98
index XXXXXXX..XXXXXXX 100644
99
--- a/qemu-options.hx
100
+++ b/qemu-options.hx
101
@@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
102
be the same. we can just use indev or outdev, but at least one of indev or outdev
103
need to be specified.
104
105
-@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid}[,queue=@var{all|rx|tx}]
106
+@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support]
107
108
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
109
secondary from primary to keep secondary tcp connection,and rewrite
110
tcp packet to primary from secondary make tcp packet can be handled by
111
-client.
112
+client. If it has the vnet_hdr_support flag, we can parse packets with a vnet header.
113
114
usage:
115
colo secondary:
116
--
57
--
117
2.7.4
58
2.7.4
118
59
119
60
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
COLO-Proxy just focuses on the packet payload, so we skip the vnet header.
3
From the Linux sungem driver, we know that the multicast filter CRC is
4
implemented using ether_crc_le() which isn't the same as calling zlib's
5
crc32() function (the zlib implementation requires a complemented initial value
6
and also returns the complemented result).
4
7
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
8
Fix the multicast filter by simply using the new net_crc32_le() function.
9
10
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
11
Reviewed-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
13
---
8
net/colo-compare.c | 8 ++++++--
14
hw/net/sungem.c | 5 ++---
9
1 file changed, 6 insertions(+), 2 deletions(-)
15
1 file changed, 2 insertions(+), 3 deletions(-)
10
16
11
diff --git a/net/colo-compare.c b/net/colo-compare.c
17
diff --git a/hw/net/sungem.c b/hw/net/sungem.c
12
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo-compare.c
19
--- a/hw/net/sungem.c
14
+++ b/net/colo-compare.c
20
+++ b/hw/net/sungem.c
15
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset)
21
@@ -XXX,XX +XXX,XX @@
16
sec_ip_src, sec_ip_dst);
22
#include "hw/pci/pci.h"
23
#include "qemu/log.h"
24
#include "net/net.h"
25
+#include "net/eth.h"
26
#include "net/checksum.h"
27
#include "hw/net/mii.h"
28
#include "sysemu/sysemu.h"
29
#include "trace.h"
30
-/* For crc32 */
31
-#include <zlib.h>
32
33
#define TYPE_SUNGEM "sungem"
34
35
@@ -XXX,XX +XXX,XX @@ static ssize_t sungem_receive(NetClientState *nc, const uint8_t *buf,
17
}
36
}
18
37
19
+ offset = ppkt->vnet_hdr_len + offset;
38
/* Get MAC crc */
20
+
39
- mac_crc = crc32(~0, buf, 6);
21
if (ppkt->size == spkt->size) {
40
+ mac_crc = net_crc32_le(buf, ETH_ALEN);
22
- return memcmp(ppkt->data + offset, spkt->data + offset,
41
23
+ return memcmp(ppkt->data + offset,
42
/* Packet isn't for me ? */
24
+ spkt->data + offset,
43
rx_cond = sungem_check_rx_mac(s, buf, mac_crc);
25
spkt->size - offset);
26
} else {
27
trace_colo_compare_main("Net packet size are not the same");
28
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
29
*/
30
if (ptcp->th_off > 5) {
31
ptrdiff_t tcp_offset;
32
+
33
tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
34
- + (ptcp->th_off * 4);
35
+ + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
36
res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
37
} else if (ptcp->th_sum == stcp->th_sum) {
38
res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
39
--
44
--
40
2.7.4
45
2.7.4
41
46
42
47
diff view generated by jsdifflib
New patch
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
1
2
3
This makes it much easier to compare the multicast CRC calculation endian and
4
bitshift against the Linux driver implementation.
5
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/eepro100.c | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
11
12
diff --git a/hw/net/eepro100.c b/hw/net/eepro100.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/eepro100.c
15
+++ b/hw/net/eepro100.c
16
@@ -XXX,XX +XXX,XX @@ static ssize_t nic_receive(NetClientState *nc, const uint8_t * buf, size_t size)
17
rfd_status |= 0x0004;
18
} else if (s->configuration[20] & BIT(6)) {
19
/* Multiple IA bit set. */
20
- unsigned mcast_idx = compute_mcast_idx(buf);
21
+ unsigned mcast_idx = net_crc32(buf, ETH_ALEN) >> 26;
22
assert(mcast_idx < 64);
23
if (s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7))) {
24
TRACE(RXTX, logout("%p accepted, multiple IA bit set\n", s));
25
--
26
2.7.4
27
28
diff view generated by jsdifflib
1
Spec said offloads should be le64, so use virtio_ldq_p() to guarantee
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
valid endian.
3
2
4
Fixes: 644c98587d4c ("virtio-net: dynamic network offloads configuration")
3
This makes it much easier to compare the multicast CRC calculation endian and
5
Cc: qemu-stable@nongnu.org
4
bitshift against the Linux driver implementation.
6
Cc: Dmitry Fleytman <dfleytma@redhat.com>
5
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
8
---
9
hw/net/virtio-net.c | 2 ++
9
hw/net/opencores_eth.c | 3 ++-
10
1 file changed, 2 insertions(+)
10
1 file changed, 2 insertions(+), 1 deletion(-)
11
11
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
12
diff --git a/hw/net/opencores_eth.c b/hw/net/opencores_eth.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
14
--- a/hw/net/opencores_eth.c
15
+++ b/hw/net/virtio-net.c
15
+++ b/hw/net/opencores_eth.c
16
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
16
@@ -XXX,XX +XXX,XX @@
17
if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
17
#include "hw/net/mii.h"
18
uint64_t supported_offloads;
18
#include "hw/sysbus.h"
19
19
#include "net/net.h"
20
+ offloads = virtio_ldq_p(vdev, &offloads);
20
+#include "net/eth.h"
21
+
21
#include "sysemu/sysemu.h"
22
if (!n->has_vnet_hdr) {
22
#include "trace.h"
23
return VIRTIO_NET_ERR;
23
24
}
24
@@ -XXX,XX +XXX,XX @@ static ssize_t open_eth_receive(NetClientState *nc,
25
if (memcmp(buf, bcast_addr, sizeof(bcast_addr)) == 0) {
26
miss = GET_REGBIT(s, MODER, BRO);
27
} else if ((buf[0] & 0x1) || GET_REGBIT(s, MODER, IAM)) {
28
- unsigned mcast_idx = compute_mcast_idx(buf);
29
+ unsigned mcast_idx = net_crc32(buf, ETH_ALEN) >> 26;
30
miss = !(s->regs[HASH0 + mcast_idx / 32] &
31
(1 << (mcast_idx % 32)));
32
trace_open_eth_receive_mcast(
25
--
33
--
26
2.7.4
34
2.7.4
27
35
28
36
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
3
This makes it much easier to compare the multicast CRC calculation endian and
4
bitshift against the Linux driver implementation.
5
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
8
---
6
docs/colo-proxy.txt | 26 ++++++++++++++++++++++++++
9
hw/net/lan9118.c | 3 ++-
7
1 file changed, 26 insertions(+)
10
1 file changed, 2 insertions(+), 1 deletion(-)
8
11
9
diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
12
diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c
10
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
11
--- a/docs/colo-proxy.txt
14
--- a/hw/net/lan9118.c
12
+++ b/docs/colo-proxy.txt
15
+++ b/hw/net/lan9118.c
13
@@ -XXX,XX +XXX,XX @@ Secondary(ip:3.3.3.8):
16
@@ -XXX,XX +XXX,XX @@
14
-chardev socket,id=red1,host=3.3.3.3,port=9004
17
#include "qemu/osdep.h"
15
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
18
#include "hw/sysbus.h"
16
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
19
#include "net/net.h"
17
+-object filter-rewriter,id=f3,netdev=hn0,queue=all
20
+#include "net/eth.h"
18
+
21
#include "hw/devices.h"
19
+If you want to use virtio-net-pci or other driver with vnet_header:
22
#include "sysemu/sysemu.h"
20
+
23
#include "hw/ptimer.h"
21
+Primary(ip:3.3.3.3):
24
@@ -XXX,XX +XXX,XX @@ static int lan9118_filter(lan9118_state *s, const uint8_t *addr)
22
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
25
}
23
+-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
26
} else {
24
+-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
27
/* Hash matching */
25
+-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
28
- hash = compute_mcast_idx(addr);
26
+-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
29
+ hash = net_crc32(addr, ETH_ALEN) >> 26;
27
+-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
30
if (hash & 0x20) {
28
+-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
31
return (s->mac_hashh >> (hash & 0x1f)) & 1;
29
+-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
32
} else {
30
+-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
31
+-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support
32
+-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support
33
+-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
34
+
35
+Secondary(ip:3.3.3.8):
36
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
37
+-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
38
+-chardev socket,id=red0,host=3.3.3.3,port=9003
39
+-chardev socket,id=red1,host=3.3.3.3,port=9004
40
+-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support
41
+-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support
42
+-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support
43
44
Note:
45
a.COLO-proxy must work with COLO-frame and Block-replication.
46
--
33
--
47
2.7.4
34
2.7.4
48
35
49
36
diff view generated by jsdifflib
1
From: Michal Privoznik <mprivozn@redhat.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
We have a function that checks if a given number is a power of two.
3
This makes it much easier to compare the multicast CRC calculation endian and
4
We should prefer it instead of expanding the check on our own.
4
bitshift against the Linux driver implementation.
5
5
6
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
8
---
9
hw/net/virtio-net.c | 2 +-
9
hw/net/ftgmac100.c | 2 +-
10
1 file changed, 1 insertion(+), 1 deletion(-)
10
1 file changed, 1 insertion(+), 1 deletion(-)
11
11
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
12
diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
14
--- a/hw/net/ftgmac100.c
15
+++ b/hw/net/virtio-net.c
15
+++ b/hw/net/ftgmac100.c
16
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
16
@@ -XXX,XX +XXX,XX @@ static int ftgmac100_filter(FTGMAC100State *s, const uint8_t *buf, size_t len)
17
*/
17
}
18
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
18
19
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
19
/* TODO: this does not seem to work for ftgmac100 */
20
- (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
20
- mcast_idx = compute_mcast_idx(buf);
21
+ !is_power_of_2(n->net_conf.rx_queue_size)) {
21
+ mcast_idx = net_crc32(buf, ETH_ALEN) >> 26;
22
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
22
if (!(s->math[mcast_idx / 32] & (1 << (mcast_idx % 32)))) {
23
"must be a power of 2 between %d and %d.",
23
return 0;
24
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
24
}
25
--
25
--
26
2.7.4
26
2.7.4
27
27
28
28
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
Make colo-compare and filter-rewriter able to parse vnet packets.
3
This makes it much easier to compare the multicast CRC calculation endian and
4
bitshift against the Linux driver implementation.
4
5
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
---
8
net/colo.c | 6 +++---
9
hw/net/ne2000.c | 4 +++-
9
1 file changed, 3 insertions(+), 3 deletions(-)
10
1 file changed, 3 insertions(+), 1 deletion(-)
10
11
11
diff --git a/net/colo.c b/net/colo.c
12
diff --git a/hw/net/ne2000.c b/hw/net/ne2000.c
12
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo.c
14
--- a/hw/net/ne2000.c
14
+++ b/net/colo.c
15
+++ b/hw/net/ne2000.c
15
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
16
@@ -XXX,XX +XXX,XX @@
16
{
17
*/
17
int network_length;
18
#include "qemu/osdep.h"
18
static const uint8_t vlan[] = {0x81, 0x00};
19
#include "hw/pci/pci.h"
19
- uint8_t *data = pkt->data;
20
+#include "net/net.h"
20
+ uint8_t *data = pkt->data + pkt->vnet_hdr_len;
21
+#include "net/eth.h"
21
uint16_t l3_proto;
22
#include "ne2000.h"
22
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
23
#include "hw/loader.h"
23
24
#include "sysemu/sysemu.h"
24
- if (pkt->size < ETH_HLEN) {
25
@@ -XXX,XX +XXX,XX @@ ssize_t ne2000_receive(NetClientState *nc, const uint8_t *buf, size_t size_)
25
+ if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
26
/* multicast */
26
trace_colo_proxy_main("pkt->size < ETH_HLEN");
27
if (!(s->rxcr & 0x08))
27
return 1;
28
return size;
28
}
29
- mcast_idx = compute_mcast_idx(buf);
29
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
30
+ mcast_idx = net_crc32(buf, ETH_ALEN) >> 26;
30
}
31
if (!(s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7))))
31
32
return size;
32
network_length = pkt->ip->ip_hl * 4;
33
} else if (s->mem[0] == buf[0] &&
33
- if (pkt->size < l2hdr_len + network_length) {
34
+ if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) {
35
trace_colo_proxy_main("pkt->size < network_header + network_length");
36
return 1;
37
}
38
--
34
--
39
2.7.4
35
2.7.4
40
36
41
37
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
This patch changes the compare_chr_send() parameter from CharBackend to CompareState,
3
This makes it much easier to compare the multicast CRC calculation endian and
4
so we can get more information such as vnet_hdr (we use it to support packets with a vnet header).
4
bitshift against the Linux driver implementation.
5
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
8
---
9
net/colo-compare.c | 14 +++++++-------
9
hw/net/rtl8139.c | 2 +-
10
1 file changed, 7 insertions(+), 7 deletions(-)
10
1 file changed, 1 insertion(+), 1 deletion(-)
11
11
12
diff --git a/net/colo-compare.c b/net/colo-compare.c
12
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
13
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
14
--- a/net/colo-compare.c
14
--- a/hw/net/rtl8139.c
15
+++ b/net/colo-compare.c
15
+++ b/hw/net/rtl8139.c
16
@@ -XXX,XX +XXX,XX @@ enum {
16
@@ -XXX,XX +XXX,XX @@ static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t
17
SECONDARY_IN,
17
return size;
18
};
19
20
-static int compare_chr_send(CharBackend *out,
21
+static int compare_chr_send(CompareState *s,
22
const uint8_t *buf,
23
uint32_t size);
24
25
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
26
}
27
28
if (result) {
29
- ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size);
30
+ ret = compare_chr_send(s, pkt->data, pkt->size);
31
if (ret < 0) {
32
error_report("colo_send_primary_packet failed");
33
}
18
}
34
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
19
35
}
20
- int mcast_idx = compute_mcast_idx(buf);
36
}
21
+ int mcast_idx = net_crc32(buf, ETH_ALEN) >> 26;
37
22
38
-static int compare_chr_send(CharBackend *out,
23
if (!(s->mult[mcast_idx >> 3] & (1 << (mcast_idx & 7))))
39
+static int compare_chr_send(CompareState *s,
24
{
40
const uint8_t *buf,
41
uint32_t size)
42
{
43
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CharBackend *out,
44
return 0;
45
}
46
47
- ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
48
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
49
if (ret != sizeof(len)) {
50
goto err;
51
}
52
53
- ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
54
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
55
if (ret != size) {
56
goto err;
57
}
58
@@ -XXX,XX +XXX,XX @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs)
59
60
if (packet_enqueue(s, PRIMARY_IN)) {
61
trace_colo_compare_main("primary: unsupported packet in");
62
- compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len);
63
+ compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
64
} else {
65
/* compare connection */
66
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
67
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
68
69
while (!g_queue_is_empty(&conn->primary_list)) {
70
pkt = g_queue_pop_head(&conn->primary_list);
71
- compare_chr_send(&s->chr_out, pkt->data, pkt->size);
72
+ compare_chr_send(s, pkt->data, pkt->size);
73
packet_destroy(pkt, NULL);
74
}
75
while (!g_queue_is_empty(&conn->secondary_list)) {
76
--
25
--
77
2.7.4
26
2.7.4
78
27
79
28
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
2
2
3
We can use this property to flush and send packets with vnet_hdr_len.
3
Now that all of the callers have been converted to compute the multicast index
4
inline using new net CRC functions, this function can now be dropped.
4
5
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Signed-off-by: Mark Cave-Ayland <mark.cave-ayland@ilande.co.uk>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
---
8
net/colo-compare.c | 8 ++++++--
9
net/net.c | 5 -----
9
net/colo.c | 3 ++-
10
1 file changed, 5 deletions(-)
10
net/colo.h | 4 +++-
11
net/filter-rewriter.c | 2 +-
12
4 files changed, 12 insertions(+), 5 deletions(-)
13
11
14
diff --git a/net/colo-compare.c b/net/colo-compare.c
12
diff --git a/net/net.c b/net/net.c
15
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
16
--- a/net/colo-compare.c
14
--- a/net/net.c
17
+++ b/net/colo-compare.c
15
+++ b/net/net.c
18
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode)
16
@@ -XXX,XX +XXX,XX @@ uint32_t net_crc32_le(const uint8_t *p, int len)
19
Connection *conn;
17
return crc;
20
21
if (mode == PRIMARY_IN) {
22
- pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
23
+ pkt = packet_new(s->pri_rs.buf,
24
+ s->pri_rs.packet_len,
25
+ s->pri_rs.vnet_hdr_len);
26
} else {
27
- pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
28
+ pkt = packet_new(s->sec_rs.buf,
29
+ s->sec_rs.packet_len,
30
+ s->sec_rs.vnet_hdr_len);
31
}
32
33
if (parse_packet_early(pkt)) {
34
diff --git a/net/colo.c b/net/colo.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/net/colo.c
37
+++ b/net/colo.c
38
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque)
39
g_slice_free(Connection, conn);
40
}
18
}
41
19
42
-Packet *packet_new(const void *data, int size)
20
-unsigned compute_mcast_idx(const uint8_t *ep)
43
+Packet *packet_new(const void *data, int size, int vnet_hdr_len)
21
-{
44
{
22
- return net_crc32(ep, ETH_ALEN) >> 26;
45
Packet *pkt = g_slice_new(Packet);
23
-}
46
24
-
47
pkt->data = g_memdup(data, size);
25
QemuOptsList qemu_netdev_opts = {
48
pkt->size = size;
26
.name = "netdev",
49
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
27
.implied_opt_name = "type",
50
+ pkt->vnet_hdr_len = vnet_hdr_len;
51
52
return pkt;
53
}
54
diff --git a/net/colo.h b/net/colo.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/net/colo.h
57
+++ b/net/colo.h
58
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
59
int size;
60
/* Time of packet creation, in wall clock ms */
61
int64_t creation_ms;
62
+ /* Get vnet_hdr_len from filter */
63
+ uint32_t vnet_hdr_len;
64
} Packet;
65
66
typedef struct ConnectionKey {
67
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
68
ConnectionKey *key,
69
GQueue *conn_list);
70
void connection_hashtable_reset(GHashTable *connection_track_table);
71
-Packet *packet_new(const void *data, int size);
72
+Packet *packet_new(const void *data, int size, int vnet_hdr_len);
73
void packet_destroy(void *opaque, void *user_data);
74
75
#endif /* QEMU_COLO_PROXY_H */
76
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/net/filter-rewriter.c
79
+++ b/net/filter-rewriter.c
80
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
81
char *buf = g_malloc0(size);
82
83
iov_to_buf(iov, iovcnt, 0, buf, size);
84
- pkt = packet_new(buf, size);
85
+ pkt = packet_new(buf, size, 0);
86
g_free(buf);
87
88
/*
89
--
28
--
90
2.7.4
29
2.7.4
91
30
92
31
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
This patch changes the filter_send() parameter from CharBackend to MirrorState, so
3
It has never been documented, so hardly anybody knows about this
4
we can get more information like vnet_hdr (we use it to support packets with a vnet header).
4
parameter, and it is marked as deprecated since QEMU v2.6.
5
Time to let it go now.
5
6
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Reviewed-by: Samuel Thibault <samuel.thibault@ens-lyon.org>
8
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
10
---
9
net/filter-mirror.c | 10 +++++-----
11
include/net/slirp.h | 2 --
10
1 file changed, 5 insertions(+), 5 deletions(-)
12
net/net.c | 7 -------
13
net/slirp.c | 34 ----------------------------------
14
qemu-doc.texi | 5 -----
15
4 files changed, 48 deletions(-)
11
16
12
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
17
diff --git a/include/net/slirp.h b/include/net/slirp.h
13
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
14
--- a/net/filter-mirror.c
19
--- a/include/net/slirp.h
15
+++ b/net/filter-mirror.c
20
+++ b/include/net/slirp.h
16
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
21
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict);
17
SocketReadState rs;
22
18
} MirrorState;
23
int net_slirp_redir(const char *redir_str);
19
24
20
-static int filter_send(CharBackend *chr_out,
25
-int net_slirp_parse_legacy(QemuOptsList *opts_list, const char *optarg, int *ret);
21
+static int filter_send(MirrorState *s,
26
-
22
const struct iovec *iov,
27
int net_slirp_smb(const char *exported_dir);
23
int iovcnt)
28
29
void hmp_info_usernet(Monitor *mon, const QDict *qdict);
30
diff --git a/net/net.c b/net/net.c
31
index XXXXXXX..XXXXXXX 100644
32
--- a/net/net.c
33
+++ b/net/net.c
34
@@ -XXX,XX +XXX,XX @@ int net_init_clients(void)
35
36
int net_client_parse(QemuOptsList *opts_list, const char *optarg)
24
{
37
{
25
@@ -XXX,XX +XXX,XX @@ static int filter_send(CharBackend *chr_out,
38
-#if defined(CONFIG_SLIRP)
39
- int ret;
40
- if (net_slirp_parse_legacy(opts_list, optarg, &ret)) {
41
- return ret;
42
- }
43
-#endif
44
-
45
if (!qemu_opts_parse_noisily(opts_list, optarg, true)) {
46
return -1;
26
}
47
}
27
48
diff --git a/net/slirp.c b/net/slirp.c
28
len = htonl(size);
49
index XXXXXXX..XXXXXXX 100644
29
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len));
50
--- a/net/slirp.c
30
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
51
+++ b/net/slirp.c
31
if (ret != sizeof(len)) {
52
@@ -XXX,XX +XXX,XX @@ int net_init_slirp(const Netdev *netdev, const char *name,
32
goto err;
53
33
}
54
return ret;
34
55
}
35
buf = g_malloc(size);
56
-
36
iov_to_buf(iov, iovcnt, 0, buf, size);
57
-int net_slirp_parse_legacy(QemuOptsList *opts_list, const char *optarg, int *ret)
37
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size);
58
-{
38
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
59
- if (strcmp(opts_list->name, "net") != 0 ||
39
g_free(buf);
60
- strncmp(optarg, "channel,", strlen("channel,")) != 0) {
40
if (ret != size) {
61
- return 0;
41
goto err;
62
- }
42
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
63
-
43
MirrorState *s = FILTER_MIRROR(nf);
64
- error_report("The '-net channel' option is deprecated. "
44
int ret;
65
- "Please use '-netdev user,guestfwd=...' instead.");
45
66
-
46
- ret = filter_send(&s->chr_out, iov, iovcnt);
67
- /* handle legacy -net channel,port:chr */
47
+ ret = filter_send(s, iov, iovcnt);
68
- optarg += strlen("channel,");
48
if (ret) {
69
-
49
error_report("filter mirror send failed(%s)", strerror(-ret));
70
- if (QTAILQ_EMPTY(&slirp_stacks)) {
50
}
71
- struct slirp_config_str *config;
51
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf,
72
-
52
int ret;
73
- config = g_malloc(sizeof(*config));
53
74
- pstrcpy(config->str, sizeof(config->str), optarg);
54
if (qemu_chr_fe_backend_connected(&s->chr_out)) {
75
- config->flags = SLIRP_CFG_LEGACY;
55
- ret = filter_send(&s->chr_out, iov, iovcnt);
76
- config->next = slirp_configs;
56
+ ret = filter_send(s, iov, iovcnt);
77
- slirp_configs = config;
57
if (ret) {
78
- *ret = 0;
58
error_report("filter redirector send failed(%s)", strerror(-ret));
79
- } else {
59
}
80
- Error *err = NULL;
81
- *ret = slirp_guestfwd(QTAILQ_FIRST(&slirp_stacks), optarg, 1, &err);
82
- if (*ret < 0) {
83
- error_report_err(err);
84
- }
85
- }
86
-
87
- return 1;
88
-}
89
-
90
diff --git a/qemu-doc.texi b/qemu-doc.texi
91
index XXXXXXX..XXXXXXX 100644
92
--- a/qemu-doc.texi
93
+++ b/qemu-doc.texi
94
@@ -XXX,XX +XXX,XX @@ The ``-smb /some/dir'' argument is now a synonym for setting
95
the ``-netdev user,smb=/some/dir'' argument instead. The new
96
syntax allows different settings to be provided per NIC.
97
98
-@subsection -net channel (since 2.6.0)
99
-
100
-The ``--net channel,ARGS'' argument is now a synonym for setting
101
-the ``-netdev user,guestfwd=ARGS'' argument instead.
102
-
103
@subsection -net vlan (since 2.9.0)
104
105
The ``-net vlan=NN'' argument is partially replaced with the
60
--
106
--
61
2.7.4
107
2.7.4
62
108
63
109
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
We add the vnet_hdr_support option for filter-redirector; it is disabled by default.
3
Looks like we forgot to document that it is also possible to specify
4
If you use the virtio-net-pci net driver or another driver that needs vnet_hdr, please enable it.
4
a netdev with "-net nic" - which is very useful if you want to
5
Because colo-compare or other modules need the vnet_hdr_len to parse
5
configure your on-board NIC to use a backend that has been specified
6
packets, we add this new option to send the length to others.
6
with "-netdev".
7
You can use it for example:
8
-object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support
9
7
10
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
8
Signed-off-by: Thomas Huth <thuth@redhat.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
10
---
13
net/filter-mirror.c | 23 +++++++++++++++++++++++
11
qemu-options.hx | 14 ++++++++------
14
qemu-options.hx | 6 +++---
12
1 file changed, 8 insertions(+), 6 deletions(-)
15
2 files changed, 26 insertions(+), 3 deletions(-)
16
13
17
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/net/filter-mirror.c
20
+++ b/net/filter-mirror.c
21
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
22
s->outdev = g_strdup(value);
23
}
24
25
+static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp)
26
+{
27
+ MirrorState *s = FILTER_REDIRECTOR(obj);
28
+
29
+ return s->vnet_hdr;
30
+}
31
+
32
+static void filter_redirector_set_vnet_hdr(Object *obj,
33
+ bool value,
34
+ Error **errp)
35
+{
36
+ MirrorState *s = FILTER_REDIRECTOR(obj);
37
+
38
+ s->vnet_hdr = value;
39
+}
40
+
41
static void filter_mirror_init(Object *obj)
42
{
43
MirrorState *s = FILTER_MIRROR(obj);
44
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_init(Object *obj)
45
46
static void filter_redirector_init(Object *obj)
47
{
48
+ MirrorState *s = FILTER_REDIRECTOR(obj);
49
+
50
object_property_add_str(obj, "indev", filter_redirector_get_indev,
51
filter_redirector_set_indev, NULL);
52
object_property_add_str(obj, "outdev", filter_redirector_get_outdev,
53
filter_redirector_set_outdev, NULL);
54
+
55
+ s->vnet_hdr = false;
56
+ object_property_add_bool(obj, "vnet_hdr_support",
57
+ filter_redirector_get_vnet_hdr,
58
+ filter_redirector_set_vnet_hdr, NULL);
59
}
60
61
static void filter_mirror_fini(Object *obj)
62
diff --git a/qemu-options.hx b/qemu-options.hx
14
diff --git a/qemu-options.hx b/qemu-options.hx
63
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
64
--- a/qemu-options.hx
16
--- a/qemu-options.hx
65
+++ b/qemu-options.hx
17
+++ b/qemu-options.hx
66
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
18
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
67
19
"-netdev hubport,id=str,hubid=n\n"
68
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
20
" configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL)
69
21
DEF("net", HAS_ARG, QEMU_OPTION_net,
70
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
22
- "-net nic[,vlan=n][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
71
-outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
23
- " old way to create a new NIC and connect it to VLAN 'n'\n"
72
+@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
24
- " (use the '-device devtype,netdev=str' option if possible instead)\n"
73
25
+ "-net nic[,vlan=n][,netdev=nd][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
74
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
26
+ " configure or create an on-board (or machine default) NIC and\n"
75
-@var{chardevid},and redirect indev's packet to filter.
27
+ " connect it either to VLAN 'n' or the netdev 'nd' (for pluggable\n"
76
+@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag,
28
+ " NICs please use '-device devtype,netdev=nd' instead)\n"
77
+filter-redirector will redirect packet with vnet_hdr_len.
29
"-net dump[,vlan=n][,file=f][,len=n]\n"
78
Create a filter-redirector we need to differ outdev id from indev id, id can not
30
" dump traffic on vlan 'n' to file 'f' (max n bytes per packet)\n"
79
be the same. we can just use indev or outdev, but at least one of indev or outdev
31
"-net none use it alone to have zero network devices. If no -net option\n"
80
need to be specified.
32
@@ -XXX,XX +XXX,XX @@ DEF("net", HAS_ARG, QEMU_OPTION_net,
33
" old way to initialize a host network interface\n"
34
" (use the -netdev option if possible instead)\n", QEMU_ARCH_ALL)
35
STEXI
36
-@item -net nic[,vlan=@var{n}][,macaddr=@var{mac}][,model=@var{type}] [,name=@var{name}][,addr=@var{addr}][,vectors=@var{v}]
37
+@item -net nic[,vlan=@var{n}][,netdev=@var{nd}][,macaddr=@var{mac}][,model=@var{type}] [,name=@var{name}][,addr=@var{addr}][,vectors=@var{v}]
38
@findex -net
39
-Create a new Network Interface Card and connect it to VLAN @var{n} (@var{n}
40
-= 0 is the default). The NIC is an e1000 by default on the PC
41
+Configure or create an on-board (or machine default) Network Interface Card
42
+(NIC) and connect it either to VLAN @var{n} (@var{n} = 0 is the default), or
43
+to the netdev @var{nd}. The NIC is an e1000 by default on the PC
44
target. Optionally, the MAC address can be changed to @var{mac}, the
45
device address set to @var{addr} (PCI cards only),
46
and a @var{name} can be assigned for use in monitor commands.
81
--
47
--
82
2.7.4
48
2.7.4
83
49
84
50
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Thomas Huth <thuth@redhat.com>
2
2
3
We add the vnet_hdr_support option for colo-compare; it is disabled by default.
3
The information on how to update the deprecated parameters was too scarce,
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
so that some people did not update to the new syntax yet. Provide some
5
You can use it for example:
5
more information to make sure that it is clear how to update from the
6
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
6
old syntax to the new one.
7
7
8
COLO-compare can get vnet header length from filter,
8
Signed-off-by: Thomas Huth <thuth@redhat.com>
9
Add vnet_hdr_len to struct packet and output packet with
10
the vnet_hdr_len.
11
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
10
---
15
net/colo-compare.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------
11
qemu-doc.texi | 33 +++++++++++++++++++++------------
16
qemu-options.hx | 4 ++--
12
1 file changed, 21 insertions(+), 12 deletions(-)
17
2 files changed, 55 insertions(+), 9 deletions(-)
18
13
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
14
diff --git a/qemu-doc.texi b/qemu-doc.texi
20
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
16
--- a/qemu-doc.texi
22
+++ b/net/colo-compare.c
17
+++ b/qemu-doc.texi
23
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
18
@@ -XXX,XX +XXX,XX @@ combined with ``-vnc tls-creds=tls0'
24
CharBackend chr_out;
19
25
SocketReadState pri_rs;
20
@subsection -tftp (since 2.6.0)
26
SocketReadState sec_rs;
21
27
+ bool vnet_hdr;
22
-The ``-tftp /some/dir'' argument is now a synonym for setting
28
23
-the ``-netdev user,tftp=/some/dir' argument. The new syntax
29
/* connection list: the connections belonged to this NIC could be found
24
-allows different settings to be provided per NIC.
30
* in this list.
25
+The ``-tftp /some/dir'' argument is replaced by
31
@@ -XXX,XX +XXX,XX @@ enum {
26
+``-netdev user,id=x,tftp=/some/dir'', either accompanied with
32
27
+``-device ...,netdev=x'' (for pluggable NICs) or ``-net nic,netdev=x''
33
static int compare_chr_send(CompareState *s,
28
+(for embedded NICs). The new syntax allows different settings to be
34
const uint8_t *buf,
29
+provided per NIC.
35
- uint32_t size);
30
36
+ uint32_t size,
31
@subsection -bootp (since 2.6.0)
37
+ uint32_t vnet_hdr_len);
32
38
33
-The ``-bootp /some/file'' argument is now a synonym for setting
39
static gint seq_sorter(Packet *a, Packet *b, gpointer data)
34
-the ``-netdev user,bootp=/some/file' argument. The new syntax
40
{
35
-allows different settings to be provided per NIC.
41
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
36
+The ``-bootp /some/file'' argument is replaced by
42
}
37
+``-netdev user,id=x,bootp=/some/file'', either accompanied with
43
38
+``-device ...,netdev=x'' (for pluggable NICs) or ``-net nic,netdev=x''
44
if (result) {
39
+(for embedded NICs). The new syntax allows different settings to be
45
- ret = compare_chr_send(s, pkt->data, pkt->size);
40
+provided per NIC.
46
+ ret = compare_chr_send(s,
41
47
+ pkt->data,
42
@subsection -redir (since 2.6.0)
48
+ pkt->size,
43
49
+ pkt->vnet_hdr_len);
44
-The ``-redir ARGS'' argument is now a synonym for setting
50
if (ret < 0) {
45
-the ``-netdev user,hostfwd=ARGS'' argument instead. The new
51
error_report("colo_send_primary_packet failed");
46
-syntax allows different settings to be provided per NIC.
52
}
47
+The ``-redir [tcp|udp]:hostport:[guestaddr]:guestport'' argument is
53
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
48
+replaced by ``-netdev
54
49
+user,id=x,hostfwd=[tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport'',
55
static int compare_chr_send(CompareState *s,
50
+either accompanied with ``-device ...,netdev=x'' (for pluggable NICs) or
56
const uint8_t *buf,
51
+``-net nic,netdev=x'' (for embedded NICs). The new syntax allows different
57
- uint32_t size)
52
+settings to be provided per NIC.
58
+ uint32_t size,
53
59
+ uint32_t vnet_hdr_len)
54
@subsection -smb (since 2.6.0)
60
{
55
61
int ret = 0;
56
-The ``-smb /some/dir'' argument is now a synonym for setting
62
uint32_t len = htonl(size);
57
-the ``-netdev user,smb=/some/dir'' argument instead. The new
63
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CompareState *s,
58
-syntax allows different settings to be provided per NIC.
64
goto err;
59
+The ``-smb /some/dir'' argument is replaced by
65
}
60
+``-netdev user,id=x,smb=/some/dir'', either accompanied with
66
61
+``-device ...,netdev=x'' (for pluggable NICs) or ``-net nic,netdev=x''
67
+ if (s->vnet_hdr) {
62
+(for embedded NICs). The new syntax allows different settings to be
68
+ /*
63
+provided per NIC.
69
+ * We send vnet header len make other module(like filter-redirector)
64
70
+ * know how to parse net packet correctly.
65
@subsection -net vlan (since 2.9.0)
71
+ */
72
+ len = htonl(vnet_hdr_len);
73
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
74
+ if (ret != sizeof(len)) {
75
+ goto err;
76
+ }
77
+ }
78
+
79
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
80
if (ret != size) {
81
goto err;
82
@@ -XXX,XX +XXX,XX @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp)
83
s->outdev = g_strdup(value);
84
}
85
86
+static bool compare_get_vnet_hdr(Object *obj, Error **errp)
87
+{
88
+ CompareState *s = COLO_COMPARE(obj);
89
+
90
+ return s->vnet_hdr;
91
+}
92
+
93
+static void compare_set_vnet_hdr(Object *obj,
94
+ bool value,
95
+ Error **errp)
96
+{
97
+ CompareState *s = COLO_COMPARE(obj);
98
+
99
+ s->vnet_hdr = value;
100
+}
101
+
102
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
103
{
104
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
105
106
if (packet_enqueue(s, PRIMARY_IN)) {
107
trace_colo_compare_main("primary: unsupported packet in");
108
- compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
109
+ compare_chr_send(s,
110
+ pri_rs->buf,
111
+ pri_rs->packet_len,
112
+ pri_rs->vnet_hdr_len);
113
} else {
114
/* compare connection */
115
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
116
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
117
return;
118
}
119
120
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
121
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
122
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
123
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
124
125
g_queue_init(&s->conn_list);
126
127
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
128
129
while (!g_queue_is_empty(&conn->primary_list)) {
130
pkt = g_queue_pop_head(&conn->primary_list);
131
- compare_chr_send(s, pkt->data, pkt->size);
132
+ compare_chr_send(s,
133
+ pkt->data,
134
+ pkt->size,
135
+ pkt->vnet_hdr_len);
136
packet_destroy(pkt, NULL);
137
}
138
while (!g_queue_is_empty(&conn->secondary_list)) {
139
@@ -XXX,XX +XXX,XX @@ static void colo_compare_class_init(ObjectClass *oc, void *data)
140
141
static void colo_compare_init(Object *obj)
142
{
143
+ CompareState *s = COLO_COMPARE(obj);
144
+
145
object_property_add_str(obj, "primary_in",
146
compare_get_pri_indev, compare_set_pri_indev,
147
NULL);
148
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
149
object_property_add_str(obj, "outdev",
150
compare_get_outdev, compare_set_outdev,
151
NULL);
152
+
153
+ s->vnet_hdr = false;
154
+ object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
155
+ compare_set_vnet_hdr, NULL);
156
}
157
158
static void colo_compare_finalize(Object *obj)
159
diff --git a/qemu-options.hx b/qemu-options.hx
160
index XXXXXXX..XXXXXXX 100644
161
--- a/qemu-options.hx
162
+++ b/qemu-options.hx
163
@@ -XXX,XX +XXX,XX @@ Dump the network traffic on netdev @var{dev} to the file specified by
164
The file format is libpcap, so it can be analyzed with tools such as tcpdump
165
or Wireshark.
166
167
-@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},
168
-outdev=@var{chardevid}
169
+@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support]
170
171
Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, than compare primary packet with
172
secondary packet. If the packets are same, we will output primary
173
packet to outdev@var{chardevid}, else we will notify colo-frame
174
do checkpoint and send primary packet to outdev@var{chardevid}.
175
+if it has the vnet_hdr_support flag, colo compare will send/recv packet with vnet_hdr_len.
176
177
we must use it with the help of filter-mirror and filter-redirector.
178
66
179
--
67
--
180
2.7.4
68
2.7.4
181
69
182
70
diff view generated by jsdifflib