1
The following changes since commit 6632f6ff96f0537fc34cdc00c760656fc62e23c5:
1
The following changes since commit e3debd5e7d0ce031356024878a0a18b9d109354a:
2
2
3
Merge remote-tracking branch 'remotes/famz/tags/block-and-testing-pull-request' into staging (2017-07-17 11:46:36 +0100)
3
Merge tag 'pull-request-2023-03-24' of https://gitlab.com/thuth/qemu into staging (2023-03-24 16:08:46 +0000)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to 189ae6bb5ce1f5a322f8691d00fe942ba43dd601:
9
for you to fetch changes up to fba7c3b788dfcb99a3f9253f7d99cc0d217d6d3c:
10
10
11
virtio-net: fix offload ctrl endian (2017-07-17 20:13:56 +0800)
11
igb: respect VMVIR and VMOLR for VLAN (2023-03-28 13:10:55 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
- fix virtio-net ctrl offload endian
15
----------------------------------------------------------------
16
- vnet header support for various COLO netfilters and compare thread
16
Akihiko Odaki (4):
17
igb: Save more Tx states
18
igb: Fix DMA requester specification for Tx packet
19
hw/net/net_tx_pkt: Ignore ECN bit
20
hw/net/net_tx_pkt: Align l3_hdr
17
21
18
----------------------------------------------------------------
22
Sriram Yagnaraman (8):
19
Jason Wang (1):
23
MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
20
virtio-net: fix offload ctrl endian
24
igb: handle PF/VF reset properly
25
igb: add ICR_RXDW
26
igb: implement VFRE and VFTE registers
27
igb: check oversized packets for VMDq
28
igb: respect E1000_VMOLR_RSSE
29
igb: implement VF Tx and Rx stats
30
igb: respect VMVIR and VMOLR for VLAN
21
31
22
Michal Privoznik (1):
32
MAINTAINERS | 1 +
23
virtion-net: Prefer is_power_of_2()
33
hw/net/e1000e_core.c | 6 +-
24
34
hw/net/e1000x_regs.h | 4 +
25
Zhang Chen (12):
35
hw/net/igb.c | 26 ++++--
26
net: Add vnet_hdr_len arguments in NetClientState
36
hw/net/igb_core.c | 256 ++++++++++++++++++++++++++++++++++++++-------------
27
net/net.c: Add vnet_hdr support in SocketReadState
37
hw/net/igb_core.h | 9 +-
28
net/filter-mirror.c: Introduce parameter for filter_send()
38
hw/net/igb_regs.h | 6 ++
29
net/filter-mirror.c: Make filter mirror support vnet support.
39
hw/net/net_tx_pkt.c | 30 +++---
30
net/filter-mirror.c: Add new option to enable vnet support for filter-redirector
40
hw/net/net_tx_pkt.h | 3 +-
31
net/colo.c: Make vnet_hdr_len as packet property
41
hw/net/trace-events | 2 +
32
net/colo-compare.c: Introduce parameter for compare_chr_send()
42
hw/net/vmxnet3.c | 4 +-
33
net/colo-compare.c: Make colo-compare support vnet_hdr_len
43
11 files changed, 254 insertions(+), 93 deletions(-)
34
net/colo.c: Add vnet packet parse feature in colo-proxy
35
net/colo-compare.c: Add vnet packet's tcp/udp/icmp compare
36
net/filter-rewriter.c: Make filter-rewriter support vnet_hdr_len
37
docs/colo-proxy.txt: Update colo-proxy usage of net driver with vnet_header
38
39
docs/colo-proxy.txt | 26 ++++++++++++++++
40
hw/net/virtio-net.c | 4 ++-
41
include/net/net.h | 10 ++++--
42
net/colo-compare.c | 84 ++++++++++++++++++++++++++++++++++++++++++---------
43
net/colo.c | 9 +++---
44
net/colo.h | 4 ++-
45
net/filter-mirror.c | 75 +++++++++++++++++++++++++++++++++++++++++----
46
net/filter-rewriter.c | 37 ++++++++++++++++++++++-
47
net/net.c | 37 ++++++++++++++++++++---
48
net/socket.c | 8 ++---
49
qemu-options.hx | 19 ++++++------
50
11 files changed, 265 insertions(+), 48 deletions(-)
51
52
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
We add a flag to decide whether net_fill_rstate() needs to read
3
The current implementation of igb uses only part of an advanced Tx
4
the vnet_hdr_len or not.
4
context descriptor and first data descriptor because it misses some
5
features and sniffs the trait of the packet instead of respecting the
6
packet type specified in the descriptor. However, we will certainly
7
need the entire Tx context descriptor when we update igb to respect
8
these ignored fields. Save the entire context descriptor and first
9
data descriptor except the buffer address to prepare for such a change.
5
10
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
11
This also introduces the distinction of contexts with different
7
Suggested-by: Jason Wang <jasowang@redhat.com>
12
indexes, which was not present in e1000e but in igb.
13
14
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
15
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
17
---
10
include/net/net.h | 9 +++++++--
18
hw/net/igb.c | 26 +++++++++++++++++++-------
11
net/colo-compare.c | 4 ++--
19
hw/net/igb_core.c | 39 +++++++++++++++++++--------------------
12
net/filter-mirror.c | 2 +-
20
hw/net/igb_core.h | 8 +++-----
13
net/net.c | 36 ++++++++++++++++++++++++++++++++----
21
3 files changed, 41 insertions(+), 32 deletions(-)
14
net/socket.c | 8 ++++----
15
5 files changed, 46 insertions(+), 13 deletions(-)
16
22
17
diff --git a/include/net/net.h b/include/net/net.h
23
diff --git a/hw/net/igb.c b/hw/net/igb.c
18
index XXXXXXX..XXXXXXX 100644
24
index XXXXXXX..XXXXXXX 100644
19
--- a/include/net/net.h
25
--- a/hw/net/igb.c
20
+++ b/include/net/net.h
26
+++ b/hw/net/igb.c
21
@@ -XXX,XX +XXX,XX @@ typedef struct NICState {
27
@@ -XXX,XX +XXX,XX @@ static int igb_post_load(void *opaque, int version_id)
22
} NICState;
28
return igb_core_post_load(&s->core);
23
29
}
24
struct SocketReadState {
30
25
- int state; /* 0 = getting length, 1 = getting data */
31
-static const VMStateDescription igb_vmstate_tx = {
26
+ /* 0 = getting length, 1 = getting vnet header length, 2 = getting data */
32
- .name = "igb-tx",
27
+ int state;
33
+static const VMStateDescription igb_vmstate_tx_ctx = {
28
+ /* This flag decide whether to read the vnet_hdr_len field */
34
+ .name = "igb-tx-ctx",
29
+ bool vnet_hdr;
35
.version_id = 1,
30
uint32_t index;
36
.minimum_version_id = 1,
31
uint32_t packet_len;
37
.fields = (VMStateField[]) {
32
+ uint32_t vnet_hdr_len;
38
- VMSTATE_UINT16(vlan, struct igb_tx),
33
uint8_t buf[NET_BUFSIZE];
39
- VMSTATE_UINT16(mss, struct igb_tx),
34
SocketReadStateFinalize *finalize;
40
- VMSTATE_BOOL(tse, struct igb_tx),
35
};
41
- VMSTATE_BOOL(ixsm, struct igb_tx),
36
@@ -XXX,XX +XXX,XX @@ ssize_t qemu_deliver_packet_iov(NetClientState *sender,
42
- VMSTATE_BOOL(txsm, struct igb_tx),
37
void print_net_client(Monitor *mon, NetClientState *nc);
43
+ VMSTATE_UINT32(vlan_macip_lens, struct e1000_adv_tx_context_desc),
38
void hmp_info_network(Monitor *mon, const QDict *qdict);
44
+ VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
39
void net_socket_rs_init(SocketReadState *rs,
45
+ VMSTATE_UINT32(type_tucmd_mlhl, struct e1000_adv_tx_context_desc),
40
- SocketReadStateFinalize *finalize);
46
+ VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
41
+ SocketReadStateFinalize *finalize,
47
+ VMSTATE_END_OF_LIST()
42
+ bool vnet_hdr);
48
+ }
43
49
+};
44
/* NIC info */
50
+
45
51
+static const VMStateDescription igb_vmstate_tx = {
46
diff --git a/net/colo-compare.c b/net/colo-compare.c
52
+ .name = "igb-tx",
53
+ .version_id = 2,
54
+ .minimum_version_id = 2,
55
+ .fields = (VMStateField[]) {
56
+ VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
57
+ struct e1000_adv_tx_context_desc),
58
+ VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
59
+ VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
60
VMSTATE_BOOL(first, struct igb_tx),
61
VMSTATE_BOOL(skip_cp, struct igb_tx),
62
VMSTATE_END_OF_LIST()
63
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
47
index XXXXXXX..XXXXXXX 100644
64
index XXXXXXX..XXXXXXX 100644
48
--- a/net/colo-compare.c
65
--- a/hw/net/igb_core.c
49
+++ b/net/colo-compare.c
66
+++ b/hw/net/igb_core.c
50
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
67
@@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
51
return;
68
static bool
69
igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
70
{
71
- if (tx->tse) {
72
- if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
73
+ if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
74
+ uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
75
+ uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
76
+ if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
77
return false;
78
}
79
80
@@ -XXX,XX +XXX,XX @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
81
return true;
52
}
82
}
53
83
54
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize);
84
- if (tx->txsm) {
55
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize);
85
+ if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
56
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
86
if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
57
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
87
return false;
58
59
g_queue_init(&s->conn_list);
60
61
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/net/filter-mirror.c
64
+++ b/net/filter-mirror.c
65
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
66
}
88
}
67
}
89
}
68
90
69
- net_socket_rs_init(&s->rs, redirector_rs_finalize);
91
- if (tx->ixsm) {
70
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
92
+ if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
71
93
net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
72
if (s->indev) {
94
}
73
chr = qemu_chr_find(s->indev);
95
74
diff --git a/net/net.c b/net/net.c
96
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
97
{
98
struct e1000_adv_tx_context_desc *tx_ctx_desc;
99
uint32_t cmd_type_len;
100
- uint32_t olinfo_status;
101
+ uint32_t idx;
102
uint64_t buffer_addr;
103
uint16_t length;
104
105
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
106
E1000_ADVTXD_DTYP_DATA) {
107
/* advanced transmit data descriptor */
108
if (tx->first) {
109
- olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
110
-
111
- tx->tse = !!(cmd_type_len & E1000_ADVTXD_DCMD_TSE);
112
- tx->ixsm = !!(olinfo_status & E1000_ADVTXD_POTS_IXSM);
113
- tx->txsm = !!(olinfo_status & E1000_ADVTXD_POTS_TXSM);
114
-
115
+ tx->first_cmd_type_len = cmd_type_len;
116
+ tx->first_olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
117
tx->first = false;
118
}
119
} else if ((cmd_type_len & E1000_ADVTXD_DTYP_CTXT) ==
120
E1000_ADVTXD_DTYP_CTXT) {
121
/* advanced transmit context descriptor */
122
tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
123
- tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
124
- tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
125
+ idx = (le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 4) & 1;
126
+ tx->ctx[idx].vlan_macip_lens = le32_to_cpu(tx_ctx_desc->vlan_macip_lens);
127
+ tx->ctx[idx].seqnum_seed = le32_to_cpu(tx_ctx_desc->seqnum_seed);
128
+ tx->ctx[idx].type_tucmd_mlhl = le32_to_cpu(tx_ctx_desc->type_tucmd_mlhl);
129
+ tx->ctx[idx].mss_l4len_idx = le32_to_cpu(tx_ctx_desc->mss_l4len_idx);
130
return;
131
} else {
132
/* unknown descriptor type */
133
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
134
if (cmd_type_len & E1000_TXD_CMD_EOP) {
135
if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
136
if (cmd_type_len & E1000_TXD_CMD_VLE) {
137
- net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
138
- core->mac[VET] & 0xffff);
139
+ idx = (tx->first_olinfo_status >> 4) & 1;
140
+ uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
141
+ uint16_t vet = core->mac[VET] & 0xffff;
142
+ net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
143
}
144
if (igb_tx_pkt_send(core, tx, queue_index)) {
145
igb_on_tx_done_update_stats(core, tx->tx_pkt);
146
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
147
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
148
tx = &core->tx[i];
149
net_tx_pkt_reset(tx->tx_pkt);
150
- tx->vlan = 0;
151
- tx->mss = 0;
152
- tx->tse = false;
153
- tx->ixsm = false;
154
- tx->txsm = false;
155
+ memset(tx->ctx, 0, sizeof(tx->ctx));
156
tx->first = true;
157
tx->skip_cp = false;
158
}
159
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
75
index XXXXXXX..XXXXXXX 100644
160
index XXXXXXX..XXXXXXX 100644
76
--- a/net/net.c
161
--- a/hw/net/igb_core.h
77
+++ b/net/net.c
162
+++ b/hw/net/igb_core.h
78
@@ -XXX,XX +XXX,XX @@ QemuOptsList qemu_net_opts = {
163
@@ -XXX,XX +XXX,XX @@ struct IGBCore {
79
};
164
QEMUTimer *autoneg_timer;
80
165
81
void net_socket_rs_init(SocketReadState *rs,
166
struct igb_tx {
82
- SocketReadStateFinalize *finalize)
167
- uint16_t vlan; /* VLAN Tag */
83
+ SocketReadStateFinalize *finalize,
168
- uint16_t mss; /* Maximum Segment Size */
84
+ bool vnet_hdr)
169
- bool tse; /* TCP/UDP Segmentation Enable */
85
{
170
- bool ixsm; /* Insert IP Checksum */
86
rs->state = 0;
171
- bool txsm; /* Insert TCP/UDP Checksum */
87
+ rs->vnet_hdr = vnet_hdr;
172
+ struct e1000_adv_tx_context_desc ctx[2];
88
rs->index = 0;
173
+ uint32_t first_cmd_type_len;
89
rs->packet_len = 0;
174
+ uint32_t first_olinfo_status;
90
+ rs->vnet_hdr_len = 0;
175
91
memset(rs->buf, 0, sizeof(rs->buf));
176
bool first;
92
rs->finalize = finalize;
177
bool skip_cp;
93
}
94
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
95
unsigned int l;
96
97
while (size > 0) {
98
- /* reassemble a packet from the network */
99
- switch (rs->state) { /* 0 = getting length, 1 = getting data */
100
+ /* Reassemble a packet from the network.
101
+ * 0 = getting length.
102
+ * 1 = getting vnet header length.
103
+ * 2 = getting data.
104
+ */
105
+ switch (rs->state) {
106
case 0:
107
l = 4 - rs->index;
108
if (l > size) {
109
@@ -XXX,XX +XXX,XX @@ int net_fill_rstate(SocketReadState *rs, const uint8_t *buf, int size)
110
/* got length */
111
rs->packet_len = ntohl(*(uint32_t *)rs->buf);
112
rs->index = 0;
113
- rs->state = 1;
114
+ if (rs->vnet_hdr) {
115
+ rs->state = 1;
116
+ } else {
117
+ rs->state = 2;
118
+ rs->vnet_hdr_len = 0;
119
+ }
120
}
121
break;
122
case 1:
123
+ l = 4 - rs->index;
124
+ if (l > size) {
125
+ l = size;
126
+ }
127
+ memcpy(rs->buf + rs->index, buf, l);
128
+ buf += l;
129
+ size -= l;
130
+ rs->index += l;
131
+ if (rs->index == 4) {
132
+ /* got vnet header length */
133
+ rs->vnet_hdr_len = ntohl(*(uint32_t *)rs->buf);
134
+ rs->index = 0;
135
+ rs->state = 2;
136
+ }
137
+ break;
138
+ case 2:
139
l = rs->packet_len - rs->index;
140
if (l > size) {
141
l = size;
142
diff --git a/net/socket.c b/net/socket.c
143
index XXXXXXX..XXXXXXX 100644
144
--- a/net/socket.c
145
+++ b/net/socket.c
146
@@ -XXX,XX +XXX,XX @@ static void net_socket_send(void *opaque)
147
closesocket(s->fd);
148
149
s->fd = -1;
150
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
151
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
152
s->nc.link_down = true;
153
memset(s->nc.info_str, 0, sizeof(s->nc.info_str));
154
155
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_dgram(NetClientState *peer,
156
s->fd = fd;
157
s->listen_fd = -1;
158
s->send_fn = net_socket_send_dgram;
159
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
160
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
161
net_socket_read_poll(s, true);
162
163
/* mcast: save bound address as dst */
164
@@ -XXX,XX +XXX,XX @@ static NetSocketState *net_socket_fd_init_stream(NetClientState *peer,
165
166
s->fd = fd;
167
s->listen_fd = -1;
168
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
169
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
170
171
/* Disable Nagle algorithm on TCP sockets to reduce latency */
172
socket_set_nodelay(fd);
173
@@ -XXX,XX +XXX,XX @@ static int net_socket_listen_init(NetClientState *peer,
174
s->fd = -1;
175
s->listen_fd = fd;
176
s->nc.link_down = true;
177
- net_socket_rs_init(&s->rs, net_socket_rs_finalize);
178
+ net_socket_rs_init(&s->rs, net_socket_rs_finalize, false);
179
180
qemu_set_fd_handler(s->listen_fd, net_socket_accept, NULL, s);
181
return 0;
182
--
178
--
183
2.7.4
179
2.7.4
184
185
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
We can use this property to flush and send packets with vnet_hdr_len.
3
igb used to specify the PF as DMA requester when reading Tx packets.
4
This caused Tx requests from VFs to be performed on the address space of
5
the PF, defeating the purpose of SR-IOV. Add some logic to change the
6
requester depending on the queue, which can be assigned to a VF.
4
7
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
8
Fixes: 3a977deebe ("Intrdocue igb device emulation")
9
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
11
---
8
net/colo-compare.c | 8 ++++++--
12
hw/net/e1000e_core.c | 6 +++---
9
net/colo.c | 3 ++-
13
hw/net/igb_core.c | 13 ++++++++-----
10
net/colo.h | 4 +++-
14
hw/net/net_tx_pkt.c | 3 ++-
11
net/filter-rewriter.c | 2 +-
15
hw/net/net_tx_pkt.h | 3 ++-
12
4 files changed, 12 insertions(+), 5 deletions(-)
16
hw/net/vmxnet3.c | 4 ++--
17
5 files changed, 17 insertions(+), 12 deletions(-)
13
18
14
diff --git a/net/colo-compare.c b/net/colo-compare.c
19
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
15
index XXXXXXX..XXXXXXX 100644
20
index XXXXXXX..XXXXXXX 100644
16
--- a/net/colo-compare.c
21
--- a/hw/net/e1000e_core.c
17
+++ b/net/colo-compare.c
22
+++ b/hw/net/e1000e_core.c
18
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode)
23
@@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core,
19
Connection *conn;
24
}
20
25
21
if (mode == PRIMARY_IN) {
26
tx->skip_cp = false;
22
- pkt = packet_new(s->pri_rs.buf, s->pri_rs.packet_len);
27
- net_tx_pkt_reset(tx->tx_pkt);
23
+ pkt = packet_new(s->pri_rs.buf,
28
+ net_tx_pkt_reset(tx->tx_pkt, core->owner);
24
+ s->pri_rs.packet_len,
29
25
+ s->pri_rs.vnet_hdr_len);
30
tx->sum_needed = 0;
26
} else {
31
tx->cptse = 0;
27
- pkt = packet_new(s->sec_rs.buf, s->sec_rs.packet_len);
32
@@ -XXX,XX +XXX,XX @@ e1000e_core_pci_uninit(E1000ECore *core)
28
+ pkt = packet_new(s->sec_rs.buf,
33
qemu_del_vm_change_state_handler(core->vmstate);
29
+ s->sec_rs.packet_len,
34
30
+ s->sec_rs.vnet_hdr_len);
35
for (i = 0; i < E1000E_NUM_QUEUES; i++) {
36
- net_tx_pkt_reset(core->tx[i].tx_pkt);
37
+ net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
38
net_tx_pkt_uninit(core->tx[i].tx_pkt);
31
}
39
}
32
40
33
if (parse_packet_early(pkt)) {
41
@@ -XXX,XX +XXX,XX @@ static void e1000e_reset(E1000ECore *core, bool sw)
34
diff --git a/net/colo.c b/net/colo.c
42
e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
43
44
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
45
- net_tx_pkt_reset(core->tx[i].tx_pkt);
46
+ net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
47
memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
48
core->tx[i].skip_cp = false;
49
}
50
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
35
index XXXXXXX..XXXXXXX 100644
51
index XXXXXXX..XXXXXXX 100644
36
--- a/net/colo.c
52
--- a/hw/net/igb_core.c
37
+++ b/net/colo.c
53
+++ b/hw/net/igb_core.c
38
@@ -XXX,XX +XXX,XX @@ void connection_destroy(void *opaque)
54
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
39
g_slice_free(Connection, conn);
55
56
static void
57
igb_process_tx_desc(IGBCore *core,
58
+ PCIDevice *dev,
59
struct igb_tx *tx,
60
union e1000_adv_tx_desc *tx_desc,
61
int queue_index)
62
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
63
64
tx->first = true;
65
tx->skip_cp = false;
66
- net_tx_pkt_reset(tx->tx_pkt);
67
+ net_tx_pkt_reset(tx->tx_pkt, dev);
68
}
40
}
69
}
41
70
42
-Packet *packet_new(const void *data, int size)
71
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
43
+Packet *packet_new(const void *data, int size, int vnet_hdr_len)
72
d = core->owner;
73
}
74
75
+ net_tx_pkt_reset(txr->tx->tx_pkt, d);
76
+
77
while (!igb_ring_empty(core, txi)) {
78
base = igb_ring_head_descr(core, txi);
79
80
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
81
trace_e1000e_tx_descr((void *)(intptr_t)desc.read.buffer_addr,
82
desc.read.cmd_type_len, desc.wb.status);
83
84
- igb_process_tx_desc(core, txr->tx, &desc, txi->idx);
85
+ igb_process_tx_desc(core, d, txr->tx, &desc, txi->idx);
86
igb_ring_advance(core, txi, 1);
87
eic |= igb_txdesc_writeback(core, base, &desc, txi);
88
}
89
@@ -XXX,XX +XXX,XX @@ igb_core_pci_realize(IGBCore *core,
90
core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);
91
92
for (i = 0; i < IGB_NUM_QUEUES; i++) {
93
- net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
94
+ net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
95
}
96
97
net_rx_pkt_init(&core->rx_pkt);
98
@@ -XXX,XX +XXX,XX @@ igb_core_pci_uninit(IGBCore *core)
99
qemu_del_vm_change_state_handler(core->vmstate);
100
101
for (i = 0; i < IGB_NUM_QUEUES; i++) {
102
- net_tx_pkt_reset(core->tx[i].tx_pkt);
103
+ net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
104
net_tx_pkt_uninit(core->tx[i].tx_pkt);
105
}
106
107
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
108
109
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
110
tx = &core->tx[i];
111
- net_tx_pkt_reset(tx->tx_pkt);
112
+ net_tx_pkt_reset(tx->tx_pkt, NULL);
113
memset(tx->ctx, 0, sizeof(tx->ctx));
114
tx->first = true;
115
tx->skip_cp = false;
116
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
117
index XXXXXXX..XXXXXXX 100644
118
--- a/hw/net/net_tx_pkt.c
119
+++ b/hw/net/net_tx_pkt.c
120
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
121
#endif
122
}
123
124
-void net_tx_pkt_reset(struct NetTxPkt *pkt)
125
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
44
{
126
{
45
Packet *pkt = g_slice_new(Packet);
127
int i;
46
128
47
pkt->data = g_memdup(data, size);
129
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
48
pkt->size = size;
130
pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
49
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
131
}
50
+ pkt->vnet_hdr_len = vnet_hdr_len;
132
}
51
133
+ pkt->pci_dev = pci_dev;
52
return pkt;
134
pkt->raw_frags = 0;
135
136
pkt->hdr_len = 0;
137
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
138
index XXXXXXX..XXXXXXX 100644
139
--- a/hw/net/net_tx_pkt.h
140
+++ b/hw/net/net_tx_pkt.h
141
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
142
* reset tx packet private context (needed to be called between packets)
143
*
144
* @pkt: packet
145
+ * @dev: PCI device processing the next packet
146
*
147
*/
148
-void net_tx_pkt_reset(struct NetTxPkt *pkt);
149
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
150
151
/**
152
* Send packet to qemu. handles sw offloads if vhdr is not supported.
153
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
154
index XXXXXXX..XXXXXXX 100644
155
--- a/hw/net/vmxnet3.c
156
+++ b/hw/net/vmxnet3.c
157
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
158
vmxnet3_complete_packet(s, qidx, txd_idx);
159
s->tx_sop = true;
160
s->skip_current_tx_pkt = false;
161
- net_tx_pkt_reset(s->tx_pkt);
162
+ net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
163
}
164
}
53
}
165
}
54
diff --git a/net/colo.h b/net/colo.h
166
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
55
index XXXXXXX..XXXXXXX 100644
167
{
56
--- a/net/colo.h
168
if (s->device_active) {
57
+++ b/net/colo.h
169
VMW_CBPRN("Deactivating vmxnet3...");
58
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
170
- net_tx_pkt_reset(s->tx_pkt);
59
int size;
171
+ net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
60
/* Time of packet creation, in wall clock ms */
172
net_tx_pkt_uninit(s->tx_pkt);
61
int64_t creation_ms;
173
net_rx_pkt_uninit(s->rx_pkt);
62
+ /* Get vnet_hdr_len from filter */
174
s->device_active = false;
63
+ uint32_t vnet_hdr_len;
64
} Packet;
65
66
typedef struct ConnectionKey {
67
@@ -XXX,XX +XXX,XX @@ Connection *connection_get(GHashTable *connection_track_table,
68
ConnectionKey *key,
69
GQueue *conn_list);
70
void connection_hashtable_reset(GHashTable *connection_track_table);
71
-Packet *packet_new(const void *data, int size);
72
+Packet *packet_new(const void *data, int size, int vnet_hdr_len);
73
void packet_destroy(void *opaque, void *user_data);
74
75
#endif /* QEMU_COLO_PROXY_H */
76
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
77
index XXXXXXX..XXXXXXX 100644
78
--- a/net/filter-rewriter.c
79
+++ b/net/filter-rewriter.c
80
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
81
char *buf = g_malloc0(size);
82
83
iov_to_buf(iov, iovcnt, 0, buf, size);
84
- pkt = packet_new(buf, size);
85
+ pkt = packet_new(buf, size, 0);
86
g_free(buf);
87
88
/*
89
--
175
--
90
2.7.4
176
2.7.4
91
92
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
COLO-Proxy only focuses on the packet payload, so we skip the vnet header.
3
No segmentation should be performed if gso type is
4
VIRTIO_NET_HDR_GSO_NONE even if ECN bit is set.
4
5
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
7
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1544
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
10
---
8
net/colo-compare.c | 8 ++++++--
11
hw/net/net_tx_pkt.c | 6 ++++--
9
1 file changed, 6 insertions(+), 2 deletions(-)
12
1 file changed, 4 insertions(+), 2 deletions(-)
10
13
11
diff --git a/net/colo-compare.c b/net/colo-compare.c
14
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
12
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo-compare.c
16
--- a/hw/net/net_tx_pkt.c
14
+++ b/net/colo-compare.c
17
+++ b/hw/net/net_tx_pkt.c
15
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt, Packet *spkt, int offset)
18
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
16
sec_ip_src, sec_ip_dst);
19
{
20
assert(pkt);
21
22
+ uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
23
+
24
/*
25
* Since underlying infrastructure does not support IP datagrams longer
26
* than 64K we should drop such packets and don't even try to send
27
*/
28
- if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
29
+ if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
30
if (pkt->payload_len >
31
ETH_MAX_IP_DGRAM_LEN -
32
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
33
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
34
}
17
}
35
}
18
36
19
+ offset = ppkt->vnet_hdr_len + offset;
37
- if (offload || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
20
+
38
+ if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
21
if (ppkt->size == spkt->size) {
39
if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
22
- return memcmp(ppkt->data + offset, spkt->data + offset,
40
net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
23
+ return memcmp(ppkt->data + offset,
41
pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
24
+ spkt->data + offset,
25
spkt->size - offset);
26
} else {
27
trace_colo_compare_main("Net packet size are not the same");
28
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
29
*/
30
if (ptcp->th_off > 5) {
31
ptrdiff_t tcp_offset;
32
+
33
tcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
34
- + (ptcp->th_off * 4);
35
+ + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
36
res = colo_packet_compare_common(ppkt, spkt, tcp_offset);
37
} else if (ptcp->th_sum == stcp->th_sum) {
38
res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN);
39
--
42
--
40
2.7.4
43
2.7.4
41
42
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
We add the vnet_hdr_support option for filter-rewriter; it is disabled by default.
3
Align the l3_hdr member of NetTxPkt by defining it as a union of
4
If you use virtio-net-pci or another driver that needs vnet_hdr, please enable it.
4
ip_header, ip6_header, and an array of octets.
5
You can use it for example:
6
-object filter-rewriter,id=rew0,netdev=hn0,queue=all,vnet_hdr_support
7
5
8
We get the vnet_hdr_len from NetClientState, which lets us
6
Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
9
parse net packets correctly.
7
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1544
10
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
11
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
10
---
14
net/filter-rewriter.c | 37 ++++++++++++++++++++++++++++++++++++-
11
hw/net/net_tx_pkt.c | 21 +++++++++++----------
15
qemu-options.hx | 4 ++--
12
1 file changed, 11 insertions(+), 10 deletions(-)
16
2 files changed, 38 insertions(+), 3 deletions(-)
17
13
18
diff --git a/net/filter-rewriter.c b/net/filter-rewriter.c
14
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
19
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
20
--- a/net/filter-rewriter.c
16
--- a/hw/net/net_tx_pkt.c
21
+++ b/net/filter-rewriter.c
17
+++ b/hw/net/net_tx_pkt.c
22
@@ -XXX,XX +XXX,XX @@
18
@@ -XXX,XX +XXX,XX @@ struct NetTxPkt {
23
#include "qemu-common.h"
19
struct iovec *vec;
24
#include "qapi/error.h"
20
25
#include "qapi/qmp/qerror.h"
21
uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
26
+#include "qemu/error-report.h"
22
- uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN];
27
#include "qapi-visit.h"
23
+ union {
28
#include "qom/object.h"
24
+ struct ip_header ip;
29
#include "qemu/main-loop.h"
25
+ struct ip6_header ip6;
30
@@ -XXX,XX +XXX,XX @@ typedef struct RewriterState {
26
+ uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
31
NetQueue *incoming_queue;
27
+ } l3_hdr;
32
/* hashtable to save connection */
28
33
GHashTable *connection_track_table;
29
uint32_t payload_len;
34
+ bool vnet_hdr;
30
35
} RewriterState;
31
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
36
32
{
37
static void filter_rewriter_flush(NetFilterState *nf)
33
uint16_t csum;
38
@@ -XXX,XX +XXX,XX @@ static ssize_t colo_rewriter_receive_iov(NetFilterState *nf,
34
assert(pkt);
39
ConnectionKey key;
35
- struct ip_header *ip_hdr;
40
Packet *pkt;
36
- ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;
41
ssize_t size = iov_size(iov, iovcnt);
37
42
+ ssize_t vnet_hdr_len = 0;
38
- ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
43
char *buf = g_malloc0(size);
39
+ pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
44
40
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
45
iov_to_buf(iov, iovcnt, 0, buf, size);
41
46
- pkt = packet_new(buf, size, 0);
42
- ip_hdr->ip_sum = 0;
47
+
43
- csum = net_raw_checksum((uint8_t *)ip_hdr,
48
+ if (s->vnet_hdr) {
44
+ pkt->l3_hdr.ip.ip_sum = 0;
49
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
45
+ csum = net_raw_checksum(pkt->l3_hdr.octets,
50
+ }
46
pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
51
+
47
- ip_hdr->ip_sum = cpu_to_be16(csum);
52
+ pkt = packet_new(buf, size, vnet_hdr_len);
48
+ pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
53
g_free(buf);
54
55
/*
56
@@ -XXX,XX +XXX,XX @@ static void colo_rewriter_setup(NetFilterState *nf, Error **errp)
57
s->incoming_queue = qemu_new_net_queue(qemu_netfilter_pass_to_next, nf);
58
}
49
}
59
50
60
+static bool filter_rewriter_get_vnet_hdr(Object *obj, Error **errp)
51
void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
61
+{
52
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
62
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
63
+
64
+ return s->vnet_hdr;
65
+}
66
+
67
+static void filter_rewriter_set_vnet_hdr(Object *obj,
68
+ bool value,
69
+ Error **errp)
70
+{
71
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
72
+
73
+ s->vnet_hdr = value;
74
+}
75
+
76
+static void filter_rewriter_init(Object *obj)
77
+{
78
+ RewriterState *s = FILTER_COLO_REWRITER(obj);
79
+
80
+ s->vnet_hdr = false;
81
+ object_property_add_bool(obj, "vnet_hdr_support",
82
+ filter_rewriter_get_vnet_hdr,
83
+ filter_rewriter_set_vnet_hdr, NULL);
84
+}
85
+
86
static void colo_rewriter_class_init(ObjectClass *oc, void *data)
87
{
53
{
88
NetFilterClass *nfc = NETFILTER_CLASS(oc);
54
struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
89
@@ -XXX,XX +XXX,XX @@ static const TypeInfo colo_rewriter_info = {
55
if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
90
.name = TYPE_FILTER_REWRITER,
56
- struct ip6_header *ip6 = (struct ip6_header *) pkt->l3_hdr;
91
.parent = TYPE_NETFILTER,
57
/*
92
.class_init = colo_rewriter_class_init,
58
* TODO: if qemu would support >64K packets - add jumbo option check
93
+ .instance_init = filter_rewriter_init,
59
* something like that:
94
.instance_size = sizeof(RewriterState),
60
* 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
95
};
61
*/
96
62
- if (ip6->ip6_plen == 0) {
97
diff --git a/qemu-options.hx b/qemu-options.hx
63
+ if (pkt->l3_hdr.ip6.ip6_plen == 0) {
98
index XXXXXXX..XXXXXXX 100644
64
if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
99
--- a/qemu-options.hx
65
- ip6->ip6_plen = htons(pkt->payload_len);
100
+++ b/qemu-options.hx
66
+ pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
101
@@ -XXX,XX +XXX,XX @@ Create a filter-redirector we need to differ outdev id from indev id, id can not
67
}
102
be the same. we can just use indev or outdev, but at least one of indev or outdev
68
/*
103
need to be specified.
69
* TODO: if qemu would support >64K packets
104
105
-@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid}[,queue=@var{all|rx|tx}]
106
+@item -object filter-rewriter,id=@var{id},netdev=@var{netdevid},queue=@var{all|rx|tx},[vnet_hdr_support]
107
108
Filter-rewriter is a part of COLO project.It will rewrite tcp packet to
109
secondary from primary to keep secondary tcp connection,and rewrite
110
tcp packet to primary from secondary make tcp packet can be handled by
111
-client.
112
+client.if it has the vnet_hdr_support flag, we can parse packet with vnet header.
113
114
usage:
115
colo secondary:
116
--
70
--
117
2.7.4
71
2.7.4
118
119
diff view generated by jsdifflib
1
Spec said offloads should be le64, so use virtio_ldq_p() to guarantee
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
2
valid endian.
3
2
4
Fixes: 644c98587d4c ("virtio-net: dynamic network offloads configuration")
3
I would like to review and be informed on changes to igb device
5
Cc: qemu-stable@nongnu.org
4
6
Cc: Dmitry Fleytman <dfleytma@redhat.com>
5
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
7
---
9
hw/net/virtio-net.c | 2 ++
8
MAINTAINERS | 1 +
10
1 file changed, 2 insertions(+)
9
1 file changed, 1 insertion(+)
11
10
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
11
diff --git a/MAINTAINERS b/MAINTAINERS
13
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
13
--- a/MAINTAINERS
15
+++ b/hw/net/virtio-net.c
14
+++ b/MAINTAINERS
16
@@ -XXX,XX +XXX,XX @@ static int virtio_net_handle_offloads(VirtIONet *n, uint8_t cmd,
15
@@ -XXX,XX +XXX,XX @@ F: tests/qtest/libqos/e1000e.*
17
if (cmd == VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET) {
16
18
uint64_t supported_offloads;
17
igb
19
18
M: Akihiko Odaki <akihiko.odaki@daynix.com>
20
+ offloads = virtio_ldq_p(vdev, &offloads);
19
+R: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
21
+
20
S: Maintained
22
if (!n->has_vnet_hdr) {
21
F: docs/system/devices/igb.rst
23
return VIRTIO_NET_ERR;
22
F: hw/net/igb*
24
}
25
--
23
--
26
2.7.4
24
2.7.4
27
28
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
2
2
3
This patch changes the compare_chr_send() parameter from CharBackend to CompareState,
3
Use PFRSTD to reset RSTI bit for VFs, and raise VFLRE interrupt when VF
4
so that we can get more information such as vnet_hdr (we use it to support packets with a vnet header).
4
is reset.
5
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
8
---
9
net/colo-compare.c | 14 +++++++-------
9
hw/net/igb_core.c | 38 ++++++++++++++++++++++++++------------
10
1 file changed, 7 insertions(+), 7 deletions(-)
10
hw/net/igb_regs.h | 3 +++
11
hw/net/trace-events | 2 ++
12
3 files changed, 31 insertions(+), 12 deletions(-)
11
13
12
diff --git a/net/colo-compare.c b/net/colo-compare.c
14
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
13
index XXXXXXX..XXXXXXX 100644
15
index XXXXXXX..XXXXXXX 100644
14
--- a/net/colo-compare.c
16
--- a/hw/net/igb_core.c
15
+++ b/net/colo-compare.c
17
+++ b/hw/net/igb_core.c
16
@@ -XXX,XX +XXX,XX @@ enum {
18
@@ -XXX,XX +XXX,XX @@ static void igb_set_eims(IGBCore *core, int index, uint32_t val)
17
SECONDARY_IN,
19
igb_update_interrupt_state(core);
18
};
20
}
19
21
20
-static int compare_chr_send(CharBackend *out,
22
-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
21
+static int compare_chr_send(CompareState *s,
23
-{
22
const uint8_t *buf,
24
- /* TODO: Reset of the queue enable and the interrupt registers of the VF. */
23
uint32_t size);
25
-
24
26
- core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
25
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
27
- core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
26
}
28
-}
27
29
-
28
if (result) {
30
static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
29
- ret = compare_chr_send(&s->chr_out, pkt->data, pkt->size);
31
{
30
+ ret = compare_chr_send(s, pkt->data, pkt->size);
32
uint32_t ent = core->mac[VTIVAR_MISC + vfn];
31
if (ret < 0) {
33
@@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
32
error_report("colo_send_primary_packet failed");
33
}
34
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
35
}
34
}
36
}
35
}
37
36
38
-static int compare_chr_send(CharBackend *out,
37
+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
39
+static int compare_chr_send(CompareState *s,
38
+{
40
const uint8_t *buf,
39
+ /* disable Rx and Tx for the VF*/
41
uint32_t size)
40
+ core->mac[VFTE] &= ~BIT(vfn);
41
+ core->mac[VFRE] &= ~BIT(vfn);
42
+ /* indicate VF reset to PF */
43
+ core->mac[VFLRE] |= BIT(vfn);
44
+ /* VFLRE and mailbox use the same interrupt cause */
45
+ mailbox_interrupt_to_pf(core);
46
+}
47
+
48
static void igb_w1c(IGBCore *core, int index, uint32_t val)
42
{
49
{
43
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CharBackend *out,
50
core->mac[index] &= ~val;
44
return 0;
51
@@ -XXX,XX +XXX,XX @@ igb_set_status(IGBCore *core, int index, uint32_t val)
45
}
52
static void
46
53
igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
47
- ret = qemu_chr_fe_write_all(out, (uint8_t *)&len, sizeof(len));
54
{
48
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
55
- trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
49
if (ret != sizeof(len)) {
56
- !!(val & E1000_CTRL_EXT_SPD_BYPS));
50
goto err;
57
-
51
}
58
- /* TODO: PFRSTD */
52
59
+ trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
53
- ret = qemu_chr_fe_write_all(out, (uint8_t *)buf, size);
60
+ !!(val & E1000_CTRL_EXT_SPD_BYPS),
54
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
61
+ !!(val & E1000_CTRL_EXT_PFRSTD));
55
if (ret != size) {
62
56
goto err;
63
/* Zero self-clearing bits */
57
}
64
val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
58
@@ -XXX,XX +XXX,XX @@ static void compare_pri_rs_finalize(SocketReadState *pri_rs)
65
core->mac[CTRL_EXT] = val;
59
66
+
60
if (packet_enqueue(s, PRIMARY_IN)) {
67
+ if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
61
trace_colo_compare_main("primary: unsupported packet in");
68
+ for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
62
- compare_chr_send(&s->chr_out, pri_rs->buf, pri_rs->packet_len);
69
+ core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
63
+ compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
70
+ core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
64
} else {
71
+ }
65
/* compare connection */
72
+ }
66
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
73
}
67
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
74
68
75
static void
69
while (!g_queue_is_empty(&conn->primary_list)) {
76
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
70
pkt = g_queue_pop_head(&conn->primary_list);
77
71
- compare_chr_send(&s->chr_out, pkt->data, pkt->size);
78
e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
72
+ compare_chr_send(s, pkt->data, pkt->size);
79
73
packet_destroy(pkt, NULL);
80
+ for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
74
}
81
+ /* Set RSTI, so VF can identify a PF reset is in progress */
75
while (!g_queue_is_empty(&conn->secondary_list)) {
82
+ core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTI;
83
+ }
84
+
85
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
86
tx = &core->tx[i];
87
net_tx_pkt_reset(tx->tx_pkt, NULL);
88
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
89
index XXXXXXX..XXXXXXX 100644
90
--- a/hw/net/igb_regs.h
91
+++ b/hw/net/igb_regs.h
92
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
93
94
/* from igb/e1000_defines.h */
95
96
+/* Physical Func Reset Done Indication */
97
+#define E1000_CTRL_EXT_PFRSTD 0x00004000
98
+
99
#define E1000_IVAR_VALID 0x80
100
#define E1000_GPIE_NSICR 0x00000001
101
#define E1000_GPIE_MSIX_MODE 0x00000010
102
diff --git a/hw/net/trace-events b/hw/net/trace-events
103
index XXXXXXX..XXXXXXX 100644
104
--- a/hw/net/trace-events
105
+++ b/hw/net/trace-events
106
@@ -XXX,XX +XXX,XX @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED"
107
igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
108
igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"
109
110
+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
111
+
112
igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
113
igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
114
76
--
115
--
77
2.7.4
116
2.7.4
78
79
diff view generated by jsdifflib
1
From: Michal Privoznik <mprivozn@redhat.com>
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
2
2
3
We have a function that checks if given number is power of two.
3
IGB uses RXDW ICR bit to indicate that rx descriptor has been written
4
We should prefer it instead of expanding the check on our own.
4
back. This is the same as RXT0 bit in older HW.
5
5
6
Signed-off-by: Michal Privoznik <mprivozn@redhat.com>
6
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
8
---
9
hw/net/virtio-net.c | 2 +-
9
hw/net/e1000x_regs.h | 4 ++++
10
1 file changed, 1 insertion(+), 1 deletion(-)
10
hw/net/igb_core.c | 2 +-
11
2 files changed, 5 insertions(+), 1 deletion(-)
11
12
12
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
13
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
13
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/virtio-net.c
15
--- a/hw/net/e1000x_regs.h
15
+++ b/hw/net/virtio-net.c
16
+++ b/hw/net/e1000x_regs.h
16
@@ -XXX,XX +XXX,XX @@ static void virtio_net_device_realize(DeviceState *dev, Error **errp)
17
@@ -XXX,XX +XXX,XX @@
17
*/
18
#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */
18
if (n->net_conf.rx_queue_size < VIRTIO_NET_RX_QUEUE_MIN_SIZE ||
19
#define E1000_ICR_RXO 0x00000040 /* rx overrun */
19
n->net_conf.rx_queue_size > VIRTQUEUE_MAX_SIZE ||
20
#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */
20
- (n->net_conf.rx_queue_size & (n->net_conf.rx_queue_size - 1))) {
21
+#define E1000_ICR_RXDW 0x00000080 /* rx desc written back */
21
+ !is_power_of_2(n->net_conf.rx_queue_size)) {
22
#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */
22
error_setg(errp, "Invalid rx_queue_size (= %" PRIu16 "), "
23
#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */
23
"must be a power of 2 between %d and %d.",
24
#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
24
n->net_conf.rx_queue_size, VIRTIO_NET_RX_QUEUE_MIN_SIZE,
25
@@ -XXX,XX +XXX,XX @@
26
#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
27
#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */
28
#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
29
+#define E1000_ICS_RXDW E1000_ICR_RXDW /* rx desc written back */
30
#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */
31
#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
32
#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
33
@@ -XXX,XX +XXX,XX @@
34
#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
35
#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */
36
#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
37
+#define E1000_IMS_RXDW E1000_ICR_RXDW /* rx desc written back */
38
#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */
39
#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
40
#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
41
@@ -XXX,XX +XXX,XX @@
42
#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
43
#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */
44
#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */
45
+#define E1000_IMC_RXDW E1000_ICR_RXDW /* rx desc written back */
46
#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */
47
#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
48
#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
49
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/hw/net/igb_core.c
52
+++ b/hw/net/igb_core.c
53
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
54
continue;
55
}
56
57
- n |= E1000_ICR_RXT0;
58
+ n |= E1000_ICR_RXDW;
59
60
igb_rx_fix_l4_csum(core, core->rx_pkt);
61
igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
25
--
62
--
26
2.7.4
63
2.7.4
27
28
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
2
2
3
Add vnet_hdr_len arguments in NetClientState
3
Also introduce:
4
so that other modules can get the real vnet_hdr_len easily.
4
- Checks for RXDCTL/TXDCTL queue enable bits
5
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)
5
6
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
---
9
include/net/net.h | 1 +
10
hw/net/igb_core.c | 38 +++++++++++++++++++++++++++++++-------
10
net/net.c | 1 +
11
hw/net/igb_core.h | 1 +
11
2 files changed, 2 insertions(+)
12
hw/net/igb_regs.h | 3 +++
13
3 files changed, 35 insertions(+), 7 deletions(-)
12
14
13
diff --git a/include/net/net.h b/include/net/net.h
15
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
14
index XXXXXXX..XXXXXXX 100644
16
index XXXXXXX..XXXXXXX 100644
15
--- a/include/net/net.h
17
--- a/hw/net/igb_core.c
16
+++ b/include/net/net.h
18
+++ b/hw/net/igb_core.c
17
@@ -XXX,XX +XXX,XX @@ struct NetClientState {
19
@@ -XXX,XX +XXX,XX @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
18
unsigned int queue_index;
20
return igb_tx_wb_eic(core, txi->idx);
19
unsigned rxfilter_notify_enabled:1;
21
}
20
int vring_enable;
22
21
+ int vnet_hdr_len;
23
+static inline bool
22
QTAILQ_HEAD(NetFilterHead, NetFilterState) filters;
24
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
23
};
25
+{
24
26
+ bool vmdq = core->mac[MRQC] & 1;
25
diff --git a/net/net.c b/net/net.c
27
+ uint16_t qn = txi->idx;
26
index XXXXXXX..XXXXXXX 100644
28
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
27
--- a/net/net.c
29
+
28
+++ b/net/net.c
30
+ return (core->mac[TCTL] & E1000_TCTL_EN) &&
29
@@ -XXX,XX +XXX,XX @@ void qemu_set_vnet_hdr_len(NetClientState *nc, int len)
31
+ (!vmdq || core->mac[VFTE] & BIT(pool)) &&
32
+ (core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
33
+}
34
+
35
static void
36
igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
37
{
38
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
39
const E1000E_RingInfo *txi = txr->i;
40
uint32_t eic = 0;
41
42
- /* TODO: check if the queue itself is enabled too. */
43
- if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
44
+ if (!igb_tx_enabled(core, txi)) {
45
trace_e1000e_tx_disabled();
30
return;
46
return;
31
}
47
}
32
48
@@ -XXX,XX +XXX,XX @@ igb_can_receive(IGBCore *core)
33
+ nc->vnet_hdr_len = len;
49
34
nc->info->set_vnet_hdr_len(nc, len);
50
for (i = 0; i < IGB_NUM_QUEUES; i++) {
35
}
51
E1000E_RxRing rxr;
52
+ if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
53
+ continue;
54
+ }
55
56
igb_rx_ring_init(core, &rxr, i);
57
if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
58
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
59
60
if (core->mac[MRQC] & 1) {
61
if (is_broadcast_ether_addr(ehdr->h_dest)) {
62
- for (i = 0; i < 8; i++) {
63
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
64
if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
65
queues |= BIT(i);
66
}
67
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
68
f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
69
f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
70
if (macp[f >> 5] & (1 << (f & 0x1f))) {
71
- for (i = 0; i < 8; i++) {
72
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
73
if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
74
queues |= BIT(i);
75
}
76
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
77
}
78
}
79
} else {
80
- for (i = 0; i < 8; i++) {
81
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
82
if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
83
mask |= BIT(i);
84
}
85
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
86
queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
87
}
88
89
+ queues &= core->mac[VFRE];
90
igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
91
if (rss_info->queue & 1) {
92
queues <<= 8;
93
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
94
e1000x_fcs_len(core->mac);
95
96
for (i = 0; i < IGB_NUM_QUEUES; i++) {
97
- if (!(queues & BIT(i))) {
98
+ if (!(queues & BIT(i)) ||
99
+ !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
100
continue;
101
}
102
103
@@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
104
105
static void igb_vf_reset(IGBCore *core, uint16_t vfn)
106
{
107
+ uint16_t qn0 = vfn;
108
+ uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
109
+
110
/* disable Rx and Tx for the VF*/
111
- core->mac[VFTE] &= ~BIT(vfn);
112
+ core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
113
+ core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
114
+ core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
115
+ core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
116
core->mac[VFRE] &= ~BIT(vfn);
117
+ core->mac[VFTE] &= ~BIT(vfn);
118
/* indicate VF reset to PF */
119
core->mac[VFLRE] |= BIT(vfn);
120
/* VFLRE and mailbox use the same interrupt cause */
121
@@ -XXX,XX +XXX,XX @@ igb_phy_reg_init[] = {
122
static const uint32_t igb_mac_reg_init[] = {
123
[LEDCTL] = 2 | (3 << 8) | BIT(15) | (6 << 16) | (7 << 24),
124
[EEMNGCTL] = BIT(31),
125
+ [TXDCTL0] = E1000_TXDCTL_QUEUE_ENABLE,
126
[RXDCTL0] = E1000_RXDCTL_QUEUE_ENABLE | (1 << 16),
127
[RXDCTL1] = 1 << 16,
128
[RXDCTL2] = 1 << 16,
129
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
130
index XXXXXXX..XXXXXXX 100644
131
--- a/hw/net/igb_core.h
132
+++ b/hw/net/igb_core.h
133
@@ -XXX,XX +XXX,XX @@
134
#define IGB_MSIX_VEC_NUM (10)
135
#define IGBVF_MSIX_VEC_NUM (3)
136
#define IGB_NUM_QUEUES (16)
137
+#define IGB_NUM_VM_POOLS (8)
138
139
typedef struct IGBCore IGBCore;
140
141
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
142
index XXXXXXX..XXXXXXX 100644
143
--- a/hw/net/igb_regs.h
144
+++ b/hw/net/igb_regs.h
145
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
146
#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
147
#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000
148
149
+/* Additional Transmit Descriptor Control definitions */
150
+#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */
151
+
152
/* Additional Receive Descriptor Control definitions */
153
#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */
36
154
37
--
155
--
38
2.7.4
156
2.7.4
39
40
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
2
1
3
This patch changes the filter_send() parameter from CharBackend to MirrorState,
4
so that we can get more information such as vnet_hdr (we use it to support packets with a vnet header).
5
6
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
net/filter-mirror.c | 10 +++++-----
10
1 file changed, 5 insertions(+), 5 deletions(-)
11
12
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/net/filter-mirror.c
15
+++ b/net/filter-mirror.c
16
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
17
SocketReadState rs;
18
} MirrorState;
19
20
-static int filter_send(CharBackend *chr_out,
21
+static int filter_send(MirrorState *s,
22
const struct iovec *iov,
23
int iovcnt)
24
{
25
@@ -XXX,XX +XXX,XX @@ static int filter_send(CharBackend *chr_out,
26
}
27
28
len = htonl(size);
29
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)&len, sizeof(len));
30
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
31
if (ret != sizeof(len)) {
32
goto err;
33
}
34
35
buf = g_malloc(size);
36
iov_to_buf(iov, iovcnt, 0, buf, size);
37
- ret = qemu_chr_fe_write_all(chr_out, (uint8_t *)buf, size);
38
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
39
g_free(buf);
40
if (ret != size) {
41
goto err;
42
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_mirror_receive_iov(NetFilterState *nf,
43
MirrorState *s = FILTER_MIRROR(nf);
44
int ret;
45
46
- ret = filter_send(&s->chr_out, iov, iovcnt);
47
+ ret = filter_send(s, iov, iovcnt);
48
if (ret) {
49
error_report("filter mirror send failed(%s)", strerror(-ret));
50
}
51
@@ -XXX,XX +XXX,XX @@ static ssize_t filter_redirector_receive_iov(NetFilterState *nf,
52
int ret;
53
54
if (qemu_chr_fe_backend_connected(&s->chr_out)) {
55
- ret = filter_send(&s->chr_out, iov, iovcnt);
56
+ ret = filter_send(s, iov, iovcnt);
57
if (ret) {
58
error_report("filter redirector send failed(%s)", strerror(-ret));
59
}
60
--
61
2.7.4
62
63
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
2
2
3
We add the vnet_hdr_support option for filter-redirector, default is disabled.
3
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
4
If you use the virtio-net-pci net driver or another driver that needs vnet_hdr, please enable it.
5
Because colo-compare or other modules needs the vnet_hdr_len to parse
6
packet, we add this new option to send the length to them.
7
You can use it for example:
8
-object filter-redirector,id=r0,netdev=hn0,queue=tx,outdev=red0,vnet_hdr_support
9
10
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
11
Signed-off-by: Jason Wang <jasowang@redhat.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
---
5
---
13
net/filter-mirror.c | 23 +++++++++++++++++++++++
6
hw/net/igb_core.c | 41 ++++++++++++++++++++++++++++++++++++-----
14
qemu-options.hx | 6 +++---
7
1 file changed, 36 insertions(+), 5 deletions(-)
15
2 files changed, 26 insertions(+), 3 deletions(-)
16
8
17
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
9
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
18
index XXXXXXX..XXXXXXX 100644
10
index XXXXXXX..XXXXXXX 100644
19
--- a/net/filter-mirror.c
11
--- a/hw/net/igb_core.c
20
+++ b/net/filter-mirror.c
12
+++ b/hw/net/igb_core.c
21
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
13
@@ -XXX,XX +XXX,XX @@ igb_rx_l4_cso_enabled(IGBCore *core)
22
s->outdev = g_strdup(value);
14
return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
23
}
15
}
24
16
25
+static bool filter_redirector_get_vnet_hdr(Object *obj, Error **errp)
17
+static bool
18
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
26
+{
19
+{
27
+ MirrorState *s = FILTER_REDIRECTOR(obj);
20
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
21
+ bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
22
+ int max_ethernet_lpe_size =
23
+ core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
24
+ int max_ethernet_vlan_size = 1522;
28
+
25
+
29
+ return s->vnet_hdr;
26
+ return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
30
+}
27
+}
31
+
28
+
32
+static void filter_redirector_set_vnet_hdr(Object *obj,
29
static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
33
+ bool value,
30
- E1000E_RSSInfo *rss_info, bool *external_tx)
34
+ Error **errp)
31
+ size_t size, E1000E_RSSInfo *rss_info,
35
+{
32
+ bool *external_tx)
36
+ MirrorState *s = FILTER_REDIRECTOR(obj);
33
{
34
static const int ta_shift[] = { 4, 3, 2, 0 };
35
uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
36
uint16_t queues = 0;
37
+ uint16_t oversized = 0;
38
uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
39
bool accepted = false;
40
int i;
41
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
42
}
43
44
queues &= core->mac[VFRE];
45
- igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
46
- if (rss_info->queue & 1) {
47
- queues <<= 8;
48
+ if (queues) {
49
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
50
+ if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
51
+ oversized |= BIT(i);
52
+ }
53
+ }
54
+ /* 8.19.37 increment ROC if packet is oversized for all queues */
55
+ if (oversized == queues) {
56
+ trace_e1000x_rx_oversized(size);
57
+ e1000x_inc_reg_if_not_full(core->mac, ROC);
58
+ }
59
+ queues &= ~oversized;
60
+ }
37
+
61
+
38
+ s->vnet_hdr = value;
62
+ if (queues) {
39
+}
63
+ igb_rss_parse_packet(core, core->rx_pkt,
40
+
64
+ external_tx != NULL, rss_info);
41
static void filter_mirror_init(Object *obj)
65
+ if (rss_info->queue & 1) {
42
{
66
+ queues <<= 8;
43
MirrorState *s = FILTER_MIRROR(obj);
67
+ }
44
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_init(Object *obj)
68
}
45
69
} else {
46
static void filter_redirector_init(Object *obj)
70
switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
47
{
71
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
48
+ MirrorState *s = FILTER_REDIRECTOR(obj);
72
e1000x_vlan_enabled(core->mac),
49
+
73
core->mac[VET] & 0xffff);
50
object_property_add_str(obj, "indev", filter_redirector_get_indev,
74
51
filter_redirector_set_indev, NULL);
75
- queues = igb_receive_assign(core, ehdr, &rss_info, external_tx);
52
object_property_add_str(obj, "outdev", filter_redirector_get_outdev,
76
+ queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
53
filter_redirector_set_outdev, NULL);
77
if (!queues) {
54
+
78
trace_e1000e_rx_flt_dropped();
55
+ s->vnet_hdr = false;
79
return orig_size;
56
+ object_property_add_bool(obj, "vnet_hdr_support",
57
+ filter_redirector_get_vnet_hdr,
58
+ filter_redirector_set_vnet_hdr, NULL);
59
}
60
61
static void filter_mirror_fini(Object *obj)
62
diff --git a/qemu-options.hx b/qemu-options.hx
63
index XXXXXXX..XXXXXXX 100644
64
--- a/qemu-options.hx
65
+++ b/qemu-options.hx
66
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
67
68
filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
69
70
-@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
71
-outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
72
+@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
73
74
filter-redirector on netdev @var{netdevid},redirect filter's net packet to chardev
75
-@var{chardevid},and redirect indev's packet to filter.
76
+@var{chardevid},and redirect indev's packet to filter.if it has the vnet_hdr_support flag,
77
+filter-redirector will redirect packet with vnet_hdr_len.
78
Create a filter-redirector we need to differ outdev id from indev id, id can not
79
be the same. we can just use indev or outdev, but at least one of indev or outdev
80
need to be specified.
81
--
80
--
82
2.7.4
81
2.7.4
83
84
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
2
2
3
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
3
RSS for VFs is only enabled if VMOLR[n].RSSE is set.
4
5
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
7
---
6
docs/colo-proxy.txt | 26 ++++++++++++++++++++++++++
8
hw/net/igb_core.c | 9 ++++++++-
7
1 file changed, 26 insertions(+)
9
1 file changed, 8 insertions(+), 1 deletion(-)
8
10
9
diff --git a/docs/colo-proxy.txt b/docs/colo-proxy.txt
11
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
10
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
11
--- a/docs/colo-proxy.txt
13
--- a/hw/net/igb_core.c
12
+++ b/docs/colo-proxy.txt
14
+++ b/hw/net/igb_core.c
13
@@ -XXX,XX +XXX,XX @@ Secondary(ip:3.3.3.8):
15
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
14
-chardev socket,id=red1,host=3.3.3.3,port=9004
16
if (queues) {
15
-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0
17
igb_rss_parse_packet(core, core->rx_pkt,
16
-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1
18
external_tx != NULL, rss_info);
17
+-object filter-rewriter,id=f3,netdev=hn0,queue=all
19
+ /* Sec 8.26.1: PQn = VFn + VQn*8 */
18
+
20
if (rss_info->queue & 1) {
19
+If you want to use virtio-net-pci or other driver with vnet_header:
21
- queues <<= 8;
20
+
22
+ for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
21
+Primary(ip:3.3.3.3):
23
+ if ((queues & BIT(i)) &&
22
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
24
+ (core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
23
+-device e1000,id=e0,netdev=hn0,mac=52:a4:00:12:78:66
25
+ queues |= BIT(i + IGB_NUM_VM_POOLS);
24
+-chardev socket,id=mirror0,host=3.3.3.3,port=9003,server,nowait
26
+ queues &= ~BIT(i);
25
+-chardev socket,id=compare1,host=3.3.3.3,port=9004,server,nowait
27
+ }
26
+-chardev socket,id=compare0,host=3.3.3.3,port=9001,server,nowait
28
+ }
27
+-chardev socket,id=compare0-0,host=3.3.3.3,port=9001
29
}
28
+-chardev socket,id=compare_out,host=3.3.3.3,port=9005,server,nowait
30
}
29
+-chardev socket,id=compare_out0,host=3.3.3.3,port=9005
31
} else {
30
+-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
31
+-object filter-redirector,netdev=hn0,id=redire0,queue=rx,indev=compare_out,vnet_hdr_support
32
+-object filter-redirector,netdev=hn0,id=redire1,queue=rx,outdev=compare0,vnet_hdr_support
33
+-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
34
+
35
+Secondary(ip:3.3.3.8):
36
+-netdev tap,id=hn0,vhost=off,script=/etc/qemu-ifup,downscript=/etc/qemu-ifdown
37
+-device e1000,netdev=hn0,mac=52:a4:00:12:78:66
38
+-chardev socket,id=red0,host=3.3.3.3,port=9003
39
+-chardev socket,id=red1,host=3.3.3.3,port=9004
40
+-object filter-redirector,id=f1,netdev=hn0,queue=tx,indev=red0,vnet_hdr_support
41
+-object filter-redirector,id=f2,netdev=hn0,queue=rx,outdev=red1,vnet_hdr_support
42
+-object filter-rewriter,id=f3,netdev=hn0,queue=all,vnet_hdr_support
43
44
Note:
45
a.COLO-proxy must work with COLO-frame and Block-replication.
46
--
32
--
47
2.7.4
33
2.7.4
48
49
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
2
2
3
We add the vnet_hdr_support option for colo-compare, default is disabled.
3
Please note that loopback counters for VM to VM traffic are not
4
If you use virtio-net-pci or other driver needs vnet_hdr, please enable it.
4
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.
5
You can use it for example:
6
-object colo-compare,id=comp0,primary_in=compare0-0,secondary_in=compare1,outdev=compare_out0,vnet_hdr_support
7
5
8
COLO-compare can get vnet header length from filter,
6
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
9
Add vnet_hdr_len to struct packet and output packet with
10
the vnet_hdr_len.
11
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
8
---
15
net/colo-compare.c | 60 +++++++++++++++++++++++++++++++++++++++++++++++-------
9
hw/net/igb_core.c | 26 ++++++++++++++++++++++----
16
qemu-options.hx | 4 ++--
10
1 file changed, 22 insertions(+), 4 deletions(-)
17
2 files changed, 55 insertions(+), 9 deletions(-)
18
11
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
12
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
20
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
14
--- a/hw/net/igb_core.c
22
+++ b/net/colo-compare.c
15
+++ b/hw/net/igb_core.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct CompareState {
16
@@ -XXX,XX +XXX,XX @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int queue_index)
24
CharBackend chr_out;
17
}
25
SocketReadState pri_rs;
18
26
SocketReadState sec_rs;
19
static void
27
+ bool vnet_hdr;
20
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
28
21
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
29
/* connection list: the connections belonged to this NIC could be found
30
* in this list.
31
@@ -XXX,XX +XXX,XX @@ enum {
32
33
static int compare_chr_send(CompareState *s,
34
const uint8_t *buf,
35
- uint32_t size);
36
+ uint32_t size,
37
+ uint32_t vnet_hdr_len);
38
39
static gint seq_sorter(Packet *a, Packet *b, gpointer data)
40
{
22
{
41
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
23
static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
24
PTC1023, PTC1522 };
25
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
26
core->mac[GPTC] = core->mac[TPT];
27
core->mac[GOTCL] = core->mac[TOTL];
28
core->mac[GOTCH] = core->mac[TOTH];
29
+
30
+ if (core->mac[MRQC] & 1) {
31
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
32
+
33
+ core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
34
+ core->mac[PVFGPTC0 + (pool * 64)]++;
35
+ }
36
}
37
38
static void
39
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
40
net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
41
}
42
if (igb_tx_pkt_send(core, tx, queue_index)) {
43
- igb_on_tx_done_update_stats(core, tx->tx_pkt);
44
+ igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
45
}
42
}
46
}
43
47
44
if (result) {
48
@@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core,
45
- ret = compare_chr_send(s, pkt->data, pkt->size);
49
}
46
+ ret = compare_chr_send(s,
50
47
+ pkt->data,
51
static void
48
+ pkt->size,
52
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
49
+ pkt->vnet_hdr_len);
53
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
50
if (ret < 0) {
54
+ size_t data_size, size_t data_fcs_size)
51
error_report("colo_send_primary_packet failed");
52
}
53
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
54
55
static int compare_chr_send(CompareState *s,
56
const uint8_t *buf,
57
- uint32_t size)
58
+ uint32_t size,
59
+ uint32_t vnet_hdr_len)
60
{
55
{
61
int ret = 0;
56
e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
62
uint32_t len = htonl(size);
57
63
@@ -XXX,XX +XXX,XX @@ static int compare_chr_send(CompareState *s,
58
@@ -XXX,XX +XXX,XX @@ igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
64
goto err;
59
default:
60
break;
65
}
61
}
66
62
+
67
+ if (s->vnet_hdr) {
63
+ if (core->mac[MRQC] & 1) {
68
+ /*
64
+ uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
69
+ * We send vnet header len make other module(like filter-redirector)
65
+
70
+ * know how to parse net packet correctly.
66
+ core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
71
+ */
67
+ core->mac[PVFGPRC0 + (pool * 64)]++;
72
+ len = htonl(vnet_hdr_len);
68
+ if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
73
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
69
+ core->mac[PVFMPRC0 + (pool * 64)]++;
74
+ if (ret != sizeof(len)) {
75
+ goto err;
76
+ }
70
+ }
77
+ }
71
+ }
78
+
79
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
80
if (ret != size) {
81
goto err;
82
@@ -XXX,XX +XXX,XX @@ static void compare_set_outdev(Object *obj, const char *value, Error **errp)
83
s->outdev = g_strdup(value);
84
}
72
}
85
73
86
+static bool compare_get_vnet_hdr(Object *obj, Error **errp)
74
static inline bool
87
+{
75
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
88
+ CompareState *s = COLO_COMPARE(obj);
76
89
+
77
} while (desc_offset < total_size);
90
+ return s->vnet_hdr;
78
91
+}
79
- igb_update_rx_stats(core, size, total_size);
92
+
80
+ igb_update_rx_stats(core, rxi, size, total_size);
93
+static void compare_set_vnet_hdr(Object *obj,
94
+ bool value,
95
+ Error **errp)
96
+{
97
+ CompareState *s = COLO_COMPARE(obj);
98
+
99
+ s->vnet_hdr = value;
100
+}
101
+
102
static void compare_pri_rs_finalize(SocketReadState *pri_rs)
103
{
104
CompareState *s = container_of(pri_rs, CompareState, pri_rs);
105
106
if (packet_enqueue(s, PRIMARY_IN)) {
107
trace_colo_compare_main("primary: unsupported packet in");
108
- compare_chr_send(s, pri_rs->buf, pri_rs->packet_len);
109
+ compare_chr_send(s,
110
+ pri_rs->buf,
111
+ pri_rs->packet_len,
112
+ pri_rs->vnet_hdr_len);
113
} else {
114
/* compare connection */
115
g_queue_foreach(&s->conn_list, colo_compare_connection, s);
116
@@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp)
117
return;
118
}
119
120
- net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, false);
121
- net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, false);
122
+ net_socket_rs_init(&s->pri_rs, compare_pri_rs_finalize, s->vnet_hdr);
123
+ net_socket_rs_init(&s->sec_rs, compare_sec_rs_finalize, s->vnet_hdr);
124
125
g_queue_init(&s->conn_list);
126
127
@@ -XXX,XX +XXX,XX @@ static void colo_flush_packets(void *opaque, void *user_data)
128
129
while (!g_queue_is_empty(&conn->primary_list)) {
130
pkt = g_queue_pop_head(&conn->primary_list);
131
- compare_chr_send(s, pkt->data, pkt->size);
132
+ compare_chr_send(s,
133
+ pkt->data,
134
+ pkt->size,
135
+ pkt->vnet_hdr_len);
136
packet_destroy(pkt, NULL);
137
}
138
while (!g_queue_is_empty(&conn->secondary_list)) {
139
@@ -XXX,XX +XXX,XX @@ static void colo_compare_class_init(ObjectClass *oc, void *data)
140
141
static void colo_compare_init(Object *obj)
142
{
143
+ CompareState *s = COLO_COMPARE(obj);
144
+
145
object_property_add_str(obj, "primary_in",
146
compare_get_pri_indev, compare_set_pri_indev,
147
NULL);
148
@@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj)
149
object_property_add_str(obj, "outdev",
150
compare_get_outdev, compare_set_outdev,
151
NULL);
152
+
153
+ s->vnet_hdr = false;
154
+ object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr,
155
+ compare_set_vnet_hdr, NULL);
156
}
81
}
157
82
158
static void colo_compare_finalize(Object *obj)
83
static inline void
159
diff --git a/qemu-options.hx b/qemu-options.hx
160
index XXXXXXX..XXXXXXX 100644
161
--- a/qemu-options.hx
162
+++ b/qemu-options.hx
163
@@ -XXX,XX +XXX,XX @@ Dump the network traffic on netdev @var{dev} to the file specified by
164
The file format is libpcap, so it can be analyzed with tools such as tcpdump
165
or Wireshark.
166
167
-@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},
168
-outdev=@var{chardevid}
169
+@item -object colo-compare,id=@var{id},primary_in=@var{chardevid},secondary_in=@var{chardevid},outdev=@var{chardevid}[,vnet_hdr_support]
170
171
Colo-compare gets packet from primary_in@var{chardevid} and secondary_in@var{chardevid}, then compares primary packet with
172
secondary packet. If the packets are same, we will output primary
173
packet to outdev@var{chardevid}, else we will notify colo-frame
174
do checkpoint and send primary packet to outdev@var{chardevid}.
175
+If it has the vnet_hdr_support flag, colo-compare will send/receive packets with vnet_hdr_len.
176
177
we must use it with the help of filter-mirror and filter-redirector.
178
179
--
84
--
180
2.7.4
85
2.7.4
181
182
diff view generated by jsdifflib
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
1
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
2
2
3
We add the vnet_hdr_support option for filter-mirror, default is disabled.
3
Add support for stripping/inserting VLAN for VFs.
4
If you use virtio-net-pci or other driver needs vnet_hdr, please enable it.
5
You can use it for example:
6
-object filter-mirror,id=m0,netdev=hn0,queue=tx,outdev=mirror0,vnet_hdr_support
7
4
8
If it has vnet_hdr_support flag, we will change the sending packet format from
5
Had to move CSUM calculation back into the for loop, since packet data
9
struct {int size; const uint8_t buf[];} to {int size; int vnet_hdr_len; const uint8_t buf[];}.
6
is pulled inside the loop based on strip VLAN decision for every VF.
10
make other module(like colo-compare) know how to parse net packet correctly.
11
7
12
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
8
net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead for
9
igb. Work for a future patch.
10
11
Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
13
---
15
net/filter-mirror.c | 42 +++++++++++++++++++++++++++++++++++++++++-
14
hw/net/igb_core.c | 62 +++++++++++++++++++++++++++++++++++++++++++------------
16
qemu-options.hx | 5 ++---
15
1 file changed, 49 insertions(+), 13 deletions(-)
17
2 files changed, 43 insertions(+), 4 deletions(-)
18
16
19
diff --git a/net/filter-mirror.c b/net/filter-mirror.c
17
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
20
index XXXXXXX..XXXXXXX 100644
18
index XXXXXXX..XXXXXXX 100644
21
--- a/net/filter-mirror.c
19
--- a/hw/net/igb_core.c
22
+++ b/net/filter-mirror.c
20
+++ b/hw/net/igb_core.c
23
@@ -XXX,XX +XXX,XX @@ typedef struct MirrorState {
21
@@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
24
CharBackend chr_in;
22
info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
25
CharBackend chr_out;
23
}
26
SocketReadState rs;
24
27
+ bool vnet_hdr;
25
+static void
28
} MirrorState;
26
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
29
27
+ uint16_t vlan, bool insert_vlan)
30
static int filter_send(MirrorState *s,
28
+{
31
const struct iovec *iov,
29
+ if (core->mac[MRQC] & 1) {
32
int iovcnt)
30
+ uint16_t pool = qn % IGB_NUM_VM_POOLS;
33
{
34
+ NetFilterState *nf = NETFILTER(s);
35
int ret = 0;
36
ssize_t size = 0;
37
uint32_t len = 0;
38
@@ -XXX,XX +XXX,XX @@ static int filter_send(MirrorState *s,
39
goto err;
40
}
41
42
+ if (s->vnet_hdr) {
43
+ /*
44
+ * If vnet_hdr = on, we send vnet header len to make other
45
+ * module(like colo-compare) know how to parse net
46
+ * packet correctly.
47
+ */
48
+ ssize_t vnet_hdr_len;
49
+
31
+
50
+ vnet_hdr_len = nf->netdev->vnet_hdr_len;
32
+ if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
51
+
33
+ /* always insert default VLAN */
52
+ len = htonl(vnet_hdr_len);
34
+ insert_vlan = true;
53
+ ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)&len, sizeof(len));
35
+ vlan = core->mac[VMVIR0 + pool] & 0xffff;
54
+ if (ret != sizeof(len)) {
36
+ } else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
55
+ goto err;
37
+ insert_vlan = false;
56
+ }
38
+ }
57
+ }
39
+ }
58
+
40
+
59
buf = g_malloc(size);
41
+ if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
60
iov_to_buf(iov, iovcnt, 0, buf, size);
42
+ net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
61
ret = qemu_chr_fe_write_all(&s->chr_out, (uint8_t *)buf, size);
43
+ core->mac[VET] & 0xffff);
62
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_setup(NetFilterState *nf, Error **errp)
44
+ }
63
}
64
}
65
66
- net_socket_rs_init(&s->rs, redirector_rs_finalize, false);
67
+ net_socket_rs_init(&s->rs, redirector_rs_finalize, s->vnet_hdr);
68
69
if (s->indev) {
70
chr = qemu_chr_find(s->indev);
71
@@ -XXX,XX +XXX,XX @@ static void filter_mirror_set_outdev(Object *obj,
72
}
73
}
74
75
+static bool filter_mirror_get_vnet_hdr(Object *obj, Error **errp)
76
+{
77
+ MirrorState *s = FILTER_MIRROR(obj);
78
+
79
+ return s->vnet_hdr;
80
+}
45
+}
81
+
46
+
82
+static void filter_mirror_set_vnet_hdr(Object *obj, bool value, Error **errp)
47
static bool
48
igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
49
{
50
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
51
52
if (cmd_type_len & E1000_TXD_CMD_EOP) {
53
if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
54
- if (cmd_type_len & E1000_TXD_CMD_VLE) {
55
- idx = (tx->first_olinfo_status >> 4) & 1;
56
- uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
57
- uint16_t vet = core->mac[VET] & 0xffff;
58
- net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
59
- }
60
+ idx = (tx->first_olinfo_status >> 4) & 1;
61
+ igb_tx_insert_vlan(core, queue_index, tx,
62
+ tx->ctx[idx].vlan_macip_lens >> 16,
63
+ !!(cmd_type_len & E1000_TXD_CMD_VLE));
64
+
65
if (igb_tx_pkt_send(core, tx, queue_index)) {
66
igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
67
}
68
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
69
igb_update_rx_stats(core, rxi, size, total_size);
70
}
71
72
+static bool
73
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
83
+{
74
+{
84
+ MirrorState *s = FILTER_MIRROR(obj);
75
+ if (core->mac[MRQC] & 1) {
76
+ uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
77
+ /* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
78
+ return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
79
+ core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
80
+ core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
81
+ }
85
+
82
+
86
+ s->vnet_hdr = value;
83
+ return e1000x_vlan_enabled(core->mac);
87
+}
84
+}
88
+
85
+
89
static char *filter_redirector_get_outdev(Object *obj, Error **errp)
86
static inline void
87
igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
90
{
88
{
91
MirrorState *s = FILTER_REDIRECTOR(obj);
89
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
92
@@ -XXX,XX +XXX,XX @@ static void filter_redirector_set_outdev(Object *obj,
90
93
91
ehdr = PKT_GET_ETH_HDR(filter_buf);
94
static void filter_mirror_init(Object *obj)
92
net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
95
{
93
-
96
+ MirrorState *s = FILTER_MIRROR(obj);
94
- net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
95
- e1000x_vlan_enabled(core->mac),
96
- core->mac[VET] & 0xffff);
97
+ net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
98
99
queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
100
if (!queues) {
101
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
102
return orig_size;
103
}
104
105
- total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
106
- e1000x_fcs_len(core->mac);
107
-
108
for (i = 0; i < IGB_NUM_QUEUES; i++) {
109
if (!(queues & BIT(i)) ||
110
!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
111
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
112
113
igb_rx_ring_init(core, &rxr, i);
114
115
+ net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
116
+ igb_rx_strip_vlan(core, rxr.i),
117
+ core->mac[VET] & 0xffff);
97
+
118
+
98
object_property_add_str(obj, "outdev", filter_mirror_get_outdev,
119
+ total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
99
filter_mirror_set_outdev, NULL);
120
+ e1000x_fcs_len(core->mac);
100
+
121
+
101
+ s->vnet_hdr = false;
122
if (!igb_has_rxbufs(core, rxr.i, total_size)) {
102
+ object_property_add_bool(obj, "vnet_hdr_support",
123
n |= E1000_ICS_RXO;
103
+ filter_mirror_get_vnet_hdr,
124
trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
104
+ filter_mirror_set_vnet_hdr, NULL);
105
}
106
107
static void filter_redirector_init(Object *obj)
108
diff --git a/qemu-options.hx b/qemu-options.hx
109
index XXXXXXX..XXXXXXX 100644
110
--- a/qemu-options.hx
111
+++ b/qemu-options.hx
112
@@ -XXX,XX +XXX,XX @@ queue @var{all|rx|tx} is an option that can be applied to any netfilter.
113
@option{tx}: the filter is attached to the transmit queue of the netdev,
114
where it will receive packets sent by the netdev.
115
116
-@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
117
+@item -object filter-mirror,id=@var{id},netdev=@var{netdevid},outdev=@var{chardevid},queue=@var{all|rx|tx}[,vnet_hdr_support]
118
119
-filter-mirror on netdev @var{netdevid},mirror net packet to chardev
120
-@var{chardevid}
121
+filter-mirror on netdev @var{netdevid},mirror net packet to chardev@var{chardevid}, if it has the vnet_hdr_support flag, filter-mirror will mirror packet with vnet_hdr_len.
122
123
@item -object filter-redirector,id=@var{id},netdev=@var{netdevid},indev=@var{chardevid},
124
outdev=@var{chardevid}[,queue=@var{all|rx|tx}]
125
--
125
--
126
2.7.4
126
2.7.4
127
128
diff view generated by jsdifflib
Deleted patch
1
From: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
2
1
3
Make colo-compare and filter-rewriter able to parse vnet packets.
4
5
Signed-off-by: Zhang Chen <zhangchen.fnst@cn.fujitsu.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
net/colo.c | 6 +++---
9
1 file changed, 3 insertions(+), 3 deletions(-)
10
11
diff --git a/net/colo.c b/net/colo.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/net/colo.c
14
+++ b/net/colo.c
15
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
16
{
17
int network_length;
18
static const uint8_t vlan[] = {0x81, 0x00};
19
- uint8_t *data = pkt->data;
20
+ uint8_t *data = pkt->data + pkt->vnet_hdr_len;
21
uint16_t l3_proto;
22
ssize_t l2hdr_len = eth_get_l2_hdr_length(data);
23
24
- if (pkt->size < ETH_HLEN) {
25
+ if (pkt->size < ETH_HLEN + pkt->vnet_hdr_len) {
26
trace_colo_proxy_main("pkt->size < ETH_HLEN");
27
return 1;
28
}
29
@@ -XXX,XX +XXX,XX @@ int parse_packet_early(Packet *pkt)
30
}
31
32
network_length = pkt->ip->ip_hl * 4;
33
- if (pkt->size < l2hdr_len + network_length) {
34
+ if (pkt->size < l2hdr_len + network_length + pkt->vnet_hdr_len) {
35
trace_colo_proxy_main("pkt->size < network_header + network_length");
36
return 1;
37
}
38
--
39
2.7.4
40
41
diff view generated by jsdifflib