1
The following changes since commit e607bbee553cfe73072870cef458cfa4e78133e2:
1
The following changes since commit 886c0453cbf10eebd42a9ccf89c3e46eb389c357:
2
2
3
Merge remote-tracking branch 'remotes/edgar/tags/edgar/xilinx-next-2018-01-26.for-upstream' into staging (2018-01-26 14:24:25 +0000)
3
Merge tag 'pull-qapi-2023-05-17-v2' of https://repo.or.cz/qemu/armbru into staging (2023-05-22 15:54:21 -0700)
4
4
5
are available in the git repository at:
5
are available in the git repository at:
6
6
7
https://github.com/jasowang/qemu.git tags/net-pull-request
7
https://github.com/jasowang/qemu.git tags/net-pull-request
8
8
9
for you to fetch changes up to bf4835a4d5338bb7424827715df22570a8adc67c:
9
for you to fetch changes up to 792676c165159c11412346870fd58fd243ab2166:
10
10
11
MAINTAINERS: update Dmitry Fleytman email (2018-01-29 16:05:38 +0800)
11
rtl8139: fix large_send_mss divide-by-zero (2023-05-23 15:20:15 +0800)
12
12
13
----------------------------------------------------------------
13
----------------------------------------------------------------
14
14
15
----------------------------------------------------------------
15
----------------------------------------------------------------
16
Mao Zhongyi (2):
16
Akihiko Odaki (48):
17
colo: modified the payload compare function
17
hw/net/net_tx_pkt: Decouple implementation from PCI
18
colo: compare the packet based on the tcp sequence number
18
hw/net/net_tx_pkt: Decouple interface from PCI
19
e1000x: Fix BPRC and MPRC
20
igb: Fix Rx packet type encoding
21
igb: Do not require CTRL.VME for tx VLAN tagging
22
igb: Clear IMS bits when committing ICR access
23
net/net_rx_pkt: Use iovec for net_rx_pkt_set_protocols()
24
e1000e: Always copy ethernet header
25
igb: Always copy ethernet header
26
Fix references to igb Avocado test
27
tests/avocado: Remove unused imports
28
tests/avocado: Remove test_igb_nomsi_kvm
29
hw/net/net_tx_pkt: Remove net_rx_pkt_get_l4_info
30
net/eth: Rename eth_setup_vlan_headers_ex
31
e1000x: Share more Rx filtering logic
32
e1000x: Take CRC into consideration for size check
33
e1000x: Rename TcpIpv6 into TcpIpv6Ex
34
e1000e: Always log status after building rx metadata
35
igb: Always log status after building rx metadata
36
igb: Remove goto
37
igb: Read DCMD.VLE of the first Tx descriptor
38
e1000e: Reset packet state after emptying Tx queue
39
vmxnet3: Reset packet state after emptying Tx queue
40
igb: Add more definitions for Tx descriptor
41
igb: Share common VF constants
42
igb: Fix igb_mac_reg_init coding style alignment
43
igb: Clear EICR bits for delayed MSI-X interrupts
44
e1000e: Rename a variable in e1000e_receive_internal()
45
igb: Rename a variable in igb_receive_internal()
46
net/eth: Use void pointers
47
net/eth: Always add VLAN tag
48
hw/net/net_rx_pkt: Enforce alignment for eth_header
49
tests/qtest/libqos/igb: Set GPIE.Multiple_MSIX
50
igb: Implement MSI-X single vector mode
51
igb: Use UDP for RSS hash
52
igb: Implement Rx SCTP CSO
53
igb: Implement Tx SCTP CSO
54
igb: Strip the second VLAN tag for extended VLAN
55
igb: Filter with the second VLAN tag for extended VLAN
56
igb: Implement igb-specific oversize check
57
igb: Implement Rx PTP2 timestamp
58
igb: Implement Tx timestamp
59
e1000e: Notify only new interrupts
60
igb: Notify only new interrupts
61
igb: Clear-on-read ICR when ICR.INTA is set
62
vmxnet3: Do not depend on PC
63
MAINTAINERS: Add a reviewer for network packet abstractions
64
docs/system/devices/igb: Note igb is tested for DPDK
19
65
20
Philippe Mathieu-Daudé (1):
66
Stefan Hajnoczi (1):
21
MAINTAINERS: update Dmitry Fleytman email
67
rtl8139: fix large_send_mss divide-by-zero
22
68
23
Thomas Huth (3):
69
timothee.cocault@gmail.com (1):
24
net: Allow hubports to connect to other netdevs
70
e1000e: Fix tx/rx counters
25
net: Allow netdevs to be used with 'hostfwd_add' and 'hostfwd_remove'
26
qemu-doc: Get rid of "vlan=X" example in the documentation
27
71
28
MAINTAINERS | 8 +-
72
MAINTAINERS | 3 +-
29
hmp-commands.hx | 4 +-
73
docs/system/devices/igb.rst | 14 +-
30
net/colo-compare.c | 411 +++++++++++++++++++++++++++++++++--------------------
74
hw/net/Kconfig | 2 +-
31
net/colo.c | 9 ++
75
hw/net/e1000.c | 46 +-
32
net/colo.h | 15 ++
76
hw/net/e1000e_core.c | 297 ++++-----
33
net/hub.c | 27 +++-
77
hw/net/e1000e_core.h | 2 -
34
net/hub.h | 3 +-
78
hw/net/e1000x_common.c | 82 ++-
35
net/net.c | 2 +-
79
hw/net/e1000x_common.h | 9 +-
36
net/slirp.c | 33 +++--
80
hw/net/e1000x_regs.h | 24 +-
37
net/trace-events | 2 +-
81
hw/net/igb.c | 10 +-
38
qapi/net.json | 4 +-
82
hw/net/igb_common.h | 24 +-
39
qemu-options.hx | 12 +-
83
hw/net/igb_core.c | 722 +++++++++++----------
40
12 files changed, 347 insertions(+), 183 deletions(-)
84
hw/net/igb_regs.h | 67 +-
41
85
hw/net/igbvf.c | 7 -
42
86
hw/net/net_rx_pkt.c | 107 +--
87
hw/net/net_rx_pkt.h | 38 +-
88
hw/net/net_tx_pkt.c | 101 +--
89
hw/net/net_tx_pkt.h | 46 +-
90
hw/net/rtl8139.c | 3 +
91
hw/net/trace-events | 19 +-
92
hw/net/virtio-net.c | 7 +-
93
hw/net/vmxnet3.c | 22 +-
94
include/net/eth.h | 29 +-
95
include/qemu/crc32c.h | 1 +
96
net/eth.c | 100 +--
97
scripts/ci/org.centos/stream/8/x86_64/test-avocado | 3 +-
98
tests/avocado/netdev-ethtool.py | 17 +-
99
tests/qtest/libqos/igb.c | 1 +
100
util/crc32c.c | 8 +
101
29 files changed, 979 insertions(+), 832 deletions(-)
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
From: "timothee.cocault@gmail.com" <timothee.cocault@gmail.com>
2
2
3
Modified the function colo_packet_compare_common to prepare for the
3
The bytes and packets counter registers are cleared on read.
4
tcp packet comparison in the next patch.
5
4
6
Cc: Zhang Chen <zhangckid@gmail.com>
5
Copying the "total counter" registers to the "good counter" registers has
7
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>
6
side effects.
8
Cc: Jason Wang <jasowang@redhat.com>
7
If the "total" register is never read by the OS, it only gets incremented.
8
This leads to exponential growth of the "good" register.
9
9
10
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
10
This commit increments the counters individually to avoid this.
11
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
11
12
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
12
Signed-off-by: Timothée Cocault <timothee.cocault@gmail.com>
13
Reviewed-by: Zhang Chen <zhangckid@gmail.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
14
---
16
net/colo-compare.c | 88 +++++++++++++++++++++++++++---------------------------
15
hw/net/e1000.c | 5 ++---
17
1 file changed, 44 insertions(+), 44 deletions(-)
16
hw/net/e1000e_core.c | 5 ++---
17
hw/net/e1000x_common.c | 5 ++---
18
hw/net/igb_core.c | 5 ++---
19
4 files changed, 8 insertions(+), 12 deletions(-)
18
20
19
diff --git a/net/colo-compare.c b/net/colo-compare.c
21
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
20
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
21
--- a/net/colo-compare.c
23
--- a/hw/net/e1000.c
22
+++ b/net/colo-compare.c
24
+++ b/hw/net/e1000.c
23
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
25
@@ -XXX,XX +XXX,XX @@ xmit_seg(E1000State *s)
24
* return: 0 means packet same
26
25
* > 0 || < 0 means packet different
27
e1000x_inc_reg_if_not_full(s->mac_reg, TPT);
26
*/
28
e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4);
27
-static int colo_packet_compare_common(Packet *ppkt,
29
- s->mac_reg[GPTC] = s->mac_reg[TPT];
28
- Packet *spkt,
30
- s->mac_reg[GOTCL] = s->mac_reg[TOTL];
29
- int poffset,
31
- s->mac_reg[GOTCH] = s->mac_reg[TOTH];
30
- int soffset)
32
+ e1000x_inc_reg_if_not_full(s->mac_reg, GPTC);
31
+static int colo_compare_packet_payload(Packet *ppkt,
33
+ e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4);
32
+ Packet *spkt,
34
}
33
+ uint16_t poffset,
35
34
+ uint16_t soffset,
36
static void
35
+ uint16_t len)
37
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
36
+
38
index XXXXXXX..XXXXXXX 100644
37
{
39
--- a/hw/net/e1000e_core.c
38
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
40
+++ b/hw/net/e1000e_core.c
39
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
41
@@ -XXX,XX +XXX,XX @@ e1000e_on_tx_done_update_stats(E1000ECore *core, struct NetTxPkt *tx_pkt)
40
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt,
42
g_assert_not_reached();
41
sec_ip_src, sec_ip_dst);
42
}
43
}
43
44
44
- poffset = ppkt->vnet_hdr_len + poffset;
45
- core->mac[GPTC] = core->mac[TPT];
45
- soffset = ppkt->vnet_hdr_len + soffset;
46
- core->mac[GOTCL] = core->mac[TOTL];
46
-
47
- core->mac[GOTCH] = core->mac[TOTH];
47
- if (ppkt->size - poffset == spkt->size - soffset) {
48
+ e1000x_inc_reg_if_not_full(core->mac, GPTC);
48
- return memcmp(ppkt->data + poffset,
49
+ e1000x_grow_8reg_if_not_full(core->mac, GOTCL, tot_len);
49
- spkt->data + soffset,
50
- spkt->size - soffset);
51
- } else {
52
- trace_colo_compare_main("Net packet size are not the same");
53
- return -1;
54
- }
55
+ return memcmp(ppkt->data + poffset, spkt->data + soffset, len);
56
}
50
}
57
51
58
/*
52
static void
59
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
53
diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c
60
* the secondary guest's timestamp. COLO just focus on payload,
54
index XXXXXXX..XXXXXXX 100644
61
* so we just need skip this field.
55
--- a/hw/net/e1000x_common.c
62
*/
56
+++ b/hw/net/e1000x_common.c
63
- if (ptcp->th_off > 5) {
57
@@ -XXX,XX +XXX,XX @@ e1000x_update_rx_total_stats(uint32_t *mac,
64
- ptrdiff_t ptcp_offset, stcp_offset;
58
65
59
e1000x_increase_size_stats(mac, PRCregs, data_fcs_size);
66
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
60
e1000x_inc_reg_if_not_full(mac, TPR);
67
- + (ptcp->th_off * 4) - ppkt->vnet_hdr_len;
61
- mac[GPRC] = mac[TPR];
68
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
62
+ e1000x_inc_reg_if_not_full(mac, GPRC);
69
- + (stcp->th_off * 4) - spkt->vnet_hdr_len;
63
/* TOR - Total Octets Received:
70
+ ptrdiff_t ptcp_offset, stcp_offset;
64
* This register includes bytes received in a packet from the <Destination
71
65
* Address> field through the <CRC> field, inclusively.
72
- /*
66
* Always include FCS length (4) in size.
73
- * When network is busy, some tcp options(like sack) will unpredictable
67
*/
74
- * occur in primary side or secondary side. it will make packet size
68
e1000x_grow_8reg_if_not_full(mac, TORL, data_size + 4);
75
- * not same, but the two packet's payload is identical. colo just
69
- mac[GORCL] = mac[TORL];
76
- * care about packet payload, so we skip the option field.
70
- mac[GORCH] = mac[TORH];
77
- */
71
+ e1000x_grow_8reg_if_not_full(mac, GORCL, data_size + 4);
78
- res = colo_packet_compare_common(ppkt, spkt, ptcp_offset, stcp_offset);
72
}
79
- } else if (ptcp->th_sum == stcp->th_sum) {
73
80
- res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN, ETH_HLEN);
74
void
81
+ ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
75
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
82
+ + (ptcp->th_off << 2) - ppkt->vnet_hdr_len;
76
index XXXXXXX..XXXXXXX 100644
83
+ stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
77
--- a/hw/net/igb_core.c
84
+ + (stcp->th_off << 2) - spkt->vnet_hdr_len;
78
+++ b/hw/net/igb_core.c
85
+ if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) {
79
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
86
+ res = colo_compare_packet_payload(ppkt, spkt,
80
g_assert_not_reached();
87
+ ptcp_offset, stcp_offset,
88
+ ppkt->size - ptcp_offset);
89
} else {
90
+ trace_colo_compare_main("TCP: payload size of packets are different");
91
res = -1;
92
}
81
}
93
82
94
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
83
- core->mac[GPTC] = core->mac[TPT];
95
*/
84
- core->mac[GOTCL] = core->mac[TOTL];
96
static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
85
- core->mac[GOTCH] = core->mac[TOTH];
97
{
86
+ e1000x_inc_reg_if_not_full(core->mac, GPTC);
98
- int ret;
87
+ e1000x_grow_8reg_if_not_full(core->mac, GOTCL, tot_len);
99
- int network_header_length = ppkt->ip->ip_hl * 4;
88
100
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
89
if (core->mac[MRQC] & 1) {
101
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
90
uint16_t pool = qn % IGB_NUM_VM_POOLS;
102
103
trace_colo_compare_main("compare udp");
104
105
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
106
* other field like TOS,TTL,IP Checksum. we only need to compare
107
* the ip payload here.
108
*/
109
- ret = colo_packet_compare_common(ppkt, spkt,
110
- network_header_length + ETH_HLEN,
111
- network_header_length + ETH_HLEN);
112
-
113
- if (ret) {
114
+ if (ppkt->size != spkt->size) {
115
+ trace_colo_compare_main("UDP: payload size of packets are different");
116
+ return -1;
117
+ }
118
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
119
+ ppkt->size - offset)) {
120
trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size);
121
trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size);
122
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
123
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
124
qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt",
125
spkt->size);
126
}
127
+ return -1;
128
+ } else {
129
+ return 0;
130
}
131
-
132
- return ret;
133
}
134
135
/*
136
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt)
137
*/
138
static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
139
{
140
- int network_header_length = ppkt->ip->ip_hl * 4;
141
+ uint16_t network_header_length = ppkt->ip->ip_hl << 2;
142
+ uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len;
143
144
trace_colo_compare_main("compare icmp");
145
146
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
147
* other field like TOS,TTL,IP Checksum. we only need to compare
148
* the ip payload here.
149
*/
150
- if (colo_packet_compare_common(ppkt, spkt,
151
- network_header_length + ETH_HLEN,
152
- network_header_length + ETH_HLEN)) {
153
+ if (ppkt->size != spkt->size) {
154
+ trace_colo_compare_main("ICMP: payload size of packets are different");
155
+ return -1;
156
+ }
157
+ if (colo_compare_packet_payload(ppkt, spkt, offset, offset,
158
+ ppkt->size - offset)) {
159
trace_colo_compare_icmp_miscompare("primary pkt size",
160
ppkt->size);
161
trace_colo_compare_icmp_miscompare("Secondary pkt size",
162
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt)
163
*/
164
static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
165
{
166
+ uint16_t offset = ppkt->vnet_hdr_len;
167
+
168
trace_colo_compare_main("compare other");
169
if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
170
char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
171
@@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_other(Packet *spkt, Packet *ppkt)
172
sec_ip_src, sec_ip_dst);
173
}
174
175
- return colo_packet_compare_common(ppkt, spkt, 0, 0);
176
+ if (ppkt->size != spkt->size) {
177
+ trace_colo_compare_main("Other: payload size of packets are different");
178
+ return -1;
179
+ }
180
+ return colo_compare_packet_payload(ppkt, spkt, offset, offset,
181
+ ppkt->size - offset);
182
}
183
184
static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time)
185
--
91
--
186
2.7.4
92
2.7.4
187
93
188
94
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
This is intended to be followed by another change for the interface.
4
It also fixes the leak of memory mapping when the specified memory is
5
partially mapped.
6
7
Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/net/net_tx_pkt.c | 53 +++++++++++++++++++++++++++++++++--------------------
12
hw/net/net_tx_pkt.h | 9 +++++++++
13
2 files changed, 42 insertions(+), 20 deletions(-)
14
15
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/net_tx_pkt.c
18
+++ b/hw/net/net_tx_pkt.c
19
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
20
}
21
}
22
23
-bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
24
- size_t len)
25
+static bool net_tx_pkt_add_raw_fragment_common(struct NetTxPkt *pkt,
26
+ void *base, size_t len)
27
{
28
- hwaddr mapped_len = 0;
29
struct iovec *ventry;
30
assert(pkt);
31
32
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
33
return false;
34
}
35
36
- if (!len) {
37
- return true;
38
- }
39
-
40
ventry = &pkt->raw[pkt->raw_frags];
41
- mapped_len = len;
42
+ ventry->iov_base = base;
43
+ ventry->iov_len = len;
44
+ pkt->raw_frags++;
45
46
- ventry->iov_base = pci_dma_map(pkt->pci_dev, pa,
47
- &mapped_len, DMA_DIRECTION_TO_DEVICE);
48
-
49
- if ((ventry->iov_base != NULL) && (len == mapped_len)) {
50
- ventry->iov_len = mapped_len;
51
- pkt->raw_frags++;
52
- return true;
53
- } else {
54
- return false;
55
- }
56
+ return true;
57
}
58
59
bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt)
60
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
61
assert(pkt->raw);
62
for (i = 0; i < pkt->raw_frags; i++) {
63
assert(pkt->raw[i].iov_base);
64
- pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base,
65
- pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
66
+ net_tx_pkt_unmap_frag_pci(pkt->pci_dev,
67
+ pkt->raw[i].iov_base,
68
+ pkt->raw[i].iov_len);
69
}
70
}
71
pkt->pci_dev = pci_dev;
72
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
73
pkt->l4proto = 0;
74
}
75
76
+void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len)
77
+{
78
+ pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0);
79
+}
80
+
81
+bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
82
+ size_t len)
83
+{
84
+ dma_addr_t mapped_len = len;
85
+ void *base = pci_dma_map(pkt->pci_dev, pa, &mapped_len,
86
+ DMA_DIRECTION_TO_DEVICE);
87
+ if (!base) {
88
+ return false;
89
+ }
90
+
91
+ if (mapped_len != len ||
92
+ !net_tx_pkt_add_raw_fragment_common(pkt, base, len)) {
93
+ net_tx_pkt_unmap_frag_pci(pkt->pci_dev, base, mapped_len);
94
+ return false;
95
+ }
96
+
97
+ return true;
98
+}
99
+
100
static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt,
101
struct iovec *iov, uint32_t iov_len,
102
uint16_t csl)
103
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
104
index XXXXXXX..XXXXXXX 100644
105
--- a/hw/net/net_tx_pkt.h
106
+++ b/hw/net/net_tx_pkt.h
107
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
108
void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
109
110
/**
111
+ * Unmap a fragment mapped from a PCI device.
112
+ *
113
+ * @context: PCI device owning fragment
114
+ * @base: pointer to fragment
115
+ * @len: length of fragment
116
+ */
117
+void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len);
118
+
119
+/**
120
* Send packet to qemu. handles sw offloads if vhdr is not supported.
121
*
122
* @pkt: packet
123
--
124
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
This allows using the network packet abstractions even if PCI is not
4
used.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/e1000e_core.c | 13 ++++++++-----
10
hw/net/igb_core.c | 13 ++++++-------
11
hw/net/net_tx_pkt.c | 36 +++++++++++++-----------------------
12
hw/net/net_tx_pkt.h | 31 ++++++++++++++++++++-----------
13
hw/net/vmxnet3.c | 14 +++++++-------
14
5 files changed, 54 insertions(+), 53 deletions(-)
15
16
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/e1000e_core.c
19
+++ b/hw/net/e1000e_core.c
20
@@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core,
21
addr = le64_to_cpu(dp->buffer_addr);
22
23
if (!tx->skip_cp) {
24
- if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, addr, split_size)) {
25
+ if (!net_tx_pkt_add_raw_fragment_pci(tx->tx_pkt, core->owner,
26
+ addr, split_size)) {
27
tx->skip_cp = true;
28
}
29
}
30
@@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core,
31
}
32
33
tx->skip_cp = false;
34
- net_tx_pkt_reset(tx->tx_pkt, core->owner);
35
+ net_tx_pkt_reset(tx->tx_pkt, net_tx_pkt_unmap_frag_pci, core->owner);
36
37
tx->sum_needed = 0;
38
tx->cptse = 0;
39
@@ -XXX,XX +XXX,XX @@ e1000e_core_pci_realize(E1000ECore *core,
40
qemu_add_vm_change_state_handler(e1000e_vm_state_change, core);
41
42
for (i = 0; i < E1000E_NUM_QUEUES; i++) {
43
- net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
44
+ net_tx_pkt_init(&core->tx[i].tx_pkt, E1000E_MAX_TX_FRAGS);
45
}
46
47
net_rx_pkt_init(&core->rx_pkt);
48
@@ -XXX,XX +XXX,XX @@ e1000e_core_pci_uninit(E1000ECore *core)
49
qemu_del_vm_change_state_handler(core->vmstate);
50
51
for (i = 0; i < E1000E_NUM_QUEUES; i++) {
52
- net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
53
+ net_tx_pkt_reset(core->tx[i].tx_pkt,
54
+ net_tx_pkt_unmap_frag_pci, core->owner);
55
net_tx_pkt_uninit(core->tx[i].tx_pkt);
56
}
57
58
@@ -XXX,XX +XXX,XX @@ static void e1000e_reset(E1000ECore *core, bool sw)
59
e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
60
61
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
62
- net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
63
+ net_tx_pkt_reset(core->tx[i].tx_pkt,
64
+ net_tx_pkt_unmap_frag_pci, core->owner);
65
memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
66
core->tx[i].skip_cp = false;
67
}
68
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/hw/net/igb_core.c
71
+++ b/hw/net/igb_core.c
72
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
73
length = cmd_type_len & 0xFFFF;
74
75
if (!tx->skip_cp) {
76
- if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, buffer_addr, length)) {
77
+ if (!net_tx_pkt_add_raw_fragment_pci(tx->tx_pkt, dev,
78
+ buffer_addr, length)) {
79
tx->skip_cp = true;
80
}
81
}
82
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
83
84
tx->first = true;
85
tx->skip_cp = false;
86
- net_tx_pkt_reset(tx->tx_pkt, dev);
87
+ net_tx_pkt_reset(tx->tx_pkt, net_tx_pkt_unmap_frag_pci, dev);
88
}
89
}
90
91
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
92
d = core->owner;
93
}
94
95
- net_tx_pkt_reset(txr->tx->tx_pkt, d);
96
-
97
while (!igb_ring_empty(core, txi)) {
98
base = igb_ring_head_descr(core, txi);
99
100
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
101
core->mac[EICR] |= eic;
102
igb_set_interrupt_cause(core, E1000_ICR_TXDW);
103
}
104
+
105
+ net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci, d);
106
}
107
108
static uint32_t
109
@@ -XXX,XX +XXX,XX @@ igb_core_pci_realize(IGBCore *core,
110
core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);
111
112
for (i = 0; i < IGB_NUM_QUEUES; i++) {
113
- net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
114
+ net_tx_pkt_init(&core->tx[i].tx_pkt, E1000E_MAX_TX_FRAGS);
115
}
116
117
net_rx_pkt_init(&core->rx_pkt);
118
@@ -XXX,XX +XXX,XX @@ igb_core_pci_uninit(IGBCore *core)
119
qemu_del_vm_change_state_handler(core->vmstate);
120
121
for (i = 0; i < IGB_NUM_QUEUES; i++) {
122
- net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
123
net_tx_pkt_uninit(core->tx[i].tx_pkt);
124
}
125
126
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
127
128
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
129
tx = &core->tx[i];
130
- net_tx_pkt_reset(tx->tx_pkt, NULL);
131
memset(tx->ctx, 0, sizeof(tx->ctx));
132
tx->first = true;
133
tx->skip_cp = false;
134
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
135
index XXXXXXX..XXXXXXX 100644
136
--- a/hw/net/net_tx_pkt.c
137
+++ b/hw/net/net_tx_pkt.c
138
@@ -XXX,XX +XXX,XX @@
139
*/
140
141
#include "qemu/osdep.h"
142
-#include "net_tx_pkt.h"
143
#include "net/eth.h"
144
#include "net/checksum.h"
145
#include "net/tap.h"
146
#include "net/net.h"
147
#include "hw/pci/pci_device.h"
148
+#include "net_tx_pkt.h"
149
150
enum {
151
NET_TX_PKT_VHDR_FRAG = 0,
152
@@ -XXX,XX +XXX,XX @@ enum {
153
154
/* TX packet private context */
155
struct NetTxPkt {
156
- PCIDevice *pci_dev;
157
-
158
struct virtio_net_hdr virt_hdr;
159
160
struct iovec *raw;
161
@@ -XXX,XX +XXX,XX @@ struct NetTxPkt {
162
uint8_t l4proto;
163
};
164
165
-void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev,
166
- uint32_t max_frags)
167
+void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags)
168
{
169
struct NetTxPkt *p = g_malloc0(sizeof *p);
170
171
- p->pci_dev = pci_dev;
172
-
173
p->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG);
174
175
p->raw = g_new(struct iovec, max_frags);
176
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
177
}
178
}
179
180
-static bool net_tx_pkt_add_raw_fragment_common(struct NetTxPkt *pkt,
181
- void *base, size_t len)
182
+bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len)
183
{
184
struct iovec *ventry;
185
assert(pkt);
186
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
187
#endif
188
}
189
190
-void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
191
+void net_tx_pkt_reset(struct NetTxPkt *pkt,
192
+ NetTxPktFreeFrag callback, void *context)
193
{
194
int i;
195
196
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
197
assert(pkt->raw);
198
for (i = 0; i < pkt->raw_frags; i++) {
199
assert(pkt->raw[i].iov_base);
200
- net_tx_pkt_unmap_frag_pci(pkt->pci_dev,
201
- pkt->raw[i].iov_base,
202
- pkt->raw[i].iov_len);
203
+ callback(context, pkt->raw[i].iov_base, pkt->raw[i].iov_len);
204
}
205
}
206
- pkt->pci_dev = pci_dev;
207
pkt->raw_frags = 0;
208
209
pkt->hdr_len = 0;
210
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len)
211
pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0);
212
}
213
214
-bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
215
- size_t len)
216
+bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev,
217
+ dma_addr_t pa, size_t len)
218
{
219
dma_addr_t mapped_len = len;
220
- void *base = pci_dma_map(pkt->pci_dev, pa, &mapped_len,
221
- DMA_DIRECTION_TO_DEVICE);
222
+ void *base = pci_dma_map(pci_dev, pa, &mapped_len, DMA_DIRECTION_TO_DEVICE);
223
if (!base) {
224
return false;
225
}
226
227
- if (mapped_len != len ||
228
- !net_tx_pkt_add_raw_fragment_common(pkt, base, len)) {
229
- net_tx_pkt_unmap_frag_pci(pkt->pci_dev, base, mapped_len);
230
+ if (mapped_len != len || !net_tx_pkt_add_raw_fragment(pkt, base, len)) {
231
+ net_tx_pkt_unmap_frag_pci(pci_dev, base, mapped_len);
232
return false;
233
}
234
235
@@ -XXX,XX +XXX,XX @@ static void net_tx_pkt_udp_fragment_fix(struct NetTxPkt *pkt,
236
}
237
238
static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt,
239
- NetTxPktCallback callback,
240
+ NetTxPktSend callback,
241
void *context)
242
{
243
uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
244
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc)
245
}
246
247
bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
248
- NetTxPktCallback callback, void *context)
249
+ NetTxPktSend callback, void *context)
250
{
251
assert(pkt);
252
253
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
254
index XXXXXXX..XXXXXXX 100644
255
--- a/hw/net/net_tx_pkt.h
256
+++ b/hw/net/net_tx_pkt.h
257
@@ -XXX,XX +XXX,XX @@
258
259
struct NetTxPkt;
260
261
-typedef void (* NetTxPktCallback)(void *, const struct iovec *, int, const struct iovec *, int);
262
+typedef void (*NetTxPktFreeFrag)(void *, void *, size_t);
263
+typedef void (*NetTxPktSend)(void *, const struct iovec *, int, const struct iovec *, int);
264
265
/**
266
* Init function for tx packet functionality
267
*
268
* @pkt: packet pointer
269
- * @pci_dev: PCI device processing this packet
270
* @max_frags: max tx ip fragments
271
*/
272
-void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev,
273
- uint32_t max_frags);
274
+void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags);
275
276
/**
277
* Clean all tx packet resources.
278
@@ -XXX,XX +XXX,XX @@ net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan)
279
* populate data fragment into pkt context.
280
*
281
* @pkt: packet
282
- * @pa: physical address of fragment
283
+ * @base: pointer to fragment
284
* @len: length of fragment
285
*
286
*/
287
-bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa,
288
- size_t len);
289
+bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len);
290
291
/**
292
* Fix ip header fields and calculate IP header and pseudo header checksums.
293
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
294
* reset tx packet private context (needed to be called between packets)
295
*
296
* @pkt: packet
297
- * @dev: PCI device processing the next packet
298
- *
299
+ * @callback: function to free the fragments
300
+ * @context: pointer to be passed to the callback
301
*/
302
-void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
303
+void net_tx_pkt_reset(struct NetTxPkt *pkt,
304
+ NetTxPktFreeFrag callback, void *context);
305
306
/**
307
* Unmap a fragment mapped from a PCI device.
308
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);
309
void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len);
310
311
/**
312
+ * map data fragment from PCI device and populate it into pkt context.
313
+ *
314
+ * @pci_dev: PCI device owning fragment
315
+ * @pa: physical address of fragment
316
+ * @len: length of fragment
317
+ */
318
+bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev,
319
+ dma_addr_t pa, size_t len);
320
+
321
+/**
322
* Send packet to qemu. handles sw offloads if vhdr is not supported.
323
*
324
* @pkt: packet
325
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc);
326
* @ret: operation result
327
*/
328
bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
329
- NetTxPktCallback callback, void *context);
330
+ NetTxPktSend callback, void *context);
331
332
/**
333
* parse raw packet data and analyze offload requirements.
334
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
335
index XXXXXXX..XXXXXXX 100644
336
--- a/hw/net/vmxnet3.c
337
+++ b/hw/net/vmxnet3.c
338
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
339
data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE;
340
data_pa = txd.addr;
341
342
- if (!net_tx_pkt_add_raw_fragment(s->tx_pkt,
343
- data_pa,
344
- data_len)) {
345
+ if (!net_tx_pkt_add_raw_fragment_pci(s->tx_pkt, PCI_DEVICE(s),
346
+ data_pa, data_len)) {
347
s->skip_current_tx_pkt = true;
348
}
349
}
350
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
351
vmxnet3_complete_packet(s, qidx, txd_idx);
352
s->tx_sop = true;
353
s->skip_current_tx_pkt = false;
354
- net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
355
+ net_tx_pkt_reset(s->tx_pkt,
356
+ net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
357
}
358
}
359
}
360
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
361
{
362
if (s->device_active) {
363
VMW_CBPRN("Deactivating vmxnet3...");
364
- net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
365
+ net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
366
net_tx_pkt_uninit(s->tx_pkt);
367
net_rx_pkt_uninit(s->rx_pkt);
368
s->device_active = false;
369
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_activate_device(VMXNET3State *s)
370
371
/* Preallocate TX packet wrapper */
372
VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags);
373
- net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), s->max_tx_frags);
374
+ net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags);
375
net_rx_pkt_init(&s->rx_pkt);
376
377
/* Read rings memory locations for RX queues */
378
@@ -XXX,XX +XXX,XX @@ static int vmxnet3_post_load(void *opaque, int version_id)
379
{
380
VMXNET3State *s = opaque;
381
382
- net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), s->max_tx_frags);
383
+ net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags);
384
net_rx_pkt_init(&s->rx_pkt);
385
386
if (s->msix_used) {
387
--
388
2.7.4
diff view generated by jsdifflib
New patch
1
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
3
Before this change, e1000 and the common code updated BPRC and MPRC
4
depending on the matched filter, but e1000e and igb decided to update
5
those counters by deriving the packet type independently. This
6
inconsistency caused a multicast packet to be counted twice.
7
8
Updating BPRC and MPRC depending on the matched filter is fundamentally flawed anyway as
9
a filter can be used for different types of packets. For example, it is
10
possible to filter broadcast packets with MTA.
11
12
Always determine what counters to update by inspecting the packets.
13
14
Fixes: 3b27430177 ("e1000: Implementing various counters")
15
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
16
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
---
19
hw/net/e1000.c | 6 +++---
20
hw/net/e1000e_core.c | 20 +++-----------------
21
hw/net/e1000x_common.c | 25 +++++++++++++++++++------
22
hw/net/e1000x_common.h | 5 +++--
23
hw/net/igb_core.c | 22 +++++-----------------
24
5 files changed, 33 insertions(+), 45 deletions(-)
25
26
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
27
index XXXXXXX..XXXXXXX 100644
28
--- a/hw/net/e1000.c
29
+++ b/hw/net/e1000.c
30
@@ -XXX,XX +XXX,XX @@ receive_filter(E1000State *s, const uint8_t *buf, int size)
31
}
32
33
if (ismcast && (rctl & E1000_RCTL_MPE)) { /* promiscuous mcast */
34
- e1000x_inc_reg_if_not_full(s->mac_reg, MPRC);
35
return 1;
36
}
37
38
if (isbcast && (rctl & E1000_RCTL_BAM)) { /* broadcast enabled */
39
- e1000x_inc_reg_if_not_full(s->mac_reg, BPRC);
40
return 1;
41
}
42
43
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
44
size_t desc_offset;
45
size_t desc_size;
46
size_t total_size;
47
+ eth_pkt_types_e pkt_type;
48
49
if (!e1000x_hw_rx_enabled(s->mac_reg)) {
50
return -1;
51
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
52
size -= 4;
53
}
54
55
+ pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf));
56
rdh_start = s->mac_reg[RDH];
57
desc_offset = 0;
58
total_size = size + e1000x_fcs_len(s->mac_reg);
59
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
60
}
61
} while (desc_offset < total_size);
62
63
- e1000x_update_rx_total_stats(s->mac_reg, size, total_size);
64
+ e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size);
65
66
n = E1000_ICS_RXT0;
67
if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH])
68
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
69
index XXXXXXX..XXXXXXX 100644
70
--- a/hw/net/e1000e_core.c
71
+++ b/hw/net/e1000e_core.c
72
@@ -XXX,XX +XXX,XX @@ e1000e_write_to_rx_buffers(E1000ECore *core,
73
}
74
75
static void
76
-e1000e_update_rx_stats(E1000ECore *core,
77
- size_t data_size,
78
- size_t data_fcs_size)
79
+e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size)
80
{
81
- e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
82
-
83
- switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
84
- case ETH_PKT_BCAST:
85
- e1000x_inc_reg_if_not_full(core->mac, BPRC);
86
- break;
87
-
88
- case ETH_PKT_MCAST:
89
- e1000x_inc_reg_if_not_full(core->mac, MPRC);
90
- break;
91
-
92
- default:
93
- break;
94
- }
95
+ eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
96
+ e1000x_update_rx_total_stats(core->mac, pkt_type, pkt_size, pkt_fcs_size);
97
}
98
99
static inline bool
100
diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c
101
index XXXXXXX..XXXXXXX 100644
102
--- a/hw/net/e1000x_common.c
103
+++ b/hw/net/e1000x_common.c
104
@@ -XXX,XX +XXX,XX @@ bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf)
105
f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
106
f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
107
if (mac[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
108
- e1000x_inc_reg_if_not_full(mac, MPRC);
109
return true;
110
}
111
112
@@ -XXX,XX +XXX,XX @@ e1000x_rxbufsize(uint32_t rctl)
113
114
void
115
e1000x_update_rx_total_stats(uint32_t *mac,
116
- size_t data_size,
117
- size_t data_fcs_size)
118
+ eth_pkt_types_e pkt_type,
119
+ size_t pkt_size,
120
+ size_t pkt_fcs_size)
121
{
122
static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511,
123
PRC1023, PRC1522 };
124
125
- e1000x_increase_size_stats(mac, PRCregs, data_fcs_size);
126
+ e1000x_increase_size_stats(mac, PRCregs, pkt_fcs_size);
127
e1000x_inc_reg_if_not_full(mac, TPR);
128
e1000x_inc_reg_if_not_full(mac, GPRC);
129
/* TOR - Total Octets Received:
130
@@ -XXX,XX +XXX,XX @@ e1000x_update_rx_total_stats(uint32_t *mac,
131
* Address> field through the <CRC> field, inclusively.
132
* Always include FCS length (4) in size.
133
*/
134
- e1000x_grow_8reg_if_not_full(mac, TORL, data_size + 4);
135
- e1000x_grow_8reg_if_not_full(mac, GORCL, data_size + 4);
136
+ e1000x_grow_8reg_if_not_full(mac, TORL, pkt_size + 4);
137
+ e1000x_grow_8reg_if_not_full(mac, GORCL, pkt_size + 4);
138
+
139
+ switch (pkt_type) {
140
+ case ETH_PKT_BCAST:
141
+ e1000x_inc_reg_if_not_full(mac, BPRC);
142
+ break;
143
+
144
+ case ETH_PKT_MCAST:
145
+ e1000x_inc_reg_if_not_full(mac, MPRC);
146
+ break;
147
+
148
+ default:
149
+ break;
150
+ }
151
}
152
153
void
154
diff --git a/hw/net/e1000x_common.h b/hw/net/e1000x_common.h
155
index XXXXXXX..XXXXXXX 100644
156
--- a/hw/net/e1000x_common.h
157
+++ b/hw/net/e1000x_common.h
158
@@ -XXX,XX +XXX,XX @@ e1000x_update_regs_on_link_up(uint32_t *mac, uint16_t *phy)
159
}
160
161
void e1000x_update_rx_total_stats(uint32_t *mac,
162
- size_t data_size,
163
- size_t data_fcs_size);
164
+ eth_pkt_types_e pkt_type,
165
+ size_t pkt_size,
166
+ size_t pkt_fcs_size);
167
168
void e1000x_core_prepare_eeprom(uint16_t *eeprom,
169
const uint16_t *templ,
170
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
171
index XXXXXXX..XXXXXXX 100644
172
--- a/hw/net/igb_core.c
173
+++ b/hw/net/igb_core.c
174
@@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core,
175
176
static void
177
igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
178
- size_t data_size, size_t data_fcs_size)
179
+ size_t pkt_size, size_t pkt_fcs_size)
180
{
181
- e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);
182
-
183
- switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
184
- case ETH_PKT_BCAST:
185
- e1000x_inc_reg_if_not_full(core->mac, BPRC);
186
- break;
187
-
188
- case ETH_PKT_MCAST:
189
- e1000x_inc_reg_if_not_full(core->mac, MPRC);
190
- break;
191
-
192
- default:
193
- break;
194
- }
195
+ eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt);
196
+ e1000x_update_rx_total_stats(core->mac, pkt_type, pkt_size, pkt_fcs_size);
197
198
if (core->mac[MRQC] & 1) {
199
uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
200
201
- core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
202
+ core->mac[PVFGORC0 + (pool * 64)] += pkt_size + 4;
203
core->mac[PVFGPRC0 + (pool * 64)]++;
204
- if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
205
+ if (pkt_type == ETH_PKT_MCAST) {
206
core->mac[PVFMPRC0 + (pool * 64)]++;
207
}
208
}
209
--
210
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
igb's advanced descriptor uses a packet type encoding different from
4
one used in e1000e's extended descriptor. Fix the logic to encode
5
Rx packet type accordingly.
6
7
Fixes: 3a977deebe ("Intrdocue igb device emulation")
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/net/igb_core.c | 38 +++++++++++++++++++-------------------
13
hw/net/igb_regs.h | 5 +++++
14
2 files changed, 24 insertions(+), 19 deletions(-)
15
16
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
17
index XXXXXXX..XXXXXXX 100644
18
--- a/hw/net/igb_core.c
19
+++ b/hw/net/igb_core.c
20
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
21
struct virtio_net_hdr *vhdr;
22
bool hasip4, hasip6;
23
EthL4HdrProto l4hdr_proto;
24
- uint32_t pkt_type;
25
26
*status_flags = E1000_RXD_STAT_DD;
27
28
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
29
trace_e1000e_rx_metadata_ack();
30
}
31
32
- if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) {
33
- trace_e1000e_rx_metadata_ipv6_filtering_disabled();
34
- pkt_type = E1000_RXD_PKT_MAC;
35
- } else if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP ||
36
- l4hdr_proto == ETH_L4_HDR_PROTO_UDP) {
37
- pkt_type = hasip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP;
38
- } else if (hasip4 || hasip6) {
39
- pkt_type = hasip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6;
40
- } else {
41
- pkt_type = E1000_RXD_PKT_MAC;
42
- }
43
+ if (pkt_info) {
44
+ *pkt_info = rss_info->enabled ? rss_info->type : 0;
45
46
- trace_e1000e_rx_metadata_pkt_type(pkt_type);
47
+ if (hasip4) {
48
+ *pkt_info |= E1000_ADVRXD_PKT_IP4;
49
+ }
50
51
- if (pkt_info) {
52
- if (rss_info->enabled) {
53
- *pkt_info = rss_info->type;
54
+ if (hasip6) {
55
+ *pkt_info |= E1000_ADVRXD_PKT_IP6;
56
}
57
58
- *pkt_info |= (pkt_type << 4);
59
- } else {
60
- *status_flags |= E1000_RXD_PKT_TYPE(pkt_type);
61
+ switch (l4hdr_proto) {
62
+ case ETH_L4_HDR_PROTO_TCP:
63
+ *pkt_info |= E1000_ADVRXD_PKT_TCP;
64
+ break;
65
+
66
+ case ETH_L4_HDR_PROTO_UDP:
67
+ *pkt_info |= E1000_ADVRXD_PKT_UDP;
68
+ break;
69
+
70
+ default:
71
+ break;
72
+ }
73
}
74
75
if (hdr_info) {
76
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
77
index XXXXXXX..XXXXXXX 100644
78
--- a/hw/net/igb_regs.h
79
+++ b/hw/net/igb_regs.h
80
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
81
82
#define E1000_STATUS_NUM_VFS_SHIFT 14
83
84
+#define E1000_ADVRXD_PKT_IP4 BIT(4)
85
+#define E1000_ADVRXD_PKT_IP6 BIT(6)
86
+#define E1000_ADVRXD_PKT_TCP BIT(8)
87
+#define E1000_ADVRXD_PKT_UDP BIT(9)
88
+
89
static inline uint8_t igb_ivar_entry_rx(uint8_t i)
90
{
91
return i < 8 ? i * 4 : (i - 8) * 4 + 2;
92
--
93
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
While the datasheet of e1000e says it checks CTRL.VME for tx VLAN
4
tagging, igb's datasheet has no such statements. It also says for
5
"CTRL.VLE":
6
> This register only affects the VLAN Strip in Rx it does not have any
7
> influence in the Tx path in the 82576.
8
(Appendix A. Changes from the 82575)
9
10
There is no "CTRL.VLE", so it is most likely a mistake for
11
CTRL.VME.
12
13
Fixes: fba7c3b788 ("igb: respect VMVIR and VMOLR for VLAN")
14
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
15
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
hw/net/igb_core.c | 2 +-
19
1 file changed, 1 insertion(+), 1 deletion(-)
20
21
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/net/igb_core.c
24
+++ b/hw/net/igb_core.c
25
@@ -XXX,XX +XXX,XX @@ igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
26
}
27
}
28
29
- if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
30
+ if (insert_vlan) {
31
net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
32
core->mac[VET] & 0xffff);
33
}
34
--
35
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
The datasheet makes contradictory statements regarding ICR accesses, so it
4
cannot be relied on to determine the behavior of ICR accesses. However,
5
e1000e does clear IMS bits when reading ICR, and Linux also
6
expects ICR accesses will clear IMS bits according to:
7
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/ethernet/intel/igb/igb_main.c?h=v6.2#n8048
8
9
Fixes: 3a977deebe ("Intrdocue igb device emulation")
10
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
11
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
13
---
14
hw/net/igb_core.c | 8 ++++----
15
1 file changed, 4 insertions(+), 4 deletions(-)
16
17
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/igb_core.c
20
+++ b/hw/net/igb_core.c
21
@@ -XXX,XX +XXX,XX @@ igb_set_ims(IGBCore *core, int index, uint32_t val)
22
static void igb_commit_icr(IGBCore *core)
23
{
24
/*
25
- * If GPIE.NSICR = 0, then the copy of IAM to IMS will occur only if at
26
+ * If GPIE.NSICR = 0, then the clear of IMS will occur only if at
27
* least one bit is set in the IMS and there is a true interrupt as
28
* reflected in ICR.INTA.
29
*/
30
if ((core->mac[GPIE] & E1000_GPIE_NSICR) ||
31
(core->mac[IMS] && (core->mac[ICR] & E1000_ICR_INT_ASSERTED))) {
32
- igb_set_ims(core, IMS, core->mac[IAM]);
33
- } else {
34
- igb_update_interrupt_state(core);
35
+ igb_clear_ims_bits(core, core->mac[IAM]);
36
}
37
+
38
+ igb_update_interrupt_state(core);
39
}
40
41
static void igb_set_icr(IGBCore *core, int index, uint32_t val)
42
--
43
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
igb does not properly ensure the buffer passed to
4
net_rx_pkt_set_protocols() is contiguous for the entire L2/L3/L4 header.
5
Allow it to pass scattered data to net_rx_pkt_set_protocols().
6
7
Fixes: 3a977deebe ("Intrdocue igb device emulation")
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/net/igb_core.c | 2 +-
13
hw/net/net_rx_pkt.c | 14 +++++---------
14
hw/net/net_rx_pkt.h | 10 ++++++----
15
hw/net/virtio-net.c | 7 +++++--
16
hw/net/vmxnet3.c | 7 ++++++-
17
include/net/eth.h | 6 +++---
18
net/eth.c | 18 ++++++++----------
19
7 files changed, 34 insertions(+), 30 deletions(-)
20
21
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
22
index XXXXXXX..XXXXXXX 100644
23
--- a/hw/net/igb_core.c
24
+++ b/hw/net/igb_core.c
25
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
26
27
ehdr = PKT_GET_ETH_HDR(filter_buf);
28
net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
29
- net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);
30
+ net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs);
31
32
queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
33
if (!queues) {
34
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/hw/net/net_rx_pkt.c
37
+++ b/hw/net/net_rx_pkt.c
38
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
39
iov, iovcnt, ploff, pkt->tot_len);
40
}
41
42
- eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->hasip4, &pkt->hasip6,
43
+ eth_get_protocols(pkt->vec, pkt->vec_len, 0, &pkt->hasip4, &pkt->hasip6,
44
&pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
45
&pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
46
47
@@ -XXX,XX +XXX,XX @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt)
48
return pkt->tot_len;
49
}
50
51
-void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
52
- size_t len)
53
+void net_rx_pkt_set_protocols(struct NetRxPkt *pkt,
54
+ const struct iovec *iov, size_t iovcnt,
55
+ size_t iovoff)
56
{
57
- const struct iovec iov = {
58
- .iov_base = (void *)data,
59
- .iov_len = len
60
- };
61
-
62
assert(pkt);
63
64
- eth_get_protocols(&iov, 1, &pkt->hasip4, &pkt->hasip6,
65
+ eth_get_protocols(iov, iovcnt, iovoff, &pkt->hasip4, &pkt->hasip6,
66
&pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off,
67
&pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info);
68
}
69
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
70
index XXXXXXX..XXXXXXX 100644
71
--- a/hw/net/net_rx_pkt.h
72
+++ b/hw/net/net_rx_pkt.h
73
@@ -XXX,XX +XXX,XX @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt);
74
* parse and set packet analysis results
75
*
76
* @pkt: packet
77
- * @data: pointer to the data buffer to be parsed
78
- * @len: data length
79
+ * @iov: received data scatter-gather list
80
+ * @iovcnt: number of elements in iov
81
+ * @iovoff: data start offset in the iov
82
*
83
*/
84
-void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data,
85
- size_t len);
86
+void net_rx_pkt_set_protocols(struct NetRxPkt *pkt,
87
+ const struct iovec *iov, size_t iovcnt,
88
+ size_t iovoff);
89
90
/**
91
* fetches packet analysis results
92
diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
93
index XXXXXXX..XXXXXXX 100644
94
--- a/hw/net/virtio-net.c
95
+++ b/hw/net/virtio-net.c
96
@@ -XXX,XX +XXX,XX @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf,
97
VIRTIO_NET_HASH_REPORT_UDPv6,
98
VIRTIO_NET_HASH_REPORT_UDPv6_EX
99
};
100
+ struct iovec iov = {
101
+ .iov_base = (void *)buf,
102
+ .iov_len = size
103
+ };
104
105
- net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len,
106
- size - n->host_hdr_len);
107
+ net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len);
108
net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto);
109
net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto,
110
n->rss_data.hash_types);
111
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
112
index XXXXXXX..XXXXXXX 100644
113
--- a/hw/net/vmxnet3.c
114
+++ b/hw/net/vmxnet3.c
115
@@ -XXX,XX +XXX,XX @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size)
116
get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
117
118
if (vmxnet3_rx_filter_may_indicate(s, buf, size)) {
119
- net_rx_pkt_set_protocols(s->rx_pkt, buf, size);
120
+ struct iovec iov = {
121
+ .iov_base = (void *)buf,
122
+ .iov_len = size
123
+ };
124
+
125
+ net_rx_pkt_set_protocols(s->rx_pkt, &iov, 1, 0);
126
vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size);
127
net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping);
128
bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1;
129
diff --git a/include/net/eth.h b/include/net/eth.h
130
index XXXXXXX..XXXXXXX 100644
131
--- a/include/net/eth.h
132
+++ b/include/net/eth.h
133
@@ -XXX,XX +XXX,XX @@ eth_get_l2_hdr_length(const void *p)
134
}
135
136
static inline uint32_t
137
-eth_get_l2_hdr_length_iov(const struct iovec *iov, int iovcnt)
138
+eth_get_l2_hdr_length_iov(const struct iovec *iov, size_t iovcnt, size_t iovoff)
139
{
140
uint8_t p[sizeof(struct eth_header) + sizeof(struct vlan_header)];
141
- size_t copied = iov_to_buf(iov, iovcnt, 0, p, ARRAY_SIZE(p));
142
+ size_t copied = iov_to_buf(iov, iovcnt, iovoff, p, ARRAY_SIZE(p));
143
144
if (copied < ARRAY_SIZE(p)) {
145
return copied;
146
@@ -XXX,XX +XXX,XX @@ typedef struct eth_l4_hdr_info_st {
147
bool has_tcp_data;
148
} eth_l4_hdr_info;
149
150
-void eth_get_protocols(const struct iovec *iov, int iovcnt,
151
+void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
152
bool *hasip4, bool *hasip6,
153
size_t *l3hdr_off,
154
size_t *l4hdr_off,
155
diff --git a/net/eth.c b/net/eth.c
156
index XXXXXXX..XXXXXXX 100644
157
--- a/net/eth.c
158
+++ b/net/eth.c
159
@@ -XXX,XX +XXX,XX @@ _eth_tcp_has_data(bool is_ip4,
160
return l4len > TCP_HEADER_DATA_OFFSET(tcp);
161
}
162
163
-void eth_get_protocols(const struct iovec *iov, int iovcnt,
164
+void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
165
bool *hasip4, bool *hasip6,
166
size_t *l3hdr_off,
167
size_t *l4hdr_off,
168
@@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, int iovcnt,
169
{
170
int proto;
171
bool fragment = false;
172
- size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt);
173
size_t input_size = iov_size(iov, iovcnt);
174
size_t copied;
175
uint8_t ip_p;
176
177
*hasip4 = *hasip6 = false;
178
+ *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff);
179
l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID;
180
181
- proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len);
182
-
183
- *l3hdr_off = l2hdr_len;
184
+ proto = eth_get_l3_proto(iov, iovcnt, *l3hdr_off);
185
186
if (proto == ETH_P_IP) {
187
struct ip_header *iphdr = &ip4hdr_info->ip4_hdr;
188
189
- if (input_size < l2hdr_len) {
190
+ if (input_size < *l3hdr_off) {
191
return;
192
}
193
194
- copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr));
195
+ copied = iov_to_buf(iov, iovcnt, *l3hdr_off, iphdr, sizeof(*iphdr));
196
if (copied < sizeof(*iphdr) ||
197
IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) {
198
return;
199
@@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, int iovcnt,
200
*hasip4 = true;
201
ip_p = iphdr->ip_p;
202
ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr);
203
- *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr);
204
+ *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(iphdr);
205
206
fragment = ip4hdr_info->fragment;
207
} else if (proto == ETH_P_IPV6) {
208
- if (!eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, ip6hdr_info)) {
209
+ if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) {
210
return;
211
}
212
213
*hasip6 = true;
214
ip_p = ip6hdr_info->l4proto;
215
- *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len;
216
+ *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len;
217
fragment = ip6hdr_info->fragment;
218
} else {
219
return;
220
--
221
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
e1000e_receive_internal() used to check the iov length to determine
4
whether to copy the iovs to a contiguous buffer, but the check is flawed in two
5
ways:
6
- It does not ensure that iovcnt > 0.
7
- It does not take virtio-net header into consideration.
8
9
The size of this copy is just 18 octets, which can be even less than
10
the code size required for checks. This (wrong) optimization is probably
11
not worth it, so just remove it.
12
13
Fixes: 6f3fbe4ed0 ("net: Introduce e1000e device emulation")
14
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
16
---
17
hw/net/e1000e_core.c | 26 ++++++++++----------------
18
1 file changed, 10 insertions(+), 16 deletions(-)
19
20
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
21
index XXXXXXX..XXXXXXX 100644
22
--- a/hw/net/e1000e_core.c
23
+++ b/hw/net/e1000e_core.c
24
@@ -XXX,XX +XXX,XX @@ static ssize_t
25
e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
26
bool has_vnet)
27
{
28
- static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4);
29
-
30
uint32_t n = 0;
31
- uint8_t min_buf[ETH_ZLEN];
32
+ uint8_t buf[ETH_ZLEN];
33
struct iovec min_iov;
34
- uint8_t *filter_buf;
35
size_t size, orig_size;
36
size_t iov_ofs = 0;
37
E1000E_RxRing rxr;
38
@@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
39
net_rx_pkt_unset_vhdr(core->rx_pkt);
40
}
41
42
- filter_buf = iov->iov_base + iov_ofs;
43
orig_size = iov_size(iov, iovcnt);
44
size = orig_size - iov_ofs;
45
46
/* Pad to minimum Ethernet frame length */
47
- if (size < sizeof(min_buf)) {
48
- iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size);
49
- memset(&min_buf[size], 0, sizeof(min_buf) - size);
50
+ if (size < sizeof(buf)) {
51
+ iov_to_buf(iov, iovcnt, iov_ofs, buf, size);
52
+ memset(&buf[size], 0, sizeof(buf) - size);
53
e1000x_inc_reg_if_not_full(core->mac, RUC);
54
- min_iov.iov_base = filter_buf = min_buf;
55
- min_iov.iov_len = size = sizeof(min_buf);
56
+ min_iov.iov_base = buf;
57
+ min_iov.iov_len = size = sizeof(buf);
58
iovcnt = 1;
59
iov = &min_iov;
60
iov_ofs = 0;
61
- } else if (iov->iov_len < maximum_ethernet_hdr_len) {
62
- /* This is very unlikely, but may happen. */
63
- iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len);
64
- filter_buf = min_buf;
65
+ } else {
66
+ iov_to_buf(iov, iovcnt, iov_ofs, buf, ETH_HLEN + 4);
67
}
68
69
/* Discard oversized packets if !LPE and !SBP. */
70
@@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
71
}
72
73
net_rx_pkt_set_packet_type(core->rx_pkt,
74
- get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf)));
75
+ get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
76
77
- if (!e1000e_receive_filter(core, filter_buf, size)) {
78
+ if (!e1000e_receive_filter(core, buf, size)) {
79
trace_e1000e_rx_flt_dropped();
80
return orig_size;
81
}
82
--
83
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
igb_receive_internal() used to check the iov length to determine
4
whether to copy the iovs to a contiguous buffer, but the check is flawed in two
5
ways:
6
- It does not ensure that iovcnt > 0.
7
- It does not take virtio-net header into consideration.
8
9
The size of this copy is just 22 octets, which can be even less than
10
the code size required for checks. This (wrong) optimization is probably
11
not worth it, so just remove it. Removing this also allows igb to assume
12
aligned accesses for the ethernet header.
13
14
Fixes: 3a977deebe ("Intrdocue igb device emulation")
15
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
16
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
17
Signed-off-by: Jason Wang <jasowang@redhat.com>
18
---
19
hw/net/igb_core.c | 43 +++++++++++++++++++++++--------------------
20
1 file changed, 23 insertions(+), 20 deletions(-)
21
22
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/net/igb_core.c
25
+++ b/hw/net/igb_core.c
26
@@ -XXX,XX +XXX,XX @@ typedef struct IGBTxPktVmdqCallbackContext {
27
NetClientState *nc;
28
} IGBTxPktVmdqCallbackContext;
29
30
+typedef struct L2Header {
31
+ struct eth_header eth;
32
+ struct vlan_header vlan;
33
+} L2Header;
34
+
35
static ssize_t
36
igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
37
bool has_vnet, bool *external_tx);
38
@@ -XXX,XX +XXX,XX @@ igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
39
return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
40
}
41
42
-static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
43
+static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
44
size_t size, E1000E_RSSInfo *rss_info,
45
bool *external_tx)
46
{
47
static const int ta_shift[] = { 4, 3, 2, 0 };
48
+ const struct eth_header *ehdr = &l2_header->eth;
49
uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
50
uint16_t queues = 0;
51
uint16_t oversized = 0;
52
- uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
53
+ uint16_t vid = be16_to_cpu(l2_header->vlan.h_tci) & VLAN_VID_MASK;
54
bool accepted = false;
55
int i;
56
57
@@ -XXX,XX +XXX,XX @@ static ssize_t
58
igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
59
bool has_vnet, bool *external_tx)
60
{
61
- static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4);
62
-
63
uint16_t queues = 0;
64
uint32_t n = 0;
65
- uint8_t min_buf[ETH_ZLEN];
66
+ union {
67
+ L2Header l2_header;
68
+ uint8_t octets[ETH_ZLEN];
69
+ } buf;
70
struct iovec min_iov;
71
- struct eth_header *ehdr;
72
- uint8_t *filter_buf;
73
size_t size, orig_size;
74
size_t iov_ofs = 0;
75
E1000E_RxRing rxr;
76
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
77
net_rx_pkt_unset_vhdr(core->rx_pkt);
78
}
79
80
- filter_buf = iov->iov_base + iov_ofs;
81
orig_size = iov_size(iov, iovcnt);
82
size = orig_size - iov_ofs;
83
84
/* Pad to minimum Ethernet frame length */
85
- if (size < sizeof(min_buf)) {
86
- iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size);
87
- memset(&min_buf[size], 0, sizeof(min_buf) - size);
88
+ if (size < sizeof(buf)) {
89
+ iov_to_buf(iov, iovcnt, iov_ofs, &buf, size);
90
+ memset(&buf.octets[size], 0, sizeof(buf) - size);
91
e1000x_inc_reg_if_not_full(core->mac, RUC);
92
- min_iov.iov_base = filter_buf = min_buf;
93
- min_iov.iov_len = size = sizeof(min_buf);
94
+ min_iov.iov_base = &buf;
95
+ min_iov.iov_len = size = sizeof(buf);
96
iovcnt = 1;
97
iov = &min_iov;
98
iov_ofs = 0;
99
- } else if (iov->iov_len < maximum_ethernet_hdr_len) {
100
- /* This is very unlikely, but may happen. */
101
- iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len);
102
- filter_buf = min_buf;
103
+ } else {
104
+ iov_to_buf(iov, iovcnt, iov_ofs, &buf, sizeof(buf.l2_header));
105
}
106
107
/* Discard oversized packets if !LPE and !SBP. */
108
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
109
return orig_size;
110
}
111
112
- ehdr = PKT_GET_ETH_HDR(filter_buf);
113
- net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
114
+ net_rx_pkt_set_packet_type(core->rx_pkt,
115
+ get_eth_packet_type(&buf.l2_header.eth));
116
net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs);
117
118
- queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
119
+ queues = igb_receive_assign(core, &buf.l2_header, size,
120
+ &rss_info, external_tx);
121
if (!queues) {
122
trace_e1000e_rx_flt_dropped();
123
return orig_size;
124
--
125
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Fixes: 9f95111474 ("tests/avocado: re-factor igb test to avoid timeouts")
4
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
5
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
MAINTAINERS | 2 +-
9
docs/system/devices/igb.rst | 2 +-
10
scripts/ci/org.centos/stream/8/x86_64/test-avocado | 2 +-
11
3 files changed, 3 insertions(+), 3 deletions(-)
12
13
diff --git a/MAINTAINERS b/MAINTAINERS
14
index XXXXXXX..XXXXXXX 100644
15
--- a/MAINTAINERS
16
+++ b/MAINTAINERS
17
@@ -XXX,XX +XXX,XX @@ R: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
18
S: Maintained
19
F: docs/system/devices/igb.rst
20
F: hw/net/igb*
21
-F: tests/avocado/igb.py
22
+F: tests/avocado/netdev-ethtool.py
23
F: tests/qtest/igb-test.c
24
F: tests/qtest/libqos/igb.c
25
26
diff --git a/docs/system/devices/igb.rst b/docs/system/devices/igb.rst
27
index XXXXXXX..XXXXXXX 100644
28
--- a/docs/system/devices/igb.rst
29
+++ b/docs/system/devices/igb.rst
30
@@ -XXX,XX +XXX,XX @@ Avocado test and can be ran with the following command:
31
32
.. code:: shell
33
34
- make check-avocado AVOCADO_TESTS=tests/avocado/igb.py
35
+ make check-avocado AVOCADO_TESTS=tests/avocado/netdev-ethtool.py
36
37
References
38
==========
39
diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
40
index XXXXXXX..XXXXXXX 100755
41
--- a/scripts/ci/org.centos/stream/8/x86_64/test-avocado
42
+++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
43
@@ -XXX,XX +XXX,XX @@ make get-vm-images
44
tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \
45
tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \
46
tests/avocado/hotplug_cpu.py:HotPlugCPU.test \
47
- tests/avocado/igb.py:IGB.test \
48
+ tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb_nomsi \
49
tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \
50
tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \
51
tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \
52
--
53
2.7.4
54
55
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
6
tests/avocado/netdev-ethtool.py | 1 -
7
1 file changed, 1 deletion(-)
8
9
diff --git a/tests/avocado/netdev-ethtool.py b/tests/avocado/netdev-ethtool.py
10
index XXXXXXX..XXXXXXX 100644
11
--- a/tests/avocado/netdev-ethtool.py
12
+++ b/tests/avocado/netdev-ethtool.py
13
@@ -XXX,XX +XXX,XX @@
14
15
from avocado import skip
16
from avocado_qemu import QemuSystemTest
17
-from avocado_qemu import exec_command, exec_command_and_wait_for_pattern
18
from avocado_qemu import wait_for_console_pattern
19
20
class NetDevEthtool(QemuSystemTest):
21
--
22
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
It is unlikely to find more bugs with KVM so remove test_igb_nomsi_kvm
4
to save time to run it.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Reviewed-by: Thomas Huth <thuth@redhat.com>
8
Acked-by: Alex Bennée <alex.bennee@linaro.org>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
tests/avocado/netdev-ethtool.py | 12 +-----------
12
1 file changed, 1 insertion(+), 11 deletions(-)
13
14
diff --git a/tests/avocado/netdev-ethtool.py b/tests/avocado/netdev-ethtool.py
15
index XXXXXXX..XXXXXXX 100644
16
--- a/tests/avocado/netdev-ethtool.py
17
+++ b/tests/avocado/netdev-ethtool.py
18
@@ -XXX,XX +XXX,XX @@ def get_asset(self, name, sha1):
19
# URL into a unique one
20
return self.fetch_asset(name=name, locations=(url), asset_hash=sha1)
21
22
- def common_test_code(self, netdev, extra_args=None, kvm=False):
23
+ def common_test_code(self, netdev, extra_args=None):
24
25
# This custom kernel has drivers for all the supported network
26
# devices we can emulate in QEMU
27
@@ -XXX,XX +XXX,XX @@ def common_test_code(self, netdev, extra_args=None, kvm=False):
28
'-drive', drive,
29
'-device', netdev)
30
31
- if kvm:
32
- self.vm.add_args('-accel', 'kvm')
33
-
34
self.vm.set_console(console_index=0)
35
self.vm.launch()
36
37
@@ -XXX,XX +XXX,XX @@ def test_igb_nomsi(self):
38
"""
39
self.common_test_code("igb", "pci=nomsi")
40
41
- def test_igb_nomsi_kvm(self):
42
- """
43
- :avocado: tags=device:igb
44
- """
45
- self.require_accelerator('kvm')
46
- self.common_test_code("igb", "pci=nomsi", True)
47
-
48
# It seems the other popular cards we model in QEMU currently fail
49
# the pattern test with:
50
#
51
--
52
2.7.4
53
54
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
This function is not used.
4
5
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
8
hw/net/net_rx_pkt.c | 5 -----
9
hw/net/net_rx_pkt.h | 9 ---------
10
2 files changed, 14 deletions(-)
11
12
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/net_rx_pkt.c
15
+++ b/hw/net/net_rx_pkt.c
16
@@ -XXX,XX +XXX,XX @@ eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt)
17
return &pkt->ip4hdr_info;
18
}
19
20
-eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt)
21
-{
22
- return &pkt->l4hdr_info;
23
-}
24
-
25
static inline void
26
_net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written,
27
void *ptr, size_t size)
28
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/hw/net/net_rx_pkt.h
31
+++ b/hw/net/net_rx_pkt.h
32
@@ -XXX,XX +XXX,XX @@ eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt);
33
*/
34
eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt);
35
36
-/**
37
- * fetches L4 header analysis results
38
- *
39
- * Return: pointer to analysis results structure which is stored in internal
40
- * packet area.
41
- *
42
- */
43
-eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt);
44
-
45
typedef enum {
46
NetPktRssIpV4,
47
NetPktRssIpV4Tcp,
48
--
49
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
The old eth_setup_vlan_headers has no user so remove it and rename
4
eth_setup_vlan_headers_ex.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/net/net_tx_pkt.c | 2 +-
11
include/net/eth.h | 9 +--------
12
net/eth.c | 2 +-
13
3 files changed, 3 insertions(+), 10 deletions(-)
14
15
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/net_tx_pkt.c
18
+++ b/hw/net/net_tx_pkt.c
19
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
20
bool is_new;
21
assert(pkt);
22
23
- eth_setup_vlan_headers_ex(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
24
+ eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
25
vlan, vlan_ethtype, &is_new);
26
27
/* update l2hdrlen */
28
diff --git a/include/net/eth.h b/include/net/eth.h
29
index XXXXXXX..XXXXXXX 100644
30
--- a/include/net/eth.h
31
+++ b/include/net/eth.h
32
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
33
uint16_t
34
eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len);
35
36
-void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
37
+void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
38
uint16_t vlan_ethtype, bool *is_new);
39
40
-static inline void
41
-eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
42
- bool *is_new)
43
-{
44
- eth_setup_vlan_headers_ex(ehdr, vlan_tag, ETH_P_VLAN, is_new);
45
-}
46
-
47
48
uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto);
49
50
diff --git a/net/eth.c b/net/eth.c
51
index XXXXXXX..XXXXXXX 100644
52
--- a/net/eth.c
53
+++ b/net/eth.c
54
@@ -XXX,XX +XXX,XX @@
55
#include "net/checksum.h"
56
#include "net/tap.h"
57
58
-void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag,
59
+void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
60
uint16_t vlan_ethtype, bool *is_new)
61
{
62
struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
63
--
64
2.7.4
65
66
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
This saves some code and enables tracepoint for e1000's VLAN filtering.
4
5
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
6
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/e1000.c | 35 +++++------------------------------
10
hw/net/e1000e_core.c | 47 +++++------------------------------------------
11
hw/net/e1000x_common.c | 44 ++++++++++++++++++++++++++++++++++++++------
12
hw/net/e1000x_common.h | 4 +++-
13
hw/net/igb_core.c | 41 +++--------------------------------------
14
hw/net/trace-events | 4 ++--
15
6 files changed, 56 insertions(+), 119 deletions(-)
16
17
diff --git a/hw/net/e1000.c b/hw/net/e1000.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/e1000.c
20
+++ b/hw/net/e1000.c
21
@@ -XXX,XX +XXX,XX @@ start_xmit(E1000State *s)
22
}
23
24
static int
25
-receive_filter(E1000State *s, const uint8_t *buf, int size)
26
+receive_filter(E1000State *s, const void *buf)
27
{
28
- uint32_t rctl = s->mac_reg[RCTL];
29
- int isbcast = is_broadcast_ether_addr(buf);
30
- int ismcast = is_multicast_ether_addr(buf);
31
-
32
- if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) &&
33
- e1000x_vlan_rx_filter_enabled(s->mac_reg)) {
34
- uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(buf)->h_tci);
35
- uint32_t vfta =
36
- ldl_le_p((uint32_t *)(s->mac_reg + VFTA) +
37
- ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK));
38
- if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) {
39
- return 0;
40
- }
41
- }
42
-
43
- if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */
44
- return 1;
45
- }
46
-
47
- if (ismcast && (rctl & E1000_RCTL_MPE)) { /* promiscuous mcast */
48
- return 1;
49
- }
50
-
51
- if (isbcast && (rctl & E1000_RCTL_BAM)) { /* broadcast enabled */
52
- return 1;
53
- }
54
-
55
- return e1000x_rx_group_filter(s->mac_reg, buf);
56
+ return (!e1000x_is_vlan_packet(buf, s->mac_reg[VET]) ||
57
+ e1000x_rx_vlan_filter(s->mac_reg, PKT_GET_VLAN_HDR(buf))) &&
58
+ e1000x_rx_group_filter(s->mac_reg, buf);
59
}
60
61
static void
62
@@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt)
63
return size;
64
}
65
66
- if (!receive_filter(s, filter_buf, size)) {
67
+ if (!receive_filter(s, filter_buf)) {
68
return size;
69
}
70
71
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
72
index XXXXXXX..XXXXXXX 100644
73
--- a/hw/net/e1000e_core.c
74
+++ b/hw/net/e1000e_core.c
75
@@ -XXX,XX +XXX,XX @@ e1000e_rx_l4_cso_enabled(E1000ECore *core)
76
}
77
78
static bool
79
-e1000e_receive_filter(E1000ECore *core, const uint8_t *buf, int size)
80
+e1000e_receive_filter(E1000ECore *core, const void *buf)
81
{
82
- uint32_t rctl = core->mac[RCTL];
83
-
84
- if (e1000x_is_vlan_packet(buf, core->mac[VET]) &&
85
- e1000x_vlan_rx_filter_enabled(core->mac)) {
86
- uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(buf)->h_tci);
87
- uint32_t vfta =
88
- ldl_le_p((uint32_t *)(core->mac + VFTA) +
89
- ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK));
90
- if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) {
91
- trace_e1000e_rx_flt_vlan_mismatch(vid);
92
- return false;
93
- } else {
94
- trace_e1000e_rx_flt_vlan_match(vid);
95
- }
96
- }
97
-
98
- switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
99
- case ETH_PKT_UCAST:
100
- if (rctl & E1000_RCTL_UPE) {
101
- return true; /* promiscuous ucast */
102
- }
103
- break;
104
-
105
- case ETH_PKT_BCAST:
106
- if (rctl & E1000_RCTL_BAM) {
107
- return true; /* broadcast enabled */
108
- }
109
- break;
110
-
111
- case ETH_PKT_MCAST:
112
- if (rctl & E1000_RCTL_MPE) {
113
- return true; /* promiscuous mcast */
114
- }
115
- break;
116
-
117
- default:
118
- g_assert_not_reached();
119
- }
120
-
121
- return e1000x_rx_group_filter(core->mac, buf);
122
+ return (!e1000x_is_vlan_packet(buf, core->mac[VET]) ||
123
+ e1000x_rx_vlan_filter(core->mac, PKT_GET_VLAN_HDR(buf))) &&
124
+ e1000x_rx_group_filter(core->mac, buf);
125
}
126
127
static inline void
128
@@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
129
net_rx_pkt_set_packet_type(core->rx_pkt,
130
get_eth_packet_type(PKT_GET_ETH_HDR(buf)));
131
132
- if (!e1000e_receive_filter(core, buf, size)) {
133
+ if (!e1000e_receive_filter(core, buf)) {
134
trace_e1000e_rx_flt_dropped();
135
return orig_size;
136
}
137
diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c
138
index XXXXXXX..XXXXXXX 100644
139
--- a/hw/net/e1000x_common.c
140
+++ b/hw/net/e1000x_common.c
141
@@ -XXX,XX +XXX,XX @@ bool e1000x_is_vlan_packet(const void *buf, uint16_t vet)
142
return res;
143
}
144
145
-bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf)
146
+bool e1000x_rx_vlan_filter(uint32_t *mac, const struct vlan_header *vhdr)
147
+{
148
+ if (e1000x_vlan_rx_filter_enabled(mac)) {
149
+ uint16_t vid = lduw_be_p(&vhdr->h_tci);
150
+ uint32_t vfta =
151
+ ldl_le_p((uint32_t *)(mac + VFTA) +
152
+ ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK));
153
+ if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) {
154
+ trace_e1000x_rx_flt_vlan_mismatch(vid);
155
+ return false;
156
+ }
157
+
158
+ trace_e1000x_rx_flt_vlan_match(vid);
159
+ }
160
+
161
+ return true;
162
+}
163
+
164
+bool e1000x_rx_group_filter(uint32_t *mac, const struct eth_header *ehdr)
165
{
166
static const int mta_shift[] = { 4, 3, 2, 0 };
167
uint32_t f, ra[2], *rp, rctl = mac[RCTL];
168
169
+ if (is_broadcast_ether_addr(ehdr->h_dest)) {
170
+ if (rctl & E1000_RCTL_BAM) {
171
+ return true;
172
+ }
173
+ } else if (is_multicast_ether_addr(ehdr->h_dest)) {
174
+ if (rctl & E1000_RCTL_MPE) {
175
+ return true;
176
+ }
177
+ } else {
178
+ if (rctl & E1000_RCTL_UPE) {
179
+ return true;
180
+ }
181
+ }
182
+
183
for (rp = mac + RA; rp < mac + RA + 32; rp += 2) {
184
if (!(rp[1] & E1000_RAH_AV)) {
185
continue;
186
}
187
ra[0] = cpu_to_le32(rp[0]);
188
ra[1] = cpu_to_le32(rp[1]);
189
- if (!memcmp(buf, (uint8_t *)ra, ETH_ALEN)) {
190
+ if (!memcmp(ehdr->h_dest, (uint8_t *)ra, ETH_ALEN)) {
191
trace_e1000x_rx_flt_ucast_match((int)(rp - mac - RA) / 2,
192
- MAC_ARG(buf));
193
+ MAC_ARG(ehdr->h_dest));
194
return true;
195
}
196
}
197
- trace_e1000x_rx_flt_ucast_mismatch(MAC_ARG(buf));
198
+ trace_e1000x_rx_flt_ucast_mismatch(MAC_ARG(ehdr->h_dest));
199
200
f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
201
- f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff;
202
+ f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
203
if (mac[MTA + (f >> 5)] & (1 << (f & 0x1f))) {
204
return true;
205
}
206
207
- trace_e1000x_rx_flt_inexact_mismatch(MAC_ARG(buf),
208
+ trace_e1000x_rx_flt_inexact_mismatch(MAC_ARG(ehdr->h_dest),
209
(rctl >> E1000_RCTL_MO_SHIFT) & 3,
210
f >> 5,
211
mac[MTA + (f >> 5)]);
212
diff --git a/hw/net/e1000x_common.h b/hw/net/e1000x_common.h
213
index XXXXXXX..XXXXXXX 100644
214
--- a/hw/net/e1000x_common.h
215
+++ b/hw/net/e1000x_common.h
216
@@ -XXX,XX +XXX,XX @@ bool e1000x_rx_ready(PCIDevice *d, uint32_t *mac);
217
218
bool e1000x_is_vlan_packet(const void *buf, uint16_t vet);
219
220
-bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf);
221
+bool e1000x_rx_vlan_filter(uint32_t *mac, const struct vlan_header *vhdr);
222
+
223
+bool e1000x_rx_group_filter(uint32_t *mac, const struct eth_header *ehdr);
224
225
bool e1000x_hw_rx_enabled(uint32_t *mac);
226
227
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
228
index XXXXXXX..XXXXXXX 100644
229
--- a/hw/net/igb_core.c
230
+++ b/hw/net/igb_core.c
231
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
232
uint16_t queues = 0;
233
uint16_t oversized = 0;
234
uint16_t vid = be16_to_cpu(l2_header->vlan.h_tci) & VLAN_VID_MASK;
235
- bool accepted = false;
236
int i;
237
238
memset(rss_info, 0, sizeof(E1000E_RSSInfo));
239
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
240
}
241
242
if (e1000x_is_vlan_packet(ehdr, core->mac[VET] & 0xffff) &&
243
- e1000x_vlan_rx_filter_enabled(core->mac)) {
244
- uint32_t vfta =
245
- ldl_le_p((uint32_t *)(core->mac + VFTA) +
246
- ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK));
247
- if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) {
248
- trace_e1000e_rx_flt_vlan_mismatch(vid);
249
- return queues;
250
- } else {
251
- trace_e1000e_rx_flt_vlan_match(vid);
252
- }
253
+ !e1000x_rx_vlan_filter(core->mac, PKT_GET_VLAN_HDR(ehdr))) {
254
+ return queues;
255
}
256
257
if (core->mac[MRQC] & 1) {
258
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
259
}
260
}
261
} else {
262
- switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
263
- case ETH_PKT_UCAST:
264
- if (rctl & E1000_RCTL_UPE) {
265
- accepted = true; /* promiscuous ucast */
266
- }
267
- break;
268
-
269
- case ETH_PKT_BCAST:
270
- if (rctl & E1000_RCTL_BAM) {
271
- accepted = true; /* broadcast enabled */
272
- }
273
- break;
274
-
275
- case ETH_PKT_MCAST:
276
- if (rctl & E1000_RCTL_MPE) {
277
- accepted = true; /* promiscuous mcast */
278
- }
279
- break;
280
-
281
- default:
282
- g_assert_not_reached();
283
- }
284
-
285
- if (!accepted) {
286
- accepted = e1000x_rx_group_filter(core->mac, ehdr->h_dest);
287
- }
288
-
289
+ bool accepted = e1000x_rx_group_filter(core->mac, ehdr);
290
if (!accepted) {
291
for (macp = core->mac + RA2; macp < core->mac + RA2 + 16; macp += 2) {
292
if (!(macp[1] & E1000_RAH_AV)) {
293
diff --git a/hw/net/trace-events b/hw/net/trace-events
294
index XXXXXXX..XXXXXXX 100644
295
--- a/hw/net/trace-events
296
+++ b/hw/net/trace-events
297
@@ -XXX,XX +XXX,XX @@ e1000_receiver_overrun(size_t s, uint32_t rdh, uint32_t rdt) "Receiver overrun:
298
# e1000x_common.c
299
e1000x_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d"
300
e1000x_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X"
301
+e1000x_rx_flt_vlan_mismatch(uint16_t vid) "VID mismatch: 0x%X"
302
+e1000x_rx_flt_vlan_match(uint16_t vid) "VID match: 0x%X"
303
e1000x_rx_flt_ucast_match(uint32_t idx, uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x"
304
e1000x_rx_flt_ucast_mismatch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x"
305
e1000x_rx_flt_inexact_mismatch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5, uint32_t mo, uint32_t mta, uint32_t mta_val) "inexact mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] 0x%x"
306
@@ -XXX,XX +XXX,XX @@ e1000e_rx_can_recv_rings_full(void) "Cannot receive: all rings are full"
307
e1000e_rx_can_recv(void) "Can receive"
308
e1000e_rx_has_buffers(int ridx, uint32_t free_desc, size_t total_size, uint32_t desc_buf_size) "ring #%d: free descr: %u, packet size %zu, descr buffer size %u"
309
e1000e_rx_null_descriptor(void) "Null RX descriptor!!"
310
-e1000e_rx_flt_vlan_mismatch(uint16_t vid) "VID mismatch: 0x%X"
311
-e1000e_rx_flt_vlan_match(uint16_t vid) "VID match: 0x%X"
312
e1000e_rx_desc_ps_read(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) "buffers: [0x%"PRIx64", 0x%"PRIx64", 0x%"PRIx64", 0x%"PRIx64"]"
313
e1000e_rx_desc_ps_write(uint16_t a0, uint16_t a1, uint16_t a2, uint16_t a3) "bytes written: [%u, %u, %u, %u]"
314
e1000e_rx_desc_buff_sizes(uint32_t b0, uint32_t b1, uint32_t b2, uint32_t b3) "buffer sizes: [%u, %u, %u, %u]"
315
--
316
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Section 13.7.15 Receive Length Error Count says:
4
> Packets over 1522 bytes are oversized if LongPacketEnable is 0b
5
> (RCTL.LPE). If LongPacketEnable (LPE) is 1b, then an incoming packet
6
> is considered oversized if it exceeds 16384 bytes.
7
8
> These lengths are based on bytes in the received packet from
9
> <Destination Address> through <CRC>, inclusively.
10
11
As QEMU processes packets without CRC, the number of bytes for CRC
12
needs to be subtracted. This change adds some size definitions to be used
13
to derive the new size thresholds to eth.h.
14
15
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
16
Signed-off-by: Jason Wang <jasowang@redhat.com>
17
---
18
hw/net/e1000x_common.c | 10 +++++-----
19
include/net/eth.h | 2 ++
20
2 files changed, 7 insertions(+), 5 deletions(-)
21
22
diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c
23
index XXXXXXX..XXXXXXX 100644
24
--- a/hw/net/e1000x_common.c
25
+++ b/hw/net/e1000x_common.c
26
@@ -XXX,XX +XXX,XX @@ bool e1000x_hw_rx_enabled(uint32_t *mac)
27
28
bool e1000x_is_oversized(uint32_t *mac, size_t size)
29
{
30
+ size_t header_size = sizeof(struct eth_header) + sizeof(struct vlan_header);
31
/* this is the size past which hardware will
32
drop packets when setting LPE=0 */
33
- static const int maximum_ethernet_vlan_size = 1522;
34
+ size_t maximum_short_size = header_size + ETH_MTU;
35
/* this is the size past which hardware will
36
drop packets when setting LPE=1 */
37
- static const int maximum_ethernet_lpe_size = 16 * KiB;
38
+ size_t maximum_large_size = 16 * KiB - ETH_FCS_LEN;
39
40
- if ((size > maximum_ethernet_lpe_size ||
41
- (size > maximum_ethernet_vlan_size
42
- && !(mac[RCTL] & E1000_RCTL_LPE)))
43
+ if ((size > maximum_large_size ||
44
+ (size > maximum_short_size && !(mac[RCTL] & E1000_RCTL_LPE)))
45
&& !(mac[RCTL] & E1000_RCTL_SBP)) {
46
e1000x_inc_reg_if_not_full(mac, ROC);
47
trace_e1000x_rx_oversized(size);
48
diff --git a/include/net/eth.h b/include/net/eth.h
49
index XXXXXXX..XXXXXXX 100644
50
--- a/include/net/eth.h
51
+++ b/include/net/eth.h
52
@@ -XXX,XX +XXX,XX @@
53
#define ETH_ALEN 6
54
#define ETH_HLEN 14
55
#define ETH_ZLEN 60 /* Min. octets in frame without FCS */
56
+#define ETH_FCS_LEN 4
57
+#define ETH_MTU 1500
58
59
struct eth_header {
60
uint8_t h_dest[ETH_ALEN]; /* destination eth addr */
61
--
62
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
e1000e and igb employ NetPktRssIpV6TcpEx for RSS hash if TcpIpv6 MRQC
4
bit is set. Moreover, igb also has a MRQC bit for NetPktRssIpV6Tcp
5
though it is not implemented yet. Rename it to TcpIpv6Ex to avoid
6
confusion.
7
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/net/e1000e_core.c | 8 ++++----
13
hw/net/e1000x_regs.h | 24 ++++++++++++------------
14
hw/net/igb_core.c | 8 ++++----
15
hw/net/trace-events | 2 +-
16
4 files changed, 21 insertions(+), 21 deletions(-)
17
18
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/net/e1000e_core.c
21
+++ b/hw/net/e1000e_core.c
22
@@ -XXX,XX +XXX,XX @@ e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt)
23
ip6info->rss_ex_dst_valid,
24
ip6info->rss_ex_src_valid,
25
core->mac[MRQC],
26
- E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]),
27
+ E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC]),
28
E1000_MRQC_EN_IPV6EX(core->mac[MRQC]),
29
E1000_MRQC_EN_IPV6(core->mac[MRQC]));
30
31
@@ -XXX,XX +XXX,XX @@ e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt)
32
ip6info->rss_ex_src_valid))) {
33
34
if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP &&
35
- E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) {
36
- return E1000_MRQ_RSS_TYPE_IPV6TCP;
37
+ E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC])) {
38
+ return E1000_MRQ_RSS_TYPE_IPV6TCPEX;
39
}
40
41
if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) {
42
@@ -XXX,XX +XXX,XX @@ e1000e_rss_calc_hash(E1000ECore *core,
43
case E1000_MRQ_RSS_TYPE_IPV4TCP:
44
type = NetPktRssIpV4Tcp;
45
break;
46
- case E1000_MRQ_RSS_TYPE_IPV6TCP:
47
+ case E1000_MRQ_RSS_TYPE_IPV6TCPEX:
48
type = NetPktRssIpV6TcpEx;
49
break;
50
case E1000_MRQ_RSS_TYPE_IPV6:
51
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
52
index XXXXXXX..XXXXXXX 100644
53
--- a/hw/net/e1000x_regs.h
54
+++ b/hw/net/e1000x_regs.h
55
@@ -XXX,XX +XXX,XX @@
56
#define E1000_RETA_IDX(hash) ((hash) & (BIT(7) - 1))
57
#define E1000_RETA_VAL(reta, hash) (((uint8_t *)(reta))[E1000_RETA_IDX(hash)])
58
59
-#define E1000_MRQC_EN_TCPIPV4(mrqc) ((mrqc) & BIT(16))
60
-#define E1000_MRQC_EN_IPV4(mrqc) ((mrqc) & BIT(17))
61
-#define E1000_MRQC_EN_TCPIPV6(mrqc) ((mrqc) & BIT(18))
62
-#define E1000_MRQC_EN_IPV6EX(mrqc) ((mrqc) & BIT(19))
63
-#define E1000_MRQC_EN_IPV6(mrqc) ((mrqc) & BIT(20))
64
-
65
-#define E1000_MRQ_RSS_TYPE_NONE (0)
66
-#define E1000_MRQ_RSS_TYPE_IPV4TCP (1)
67
-#define E1000_MRQ_RSS_TYPE_IPV4 (2)
68
-#define E1000_MRQ_RSS_TYPE_IPV6TCP (3)
69
-#define E1000_MRQ_RSS_TYPE_IPV6EX (4)
70
-#define E1000_MRQ_RSS_TYPE_IPV6 (5)
71
+#define E1000_MRQC_EN_TCPIPV4(mrqc) ((mrqc) & BIT(16))
72
+#define E1000_MRQC_EN_IPV4(mrqc) ((mrqc) & BIT(17))
73
+#define E1000_MRQC_EN_TCPIPV6EX(mrqc) ((mrqc) & BIT(18))
74
+#define E1000_MRQC_EN_IPV6EX(mrqc) ((mrqc) & BIT(19))
75
+#define E1000_MRQC_EN_IPV6(mrqc) ((mrqc) & BIT(20))
76
+
77
+#define E1000_MRQ_RSS_TYPE_NONE (0)
78
+#define E1000_MRQ_RSS_TYPE_IPV4TCP (1)
79
+#define E1000_MRQ_RSS_TYPE_IPV4 (2)
80
+#define E1000_MRQ_RSS_TYPE_IPV6TCPEX (3)
81
+#define E1000_MRQ_RSS_TYPE_IPV6EX (4)
82
+#define E1000_MRQ_RSS_TYPE_IPV6 (5)
83
84
#define E1000_ICR_ASSERTED BIT(31)
85
#define E1000_EIAC_MASK 0x01F00000
86
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
87
index XXXXXXX..XXXXXXX 100644
88
--- a/hw/net/igb_core.c
89
+++ b/hw/net/igb_core.c
90
@@ -XXX,XX +XXX,XX @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt *pkt)
91
ip6info->rss_ex_dst_valid,
92
ip6info->rss_ex_src_valid,
93
core->mac[MRQC],
94
- E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]),
95
+ E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC]),
96
E1000_MRQC_EN_IPV6EX(core->mac[MRQC]),
97
E1000_MRQC_EN_IPV6(core->mac[MRQC]));
98
99
@@ -XXX,XX +XXX,XX @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt *pkt)
100
ip6info->rss_ex_src_valid))) {
101
102
if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP &&
103
- E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) {
104
- return E1000_MRQ_RSS_TYPE_IPV6TCP;
105
+ E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC])) {
106
+ return E1000_MRQ_RSS_TYPE_IPV6TCPEX;
107
}
108
109
if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) {
110
@@ -XXX,XX +XXX,XX @@ igb_rss_calc_hash(IGBCore *core, struct NetRxPkt *pkt, E1000E_RSSInfo *info)
111
case E1000_MRQ_RSS_TYPE_IPV4TCP:
112
type = NetPktRssIpV4Tcp;
113
break;
114
- case E1000_MRQ_RSS_TYPE_IPV6TCP:
115
+ case E1000_MRQ_RSS_TYPE_IPV6TCPEX:
116
type = NetPktRssIpV6TcpEx;
117
break;
118
case E1000_MRQ_RSS_TYPE_IPV6:
119
diff --git a/hw/net/trace-events b/hw/net/trace-events
120
index XXXXXXX..XXXXXXX 100644
121
--- a/hw/net/trace-events
122
+++ b/hw/net/trace-events
123
@@ -XXX,XX +XXX,XX @@ e1000e_rx_rss_disabled(void) "RSS is disabled"
124
e1000e_rx_rss_type(uint32_t type) "RSS type is %u"
125
e1000e_rx_rss_ip4(int l4hdr_proto, uint32_t mrqc, bool tcpipv4_enabled, bool ipv4_enabled) "RSS IPv4: L4 header protocol %d, mrqc 0x%X, tcpipv4 enabled %d, ipv4 enabled %d"
126
e1000e_rx_rss_ip6_rfctl(uint32_t rfctl) "RSS IPv6: rfctl 0x%X"
127
-e1000e_rx_rss_ip6(bool ex_dis, bool new_ex_dis, int l4hdr_proto, bool has_ext_headers, bool ex_dst_valid, bool ex_src_valid, uint32_t mrqc, bool tcpipv6_enabled, bool ipv6ex_enabled, bool ipv6_enabled) "RSS IPv6: ex_dis: %d, new_ex_dis: %d, L4 header protocol %d, has_ext_headers %d, ex_dst_valid %d, ex_src_valid %d, mrqc 0x%X, tcpipv6 enabled %d, ipv6ex enabled %d, ipv6 enabled %d"
128
+e1000e_rx_rss_ip6(bool ex_dis, bool new_ex_dis, int l4hdr_proto, bool has_ext_headers, bool ex_dst_valid, bool ex_src_valid, uint32_t mrqc, bool tcpipv6ex_enabled, bool ipv6ex_enabled, bool ipv6_enabled) "RSS IPv6: ex_dis: %d, new_ex_dis: %d, L4 header protocol %d, has_ext_headers %d, ex_dst_valid %d, ex_src_valid %d, mrqc 0x%X, tcpipv6ex enabled %d, ipv6ex enabled %d, ipv6 enabled %d"
129
130
e1000e_rx_metadata_protocols(bool hasip4, bool hasip6, int l4hdr_protocol) "protocols: ip4: %d, ip6: %d, l4hdr: %d"
131
e1000e_rx_metadata_vlan(uint16_t vlan_tag) "VLAN tag is 0x%X"
132
--
133
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Without this change, the status flags may not be traced e.g. if checksum
4
offloading is disabled.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/net/e1000e_core.c | 3 +--
11
1 file changed, 1 insertion(+), 2 deletions(-)
12
13
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/net/e1000e_core.c
16
+++ b/hw/net/e1000e_core.c
17
@@ -XXX,XX +XXX,XX @@ e1000e_build_rx_metadata(E1000ECore *core,
18
trace_e1000e_rx_metadata_l4_cso_disabled();
19
}
20
21
- trace_e1000e_rx_metadata_status_flags(*status_flags);
22
-
23
func_exit:
24
+ trace_e1000e_rx_metadata_status_flags(*status_flags);
25
*status_flags = cpu_to_le32(*status_flags);
26
}
27
28
--
29
2.7.4
30
31
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
It does not make much sense to limit these commands to the legacy 'vlan'
3
Without this change, the status flags may not be traced e.g. if checksum
4
concept only, they should work with the modern netdevs, too. So now
4
offloading is disabled.
5
it is possible to use this command with one, two or three parameters.
6
5
7
With one parameter, the command installs a hostfwd rule on the default
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
8
"user" network:
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
hostfwd_add tcp:...
10
11
With two parameters, the command installs a hostfwd rule on a netdev
12
(that's the new way of using this command):
13
hostfwd_add netdev_id tcp:...
14
15
With three parameters, the command installs a rule on a 'vlan' (aka hub):
16
hostfwd_add hub_id name tcp:...
17
18
Same applies to the hostfwd_remove command now.
19
20
Signed-off-by: Thomas Huth <thuth@redhat.com>
21
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
22
---
9
---
23
hmp-commands.hx | 4 ++--
10
hw/net/igb_core.c | 3 +--
24
net/slirp.c | 33 +++++++++++++++++++++++----------
11
1 file changed, 1 insertion(+), 2 deletions(-)
25
2 files changed, 25 insertions(+), 12 deletions(-)
26
12
27
diff --git a/hmp-commands.hx b/hmp-commands.hx
13
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
28
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
29
--- a/hmp-commands.hx
15
--- a/hw/net/igb_core.c
30
+++ b/hmp-commands.hx
16
+++ b/hw/net/igb_core.c
31
@@ -XXX,XX +XXX,XX @@ ETEXI
17
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
32
{
18
trace_e1000e_rx_metadata_l4_cso_disabled();
33
.name = "hostfwd_add",
19
}
34
.args_type = "arg1:s,arg2:s?,arg3:s?",
20
35
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
21
- trace_e1000e_rx_metadata_status_flags(*status_flags);
36
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport",
22
-
37
.help = "redirect TCP or UDP connections from host to guest (requires -net user)",
23
func_exit:
38
.cmd = hmp_hostfwd_add,
24
+ trace_e1000e_rx_metadata_status_flags(*status_flags);
39
},
25
*status_flags = cpu_to_le32(*status_flags);
40
@@ -XXX,XX +XXX,XX @@ ETEXI
41
{
42
.name = "hostfwd_remove",
43
.args_type = "arg1:s,arg2:s?,arg3:s?",
44
- .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport",
45
+ .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport",
46
.help = "remove host-to-guest TCP or UDP redirection",
47
.cmd = hmp_hostfwd_remove,
48
},
49
diff --git a/net/slirp.c b/net/slirp.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/net/slirp.c
52
+++ b/net/slirp.c
53
@@ -XXX,XX +XXX,XX @@ error:
54
return -1;
55
}
26
}
56
27
57
-static SlirpState *slirp_lookup(Monitor *mon, const char *vlan,
58
- const char *stack)
59
+static SlirpState *slirp_lookup(Monitor *mon, const char *hub_id,
60
+ const char *name)
61
{
62
-
63
- if (vlan) {
64
+ if (name) {
65
NetClientState *nc;
66
- nc = net_hub_find_client_by_name(strtol(vlan, NULL, 0), stack);
67
- if (!nc) {
68
- monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
69
- return NULL;
70
+ if (hub_id) {
71
+ nc = net_hub_find_client_by_name(strtol(hub_id, NULL, 0), name);
72
+ if (!nc) {
73
+ monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n");
74
+ return NULL;
75
+ }
76
+ } else {
77
+ nc = qemu_find_netdev(name);
78
+ if (!nc) {
79
+ monitor_printf(mon, "unrecognized netdev id '%s'\n", name);
80
+ return NULL;
81
+ }
82
}
83
if (strcmp(nc->model, "user")) {
84
monitor_printf(mon, "invalid device specified\n");
85
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict)
86
const char *arg2 = qdict_get_try_str(qdict, "arg2");
87
const char *arg3 = qdict_get_try_str(qdict, "arg3");
88
89
- if (arg2) {
90
+ if (arg3) {
91
s = slirp_lookup(mon, arg1, arg2);
92
src_str = arg3;
93
+ } else if (arg2) {
94
+ s = slirp_lookup(mon, NULL, arg1);
95
+ src_str = arg2;
96
} else {
97
s = slirp_lookup(mon, NULL, NULL);
98
src_str = arg1;
99
@@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_add(Monitor *mon, const QDict *qdict)
100
const char *arg2 = qdict_get_try_str(qdict, "arg2");
101
const char *arg3 = qdict_get_try_str(qdict, "arg3");
102
103
- if (arg2) {
104
+ if (arg3) {
105
s = slirp_lookup(mon, arg1, arg2);
106
redir_str = arg3;
107
+ } else if (arg2) {
108
+ s = slirp_lookup(mon, NULL, arg1);
109
+ redir_str = arg2;
110
} else {
111
s = slirp_lookup(mon, NULL, NULL);
112
redir_str = arg1;
113
--
28
--
114
2.7.4
29
2.7.4
115
30
116
31
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
The goto is a bit confusing as it changes the control flow only if L4
4
protocol is not recognized. It is also different from e1000e, and
5
noisy when comparing e1000e and igb.
6
7
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
8
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/net/igb_core.c | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
13
14
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/igb_core.c
17
+++ b/hw/net/igb_core.c
18
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
19
break;
20
21
default:
22
- goto func_exit;
23
+ break;
24
}
25
} else {
26
trace_e1000e_rx_metadata_l4_cso_disabled();
27
--
28
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Section 7.2.2.3 Advanced Transmit Data Descriptor says:
4
> For frames that spans multiple descriptors, all fields apart from
5
> DCMD.EOP, DCMD.RS, DCMD.DEXT, DTALEN, Address and DTYP are valid only
6
> in the first descriptors and are ignored in the subsequent ones.
7
8
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
9
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/net/igb_core.c | 2 +-
13
1 file changed, 1 insertion(+), 1 deletion(-)
14
15
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/igb_core.c
18
+++ b/hw/net/igb_core.c
19
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
20
idx = (tx->first_olinfo_status >> 4) & 1;
21
igb_tx_insert_vlan(core, queue_index, tx,
22
tx->ctx[idx].vlan_macip_lens >> 16,
23
- !!(cmd_type_len & E1000_TXD_CMD_VLE));
24
+ !!(tx->first_cmd_type_len & E1000_TXD_CMD_VLE));
25
26
if (igb_tx_pkt_send(core, tx, queue_index)) {
27
igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
28
--
29
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Keeping Tx packet state after the transmit queue is emptied has some
4
problems:
5
- The datasheet says the descriptors can be reused after the transmit
6
queue is emptied, but the Tx packet state may keep references to them.
7
- The Tx packet state cannot be migrated so it can be reset anytime the
8
migration happens.
9
10
Always reset Tx packet state after the queue is emptied.
11
12
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
13
Signed-off-by: Jason Wang <jasowang@redhat.com>
14
---
15
hw/net/e1000e_core.c | 6 ++----
16
1 file changed, 2 insertions(+), 4 deletions(-)
17
18
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
19
index XXXXXXX..XXXXXXX 100644
20
--- a/hw/net/e1000e_core.c
21
+++ b/hw/net/e1000e_core.c
22
@@ -XXX,XX +XXX,XX @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr)
23
if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) {
24
e1000e_set_interrupt_cause(core, cause);
25
}
26
+
27
+ net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci, core->owner);
28
}
29
30
static bool
31
@@ -XXX,XX +XXX,XX @@ e1000e_core_pci_uninit(E1000ECore *core)
32
qemu_del_vm_change_state_handler(core->vmstate);
33
34
for (i = 0; i < E1000E_NUM_QUEUES; i++) {
35
- net_tx_pkt_reset(core->tx[i].tx_pkt,
36
- net_tx_pkt_unmap_frag_pci, core->owner);
37
net_tx_pkt_uninit(core->tx[i].tx_pkt);
38
}
39
40
@@ -XXX,XX +XXX,XX @@ static void e1000e_reset(E1000ECore *core, bool sw)
41
e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);
42
43
for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
44
- net_tx_pkt_reset(core->tx[i].tx_pkt,
45
- net_tx_pkt_unmap_frag_pci, core->owner);
46
memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
47
core->tx[i].skip_cp = false;
48
}
49
--
50
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Tx packet state is kept after the transmit queue is emptied, but this
4
behavior is unreliable as the state can be reset anytime the migration
5
happens.
6
7
Always reset Tx packet state after the queue is emptied.
8
9
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/net/vmxnet3.c | 3 ++-
13
1 file changed, 2 insertions(+), 1 deletion(-)
14
15
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/vmxnet3.c
18
+++ b/hw/net/vmxnet3.c
19
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
20
net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
21
}
22
}
23
+
24
+ net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
25
}
26
27
static inline void
28
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
29
{
30
if (s->device_active) {
31
VMW_CBPRN("Deactivating vmxnet3...");
32
- net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s));
33
net_tx_pkt_uninit(s->tx_pkt);
34
net_rx_pkt_uninit(s->rx_pkt);
35
s->device_active = false;
36
--
37
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
---
7
hw/net/igb_core.c | 4 ++--
8
hw/net/igb_regs.h | 32 +++++++++++++++++++++++++++-----
9
2 files changed, 29 insertions(+), 7 deletions(-)
10
11
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/hw/net/igb_core.c
14
+++ b/hw/net/igb_core.c
15
@@ -XXX,XX +XXX,XX @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
16
{
17
if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
18
uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
19
- uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
20
+ uint32_t mss = tx->ctx[idx].mss_l4len_idx >> E1000_ADVTXD_MSS_SHIFT;
21
if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
22
return false;
23
}
24
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
25
if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
26
idx = (tx->first_olinfo_status >> 4) & 1;
27
igb_tx_insert_vlan(core, queue_index, tx,
28
- tx->ctx[idx].vlan_macip_lens >> 16,
29
+ tx->ctx[idx].vlan_macip_lens >> IGB_TX_FLAGS_VLAN_SHIFT,
30
!!(tx->first_cmd_type_len & E1000_TXD_CMD_VLE));
31
32
if (igb_tx_pkt_send(core, tx, queue_index)) {
33
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
34
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/net/igb_regs.h
36
+++ b/hw/net/igb_regs.h
37
@@ -XXX,XX +XXX,XX @@ union e1000_adv_tx_desc {
38
} wb;
39
};
40
41
-#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
42
-#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
43
-#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor Extension (1=Adv) */
44
-#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP/UDP Segmentation Enable */
45
-
46
#define E1000_ADVTXD_POTS_IXSM 0x00000100 /* Insert TCP/UDP Checksum */
47
#define E1000_ADVTXD_POTS_TXSM 0x00000200 /* Insert TCP/UDP Checksum */
48
49
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
50
#define IGB_82576_VF_DEV_ID 0x10CA
51
#define IGB_I350_VF_DEV_ID 0x1520
52
53
+/* VLAN info */
54
+#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000
55
+#define IGB_TX_FLAGS_VLAN_SHIFT 16
56
+
57
/* from igb/e1000_82575.h */
58
59
#define E1000_MRQC_ENABLE_RSS_MQ 0x00000002
60
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
61
#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
62
#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000
63
64
+/* Adv Transmit Descriptor Config Masks */
65
+#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */
66
+#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */
67
+#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */
68
+#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */
69
+#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */
70
+#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */
71
+#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */
72
+#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */
73
+#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */
74
+#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */
75
+
76
+#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */
77
+#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */
78
+#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */
79
+#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */
80
+#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */
81
+/* IPSec Encrypt Enable for ESP */
82
+#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */
83
+#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */
84
+/* Adv ctxt IPSec SA IDX mask */
85
+/* Adv ctxt IPSec ESP len mask */
86
+
87
/* Additional Transmit Descriptor Control definitions */
88
#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */
89
90
--
91
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
The constants need to be consistent between the PF and VF.
4
5
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
6
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
7
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/net/igb.c | 10 +++++-----
11
hw/net/igb_common.h | 8 ++++++++
12
hw/net/igbvf.c | 7 -------
13
3 files changed, 13 insertions(+), 12 deletions(-)
14
15
diff --git a/hw/net/igb.c b/hw/net/igb.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/igb.c
18
+++ b/hw/net/igb.c
19
@@ -XXX,XX +XXX,XX @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp)
20
21
pcie_ari_init(pci_dev, 0x150, 1);
22
23
- pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, "igbvf",
24
+ pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF,
25
IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS,
26
IGB_VF_OFFSET, IGB_VF_STRIDE);
27
28
- pcie_sriov_pf_init_vf_bar(pci_dev, 0,
29
+ pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX,
30
PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
31
- 16 * KiB);
32
- pcie_sriov_pf_init_vf_bar(pci_dev, 3,
33
+ IGBVF_MMIO_SIZE);
34
+ pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MSIX_BAR_IDX,
35
PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH,
36
- 16 * KiB);
37
+ IGBVF_MSIX_SIZE);
38
39
igb_init_net_peer(s, pci_dev, macaddr);
40
41
diff --git a/hw/net/igb_common.h b/hw/net/igb_common.h
42
index XXXXXXX..XXXXXXX 100644
43
--- a/hw/net/igb_common.h
44
+++ b/hw/net/igb_common.h
45
@@ -XXX,XX +XXX,XX @@
46
47
#include "igb_regs.h"
48
49
+#define TYPE_IGBVF "igbvf"
50
+
51
+#define IGBVF_MMIO_BAR_IDX (0)
52
+#define IGBVF_MSIX_BAR_IDX (3)
53
+
54
+#define IGBVF_MMIO_SIZE (16 * 1024)
55
+#define IGBVF_MSIX_SIZE (16 * 1024)
56
+
57
#define defreg(x) x = (E1000_##x >> 2)
58
#define defreg_indexed(x, i) x##i = (E1000_##x(i) >> 2)
59
#define defreg_indexeda(x, i) x##i##_A = (E1000_##x##_A(i) >> 2)
60
diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c
61
index XXXXXXX..XXXXXXX 100644
62
--- a/hw/net/igbvf.c
63
+++ b/hw/net/igbvf.c
64
@@ -XXX,XX +XXX,XX @@
65
#include "trace.h"
66
#include "qapi/error.h"
67
68
-#define TYPE_IGBVF "igbvf"
69
OBJECT_DECLARE_SIMPLE_TYPE(IgbVfState, IGBVF)
70
71
-#define IGBVF_MMIO_BAR_IDX (0)
72
-#define IGBVF_MSIX_BAR_IDX (3)
73
-
74
-#define IGBVF_MMIO_SIZE (16 * 1024)
75
-#define IGBVF_MSIX_SIZE (16 * 1024)
76
-
77
struct IgbVfState {
78
PCIDevice parent_obj;
79
80
--
81
2.7.4
82
83
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
---
7
hw/net/igb_core.c | 96 +++++++++++++++++++++++++++----------------------------
8
1 file changed, 48 insertions(+), 48 deletions(-)
9
10
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/hw/net/igb_core.c
13
+++ b/hw/net/igb_core.c
14
@@ -XXX,XX +XXX,XX @@ static const uint32_t igb_mac_reg_init[] = {
15
[VMOLR0 ... VMOLR0 + 7] = 0x2600 | E1000_VMOLR_STRCRC,
16
[RPLOLR] = E1000_RPLOLR_STRCRC,
17
[RLPML] = 0x2600,
18
- [TXCTL0] = E1000_DCA_TXCTRL_DATA_RRO_EN |
19
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
20
- E1000_DCA_TXCTRL_DESC_RRO_EN,
21
- [TXCTL1] = E1000_DCA_TXCTRL_DATA_RRO_EN |
22
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
23
- E1000_DCA_TXCTRL_DESC_RRO_EN,
24
- [TXCTL2] = E1000_DCA_TXCTRL_DATA_RRO_EN |
25
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
26
- E1000_DCA_TXCTRL_DESC_RRO_EN,
27
- [TXCTL3] = E1000_DCA_TXCTRL_DATA_RRO_EN |
28
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
29
- E1000_DCA_TXCTRL_DESC_RRO_EN,
30
- [TXCTL4] = E1000_DCA_TXCTRL_DATA_RRO_EN |
31
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
32
- E1000_DCA_TXCTRL_DESC_RRO_EN,
33
- [TXCTL5] = E1000_DCA_TXCTRL_DATA_RRO_EN |
34
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
35
- E1000_DCA_TXCTRL_DESC_RRO_EN,
36
- [TXCTL6] = E1000_DCA_TXCTRL_DATA_RRO_EN |
37
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
38
- E1000_DCA_TXCTRL_DESC_RRO_EN,
39
- [TXCTL7] = E1000_DCA_TXCTRL_DATA_RRO_EN |
40
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
41
- E1000_DCA_TXCTRL_DESC_RRO_EN,
42
- [TXCTL8] = E1000_DCA_TXCTRL_DATA_RRO_EN |
43
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
44
- E1000_DCA_TXCTRL_DESC_RRO_EN,
45
- [TXCTL9] = E1000_DCA_TXCTRL_DATA_RRO_EN |
46
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
47
- E1000_DCA_TXCTRL_DESC_RRO_EN,
48
- [TXCTL10] = E1000_DCA_TXCTRL_DATA_RRO_EN |
49
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
50
- E1000_DCA_TXCTRL_DESC_RRO_EN,
51
- [TXCTL11] = E1000_DCA_TXCTRL_DATA_RRO_EN |
52
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
53
- E1000_DCA_TXCTRL_DESC_RRO_EN,
54
- [TXCTL12] = E1000_DCA_TXCTRL_DATA_RRO_EN |
55
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
56
- E1000_DCA_TXCTRL_DESC_RRO_EN,
57
- [TXCTL13] = E1000_DCA_TXCTRL_DATA_RRO_EN |
58
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
59
- E1000_DCA_TXCTRL_DESC_RRO_EN,
60
- [TXCTL14] = E1000_DCA_TXCTRL_DATA_RRO_EN |
61
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
62
- E1000_DCA_TXCTRL_DESC_RRO_EN,
63
- [TXCTL15] = E1000_DCA_TXCTRL_DATA_RRO_EN |
64
- E1000_DCA_TXCTRL_TX_WB_RO_EN |
65
- E1000_DCA_TXCTRL_DESC_RRO_EN,
66
+ [TXCTL0] = E1000_DCA_TXCTRL_DATA_RRO_EN |
67
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
68
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
69
+ [TXCTL1] = E1000_DCA_TXCTRL_DATA_RRO_EN |
70
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
71
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
72
+ [TXCTL2] = E1000_DCA_TXCTRL_DATA_RRO_EN |
73
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
74
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
75
+ [TXCTL3] = E1000_DCA_TXCTRL_DATA_RRO_EN |
76
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
77
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
78
+ [TXCTL4] = E1000_DCA_TXCTRL_DATA_RRO_EN |
79
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
80
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
81
+ [TXCTL5] = E1000_DCA_TXCTRL_DATA_RRO_EN |
82
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
83
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
84
+ [TXCTL6] = E1000_DCA_TXCTRL_DATA_RRO_EN |
85
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
86
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
87
+ [TXCTL7] = E1000_DCA_TXCTRL_DATA_RRO_EN |
88
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
89
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
90
+ [TXCTL8] = E1000_DCA_TXCTRL_DATA_RRO_EN |
91
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
92
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
93
+ [TXCTL9] = E1000_DCA_TXCTRL_DATA_RRO_EN |
94
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
95
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
96
+ [TXCTL10] = E1000_DCA_TXCTRL_DATA_RRO_EN |
97
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
98
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
99
+ [TXCTL11] = E1000_DCA_TXCTRL_DATA_RRO_EN |
100
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
101
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
102
+ [TXCTL12] = E1000_DCA_TXCTRL_DATA_RRO_EN |
103
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
104
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
105
+ [TXCTL13] = E1000_DCA_TXCTRL_DATA_RRO_EN |
106
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
107
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
108
+ [TXCTL14] = E1000_DCA_TXCTRL_DATA_RRO_EN |
109
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
110
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
111
+ [TXCTL15] = E1000_DCA_TXCTRL_DATA_RRO_EN |
112
+ E1000_DCA_TXCTRL_TX_WB_RO_EN |
113
+ E1000_DCA_TXCTRL_DESC_RRO_EN,
114
};
115
116
static void igb_reset(IGBCore *core, bool sw)
117
--
118
2.7.4
119
120
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Section 7.3.4.1 says:
4
> When auto-clear is enabled for an interrupt cause, the EICR bit is
5
> set when a cause event mapped to this vector occurs. When the EITR
6
> Counter reaches zero, the MSI-X message is sent on PCIe. Then the
7
> EICR bit is cleared and enabled to be set by a new cause event
8
9
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
12
hw/net/igb_core.c | 21 ++++++++++++---------
13
1 file changed, 12 insertions(+), 9 deletions(-)
14
15
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/igb_core.c
18
+++ b/hw/net/igb_core.c
19
@@ -XXX,XX +XXX,XX @@ igb_lower_legacy_irq(IGBCore *core)
20
pci_set_irq(core->owner, 0);
21
}
22
23
-static void igb_msix_notify(IGBCore *core, unsigned int vector)
24
+static void igb_msix_notify(IGBCore *core, unsigned int cause)
25
{
26
PCIDevice *dev = core->owner;
27
uint16_t vfn;
28
+ uint32_t effective_eiac;
29
+ unsigned int vector;
30
31
- vfn = 8 - (vector + 2) / IGBVF_MSIX_VEC_NUM;
32
+ vfn = 8 - (cause + 2) / IGBVF_MSIX_VEC_NUM;
33
if (vfn < pcie_sriov_num_vfs(core->owner)) {
34
dev = pcie_sriov_get_vf_at_index(core->owner, vfn);
35
assert(dev);
36
- vector = (vector + 2) % IGBVF_MSIX_VEC_NUM;
37
- } else if (vector >= IGB_MSIX_VEC_NUM) {
38
+ vector = (cause + 2) % IGBVF_MSIX_VEC_NUM;
39
+ } else if (cause >= IGB_MSIX_VEC_NUM) {
40
qemu_log_mask(LOG_GUEST_ERROR,
41
"igb: Tried to use vector unavailable for PF");
42
return;
43
+ } else {
44
+ vector = cause;
45
}
46
47
msix_notify(dev, vector);
48
+
49
+ trace_e1000e_irq_icr_clear_eiac(core->mac[EICR], core->mac[EIAC]);
50
+ effective_eiac = core->mac[EIAC] & BIT(cause);
51
+ core->mac[EICR] &= ~effective_eiac;
52
}
53
54
static inline void
55
@@ -XXX,XX +XXX,XX @@ igb_eitr_should_postpone(IGBCore *core, int idx)
56
static void igb_send_msix(IGBCore *core)
57
{
58
uint32_t causes = core->mac[EICR] & core->mac[EIMS];
59
- uint32_t effective_eiac;
60
int vector;
61
62
for (vector = 0; vector < IGB_INTR_NUM; ++vector) {
63
@@ -XXX,XX +XXX,XX @@ static void igb_send_msix(IGBCore *core)
64
65
trace_e1000e_irq_msix_notify_vec(vector);
66
igb_msix_notify(core, vector);
67
-
68
- trace_e1000e_irq_icr_clear_eiac(core->mac[EICR], core->mac[EIAC]);
69
- effective_eiac = core->mac[EIAC] & BIT(vector);
70
- core->mac[EICR] &= ~effective_eiac;
71
}
72
}
73
}
74
--
75
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Rename variable "n" to "causes", which properly represents the content
4
of the variable.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/e1000e_core.c | 18 +++++++++---------
10
1 file changed, 9 insertions(+), 9 deletions(-)
11
12
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
13
index XXXXXXX..XXXXXXX 100644
14
--- a/hw/net/e1000e_core.c
15
+++ b/hw/net/e1000e_core.c
16
@@ -XXX,XX +XXX,XX @@ static ssize_t
17
e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
18
bool has_vnet)
19
{
20
- uint32_t n = 0;
21
+ uint32_t causes = 0;
22
uint8_t buf[ETH_ZLEN];
23
struct iovec min_iov;
24
size_t size, orig_size;
25
@@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
26
27
/* Perform small receive detection (RSRPD) */
28
if (total_size < core->mac[RSRPD]) {
29
- n |= E1000_ICS_SRPD;
30
+ causes |= E1000_ICS_SRPD;
31
}
32
33
/* Perform ACK receive detection */
34
if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS) &&
35
(e1000e_is_tcp_ack(core, core->rx_pkt))) {
36
- n |= E1000_ICS_ACK;
37
+ causes |= E1000_ICS_ACK;
38
}
39
40
/* Check if receive descriptor minimum threshold hit */
41
rdmts_hit = e1000e_rx_descr_threshold_hit(core, rxr.i);
42
- n |= e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit);
43
+ causes |= e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit);
44
45
trace_e1000e_rx_written_to_guest(rxr.i->idx);
46
} else {
47
- n |= E1000_ICS_RXO;
48
+ causes |= E1000_ICS_RXO;
49
retval = 0;
50
51
trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
52
}
53
54
- if (!e1000e_intrmgr_delay_rx_causes(core, &n)) {
55
- trace_e1000e_rx_interrupt_set(n);
56
- e1000e_set_interrupt_cause(core, n);
57
+ if (!e1000e_intrmgr_delay_rx_causes(core, &causes)) {
58
+ trace_e1000e_rx_interrupt_set(causes);
59
+ e1000e_set_interrupt_cause(core, causes);
60
} else {
61
- trace_e1000e_rx_interrupt_delayed(n);
62
+ trace_e1000e_rx_interrupt_delayed(causes);
63
}
64
65
return retval;
66
--
67
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Rename variable "n" to "causes", which properly represents the content
4
of the variable.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/net/igb_core.c | 12 ++++++------
11
1 file changed, 6 insertions(+), 6 deletions(-)
12
13
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/net/igb_core.c
16
+++ b/hw/net/igb_core.c
17
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
18
bool has_vnet, bool *external_tx)
19
{
20
uint16_t queues = 0;
21
- uint32_t n = 0;
22
+ uint32_t causes = 0;
23
union {
24
L2Header l2_header;
25
uint8_t octets[ETH_ZLEN];
26
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
27
e1000x_fcs_len(core->mac);
28
29
if (!igb_has_rxbufs(core, rxr.i, total_size)) {
30
- n |= E1000_ICS_RXO;
31
+ causes |= E1000_ICS_RXO;
32
trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
33
continue;
34
}
35
36
- n |= E1000_ICR_RXDW;
37
+ causes |= E1000_ICR_RXDW;
38
39
igb_rx_fix_l4_csum(core, core->rx_pkt);
40
igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
41
42
/* Check if receive descriptor minimum threshold hit */
43
if (igb_rx_descr_threshold_hit(core, rxr.i)) {
44
- n |= E1000_ICS_RXDMT0;
45
+ causes |= E1000_ICS_RXDMT0;
46
}
47
48
core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx);
49
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
50
trace_e1000e_rx_written_to_guest(rxr.i->idx);
51
}
52
53
- trace_e1000e_rx_interrupt_set(n);
54
- igb_set_interrupt_cause(core, n);
55
+ trace_e1000e_rx_interrupt_set(causes);
56
+ igb_set_interrupt_cause(core, causes);
57
58
return orig_size;
59
}
60
--
61
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
The uses of uint8_t pointers were misleading as they are never accessed
4
as an array of octets and it even requires more strict alignment to
5
access as struct eth_header.
6
7
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
include/net/eth.h | 4 ++--
12
net/eth.c | 6 +++---
13
2 files changed, 5 insertions(+), 5 deletions(-)
14
15
diff --git a/include/net/eth.h b/include/net/eth.h
16
index XXXXXXX..XXXXXXX 100644
17
--- a/include/net/eth.h
18
+++ b/include/net/eth.h
19
@@ -XXX,XX +XXX,XX @@ eth_get_pkt_tci(const void *p)
20
21
size_t
22
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
23
- uint8_t *new_ehdr_buf,
24
+ void *new_ehdr_buf,
25
uint16_t *payload_offset, uint16_t *tci);
26
27
size_t
28
eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
29
- uint16_t vet, uint8_t *new_ehdr_buf,
30
+ uint16_t vet, void *new_ehdr_buf,
31
uint16_t *payload_offset, uint16_t *tci);
32
33
uint16_t
34
diff --git a/net/eth.c b/net/eth.c
35
index XXXXXXX..XXXXXXX 100644
36
--- a/net/eth.c
37
+++ b/net/eth.c
38
@@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
39
40
size_t
41
eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
42
- uint8_t *new_ehdr_buf,
43
+ void *new_ehdr_buf,
44
uint16_t *payload_offset, uint16_t *tci)
45
{
46
struct vlan_header vlan_hdr;
47
- struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
48
+ struct eth_header *new_ehdr = new_ehdr_buf;
49
50
size_t copied = iov_to_buf(iov, iovcnt, iovoff,
51
new_ehdr, sizeof(*new_ehdr));
52
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
53
54
size_t
55
eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
56
- uint16_t vet, uint8_t *new_ehdr_buf,
57
+ uint16_t vet, void *new_ehdr_buf,
58
uint16_t *payload_offset, uint16_t *tci)
59
{
60
struct vlan_header vlan_hdr;
61
--
62
2.7.4
63
64
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
It is possible to have another VLAN tag even if the packet is already
4
tagged.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/net_tx_pkt.c | 16 +++++++---------
10
include/net/eth.h | 4 ++--
11
net/eth.c | 22 ++++++----------------
12
3 files changed, 15 insertions(+), 27 deletions(-)
13
14
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/net_tx_pkt.c
17
+++ b/hw/net/net_tx_pkt.c
18
@@ -XXX,XX +XXX,XX @@ struct NetTxPkt {
19
20
struct iovec *vec;
21
22
- uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
23
+ struct {
24
+ struct eth_header eth;
25
+ struct vlan_header vlan[3];
26
+ } l2_hdr;
27
union {
28
struct ip_header ip;
29
struct ip6_header ip6;
30
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable,
31
void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt,
32
uint16_t vlan, uint16_t vlan_ethtype)
33
{
34
- bool is_new;
35
assert(pkt);
36
37
eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base,
38
- vlan, vlan_ethtype, &is_new);
39
+ &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len,
40
+ vlan, vlan_ethtype);
41
42
- /* update l2hdrlen */
43
- if (is_new) {
44
- pkt->hdr_len += sizeof(struct vlan_header);
45
- pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +=
46
- sizeof(struct vlan_header);
47
- }
48
+ pkt->hdr_len += sizeof(struct vlan_header);
49
}
50
51
bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len)
52
diff --git a/include/net/eth.h b/include/net/eth.h
53
index XXXXXXX..XXXXXXX 100644
54
--- a/include/net/eth.h
55
+++ b/include/net/eth.h
56
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
57
uint16_t
58
eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len);
59
60
-void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
61
- uint16_t vlan_ethtype, bool *is_new);
62
+void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
63
+ uint16_t vlan_tag, uint16_t vlan_ethtype);
64
65
66
uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto);
67
diff --git a/net/eth.c b/net/eth.c
68
index XXXXXXX..XXXXXXX 100644
69
--- a/net/eth.c
70
+++ b/net/eth.c
71
@@ -XXX,XX +XXX,XX @@
72
#include "net/checksum.h"
73
#include "net/tap.h"
74
75
-void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag,
76
- uint16_t vlan_ethtype, bool *is_new)
77
+void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size,
78
+ uint16_t vlan_tag, uint16_t vlan_ethtype)
79
{
80
struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr);
81
82
- switch (be16_to_cpu(ehdr->h_proto)) {
83
- case ETH_P_VLAN:
84
- case ETH_P_DVLAN:
85
- /* vlan hdr exists */
86
- *is_new = false;
87
- break;
88
-
89
- default:
90
- /* No VLAN header, put a new one */
91
- vhdr->h_proto = ehdr->h_proto;
92
- ehdr->h_proto = cpu_to_be16(vlan_ethtype);
93
- *is_new = true;
94
- break;
95
- }
96
+ memmove(vhdr + 1, vhdr, *ehdr_size - ETH_HLEN);
97
vhdr->h_tci = cpu_to_be16(vlan_tag);
98
+ vhdr->h_proto = ehdr->h_proto;
99
+ ehdr->h_proto = cpu_to_be16(vlan_ethtype);
100
+ *ehdr_size += sizeof(*vhdr);
101
}
102
103
uint8_t
104
--
105
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
eth_strip_vlan and eth_strip_vlan_ex refer to ehdr_buf as struct
4
eth_header. Enforce alignment for the structure.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/net/net_rx_pkt.c | 11 +++++++----
11
1 file changed, 7 insertions(+), 4 deletions(-)
12
13
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/net/net_rx_pkt.c
16
+++ b/hw/net/net_rx_pkt.c
17
@@ -XXX,XX +XXX,XX @@
18
19
struct NetRxPkt {
20
struct virtio_net_hdr virt_hdr;
21
- uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)];
22
+ struct {
23
+ struct eth_header eth;
24
+ struct vlan_header vlan;
25
+ } ehdr_buf;
26
struct iovec *vec;
27
uint16_t vec_len_total;
28
uint16_t vec_len;
29
@@ -XXX,XX +XXX,XX @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt,
30
if (pkt->ehdr_buf_len) {
31
net_rx_pkt_iovec_realloc(pkt, iovcnt + 1);
32
33
- pkt->vec[0].iov_base = pkt->ehdr_buf;
34
+ pkt->vec[0].iov_base = &pkt->ehdr_buf;
35
pkt->vec[0].iov_len = pkt->ehdr_buf_len;
36
37
pkt->tot_len = pllen + pkt->ehdr_buf_len;
38
@@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
39
assert(pkt);
40
41
if (strip_vlan) {
42
- pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf,
43
+ pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, &pkt->ehdr_buf,
44
&ploff, &tci);
45
} else {
46
pkt->ehdr_buf_len = 0;
47
@@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
48
49
if (strip_vlan) {
50
pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
51
- pkt->ehdr_buf,
52
+ &pkt->ehdr_buf,
53
&ploff, &tci);
54
} else {
55
pkt->ehdr_buf_len = 0;
56
--
57
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
GPIE.Multiple_MSIX is not set by default, and needs to be set to get
4
interrupts from multiple MSI-X vectors.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
tests/qtest/libqos/igb.c | 1 +
11
1 file changed, 1 insertion(+)
12
13
diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/tests/qtest/libqos/igb.c
16
+++ b/tests/qtest/libqos/igb.c
17
@@ -XXX,XX +XXX,XX @@ static void igb_pci_start_hw(QOSGraphObject *obj)
18
e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN);
19
20
/* Enable all interrupts */
21
+ e1000e_macreg_write(&d->e1000e, E1000_GPIE, E1000_GPIE_MSIX_MODE);
22
e1000e_macreg_write(&d->e1000e, E1000_IMS, 0xFFFFFFFF);
23
e1000e_macreg_write(&d->e1000e, E1000_EIMS, 0xFFFFFFFF);
24
25
--
26
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
---
7
hw/net/igb_core.c | 9 +++++++--
8
1 file changed, 7 insertions(+), 2 deletions(-)
9
10
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
11
index XXXXXXX..XXXXXXX 100644
12
--- a/hw/net/igb_core.c
13
+++ b/hw/net/igb_core.c
14
@@ -XXX,XX +XXX,XX @@ igb_update_interrupt_state(IGBCore *core)
15
16
icr = core->mac[ICR] & core->mac[IMS];
17
18
- if (msix_enabled(core->owner)) {
19
+ if (core->mac[GPIE] & E1000_GPIE_MSIX_MODE) {
20
if (icr) {
21
causes = 0;
22
if (icr & E1000_ICR_DRSTA) {
23
@@ -XXX,XX +XXX,XX @@ igb_update_interrupt_state(IGBCore *core)
24
trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
25
core->mac[ICR], core->mac[IMS]);
26
27
- if (msi_enabled(core->owner)) {
28
+ if (msix_enabled(core->owner)) {
29
+ if (icr) {
30
+ trace_e1000e_irq_msix_notify_vec(0);
31
+ msix_notify(core->owner, 0);
32
+ }
33
+ } else if (msi_enabled(core->owner)) {
34
if (icr) {
35
msi_notify(core->owner, 0);
36
}
37
--
38
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
e1000e does not support using UDP for RSS hash, but igb does.
4
5
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
6
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/igb_core.c | 16 ++++++++++++++++
10
hw/net/igb_regs.h | 3 +++
11
2 files changed, 19 insertions(+)
12
13
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/net/igb_core.c
16
+++ b/hw/net/igb_core.c
17
@@ -XXX,XX +XXX,XX @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt *pkt)
18
return E1000_MRQ_RSS_TYPE_IPV4TCP;
19
}
20
21
+ if (l4hdr_proto == ETH_L4_HDR_PROTO_UDP &&
22
+ (core->mac[MRQC] & E1000_MRQC_RSS_FIELD_IPV4_UDP)) {
23
+ return E1000_MRQ_RSS_TYPE_IPV4UDP;
24
+ }
25
+
26
if (E1000_MRQC_EN_IPV4(core->mac[MRQC])) {
27
return E1000_MRQ_RSS_TYPE_IPV4;
28
}
29
@@ -XXX,XX +XXX,XX @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt *pkt)
30
return E1000_MRQ_RSS_TYPE_IPV6TCPEX;
31
}
32
33
+ if (l4hdr_proto == ETH_L4_HDR_PROTO_UDP &&
34
+ (core->mac[MRQC] & E1000_MRQC_RSS_FIELD_IPV6_UDP)) {
35
+ return E1000_MRQ_RSS_TYPE_IPV6UDP;
36
+ }
37
+
38
if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) {
39
return E1000_MRQ_RSS_TYPE_IPV6EX;
40
}
41
@@ -XXX,XX +XXX,XX @@ igb_rss_calc_hash(IGBCore *core, struct NetRxPkt *pkt, E1000E_RSSInfo *info)
42
case E1000_MRQ_RSS_TYPE_IPV6EX:
43
type = NetPktRssIpV6Ex;
44
break;
45
+ case E1000_MRQ_RSS_TYPE_IPV4UDP:
46
+ type = NetPktRssIpV4Udp;
47
+ break;
48
+ case E1000_MRQ_RSS_TYPE_IPV6UDP:
49
+ type = NetPktRssIpV6Udp;
50
+ break;
51
default:
52
assert(false);
53
return 0;
54
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
55
index XXXXXXX..XXXXXXX 100644
56
--- a/hw/net/igb_regs.h
57
+++ b/hw/net/igb_regs.h
58
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
59
60
#define E1000_RSS_QUEUE(reta, hash) (E1000_RETA_VAL(reta, hash) & 0x0F)
61
62
+#define E1000_MRQ_RSS_TYPE_IPV4UDP 7
63
+#define E1000_MRQ_RSS_TYPE_IPV6UDP 8
64
+
65
#define E1000_STATUS_IOV_MODE 0x00040000
66
67
#define E1000_STATUS_NUM_VFS_SHIFT 14
68
--
69
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
---
7
hw/net/e1000e_core.c | 5 ++++
8
hw/net/igb_core.c | 15 +++++++++++-
9
hw/net/igb_regs.h | 1 +
10
hw/net/net_rx_pkt.c | 64 ++++++++++++++++++++++++++++++++++++++++++---------
11
include/net/eth.h | 4 +++-
12
include/qemu/crc32c.h | 1 +
13
net/eth.c | 4 ++++
14
util/crc32c.c | 8 +++++++
15
8 files changed, 89 insertions(+), 13 deletions(-)
16
17
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
18
index XXXXXXX..XXXXXXX 100644
19
--- a/hw/net/e1000e_core.c
20
+++ b/hw/net/e1000e_core.c
21
@@ -XXX,XX +XXX,XX @@ e1000e_verify_csum_in_sw(E1000ECore *core,
22
return;
23
}
24
25
+ if (l4hdr_proto != ETH_L4_HDR_PROTO_TCP &&
26
+ l4hdr_proto != ETH_L4_HDR_PROTO_UDP) {
27
+ return;
28
+ }
29
+
30
if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) {
31
trace_e1000e_rx_metadata_l4_csum_validation_failed();
32
return;
33
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
34
index XXXXXXX..XXXXXXX 100644
35
--- a/hw/net/igb_core.c
36
+++ b/hw/net/igb_core.c
37
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
38
uint16_t *vlan_tag)
39
{
40
struct virtio_net_hdr *vhdr;
41
- bool hasip4, hasip6;
42
+ bool hasip4, hasip6, csum_valid;
43
EthL4HdrProto l4hdr_proto;
44
45
*status_flags = E1000_RXD_STAT_DD;
46
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
47
*pkt_info |= E1000_ADVRXD_PKT_UDP;
48
break;
49
50
+ case ETH_L4_HDR_PROTO_SCTP:
51
+ *pkt_info |= E1000_ADVRXD_PKT_SCTP;
52
+ break;
53
+
54
default:
55
break;
56
}
57
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
58
59
if (igb_rx_l4_cso_enabled(core)) {
60
switch (l4hdr_proto) {
61
+ case ETH_L4_HDR_PROTO_SCTP:
62
+ if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) {
63
+ trace_e1000e_rx_metadata_l4_csum_validation_failed();
64
+ goto func_exit;
65
+ }
66
+ if (!csum_valid) {
67
+ *status_flags |= E1000_RXDEXT_STATERR_TCPE;
68
+ }
69
+ /* fall through */
70
case ETH_L4_HDR_PROTO_TCP:
71
*status_flags |= E1000_RXD_STAT_TCPCS;
72
break;
73
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
74
index XXXXXXX..XXXXXXX 100644
75
--- a/hw/net/igb_regs.h
76
+++ b/hw/net/igb_regs.h
77
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
78
#define E1000_ADVRXD_PKT_IP6 BIT(6)
79
#define E1000_ADVRXD_PKT_TCP BIT(8)
80
#define E1000_ADVRXD_PKT_UDP BIT(9)
81
+#define E1000_ADVRXD_PKT_SCTP BIT(10)
82
83
static inline uint8_t igb_ivar_entry_rx(uint8_t i)
84
{
85
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
86
index XXXXXXX..XXXXXXX 100644
87
--- a/hw/net/net_rx_pkt.c
88
+++ b/hw/net/net_rx_pkt.c
89
@@ -XXX,XX +XXX,XX @@
90
*/
91
92
#include "qemu/osdep.h"
93
+#include "qemu/crc32c.h"
94
#include "trace.h"
95
#include "net_rx_pkt.h"
96
#include "net/checksum.h"
97
@@ -XXX,XX +XXX,XX @@ _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt)
98
return csum;
99
}
100
101
-bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)
102
+static bool
103
+_net_rx_pkt_validate_sctp_sum(struct NetRxPkt *pkt)
104
{
105
- uint16_t csum;
106
+ size_t csum_off;
107
+ size_t off = pkt->l4hdr_off;
108
+ size_t vec_len = pkt->vec_len;
109
+ struct iovec *vec;
110
+ uint32_t calculated = 0;
111
+ uint32_t original;
112
+ bool valid;
113
114
- trace_net_rx_pkt_l4_csum_validate_entry();
115
+ for (vec = pkt->vec; vec->iov_len < off; vec++) {
116
+ off -= vec->iov_len;
117
+ vec_len--;
118
+ }
119
120
- if (pkt->l4hdr_info.proto != ETH_L4_HDR_PROTO_TCP &&
121
- pkt->l4hdr_info.proto != ETH_L4_HDR_PROTO_UDP) {
122
- trace_net_rx_pkt_l4_csum_validate_not_xxp();
123
+ csum_off = off + 8;
124
+
125
+ if (!iov_to_buf(vec, vec_len, csum_off, &original, sizeof(original))) {
126
return false;
127
}
128
129
- if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP &&
130
- pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
131
- trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
132
+ if (!iov_from_buf(vec, vec_len, csum_off,
133
+ &calculated, sizeof(calculated))) {
134
return false;
135
}
136
137
+ calculated = crc32c(0xffffffff,
138
+ (uint8_t *)vec->iov_base + off, vec->iov_len - off);
139
+ calculated = iov_crc32c(calculated ^ 0xffffffff, vec + 1, vec_len - 1);
140
+ valid = calculated == le32_to_cpu(original);
141
+ iov_from_buf(vec, vec_len, csum_off, &original, sizeof(original));
142
+
143
+ return valid;
144
+}
145
+
146
+bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid)
147
+{
148
+ uint32_t csum;
149
+
150
+ trace_net_rx_pkt_l4_csum_validate_entry();
151
+
152
if (pkt->hasip4 && pkt->ip4hdr_info.fragment) {
153
trace_net_rx_pkt_l4_csum_validate_ip4_fragment();
154
return false;
155
}
156
157
- csum = _net_rx_pkt_calc_l4_csum(pkt);
158
+ switch (pkt->l4hdr_info.proto) {
159
+ case ETH_L4_HDR_PROTO_UDP:
160
+ if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) {
161
+ trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum();
162
+ return false;
163
+ }
164
+ /* fall through */
165
+ case ETH_L4_HDR_PROTO_TCP:
166
+ csum = _net_rx_pkt_calc_l4_csum(pkt);
167
+ *csum_valid = ((csum == 0) || (csum == 0xFFFF));
168
+ break;
169
+
170
+ case ETH_L4_HDR_PROTO_SCTP:
171
+ *csum_valid = _net_rx_pkt_validate_sctp_sum(pkt);
172
+ break;
173
174
- *csum_valid = ((csum == 0) || (csum == 0xFFFF));
175
+ default:
176
+ trace_net_rx_pkt_l4_csum_validate_not_xxp();
177
+ return false;
178
+ }
179
180
trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid);
181
182
diff --git a/include/net/eth.h b/include/net/eth.h
183
index XXXXXXX..XXXXXXX 100644
184
--- a/include/net/eth.h
185
+++ b/include/net/eth.h
186
@@ -XXX,XX +XXX,XX @@ struct tcp_hdr {
187
#define IP_HEADER_VERSION_6 (6)
188
#define IP_PROTO_TCP (6)
189
#define IP_PROTO_UDP (17)
190
+#define IP_PROTO_SCTP (132)
191
#define IPTOS_ECN_MASK 0x03
192
#define IPTOS_ECN(x) ((x) & IPTOS_ECN_MASK)
193
#define IPTOS_ECN_CE 0x03
194
@@ -XXX,XX +XXX,XX @@ typedef struct eth_ip4_hdr_info_st {
195
typedef enum EthL4HdrProto {
196
ETH_L4_HDR_PROTO_INVALID,
197
ETH_L4_HDR_PROTO_TCP,
198
- ETH_L4_HDR_PROTO_UDP
199
+ ETH_L4_HDR_PROTO_UDP,
200
+ ETH_L4_HDR_PROTO_SCTP
201
} EthL4HdrProto;
202
203
typedef struct eth_l4_hdr_info_st {
204
diff --git a/include/qemu/crc32c.h b/include/qemu/crc32c.h
205
index XXXXXXX..XXXXXXX 100644
206
--- a/include/qemu/crc32c.h
207
+++ b/include/qemu/crc32c.h
208
@@ -XXX,XX +XXX,XX @@
209
210
211
uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length);
212
+uint32_t iov_crc32c(uint32_t crc, const struct iovec *iov, size_t iov_cnt);
213
214
#endif
215
diff --git a/net/eth.c b/net/eth.c
216
index XXXXXXX..XXXXXXX 100644
217
--- a/net/eth.c
218
+++ b/net/eth.c
219
@@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff,
220
*l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp);
221
}
222
break;
223
+
224
+ case IP_PROTO_SCTP:
225
+ l4hdr_info->proto = ETH_L4_HDR_PROTO_SCTP;
226
+ break;
227
}
228
}
229
230
diff --git a/util/crc32c.c b/util/crc32c.c
231
index XXXXXXX..XXXXXXX 100644
232
--- a/util/crc32c.c
233
+++ b/util/crc32c.c
234
@@ -XXX,XX +XXX,XX @@ uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length)
235
return crc^0xffffffff;
236
}
237
238
+uint32_t iov_crc32c(uint32_t crc, const struct iovec *iov, size_t iov_cnt)
239
+{
240
+ while (iov_cnt--) {
241
+ crc = crc32c(crc, iov->iov_base, iov->iov_len) ^ 0xffffffff;
242
+ iov++;
243
+ }
244
+ return crc ^ 0xffffffff;
245
+}
246
--
247
2.7.4
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
QEMU can emulate hubs to connect NICs and netdevs. This is currently
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
primarily used for the mis-named 'vlan' feature of the networking
4
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
5
subsystem. Now the 'vlan' feature has been marked as deprecated, since
6
its name is rather confusing and the users often rather mis-configure
7
their network when trying to use it. But while the 'vlan' parameter
8
should be removed at one point in time, the basic idea of emulating
9
a hub in QEMU is still good: It's useful for bundling up the output of
10
multiple NICs into one single l2tp netdev for example.
11
12
Now to be able to use the hubport feature without 'vlan's, there is one
13
missing piece: The possibility to connect a hubport to a netdev, too.
14
This patch adds this possibility by introducing a new "netdev=..."
15
parameter to the hubports.
16
17
To bundle up the output of multiple NICs into one socket netdev, you can
18
now run QEMU with these parameters for example:
19
20
qemu-system-ppc64 ... -netdev socket,id=s1,connect=:11122 \
21
-netdev hubport,hubid=1,id=h1,netdev=s1 \
22
-netdev hubport,hubid=1,id=h2 -device e1000,netdev=h2 \
23
-netdev hubport,hubid=1,id=h3 -device virtio-net-pci,netdev=h3
24
25
For using the socket netdev, you have got to start another QEMU as the
26
receiving side first, for example with network dumping enabled:
27
28
qemu-system-x86_64 -M isapc -netdev socket,id=s0,listen=:11122 \
29
-device ne2k_isa,netdev=s0 \
30
-object filter-dump,id=f1,netdev=s0,file=/tmp/dump.dat
31
32
After the ppc64 guest tried to boot from both NICs, you can see in the
33
dump file (using Wireshark, for example), that the output of both NICs
34
(the e1000 and the virtio-net-pci) has been successfully transferred
35
via the socket netdev in this case.
36
37
Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
38
Signed-off-by: Thomas Huth <thuth@redhat.com>
39
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
40
---
6
---
41
net/hub.c | 27 +++++++++++++++++++++------
7
hw/net/igb_core.c | 12 +++++++-----
42
net/hub.h | 3 ++-
8
hw/net/net_tx_pkt.c | 18 ++++++++++++++++++
43
net/net.c | 2 +-
9
hw/net/net_tx_pkt.h | 8 ++++++++
44
qapi/net.json | 4 +++-
10
3 files changed, 33 insertions(+), 5 deletions(-)
45
qemu-options.hx | 8 +++++---
46
5 files changed, 32 insertions(+), 12 deletions(-)
47
11
48
diff --git a/net/hub.c b/net/hub.c
12
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
49
index XXXXXXX..XXXXXXX 100644
13
index XXXXXXX..XXXXXXX 100644
50
--- a/net/hub.c
14
--- a/hw/net/igb_core.c
51
+++ b/net/hub.c
15
+++ b/hw/net/igb_core.c
16
@@ -XXX,XX +XXX,XX @@ igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
17
static bool
18
igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
19
{
20
+ uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
21
+
22
if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
23
- uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
24
uint32_t mss = tx->ctx[idx].mss_l4len_idx >> E1000_ADVTXD_MSS_SHIFT;
25
if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
26
return false;
27
@@ -XXX,XX +XXX,XX @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
28
return true;
29
}
30
31
- if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
32
- if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
33
- return false;
34
- }
35
+ if ((tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) &&
36
+ !((tx->ctx[idx].type_tucmd_mlhl & E1000_ADVTXD_TUCMD_L4T_SCTP) ?
37
+ net_tx_pkt_update_sctp_checksum(tx->tx_pkt) :
38
+ net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0))) {
39
+ return false;
40
}
41
42
if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
43
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
44
index XXXXXXX..XXXXXXX 100644
45
--- a/hw/net/net_tx_pkt.c
46
+++ b/hw/net/net_tx_pkt.c
52
@@ -XXX,XX +XXX,XX @@
47
@@ -XXX,XX +XXX,XX @@
53
*/
48
*/
54
49
55
#include "qemu/osdep.h"
50
#include "qemu/osdep.h"
56
+#include "qapi/error.h"
51
+#include "qemu/crc32c.h"
57
#include "monitor/monitor.h"
52
#include "net/eth.h"
58
#include "net/net.h"
53
#include "net/checksum.h"
59
#include "clients.h"
54
#include "net/tap.h"
60
@@ -XXX,XX +XXX,XX @@ static NetClientInfo net_hub_port_info = {
55
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
61
.cleanup = net_hub_port_cleanup,
56
pkt->virt_hdr.csum_offset, &csum, sizeof(csum));
62
};
63
64
-static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
65
+static NetHubPort *net_hub_port_new(NetHub *hub, const char *name,
66
+ NetClientState *hubpeer)
67
{
68
NetClientState *nc;
69
NetHubPort *port;
70
@@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
71
name = default_name;
72
}
73
74
- nc = qemu_new_net_client(&net_hub_port_info, NULL, "hub", name);
75
+ nc = qemu_new_net_client(&net_hub_port_info, hubpeer, "hub", name);
76
port = DO_UPCAST(NetHubPort, nc, nc);
77
port->id = id;
78
port->hub = hub;
79
@@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name)
80
81
/**
82
* Create a port on a given hub
83
+ * @hub_id: Number of the hub
84
* @name: Net client name or NULL for default name.
85
+ * @hubpeer: Peer to use (if "netdev=id" has been specified)
86
*
87
* If there is no existing hub with the given id then a new hub is created.
88
*/
89
-NetClientState *net_hub_add_port(int hub_id, const char *name)
90
+NetClientState *net_hub_add_port(int hub_id, const char *name,
91
+ NetClientState *hubpeer)
92
{
93
NetHub *hub;
94
NetHubPort *port;
95
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_add_port(int hub_id, const char *name)
96
hub = net_hub_new(hub_id);
97
}
98
99
- port = net_hub_port_new(hub, name);
100
+ port = net_hub_port_new(hub, name, hubpeer);
101
return &port->nc;
102
}
57
}
103
58
104
@@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_port_find(int hub_id)
59
+bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt)
105
}
60
+{
106
}
61
+ uint32_t csum = 0;
107
62
+ struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG;
108
- nc = net_hub_add_port(hub_id, NULL);
63
+
109
+ nc = net_hub_add_port(hub_id, NULL, NULL);
64
+ if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
110
return nc;
65
+ return false;
111
}
112
113
@@ -XXX,XX +XXX,XX @@ int net_init_hubport(const Netdev *netdev, const char *name,
114
NetClientState *peer, Error **errp)
115
{
116
const NetdevHubPortOptions *hubport;
117
+ NetClientState *hubpeer = NULL;
118
119
assert(netdev->type == NET_CLIENT_DRIVER_HUBPORT);
120
assert(!peer);
121
hubport = &netdev->u.hubport;
122
123
- net_hub_add_port(hubport->hubid, name);
124
+ if (hubport->has_netdev) {
125
+ hubpeer = qemu_find_netdev(hubport->netdev);
126
+ if (!hubpeer) {
127
+ error_setg(errp, "netdev '%s' not found", hubport->netdev);
128
+ return -1;
129
+ }
130
+ }
66
+ }
131
+
67
+
132
+ net_hub_add_port(hubport->hubid, name, hubpeer);
68
+ csum = cpu_to_le32(iov_crc32c(0xffffffff, pl_start_frag, pkt->payload_frags));
69
+ if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) {
70
+ return false;
71
+ }
133
+
72
+
134
return 0;
73
+ return true;
135
}
74
+}
136
75
+
137
diff --git a/net/hub.h b/net/hub.h
76
static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt)
77
{
78
pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len +
79
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
138
index XXXXXXX..XXXXXXX 100644
80
index XXXXXXX..XXXXXXX 100644
139
--- a/net/hub.h
81
--- a/hw/net/net_tx_pkt.h
140
+++ b/net/hub.h
82
+++ b/hw/net/net_tx_pkt.h
141
@@ -XXX,XX +XXX,XX @@
83
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt);
142
84
void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt);
143
#include "qemu-common.h"
85
144
86
/**
145
-NetClientState *net_hub_add_port(int hub_id, const char *name);
87
+ * Calculate the SCTP checksum.
146
+NetClientState *net_hub_add_port(int hub_id, const char *name,
88
+ *
147
+ NetClientState *hubpeer);
89
+ * @pkt: packet
148
NetClientState *net_hub_find_client_by_name(int hub_id, const char *name);
90
+ *
149
void net_hub_info(Monitor *mon);
91
+ */
150
void net_hub_check_clients(void);
92
+bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt);
151
diff --git a/net/net.c b/net/net.c
93
+
152
index XXXXXXX..XXXXXXX 100644
94
+/**
153
--- a/net/net.c
95
* get length of all populated data.
154
+++ b/net/net.c
96
*
155
@@ -XXX,XX +XXX,XX @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp)
97
* @pkt: packet
156
/* Do not add to a vlan if it's a nic with a netdev= parameter. */
157
if (netdev->type != NET_CLIENT_DRIVER_NIC ||
158
!opts->u.nic.has_netdev) {
159
- peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL);
160
+ peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL, NULL);
161
}
162
163
if (net->has_vlan && !vlan_warned) {
164
diff --git a/qapi/net.json b/qapi/net.json
165
index XXXXXXX..XXXXXXX 100644
166
--- a/qapi/net.json
167
+++ b/qapi/net.json
168
@@ -XXX,XX +XXX,XX @@
169
# Connect two or more net clients through a software hub.
170
#
171
# @hubid: hub identifier number
172
+# @netdev: used to connect hub to a netdev instead of a device (since 2.12)
173
#
174
# Since: 1.2
175
##
176
{ 'struct': 'NetdevHubPortOptions',
177
'data': {
178
- 'hubid': 'int32' } }
179
+ 'hubid': 'int32',
180
+ '*netdev': 'str' } }
181
182
##
183
# @NetdevNetmapOptions:
184
diff --git a/qemu-options.hx b/qemu-options.hx
185
index XXXXXXX..XXXXXXX 100644
186
--- a/qemu-options.hx
187
+++ b/qemu-options.hx
188
@@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev,
189
#endif
190
"-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n"
191
" configure a vhost-user network, backed by a chardev 'dev'\n"
192
- "-netdev hubport,id=str,hubid=n\n"
193
+ "-netdev hubport,id=str,hubid=n[,netdev=nd]\n"
194
" configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL)
195
DEF("net", HAS_ARG, QEMU_OPTION_net,
196
"-net nic[,vlan=n][,netdev=nd][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n"
197
@@ -XXX,XX +XXX,XX @@ vde_switch -F -sock /tmp/myswitch
198
qemu-system-i386 linux.img -net nic -net vde,sock=/tmp/myswitch
199
@end example
200
201
-@item -netdev hubport,id=@var{id},hubid=@var{hubid}
202
+@item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}]
203
204
Create a hub port on QEMU "vlan" @var{hubid}.
205
206
The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single
207
netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the
208
-required hub automatically.
209
+required hub automatically. Alternatively, you can also connect the hubport
210
+to another netdev with ID @var{nd} by using the @option{netdev=@var{nd}}
211
+option.
212
213
@item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n]
214
215
--
98
--
216
2.7.4
99
2.7.4
217
218
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
6
hw/net/e1000e_core.c | 3 ++-
7
hw/net/igb_core.c | 14 ++++++++++++--
8
hw/net/net_rx_pkt.c | 15 ++++++---------
9
hw/net/net_rx_pkt.h | 19 ++++++++++---------
10
include/net/eth.h | 4 ++--
11
net/eth.c | 52 +++++++++++++++++++++++++++++++++-------------------
12
6 files changed, 65 insertions(+), 42 deletions(-)
13
14
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/e1000e_core.c
17
+++ b/hw/net/e1000e_core.c
18
@@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt,
19
}
20
21
net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
22
- e1000x_vlan_enabled(core->mac), core->mac[VET]);
23
+ e1000x_vlan_enabled(core->mac) ? 0 : -1,
24
+ core->mac[VET], 0);
25
26
e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info);
27
e1000e_rx_ring_init(core, &rxr, rss_info.queue);
28
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
29
index XXXXXXX..XXXXXXX 100644
30
--- a/hw/net/igb_core.c
31
+++ b/hw/net/igb_core.c
32
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
33
E1000E_RxRing rxr;
34
E1000E_RSSInfo rss_info;
35
size_t total_size;
36
+ int strip_vlan_index;
37
int i;
38
39
trace_e1000e_rx_receive_iov(iovcnt);
40
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
41
42
igb_rx_ring_init(core, &rxr, i);
43
44
+ if (!igb_rx_strip_vlan(core, rxr.i)) {
45
+ strip_vlan_index = -1;
46
+ } else if (core->mac[CTRL_EXT] & BIT(26)) {
47
+ strip_vlan_index = 1;
48
+ } else {
49
+ strip_vlan_index = 0;
50
+ }
51
+
52
net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
53
- igb_rx_strip_vlan(core, rxr.i),
54
- core->mac[VET] & 0xffff);
55
+ strip_vlan_index,
56
+ core->mac[VET] & 0xffff,
57
+ core->mac[VET] >> 16);
58
59
total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
60
e1000x_fcs_len(core->mac);
61
diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c
62
index XXXXXXX..XXXXXXX 100644
63
--- a/hw/net/net_rx_pkt.c
64
+++ b/hw/net/net_rx_pkt.c
65
@@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
66
67
void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
68
const struct iovec *iov, int iovcnt,
69
- size_t iovoff, bool strip_vlan,
70
- uint16_t vet)
71
+ size_t iovoff, int strip_vlan_index,
72
+ uint16_t vet, uint16_t vet_ext)
73
{
74
uint16_t tci = 0;
75
uint16_t ploff = iovoff;
76
assert(pkt);
77
78
- if (strip_vlan) {
79
- pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet,
80
- &pkt->ehdr_buf,
81
- &ploff, &tci);
82
- } else {
83
- pkt->ehdr_buf_len = 0;
84
- }
85
+ pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff,
86
+ strip_vlan_index, vet, vet_ext,
87
+ &pkt->ehdr_buf,
88
+ &ploff, &tci);
89
90
pkt->tci = tci;
91
92
diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h
93
index XXXXXXX..XXXXXXX 100644
94
--- a/hw/net/net_rx_pkt.h
95
+++ b/hw/net/net_rx_pkt.h
96
@@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt,
97
/**
98
* attach scatter-gather data to rx packet
99
*
100
-* @pkt: packet
101
-* @iov: received data scatter-gather list
102
-* @iovcnt number of elements in iov
103
-* @iovoff data start offset in the iov
104
-* @strip_vlan: should the module strip vlan from data
105
-* @vet: VLAN tag Ethernet type
106
+* @pkt: packet
107
+* @iov: received data scatter-gather list
108
+* @iovcnt: number of elements in iov
109
+* @iovoff: data start offset in the iov
110
+* @strip_vlan_index: index of Q tag if it is to be stripped. negative otherwise.
111
+* @vet: VLAN tag Ethernet type
112
+* @vet_ext: outer VLAN tag Ethernet type
113
*
114
*/
115
void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt,
116
- const struct iovec *iov, int iovcnt,
117
- size_t iovoff, bool strip_vlan,
118
- uint16_t vet);
119
+ const struct iovec *iov, int iovcnt,
120
+ size_t iovoff, int strip_vlan_index,
121
+ uint16_t vet, uint16_t vet_ext);
122
123
/**
124
* attach data to rx packet
125
diff --git a/include/net/eth.h b/include/net/eth.h
126
index XXXXXXX..XXXXXXX 100644
127
--- a/include/net/eth.h
128
+++ b/include/net/eth.h
129
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
130
uint16_t *payload_offset, uint16_t *tci);
131
132
size_t
133
-eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
134
- uint16_t vet, void *new_ehdr_buf,
135
+eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index,
136
+ uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf,
137
uint16_t *payload_offset, uint16_t *tci);
138
139
uint16_t
140
diff --git a/net/eth.c b/net/eth.c
141
index XXXXXXX..XXXXXXX 100644
142
--- a/net/eth.c
143
+++ b/net/eth.c
144
@@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff,
145
}
146
147
size_t
148
-eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff,
149
- uint16_t vet, void *new_ehdr_buf,
150
+eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index,
151
+ uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf,
152
uint16_t *payload_offset, uint16_t *tci)
153
{
154
struct vlan_header vlan_hdr;
155
- struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf;
156
-
157
- size_t copied = iov_to_buf(iov, iovcnt, iovoff,
158
- new_ehdr, sizeof(*new_ehdr));
159
-
160
- if (copied < sizeof(*new_ehdr)) {
161
- return 0;
162
- }
163
+ uint16_t *new_ehdr_proto;
164
+ size_t new_ehdr_size;
165
+ size_t copied;
166
167
- if (be16_to_cpu(new_ehdr->h_proto) == vet) {
168
- copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr),
169
- &vlan_hdr, sizeof(vlan_hdr));
170
+ switch (index) {
171
+ case 0:
172
+ new_ehdr_proto = &PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto;
173
+ new_ehdr_size = sizeof(struct eth_header);
174
+ copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
175
+ break;
176
177
- if (copied < sizeof(vlan_hdr)) {
178
+ case 1:
179
+ new_ehdr_proto = &PKT_GET_VLAN_HDR(new_ehdr_buf)->h_proto;
180
+ new_ehdr_size = sizeof(struct eth_header) + sizeof(struct vlan_header);
181
+ copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size);
182
+ if (be16_to_cpu(PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto) != vet_ext) {
183
return 0;
184
}
185
+ break;
186
187
- new_ehdr->h_proto = vlan_hdr.h_proto;
188
+ default:
189
+ return 0;
190
+ }
191
192
- *tci = be16_to_cpu(vlan_hdr.h_tci);
193
- *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr);
194
- return sizeof(struct eth_header);
195
+ if (copied < new_ehdr_size || be16_to_cpu(*new_ehdr_proto) != vet) {
196
+ return 0;
197
+ }
198
+
199
+ copied = iov_to_buf(iov, iovcnt, iovoff + new_ehdr_size,
200
+ &vlan_hdr, sizeof(vlan_hdr));
201
+ if (copied < sizeof(vlan_hdr)) {
202
+ return 0;
203
}
204
205
- return 0;
206
+ *new_ehdr_proto = vlan_hdr.h_proto;
207
+ *payload_offset = iovoff + new_ehdr_size + sizeof(vlan_hdr);
208
+ *tci = be16_to_cpu(vlan_hdr.h_tci);
209
+
210
+ return new_ehdr_size;
211
}
212
213
void
214
--
215
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
6
hw/net/igb_core.c | 23 ++++++++++++++++++-----
7
1 file changed, 18 insertions(+), 5 deletions(-)
8
9
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
10
index XXXXXXX..XXXXXXX 100644
11
--- a/hw/net/igb_core.c
12
+++ b/hw/net/igb_core.c
13
@@ -XXX,XX +XXX,XX @@ typedef struct IGBTxPktVmdqCallbackContext {
14
15
typedef struct L2Header {
16
struct eth_header eth;
17
- struct vlan_header vlan;
18
+ struct vlan_header vlan[2];
19
} L2Header;
20
21
static ssize_t
22
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
23
uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
24
uint16_t queues = 0;
25
uint16_t oversized = 0;
26
- uint16_t vid = be16_to_cpu(l2_header->vlan.h_tci) & VLAN_VID_MASK;
27
+ size_t vlan_num = 0;
28
int i;
29
30
memset(rss_info, 0, sizeof(E1000E_RSSInfo));
31
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
32
*external_tx = true;
33
}
34
35
- if (e1000x_is_vlan_packet(ehdr, core->mac[VET] & 0xffff) &&
36
- !e1000x_rx_vlan_filter(core->mac, PKT_GET_VLAN_HDR(ehdr))) {
37
+ if (core->mac[CTRL_EXT] & BIT(26)) {
38
+ if (be16_to_cpu(ehdr->h_proto) == core->mac[VET] >> 16 &&
39
+ be16_to_cpu(l2_header->vlan[0].h_proto) == (core->mac[VET] & 0xffff)) {
40
+ vlan_num = 2;
41
+ }
42
+ } else {
43
+ if (be16_to_cpu(ehdr->h_proto) == (core->mac[VET] & 0xffff)) {
44
+ vlan_num = 1;
45
+ }
46
+ }
47
+
48
+ if (vlan_num &&
49
+ !e1000x_rx_vlan_filter(core->mac, l2_header->vlan + vlan_num - 1)) {
50
return queues;
51
}
52
53
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
54
if (e1000x_vlan_rx_filter_enabled(core->mac)) {
55
uint16_t mask = 0;
56
57
- if (e1000x_is_vlan_packet(ehdr, core->mac[VET] & 0xffff)) {
58
+ if (vlan_num) {
59
+ uint16_t vid = be16_to_cpu(l2_header->vlan[vlan_num - 1].h_tci) & VLAN_VID_MASK;
60
+
61
for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
62
if ((core->mac[VLVF0 + i] & E1000_VLVF_VLANID_MASK) == vid &&
63
(core->mac[VLVF0 + i] & E1000_VLVF_VLANID_ENABLE)) {
64
--
65
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
igb has a configurable size limit for LPE, and uses different limits
4
depending on whether the packet is treated as a VLAN packet.
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
9
---
10
hw/net/igb_core.c | 36 +++++++++++++++++++++---------------
11
1 file changed, 21 insertions(+), 15 deletions(-)
12
13
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/net/igb_core.c
16
+++ b/hw/net/igb_core.c
17
@@ -XXX,XX +XXX,XX @@ igb_rx_l4_cso_enabled(IGBCore *core)
18
return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
19
}
20
21
-static bool
22
-igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
23
+static bool igb_rx_is_oversized(IGBCore *core, const struct eth_header *ehdr,
24
+ size_t size, size_t vlan_num,
25
+ bool lpe, uint16_t rlpml)
26
{
27
- uint16_t pool = qn % IGB_NUM_VM_POOLS;
28
- bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
29
- int max_ethernet_lpe_size =
30
- core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
31
- int max_ethernet_vlan_size = 1522;
32
-
33
- return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
34
+ size_t vlan_header_size = sizeof(struct vlan_header) * vlan_num;
35
+ size_t header_size = sizeof(struct eth_header) + vlan_header_size;
36
+ return lpe ? size + ETH_FCS_LEN > rlpml : size > header_size + ETH_MTU;
37
}
38
39
static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
40
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
41
uint16_t queues = 0;
42
uint16_t oversized = 0;
43
size_t vlan_num = 0;
44
+ bool lpe;
45
+ uint16_t rlpml;
46
int i;
47
48
memset(rss_info, 0, sizeof(E1000E_RSSInfo));
49
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
50
}
51
}
52
53
+ lpe = !!(core->mac[RCTL] & E1000_RCTL_LPE);
54
+ rlpml = core->mac[RLPML];
55
+ if (!(core->mac[RCTL] & E1000_RCTL_SBP) &&
56
+ igb_rx_is_oversized(core, ehdr, size, vlan_num, lpe, rlpml)) {
57
+ trace_e1000x_rx_oversized(size);
58
+ return queues;
59
+ }
60
+
61
if (vlan_num &&
62
!e1000x_rx_vlan_filter(core->mac, l2_header->vlan + vlan_num - 1)) {
63
return queues;
64
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
65
queues &= core->mac[VFRE];
66
if (queues) {
67
for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
68
- if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
69
+ lpe = !!(core->mac[VMOLR0 + i] & E1000_VMOLR_LPE);
70
+ rlpml = core->mac[VMOLR0 + i] & E1000_VMOLR_RLPML_MASK;
71
+ if ((queues & BIT(i)) &&
72
+ igb_rx_is_oversized(core, ehdr, size, vlan_num,
73
+ lpe, rlpml)) {
74
oversized |= BIT(i);
75
}
76
}
77
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
78
iov_to_buf(iov, iovcnt, iov_ofs, &buf, sizeof(buf.l2_header));
79
}
80
81
- /* Discard oversized packets if !LPE and !SBP. */
82
- if (e1000x_is_oversized(core->mac, size)) {
83
- return orig_size;
84
- }
85
-
86
net_rx_pkt_set_packet_type(core->rx_pkt,
87
get_eth_packet_type(&buf.l2_header.eth));
88
net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs);
89
--
90
2.7.4
diff view generated by jsdifflib
1
From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
Packet sizes sometimes differ, for example when the network is busy.
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Even for the same payload, the TCP protocol cannot
5
guarantee that the data is split into packets in the same way,
6
7
like that:
8
We send this payload:
9
------------------------------
10
| header |1|2|3|4|5|6|7|8|9|0|
11
------------------------------
12
13
primary:
14
ppkt1:
15
----------------
16
| header |1|2|3|
17
----------------
18
ppkt2:
19
------------------------
20
| header |4|5|6|7|8|9|0|
21
------------------------
22
23
secondary:
24
spkt1:
25
------------------------------
26
| header |1|2|3|4|5|6|7|8|9|0|
27
------------------------------
28
29
In the original method, ppkt1 and ppkt2 differ in size from
30
spkt1, so they cannot be compared and a checkpoint is triggered.
31
32
I have tested FTP get 200M and 1G file many times, I found that
33
the performance was less than 1% of the native.
34
35
Now I reconstructed the comparison of TCP packets based on the
36
TCP sequence number. First of all, ppkt1 and spkt1 have the same
37
starting sequence number, so they can compare, even though their
38
length is different. Then ppkt1, which has the smaller payload length,
39
is used as the comparison length; if the payload is the same, send
40
out the ppkt1 and record the offset(the length of ppkt1 payload)
41
in spkt1. The next comparison, ppkt2 and spkt1 can be compared
42
from the recorded position of spkt1.
43
44
like that:
45
----------------
46
| header |1|2|3| ppkt1
47
---------|-----|
48
| |
49
---------v-----v--------------
50
| header |1|2|3|4|5|6|7|8|9|0| spkt1
51
---------------|\------------|
52
| \offset |
53
---------v-------------v
54
| header |4|5|6|7|8|9|0| ppkt2
55
------------------------
56
57
In this way, the performance can reach 20% of native in my multiple
58
tests.
59
60
Cc: Zhang Chen <zhangckid@gmail.com>
61
Cc: Li Zhijian <lizhijian@cn.fujitsu.com>
62
Cc: Jason Wang <jasowang@redhat.com>
63
64
Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com>
65
Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com>
66
Signed-off-by: Zhang Chen <zhangckid@gmail.com>
67
Reviewed-by: Zhang Chen <zhangckid@gmail.com>
68
Tested-by: Zhang Chen <zhangckid@gmail.com>
69
Signed-off-by: Jason Wang <jasowang@redhat.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
70
---
5
---
71
net/colo-compare.c | 343 +++++++++++++++++++++++++++++++++++------------------
6
hw/net/igb_common.h | 16 ++++---
72
net/colo.c | 9 ++
7
hw/net/igb_core.c | 129 ++++++++++++++++++++++++++++++++++++++--------------
73
net/colo.h | 15 +++
8
hw/net/igb_regs.h | 23 ++++++++++
74
net/trace-events | 2 +-
9
3 files changed, 127 insertions(+), 41 deletions(-)
75
4 files changed, 250 insertions(+), 119 deletions(-)
76
10
77
diff --git a/net/colo-compare.c b/net/colo-compare.c
11
diff --git a/hw/net/igb_common.h b/hw/net/igb_common.h
78
index XXXXXXX..XXXXXXX 100644
12
index XXXXXXX..XXXXXXX 100644
79
--- a/net/colo-compare.c
13
--- a/hw/net/igb_common.h
80
+++ b/net/colo-compare.c
14
+++ b/hw/net/igb_common.h
81
@@ -XXX,XX +XXX,XX @@
15
@@ -XXX,XX +XXX,XX @@
82
#define COMPARE_READ_LEN_MAX NET_BUFSIZE
16
defreg_indexeda(x, 0), defreg_indexeda(x, 1), \
83
#define MAX_QUEUE_SIZE 1024
17
defreg_indexeda(x, 2), defreg_indexeda(x, 3)
84
18
85
+#define COLO_COMPARE_FREE_PRIMARY 0x01
19
-#define defregv(x) defreg_indexed(x, 0), defreg_indexed(x, 1), \
86
+#define COLO_COMPARE_FREE_SECONDARY 0x02
20
+#define defreg8(x) defreg_indexed(x, 0), defreg_indexed(x, 1), \
87
+
21
defreg_indexed(x, 2), defreg_indexed(x, 3), \
88
/* TODO: Should be configurable */
22
defreg_indexed(x, 4), defreg_indexed(x, 5), \
89
#define REGULAR_PACKET_CHECK_MS 3000
23
defreg_indexed(x, 6), defreg_indexed(x, 7)
90
24
@@ -XXX,XX +XXX,XX @@ enum {
91
@@ -XXX,XX +XXX,XX @@ static gint seq_sorter(Packet *a, Packet *b, gpointer data)
25
defreg(EICS), defreg(EIMS), defreg(EIMC), defreg(EIAM),
92
return ntohl(atcp->th_seq) - ntohl(btcp->th_seq);
26
defreg(EICR), defreg(IVAR_MISC), defreg(GPIE),
27
28
+ defreg(TSYNCRXCFG), defreg8(ETQF),
29
+
30
defreg(RXPBS), defregd(RDBAL), defregd(RDBAH), defregd(RDLEN),
31
defregd(SRRCTL), defregd(RDH), defregd(RDT),
32
defregd(RXDCTL), defregd(RXCTL), defregd(RQDPC), defreg(RA2),
33
@@ -XXX,XX +XXX,XX @@ enum {
34
35
defreg(VT_CTL),
36
37
- defregv(P2VMAILBOX), defregv(V2PMAILBOX), defreg(MBVFICR), defreg(MBVFIMR),
38
+ defreg8(P2VMAILBOX), defreg8(V2PMAILBOX), defreg(MBVFICR), defreg(MBVFIMR),
39
defreg(VFLRE), defreg(VFRE), defreg(VFTE), defreg(WVBR),
40
defreg(QDE), defreg(DTXSWC), defreg_indexed(VLVF, 0),
41
- defregv(VMOLR), defreg(RPLOLR), defregv(VMBMEM), defregv(VMVIR),
42
+ defreg8(VMOLR), defreg(RPLOLR), defreg8(VMBMEM), defreg8(VMVIR),
43
44
- defregv(PVTCTRL), defregv(PVTEICS), defregv(PVTEIMS), defregv(PVTEIMC),
45
- defregv(PVTEIAC), defregv(PVTEIAM), defregv(PVTEICR), defregv(PVFGPRC),
46
- defregv(PVFGPTC), defregv(PVFGORC), defregv(PVFGOTC), defregv(PVFMPRC),
47
- defregv(PVFGPRLBC), defregv(PVFGPTLBC), defregv(PVFGORLBC), defregv(PVFGOTLBC),
48
+ defreg8(PVTCTRL), defreg8(PVTEICS), defreg8(PVTEIMS), defreg8(PVTEIMC),
49
+ defreg8(PVTEIAC), defreg8(PVTEIAM), defreg8(PVTEICR), defreg8(PVFGPRC),
50
+ defreg8(PVFGPTC), defreg8(PVFGORC), defreg8(PVFGOTC), defreg8(PVFMPRC),
51
+ defreg8(PVFGPRLBC), defreg8(PVFGPTLBC), defreg8(PVFGORLBC), defreg8(PVFGOTLBC),
52
53
defreg(MTA_A),
54
55
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
56
index XXXXXXX..XXXXXXX 100644
57
--- a/hw/net/igb_core.c
58
+++ b/hw/net/igb_core.c
59
@@ -XXX,XX +XXX,XX @@ typedef struct L2Header {
60
struct vlan_header vlan[2];
61
} L2Header;
62
63
+typedef struct PTP2 {
64
+ uint8_t message_id_transport_specific;
65
+ uint8_t version_ptp;
66
+ uint16_t message_length;
67
+ uint8_t subdomain_number;
68
+ uint8_t reserved0;
69
+ uint16_t flags;
70
+ uint64_t correction;
71
+ uint8_t reserved1[5];
72
+ uint8_t source_communication_technology;
73
+ uint32_t source_uuid_lo;
74
+ uint16_t source_uuid_hi;
75
+ uint16_t source_port_id;
76
+ uint16_t sequence_id;
77
+ uint8_t control;
78
+ uint8_t log_message_period;
79
+} PTP2;
80
+
81
static ssize_t
82
igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
83
bool has_vnet, bool *external_tx);
84
@@ -XXX,XX +XXX,XX @@ static bool igb_rx_is_oversized(IGBCore *core, const struct eth_header *ehdr,
85
return lpe ? size + ETH_FCS_LEN > rlpml : size > header_size + ETH_MTU;
93
}
86
}
94
87
95
+static void fill_pkt_tcp_info(void *data, uint32_t *max_ack)
88
-static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
96
+{
89
- size_t size, E1000E_RSSInfo *rss_info,
97
+ Packet *pkt = data;
90
- bool *external_tx)
98
+ struct tcphdr *tcphd;
91
+static uint16_t igb_receive_assign(IGBCore *core, const struct iovec *iov,
99
+
92
+ size_t iovcnt, size_t iov_ofs,
100
+ tcphd = (struct tcphdr *)pkt->transport_header;
93
+ const L2Header *l2_header, size_t size,
101
+
94
+ E1000E_RSSInfo *rss_info,
102
+ pkt->tcp_seq = ntohl(tcphd->th_seq);
95
+ uint16_t *etqf, bool *ts, bool *external_tx)
103
+ pkt->tcp_ack = ntohl(tcphd->th_ack);
96
{
104
+ *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack;
97
static const int ta_shift[] = { 4, 3, 2, 0 };
105
+ pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data
98
const struct eth_header *ehdr = &l2_header->eth;
106
+ + (tcphd->th_off << 2) - pkt->vnet_hdr_len;
99
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
107
+ pkt->payload_size = pkt->size - pkt->header_size;
100
uint16_t queues = 0;
108
+ pkt->seq_end = pkt->tcp_seq + pkt->payload_size;
101
uint16_t oversized = 0;
109
+ pkt->flags = tcphd->th_flags;
102
size_t vlan_num = 0;
110
+}
103
+ PTP2 ptp2;
111
+
104
bool lpe;
112
/*
105
uint16_t rlpml;
113
* Return 1 on success, if return 0 means the
106
int i;
114
* packet will be dropped
107
115
*/
108
memset(rss_info, 0, sizeof(E1000E_RSSInfo));
116
-static int colo_insert_packet(GQueue *queue, Packet *pkt)
109
+ *ts = false;
117
+static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack)
110
118
{
111
if (external_tx) {
119
if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) {
112
*external_tx = true;
120
if (pkt->ip->ip_p == IPPROTO_TCP) {
113
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header,
121
+ fill_pkt_tcp_info(pkt, max_ack);
114
return queues;
122
g_queue_insert_sorted(queue,
123
pkt,
124
(GCompareDataFunc)seq_sorter,
125
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
126
}
115
}
127
116
128
if (mode == PRIMARY_IN) {
117
+ for (*etqf = 0; *etqf < 8; (*etqf)++) {
129
- if (!colo_insert_packet(&conn->primary_list, pkt)) {
118
+ if ((core->mac[ETQF0 + *etqf] & E1000_ETQF_FILTER_ENABLE) &&
130
+ if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) {
119
+ be16_to_cpu(ehdr->h_proto) == (core->mac[ETQF0 + *etqf] & E1000_ETQF_ETYPE_MASK)) {
131
error_report("colo compare primary queue size too big,"
120
+ if ((core->mac[ETQF0 + *etqf] & E1000_ETQF_1588) &&
132
"drop packet");
121
+ (core->mac[TSYNCRXCTL] & E1000_TSYNCRXCTL_ENABLED) &&
133
}
122
+ !(core->mac[TSYNCRXCTL] & E1000_TSYNCRXCTL_VALID) &&
134
} else {
123
+ iov_to_buf(iov, iovcnt, iov_ofs + ETH_HLEN, &ptp2, sizeof(ptp2)) >= sizeof(ptp2) &&
135
- if (!colo_insert_packet(&conn->secondary_list, pkt)) {
124
+ (ptp2.version_ptp & 15) == 2 &&
136
+ if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) {
125
+ ptp2.message_id_transport_specific == ((core->mac[TSYNCRXCFG] >> 8) & 255)) {
137
error_report("colo compare secondary queue size too big,"
126
+ e1000x_timestamp(core->mac, core->timadj, RXSTMPL, RXSTMPH);
138
"drop packet");
127
+ *ts = true;
139
}
128
+ core->mac[TSYNCRXCTL] |= E1000_TSYNCRXCTL_VALID;
140
@@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con)
129
+ core->mac[RXSATRL] = le32_to_cpu(ptp2.source_uuid_lo);
141
return 0;
130
+ core->mac[RXSATRH] = le16_to_cpu(ptp2.source_uuid_hi) |
142
}
131
+ (le16_to_cpu(ptp2.sequence_id) << 16);
143
132
+ }
144
+static inline bool after(uint32_t seq1, uint32_t seq2)
133
+ break;
145
+{
146
+ return (int32_t)(seq1 - seq2) > 0;
147
+}
148
+
149
+static void colo_release_primary_pkt(CompareState *s, Packet *pkt)
150
+{
151
+ int ret;
152
+ ret = compare_chr_send(s,
153
+ pkt->data,
154
+ pkt->size,
155
+ pkt->vnet_hdr_len);
156
+ if (ret < 0) {
157
+ error_report("colo send primary packet failed");
158
+ }
159
+ trace_colo_compare_main("packet same and release packet");
160
+ packet_destroy(pkt, NULL);
161
+}
162
+
163
/*
164
* The IP packets sent by primary and secondary
165
* will be compared in here
166
@@ -XXX,XX +XXX,XX @@ static int colo_compare_packet_payload(Packet *ppkt,
167
}
168
169
/*
170
- * Called from the compare thread on the primary
171
- * for compare tcp packet
172
- * compare_tcp copied from Dr. David Alan Gilbert's branch
173
- */
174
-static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt)
175
+ * return true means that the payload is consist and
176
+ * need to make the next comparison, false means do
177
+ * the checkpoint
178
+*/
179
+static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt,
180
+ int8_t *mark, uint32_t max_ack)
181
{
182
- struct tcphdr *ptcp, *stcp;
183
- int res;
184
+ *mark = 0;
185
+
186
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
187
+ if (colo_compare_packet_payload(ppkt, spkt,
188
+ ppkt->header_size, spkt->header_size,
189
+ ppkt->payload_size)) {
190
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
191
+ return true;
192
+ }
134
+ }
193
+ }
135
+ }
194
+ if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) {
136
+
195
+ if (colo_compare_packet_payload(ppkt, spkt,
137
if (vlan_num &&
196
+ ppkt->header_size, spkt->header_size,
138
!e1000x_rx_vlan_filter(core->mac, l2_header->vlan + vlan_num - 1)) {
197
+ ppkt->payload_size)) {
139
return queues;
198
+ *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY;
140
@@ -XXX,XX +XXX,XX @@ static void
199
+ return true;
141
igb_build_rx_metadata(IGBCore *core,
200
+ }
142
struct NetRxPkt *pkt,
143
bool is_eop,
144
- const E1000E_RSSInfo *rss_info,
145
+ const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
146
uint16_t *pkt_info, uint16_t *hdr_info,
147
uint32_t *rss,
148
uint32_t *status_flags,
149
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
150
if (pkt_info) {
151
*pkt_info = rss_info->enabled ? rss_info->type : 0;
152
153
- if (hasip4) {
154
- *pkt_info |= E1000_ADVRXD_PKT_IP4;
155
- }
156
+ if (etqf < 8) {
157
+ *pkt_info |= (BIT(11) | etqf) << 4;
158
+ } else {
159
+ if (hasip4) {
160
+ *pkt_info |= E1000_ADVRXD_PKT_IP4;
161
+ }
162
163
- if (hasip6) {
164
- *pkt_info |= E1000_ADVRXD_PKT_IP6;
165
- }
166
+ if (hasip6) {
167
+ *pkt_info |= E1000_ADVRXD_PKT_IP6;
168
+ }
169
170
- switch (l4hdr_proto) {
171
- case ETH_L4_HDR_PROTO_TCP:
172
- *pkt_info |= E1000_ADVRXD_PKT_TCP;
173
- break;
174
+ switch (l4hdr_proto) {
175
+ case ETH_L4_HDR_PROTO_TCP:
176
+ *pkt_info |= E1000_ADVRXD_PKT_TCP;
177
+ break;
178
179
- case ETH_L4_HDR_PROTO_UDP:
180
- *pkt_info |= E1000_ADVRXD_PKT_UDP;
181
- break;
182
+ case ETH_L4_HDR_PROTO_UDP:
183
+ *pkt_info |= E1000_ADVRXD_PKT_UDP;
184
+ break;
185
186
- case ETH_L4_HDR_PROTO_SCTP:
187
- *pkt_info |= E1000_ADVRXD_PKT_SCTP;
188
- break;
189
+ case ETH_L4_HDR_PROTO_SCTP:
190
+ *pkt_info |= E1000_ADVRXD_PKT_SCTP;
191
+ break;
192
193
- default:
194
- break;
195
+ default:
196
+ break;
197
+ }
198
}
199
}
200
201
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
202
*hdr_info = 0;
203
}
204
205
+ if (ts) {
206
+ *status_flags |= BIT(16);
201
+ }
207
+ }
202
+
208
+
203
+ /* one part of secondary packet payload still need to be compared */
209
/* RX CSO information */
204
+ if (!after(ppkt->seq_end, spkt->seq_end)) {
210
if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) {
205
+ if (colo_compare_packet_payload(ppkt, spkt,
211
trace_e1000e_rx_metadata_ipv6_sum_disabled();
206
+ ppkt->header_size + ppkt->offset,
212
@@ -XXX,XX +XXX,XX @@ func_exit:
207
+ spkt->header_size + spkt->offset,
213
static inline void
208
+ ppkt->payload_size - ppkt->offset)) {
214
igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
209
+ if (!after(ppkt->tcp_ack, max_ack)) {
215
struct NetRxPkt *pkt,
210
+ *mark = COLO_COMPARE_FREE_PRIMARY;
216
- const E1000E_RSSInfo *rss_info,
211
+ spkt->offset += ppkt->payload_size - ppkt->offset;
217
+ const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
212
+ return true;
218
uint16_t length)
213
+ } else {
219
{
214
+ /* secondary guest hasn't ack the data, don't send
220
uint32_t status_flags, rss;
215
+ * out this packet
221
@@ -XXX,XX +XXX,XX @@ igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
216
+ */
222
desc->csum = 0;
217
+ return false;
223
218
+ }
224
igb_build_rx_metadata(core, pkt, pkt != NULL,
219
+ }
225
- rss_info,
220
+ } else {
226
+ rss_info, etqf, ts,
221
+ /* primary packet is longer than secondary packet, compare
227
NULL, NULL, &rss,
222
+ * the same part and mark the primary packet offset
228
&status_flags, &ip_id,
223
+ */
229
&desc->special);
224
+ if (colo_compare_packet_payload(ppkt, spkt,
230
@@ -XXX,XX +XXX,XX @@ igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc,
225
+ ppkt->header_size + ppkt->offset,
231
static inline void
226
+ spkt->header_size + spkt->offset,
232
igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
227
+ spkt->payload_size - spkt->offset)) {
233
struct NetRxPkt *pkt,
228
+ *mark = COLO_COMPARE_FREE_SECONDARY;
234
- const E1000E_RSSInfo *rss_info,
229
+ ppkt->offset += spkt->payload_size - spkt->offset;
235
+ const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts,
230
+ return true;
236
uint16_t length)
231
+ }
237
{
232
+ }
238
memset(&desc->wb, 0, sizeof(desc->wb));
233
239
@@ -XXX,XX +XXX,XX @@ igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
234
- trace_colo_compare_main("compare tcp");
240
desc->wb.upper.length = cpu_to_le16(length);
235
+ return false;
241
236
+}
242
igb_build_rx_metadata(core, pkt, pkt != NULL,
237
243
- rss_info,
238
- ptcp = (struct tcphdr *)ppkt->transport_header;
244
+ rss_info, etqf, ts,
239
- stcp = (struct tcphdr *)spkt->transport_header;
245
&desc->wb.lower.lo_dword.pkt_info,
240
+static void colo_compare_tcp(CompareState *s, Connection *conn)
246
&desc->wb.lower.lo_dword.hdr_info,
241
+{
247
&desc->wb.lower.hi_dword.rss,
242
+ Packet *ppkt = NULL, *spkt = NULL;
248
@@ -XXX,XX +XXX,XX @@ igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc,
243
+ int8_t mark;
249
244
250
static inline void
245
/*
251
igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc,
246
- * The 'identification' field in the IP header is *very* random
252
-struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, uint16_t length)
247
- * it almost never matches. Fudge this by ignoring differences in
253
+ struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info,
248
- * unfragmented packets; they'll normally sort themselves out if different
254
+ uint16_t etqf, bool ts, uint16_t length)
249
- * anyway, and it should recover at the TCP level.
255
{
250
- * An alternative would be to get both the primary and secondary to rewrite
256
if (igb_rx_use_legacy_descriptor(core)) {
251
- * somehow; but that would need some sync traffic to sync the state
257
- igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length);
252
- */
258
+ igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info,
253
- if (ntohs(ppkt->ip->ip_off) & IP_DF) {
259
+ etqf, ts, length);
254
- spkt->ip->ip_id = ppkt->ip->ip_id;
255
- /* and the sum will be different if the IDs were different */
256
- spkt->ip->ip_sum = ppkt->ip->ip_sum;
257
+ * If ppkt and spkt have the same payload, but ppkt's ACK
258
+ * is greater than spkt's ACK, in this case we can not
259
+ * send the ppkt because it will cause the secondary guest
260
+ * to miss sending some data in the next. Therefore, we
261
+ * record the maximum ACK in the current queue at both
262
+ * primary side and secondary side. Only when the ack is
263
+ * less than the smaller of the two maximum ack, then we
264
+ * can ensure that the packet's payload is acknowledged by
265
+ * primary and secondary.
266
+ */
267
+ uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack;
268
+
269
+pri:
270
+ if (g_queue_is_empty(&conn->primary_list)) {
271
+ return;
272
}
273
+ ppkt = g_queue_pop_head(&conn->primary_list);
274
+sec:
275
+ if (g_queue_is_empty(&conn->secondary_list)) {
276
+ g_queue_push_head(&conn->primary_list, ppkt);
277
+ return;
278
+ }
279
+ spkt = g_queue_pop_head(&conn->secondary_list);
280
281
- /*
282
- * Check tcp header length for tcp option field.
283
- * th_off > 5 means this tcp packet have options field.
284
- * The tcp options maybe always different.
285
- * for example:
286
- * From RFC 7323.
287
- * TCP Timestamps option (TSopt):
288
- * Kind: 8
289
- *
290
- * Length: 10 bytes
291
- *
292
- * +-------+-------+---------------------+---------------------+
293
- * |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)|
294
- * +-------+-------+---------------------+---------------------+
295
- * 1 1 4 4
296
- *
297
- * In this case the primary guest's timestamp always different with
298
- * the secondary guest's timestamp. COLO just focus on payload,
299
- * so we just need skip this field.
300
- */
301
+ if (ppkt->tcp_seq == ppkt->seq_end) {
302
+ colo_release_primary_pkt(s, ppkt);
303
+ ppkt = NULL;
304
+ }
305
306
- ptrdiff_t ptcp_offset, stcp_offset;
307
+ if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) {
308
+ trace_colo_compare_main("pri: this packet has compared");
309
+ colo_release_primary_pkt(s, ppkt);
310
+ ppkt = NULL;
311
+ }
312
313
- ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data
314
- + (ptcp->th_off << 2) - ppkt->vnet_hdr_len;
315
- stcp_offset = spkt->transport_header - (uint8_t *)spkt->data
316
- + (stcp->th_off << 2) - spkt->vnet_hdr_len;
317
- if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) {
318
- res = colo_compare_packet_payload(ppkt, spkt,
319
- ptcp_offset, stcp_offset,
320
- ppkt->size - ptcp_offset);
321
+ if (spkt->tcp_seq == spkt->seq_end) {
322
+ packet_destroy(spkt, NULL);
323
+ if (!ppkt) {
324
+ goto pri;
325
+ } else {
326
+ goto sec;
327
+ }
328
} else {
260
} else {
329
- trace_colo_compare_main("TCP: payload size of packets are different");
261
- igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, length);
330
- res = -1;
262
+ igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info,
331
+ if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) {
263
+ etqf, ts, length);
332
+ trace_colo_compare_main("sec: this packet has compared");
333
+ packet_destroy(spkt, NULL);
334
+ if (!ppkt) {
335
+ goto pri;
336
+ } else {
337
+ goto sec;
338
+ }
339
+ }
340
+ if (!ppkt) {
341
+ g_queue_push_head(&conn->secondary_list, spkt);
342
+ goto pri;
343
+ }
344
}
345
346
- if (res != 0 &&
347
- trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) {
348
- char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20];
349
-
350
- strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src));
351
- strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst));
352
- strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src));
353
- strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst));
354
-
355
- trace_colo_compare_ip_info(ppkt->size, pri_ip_src,
356
- pri_ip_dst, spkt->size,
357
- sec_ip_src, sec_ip_dst);
358
-
359
- trace_colo_compare_tcp_info("pri tcp packet",
360
- ntohl(ptcp->th_seq),
361
- ntohl(ptcp->th_ack),
362
- res, ptcp->th_flags,
363
- ppkt->size);
364
-
365
- trace_colo_compare_tcp_info("sec tcp packet",
366
- ntohl(stcp->th_seq),
367
- ntohl(stcp->th_ack),
368
- res, stcp->th_flags,
369
- spkt->size);
370
+ if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) {
371
+ trace_colo_compare_tcp_info("pri",
372
+ ppkt->tcp_seq, ppkt->tcp_ack,
373
+ ppkt->header_size, ppkt->payload_size,
374
+ ppkt->offset, ppkt->flags);
375
+
376
+ trace_colo_compare_tcp_info("sec",
377
+ spkt->tcp_seq, spkt->tcp_ack,
378
+ spkt->header_size, spkt->payload_size,
379
+ spkt->offset, spkt->flags);
380
+
381
+ if (mark == COLO_COMPARE_FREE_PRIMARY) {
382
+ conn->compare_seq = ppkt->seq_end;
383
+ colo_release_primary_pkt(s, ppkt);
384
+ g_queue_push_head(&conn->secondary_list, spkt);
385
+ goto pri;
386
+ }
387
+ if (mark == COLO_COMPARE_FREE_SECONDARY) {
388
+ conn->compare_seq = spkt->seq_end;
389
+ packet_destroy(spkt, NULL);
390
+ goto sec;
391
+ }
392
+ if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) {
393
+ conn->compare_seq = ppkt->seq_end;
394
+ colo_release_primary_pkt(s, ppkt);
395
+ packet_destroy(spkt, NULL);
396
+ goto pri;
397
+ }
398
+ } else {
399
+ g_queue_push_head(&conn->primary_list, ppkt);
400
+ g_queue_push_head(&conn->secondary_list, spkt);
401
402
qemu_hexdump((char *)ppkt->data, stderr,
403
"colo-compare ppkt", ppkt->size);
404
qemu_hexdump((char *)spkt->data, stderr,
405
"colo-compare spkt", spkt->size);
406
- }
407
408
- return res;
409
+ /*
410
+ * colo_compare_inconsistent_notify();
411
+ * TODO: notice to checkpoint();
412
+ */
413
+ }
414
}
415
416
+
417
/*
418
* Called from the compare thread on the primary
419
* for compare udp packet
420
@@ -XXX,XX +XXX,XX @@ static void colo_old_packet_check(void *opaque)
421
(GCompareFunc)colo_old_packet_check_one_conn);
422
}
423
424
-/*
425
- * Called from the compare thread on the primary
426
- * for compare packet with secondary list of the
427
- * specified connection when a new packet was
428
- * queued to it.
429
- */
430
-static void colo_compare_connection(void *opaque, void *user_data)
431
+static void colo_compare_packet(CompareState *s, Connection *conn,
432
+ int (*HandlePacket)(Packet *spkt,
433
+ Packet *ppkt))
434
{
435
- CompareState *s = user_data;
436
- Connection *conn = opaque;
437
Packet *pkt = NULL;
438
GList *result = NULL;
439
- int ret;
440
441
while (!g_queue_is_empty(&conn->primary_list) &&
442
!g_queue_is_empty(&conn->secondary_list)) {
443
pkt = g_queue_pop_head(&conn->primary_list);
444
- switch (conn->ip_proto) {
445
- case IPPROTO_TCP:
446
- result = g_queue_find_custom(&conn->secondary_list,
447
- pkt, (GCompareFunc)colo_packet_compare_tcp);
448
- break;
449
- case IPPROTO_UDP:
450
- result = g_queue_find_custom(&conn->secondary_list,
451
- pkt, (GCompareFunc)colo_packet_compare_udp);
452
- break;
453
- case IPPROTO_ICMP:
454
- result = g_queue_find_custom(&conn->secondary_list,
455
- pkt, (GCompareFunc)colo_packet_compare_icmp);
456
- break;
457
- default:
458
- result = g_queue_find_custom(&conn->secondary_list,
459
- pkt, (GCompareFunc)colo_packet_compare_other);
460
- break;
461
- }
462
+ result = g_queue_find_custom(&conn->secondary_list,
463
+ pkt, (GCompareFunc)HandlePacket);
464
465
if (result) {
466
- ret = compare_chr_send(s,
467
- pkt->data,
468
- pkt->size,
469
- pkt->vnet_hdr_len);
470
- if (ret < 0) {
471
- error_report("colo_send_primary_packet failed");
472
- }
473
- trace_colo_compare_main("packet same and release packet");
474
+ colo_release_primary_pkt(s, pkt);
475
g_queue_remove(&conn->secondary_list, result->data);
476
- packet_destroy(pkt, NULL);
477
} else {
478
/*
479
* If one packet arrive late, the secondary_list or
480
@@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data)
481
}
264
}
482
}
265
}
483
266
484
+/*
267
@@ -XXX,XX +XXX,XX @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi)
485
+ * Called from the compare thread on the primary
268
static void
486
+ * for compare packet with secondary list of the
269
igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
487
+ * specified connection when a new packet was
270
const E1000E_RxRing *rxr,
488
+ * queued to it.
271
- const E1000E_RSSInfo *rss_info)
489
+ */
272
+ const E1000E_RSSInfo *rss_info,
490
+static void colo_compare_connection(void *opaque, void *user_data)
273
+ uint16_t etqf, bool ts)
491
+{
274
{
492
+ CompareState *s = user_data;
275
PCIDevice *d;
493
+ Connection *conn = opaque;
276
dma_addr_t base;
494
+
277
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
495
+ switch (conn->ip_proto) {
278
}
496
+ case IPPROTO_TCP:
279
497
+ colo_compare_tcp(s, conn);
280
igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL,
498
+ break;
281
- rss_info, written);
499
+ case IPPROTO_UDP:
282
+ rss_info, etqf, ts, written);
500
+ colo_compare_packet(s, conn, colo_packet_compare_udp);
283
igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len);
501
+ break;
284
502
+ case IPPROTO_ICMP:
285
igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN);
503
+ colo_compare_packet(s, conn, colo_packet_compare_icmp);
286
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
504
+ break;
287
size_t iov_ofs = 0;
505
+ default:
288
E1000E_RxRing rxr;
506
+ colo_compare_packet(s, conn, colo_packet_compare_other);
289
E1000E_RSSInfo rss_info;
507
+ break;
290
+ uint16_t etqf;
508
+ }
291
+ bool ts;
509
+}
292
size_t total_size;
510
+
293
int strip_vlan_index;
511
static int compare_chr_send(CompareState *s,
294
int i;
512
const uint8_t *buf,
295
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
513
uint32_t size,
296
get_eth_packet_type(&buf.l2_header.eth));
514
diff --git a/net/colo.c b/net/colo.c
297
net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs);
298
299
- queues = igb_receive_assign(core, &buf.l2_header, size,
300
- &rss_info, external_tx);
301
+ queues = igb_receive_assign(core, iov, iovcnt, iov_ofs,
302
+ &buf.l2_header, size,
303
+ &rss_info, &etqf, &ts, external_tx);
304
if (!queues) {
305
trace_e1000e_rx_flt_dropped();
306
return orig_size;
307
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
308
causes |= E1000_ICR_RXDW;
309
310
igb_rx_fix_l4_csum(core, core->rx_pkt);
311
- igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
312
+ igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info, etqf, ts);
313
314
/* Check if receive descriptor minimum threshold hit */
315
if (igb_rx_descr_threshold_hit(core, rxr.i)) {
316
@@ -XXX,XX +XXX,XX @@ static const readops igb_macreg_readops[] = {
317
[EIAM] = igb_mac_readreg,
318
[IVAR0 ... IVAR0 + 7] = igb_mac_readreg,
319
igb_getreg(IVAR_MISC),
320
+ igb_getreg(TSYNCRXCFG),
321
+ [ETQF0 ... ETQF0 + 7] = igb_mac_readreg,
322
igb_getreg(VT_CTL),
323
[P2VMAILBOX0 ... P2VMAILBOX7] = igb_mac_readreg,
324
[V2PMAILBOX0 ... V2PMAILBOX7] = igb_mac_vfmailbox_read,
325
@@ -XXX,XX +XXX,XX @@ static const writeops igb_macreg_writeops[] = {
326
[EIMS] = igb_set_eims,
327
[IVAR0 ... IVAR0 + 7] = igb_mac_writereg,
328
igb_putreg(IVAR_MISC),
329
+ igb_putreg(TSYNCRXCFG),
330
+ [ETQF0 ... ETQF0 + 7] = igb_mac_writereg,
331
igb_putreg(VT_CTL),
332
[P2VMAILBOX0 ... P2VMAILBOX7] = igb_set_pfmailbox,
333
[V2PMAILBOX0 ... V2PMAILBOX7] = igb_set_vfmailbox,
334
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
515
index XXXXXXX..XXXXXXX 100644
335
index XXXXXXX..XXXXXXX 100644
516
--- a/net/colo.c
336
--- a/hw/net/igb_regs.h
517
+++ b/net/colo.c
337
+++ b/hw/net/igb_regs.h
518
@@ -XXX,XX +XXX,XX @@ Connection *connection_new(ConnectionKey *key)
338
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
519
conn->processing = false;
339
#define E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */
520
conn->offset = 0;
340
#define E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */
521
conn->syn_flag = 0;
341
522
+ conn->pack = 0;
342
+/* ETQF register bit definitions */
523
+ conn->sack = 0;
343
+#define E1000_ETQF_FILTER_ENABLE BIT(26)
524
g_queue_init(&conn->primary_list);
344
+#define E1000_ETQF_1588 BIT(30)
525
g_queue_init(&conn->secondary_list);
345
+#define E1000_ETQF_IMM_INT BIT(29)
526
346
+#define E1000_ETQF_QUEUE_ENABLE BIT(31)
527
@@ -XXX,XX +XXX,XX @@ Packet *packet_new(const void *data, int size, int vnet_hdr_len)
347
+#define E1000_ETQF_QUEUE_SHIFT 16
528
pkt->size = size;
348
+#define E1000_ETQF_QUEUE_MASK 0x00070000
529
pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST);
349
+#define E1000_ETQF_ETYPE_MASK 0x0000FFFF
530
pkt->vnet_hdr_len = vnet_hdr_len;
350
+
531
+ pkt->tcp_seq = 0;
351
#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof control */
532
+ pkt->tcp_ack = 0;
352
#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof control */
533
+ pkt->seq_end = 0;
353
#define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */
534
+ pkt->header_size = 0;
354
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
535
+ pkt->payload_size = 0;
355
#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */
536
+ pkt->offset = 0;
356
#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */
537
+ pkt->flags = 0;
357
538
358
+#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */
539
return pkt;
359
+
540
}
360
+/* Filtering Registers */
541
diff --git a/net/colo.h b/net/colo.h
361
+#define E1000_SAQF(_n) (0x5980 + 4 * (_n))
542
index XXXXXXX..XXXXXXX 100644
362
+#define E1000_DAQF(_n) (0x59A0 + 4 * (_n))
543
--- a/net/colo.h
363
+#define E1000_SPQF(_n) (0x59C0 + 4 * (_n))
544
+++ b/net/colo.h
364
+#define E1000_FTQF(_n) (0x59E0 + 4 * (_n))
545
@@ -XXX,XX +XXX,XX @@ typedef struct Packet {
365
+#define E1000_SAQF0 E1000_SAQF(0)
546
int64_t creation_ms;
366
+#define E1000_DAQF0 E1000_DAQF(0)
547
/* Get vnet_hdr_len from filter */
367
+#define E1000_SPQF0 E1000_SPQF(0)
548
uint32_t vnet_hdr_len;
368
+#define E1000_FTQF0 E1000_FTQF(0)
549
+ uint32_t tcp_seq; /* sequence number */
369
+#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */
550
+ uint32_t tcp_ack; /* acknowledgement number */
370
+#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */
551
+ /* the sequence number of the last byte of the packet */
371
+
552
+ uint32_t seq_end;
372
#define E1000_RQDPC(_n) (0x0C030 + ((_n) * 0x40))
553
+ uint8_t header_size; /* the header length */
373
554
+ uint16_t payload_size; /* the payload length */
374
#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */
555
+ /* record the payload offset(the length that has been compared) */
556
+ uint16_t offset;
557
+ uint8_t flags; /* Flags(aka Control bits) */
558
} Packet;
559
560
typedef struct ConnectionKey {
561
@@ -XXX,XX +XXX,XX @@ typedef struct Connection {
562
/* flag to enqueue unprocessed_connections */
563
bool processing;
564
uint8_t ip_proto;
565
+ /* record the sequence number that has been compared */
566
+ uint32_t compare_seq;
567
+ /* the maximum of acknowledgement number in primary_list queue */
568
+ uint32_t pack;
569
+ /* the maximum of acknowledgement number in secondary_list queue */
570
+ uint32_t sack;
571
/* offset = secondary_seq - primary_seq */
572
tcp_seq offset;
573
/*
574
diff --git a/net/trace-events b/net/trace-events
575
index XXXXXXX..XXXXXXX 100644
576
--- a/net/trace-events
577
+++ b/net/trace-events
578
@@ -XXX,XX +XXX,XX @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d"
579
colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s"
580
colo_old_packet_check_found(int64_t old_time) "%" PRId64
581
colo_compare_miscompare(void) ""
582
-colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int res, uint32_t flag, int size) "side: %s seq/ack= %u/%u res= %d flags= 0x%x pkt_size: %d\n"
583
+colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int hdlen, int pdlen, int offset, int flags) "%s: seq/ack= %u/%u hdlen= %d pdlen= %d offset= %d flags=%d\n"
584
585
# net/filter-rewriter.c
586
colo_filter_rewriter_debug(void) ""
587
--
375
--
588
2.7.4
376
2.7.4
589
590
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
5
Signed-off-by: Jason Wang <jasowang@redhat.com>
6
---
7
hw/net/igb_core.c | 7 +++++++
8
hw/net/igb_regs.h | 3 +++
9
2 files changed, 10 insertions(+)
10
11
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
12
index XXXXXXX..XXXXXXX 100644
13
--- a/hw/net/igb_core.c
14
+++ b/hw/net/igb_core.c
15
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
16
tx->ctx[idx].vlan_macip_lens >> IGB_TX_FLAGS_VLAN_SHIFT,
17
!!(tx->first_cmd_type_len & E1000_TXD_CMD_VLE));
18
19
+ if ((tx->first_cmd_type_len & E1000_ADVTXD_MAC_TSTAMP) &&
20
+ (core->mac[TSYNCTXCTL] & E1000_TSYNCTXCTL_ENABLED) &&
21
+ !(core->mac[TSYNCTXCTL] & E1000_TSYNCTXCTL_VALID)) {
22
+ core->mac[TSYNCTXCTL] |= E1000_TSYNCTXCTL_VALID;
23
+ e1000x_timestamp(core->mac, core->timadj, TXSTMPL, TXSTMPH);
24
+ }
25
+
26
if (igb_tx_pkt_send(core, tx, queue_index)) {
27
igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
28
}
29
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
30
index XXXXXXX..XXXXXXX 100644
31
--- a/hw/net/igb_regs.h
32
+++ b/hw/net/igb_regs.h
33
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
34
/* E1000_EITR_CNT_IGNR is only for 82576 and newer */
35
#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */
36
37
+#define E1000_TSYNCTXCTL_VALID 0x00000001 /* tx timestamp valid */
38
+#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable tx timestampping */
39
+
40
/* PCI Express Control */
41
#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000
42
#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000
43
--
44
2.7.4
diff view generated by jsdifflib
New patch
1
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
3
In MSI-X mode, if there are interrupts already notified but not cleared
4
and a new interrupt arrives, e1000e incorrectly notifies the notified
5
ones again along with the new one.
6
7
To fix this issue, replace e1000e_update_interrupt_state() with
8
two new functions: e1000e_raise_interrupts() and
9
e1000e_lower_interrupts(). These functions don't only raise or lower
10
interrupts, but they also perform register writes which update the
11
interrupt state. Before they perform a register write, these functions
12
determine the interrupts already raised, and compare them with the
13
interrupts raised after the register write to determine the interrupts
14
to notify.
15
16
The introduction of these functions made tracepoints which assume that
17
the caller of e1000e_update_interrupt_state() performs register writes
18
obsolete. These tracepoints are now removed, and alternative ones are
19
added to the new functions.
20
21
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
22
Signed-off-by: Jason Wang <jasowang@redhat.com>
23
---
24
hw/net/e1000e_core.c | 153 ++++++++++++++++++++++-----------------------------
25
hw/net/e1000e_core.h | 2 -
26
hw/net/trace-events | 2 +
27
3 files changed, 69 insertions(+), 88 deletions(-)
28
29
diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
30
index XXXXXXX..XXXXXXX 100644
31
--- a/hw/net/e1000e_core.c
32
+++ b/hw/net/e1000e_core.c
33
@@ -XXX,XX +XXX,XX @@ e1000e_intrmgr_on_throttling_timer(void *opaque)
34
35
timer->running = false;
36
37
- if (msi_enabled(timer->core->owner)) {
38
- trace_e1000e_irq_msi_notify_postponed();
39
- /* Clear msi_causes_pending to fire MSI eventually */
40
- timer->core->msi_causes_pending = 0;
41
- e1000e_set_interrupt_cause(timer->core, 0);
42
- } else {
43
- trace_e1000e_irq_legacy_notify_postponed();
44
- e1000e_set_interrupt_cause(timer->core, 0);
45
+ if (timer->core->mac[IMS] & timer->core->mac[ICR]) {
46
+ if (msi_enabled(timer->core->owner)) {
47
+ trace_e1000e_irq_msi_notify_postponed();
48
+ msi_notify(timer->core->owner, 0);
49
+ } else {
50
+ trace_e1000e_irq_legacy_notify_postponed();
51
+ e1000e_raise_legacy_irq(timer->core);
52
+ }
53
}
54
}
55
56
@@ -XXX,XX +XXX,XX @@ static void
57
e1000e_intrmgr_fire_all_timers(E1000ECore *core)
58
{
59
int i;
60
- uint32_t val = e1000e_intmgr_collect_delayed_causes(core);
61
-
62
- trace_e1000e_irq_adding_delayed_causes(val, core->mac[ICR]);
63
- core->mac[ICR] |= val;
64
65
if (core->itr.running) {
66
timer_del(core->itr.timer);
67
@@ -XXX,XX +XXX,XX @@ void(*e1000e_phyreg_writeops[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE])
68
}
69
};
70
71
-static inline void
72
-e1000e_clear_ims_bits(E1000ECore *core, uint32_t bits)
73
-{
74
- trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits);
75
- core->mac[IMS] &= ~bits;
76
-}
77
-
78
static inline bool
79
e1000e_postpone_interrupt(E1000IntrDelayTimer *timer)
80
{
81
@@ -XXX,XX +XXX,XX @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg)
82
effective_eiac = core->mac[EIAC] & cause;
83
84
core->mac[ICR] &= ~effective_eiac;
85
- core->msi_causes_pending &= ~effective_eiac;
86
87
if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
88
core->mac[IMS] &= ~effective_eiac;
89
@@ -XXX,XX +XXX,XX @@ e1000e_fix_icr_asserted(E1000ECore *core)
90
trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]);
91
}
92
93
-static void
94
-e1000e_send_msi(E1000ECore *core, bool msix)
95
+static void e1000e_raise_interrupts(E1000ECore *core,
96
+ size_t index, uint32_t causes)
97
{
98
- uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED;
99
-
100
- core->msi_causes_pending &= causes;
101
- causes ^= core->msi_causes_pending;
102
- if (causes == 0) {
103
- return;
104
- }
105
- core->msi_causes_pending |= causes;
106
+ bool is_msix = msix_enabled(core->owner);
107
+ uint32_t old_causes = core->mac[IMS] & core->mac[ICR];
108
+ uint32_t raised_causes;
109
110
- if (msix) {
111
- e1000e_msix_notify(core, causes);
112
- } else {
113
- if (!e1000e_itr_should_postpone(core)) {
114
- trace_e1000e_irq_msi_notify(causes);
115
- msi_notify(core->owner, 0);
116
- }
117
- }
118
-}
119
+ trace_e1000e_irq_set(index << 2,
120
+ core->mac[index], core->mac[index] | causes);
121
122
-static void
123
-e1000e_update_interrupt_state(E1000ECore *core)
124
-{
125
- bool interrupts_pending;
126
- bool is_msix = msix_enabled(core->owner);
127
+ core->mac[index] |= causes;
128
129
/* Set ICR[OTHER] for MSI-X */
130
if (is_msix) {
131
@@ -XXX,XX +XXX,XX @@ e1000e_update_interrupt_state(E1000ECore *core)
132
*/
133
core->mac[ICS] = core->mac[ICR];
134
135
- interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false;
136
- if (!interrupts_pending) {
137
- core->msi_causes_pending = 0;
138
- }
139
-
140
trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
141
core->mac[ICR], core->mac[IMS]);
142
143
- if (is_msix || msi_enabled(core->owner)) {
144
- if (interrupts_pending) {
145
- e1000e_send_msi(core, is_msix);
146
- }
147
- } else {
148
- if (interrupts_pending) {
149
- if (!e1000e_itr_should_postpone(core)) {
150
- e1000e_raise_legacy_irq(core);
151
- }
152
+ raised_causes = core->mac[IMS] & core->mac[ICR] & ~old_causes;
153
+ if (!raised_causes) {
154
+ return;
155
+ }
156
+
157
+ if (is_msix) {
158
+ e1000e_msix_notify(core, raised_causes & ~E1000_ICR_ASSERTED);
159
+ } else if (!e1000e_itr_should_postpone(core)) {
160
+ if (msi_enabled(core->owner)) {
161
+ trace_e1000e_irq_msi_notify(raised_causes);
162
+ msi_notify(core->owner, 0);
163
} else {
164
- e1000e_lower_legacy_irq(core);
165
+ e1000e_raise_legacy_irq(core);
166
}
167
}
168
}
169
170
-static void
171
-e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val)
172
+static void e1000e_lower_interrupts(E1000ECore *core,
173
+ size_t index, uint32_t causes)
174
{
175
- trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]);
176
+ trace_e1000e_irq_clear(index << 2,
177
+ core->mac[index], core->mac[index] & ~causes);
178
179
- val |= e1000e_intmgr_collect_delayed_causes(core);
180
- core->mac[ICR] |= val;
181
+ core->mac[index] &= ~causes;
182
183
- trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]);
184
+ /*
185
+ * Make sure ICR and ICS registers have the same value.
186
+ * The spec says that the ICS register is write-only. However in practice,
187
+ * on real hardware ICS is readable, and for reads it has the same value as
188
+ * ICR (except that ICS does not have the clear on read behaviour of ICR).
189
+ *
190
+ * The VxWorks PRO/1000 driver uses this behaviour.
191
+ */
192
+ core->mac[ICS] = core->mac[ICR];
193
+
194
+ trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
195
+ core->mac[ICR], core->mac[IMS]);
196
197
- e1000e_update_interrupt_state(core);
198
+ if (!(core->mac[IMS] & core->mac[ICR]) &&
199
+ !msix_enabled(core->owner) && !msi_enabled(core->owner)) {
200
+ e1000e_lower_legacy_irq(core);
201
+ }
202
+}
203
+
204
+static void
205
+e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val)
206
+{
207
+ val |= e1000e_intmgr_collect_delayed_causes(core);
208
+ e1000e_raise_interrupts(core, ICR, val);
209
}
210
211
static inline void
212
@@ -XXX,XX +XXX,XX @@ e1000e_set_ics(E1000ECore *core, int index, uint32_t val)
213
static void
214
e1000e_set_icr(E1000ECore *core, int index, uint32_t val)
215
{
216
- uint32_t icr = 0;
217
if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
218
(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
219
trace_e1000e_irq_icr_process_iame();
220
- e1000e_clear_ims_bits(core, core->mac[IAM]);
221
+ e1000e_lower_interrupts(core, IMS, core->mac[IAM]);
222
}
223
224
- icr = core->mac[ICR] & ~val;
225
/*
226
* Windows driver expects that the "receive overrun" bit and other
227
* ones to be cleared when the "Other" bit (#24) is cleared.
228
*/
229
- icr = (val & E1000_ICR_OTHER) ? (icr & ~E1000_ICR_OTHER_CAUSES) : icr;
230
- trace_e1000e_irq_icr_write(val, core->mac[ICR], icr);
231
- core->mac[ICR] = icr;
232
- e1000e_update_interrupt_state(core);
233
+ if (val & E1000_ICR_OTHER) {
234
+ val |= E1000_ICR_OTHER_CAUSES;
235
+ }
236
+ e1000e_lower_interrupts(core, ICR, val);
237
}
238
239
static void
240
e1000e_set_imc(E1000ECore *core, int index, uint32_t val)
241
{
242
trace_e1000e_irq_ims_clear_set_imc(val);
243
- e1000e_clear_ims_bits(core, val);
244
- e1000e_update_interrupt_state(core);
245
+ e1000e_lower_interrupts(core, IMS, val);
246
}
247
248
static void
249
@@ -XXX,XX +XXX,XX @@ e1000e_set_ims(E1000ECore *core, int index, uint32_t val)
250
251
uint32_t valid_val = val & ims_valid_mask;
252
253
- trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val);
254
- core->mac[IMS] |= valid_val;
255
-
256
if ((valid_val & ims_ext_mask) &&
257
(core->mac[CTRL_EXT] & E1000_CTRL_EXT_PBA_CLR) &&
258
msix_enabled(core->owner)) {
259
@@ -XXX,XX +XXX,XX @@ e1000e_set_ims(E1000ECore *core, int index, uint32_t val)
260
e1000e_intrmgr_fire_all_timers(core);
261
}
262
263
- e1000e_update_interrupt_state(core);
264
+ e1000e_raise_interrupts(core, IMS, valid_val);
265
}
266
267
static void
268
@@ -XXX,XX +XXX,XX @@ static uint32_t
269
e1000e_mac_icr_read(E1000ECore *core, int index)
270
{
271
uint32_t ret = core->mac[ICR];
272
- trace_e1000e_irq_icr_read_entry(ret);
273
274
if (core->mac[IMS] == 0) {
275
trace_e1000e_irq_icr_clear_zero_ims();
276
- core->mac[ICR] = 0;
277
+ e1000e_lower_interrupts(core, ICR, 0xffffffff);
278
}
279
280
if (!msix_enabled(core->owner)) {
281
trace_e1000e_irq_icr_clear_nonmsix_icr_read();
282
- core->mac[ICR] = 0;
283
+ e1000e_lower_interrupts(core, ICR, 0xffffffff);
284
}
285
286
if ((core->mac[ICR] & E1000_ICR_ASSERTED) &&
287
(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) {
288
trace_e1000e_irq_icr_clear_iame();
289
- core->mac[ICR] = 0;
290
+ e1000e_lower_interrupts(core, ICR, 0xffffffff);
291
trace_e1000e_irq_icr_process_iame();
292
- e1000e_clear_ims_bits(core, core->mac[IAM]);
293
+ e1000e_lower_interrupts(core, IMS, core->mac[IAM]);
294
}
295
296
- trace_e1000e_irq_icr_read_exit(core->mac[ICR]);
297
- e1000e_update_interrupt_state(core);
298
return ret;
299
}
300
301
diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h
302
index XXXXXXX..XXXXXXX 100644
303
--- a/hw/net/e1000e_core.h
304
+++ b/hw/net/e1000e_core.h
305
@@ -XXX,XX +XXX,XX @@ struct E1000Core {
306
PCIDevice *owner;
307
void (*owner_start_recv)(PCIDevice *d);
308
309
- uint32_t msi_causes_pending;
310
-
311
int64_t timadj;
312
};
313
314
diff --git a/hw/net/trace-events b/hw/net/trace-events
315
index XXXXXXX..XXXXXXX 100644
316
--- a/hw/net/trace-events
317
+++ b/hw/net/trace-events
318
@@ -XXX,XX +XXX,XX @@ e1000e_irq_msix_notify_postponed_vec(int idx) "Sending MSI-X postponed by EITR[%
319
e1000e_irq_legacy_notify(bool level) "IRQ line state: %d"
320
e1000e_irq_msix_notify_vec(uint32_t vector) "MSI-X notify vector 0x%x"
321
e1000e_irq_postponed_by_xitr(uint32_t reg) "Interrupt postponed by [E]ITR register 0x%x"
322
+e1000e_irq_clear(uint32_t offset, uint32_t old, uint32_t new) "Clearing interrupt register 0x%x: 0x%x --> 0x%x"
323
+e1000e_irq_set(uint32_t offset, uint32_t old, uint32_t new) "Setting interrupt register 0x%x: 0x%x --> 0x%x"
324
e1000e_irq_clear_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Clearing IMS bits 0x%x: 0x%x --> 0x%x"
325
e1000e_irq_set_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Setting IMS bits 0x%x: 0x%x --> 0x%x"
326
e1000e_irq_fix_icr_asserted(uint32_t new_val) "ICR_ASSERTED bit fixed: 0x%x"
327
--
328
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
This follows the corresponding change for e1000e. This fixes:
4
tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb
5
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/igb_core.c | 201 +++++++++------------
10
hw/net/trace-events | 11 +-
11
scripts/ci/org.centos/stream/8/x86_64/test-avocado | 1 +
12
tests/avocado/netdev-ethtool.py | 4 -
13
4 files changed, 87 insertions(+), 130 deletions(-)
14
15
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
16
index XXXXXXX..XXXXXXX 100644
17
--- a/hw/net/igb_core.c
18
+++ b/hw/net/igb_core.c
19
@@ -XXX,XX +XXX,XX @@ static ssize_t
20
igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
21
bool has_vnet, bool *external_tx);
22
23
-static inline void
24
-igb_set_interrupt_cause(IGBCore *core, uint32_t val);
25
-
26
-static void igb_update_interrupt_state(IGBCore *core);
27
+static void igb_raise_interrupts(IGBCore *core, size_t index, uint32_t causes);
28
static void igb_reset(IGBCore *core, bool sw);
29
30
static inline void
31
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
32
}
33
34
if (eic) {
35
- core->mac[EICR] |= eic;
36
- igb_set_interrupt_cause(core, E1000_ICR_TXDW);
37
+ igb_raise_interrupts(core, EICR, eic);
38
+ igb_raise_interrupts(core, ICR, E1000_ICR_TXDW);
39
}
40
41
net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci, d);
42
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
43
{
44
uint16_t queues = 0;
45
uint32_t causes = 0;
46
+ uint32_t ecauses = 0;
47
union {
48
L2Header l2_header;
49
uint8_t octets[ETH_ZLEN];
50
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
51
causes |= E1000_ICS_RXDMT0;
52
}
53
54
- core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx);
55
+ ecauses |= igb_rx_wb_eic(core, rxr.i->idx);
56
57
trace_e1000e_rx_written_to_guest(rxr.i->idx);
58
}
59
60
trace_e1000e_rx_interrupt_set(causes);
61
- igb_set_interrupt_cause(core, causes);
62
+ igb_raise_interrupts(core, EICR, ecauses);
63
+ igb_raise_interrupts(core, ICR, causes);
64
65
return orig_size;
66
}
67
@@ -XXX,XX +XXX,XX @@ void igb_core_set_link_status(IGBCore *core)
68
}
69
70
if (core->mac[STATUS] != old_status) {
71
- igb_set_interrupt_cause(core, E1000_ICR_LSC);
72
+ igb_raise_interrupts(core, ICR, E1000_ICR_LSC);
73
}
74
}
75
76
@@ -XXX,XX +XXX,XX @@ igb_set_rx_control(IGBCore *core, int index, uint32_t val)
77
}
78
}
79
80
-static inline void
81
-igb_clear_ims_bits(IGBCore *core, uint32_t bits)
82
-{
83
- trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits);
84
- core->mac[IMS] &= ~bits;
85
-}
86
-
87
static inline bool
88
igb_postpone_interrupt(IGBIntrDelayTimer *timer)
89
{
90
@@ -XXX,XX +XXX,XX @@ igb_eitr_should_postpone(IGBCore *core, int idx)
91
return igb_postpone_interrupt(&core->eitr[idx]);
92
}
93
94
-static void igb_send_msix(IGBCore *core)
95
+static void igb_send_msix(IGBCore *core, uint32_t causes)
96
{
97
- uint32_t causes = core->mac[EICR] & core->mac[EIMS];
98
int vector;
99
100
for (vector = 0; vector < IGB_INTR_NUM; ++vector) {
101
@@ -XXX,XX +XXX,XX @@ igb_fix_icr_asserted(IGBCore *core)
102
trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]);
103
}
104
105
-static void
106
-igb_update_interrupt_state(IGBCore *core)
107
+static void igb_raise_interrupts(IGBCore *core, size_t index, uint32_t causes)
108
{
109
- uint32_t icr;
110
- uint32_t causes;
111
+ uint32_t old_causes = core->mac[ICR] & core->mac[IMS];
112
+ uint32_t old_ecauses = core->mac[EICR] & core->mac[EIMS];
113
+ uint32_t raised_causes;
114
+ uint32_t raised_ecauses;
115
uint32_t int_alloc;
116
117
- icr = core->mac[ICR] & core->mac[IMS];
118
+ trace_e1000e_irq_set(index << 2,
119
+ core->mac[index], core->mac[index] | causes);
120
+
121
+ core->mac[index] |= causes;
122
123
if (core->mac[GPIE] & E1000_GPIE_MSIX_MODE) {
124
- if (icr) {
125
- causes = 0;
126
- if (icr & E1000_ICR_DRSTA) {
127
- int_alloc = core->mac[IVAR_MISC] & 0xff;
128
- if (int_alloc & E1000_IVAR_VALID) {
129
- causes |= BIT(int_alloc & 0x1f);
130
- }
131
+ raised_causes = core->mac[ICR] & core->mac[IMS] & ~old_causes;
132
+
133
+ if (raised_causes & E1000_ICR_DRSTA) {
134
+ int_alloc = core->mac[IVAR_MISC] & 0xff;
135
+ if (int_alloc & E1000_IVAR_VALID) {
136
+ core->mac[EICR] |= BIT(int_alloc & 0x1f);
137
}
138
- /* Check if other bits (excluding the TCP Timer) are enabled. */
139
- if (icr & ~E1000_ICR_DRSTA) {
140
- int_alloc = (core->mac[IVAR_MISC] >> 8) & 0xff;
141
- if (int_alloc & E1000_IVAR_VALID) {
142
- causes |= BIT(int_alloc & 0x1f);
143
- }
144
- trace_e1000e_irq_add_msi_other(core->mac[EICR]);
145
+ }
146
+ /* Check if other bits (excluding the TCP Timer) are enabled. */
147
+ if (raised_causes & ~E1000_ICR_DRSTA) {
148
+ int_alloc = (core->mac[IVAR_MISC] >> 8) & 0xff;
149
+ if (int_alloc & E1000_IVAR_VALID) {
150
+ core->mac[EICR] |= BIT(int_alloc & 0x1f);
151
}
152
- core->mac[EICR] |= causes;
153
}
154
155
- if ((core->mac[EICR] & core->mac[EIMS])) {
156
- igb_send_msix(core);
157
+ raised_ecauses = core->mac[EICR] & core->mac[EIMS] & ~old_ecauses;
158
+ if (!raised_ecauses) {
159
+ return;
160
}
161
+
162
+ igb_send_msix(core, raised_ecauses);
163
} else {
164
igb_fix_icr_asserted(core);
165
166
- if (icr) {
167
- core->mac[EICR] |= (icr & E1000_ICR_DRSTA) | E1000_EICR_OTHER;
168
- } else {
169
- core->mac[EICR] &= ~E1000_EICR_OTHER;
170
+ raised_causes = core->mac[ICR] & core->mac[IMS] & ~old_causes;
171
+ if (!raised_causes) {
172
+ return;
173
}
174
175
- trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
176
- core->mac[ICR], core->mac[IMS]);
177
+ core->mac[EICR] |= (raised_causes & E1000_ICR_DRSTA) | E1000_EICR_OTHER;
178
179
if (msix_enabled(core->owner)) {
180
- if (icr) {
181
- trace_e1000e_irq_msix_notify_vec(0);
182
- msix_notify(core->owner, 0);
183
- }
184
+ trace_e1000e_irq_msix_notify_vec(0);
185
+ msix_notify(core->owner, 0);
186
} else if (msi_enabled(core->owner)) {
187
- if (icr) {
188
- msi_notify(core->owner, 0);
189
- }
190
+ trace_e1000e_irq_msi_notify(raised_causes);
191
+ msi_notify(core->owner, 0);
192
} else {
193
- if (icr) {
194
- igb_raise_legacy_irq(core);
195
- } else {
196
- igb_lower_legacy_irq(core);
197
- }
198
+ igb_raise_legacy_irq(core);
199
}
200
}
201
}
202
203
-static void
204
-igb_set_interrupt_cause(IGBCore *core, uint32_t val)
205
+static void igb_lower_interrupts(IGBCore *core, size_t index, uint32_t causes)
206
{
207
- trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]);
208
+ trace_e1000e_irq_clear(index << 2,
209
+ core->mac[index], core->mac[index] & ~causes);
210
+
211
+ core->mac[index] &= ~causes;
212
213
- core->mac[ICR] |= val;
214
+ trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS],
215
+ core->mac[ICR], core->mac[IMS]);
216
217
- trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]);
218
+ if (!(core->mac[ICR] & core->mac[IMS]) &&
219
+ !(core->mac[GPIE] & E1000_GPIE_MSIX_MODE)) {
220
+ core->mac[EICR] &= ~E1000_EICR_OTHER;
221
222
- igb_update_interrupt_state(core);
223
+ if (!msix_enabled(core->owner) && !msi_enabled(core->owner)) {
224
+ igb_lower_legacy_irq(core);
225
+ }
226
+ }
227
}
228
229
static void igb_set_eics(IGBCore *core, int index, uint32_t val)
230
{
231
bool msix = !!(core->mac[GPIE] & E1000_GPIE_MSIX_MODE);
232
+ uint32_t mask = msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK;
233
234
trace_igb_irq_write_eics(val, msix);
235
-
236
- core->mac[EICS] |=
237
- val & (msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK);
238
-
239
- /*
240
- * TODO: Move to igb_update_interrupt_state if EICS is modified in other
241
- * places.
242
- */
243
- core->mac[EICR] = core->mac[EICS];
244
-
245
- igb_update_interrupt_state(core);
246
+ igb_raise_interrupts(core, EICR, val & mask);
247
}
248
249
static void igb_set_eims(IGBCore *core, int index, uint32_t val)
250
{
251
bool msix = !!(core->mac[GPIE] & E1000_GPIE_MSIX_MODE);
252
+ uint32_t mask = msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK;
253
254
trace_igb_irq_write_eims(val, msix);
255
-
256
- core->mac[EIMS] |=
257
- val & (msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK);
258
-
259
- igb_update_interrupt_state(core);
260
+ igb_raise_interrupts(core, EIMS, val & mask);
261
}
262
263
static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
264
{
265
uint32_t ent = core->mac[VTIVAR_MISC + vfn];
266
+ uint32_t causes;
267
268
if ((ent & E1000_IVAR_VALID)) {
269
- core->mac[EICR] |= (ent & 0x3) << (22 - vfn * IGBVF_MSIX_VEC_NUM);
270
- igb_update_interrupt_state(core);
271
+ causes = (ent & 0x3) << (22 - vfn * IGBVF_MSIX_VEC_NUM);
272
+ igb_raise_interrupts(core, EICR, causes);
273
}
274
}
275
276
static void mailbox_interrupt_to_pf(IGBCore *core)
277
{
278
- igb_set_interrupt_cause(core, E1000_ICR_VMMB);
279
+ igb_raise_interrupts(core, ICR, E1000_ICR_VMMB);
280
}
281
282
static void igb_set_pfmailbox(IGBCore *core, int index, uint32_t val)
283
@@ -XXX,XX +XXX,XX @@ static void igb_w1c(IGBCore *core, int index, uint32_t val)
284
static void igb_set_eimc(IGBCore *core, int index, uint32_t val)
285
{
286
bool msix = !!(core->mac[GPIE] & E1000_GPIE_MSIX_MODE);
287
+ uint32_t mask = msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK;
288
289
- /* Interrupts are disabled via a write to EIMC and reflected in EIMS. */
290
- core->mac[EIMS] &=
291
- ~(val & (msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK));
292
+ trace_igb_irq_write_eimc(val, msix);
293
294
- trace_igb_irq_write_eimc(val, core->mac[EIMS], msix);
295
- igb_update_interrupt_state(core);
296
+ /* Interrupts are disabled via a write to EIMC and reflected in EIMS. */
297
+ igb_lower_interrupts(core, EIMS, val & mask);
298
}
299
300
static void igb_set_eiac(IGBCore *core, int index, uint32_t val)
301
@@ -XXX,XX +XXX,XX @@ static void igb_set_eicr(IGBCore *core, int index, uint32_t val)
302
* TODO: In IOV mode, only bit zero of this vector is available for the PF
303
* function.
304
*/
305
- core->mac[EICR] &=
306
- ~(val & (msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK));
307
+ uint32_t mask = msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK;
308
309
trace_igb_irq_write_eicr(val, msix);
310
- igb_update_interrupt_state(core);
311
+ igb_lower_interrupts(core, EICR, val & mask);
312
}
313
314
static void igb_set_vtctrl(IGBCore *core, int index, uint32_t val)
315
@@ -XXX,XX +XXX,XX @@ igb_autoneg_timer(void *opaque)
316
317
igb_update_flowctl_status(core);
318
/* signal link status change to the guest */
319
- igb_set_interrupt_cause(core, E1000_ICR_LSC);
320
+ igb_raise_interrupts(core, ICR, E1000_ICR_LSC);
321
}
322
}
323
324
@@ -XXX,XX +XXX,XX @@ igb_set_mdic(IGBCore *core, int index, uint32_t val)
325
core->mac[MDIC] = val | E1000_MDIC_READY;
326
327
if (val & E1000_MDIC_INT_EN) {
328
- igb_set_interrupt_cause(core, E1000_ICR_MDAC);
329
+ igb_raise_interrupts(core, ICR, E1000_ICR_MDAC);
330
}
331
}
332
333
@@ -XXX,XX +XXX,XX @@ static void
334
igb_set_ics(IGBCore *core, int index, uint32_t val)
335
{
336
trace_e1000e_irq_write_ics(val);
337
- igb_set_interrupt_cause(core, val);
338
+ igb_raise_interrupts(core, ICR, val);
339
}
340
341
static void
342
igb_set_imc(IGBCore *core, int index, uint32_t val)
343
{
344
trace_e1000e_irq_ims_clear_set_imc(val);
345
- igb_clear_ims_bits(core, val);
346
- igb_update_interrupt_state(core);
347
+ igb_lower_interrupts(core, IMS, val);
348
}
349
350
static void
351
igb_set_ims(IGBCore *core, int index, uint32_t val)
352
{
353
- uint32_t valid_val = val & 0x77D4FBFD;
354
-
355
- trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val);
356
- core->mac[IMS] |= valid_val;
357
- igb_update_interrupt_state(core);
358
+ igb_raise_interrupts(core, IMS, val & 0x77D4FBFD);
359
}
360
361
-static void igb_commit_icr(IGBCore *core)
362
+static void igb_nsicr(IGBCore *core)
363
{
364
/*
365
* If GPIE.NSICR = 0, then the clear of IMS will occur only if at
366
@@ -XXX,XX +XXX,XX @@ static void igb_commit_icr(IGBCore *core)
367
*/
368
if ((core->mac[GPIE] & E1000_GPIE_NSICR) ||
369
(core->mac[IMS] && (core->mac[ICR] & E1000_ICR_INT_ASSERTED))) {
370
- igb_clear_ims_bits(core, core->mac[IAM]);
371
+ igb_lower_interrupts(core, IMS, core->mac[IAM]);
372
}
373
-
374
- igb_update_interrupt_state(core);
375
}
376
377
static void igb_set_icr(IGBCore *core, int index, uint32_t val)
378
{
379
- uint32_t icr = core->mac[ICR] & ~val;
380
-
381
- trace_igb_irq_icr_write(val, core->mac[ICR], icr);
382
- core->mac[ICR] = icr;
383
- igb_commit_icr(core);
384
+ igb_nsicr(core);
385
+ igb_lower_interrupts(core, ICR, val);
386
}
387
388
static uint32_t
389
@@ -XXX,XX +XXX,XX @@ static uint32_t
390
igb_mac_icr_read(IGBCore *core, int index)
391
{
392
uint32_t ret = core->mac[ICR];
393
- trace_e1000e_irq_icr_read_entry(ret);
394
395
if (core->mac[GPIE] & E1000_GPIE_NSICR) {
396
trace_igb_irq_icr_clear_gpie_nsicr();
397
- core->mac[ICR] = 0;
398
+ igb_lower_interrupts(core, ICR, 0xffffffff);
399
} else if (core->mac[IMS] == 0) {
400
trace_e1000e_irq_icr_clear_zero_ims();
401
- core->mac[ICR] = 0;
402
+ igb_lower_interrupts(core, ICR, 0xffffffff);
403
} else if (!msix_enabled(core->owner)) {
404
trace_e1000e_irq_icr_clear_nonmsix_icr_read();
405
- core->mac[ICR] = 0;
406
+ igb_lower_interrupts(core, ICR, 0xffffffff);
407
}
408
409
- trace_e1000e_irq_icr_read_exit(core->mac[ICR]);
410
- igb_commit_icr(core);
411
+ igb_nsicr(core);
412
return ret;
413
}
414
415
diff --git a/hw/net/trace-events b/hw/net/trace-events
416
index XXXXXXX..XXXXXXX 100644
417
--- a/hw/net/trace-events
418
+++ b/hw/net/trace-events
419
@@ -XXX,XX +XXX,XX @@ e1000e_irq_msix_notify_vec(uint32_t vector) "MSI-X notify vector 0x%x"
420
e1000e_irq_postponed_by_xitr(uint32_t reg) "Interrupt postponed by [E]ITR register 0x%x"
421
e1000e_irq_clear(uint32_t offset, uint32_t old, uint32_t new) "Clearing interrupt register 0x%x: 0x%x --> 0x%x"
422
e1000e_irq_set(uint32_t offset, uint32_t old, uint32_t new) "Setting interrupt register 0x%x: 0x%x --> 0x%x"
423
-e1000e_irq_clear_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Clearing IMS bits 0x%x: 0x%x --> 0x%x"
424
-e1000e_irq_set_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Setting IMS bits 0x%x: 0x%x --> 0x%x"
425
e1000e_irq_fix_icr_asserted(uint32_t new_val) "ICR_ASSERTED bit fixed: 0x%x"
426
e1000e_irq_add_msi_other(uint32_t new_val) "ICR_OTHER bit added: 0x%x"
427
e1000e_irq_pending_interrupts(uint32_t pending, uint32_t icr, uint32_t ims) "ICR PENDING: 0x%x (ICR: 0x%x, IMS: 0x%x)"
428
-e1000e_irq_set_cause_entry(uint32_t val, uint32_t icr) "Going to set IRQ cause 0x%x, ICR: 0x%x"
429
-e1000e_irq_set_cause_exit(uint32_t val, uint32_t icr) "Set IRQ cause 0x%x, ICR: 0x%x"
430
-e1000e_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x"
431
e1000e_irq_write_ics(uint32_t val) "Adding ICR bits 0x%x"
432
e1000e_irq_icr_process_iame(void) "Clearing IMS bits due to IAME"
433
e1000e_irq_read_ics(uint32_t ics) "Current ICS: 0x%x"
434
e1000e_irq_read_ims(uint32_t ims) "Current IMS: 0x%x"
435
e1000e_irq_icr_clear_nonmsix_icr_read(void) "Clearing ICR on read due to non MSI-X int"
436
-e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x"
437
-e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x"
438
e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS"
439
e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME"
440
e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X"
441
@@ -XXX,XX +XXX,XX @@ e1000e_irq_tidv_fpd_not_running(void) "FPD written while TIDV was not running"
442
e1000e_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = %u"
443
e1000e_irq_itr_set(uint32_t val) "ITR = %u"
444
e1000e_irq_fire_all_timers(uint32_t val) "Firing all delay/throttling timers on all interrupts enable (0x%X written to IMS)"
445
-e1000e_irq_adding_delayed_causes(uint32_t val, uint32_t icr) "Merging delayed causes 0x%X to ICR 0x%X"
446
e1000e_irq_msix_pending_clearing(uint32_t cause, uint32_t int_cfg, uint32_t vec) "Clearing MSI-X pending bit for cause 0x%x, IVAR config 0x%x, vector %u"
447
448
e1000e_wrn_msix_vec_wrong(uint32_t cause, uint32_t cfg) "Invalid configuration for cause 0x%x: 0x%x"
449
@@ -XXX,XX +XXX,XX @@ igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint3
450
igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X"
451
452
igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled"
453
-igb_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x"
454
igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x"
455
igb_irq_read_iam(uint32_t icr) "Current IAM: 0x%x"
456
igb_irq_write_eics(uint32_t val, bool msix) "Update EICS: 0x%x MSI-X: %d"
457
igb_irq_write_eims(uint32_t val, bool msix) "Update EIMS: 0x%x MSI-X: %d"
458
-igb_irq_write_eimc(uint32_t val, uint32_t eims, bool msix) "Update EIMC: 0x%x EIMS: 0x%x MSI-X: %d"
459
+igb_irq_write_eimc(uint32_t val, bool msix) "Update EIMC: 0x%x MSI-X: %d"
460
igb_irq_write_eiac(uint32_t val) "Update EIAC: 0x%x"
461
igb_irq_write_eiam(uint32_t val, bool msix) "Update EIAM: 0x%x MSI-X: %d"
462
igb_irq_write_eicr(uint32_t val, bool msix) "Update EICR: 0x%x MSI-X: %d"
463
diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
464
index XXXXXXX..XXXXXXX 100755
465
--- a/scripts/ci/org.centos/stream/8/x86_64/test-avocado
466
+++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado
467
@@ -XXX,XX +XXX,XX @@ make get-vm-images
468
tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \
469
tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \
470
tests/avocado/hotplug_cpu.py:HotPlugCPU.test \
471
+ tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb \
472
tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb_nomsi \
473
tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \
474
tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \
475
diff --git a/tests/avocado/netdev-ethtool.py b/tests/avocado/netdev-ethtool.py
476
index XXXXXXX..XXXXXXX 100644
477
--- a/tests/avocado/netdev-ethtool.py
478
+++ b/tests/avocado/netdev-ethtool.py
479
@@ -XXX,XX +XXX,XX @@ def common_test_code(self, netdev, extra_args=None):
480
# no need to gracefully shutdown, just finish
481
self.vm.kill()
482
483
- # Skip testing for MSI for now. Allegedly it was fixed by:
484
- # 28e96556ba (igb: Allocate MSI-X vector when testing)
485
- # but I'm seeing oops in the kernel
486
- @skip("Kernel bug with MSI enabled")
487
def test_igb(self):
488
"""
489
:avocado: tags=device:igb
490
--
491
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
For GPIE.NSICR, Section 7.3.2.1.2 says:
4
> ICR bits are cleared on register read. If GPIE.NSICR = 0b, then the
5
> clear on read occurs only if no bit is set in the IMS or at least one
6
> bit is set in the IMS and there is a true interrupt as reflected in
7
> ICR.INTA.
8
9
e1000e behaves similarly, though it checks for CTRL_EXT.IAME, which does
10
not exist on igb.
11
12
Suggested-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
13
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
14
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
---
16
hw/net/igb_core.c | 2 ++
17
1 file changed, 2 insertions(+)
18
19
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
20
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/igb_core.c
22
+++ b/hw/net/igb_core.c
23
@@ -XXX,XX +XXX,XX @@ igb_mac_icr_read(IGBCore *core, int index)
24
} else if (core->mac[IMS] == 0) {
25
trace_e1000e_irq_icr_clear_zero_ims();
26
igb_lower_interrupts(core, ICR, 0xffffffff);
27
+ } else if (core->mac[ICR] & E1000_ICR_INT_ASSERTED) {
28
+ igb_lower_interrupts(core, ICR, 0xffffffff);
29
} else if (!msix_enabled(core->owner)) {
30
trace_e1000e_irq_icr_clear_nonmsix_icr_read();
31
igb_lower_interrupts(core, ICR, 0xffffffff);
32
--
33
2.7.4
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
vmxnet3 has no dependency on PC, and VMware Fusion actually makes it
4
available on Apple Silicon according to:
5
https://kb.vmware.com/s/article/90364
6
7
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
8
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
11
hw/net/Kconfig | 2 +-
12
1 file changed, 1 insertion(+), 1 deletion(-)
13
14
diff --git a/hw/net/Kconfig b/hw/net/Kconfig
15
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/Kconfig
17
+++ b/hw/net/Kconfig
18
@@ -XXX,XX +XXX,XX @@ config RTL8139_PCI
19
20
config VMXNET3_PCI
21
bool
22
- default y if PCI_DEVICES && PC_PCI
23
+ default y if PCI_DEVICES
24
depends on PCI
25
26
config SMC91C111
27
--
28
2.7.4
29
30
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
2
2
3
gently asked by his automatic reply :)
3
I have made significant changes for network packet abstractions so add
4
me as a reviewer.
4
5
5
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
6
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
7
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
6
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
7
---
9
---
8
MAINTAINERS | 8 ++++----
10
MAINTAINERS | 1 +
9
1 file changed, 4 insertions(+), 4 deletions(-)
11
1 file changed, 1 insertion(+)
10
12
11
diff --git a/MAINTAINERS b/MAINTAINERS
13
diff --git a/MAINTAINERS b/MAINTAINERS
12
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
13
--- a/MAINTAINERS
15
--- a/MAINTAINERS
14
+++ b/MAINTAINERS
16
+++ b/MAINTAINERS
15
@@ -XXX,XX +XXX,XX @@ F: hw/scsi/mfi.h
17
@@ -XXX,XX +XXX,XX @@ F: tests/qtest/fuzz-megasas-test.c
16
F: tests/megasas-test.c
17
18
18
Network packet abstractions
19
Network packet abstractions
19
-M: Dmitry Fleytman <dmitry@daynix.com>
20
M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
20
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
21
+R: Akihiko Odaki <akihiko.odaki@daynix.com>
21
S: Maintained
22
S: Maintained
22
F: include/net/eth.h
23
F: include/net/eth.h
23
F: net/eth.c
24
F: net/eth.c
24
@@ -XXX,XX +XXX,XX @@ F: hw/net/net_rx_pkt*
25
F: hw/net/net_tx_pkt*
26
27
Vmware
28
-M: Dmitry Fleytman <dmitry@daynix.com>
29
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
30
S: Maintained
31
F: hw/net/vmxnet*
32
F: hw/scsi/vmw_pvscsi*
33
@@ -XXX,XX +XXX,XX @@ F: hw/mem/nvdimm.c
34
F: include/hw/mem/nvdimm.h
35
36
e1000x
37
-M: Dmitry Fleytman <dmitry@daynix.com>
38
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
39
S: Maintained
40
F: hw/net/e1000x*
41
42
e1000e
43
-M: Dmitry Fleytman <dmitry@daynix.com>
44
+M: Dmitry Fleytman <dmitry.fleytman@gmail.com>
45
S: Maintained
46
F: hw/net/e1000e*
47
48
--
25
--
49
2.7.4
26
2.7.4
50
27
51
28
diff view generated by jsdifflib
New patch
1
From: Akihiko Odaki <akihiko.odaki@daynix.com>
1
2
3
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
4
Signed-off-by: Jason Wang <jasowang@redhat.com>
5
---
6
docs/system/devices/igb.rst | 12 +++++++-----
7
1 file changed, 7 insertions(+), 5 deletions(-)
8
9
diff --git a/docs/system/devices/igb.rst b/docs/system/devices/igb.rst
10
index XXXXXXX..XXXXXXX 100644
11
--- a/docs/system/devices/igb.rst
12
+++ b/docs/system/devices/igb.rst
13
@@ -XXX,XX +XXX,XX @@ Limitations
14
===========
15
16
This igb implementation was tested with Linux Test Project [2]_ and Windows HLK
17
-[3]_ during the initial development. The command used when testing with LTP is:
18
+[3]_ during the initial development. Later it was also tested with DPDK Test
19
+Suite [4]_. The command used when testing with LTP is:
20
21
.. code-block:: shell
22
23
@@ -XXX,XX +XXX,XX @@ This igb implementation was tested with Linux Test Project [2]_ and Windows HLK
24
25
Be aware that this implementation lacks many functionalities available with the
26
actual hardware, and you may experience various failures if you try to use it
27
-with a different operating system other than Linux and Windows or if you try
28
-functionalities not covered by the tests.
29
+with an environment other than DPDK, Linux, and Windows or if you try
30
+functionalities not covered by the tests.
31
32
Using igb
33
=========
34
@@ -XXX,XX +XXX,XX @@ Using igb should be nothing different from using another network device. See
35
:ref:`Network_emulation` in general.
36
37
However, you may also need to perform additional steps to activate SR-IOV
38
-feature on your guest. For Linux, refer to [4]_.
39
+feature on your guest. For Linux, refer to [5]_.
40
41
Developing igb
42
==============
43
@@ -XXX,XX +XXX,XX @@ References
44
.. [1] https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/82576eb-gigabit-ethernet-controller-datasheet.pdf
45
.. [2] https://github.com/linux-test-project/ltp
46
.. [3] https://learn.microsoft.com/en-us/windows-hardware/test/hlk/
47
-.. [4] https://docs.kernel.org/PCI/pci-iov-howto.html
48
+.. [4] https://doc.dpdk.org/dts/gsg/
49
+.. [5] https://docs.kernel.org/PCI/pci-iov-howto.html
50
--
51
2.7.4
diff view generated by jsdifflib
1
From: Thomas Huth <thuth@redhat.com>
1
From: Stefan Hajnoczi <stefanha@redhat.com>
2
2
3
The vlan concept is marked as deprecated, so we should not use
3
If the driver sets large_send_mss to 0 then a divide-by-zero occurs.
4
this for examples in the documentation anymore.
4
Even if the division wasn't a problem, the for loop that emits MSS-sized
5
packets would never terminate.
5
6
6
Signed-off-by: Thomas Huth <thuth@redhat.com>
7
Solve these issues by skipping offloading when large_send_mss=0.
8
9
This issue was found by OSS-Fuzz as part of Alexander Bulekov's device
10
fuzzing work. The reproducer is:
11
12
$ cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest, -m \
13
512M,slots=1,maxmem=0xffff000000000000 -machine q35 -nodefaults -device \
14
rtl8139,netdev=net0 -netdev user,id=net0 -device \
15
pc-dimm,id=nv1,memdev=mem1,addr=0xb800a64602800000 -object \
16
memory-backend-ram,id=mem1,size=2M -qtest stdio
17
outl 0xcf8 0x80000814
18
outl 0xcfc 0xe0000000
19
outl 0xcf8 0x80000804
20
outw 0xcfc 0x06
21
write 0xe0000037 0x1 0x04
22
write 0xe00000e0 0x2 0x01
23
write 0x1 0x1 0x04
24
write 0x3 0x1 0x98
25
write 0xa 0x1 0x8c
26
write 0xb 0x1 0x02
27
write 0xc 0x1 0x46
28
write 0xd 0x1 0xa6
29
write 0xf 0x1 0xb8
30
write 0xb800a646028c000c 0x1 0x08
31
write 0xb800a646028c000e 0x1 0x47
32
write 0xb800a646028c0010 0x1 0x02
33
write 0xb800a646028c0017 0x1 0x06
34
write 0xb800a646028c0036 0x1 0x80
35
write 0xe00000d9 0x1 0x40
36
EOF
37
38
Buglink: https://gitlab.com/qemu-project/qemu/-/issues/1582
39
Closes: https://gitlab.com/qemu-project/qemu/-/issues/1582
40
Cc: qemu-stable@nongnu.org
41
Cc: Peter Maydell <peter.maydell@linaro.org>
42
Fixes: 6d71357a3b65 ("rtl8139: honor large send MSS value")
43
Reported-by: Alexander Bulekov <alxndr@bu.edu>
44
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
45
Tested-by: Alexander Bulekov <alxndr@bu.edu>
46
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
7
Signed-off-by: Jason Wang <jasowang@redhat.com>
47
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
48
---
9
qemu-options.hx | 4 ++--
49
hw/net/rtl8139.c | 3 +++
10
1 file changed, 2 insertions(+), 2 deletions(-)
50
1 file changed, 3 insertions(+)
11
51
12
diff --git a/qemu-options.hx b/qemu-options.hx
52
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
13
index XXXXXXX..XXXXXXX 100644
53
index XXXXXXX..XXXXXXX 100644
14
--- a/qemu-options.hx
54
--- a/hw/net/rtl8139.c
15
+++ b/qemu-options.hx
55
+++ b/hw/net/rtl8139.c
16
@@ -XXX,XX +XXX,XX @@ qemu-system-i386 linux.img -net nic -net tap
56
@@ -XXX,XX +XXX,XX @@ static int rtl8139_cplus_transmit_one(RTL8139State *s)
17
#launch a QEMU instance with two NICs, each one connected
57
18
#to a TAP device
58
int large_send_mss = (txdw0 >> CP_TC_LGSEN_MSS_SHIFT) &
19
qemu-system-i386 linux.img \
59
CP_TC_LGSEN_MSS_MASK;
20
- -net nic,vlan=0 -net tap,vlan=0,ifname=tap0 \
60
+ if (large_send_mss == 0) {
21
- -net nic,vlan=1 -net tap,vlan=1,ifname=tap1
61
+ goto skip_offload;
22
+ -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \
62
+ }
23
+ -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1
63
24
@end example
64
DPRINTF("+++ C+ mode offloaded task TSO IP data %d "
25
65
"frame data %d specified MSS=%d\n",
26
@example
27
--
66
--
28
2.7.4
67
2.7.4
29
68
30
69
diff view generated by jsdifflib