1 | The following changes since commit e607bbee553cfe73072870cef458cfa4e78133e2: | 1 | The following changes since commit 886c0453cbf10eebd42a9ccf89c3e46eb389c357: |
---|---|---|---|
2 | 2 | ||
3 | Merge remote-tracking branch 'remotes/edgar/tags/edgar/xilinx-next-2018-01-26.for-upstream' into staging (2018-01-26 14:24:25 +0000) | 3 | Merge tag 'pull-qapi-2023-05-17-v2' of https://repo.or.cz/qemu/armbru into staging (2023-05-22 15:54:21 -0700) |
4 | 4 | ||
5 | are available in the git repository at: | 5 | are available in the git repository at: |
6 | 6 | ||
7 | https://github.com/jasowang/qemu.git tags/net-pull-request | 7 | https://github.com/jasowang/qemu.git tags/net-pull-request |
8 | 8 | ||
9 | for you to fetch changes up to bf4835a4d5338bb7424827715df22570a8adc67c: | 9 | for you to fetch changes up to 792676c165159c11412346870fd58fd243ab2166: |
10 | 10 | ||
11 | MAINTAINERS: update Dmitry Fleytman email (2018-01-29 16:05:38 +0800) | 11 | rtl8139: fix large_send_mss divide-by-zero (2023-05-23 15:20:15 +0800) |
12 | 12 | ||
13 | ---------------------------------------------------------------- | 13 | ---------------------------------------------------------------- |
14 | 14 | ||
15 | ---------------------------------------------------------------- | 15 | ---------------------------------------------------------------- |
16 | Mao Zhongyi (2): | 16 | Akihiko Odaki (48): |
17 | colo: modified the payload compare function | 17 | hw/net/net_tx_pkt: Decouple implementation from PCI |
18 | colo: compare the packet based on the tcp sequence number | 18 | hw/net/net_tx_pkt: Decouple interface from PCI |
19 | e1000x: Fix BPRC and MPRC | ||
20 | igb: Fix Rx packet type encoding | ||
21 | igb: Do not require CTRL.VME for tx VLAN tagging | ||
22 | igb: Clear IMS bits when committing ICR access | ||
23 | net/net_rx_pkt: Use iovec for net_rx_pkt_set_protocols() | ||
24 | e1000e: Always copy ethernet header | ||
25 | igb: Always copy ethernet header | ||
26 | Fix references to igb Avocado test | ||
27 | tests/avocado: Remove unused imports | ||
28 | tests/avocado: Remove test_igb_nomsi_kvm | ||
29 | hw/net/net_tx_pkt: Remove net_rx_pkt_get_l4_info | ||
30 | net/eth: Rename eth_setup_vlan_headers_ex | ||
31 | e1000x: Share more Rx filtering logic | ||
32 | e1000x: Take CRC into consideration for size check | ||
33 | e1000x: Rename TcpIpv6 into TcpIpv6Ex | ||
34 | e1000e: Always log status after building rx metadata | ||
35 | igb: Always log status after building rx metadata | ||
36 | igb: Remove goto | ||
37 | igb: Read DCMD.VLE of the first Tx descriptor | ||
38 | e1000e: Reset packet state after emptying Tx queue | ||
39 | vmxnet3: Reset packet state after emptying Tx queue | ||
40 | igb: Add more definitions for Tx descriptor | ||
41 | igb: Share common VF constants | ||
42 | igb: Fix igb_mac_reg_init coding style alignment | ||
43 | igb: Clear EICR bits for delayed MSI-X interrupts | ||
44 | e1000e: Rename a variable in e1000e_receive_internal() | ||
45 | igb: Rename a variable in igb_receive_internal() | ||
46 | net/eth: Use void pointers | ||
47 | net/eth: Always add VLAN tag | ||
48 | hw/net/net_rx_pkt: Enforce alignment for eth_header | ||
49 | tests/qtest/libqos/igb: Set GPIE.Multiple_MSIX | ||
50 | igb: Implement MSI-X single vector mode | ||
51 | igb: Use UDP for RSS hash | ||
52 | igb: Implement Rx SCTP CSO | ||
53 | igb: Implement Tx SCTP CSO | ||
54 | igb: Strip the second VLAN tag for extended VLAN | ||
55 | igb: Filter with the second VLAN tag for extended VLAN | ||
56 | igb: Implement igb-specific oversize check | ||
57 | igb: Implement Rx PTP2 timestamp | ||
58 | igb: Implement Tx timestamp | ||
59 | e1000e: Notify only new interrupts | ||
60 | igb: Notify only new interrupts | ||
61 | igb: Clear-on-read ICR when ICR.INTA is set | ||
62 | vmxnet3: Do not depend on PC | ||
63 | MAINTAINERS: Add a reviewer for network packet abstractions | ||
64 | docs/system/devices/igb: Note igb is tested for DPDK | ||
19 | 65 | ||
20 | Philippe Mathieu-Daudé (1): | 66 | Stefan Hajnoczi (1): |
21 | MAINTAINERS: update Dmitry Fleytman email | 67 | rtl8139: fix large_send_mss divide-by-zero |
22 | 68 | ||
23 | Thomas Huth (3): | 69 | timothee.cocault@gmail.com (1): |
24 | net: Allow hubports to connect to other netdevs | 70 | e1000e: Fix tx/rx counters |
25 | net: Allow netdevs to be used with 'hostfwd_add' and 'hostfwd_remove' | ||
26 | qemu-doc: Get rid of "vlan=X" example in the documentation | ||
27 | 71 | ||
28 | MAINTAINERS | 8 +- | 72 | MAINTAINERS | 3 +- |
29 | hmp-commands.hx | 4 +- | 73 | docs/system/devices/igb.rst | 14 +- |
30 | net/colo-compare.c | 411 +++++++++++++++++++++++++++++++++-------------------- | 74 | hw/net/Kconfig | 2 +- |
31 | net/colo.c | 9 ++ | 75 | hw/net/e1000.c | 46 +- |
32 | net/colo.h | 15 ++ | 76 | hw/net/e1000e_core.c | 297 ++++----- |
33 | net/hub.c | 27 +++- | 77 | hw/net/e1000e_core.h | 2 - |
34 | net/hub.h | 3 +- | 78 | hw/net/e1000x_common.c | 82 ++- |
35 | net/net.c | 2 +- | 79 | hw/net/e1000x_common.h | 9 +- |
36 | net/slirp.c | 33 +++-- | 80 | hw/net/e1000x_regs.h | 24 +- |
37 | net/trace-events | 2 +- | 81 | hw/net/igb.c | 10 +- |
38 | qapi/net.json | 4 +- | 82 | hw/net/igb_common.h | 24 +- |
39 | qemu-options.hx | 12 +- | 83 | hw/net/igb_core.c | 722 +++++++++++---------- |
40 | 12 files changed, 347 insertions(+), 183 deletions(-) | 84 | hw/net/igb_regs.h | 67 +- |
41 | 85 | hw/net/igbvf.c | 7 - | |
42 | 86 | hw/net/net_rx_pkt.c | 107 +-- | |
87 | hw/net/net_rx_pkt.h | 38 +- | ||
88 | hw/net/net_tx_pkt.c | 101 +-- | ||
89 | hw/net/net_tx_pkt.h | 46 +- | ||
90 | hw/net/rtl8139.c | 3 + | ||
91 | hw/net/trace-events | 19 +- | ||
92 | hw/net/virtio-net.c | 7 +- | ||
93 | hw/net/vmxnet3.c | 22 +- | ||
94 | include/net/eth.h | 29 +- | ||
95 | include/qemu/crc32c.h | 1 + | ||
96 | net/eth.c | 100 +-- | ||
97 | scripts/ci/org.centos/stream/8/x86_64/test-avocado | 3 +- | ||
98 | tests/avocado/netdev-ethtool.py | 17 +- | ||
99 | tests/qtest/libqos/igb.c | 1 + | ||
100 | util/crc32c.c | 8 + | ||
101 | 29 files changed, 979 insertions(+), 832 deletions(-) | diff view generated by jsdifflib |
1 | From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com> | 1 | From: "timothee.cocault@gmail.com" <timothee.cocault@gmail.com> |
---|---|---|---|
2 | 2 | ||
3 | Modified the function colo_packet_compare_common to prepare for the | 3 | The bytes and packets counter registers are cleared on read. |
4 | tcp packet comparison in the next patch. | ||
5 | 4 | ||
6 | Cc: Zhang Chen <zhangckid@gmail.com> | 5 | Copying the "total counter" registers to the "good counter" registers has |
7 | Cc: Li Zhijian <lizhijian@cn.fujitsu.com> | 6 | side effects. |
8 | Cc: Jason Wang <jasowang@redhat.com> | 7 | If the "total" register is never read by the OS, it only gets incremented. |
8 | This leads to exponential growth of the "good" register. | ||
9 | 9 | ||
10 | Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com> | 10 | This commit increments the counters individually to avoid this. |
11 | Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> | 11 | |
12 | Signed-off-by: Zhang Chen <zhangckid@gmail.com> | 12 | Signed-off-by: Timothée Cocault <timothee.cocault@gmail.com> |
13 | Reviewed-by: Zhang Chen <zhangckid@gmail.com> | ||
14 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 13 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
15 | --- | 14 | --- |
16 | net/colo-compare.c | 88 +++++++++++++++++++++++++++--------------------------- | 15 | hw/net/e1000.c | 5 ++--- |
17 | 1 file changed, 44 insertions(+), 44 deletions(-) | 16 | hw/net/e1000e_core.c | 5 ++--- |
17 | hw/net/e1000x_common.c | 5 ++--- | ||
18 | hw/net/igb_core.c | 5 ++--- | ||
19 | 4 files changed, 8 insertions(+), 12 deletions(-) | ||
18 | 20 | ||
19 | diff --git a/net/colo-compare.c b/net/colo-compare.c | 21 | diff --git a/hw/net/e1000.c b/hw/net/e1000.c |
20 | index XXXXXXX..XXXXXXX 100644 | 22 | index XXXXXXX..XXXXXXX 100644 |
21 | --- a/net/colo-compare.c | 23 | --- a/hw/net/e1000.c |
22 | +++ b/net/colo-compare.c | 24 | +++ b/hw/net/e1000.c |
23 | @@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) | 25 | @@ -XXX,XX +XXX,XX @@ xmit_seg(E1000State *s) |
24 | * return: 0 means packet same | 26 | |
25 | * > 0 || < 0 means packet different | 27 | e1000x_inc_reg_if_not_full(s->mac_reg, TPT); |
26 | */ | 28 | e1000x_grow_8reg_if_not_full(s->mac_reg, TOTL, s->tx.size + 4); |
27 | -static int colo_packet_compare_common(Packet *ppkt, | 29 | - s->mac_reg[GPTC] = s->mac_reg[TPT]; |
28 | - Packet *spkt, | 30 | - s->mac_reg[GOTCL] = s->mac_reg[TOTL]; |
29 | - int poffset, | 31 | - s->mac_reg[GOTCH] = s->mac_reg[TOTH]; |
30 | - int soffset) | 32 | + e1000x_inc_reg_if_not_full(s->mac_reg, GPTC); |
31 | +static int colo_compare_packet_payload(Packet *ppkt, | 33 | + e1000x_grow_8reg_if_not_full(s->mac_reg, GOTCL, s->tx.size + 4); |
32 | + Packet *spkt, | 34 | } |
33 | + uint16_t poffset, | 35 | |
34 | + uint16_t soffset, | 36 | static void |
35 | + uint16_t len) | 37 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c |
36 | + | 38 | index XXXXXXX..XXXXXXX 100644 |
37 | { | 39 | --- a/hw/net/e1000e_core.c |
38 | if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { | 40 | +++ b/hw/net/e1000e_core.c |
39 | char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20]; | 41 | @@ -XXX,XX +XXX,XX @@ e1000e_on_tx_done_update_stats(E1000ECore *core, struct NetTxPkt *tx_pkt) |
40 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_common(Packet *ppkt, | 42 | g_assert_not_reached(); |
41 | sec_ip_src, sec_ip_dst); | ||
42 | } | 43 | } |
43 | 44 | ||
44 | - poffset = ppkt->vnet_hdr_len + poffset; | 45 | - core->mac[GPTC] = core->mac[TPT]; |
45 | - soffset = ppkt->vnet_hdr_len + soffset; | 46 | - core->mac[GOTCL] = core->mac[TOTL]; |
46 | - | 47 | - core->mac[GOTCH] = core->mac[TOTH]; |
47 | - if (ppkt->size - poffset == spkt->size - soffset) { | 48 | + e1000x_inc_reg_if_not_full(core->mac, GPTC); |
48 | - return memcmp(ppkt->data + poffset, | 49 | + e1000x_grow_8reg_if_not_full(core->mac, GOTCL, tot_len); |
49 | - spkt->data + soffset, | ||
50 | - spkt->size - soffset); | ||
51 | - } else { | ||
52 | - trace_colo_compare_main("Net packet size are not the same"); | ||
53 | - return -1; | ||
54 | - } | ||
55 | + return memcmp(ppkt->data + poffset, spkt->data + soffset, len); | ||
56 | } | 50 | } |
57 | 51 | ||
58 | /* | 52 | static void |
59 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) | 53 | diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c |
60 | * the secondary guest's timestamp. COLO just focus on payload, | 54 | index XXXXXXX..XXXXXXX 100644 |
61 | * so we just need skip this field. | 55 | --- a/hw/net/e1000x_common.c |
62 | */ | 56 | +++ b/hw/net/e1000x_common.c |
63 | - if (ptcp->th_off > 5) { | 57 | @@ -XXX,XX +XXX,XX @@ e1000x_update_rx_total_stats(uint32_t *mac, |
64 | - ptrdiff_t ptcp_offset, stcp_offset; | 58 | |
65 | 59 | e1000x_increase_size_stats(mac, PRCregs, data_fcs_size); | |
66 | - ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data | 60 | e1000x_inc_reg_if_not_full(mac, TPR); |
67 | - + (ptcp->th_off * 4) - ppkt->vnet_hdr_len; | 61 | - mac[GPRC] = mac[TPR]; |
68 | - stcp_offset = spkt->transport_header - (uint8_t *)spkt->data | 62 | + e1000x_inc_reg_if_not_full(mac, GPRC); |
69 | - + (stcp->th_off * 4) - spkt->vnet_hdr_len; | 63 | /* TOR - Total Octets Received: |
70 | + ptrdiff_t ptcp_offset, stcp_offset; | 64 | * This register includes bytes received in a packet from the <Destination |
71 | 65 | * Address> field through the <CRC> field, inclusively. | |
72 | - /* | 66 | * Always include FCS length (4) in size. |
73 | - * When network is busy, some tcp options(like sack) will unpredictable | 67 | */ |
74 | - * occur in primary side or secondary side. it will make packet size | 68 | e1000x_grow_8reg_if_not_full(mac, TORL, data_size + 4); |
75 | - * not same, but the two packet's payload is identical. colo just | 69 | - mac[GORCL] = mac[TORL]; |
76 | - * care about packet payload, so we skip the option field. | 70 | - mac[GORCH] = mac[TORH]; |
77 | - */ | 71 | + e1000x_grow_8reg_if_not_full(mac, GORCL, data_size + 4); |
78 | - res = colo_packet_compare_common(ppkt, spkt, ptcp_offset, stcp_offset); | 72 | } |
79 | - } else if (ptcp->th_sum == stcp->th_sum) { | 73 | |
80 | - res = colo_packet_compare_common(ppkt, spkt, ETH_HLEN, ETH_HLEN); | 74 | void |
81 | + ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data | 75 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c |
82 | + + (ptcp->th_off << 2) - ppkt->vnet_hdr_len; | 76 | index XXXXXXX..XXXXXXX 100644 |
83 | + stcp_offset = spkt->transport_header - (uint8_t *)spkt->data | 77 | --- a/hw/net/igb_core.c |
84 | + + (stcp->th_off << 2) - spkt->vnet_hdr_len; | 78 | +++ b/hw/net/igb_core.c |
85 | + if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) { | 79 | @@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn) |
86 | + res = colo_compare_packet_payload(ppkt, spkt, | 80 | g_assert_not_reached(); |
87 | + ptcp_offset, stcp_offset, | ||
88 | + ppkt->size - ptcp_offset); | ||
89 | } else { | ||
90 | + trace_colo_compare_main("TCP: payload size of packets are different"); | ||
91 | res = -1; | ||
92 | } | 81 | } |
93 | 82 | ||
94 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) | 83 | - core->mac[GPTC] = core->mac[TPT]; |
95 | */ | 84 | - core->mac[GOTCL] = core->mac[TOTL]; |
96 | static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) | 85 | - core->mac[GOTCH] = core->mac[TOTH]; |
97 | { | 86 | + e1000x_inc_reg_if_not_full(core->mac, GPTC); |
98 | - int ret; | 87 | + e1000x_grow_8reg_if_not_full(core->mac, GOTCL, tot_len); |
99 | - int network_header_length = ppkt->ip->ip_hl * 4; | 88 | |
100 | + uint16_t network_header_length = ppkt->ip->ip_hl << 2; | 89 | if (core->mac[MRQC] & 1) { |
101 | + uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len; | 90 | uint16_t pool = qn % IGB_NUM_VM_POOLS; |
102 | |||
103 | trace_colo_compare_main("compare udp"); | ||
104 | |||
105 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) | ||
106 | * other field like TOS,TTL,IP Checksum. we only need to compare | ||
107 | * the ip payload here. | ||
108 | */ | ||
109 | - ret = colo_packet_compare_common(ppkt, spkt, | ||
110 | - network_header_length + ETH_HLEN, | ||
111 | - network_header_length + ETH_HLEN); | ||
112 | - | ||
113 | - if (ret) { | ||
114 | + if (ppkt->size != spkt->size) { | ||
115 | + trace_colo_compare_main("UDP: payload size of packets are different"); | ||
116 | + return -1; | ||
117 | + } | ||
118 | + if (colo_compare_packet_payload(ppkt, spkt, offset, offset, | ||
119 | + ppkt->size - offset)) { | ||
120 | trace_colo_compare_udp_miscompare("primary pkt size", ppkt->size); | ||
121 | trace_colo_compare_udp_miscompare("Secondary pkt size", spkt->size); | ||
122 | if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { | ||
123 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) | ||
124 | qemu_hexdump((char *)spkt->data, stderr, "colo-compare sec pkt", | ||
125 | spkt->size); | ||
126 | } | ||
127 | + return -1; | ||
128 | + } else { | ||
129 | + return 0; | ||
130 | } | ||
131 | - | ||
132 | - return ret; | ||
133 | } | ||
134 | |||
135 | /* | ||
136 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_udp(Packet *spkt, Packet *ppkt) | ||
137 | */ | ||
138 | static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) | ||
139 | { | ||
140 | - int network_header_length = ppkt->ip->ip_hl * 4; | ||
141 | + uint16_t network_header_length = ppkt->ip->ip_hl << 2; | ||
142 | + uint16_t offset = network_header_length + ETH_HLEN + ppkt->vnet_hdr_len; | ||
143 | |||
144 | trace_colo_compare_main("compare icmp"); | ||
145 | |||
146 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) | ||
147 | * other field like TOS,TTL,IP Checksum. we only need to compare | ||
148 | * the ip payload here. | ||
149 | */ | ||
150 | - if (colo_packet_compare_common(ppkt, spkt, | ||
151 | - network_header_length + ETH_HLEN, | ||
152 | - network_header_length + ETH_HLEN)) { | ||
153 | + if (ppkt->size != spkt->size) { | ||
154 | + trace_colo_compare_main("ICMP: payload size of packets are different"); | ||
155 | + return -1; | ||
156 | + } | ||
157 | + if (colo_compare_packet_payload(ppkt, spkt, offset, offset, | ||
158 | + ppkt->size - offset)) { | ||
159 | trace_colo_compare_icmp_miscompare("primary pkt size", | ||
160 | ppkt->size); | ||
161 | trace_colo_compare_icmp_miscompare("Secondary pkt size", | ||
162 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_icmp(Packet *spkt, Packet *ppkt) | ||
163 | */ | ||
164 | static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) | ||
165 | { | ||
166 | + uint16_t offset = ppkt->vnet_hdr_len; | ||
167 | + | ||
168 | trace_colo_compare_main("compare other"); | ||
169 | if (trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { | ||
170 | char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20]; | ||
171 | @@ -XXX,XX +XXX,XX @@ static int colo_packet_compare_other(Packet *spkt, Packet *ppkt) | ||
172 | sec_ip_src, sec_ip_dst); | ||
173 | } | ||
174 | |||
175 | - return colo_packet_compare_common(ppkt, spkt, 0, 0); | ||
176 | + if (ppkt->size != spkt->size) { | ||
177 | + trace_colo_compare_main("Other: payload size of packets are different"); | ||
178 | + return -1; | ||
179 | + } | ||
180 | + return colo_compare_packet_payload(ppkt, spkt, offset, offset, | ||
181 | + ppkt->size - offset); | ||
182 | } | ||
183 | |||
184 | static int colo_old_packet_check_one(Packet *pkt, int64_t *check_time) | ||
185 | -- | 91 | -- |
186 | 2.7.4 | 92 | 2.7.4 |
187 | 93 | ||
188 | 94 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | This is intended to be followed by another change for the interface. | ||
4 | It also fixes the leak of memory mapping when the specified memory is | ||
5 | partially mapped. | ||
6 | |||
7 | Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices") | ||
8 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
9 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
10 | --- | ||
11 | hw/net/net_tx_pkt.c | 53 +++++++++++++++++++++++++++++++++-------------------- | ||
12 | hw/net/net_tx_pkt.h | 9 +++++++++ | ||
13 | 2 files changed, 42 insertions(+), 20 deletions(-) | ||
14 | |||
15 | diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/net/net_tx_pkt.c | ||
18 | +++ b/hw/net/net_tx_pkt.c | ||
19 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, | ||
20 | } | ||
21 | } | ||
22 | |||
23 | -bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, | ||
24 | - size_t len) | ||
25 | +static bool net_tx_pkt_add_raw_fragment_common(struct NetTxPkt *pkt, | ||
26 | + void *base, size_t len) | ||
27 | { | ||
28 | - hwaddr mapped_len = 0; | ||
29 | struct iovec *ventry; | ||
30 | assert(pkt); | ||
31 | |||
32 | @@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, | ||
33 | return false; | ||
34 | } | ||
35 | |||
36 | - if (!len) { | ||
37 | - return true; | ||
38 | - } | ||
39 | - | ||
40 | ventry = &pkt->raw[pkt->raw_frags]; | ||
41 | - mapped_len = len; | ||
42 | + ventry->iov_base = base; | ||
43 | + ventry->iov_len = len; | ||
44 | + pkt->raw_frags++; | ||
45 | |||
46 | - ventry->iov_base = pci_dma_map(pkt->pci_dev, pa, | ||
47 | - &mapped_len, DMA_DIRECTION_TO_DEVICE); | ||
48 | - | ||
49 | - if ((ventry->iov_base != NULL) && (len == mapped_len)) { | ||
50 | - ventry->iov_len = mapped_len; | ||
51 | - pkt->raw_frags++; | ||
52 | - return true; | ||
53 | - } else { | ||
54 | - return false; | ||
55 | - } | ||
56 | + return true; | ||
57 | } | ||
58 | |||
59 | bool net_tx_pkt_has_fragments(struct NetTxPkt *pkt) | ||
60 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev) | ||
61 | assert(pkt->raw); | ||
62 | for (i = 0; i < pkt->raw_frags; i++) { | ||
63 | assert(pkt->raw[i].iov_base); | ||
64 | - pci_dma_unmap(pkt->pci_dev, pkt->raw[i].iov_base, | ||
65 | - pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0); | ||
66 | + net_tx_pkt_unmap_frag_pci(pkt->pci_dev, | ||
67 | + pkt->raw[i].iov_base, | ||
68 | + pkt->raw[i].iov_len); | ||
69 | } | ||
70 | } | ||
71 | pkt->pci_dev = pci_dev; | ||
72 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev) | ||
73 | pkt->l4proto = 0; | ||
74 | } | ||
75 | |||
76 | +void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len) | ||
77 | +{ | ||
78 | + pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0); | ||
79 | +} | ||
80 | + | ||
81 | +bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, | ||
82 | + size_t len) | ||
83 | +{ | ||
84 | + dma_addr_t mapped_len = len; | ||
85 | + void *base = pci_dma_map(pkt->pci_dev, pa, &mapped_len, | ||
86 | + DMA_DIRECTION_TO_DEVICE); | ||
87 | + if (!base) { | ||
88 | + return false; | ||
89 | + } | ||
90 | + | ||
91 | + if (mapped_len != len || | ||
92 | + !net_tx_pkt_add_raw_fragment_common(pkt, base, len)) { | ||
93 | + net_tx_pkt_unmap_frag_pci(pkt->pci_dev, base, mapped_len); | ||
94 | + return false; | ||
95 | + } | ||
96 | + | ||
97 | + return true; | ||
98 | +} | ||
99 | + | ||
100 | static void net_tx_pkt_do_sw_csum(struct NetTxPkt *pkt, | ||
101 | struct iovec *iov, uint32_t iov_len, | ||
102 | uint16_t csl) | ||
103 | diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h | ||
104 | index XXXXXXX..XXXXXXX 100644 | ||
105 | --- a/hw/net/net_tx_pkt.h | ||
106 | +++ b/hw/net/net_tx_pkt.h | ||
107 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt); | ||
108 | void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev); | ||
109 | |||
110 | /** | ||
111 | + * Unmap a fragment mapped from a PCI device. | ||
112 | + * | ||
113 | + * @context: PCI device owning fragment | ||
114 | + * @base: pointer to fragment | ||
115 | + * @len: length of fragment | ||
116 | + */ | ||
117 | +void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len); | ||
118 | + | ||
119 | +/** | ||
120 | * Send packet to qemu. handles sw offloads if vhdr is not supported. | ||
121 | * | ||
122 | * @pkt: packet | ||
123 | -- | ||
124 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | This allows the network packet abstractions to be used even if PCI is not | ||
4 | used. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
8 | --- | ||
9 | hw/net/e1000e_core.c | 13 ++++++++----- | ||
10 | hw/net/igb_core.c | 13 ++++++------- | ||
11 | hw/net/net_tx_pkt.c | 36 +++++++++++++----------------------- | ||
12 | hw/net/net_tx_pkt.h | 31 ++++++++++++++++++++----------- | ||
13 | hw/net/vmxnet3.c | 14 +++++++------- | ||
14 | 5 files changed, 54 insertions(+), 53 deletions(-) | ||
15 | |||
16 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/hw/net/e1000e_core.c | ||
19 | +++ b/hw/net/e1000e_core.c | ||
20 | @@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core, | ||
21 | addr = le64_to_cpu(dp->buffer_addr); | ||
22 | |||
23 | if (!tx->skip_cp) { | ||
24 | - if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, addr, split_size)) { | ||
25 | + if (!net_tx_pkt_add_raw_fragment_pci(tx->tx_pkt, core->owner, | ||
26 | + addr, split_size)) { | ||
27 | tx->skip_cp = true; | ||
28 | } | ||
29 | } | ||
30 | @@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core, | ||
31 | } | ||
32 | |||
33 | tx->skip_cp = false; | ||
34 | - net_tx_pkt_reset(tx->tx_pkt, core->owner); | ||
35 | + net_tx_pkt_reset(tx->tx_pkt, net_tx_pkt_unmap_frag_pci, core->owner); | ||
36 | |||
37 | tx->sum_needed = 0; | ||
38 | tx->cptse = 0; | ||
39 | @@ -XXX,XX +XXX,XX @@ e1000e_core_pci_realize(E1000ECore *core, | ||
40 | qemu_add_vm_change_state_handler(e1000e_vm_state_change, core); | ||
41 | |||
42 | for (i = 0; i < E1000E_NUM_QUEUES; i++) { | ||
43 | - net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS); | ||
44 | + net_tx_pkt_init(&core->tx[i].tx_pkt, E1000E_MAX_TX_FRAGS); | ||
45 | } | ||
46 | |||
47 | net_rx_pkt_init(&core->rx_pkt); | ||
48 | @@ -XXX,XX +XXX,XX @@ e1000e_core_pci_uninit(E1000ECore *core) | ||
49 | qemu_del_vm_change_state_handler(core->vmstate); | ||
50 | |||
51 | for (i = 0; i < E1000E_NUM_QUEUES; i++) { | ||
52 | - net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner); | ||
53 | + net_tx_pkt_reset(core->tx[i].tx_pkt, | ||
54 | + net_tx_pkt_unmap_frag_pci, core->owner); | ||
55 | net_tx_pkt_uninit(core->tx[i].tx_pkt); | ||
56 | } | ||
57 | |||
58 | @@ -XXX,XX +XXX,XX @@ static void e1000e_reset(E1000ECore *core, bool sw) | ||
59 | e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac); | ||
60 | |||
61 | for (i = 0; i < ARRAY_SIZE(core->tx); i++) { | ||
62 | - net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner); | ||
63 | + net_tx_pkt_reset(core->tx[i].tx_pkt, | ||
64 | + net_tx_pkt_unmap_frag_pci, core->owner); | ||
65 | memset(&core->tx[i].props, 0, sizeof(core->tx[i].props)); | ||
66 | core->tx[i].skip_cp = false; | ||
67 | } | ||
68 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/hw/net/igb_core.c | ||
71 | +++ b/hw/net/igb_core.c | ||
72 | @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, | ||
73 | length = cmd_type_len & 0xFFFF; | ||
74 | |||
75 | if (!tx->skip_cp) { | ||
76 | - if (!net_tx_pkt_add_raw_fragment(tx->tx_pkt, buffer_addr, length)) { | ||
77 | + if (!net_tx_pkt_add_raw_fragment_pci(tx->tx_pkt, dev, | ||
78 | + buffer_addr, length)) { | ||
79 | tx->skip_cp = true; | ||
80 | } | ||
81 | } | ||
82 | @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, | ||
83 | |||
84 | tx->first = true; | ||
85 | tx->skip_cp = false; | ||
86 | - net_tx_pkt_reset(tx->tx_pkt, dev); | ||
87 | + net_tx_pkt_reset(tx->tx_pkt, net_tx_pkt_unmap_frag_pci, dev); | ||
88 | } | ||
89 | } | ||
90 | |||
91 | @@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) | ||
92 | d = core->owner; | ||
93 | } | ||
94 | |||
95 | - net_tx_pkt_reset(txr->tx->tx_pkt, d); | ||
96 | - | ||
97 | while (!igb_ring_empty(core, txi)) { | ||
98 | base = igb_ring_head_descr(core, txi); | ||
99 | |||
100 | @@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) | ||
101 | core->mac[EICR] |= eic; | ||
102 | igb_set_interrupt_cause(core, E1000_ICR_TXDW); | ||
103 | } | ||
104 | + | ||
105 | + net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci, d); | ||
106 | } | ||
107 | |||
108 | static uint32_t | ||
109 | @@ -XXX,XX +XXX,XX @@ igb_core_pci_realize(IGBCore *core, | ||
110 | core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core); | ||
111 | |||
112 | for (i = 0; i < IGB_NUM_QUEUES; i++) { | ||
113 | - net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS); | ||
114 | + net_tx_pkt_init(&core->tx[i].tx_pkt, E1000E_MAX_TX_FRAGS); | ||
115 | } | ||
116 | |||
117 | net_rx_pkt_init(&core->rx_pkt); | ||
118 | @@ -XXX,XX +XXX,XX @@ igb_core_pci_uninit(IGBCore *core) | ||
119 | qemu_del_vm_change_state_handler(core->vmstate); | ||
120 | |||
121 | for (i = 0; i < IGB_NUM_QUEUES; i++) { | ||
122 | - net_tx_pkt_reset(core->tx[i].tx_pkt, NULL); | ||
123 | net_tx_pkt_uninit(core->tx[i].tx_pkt); | ||
124 | } | ||
125 | |||
126 | @@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw) | ||
127 | |||
128 | for (i = 0; i < ARRAY_SIZE(core->tx); i++) { | ||
129 | tx = &core->tx[i]; | ||
130 | - net_tx_pkt_reset(tx->tx_pkt, NULL); | ||
131 | memset(tx->ctx, 0, sizeof(tx->ctx)); | ||
132 | tx->first = true; | ||
133 | tx->skip_cp = false; | ||
134 | diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c | ||
135 | index XXXXXXX..XXXXXXX 100644 | ||
136 | --- a/hw/net/net_tx_pkt.c | ||
137 | +++ b/hw/net/net_tx_pkt.c | ||
138 | @@ -XXX,XX +XXX,XX @@ | ||
139 | */ | ||
140 | |||
141 | #include "qemu/osdep.h" | ||
142 | -#include "net_tx_pkt.h" | ||
143 | #include "net/eth.h" | ||
144 | #include "net/checksum.h" | ||
145 | #include "net/tap.h" | ||
146 | #include "net/net.h" | ||
147 | #include "hw/pci/pci_device.h" | ||
148 | +#include "net_tx_pkt.h" | ||
149 | |||
150 | enum { | ||
151 | NET_TX_PKT_VHDR_FRAG = 0, | ||
152 | @@ -XXX,XX +XXX,XX @@ enum { | ||
153 | |||
154 | /* TX packet private context */ | ||
155 | struct NetTxPkt { | ||
156 | - PCIDevice *pci_dev; | ||
157 | - | ||
158 | struct virtio_net_hdr virt_hdr; | ||
159 | |||
160 | struct iovec *raw; | ||
161 | @@ -XXX,XX +XXX,XX @@ struct NetTxPkt { | ||
162 | uint8_t l4proto; | ||
163 | }; | ||
164 | |||
165 | -void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, | ||
166 | - uint32_t max_frags) | ||
167 | +void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags) | ||
168 | { | ||
169 | struct NetTxPkt *p = g_malloc0(sizeof *p); | ||
170 | |||
171 | - p->pci_dev = pci_dev; | ||
172 | - | ||
173 | p->vec = g_new(struct iovec, max_frags + NET_TX_PKT_PL_START_FRAG); | ||
174 | |||
175 | p->raw = g_new(struct iovec, max_frags); | ||
176 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, | ||
177 | } | ||
178 | } | ||
179 | |||
180 | -static bool net_tx_pkt_add_raw_fragment_common(struct NetTxPkt *pkt, | ||
181 | - void *base, size_t len) | ||
182 | +bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len) | ||
183 | { | ||
184 | struct iovec *ventry; | ||
185 | assert(pkt); | ||
186 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt) | ||
187 | #endif | ||
188 | } | ||
189 | |||
190 | -void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev) | ||
191 | +void net_tx_pkt_reset(struct NetTxPkt *pkt, | ||
192 | + NetTxPktFreeFrag callback, void *context) | ||
193 | { | ||
194 | int i; | ||
195 | |||
196 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev) | ||
197 | assert(pkt->raw); | ||
198 | for (i = 0; i < pkt->raw_frags; i++) { | ||
199 | assert(pkt->raw[i].iov_base); | ||
200 | - net_tx_pkt_unmap_frag_pci(pkt->pci_dev, | ||
201 | - pkt->raw[i].iov_base, | ||
202 | - pkt->raw[i].iov_len); | ||
203 | + callback(context, pkt->raw[i].iov_base, pkt->raw[i].iov_len); | ||
204 | } | ||
205 | } | ||
206 | - pkt->pci_dev = pci_dev; | ||
207 | pkt->raw_frags = 0; | ||
208 | |||
209 | pkt->hdr_len = 0; | ||
210 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len) | ||
211 | pci_dma_unmap(context, base, len, DMA_DIRECTION_TO_DEVICE, 0); | ||
212 | } | ||
213 | |||
214 | -bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, | ||
215 | - size_t len) | ||
216 | +bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev, | ||
217 | + dma_addr_t pa, size_t len) | ||
218 | { | ||
219 | dma_addr_t mapped_len = len; | ||
220 | - void *base = pci_dma_map(pkt->pci_dev, pa, &mapped_len, | ||
221 | - DMA_DIRECTION_TO_DEVICE); | ||
222 | + void *base = pci_dma_map(pci_dev, pa, &mapped_len, DMA_DIRECTION_TO_DEVICE); | ||
223 | if (!base) { | ||
224 | return false; | ||
225 | } | ||
226 | |||
227 | - if (mapped_len != len || | ||
228 | - !net_tx_pkt_add_raw_fragment_common(pkt, base, len)) { | ||
229 | - net_tx_pkt_unmap_frag_pci(pkt->pci_dev, base, mapped_len); | ||
230 | + if (mapped_len != len || !net_tx_pkt_add_raw_fragment(pkt, base, len)) { | ||
231 | + net_tx_pkt_unmap_frag_pci(pci_dev, base, mapped_len); | ||
232 | return false; | ||
233 | } | ||
234 | |||
235 | @@ -XXX,XX +XXX,XX @@ static void net_tx_pkt_udp_fragment_fix(struct NetTxPkt *pkt, | ||
236 | } | ||
237 | |||
238 | static bool net_tx_pkt_do_sw_fragmentation(struct NetTxPkt *pkt, | ||
239 | - NetTxPktCallback callback, | ||
240 | + NetTxPktSend callback, | ||
241 | void *context) | ||
242 | { | ||
243 | uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN; | ||
244 | @@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc) | ||
245 | } | ||
246 | |||
247 | bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload, | ||
248 | - NetTxPktCallback callback, void *context) | ||
249 | + NetTxPktSend callback, void *context) | ||
250 | { | ||
251 | assert(pkt); | ||
252 | |||
253 | diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h | ||
254 | index XXXXXXX..XXXXXXX 100644 | ||
255 | --- a/hw/net/net_tx_pkt.h | ||
256 | +++ b/hw/net/net_tx_pkt.h | ||
257 | @@ -XXX,XX +XXX,XX @@ | ||
258 | |||
259 | struct NetTxPkt; | ||
260 | |||
261 | -typedef void (* NetTxPktCallback)(void *, const struct iovec *, int, const struct iovec *, int); | ||
262 | +typedef void (*NetTxPktFreeFrag)(void *, void *, size_t); | ||
263 | +typedef void (*NetTxPktSend)(void *, const struct iovec *, int, const struct iovec *, int); | ||
264 | |||
265 | /** | ||
266 | * Init function for tx packet functionality | ||
267 | * | ||
268 | * @pkt: packet pointer | ||
269 | - * @pci_dev: PCI device processing this packet | ||
270 | * @max_frags: max tx ip fragments | ||
271 | */ | ||
272 | -void net_tx_pkt_init(struct NetTxPkt **pkt, PCIDevice *pci_dev, | ||
273 | - uint32_t max_frags); | ||
274 | +void net_tx_pkt_init(struct NetTxPkt **pkt, uint32_t max_frags); | ||
275 | |||
276 | /** | ||
277 | * Clean all tx packet resources. | ||
278 | @@ -XXX,XX +XXX,XX @@ net_tx_pkt_setup_vlan_header(struct NetTxPkt *pkt, uint16_t vlan) | ||
279 | * populate data fragment into pkt context. | ||
280 | * | ||
281 | * @pkt: packet | ||
282 | - * @pa: physical address of fragment | ||
283 | + * @base: pointer to fragment | ||
284 | * @len: length of fragment | ||
285 | * | ||
286 | */ | ||
287 | -bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, hwaddr pa, | ||
288 | - size_t len); | ||
289 | +bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len); | ||
290 | |||
291 | /** | ||
292 | * Fix ip header fields and calculate IP header and pseudo header checksums. | ||
293 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt); | ||
294 | * reset tx packet private context (needed to be called between packets) | ||
295 | * | ||
296 | * @pkt: packet | ||
297 | - * @dev: PCI device processing the next packet | ||
298 | - * | ||
299 | + * @callback: function to free the fragments | ||
300 | + * @context: pointer to be passed to the callback | ||
301 | */ | ||
302 | -void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev); | ||
303 | +void net_tx_pkt_reset(struct NetTxPkt *pkt, | ||
304 | + NetTxPktFreeFrag callback, void *context); | ||
305 | |||
306 | /** | ||
307 | * Unmap a fragment mapped from a PCI device. | ||
308 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev); | ||
309 | void net_tx_pkt_unmap_frag_pci(void *context, void *base, size_t len); | ||
310 | |||
311 | /** | ||
312 | + * map data fragment from PCI device and populate it into pkt context. | ||
313 | + * | ||
314 | + * @pci_dev: PCI device owning fragment | ||
315 | + * @pa: physical address of fragment | ||
316 | + * @len: length of fragment | ||
317 | + */ | ||
318 | +bool net_tx_pkt_add_raw_fragment_pci(struct NetTxPkt *pkt, PCIDevice *pci_dev, | ||
319 | + dma_addr_t pa, size_t len); | ||
320 | + | ||
321 | +/** | ||
322 | * Send packet to qemu. handles sw offloads if vhdr is not supported. | ||
323 | * | ||
324 | * @pkt: packet | ||
325 | @@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send(struct NetTxPkt *pkt, NetClientState *nc); | ||
326 | * @ret: operation result | ||
327 | */ | ||
328 | bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload, | ||
329 | - NetTxPktCallback callback, void *context); | ||
330 | + NetTxPktSend callback, void *context); | ||
331 | |||
332 | /** | ||
333 | * parse raw packet data and analyze offload requirements. | ||
334 | diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c | ||
335 | index XXXXXXX..XXXXXXX 100644 | ||
336 | --- a/hw/net/vmxnet3.c | ||
337 | +++ b/hw/net/vmxnet3.c | ||
338 | @@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) | ||
339 | data_len = (txd.len > 0) ? txd.len : VMXNET3_MAX_TX_BUF_SIZE; | ||
340 | data_pa = txd.addr; | ||
341 | |||
342 | - if (!net_tx_pkt_add_raw_fragment(s->tx_pkt, | ||
343 | - data_pa, | ||
344 | - data_len)) { | ||
345 | + if (!net_tx_pkt_add_raw_fragment_pci(s->tx_pkt, PCI_DEVICE(s), | ||
346 | + data_pa, data_len)) { | ||
347 | s->skip_current_tx_pkt = true; | ||
348 | } | ||
349 | } | ||
350 | @@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) | ||
351 | vmxnet3_complete_packet(s, qidx, txd_idx); | ||
352 | s->tx_sop = true; | ||
353 | s->skip_current_tx_pkt = false; | ||
354 | - net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s)); | ||
355 | + net_tx_pkt_reset(s->tx_pkt, | ||
356 | + net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s)); | ||
357 | } | ||
358 | } | ||
359 | } | ||
360 | @@ -XXX,XX +XXX,XX @@ static void vmxnet3_deactivate_device(VMXNET3State *s) | ||
361 | { | ||
362 | if (s->device_active) { | ||
363 | VMW_CBPRN("Deactivating vmxnet3..."); | ||
364 | - net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s)); | ||
365 | + net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s)); | ||
366 | net_tx_pkt_uninit(s->tx_pkt); | ||
367 | net_rx_pkt_uninit(s->rx_pkt); | ||
368 | s->device_active = false; | ||
369 | @@ -XXX,XX +XXX,XX @@ static void vmxnet3_activate_device(VMXNET3State *s) | ||
370 | |||
371 | /* Preallocate TX packet wrapper */ | ||
372 | VMW_CFPRN("Max TX fragments is %u", s->max_tx_frags); | ||
373 | - net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), s->max_tx_frags); | ||
374 | + net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags); | ||
375 | net_rx_pkt_init(&s->rx_pkt); | ||
376 | |||
377 | /* Read rings memory locations for RX queues */ | ||
378 | @@ -XXX,XX +XXX,XX @@ static int vmxnet3_post_load(void *opaque, int version_id) | ||
379 | { | ||
380 | VMXNET3State *s = opaque; | ||
381 | |||
382 | - net_tx_pkt_init(&s->tx_pkt, PCI_DEVICE(s), s->max_tx_frags); | ||
383 | + net_tx_pkt_init(&s->tx_pkt, s->max_tx_frags); | ||
384 | net_rx_pkt_init(&s->rx_pkt); | ||
385 | |||
386 | if (s->msix_used) { | ||
387 | -- | ||
388 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | |
2 | |||
3 | Before this change, e1000 and the common code updated BPRC and MPRC | ||
4 | depending on the matched filter, but e1000e and igb decided to update | ||
5 | those counters by deriving the packet type independently. This | ||
6 | inconsistency caused a multicast packet to be counted twice. | ||
7 | |||
8 | Updating BPRC and MPRC depending on the matched filter is fundamentally flawed anyway as | ||
9 | a filter can be used for different types of packets. For example, it is | ||
10 | possible to filter broadcast packets with MTA. | ||
11 | |||
12 | Always determine what counters to update by inspecting the packets. | ||
13 | |||
14 | Fixes: 3b27430177 ("e1000: Implementing various counters") | ||
15 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
16 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
17 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
18 | --- | ||
19 | hw/net/e1000.c | 6 +++--- | ||
20 | hw/net/e1000e_core.c | 20 +++----------------- | ||
21 | hw/net/e1000x_common.c | 25 +++++++++++++++++++------ | ||
22 | hw/net/e1000x_common.h | 5 +++-- | ||
23 | hw/net/igb_core.c | 22 +++++----------------- | ||
24 | 5 files changed, 33 insertions(+), 45 deletions(-) | ||
25 | |||
26 | diff --git a/hw/net/e1000.c b/hw/net/e1000.c | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/hw/net/e1000.c | ||
29 | +++ b/hw/net/e1000.c | ||
30 | @@ -XXX,XX +XXX,XX @@ receive_filter(E1000State *s, const uint8_t *buf, int size) | ||
31 | } | ||
32 | |||
33 | if (ismcast && (rctl & E1000_RCTL_MPE)) { /* promiscuous mcast */ | ||
34 | - e1000x_inc_reg_if_not_full(s->mac_reg, MPRC); | ||
35 | return 1; | ||
36 | } | ||
37 | |||
38 | if (isbcast && (rctl & E1000_RCTL_BAM)) { /* broadcast enabled */ | ||
39 | - e1000x_inc_reg_if_not_full(s->mac_reg, BPRC); | ||
40 | return 1; | ||
41 | } | ||
42 | |||
43 | @@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) | ||
44 | size_t desc_offset; | ||
45 | size_t desc_size; | ||
46 | size_t total_size; | ||
47 | + eth_pkt_types_e pkt_type; | ||
48 | |||
49 | if (!e1000x_hw_rx_enabled(s->mac_reg)) { | ||
50 | return -1; | ||
51 | @@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) | ||
52 | size -= 4; | ||
53 | } | ||
54 | |||
55 | + pkt_type = get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf)); | ||
56 | rdh_start = s->mac_reg[RDH]; | ||
57 | desc_offset = 0; | ||
58 | total_size = size + e1000x_fcs_len(s->mac_reg); | ||
59 | @@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) | ||
60 | } | ||
61 | } while (desc_offset < total_size); | ||
62 | |||
63 | - e1000x_update_rx_total_stats(s->mac_reg, size, total_size); | ||
64 | + e1000x_update_rx_total_stats(s->mac_reg, pkt_type, size, total_size); | ||
65 | |||
66 | n = E1000_ICS_RXT0; | ||
67 | if ((rdt = s->mac_reg[RDT]) < s->mac_reg[RDH]) | ||
68 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
69 | index XXXXXXX..XXXXXXX 100644 | ||
70 | --- a/hw/net/e1000e_core.c | ||
71 | +++ b/hw/net/e1000e_core.c | ||
72 | @@ -XXX,XX +XXX,XX @@ e1000e_write_to_rx_buffers(E1000ECore *core, | ||
73 | } | ||
74 | |||
75 | static void | ||
76 | -e1000e_update_rx_stats(E1000ECore *core, | ||
77 | - size_t data_size, | ||
78 | - size_t data_fcs_size) | ||
79 | +e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size) | ||
80 | { | ||
81 | - e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size); | ||
82 | - | ||
83 | - switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { | ||
84 | - case ETH_PKT_BCAST: | ||
85 | - e1000x_inc_reg_if_not_full(core->mac, BPRC); | ||
86 | - break; | ||
87 | - | ||
88 | - case ETH_PKT_MCAST: | ||
89 | - e1000x_inc_reg_if_not_full(core->mac, MPRC); | ||
90 | - break; | ||
91 | - | ||
92 | - default: | ||
93 | - break; | ||
94 | - } | ||
95 | + eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt); | ||
96 | + e1000x_update_rx_total_stats(core->mac, pkt_type, pkt_size, pkt_fcs_size); | ||
97 | } | ||
98 | |||
99 | static inline bool | ||
100 | diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c | ||
101 | index XXXXXXX..XXXXXXX 100644 | ||
102 | --- a/hw/net/e1000x_common.c | ||
103 | +++ b/hw/net/e1000x_common.c | ||
104 | @@ -XXX,XX +XXX,XX @@ bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf) | ||
105 | f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; | ||
106 | f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; | ||
107 | if (mac[MTA + (f >> 5)] & (1 << (f & 0x1f))) { | ||
108 | - e1000x_inc_reg_if_not_full(mac, MPRC); | ||
109 | return true; | ||
110 | } | ||
111 | |||
112 | @@ -XXX,XX +XXX,XX @@ e1000x_rxbufsize(uint32_t rctl) | ||
113 | |||
114 | void | ||
115 | e1000x_update_rx_total_stats(uint32_t *mac, | ||
116 | - size_t data_size, | ||
117 | - size_t data_fcs_size) | ||
118 | + eth_pkt_types_e pkt_type, | ||
119 | + size_t pkt_size, | ||
120 | + size_t pkt_fcs_size) | ||
121 | { | ||
122 | static const int PRCregs[6] = { PRC64, PRC127, PRC255, PRC511, | ||
123 | PRC1023, PRC1522 }; | ||
124 | |||
125 | - e1000x_increase_size_stats(mac, PRCregs, data_fcs_size); | ||
126 | + e1000x_increase_size_stats(mac, PRCregs, pkt_fcs_size); | ||
127 | e1000x_inc_reg_if_not_full(mac, TPR); | ||
128 | e1000x_inc_reg_if_not_full(mac, GPRC); | ||
129 | /* TOR - Total Octets Received: | ||
130 | @@ -XXX,XX +XXX,XX @@ e1000x_update_rx_total_stats(uint32_t *mac, | ||
131 | * Address> field through the <CRC> field, inclusively. | ||
132 | * Always include FCS length (4) in size. | ||
133 | */ | ||
134 | - e1000x_grow_8reg_if_not_full(mac, TORL, data_size + 4); | ||
135 | - e1000x_grow_8reg_if_not_full(mac, GORCL, data_size + 4); | ||
136 | + e1000x_grow_8reg_if_not_full(mac, TORL, pkt_size + 4); | ||
137 | + e1000x_grow_8reg_if_not_full(mac, GORCL, pkt_size + 4); | ||
138 | + | ||
139 | + switch (pkt_type) { | ||
140 | + case ETH_PKT_BCAST: | ||
141 | + e1000x_inc_reg_if_not_full(mac, BPRC); | ||
142 | + break; | ||
143 | + | ||
144 | + case ETH_PKT_MCAST: | ||
145 | + e1000x_inc_reg_if_not_full(mac, MPRC); | ||
146 | + break; | ||
147 | + | ||
148 | + default: | ||
149 | + break; | ||
150 | + } | ||
151 | } | ||
152 | |||
153 | void | ||
154 | diff --git a/hw/net/e1000x_common.h b/hw/net/e1000x_common.h | ||
155 | index XXXXXXX..XXXXXXX 100644 | ||
156 | --- a/hw/net/e1000x_common.h | ||
157 | +++ b/hw/net/e1000x_common.h | ||
158 | @@ -XXX,XX +XXX,XX @@ e1000x_update_regs_on_link_up(uint32_t *mac, uint16_t *phy) | ||
159 | } | ||
160 | |||
161 | void e1000x_update_rx_total_stats(uint32_t *mac, | ||
162 | - size_t data_size, | ||
163 | - size_t data_fcs_size); | ||
164 | + eth_pkt_types_e pkt_type, | ||
165 | + size_t pkt_size, | ||
166 | + size_t pkt_fcs_size); | ||
167 | |||
168 | void e1000x_core_prepare_eeprom(uint16_t *eeprom, | ||
169 | const uint16_t *templ, | ||
170 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
171 | index XXXXXXX..XXXXXXX 100644 | ||
172 | --- a/hw/net/igb_core.c | ||
173 | +++ b/hw/net/igb_core.c | ||
174 | @@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core, | ||
175 | |||
176 | static void | ||
177 | igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, | ||
178 | - size_t data_size, size_t data_fcs_size) | ||
179 | + size_t pkt_size, size_t pkt_fcs_size) | ||
180 | { | ||
181 | - e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size); | ||
182 | - | ||
183 | - switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { | ||
184 | - case ETH_PKT_BCAST: | ||
185 | - e1000x_inc_reg_if_not_full(core->mac, BPRC); | ||
186 | - break; | ||
187 | - | ||
188 | - case ETH_PKT_MCAST: | ||
189 | - e1000x_inc_reg_if_not_full(core->mac, MPRC); | ||
190 | - break; | ||
191 | - | ||
192 | - default: | ||
193 | - break; | ||
194 | - } | ||
195 | + eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt); | ||
196 | + e1000x_update_rx_total_stats(core->mac, pkt_type, pkt_size, pkt_fcs_size); | ||
197 | |||
198 | if (core->mac[MRQC] & 1) { | ||
199 | uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS; | ||
200 | |||
201 | - core->mac[PVFGORC0 + (pool * 64)] += data_size + 4; | ||
202 | + core->mac[PVFGORC0 + (pool * 64)] += pkt_size + 4; | ||
203 | core->mac[PVFGPRC0 + (pool * 64)]++; | ||
204 | - if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) { | ||
205 | + if (pkt_type == ETH_PKT_MCAST) { | ||
206 | core->mac[PVFMPRC0 + (pool * 64)]++; | ||
207 | } | ||
208 | } | ||
209 | -- | ||
210 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | igb's advanced descriptor uses a packet type encoding different from | ||
4 | the one used in e1000e's extended descriptor. Fix the logic to encode | ||
5 | Rx packet type accordingly. | ||
6 | |||
7 | Fixes: 3a977deebe ("Intrdocue igb device emulation") | ||
8 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
9 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
10 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
11 | --- | ||
12 | hw/net/igb_core.c | 38 +++++++++++++++++++------------------- | ||
13 | hw/net/igb_regs.h | 5 +++++ | ||
14 | 2 files changed, 24 insertions(+), 19 deletions(-) | ||
15 | |||
16 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
17 | index XXXXXXX..XXXXXXX 100644 | ||
18 | --- a/hw/net/igb_core.c | ||
19 | +++ b/hw/net/igb_core.c | ||
20 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, | ||
21 | struct virtio_net_hdr *vhdr; | ||
22 | bool hasip4, hasip6; | ||
23 | EthL4HdrProto l4hdr_proto; | ||
24 | - uint32_t pkt_type; | ||
25 | |||
26 | *status_flags = E1000_RXD_STAT_DD; | ||
27 | |||
28 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, | ||
29 | trace_e1000e_rx_metadata_ack(); | ||
30 | } | ||
31 | |||
32 | - if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { | ||
33 | - trace_e1000e_rx_metadata_ipv6_filtering_disabled(); | ||
34 | - pkt_type = E1000_RXD_PKT_MAC; | ||
35 | - } else if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP || | ||
36 | - l4hdr_proto == ETH_L4_HDR_PROTO_UDP) { | ||
37 | - pkt_type = hasip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP; | ||
38 | - } else if (hasip4 || hasip6) { | ||
39 | - pkt_type = hasip4 ? E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6; | ||
40 | - } else { | ||
41 | - pkt_type = E1000_RXD_PKT_MAC; | ||
42 | - } | ||
43 | + if (pkt_info) { | ||
44 | + *pkt_info = rss_info->enabled ? rss_info->type : 0; | ||
45 | |||
46 | - trace_e1000e_rx_metadata_pkt_type(pkt_type); | ||
47 | + if (hasip4) { | ||
48 | + *pkt_info |= E1000_ADVRXD_PKT_IP4; | ||
49 | + } | ||
50 | |||
51 | - if (pkt_info) { | ||
52 | - if (rss_info->enabled) { | ||
53 | - *pkt_info = rss_info->type; | ||
54 | + if (hasip6) { | ||
55 | + *pkt_info |= E1000_ADVRXD_PKT_IP6; | ||
56 | } | ||
57 | |||
58 | - *pkt_info |= (pkt_type << 4); | ||
59 | - } else { | ||
60 | - *status_flags |= E1000_RXD_PKT_TYPE(pkt_type); | ||
61 | + switch (l4hdr_proto) { | ||
62 | + case ETH_L4_HDR_PROTO_TCP: | ||
63 | + *pkt_info |= E1000_ADVRXD_PKT_TCP; | ||
64 | + break; | ||
65 | + | ||
66 | + case ETH_L4_HDR_PROTO_UDP: | ||
67 | + *pkt_info |= E1000_ADVRXD_PKT_UDP; | ||
68 | + break; | ||
69 | + | ||
70 | + default: | ||
71 | + break; | ||
72 | + } | ||
73 | } | ||
74 | |||
75 | if (hdr_info) { | ||
76 | diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h | ||
77 | index XXXXXXX..XXXXXXX 100644 | ||
78 | --- a/hw/net/igb_regs.h | ||
79 | +++ b/hw/net/igb_regs.h | ||
80 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { | ||
81 | |||
82 | #define E1000_STATUS_NUM_VFS_SHIFT 14 | ||
83 | |||
84 | +#define E1000_ADVRXD_PKT_IP4 BIT(4) | ||
85 | +#define E1000_ADVRXD_PKT_IP6 BIT(6) | ||
86 | +#define E1000_ADVRXD_PKT_TCP BIT(8) | ||
87 | +#define E1000_ADVRXD_PKT_UDP BIT(9) | ||
88 | + | ||
89 | static inline uint8_t igb_ivar_entry_rx(uint8_t i) | ||
90 | { | ||
91 | return i < 8 ? i * 4 : (i - 8) * 4 + 2; | ||
92 | -- | ||
93 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | While the datasheet of e1000e says it checks CTRL.VME for tx VLAN | ||
4 | tagging, igb's datasheet has no such statements. It also says for | ||
5 | "CTRL.VLE": | ||
6 | > This register only affects the VLAN Strip in Rx it does not have any | ||
7 | > influence in the Tx path in the 82576. | ||
8 | (Appendix A. Changes from the 82575) | ||
9 | |||
10 | There is no "CTRL.VLE", so it is most likely a typo for | ||
11 | CTRL.VME. | ||
12 | |||
13 | Fixes: fba7c3b788 ("igb: respect VMVIR and VMOLR for VLAN") | ||
14 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
15 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
16 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
17 | --- | ||
18 | hw/net/igb_core.c | 2 +- | ||
19 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
20 | |||
21 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/hw/net/igb_core.c | ||
24 | +++ b/hw/net/igb_core.c | ||
25 | @@ -XXX,XX +XXX,XX @@ igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx, | ||
26 | } | ||
27 | } | ||
28 | |||
29 | - if (insert_vlan && e1000x_vlan_enabled(core->mac)) { | ||
30 | + if (insert_vlan) { | ||
31 | net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, | ||
32 | core->mac[VET] & 0xffff); | ||
33 | } | ||
34 | -- | ||
35 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | The datasheet makes contradictory statements regarding ICR accesses, so it | ||
4 | cannot be relied on to determine the behavior of ICR accesses. However, | ||
5 | e1000e does clear IMS bits when reading ICR, and Linux also | ||
6 | expects ICR accesses to clear IMS bits according to: | ||
7 | https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/net/ethernet/intel/igb/igb_main.c?h=v6.2#n8048 | ||
8 | |||
9 | Fixes: 3a977deebe ("Intrdocue igb device emulation") | ||
10 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
11 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
12 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
13 | --- | ||
14 | hw/net/igb_core.c | 8 ++++---- | ||
15 | 1 file changed, 4 insertions(+), 4 deletions(-) | ||
16 | |||
17 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/hw/net/igb_core.c | ||
20 | +++ b/hw/net/igb_core.c | ||
21 | @@ -XXX,XX +XXX,XX @@ igb_set_ims(IGBCore *core, int index, uint32_t val) | ||
22 | static void igb_commit_icr(IGBCore *core) | ||
23 | { | ||
24 | /* | ||
25 | - * If GPIE.NSICR = 0, then the copy of IAM to IMS will occur only if at | ||
26 | + * If GPIE.NSICR = 0, then the clear of IMS will occur only if at | ||
27 | * least one bit is set in the IMS and there is a true interrupt as | ||
28 | * reflected in ICR.INTA. | ||
29 | */ | ||
30 | if ((core->mac[GPIE] & E1000_GPIE_NSICR) || | ||
31 | (core->mac[IMS] && (core->mac[ICR] & E1000_ICR_INT_ASSERTED))) { | ||
32 | - igb_set_ims(core, IMS, core->mac[IAM]); | ||
33 | - } else { | ||
34 | - igb_update_interrupt_state(core); | ||
35 | + igb_clear_ims_bits(core, core->mac[IAM]); | ||
36 | } | ||
37 | + | ||
38 | + igb_update_interrupt_state(core); | ||
39 | } | ||
40 | |||
41 | static void igb_set_icr(IGBCore *core, int index, uint32_t val) | ||
42 | -- | ||
43 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | igb does not properly ensure the buffer passed to | ||
4 | net_rx_pkt_set_protocols() is contiguous for the entire L2/L3/L4 header. | ||
5 | Allow it to pass scattered data to net_rx_pkt_set_protocols(). | ||
6 | |||
7 | Fixes: 3a977deebe ("Intrdocue igb device emulation") | ||
8 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
9 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
10 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
11 | --- | ||
12 | hw/net/igb_core.c | 2 +- | ||
13 | hw/net/net_rx_pkt.c | 14 +++++--------- | ||
14 | hw/net/net_rx_pkt.h | 10 ++++++---- | ||
15 | hw/net/virtio-net.c | 7 +++++-- | ||
16 | hw/net/vmxnet3.c | 7 ++++++- | ||
17 | include/net/eth.h | 6 +++--- | ||
18 | net/eth.c | 18 ++++++++---------- | ||
19 | 7 files changed, 34 insertions(+), 30 deletions(-) | ||
20 | |||
21 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
22 | index XXXXXXX..XXXXXXX 100644 | ||
23 | --- a/hw/net/igb_core.c | ||
24 | +++ b/hw/net/igb_core.c | ||
25 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
26 | |||
27 | ehdr = PKT_GET_ETH_HDR(filter_buf); | ||
28 | net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr)); | ||
29 | - net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size); | ||
30 | + net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs); | ||
31 | |||
32 | queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx); | ||
33 | if (!queues) { | ||
34 | diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/hw/net/net_rx_pkt.c | ||
37 | +++ b/hw/net/net_rx_pkt.c | ||
38 | @@ -XXX,XX +XXX,XX @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt, | ||
39 | iov, iovcnt, ploff, pkt->tot_len); | ||
40 | } | ||
41 | |||
42 | - eth_get_protocols(pkt->vec, pkt->vec_len, &pkt->hasip4, &pkt->hasip6, | ||
43 | + eth_get_protocols(pkt->vec, pkt->vec_len, 0, &pkt->hasip4, &pkt->hasip6, | ||
44 | &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, | ||
45 | &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); | ||
46 | |||
47 | @@ -XXX,XX +XXX,XX @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt) | ||
48 | return pkt->tot_len; | ||
49 | } | ||
50 | |||
51 | -void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, | ||
52 | - size_t len) | ||
53 | +void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, | ||
54 | + const struct iovec *iov, size_t iovcnt, | ||
55 | + size_t iovoff) | ||
56 | { | ||
57 | - const struct iovec iov = { | ||
58 | - .iov_base = (void *)data, | ||
59 | - .iov_len = len | ||
60 | - }; | ||
61 | - | ||
62 | assert(pkt); | ||
63 | |||
64 | - eth_get_protocols(&iov, 1, &pkt->hasip4, &pkt->hasip6, | ||
65 | + eth_get_protocols(iov, iovcnt, iovoff, &pkt->hasip4, &pkt->hasip6, | ||
66 | &pkt->l3hdr_off, &pkt->l4hdr_off, &pkt->l5hdr_off, | ||
67 | &pkt->ip6hdr_info, &pkt->ip4hdr_info, &pkt->l4hdr_info); | ||
68 | } | ||
69 | diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h | ||
70 | index XXXXXXX..XXXXXXX 100644 | ||
71 | --- a/hw/net/net_rx_pkt.h | ||
72 | +++ b/hw/net/net_rx_pkt.h | ||
73 | @@ -XXX,XX +XXX,XX @@ size_t net_rx_pkt_get_total_len(struct NetRxPkt *pkt); | ||
74 | * parse and set packet analysis results | ||
75 | * | ||
76 | * @pkt: packet | ||
77 | - * @data: pointer to the data buffer to be parsed | ||
78 | - * @len: data length | ||
79 | + * @iov: received data scatter-gather list | ||
80 | + * @iovcnt: number of elements in iov | ||
81 | + * @iovoff: data start offset in the iov | ||
82 | * | ||
83 | */ | ||
84 | -void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, const void *data, | ||
85 | - size_t len); | ||
86 | +void net_rx_pkt_set_protocols(struct NetRxPkt *pkt, | ||
87 | + const struct iovec *iov, size_t iovcnt, | ||
88 | + size_t iovoff); | ||
89 | |||
90 | /** | ||
91 | * fetches packet analysis results | ||
92 | diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/hw/net/virtio-net.c | ||
95 | +++ b/hw/net/virtio-net.c | ||
96 | @@ -XXX,XX +XXX,XX @@ static int virtio_net_process_rss(NetClientState *nc, const uint8_t *buf, | ||
97 | VIRTIO_NET_HASH_REPORT_UDPv6, | ||
98 | VIRTIO_NET_HASH_REPORT_UDPv6_EX | ||
99 | }; | ||
100 | + struct iovec iov = { | ||
101 | + .iov_base = (void *)buf, | ||
102 | + .iov_len = size | ||
103 | + }; | ||
104 | |||
105 | - net_rx_pkt_set_protocols(pkt, buf + n->host_hdr_len, | ||
106 | - size - n->host_hdr_len); | ||
107 | + net_rx_pkt_set_protocols(pkt, &iov, 1, n->host_hdr_len); | ||
108 | net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); | ||
109 | net_hash_type = virtio_net_get_hash_type(hasip4, hasip6, l4hdr_proto, | ||
110 | n->rss_data.hash_types); | ||
111 | diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c | ||
112 | index XXXXXXX..XXXXXXX 100644 | ||
113 | --- a/hw/net/vmxnet3.c | ||
114 | +++ b/hw/net/vmxnet3.c | ||
115 | @@ -XXX,XX +XXX,XX @@ vmxnet3_receive(NetClientState *nc, const uint8_t *buf, size_t size) | ||
116 | get_eth_packet_type(PKT_GET_ETH_HDR(buf))); | ||
117 | |||
118 | if (vmxnet3_rx_filter_may_indicate(s, buf, size)) { | ||
119 | - net_rx_pkt_set_protocols(s->rx_pkt, buf, size); | ||
120 | + struct iovec iov = { | ||
121 | + .iov_base = (void *)buf, | ||
122 | + .iov_len = size | ||
123 | + }; | ||
124 | + | ||
125 | + net_rx_pkt_set_protocols(s->rx_pkt, &iov, 1, 0); | ||
126 | vmxnet3_rx_need_csum_calculate(s->rx_pkt, buf, size); | ||
127 | net_rx_pkt_attach_data(s->rx_pkt, buf, size, s->rx_vlan_stripping); | ||
128 | bytes_indicated = vmxnet3_indicate_packet(s) ? size : -1; | ||
129 | diff --git a/include/net/eth.h b/include/net/eth.h | ||
130 | index XXXXXXX..XXXXXXX 100644 | ||
131 | --- a/include/net/eth.h | ||
132 | +++ b/include/net/eth.h | ||
133 | @@ -XXX,XX +XXX,XX @@ eth_get_l2_hdr_length(const void *p) | ||
134 | } | ||
135 | |||
136 | static inline uint32_t | ||
137 | -eth_get_l2_hdr_length_iov(const struct iovec *iov, int iovcnt) | ||
138 | +eth_get_l2_hdr_length_iov(const struct iovec *iov, size_t iovcnt, size_t iovoff) | ||
139 | { | ||
140 | uint8_t p[sizeof(struct eth_header) + sizeof(struct vlan_header)]; | ||
141 | - size_t copied = iov_to_buf(iov, iovcnt, 0, p, ARRAY_SIZE(p)); | ||
142 | + size_t copied = iov_to_buf(iov, iovcnt, iovoff, p, ARRAY_SIZE(p)); | ||
143 | |||
144 | if (copied < ARRAY_SIZE(p)) { | ||
145 | return copied; | ||
146 | @@ -XXX,XX +XXX,XX @@ typedef struct eth_l4_hdr_info_st { | ||
147 | bool has_tcp_data; | ||
148 | } eth_l4_hdr_info; | ||
149 | |||
150 | -void eth_get_protocols(const struct iovec *iov, int iovcnt, | ||
151 | +void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff, | ||
152 | bool *hasip4, bool *hasip6, | ||
153 | size_t *l3hdr_off, | ||
154 | size_t *l4hdr_off, | ||
155 | diff --git a/net/eth.c b/net/eth.c | ||
156 | index XXXXXXX..XXXXXXX 100644 | ||
157 | --- a/net/eth.c | ||
158 | +++ b/net/eth.c | ||
159 | @@ -XXX,XX +XXX,XX @@ _eth_tcp_has_data(bool is_ip4, | ||
160 | return l4len > TCP_HEADER_DATA_OFFSET(tcp); | ||
161 | } | ||
162 | |||
163 | -void eth_get_protocols(const struct iovec *iov, int iovcnt, | ||
164 | +void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff, | ||
165 | bool *hasip4, bool *hasip6, | ||
166 | size_t *l3hdr_off, | ||
167 | size_t *l4hdr_off, | ||
168 | @@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, int iovcnt, | ||
169 | { | ||
170 | int proto; | ||
171 | bool fragment = false; | ||
172 | - size_t l2hdr_len = eth_get_l2_hdr_length_iov(iov, iovcnt); | ||
173 | size_t input_size = iov_size(iov, iovcnt); | ||
174 | size_t copied; | ||
175 | uint8_t ip_p; | ||
176 | |||
177 | *hasip4 = *hasip6 = false; | ||
178 | + *l3hdr_off = iovoff + eth_get_l2_hdr_length_iov(iov, iovcnt, iovoff); | ||
179 | l4hdr_info->proto = ETH_L4_HDR_PROTO_INVALID; | ||
180 | |||
181 | - proto = eth_get_l3_proto(iov, iovcnt, l2hdr_len); | ||
182 | - | ||
183 | - *l3hdr_off = l2hdr_len; | ||
184 | + proto = eth_get_l3_proto(iov, iovcnt, *l3hdr_off); | ||
185 | |||
186 | if (proto == ETH_P_IP) { | ||
187 | struct ip_header *iphdr = &ip4hdr_info->ip4_hdr; | ||
188 | |||
189 | - if (input_size < l2hdr_len) { | ||
190 | + if (input_size < *l3hdr_off) { | ||
191 | return; | ||
192 | } | ||
193 | |||
194 | - copied = iov_to_buf(iov, iovcnt, l2hdr_len, iphdr, sizeof(*iphdr)); | ||
195 | + copied = iov_to_buf(iov, iovcnt, *l3hdr_off, iphdr, sizeof(*iphdr)); | ||
196 | if (copied < sizeof(*iphdr) || | ||
197 | IP_HEADER_VERSION(iphdr) != IP_HEADER_VERSION_4) { | ||
198 | return; | ||
199 | @@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, int iovcnt, | ||
200 | *hasip4 = true; | ||
201 | ip_p = iphdr->ip_p; | ||
202 | ip4hdr_info->fragment = IP4_IS_FRAGMENT(iphdr); | ||
203 | - *l4hdr_off = l2hdr_len + IP_HDR_GET_LEN(iphdr); | ||
204 | + *l4hdr_off = *l3hdr_off + IP_HDR_GET_LEN(iphdr); | ||
205 | |||
206 | fragment = ip4hdr_info->fragment; | ||
207 | } else if (proto == ETH_P_IPV6) { | ||
208 | - if (!eth_parse_ipv6_hdr(iov, iovcnt, l2hdr_len, ip6hdr_info)) { | ||
209 | + if (!eth_parse_ipv6_hdr(iov, iovcnt, *l3hdr_off, ip6hdr_info)) { | ||
210 | return; | ||
211 | } | ||
212 | |||
213 | *hasip6 = true; | ||
214 | ip_p = ip6hdr_info->l4proto; | ||
215 | - *l4hdr_off = l2hdr_len + ip6hdr_info->full_hdr_len; | ||
216 | + *l4hdr_off = *l3hdr_off + ip6hdr_info->full_hdr_len; | ||
217 | fragment = ip6hdr_info->fragment; | ||
218 | } else { | ||
219 | return; | ||
220 | -- | ||
221 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | e1000e_receive_internal() used to check the iov length to determine | ||
4 | whether to copy the iovs to a contiguous buffer, but the check is flawed in two | ||
5 | ways: | ||
6 | - It does not ensure that iovcnt > 0. | ||
7 | - It does not take virtio-net header into consideration. | ||
8 | |||
9 | The size of this copy is just 18 octets, which can be even less than | ||
10 | the code size required for checks. This (wrong) optimization is probably | ||
11 | not worth keeping, so just remove it. | ||
12 | |||
13 | Fixes: 6f3fbe4ed0 ("net: Introduce e1000e device emulation") | ||
14 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
15 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
16 | --- | ||
17 | hw/net/e1000e_core.c | 26 ++++++++++---------------- | ||
18 | 1 file changed, 10 insertions(+), 16 deletions(-) | ||
19 | |||
20 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
21 | index XXXXXXX..XXXXXXX 100644 | ||
22 | --- a/hw/net/e1000e_core.c | ||
23 | +++ b/hw/net/e1000e_core.c | ||
24 | @@ -XXX,XX +XXX,XX @@ static ssize_t | ||
25 | e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt, | ||
26 | bool has_vnet) | ||
27 | { | ||
28 | - static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4); | ||
29 | - | ||
30 | uint32_t n = 0; | ||
31 | - uint8_t min_buf[ETH_ZLEN]; | ||
32 | + uint8_t buf[ETH_ZLEN]; | ||
33 | struct iovec min_iov; | ||
34 | - uint8_t *filter_buf; | ||
35 | size_t size, orig_size; | ||
36 | size_t iov_ofs = 0; | ||
37 | E1000E_RxRing rxr; | ||
38 | @@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt, | ||
39 | net_rx_pkt_unset_vhdr(core->rx_pkt); | ||
40 | } | ||
41 | |||
42 | - filter_buf = iov->iov_base + iov_ofs; | ||
43 | orig_size = iov_size(iov, iovcnt); | ||
44 | size = orig_size - iov_ofs; | ||
45 | |||
46 | /* Pad to minimum Ethernet frame length */ | ||
47 | - if (size < sizeof(min_buf)) { | ||
48 | - iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size); | ||
49 | - memset(&min_buf[size], 0, sizeof(min_buf) - size); | ||
50 | + if (size < sizeof(buf)) { | ||
51 | + iov_to_buf(iov, iovcnt, iov_ofs, buf, size); | ||
52 | + memset(&buf[size], 0, sizeof(buf) - size); | ||
53 | e1000x_inc_reg_if_not_full(core->mac, RUC); | ||
54 | - min_iov.iov_base = filter_buf = min_buf; | ||
55 | - min_iov.iov_len = size = sizeof(min_buf); | ||
56 | + min_iov.iov_base = buf; | ||
57 | + min_iov.iov_len = size = sizeof(buf); | ||
58 | iovcnt = 1; | ||
59 | iov = &min_iov; | ||
60 | iov_ofs = 0; | ||
61 | - } else if (iov->iov_len < maximum_ethernet_hdr_len) { | ||
62 | - /* This is very unlikely, but may happen. */ | ||
63 | - iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len); | ||
64 | - filter_buf = min_buf; | ||
65 | + } else { | ||
66 | + iov_to_buf(iov, iovcnt, iov_ofs, buf, ETH_HLEN + 4); | ||
67 | } | ||
68 | |||
69 | /* Discard oversized packets if !LPE and !SBP. */ | ||
70 | @@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt, | ||
71 | } | ||
72 | |||
73 | net_rx_pkt_set_packet_type(core->rx_pkt, | ||
74 | - get_eth_packet_type(PKT_GET_ETH_HDR(filter_buf))); | ||
75 | + get_eth_packet_type(PKT_GET_ETH_HDR(buf))); | ||
76 | |||
77 | - if (!e1000e_receive_filter(core, filter_buf, size)) { | ||
78 | + if (!e1000e_receive_filter(core, buf, size)) { | ||
79 | trace_e1000e_rx_flt_dropped(); | ||
80 | return orig_size; | ||
81 | } | ||
82 | -- | ||
83 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | igb_receive_internal() used to check the iov length to determine | ||
4 | whether to copy the iovs to a contiguous buffer, but the check is flawed in two | ||
5 | ways: | ||
6 | - It does not ensure that iovcnt > 0. | ||
7 | - It does not take virtio-net header into consideration. | ||
8 | |||
9 | The size of this copy is just 22 octets, which can be even less than | ||
10 | the code size required for checks. This (wrong) optimization is probably | ||
11 | not worth keeping, so just remove it. Removing this also allows igb to assume | ||
12 | aligned accesses for the ethernet header. | ||
13 | |||
14 | Fixes: 3a977deebe ("Intrdocue igb device emulation") | ||
15 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
16 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
17 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
18 | --- | ||
19 | hw/net/igb_core.c | 43 +++++++++++++++++++++++-------------------- | ||
20 | 1 file changed, 23 insertions(+), 20 deletions(-) | ||
21 | |||
22 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/hw/net/igb_core.c | ||
25 | +++ b/hw/net/igb_core.c | ||
26 | @@ -XXX,XX +XXX,XX @@ typedef struct IGBTxPktVmdqCallbackContext { | ||
27 | NetClientState *nc; | ||
28 | } IGBTxPktVmdqCallbackContext; | ||
29 | |||
30 | +typedef struct L2Header { | ||
31 | + struct eth_header eth; | ||
32 | + struct vlan_header vlan; | ||
33 | +} L2Header; | ||
34 | + | ||
35 | static ssize_t | ||
36 | igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
37 | bool has_vnet, bool *external_tx); | ||
38 | @@ -XXX,XX +XXX,XX @@ igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size) | ||
39 | return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size); | ||
40 | } | ||
41 | |||
42 | -static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr, | ||
43 | +static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
44 | size_t size, E1000E_RSSInfo *rss_info, | ||
45 | bool *external_tx) | ||
46 | { | ||
47 | static const int ta_shift[] = { 4, 3, 2, 0 }; | ||
48 | + const struct eth_header *ehdr = &l2_header->eth; | ||
49 | uint32_t f, ra[2], *macp, rctl = core->mac[RCTL]; | ||
50 | uint16_t queues = 0; | ||
51 | uint16_t oversized = 0; | ||
52 | - uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK; | ||
53 | + uint16_t vid = be16_to_cpu(l2_header->vlan.h_tci) & VLAN_VID_MASK; | ||
54 | bool accepted = false; | ||
55 | int i; | ||
56 | |||
57 | @@ -XXX,XX +XXX,XX @@ static ssize_t | ||
58 | igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
59 | bool has_vnet, bool *external_tx) | ||
60 | { | ||
61 | - static const int maximum_ethernet_hdr_len = (ETH_HLEN + 4); | ||
62 | - | ||
63 | uint16_t queues = 0; | ||
64 | uint32_t n = 0; | ||
65 | - uint8_t min_buf[ETH_ZLEN]; | ||
66 | + union { | ||
67 | + L2Header l2_header; | ||
68 | + uint8_t octets[ETH_ZLEN]; | ||
69 | + } buf; | ||
70 | struct iovec min_iov; | ||
71 | - struct eth_header *ehdr; | ||
72 | - uint8_t *filter_buf; | ||
73 | size_t size, orig_size; | ||
74 | size_t iov_ofs = 0; | ||
75 | E1000E_RxRing rxr; | ||
76 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
77 | net_rx_pkt_unset_vhdr(core->rx_pkt); | ||
78 | } | ||
79 | |||
80 | - filter_buf = iov->iov_base + iov_ofs; | ||
81 | orig_size = iov_size(iov, iovcnt); | ||
82 | size = orig_size - iov_ofs; | ||
83 | |||
84 | /* Pad to minimum Ethernet frame length */ | ||
85 | - if (size < sizeof(min_buf)) { | ||
86 | - iov_to_buf(iov, iovcnt, iov_ofs, min_buf, size); | ||
87 | - memset(&min_buf[size], 0, sizeof(min_buf) - size); | ||
88 | + if (size < sizeof(buf)) { | ||
89 | + iov_to_buf(iov, iovcnt, iov_ofs, &buf, size); | ||
90 | + memset(&buf.octets[size], 0, sizeof(buf) - size); | ||
91 | e1000x_inc_reg_if_not_full(core->mac, RUC); | ||
92 | - min_iov.iov_base = filter_buf = min_buf; | ||
93 | - min_iov.iov_len = size = sizeof(min_buf); | ||
94 | + min_iov.iov_base = &buf; | ||
95 | + min_iov.iov_len = size = sizeof(buf); | ||
96 | iovcnt = 1; | ||
97 | iov = &min_iov; | ||
98 | iov_ofs = 0; | ||
99 | - } else if (iov->iov_len < maximum_ethernet_hdr_len) { | ||
100 | - /* This is very unlikely, but may happen. */ | ||
101 | - iov_to_buf(iov, iovcnt, iov_ofs, min_buf, maximum_ethernet_hdr_len); | ||
102 | - filter_buf = min_buf; | ||
103 | + } else { | ||
104 | + iov_to_buf(iov, iovcnt, iov_ofs, &buf, sizeof(buf.l2_header)); | ||
105 | } | ||
106 | |||
107 | /* Discard oversized packets if !LPE and !SBP. */ | ||
108 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
109 | return orig_size; | ||
110 | } | ||
111 | |||
112 | - ehdr = PKT_GET_ETH_HDR(filter_buf); | ||
113 | - net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr)); | ||
114 | + net_rx_pkt_set_packet_type(core->rx_pkt, | ||
115 | + get_eth_packet_type(&buf.l2_header.eth)); | ||
116 | net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs); | ||
117 | |||
118 | - queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx); | ||
119 | + queues = igb_receive_assign(core, &buf.l2_header, size, | ||
120 | + &rss_info, external_tx); | ||
121 | if (!queues) { | ||
122 | trace_e1000e_rx_flt_dropped(); | ||
123 | return orig_size; | ||
124 | -- | ||
125 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Fixes: 9f95111474 ("tests/avocado: re-factor igb test to avoid timeouts") | ||
4 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
5 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
7 | --- | ||
8 | MAINTAINERS | 2 +- | ||
9 | docs/system/devices/igb.rst | 2 +- | ||
10 | scripts/ci/org.centos/stream/8/x86_64/test-avocado | 2 +- | ||
11 | 3 files changed, 3 insertions(+), 3 deletions(-) | ||
12 | |||
13 | diff --git a/MAINTAINERS b/MAINTAINERS | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/MAINTAINERS | ||
16 | +++ b/MAINTAINERS | ||
17 | @@ -XXX,XX +XXX,XX @@ R: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
18 | S: Maintained | ||
19 | F: docs/system/devices/igb.rst | ||
20 | F: hw/net/igb* | ||
21 | -F: tests/avocado/igb.py | ||
22 | +F: tests/avocado/netdev-ethtool.py | ||
23 | F: tests/qtest/igb-test.c | ||
24 | F: tests/qtest/libqos/igb.c | ||
25 | |||
26 | diff --git a/docs/system/devices/igb.rst b/docs/system/devices/igb.rst | ||
27 | index XXXXXXX..XXXXXXX 100644 | ||
28 | --- a/docs/system/devices/igb.rst | ||
29 | +++ b/docs/system/devices/igb.rst | ||
30 | @@ -XXX,XX +XXX,XX @@ Avocado test and can be ran with the following command: | ||
31 | |||
32 | .. code:: shell | ||
33 | |||
34 | - make check-avocado AVOCADO_TESTS=tests/avocado/igb.py | ||
35 | + make check-avocado AVOCADO_TESTS=tests/avocado/netdev-ethtool.py | ||
36 | |||
37 | References | ||
38 | ========== | ||
39 | diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado | ||
40 | index XXXXXXX..XXXXXXX 100755 | ||
41 | --- a/scripts/ci/org.centos/stream/8/x86_64/test-avocado | ||
42 | +++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado | ||
43 | @@ -XXX,XX +XXX,XX @@ make get-vm-images | ||
44 | tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \ | ||
45 | tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \ | ||
46 | tests/avocado/hotplug_cpu.py:HotPlugCPU.test \ | ||
47 | - tests/avocado/igb.py:IGB.test \ | ||
48 | + tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb_nomsi \ | ||
49 | tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \ | ||
50 | tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \ | ||
51 | tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu_pt \ | ||
52 | -- | ||
53 | 2.7.4 | ||
54 | |||
55 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
5 | --- | ||
6 | tests/avocado/netdev-ethtool.py | 1 - | ||
7 | 1 file changed, 1 deletion(-) | ||
8 | |||
9 | diff --git a/tests/avocado/netdev-ethtool.py b/tests/avocado/netdev-ethtool.py | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/tests/avocado/netdev-ethtool.py | ||
12 | +++ b/tests/avocado/netdev-ethtool.py | ||
13 | @@ -XXX,XX +XXX,XX @@ | ||
14 | |||
15 | from avocado import skip | ||
16 | from avocado_qemu import QemuSystemTest | ||
17 | -from avocado_qemu import exec_command, exec_command_and_wait_for_pattern | ||
18 | from avocado_qemu import wait_for_console_pattern | ||
19 | |||
20 | class NetDevEthtool(QemuSystemTest): | ||
21 | -- | ||
22 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Running the test with KVM is unlikely to find more bugs, so remove | ||
4 | test_igb_nomsi_kvm to save test run time. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Reviewed-by: Thomas Huth <thuth@redhat.com> | ||
8 | Acked-by: Alex Bennée <alex.bennee@linaro.org> | ||
9 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
10 | --- | ||
11 | tests/avocado/netdev-ethtool.py | 12 +----------- | ||
12 | 1 file changed, 1 insertion(+), 11 deletions(-) | ||
13 | |||
14 | diff --git a/tests/avocado/netdev-ethtool.py b/tests/avocado/netdev-ethtool.py | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/tests/avocado/netdev-ethtool.py | ||
17 | +++ b/tests/avocado/netdev-ethtool.py | ||
18 | @@ -XXX,XX +XXX,XX @@ def get_asset(self, name, sha1): | ||
19 | # URL into a unique one | ||
20 | return self.fetch_asset(name=name, locations=(url), asset_hash=sha1) | ||
21 | |||
22 | - def common_test_code(self, netdev, extra_args=None, kvm=False): | ||
23 | + def common_test_code(self, netdev, extra_args=None): | ||
24 | |||
25 | # This custom kernel has drivers for all the supported network | ||
26 | # devices we can emulate in QEMU | ||
27 | @@ -XXX,XX +XXX,XX @@ def common_test_code(self, netdev, extra_args=None, kvm=False): | ||
28 | '-drive', drive, | ||
29 | '-device', netdev) | ||
30 | |||
31 | - if kvm: | ||
32 | - self.vm.add_args('-accel', 'kvm') | ||
33 | - | ||
34 | self.vm.set_console(console_index=0) | ||
35 | self.vm.launch() | ||
36 | |||
37 | @@ -XXX,XX +XXX,XX @@ def test_igb_nomsi(self): | ||
38 | """ | ||
39 | self.common_test_code("igb", "pci=nomsi") | ||
40 | |||
41 | - def test_igb_nomsi_kvm(self): | ||
42 | - """ | ||
43 | - :avocado: tags=device:igb | ||
44 | - """ | ||
45 | - self.require_accelerator('kvm') | ||
46 | - self.common_test_code("igb", "pci=nomsi", True) | ||
47 | - | ||
48 | # It seems the other popular cards we model in QEMU currently fail | ||
49 | # the pattern test with: | ||
50 | # | ||
51 | -- | ||
52 | 2.7.4 | ||
53 | |||
54 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | This function is not used. | ||
4 | |||
5 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
6 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
7 | --- | ||
8 | hw/net/net_rx_pkt.c | 5 ----- | ||
9 | hw/net/net_rx_pkt.h | 9 --------- | ||
10 | 2 files changed, 14 deletions(-) | ||
11 | |||
12 | diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/hw/net/net_rx_pkt.c | ||
15 | +++ b/hw/net/net_rx_pkt.c | ||
16 | @@ -XXX,XX +XXX,XX @@ eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt) | ||
17 | return &pkt->ip4hdr_info; | ||
18 | } | ||
19 | |||
20 | -eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt) | ||
21 | -{ | ||
22 | - return &pkt->l4hdr_info; | ||
23 | -} | ||
24 | - | ||
25 | static inline void | ||
26 | _net_rx_rss_add_chunk(uint8_t *rss_input, size_t *bytes_written, | ||
27 | void *ptr, size_t size) | ||
28 | diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/hw/net/net_rx_pkt.h | ||
31 | +++ b/hw/net/net_rx_pkt.h | ||
32 | @@ -XXX,XX +XXX,XX @@ eth_ip6_hdr_info *net_rx_pkt_get_ip6_info(struct NetRxPkt *pkt); | ||
33 | */ | ||
34 | eth_ip4_hdr_info *net_rx_pkt_get_ip4_info(struct NetRxPkt *pkt); | ||
35 | |||
36 | -/** | ||
37 | - * fetches L4 header analysis results | ||
38 | - * | ||
39 | - * Return: pointer to analysis results structure which is stored in internal | ||
40 | - * packet area. | ||
41 | - * | ||
42 | - */ | ||
43 | -eth_l4_hdr_info *net_rx_pkt_get_l4_info(struct NetRxPkt *pkt); | ||
44 | - | ||
45 | typedef enum { | ||
46 | NetPktRssIpV4, | ||
47 | NetPktRssIpV4Tcp, | ||
48 | -- | ||
49 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | The old eth_setup_vlan_headers has no user so remove it and rename | ||
4 | eth_setup_vlan_headers_ex. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/net/net_tx_pkt.c | 2 +- | ||
11 | include/net/eth.h | 9 +-------- | ||
12 | net/eth.c | 2 +- | ||
13 | 3 files changed, 3 insertions(+), 10 deletions(-) | ||
14 | |||
15 | diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/net/net_tx_pkt.c | ||
18 | +++ b/hw/net/net_tx_pkt.c | ||
19 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, | ||
20 | bool is_new; | ||
21 | assert(pkt); | ||
22 | |||
23 | - eth_setup_vlan_headers_ex(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, | ||
24 | + eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, | ||
25 | vlan, vlan_ethtype, &is_new); | ||
26 | |||
27 | /* update l2hdrlen */ | ||
28 | diff --git a/include/net/eth.h b/include/net/eth.h | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/include/net/eth.h | ||
31 | +++ b/include/net/eth.h | ||
32 | @@ -XXX,XX +XXX,XX @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
33 | uint16_t | ||
34 | eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len); | ||
35 | |||
36 | -void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, | ||
37 | +void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, | ||
38 | uint16_t vlan_ethtype, bool *is_new); | ||
39 | |||
40 | -static inline void | ||
41 | -eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, | ||
42 | - bool *is_new) | ||
43 | -{ | ||
44 | - eth_setup_vlan_headers_ex(ehdr, vlan_tag, ETH_P_VLAN, is_new); | ||
45 | -} | ||
46 | - | ||
47 | |||
48 | uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto); | ||
49 | |||
50 | diff --git a/net/eth.c b/net/eth.c | ||
51 | index XXXXXXX..XXXXXXX 100644 | ||
52 | --- a/net/eth.c | ||
53 | +++ b/net/eth.c | ||
54 | @@ -XXX,XX +XXX,XX @@ | ||
55 | #include "net/checksum.h" | ||
56 | #include "net/tap.h" | ||
57 | |||
58 | -void eth_setup_vlan_headers_ex(struct eth_header *ehdr, uint16_t vlan_tag, | ||
59 | +void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, | ||
60 | uint16_t vlan_ethtype, bool *is_new) | ||
61 | { | ||
62 | struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); | ||
63 | -- | ||
64 | 2.7.4 | ||
65 | |||
66 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | This saves some code and enables tracepoint for e1000's VLAN filtering. | ||
4 | |||
5 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
6 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
8 | --- | ||
9 | hw/net/e1000.c | 35 +++++------------------------------ | ||
10 | hw/net/e1000e_core.c | 47 +++++------------------------------------------ | ||
11 | hw/net/e1000x_common.c | 44 ++++++++++++++++++++++++++++++++++++++------ | ||
12 | hw/net/e1000x_common.h | 4 +++- | ||
13 | hw/net/igb_core.c | 41 +++-------------------------------------- | ||
14 | hw/net/trace-events | 4 ++-- | ||
15 | 6 files changed, 56 insertions(+), 119 deletions(-) | ||
16 | |||
17 | diff --git a/hw/net/e1000.c b/hw/net/e1000.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/hw/net/e1000.c | ||
20 | +++ b/hw/net/e1000.c | ||
21 | @@ -XXX,XX +XXX,XX @@ start_xmit(E1000State *s) | ||
22 | } | ||
23 | |||
24 | static int | ||
25 | -receive_filter(E1000State *s, const uint8_t *buf, int size) | ||
26 | +receive_filter(E1000State *s, const void *buf) | ||
27 | { | ||
28 | - uint32_t rctl = s->mac_reg[RCTL]; | ||
29 | - int isbcast = is_broadcast_ether_addr(buf); | ||
30 | - int ismcast = is_multicast_ether_addr(buf); | ||
31 | - | ||
32 | - if (e1000x_is_vlan_packet(buf, le16_to_cpu(s->mac_reg[VET])) && | ||
33 | - e1000x_vlan_rx_filter_enabled(s->mac_reg)) { | ||
34 | - uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(buf)->h_tci); | ||
35 | - uint32_t vfta = | ||
36 | - ldl_le_p((uint32_t *)(s->mac_reg + VFTA) + | ||
37 | - ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK)); | ||
38 | - if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) { | ||
39 | - return 0; | ||
40 | - } | ||
41 | - } | ||
42 | - | ||
43 | - if (!isbcast && !ismcast && (rctl & E1000_RCTL_UPE)) { /* promiscuous ucast */ | ||
44 | - return 1; | ||
45 | - } | ||
46 | - | ||
47 | - if (ismcast && (rctl & E1000_RCTL_MPE)) { /* promiscuous mcast */ | ||
48 | - return 1; | ||
49 | - } | ||
50 | - | ||
51 | - if (isbcast && (rctl & E1000_RCTL_BAM)) { /* broadcast enabled */ | ||
52 | - return 1; | ||
53 | - } | ||
54 | - | ||
55 | - return e1000x_rx_group_filter(s->mac_reg, buf); | ||
56 | + return (!e1000x_is_vlan_packet(buf, s->mac_reg[VET]) || | ||
57 | + e1000x_rx_vlan_filter(s->mac_reg, PKT_GET_VLAN_HDR(buf))) && | ||
58 | + e1000x_rx_group_filter(s->mac_reg, buf); | ||
59 | } | ||
60 | |||
61 | static void | ||
62 | @@ -XXX,XX +XXX,XX @@ e1000_receive_iov(NetClientState *nc, const struct iovec *iov, int iovcnt) | ||
63 | return size; | ||
64 | } | ||
65 | |||
66 | - if (!receive_filter(s, filter_buf, size)) { | ||
67 | + if (!receive_filter(s, filter_buf)) { | ||
68 | return size; | ||
69 | } | ||
70 | |||
71 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
72 | index XXXXXXX..XXXXXXX 100644 | ||
73 | --- a/hw/net/e1000e_core.c | ||
74 | +++ b/hw/net/e1000e_core.c | ||
75 | @@ -XXX,XX +XXX,XX @@ e1000e_rx_l4_cso_enabled(E1000ECore *core) | ||
76 | } | ||
77 | |||
78 | static bool | ||
79 | -e1000e_receive_filter(E1000ECore *core, const uint8_t *buf, int size) | ||
80 | +e1000e_receive_filter(E1000ECore *core, const void *buf) | ||
81 | { | ||
82 | - uint32_t rctl = core->mac[RCTL]; | ||
83 | - | ||
84 | - if (e1000x_is_vlan_packet(buf, core->mac[VET]) && | ||
85 | - e1000x_vlan_rx_filter_enabled(core->mac)) { | ||
86 | - uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(buf)->h_tci); | ||
87 | - uint32_t vfta = | ||
88 | - ldl_le_p((uint32_t *)(core->mac + VFTA) + | ||
89 | - ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK)); | ||
90 | - if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) { | ||
91 | - trace_e1000e_rx_flt_vlan_mismatch(vid); | ||
92 | - return false; | ||
93 | - } else { | ||
94 | - trace_e1000e_rx_flt_vlan_match(vid); | ||
95 | - } | ||
96 | - } | ||
97 | - | ||
98 | - switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { | ||
99 | - case ETH_PKT_UCAST: | ||
100 | - if (rctl & E1000_RCTL_UPE) { | ||
101 | - return true; /* promiscuous ucast */ | ||
102 | - } | ||
103 | - break; | ||
104 | - | ||
105 | - case ETH_PKT_BCAST: | ||
106 | - if (rctl & E1000_RCTL_BAM) { | ||
107 | - return true; /* broadcast enabled */ | ||
108 | - } | ||
109 | - break; | ||
110 | - | ||
111 | - case ETH_PKT_MCAST: | ||
112 | - if (rctl & E1000_RCTL_MPE) { | ||
113 | - return true; /* promiscuous mcast */ | ||
114 | - } | ||
115 | - break; | ||
116 | - | ||
117 | - default: | ||
118 | - g_assert_not_reached(); | ||
119 | - } | ||
120 | - | ||
121 | - return e1000x_rx_group_filter(core->mac, buf); | ||
122 | + return (!e1000x_is_vlan_packet(buf, core->mac[VET]) || | ||
123 | + e1000x_rx_vlan_filter(core->mac, PKT_GET_VLAN_HDR(buf))) && | ||
124 | + e1000x_rx_group_filter(core->mac, buf); | ||
125 | } | ||
126 | |||
127 | static inline void | ||
128 | @@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt, | ||
129 | net_rx_pkt_set_packet_type(core->rx_pkt, | ||
130 | get_eth_packet_type(PKT_GET_ETH_HDR(buf))); | ||
131 | |||
132 | - if (!e1000e_receive_filter(core, buf, size)) { | ||
133 | + if (!e1000e_receive_filter(core, buf)) { | ||
134 | trace_e1000e_rx_flt_dropped(); | ||
135 | return orig_size; | ||
136 | } | ||
137 | diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c | ||
138 | index XXXXXXX..XXXXXXX 100644 | ||
139 | --- a/hw/net/e1000x_common.c | ||
140 | +++ b/hw/net/e1000x_common.c | ||
141 | @@ -XXX,XX +XXX,XX @@ bool e1000x_is_vlan_packet(const void *buf, uint16_t vet) | ||
142 | return res; | ||
143 | } | ||
144 | |||
145 | -bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf) | ||
146 | +bool e1000x_rx_vlan_filter(uint32_t *mac, const struct vlan_header *vhdr) | ||
147 | +{ | ||
148 | + if (e1000x_vlan_rx_filter_enabled(mac)) { | ||
149 | + uint16_t vid = lduw_be_p(&vhdr->h_tci); | ||
150 | + uint32_t vfta = | ||
151 | + ldl_le_p((uint32_t *)(mac + VFTA) + | ||
152 | + ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK)); | ||
153 | + if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) { | ||
154 | + trace_e1000x_rx_flt_vlan_mismatch(vid); | ||
155 | + return false; | ||
156 | + } | ||
157 | + | ||
158 | + trace_e1000x_rx_flt_vlan_match(vid); | ||
159 | + } | ||
160 | + | ||
161 | + return true; | ||
162 | +} | ||
163 | + | ||
164 | +bool e1000x_rx_group_filter(uint32_t *mac, const struct eth_header *ehdr) | ||
165 | { | ||
166 | static const int mta_shift[] = { 4, 3, 2, 0 }; | ||
167 | uint32_t f, ra[2], *rp, rctl = mac[RCTL]; | ||
168 | |||
169 | + if (is_broadcast_ether_addr(ehdr->h_dest)) { | ||
170 | + if (rctl & E1000_RCTL_BAM) { | ||
171 | + return true; | ||
172 | + } | ||
173 | + } else if (is_multicast_ether_addr(ehdr->h_dest)) { | ||
174 | + if (rctl & E1000_RCTL_MPE) { | ||
175 | + return true; | ||
176 | + } | ||
177 | + } else { | ||
178 | + if (rctl & E1000_RCTL_UPE) { | ||
179 | + return true; | ||
180 | + } | ||
181 | + } | ||
182 | + | ||
183 | for (rp = mac + RA; rp < mac + RA + 32; rp += 2) { | ||
184 | if (!(rp[1] & E1000_RAH_AV)) { | ||
185 | continue; | ||
186 | } | ||
187 | ra[0] = cpu_to_le32(rp[0]); | ||
188 | ra[1] = cpu_to_le32(rp[1]); | ||
189 | - if (!memcmp(buf, (uint8_t *)ra, ETH_ALEN)) { | ||
190 | + if (!memcmp(ehdr->h_dest, (uint8_t *)ra, ETH_ALEN)) { | ||
191 | trace_e1000x_rx_flt_ucast_match((int)(rp - mac - RA) / 2, | ||
192 | - MAC_ARG(buf)); | ||
193 | + MAC_ARG(ehdr->h_dest)); | ||
194 | return true; | ||
195 | } | ||
196 | } | ||
197 | - trace_e1000x_rx_flt_ucast_mismatch(MAC_ARG(buf)); | ||
198 | + trace_e1000x_rx_flt_ucast_mismatch(MAC_ARG(ehdr->h_dest)); | ||
199 | |||
200 | f = mta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3]; | ||
201 | - f = (((buf[5] << 8) | buf[4]) >> f) & 0xfff; | ||
202 | + f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff; | ||
203 | if (mac[MTA + (f >> 5)] & (1 << (f & 0x1f))) { | ||
204 | return true; | ||
205 | } | ||
206 | |||
207 | - trace_e1000x_rx_flt_inexact_mismatch(MAC_ARG(buf), | ||
208 | + trace_e1000x_rx_flt_inexact_mismatch(MAC_ARG(ehdr->h_dest), | ||
209 | (rctl >> E1000_RCTL_MO_SHIFT) & 3, | ||
210 | f >> 5, | ||
211 | mac[MTA + (f >> 5)]); | ||
212 | diff --git a/hw/net/e1000x_common.h b/hw/net/e1000x_common.h | ||
213 | index XXXXXXX..XXXXXXX 100644 | ||
214 | --- a/hw/net/e1000x_common.h | ||
215 | +++ b/hw/net/e1000x_common.h | ||
216 | @@ -XXX,XX +XXX,XX @@ bool e1000x_rx_ready(PCIDevice *d, uint32_t *mac); | ||
217 | |||
218 | bool e1000x_is_vlan_packet(const void *buf, uint16_t vet); | ||
219 | |||
220 | -bool e1000x_rx_group_filter(uint32_t *mac, const uint8_t *buf); | ||
221 | +bool e1000x_rx_vlan_filter(uint32_t *mac, const struct vlan_header *vhdr); | ||
222 | + | ||
223 | +bool e1000x_rx_group_filter(uint32_t *mac, const struct eth_header *ehdr); | ||
224 | |||
225 | bool e1000x_hw_rx_enabled(uint32_t *mac); | ||
226 | |||
227 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
228 | index XXXXXXX..XXXXXXX 100644 | ||
229 | --- a/hw/net/igb_core.c | ||
230 | +++ b/hw/net/igb_core.c | ||
231 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
232 | uint16_t queues = 0; | ||
233 | uint16_t oversized = 0; | ||
234 | uint16_t vid = be16_to_cpu(l2_header->vlan.h_tci) & VLAN_VID_MASK; | ||
235 | - bool accepted = false; | ||
236 | int i; | ||
237 | |||
238 | memset(rss_info, 0, sizeof(E1000E_RSSInfo)); | ||
239 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
240 | } | ||
241 | |||
242 | if (e1000x_is_vlan_packet(ehdr, core->mac[VET] & 0xffff) && | ||
243 | - e1000x_vlan_rx_filter_enabled(core->mac)) { | ||
244 | - uint32_t vfta = | ||
245 | - ldl_le_p((uint32_t *)(core->mac + VFTA) + | ||
246 | - ((vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK)); | ||
247 | - if ((vfta & (1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK))) == 0) { | ||
248 | - trace_e1000e_rx_flt_vlan_mismatch(vid); | ||
249 | - return queues; | ||
250 | - } else { | ||
251 | - trace_e1000e_rx_flt_vlan_match(vid); | ||
252 | - } | ||
253 | + !e1000x_rx_vlan_filter(core->mac, PKT_GET_VLAN_HDR(ehdr))) { | ||
254 | + return queues; | ||
255 | } | ||
256 | |||
257 | if (core->mac[MRQC] & 1) { | ||
258 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
259 | } | ||
260 | } | ||
261 | } else { | ||
262 | - switch (net_rx_pkt_get_packet_type(core->rx_pkt)) { | ||
263 | - case ETH_PKT_UCAST: | ||
264 | - if (rctl & E1000_RCTL_UPE) { | ||
265 | - accepted = true; /* promiscuous ucast */ | ||
266 | - } | ||
267 | - break; | ||
268 | - | ||
269 | - case ETH_PKT_BCAST: | ||
270 | - if (rctl & E1000_RCTL_BAM) { | ||
271 | - accepted = true; /* broadcast enabled */ | ||
272 | - } | ||
273 | - break; | ||
274 | - | ||
275 | - case ETH_PKT_MCAST: | ||
276 | - if (rctl & E1000_RCTL_MPE) { | ||
277 | - accepted = true; /* promiscuous mcast */ | ||
278 | - } | ||
279 | - break; | ||
280 | - | ||
281 | - default: | ||
282 | - g_assert_not_reached(); | ||
283 | - } | ||
284 | - | ||
285 | - if (!accepted) { | ||
286 | - accepted = e1000x_rx_group_filter(core->mac, ehdr->h_dest); | ||
287 | - } | ||
288 | - | ||
289 | + bool accepted = e1000x_rx_group_filter(core->mac, ehdr); | ||
290 | if (!accepted) { | ||
291 | for (macp = core->mac + RA2; macp < core->mac + RA2 + 16; macp += 2) { | ||
292 | if (!(macp[1] & E1000_RAH_AV)) { | ||
293 | diff --git a/hw/net/trace-events b/hw/net/trace-events | ||
294 | index XXXXXXX..XXXXXXX 100644 | ||
295 | --- a/hw/net/trace-events | ||
296 | +++ b/hw/net/trace-events | ||
297 | @@ -XXX,XX +XXX,XX @@ e1000_receiver_overrun(size_t s, uint32_t rdh, uint32_t rdt) "Receiver overrun: | ||
298 | # e1000x_common.c | ||
299 | e1000x_rx_can_recv_disabled(bool link_up, bool rx_enabled, bool pci_master) "link_up: %d, rx_enabled %d, pci_master %d" | ||
300 | e1000x_vlan_is_vlan_pkt(bool is_vlan_pkt, uint16_t eth_proto, uint16_t vet) "Is VLAN packet: %d, ETH proto: 0x%X, VET: 0x%X" | ||
301 | +e1000x_rx_flt_vlan_mismatch(uint16_t vid) "VID mismatch: 0x%X" | ||
302 | +e1000x_rx_flt_vlan_match(uint16_t vid) "VID match: 0x%X" | ||
303 | e1000x_rx_flt_ucast_match(uint32_t idx, uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "unicast match[%d]: %02x:%02x:%02x:%02x:%02x:%02x" | ||
304 | e1000x_rx_flt_ucast_mismatch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5) "unicast mismatch: %02x:%02x:%02x:%02x:%02x:%02x" | ||
305 | e1000x_rx_flt_inexact_mismatch(uint8_t b0, uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4, uint8_t b5, uint32_t mo, uint32_t mta, uint32_t mta_val) "inexact mismatch: %02x:%02x:%02x:%02x:%02x:%02x MO %d MTA[%d] 0x%x" | ||
306 | @@ -XXX,XX +XXX,XX @@ e1000e_rx_can_recv_rings_full(void) "Cannot receive: all rings are full" | ||
307 | e1000e_rx_can_recv(void) "Can receive" | ||
308 | e1000e_rx_has_buffers(int ridx, uint32_t free_desc, size_t total_size, uint32_t desc_buf_size) "ring #%d: free descr: %u, packet size %zu, descr buffer size %u" | ||
309 | e1000e_rx_null_descriptor(void) "Null RX descriptor!!" | ||
310 | -e1000e_rx_flt_vlan_mismatch(uint16_t vid) "VID mismatch: 0x%X" | ||
311 | -e1000e_rx_flt_vlan_match(uint16_t vid) "VID match: 0x%X" | ||
312 | e1000e_rx_desc_ps_read(uint64_t a0, uint64_t a1, uint64_t a2, uint64_t a3) "buffers: [0x%"PRIx64", 0x%"PRIx64", 0x%"PRIx64", 0x%"PRIx64"]" | ||
313 | e1000e_rx_desc_ps_write(uint16_t a0, uint16_t a1, uint16_t a2, uint16_t a3) "bytes written: [%u, %u, %u, %u]" | ||
314 | e1000e_rx_desc_buff_sizes(uint32_t b0, uint32_t b1, uint32_t b2, uint32_t b3) "buffer sizes: [%u, %u, %u, %u]" | ||
315 | -- | ||
316 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Section 13.7.15 Receive Length Error Count says: | ||
4 | > Packets over 1522 bytes are oversized if LongPacketEnable is 0b | ||
5 | > (RCTL.LPE). If LongPacketEnable (LPE) is 1b, then an incoming packet | ||
6 | > is considered oversized if it exceeds 16384 bytes. | ||
7 | |||
8 | > These lengths are based on bytes in the received packet from | ||
9 | > <Destination Address> through <CRC>, inclusively. | ||
10 | |||
11 | As QEMU processes packets without CRC, the number of bytes for CRC | ||
12 | needs to be subtracted. This change adds some size definitions to be used | ||
13 | to derive the new size thresholds to eth.h. | ||
14 | |||
15 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
16 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
17 | --- | ||
18 | hw/net/e1000x_common.c | 10 +++++----- | ||
19 | include/net/eth.h | 2 ++ | ||
20 | 2 files changed, 7 insertions(+), 5 deletions(-) | ||
21 | |||
22 | diff --git a/hw/net/e1000x_common.c b/hw/net/e1000x_common.c | ||
23 | index XXXXXXX..XXXXXXX 100644 | ||
24 | --- a/hw/net/e1000x_common.c | ||
25 | +++ b/hw/net/e1000x_common.c | ||
26 | @@ -XXX,XX +XXX,XX @@ bool e1000x_hw_rx_enabled(uint32_t *mac) | ||
27 | |||
28 | bool e1000x_is_oversized(uint32_t *mac, size_t size) | ||
29 | { | ||
30 | + size_t header_size = sizeof(struct eth_header) + sizeof(struct vlan_header); | ||
31 | /* this is the size past which hardware will | ||
32 | drop packets when setting LPE=0 */ | ||
33 | - static const int maximum_ethernet_vlan_size = 1522; | ||
34 | + size_t maximum_short_size = header_size + ETH_MTU; | ||
35 | /* this is the size past which hardware will | ||
36 | drop packets when setting LPE=1 */ | ||
37 | - static const int maximum_ethernet_lpe_size = 16 * KiB; | ||
38 | + size_t maximum_large_size = 16 * KiB - ETH_FCS_LEN; | ||
39 | |||
40 | - if ((size > maximum_ethernet_lpe_size || | ||
41 | - (size > maximum_ethernet_vlan_size | ||
42 | - && !(mac[RCTL] & E1000_RCTL_LPE))) | ||
43 | + if ((size > maximum_large_size || | ||
44 | + (size > maximum_short_size && !(mac[RCTL] & E1000_RCTL_LPE))) | ||
45 | && !(mac[RCTL] & E1000_RCTL_SBP)) { | ||
46 | e1000x_inc_reg_if_not_full(mac, ROC); | ||
47 | trace_e1000x_rx_oversized(size); | ||
48 | diff --git a/include/net/eth.h b/include/net/eth.h | ||
49 | index XXXXXXX..XXXXXXX 100644 | ||
50 | --- a/include/net/eth.h | ||
51 | +++ b/include/net/eth.h | ||
52 | @@ -XXX,XX +XXX,XX @@ | ||
53 | #define ETH_ALEN 6 | ||
54 | #define ETH_HLEN 14 | ||
55 | #define ETH_ZLEN 60 /* Min. octets in frame without FCS */ | ||
56 | +#define ETH_FCS_LEN 4 | ||
57 | +#define ETH_MTU 1500 | ||
58 | |||
59 | struct eth_header { | ||
60 | uint8_t h_dest[ETH_ALEN]; /* destination eth addr */ | ||
61 | -- | ||
62 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | e1000e and igb employ NetPktRssIpV6TcpEx for RSS hash if TcpIpv6 MRQC | ||
4 | bit is set. Moreover, igb also has a MRQC bit for NetPktRssIpV6Tcp | ||
5 | though it is not implemented yet. Rename it to TcpIpv6Ex to avoid | ||
6 | confusion. | ||
7 | |||
8 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
9 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
10 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
11 | --- | ||
12 | hw/net/e1000e_core.c | 8 ++++---- | ||
13 | hw/net/e1000x_regs.h | 24 ++++++++++++------------ | ||
14 | hw/net/igb_core.c | 8 ++++---- | ||
15 | hw/net/trace-events | 2 +- | ||
16 | 4 files changed, 21 insertions(+), 21 deletions(-) | ||
17 | |||
18 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/hw/net/e1000e_core.c | ||
21 | +++ b/hw/net/e1000e_core.c | ||
22 | @@ -XXX,XX +XXX,XX @@ e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt) | ||
23 | ip6info->rss_ex_dst_valid, | ||
24 | ip6info->rss_ex_src_valid, | ||
25 | core->mac[MRQC], | ||
26 | - E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]), | ||
27 | + E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC]), | ||
28 | E1000_MRQC_EN_IPV6EX(core->mac[MRQC]), | ||
29 | E1000_MRQC_EN_IPV6(core->mac[MRQC])); | ||
30 | |||
31 | @@ -XXX,XX +XXX,XX @@ e1000e_rss_get_hash_type(E1000ECore *core, struct NetRxPkt *pkt) | ||
32 | ip6info->rss_ex_src_valid))) { | ||
33 | |||
34 | if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && | ||
35 | - E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) { | ||
36 | - return E1000_MRQ_RSS_TYPE_IPV6TCP; | ||
37 | + E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC])) { | ||
38 | + return E1000_MRQ_RSS_TYPE_IPV6TCPEX; | ||
39 | } | ||
40 | |||
41 | if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) { | ||
42 | @@ -XXX,XX +XXX,XX @@ e1000e_rss_calc_hash(E1000ECore *core, | ||
43 | case E1000_MRQ_RSS_TYPE_IPV4TCP: | ||
44 | type = NetPktRssIpV4Tcp; | ||
45 | break; | ||
46 | - case E1000_MRQ_RSS_TYPE_IPV6TCP: | ||
47 | + case E1000_MRQ_RSS_TYPE_IPV6TCPEX: | ||
48 | type = NetPktRssIpV6TcpEx; | ||
49 | break; | ||
50 | case E1000_MRQ_RSS_TYPE_IPV6: | ||
51 | diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h | ||
52 | index XXXXXXX..XXXXXXX 100644 | ||
53 | --- a/hw/net/e1000x_regs.h | ||
54 | +++ b/hw/net/e1000x_regs.h | ||
55 | @@ -XXX,XX +XXX,XX @@ | ||
56 | #define E1000_RETA_IDX(hash) ((hash) & (BIT(7) - 1)) | ||
57 | #define E1000_RETA_VAL(reta, hash) (((uint8_t *)(reta))[E1000_RETA_IDX(hash)]) | ||
58 | |||
59 | -#define E1000_MRQC_EN_TCPIPV4(mrqc) ((mrqc) & BIT(16)) | ||
60 | -#define E1000_MRQC_EN_IPV4(mrqc) ((mrqc) & BIT(17)) | ||
61 | -#define E1000_MRQC_EN_TCPIPV6(mrqc) ((mrqc) & BIT(18)) | ||
62 | -#define E1000_MRQC_EN_IPV6EX(mrqc) ((mrqc) & BIT(19)) | ||
63 | -#define E1000_MRQC_EN_IPV6(mrqc) ((mrqc) & BIT(20)) | ||
64 | - | ||
65 | -#define E1000_MRQ_RSS_TYPE_NONE (0) | ||
66 | -#define E1000_MRQ_RSS_TYPE_IPV4TCP (1) | ||
67 | -#define E1000_MRQ_RSS_TYPE_IPV4 (2) | ||
68 | -#define E1000_MRQ_RSS_TYPE_IPV6TCP (3) | ||
69 | -#define E1000_MRQ_RSS_TYPE_IPV6EX (4) | ||
70 | -#define E1000_MRQ_RSS_TYPE_IPV6 (5) | ||
71 | +#define E1000_MRQC_EN_TCPIPV4(mrqc) ((mrqc) & BIT(16)) | ||
72 | +#define E1000_MRQC_EN_IPV4(mrqc) ((mrqc) & BIT(17)) | ||
73 | +#define E1000_MRQC_EN_TCPIPV6EX(mrqc) ((mrqc) & BIT(18)) | ||
74 | +#define E1000_MRQC_EN_IPV6EX(mrqc) ((mrqc) & BIT(19)) | ||
75 | +#define E1000_MRQC_EN_IPV6(mrqc) ((mrqc) & BIT(20)) | ||
76 | + | ||
77 | +#define E1000_MRQ_RSS_TYPE_NONE (0) | ||
78 | +#define E1000_MRQ_RSS_TYPE_IPV4TCP (1) | ||
79 | +#define E1000_MRQ_RSS_TYPE_IPV4 (2) | ||
80 | +#define E1000_MRQ_RSS_TYPE_IPV6TCPEX (3) | ||
81 | +#define E1000_MRQ_RSS_TYPE_IPV6EX (4) | ||
82 | +#define E1000_MRQ_RSS_TYPE_IPV6 (5) | ||
83 | |||
84 | #define E1000_ICR_ASSERTED BIT(31) | ||
85 | #define E1000_EIAC_MASK 0x01F00000 | ||
86 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
87 | index XXXXXXX..XXXXXXX 100644 | ||
88 | --- a/hw/net/igb_core.c | ||
89 | +++ b/hw/net/igb_core.c | ||
90 | @@ -XXX,XX +XXX,XX @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt *pkt) | ||
91 | ip6info->rss_ex_dst_valid, | ||
92 | ip6info->rss_ex_src_valid, | ||
93 | core->mac[MRQC], | ||
94 | - E1000_MRQC_EN_TCPIPV6(core->mac[MRQC]), | ||
95 | + E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC]), | ||
96 | E1000_MRQC_EN_IPV6EX(core->mac[MRQC]), | ||
97 | E1000_MRQC_EN_IPV6(core->mac[MRQC])); | ||
98 | |||
99 | @@ -XXX,XX +XXX,XX @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt *pkt) | ||
100 | ip6info->rss_ex_src_valid))) { | ||
101 | |||
102 | if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && | ||
103 | - E1000_MRQC_EN_TCPIPV6(core->mac[MRQC])) { | ||
104 | - return E1000_MRQ_RSS_TYPE_IPV6TCP; | ||
105 | + E1000_MRQC_EN_TCPIPV6EX(core->mac[MRQC])) { | ||
106 | + return E1000_MRQ_RSS_TYPE_IPV6TCPEX; | ||
107 | } | ||
108 | |||
109 | if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) { | ||
110 | @@ -XXX,XX +XXX,XX @@ igb_rss_calc_hash(IGBCore *core, struct NetRxPkt *pkt, E1000E_RSSInfo *info) | ||
111 | case E1000_MRQ_RSS_TYPE_IPV4TCP: | ||
112 | type = NetPktRssIpV4Tcp; | ||
113 | break; | ||
114 | - case E1000_MRQ_RSS_TYPE_IPV6TCP: | ||
115 | + case E1000_MRQ_RSS_TYPE_IPV6TCPEX: | ||
116 | type = NetPktRssIpV6TcpEx; | ||
117 | break; | ||
118 | case E1000_MRQ_RSS_TYPE_IPV6: | ||
119 | diff --git a/hw/net/trace-events b/hw/net/trace-events | ||
120 | index XXXXXXX..XXXXXXX 100644 | ||
121 | --- a/hw/net/trace-events | ||
122 | +++ b/hw/net/trace-events | ||
123 | @@ -XXX,XX +XXX,XX @@ e1000e_rx_rss_disabled(void) "RSS is disabled" | ||
124 | e1000e_rx_rss_type(uint32_t type) "RSS type is %u" | ||
125 | e1000e_rx_rss_ip4(int l4hdr_proto, uint32_t mrqc, bool tcpipv4_enabled, bool ipv4_enabled) "RSS IPv4: L4 header protocol %d, mrqc 0x%X, tcpipv4 enabled %d, ipv4 enabled %d" | ||
126 | e1000e_rx_rss_ip6_rfctl(uint32_t rfctl) "RSS IPv6: rfctl 0x%X" | ||
127 | -e1000e_rx_rss_ip6(bool ex_dis, bool new_ex_dis, int l4hdr_proto, bool has_ext_headers, bool ex_dst_valid, bool ex_src_valid, uint32_t mrqc, bool tcpipv6_enabled, bool ipv6ex_enabled, bool ipv6_enabled) "RSS IPv6: ex_dis: %d, new_ex_dis: %d, L4 header protocol %d, has_ext_headers %d, ex_dst_valid %d, ex_src_valid %d, mrqc 0x%X, tcpipv6 enabled %d, ipv6ex enabled %d, ipv6 enabled %d" | ||
128 | +e1000e_rx_rss_ip6(bool ex_dis, bool new_ex_dis, int l4hdr_proto, bool has_ext_headers, bool ex_dst_valid, bool ex_src_valid, uint32_t mrqc, bool tcpipv6ex_enabled, bool ipv6ex_enabled, bool ipv6_enabled) "RSS IPv6: ex_dis: %d, new_ex_dis: %d, L4 header protocol %d, has_ext_headers %d, ex_dst_valid %d, ex_src_valid %d, mrqc 0x%X, tcpipv6ex enabled %d, ipv6ex enabled %d, ipv6 enabled %d" | ||
129 | |||
130 | e1000e_rx_metadata_protocols(bool hasip4, bool hasip6, int l4hdr_protocol) "protocols: ip4: %d, ip6: %d, l4hdr: %d" | ||
131 | e1000e_rx_metadata_vlan(uint16_t vlan_tag) "VLAN tag is 0x%X" | ||
132 | -- | ||
133 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Without this change, the status flags may not be traced e.g. if checksum | ||
4 | offloading is disabled. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/net/e1000e_core.c | 3 +-- | ||
11 | 1 file changed, 1 insertion(+), 2 deletions(-) | ||
12 | |||
13 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/hw/net/e1000e_core.c | ||
16 | +++ b/hw/net/e1000e_core.c | ||
17 | @@ -XXX,XX +XXX,XX @@ e1000e_build_rx_metadata(E1000ECore *core, | ||
18 | trace_e1000e_rx_metadata_l4_cso_disabled(); | ||
19 | } | ||
20 | |||
21 | - trace_e1000e_rx_metadata_status_flags(*status_flags); | ||
22 | - | ||
23 | func_exit: | ||
24 | + trace_e1000e_rx_metadata_status_flags(*status_flags); | ||
25 | *status_flags = cpu_to_le32(*status_flags); | ||
26 | } | ||
27 | |||
28 | -- | ||
29 | 2.7.4 | ||
30 | |||
31 | diff view generated by jsdifflib |
1 | From: Thomas Huth <thuth@redhat.com> | 1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> |
---|---|---|---|
2 | 2 | ||
3 | It does not make much sense to limit these commands to the legacy 'vlan' | 3 | Without this change, the status flags may not be traced e.g. if checksum |
4 | concept only, they should work with the modern netdevs, too. So now | 4 | offloading is disabled. |
5 | it is possible to use this command with one, two or three parameters. | ||
6 | 5 | ||
7 | With one parameter, the command installs a hostfwd rule on the default | 6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> |
8 | "user" network: | 7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> |
9 | hostfwd_add tcp:... | ||
10 | |||
11 | With two parameters, the command installs a hostfwd rule on a netdev | ||
12 | (that's the new way of using this command): | ||
13 | hostfwd_add netdev_id tcp:... | ||
14 | |||
15 | With three parameters, the command installs a rule on a 'vlan' (aka hub): | ||
16 | hostfwd_add hub_id name tcp:... | ||
17 | |||
18 | Same applies to the hostfwd_remove command now. | ||
19 | |||
20 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
21 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 8 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
22 | --- | 9 | --- |
23 | hmp-commands.hx | 4 ++-- | 10 | hw/net/igb_core.c | 3 +-- |
24 | net/slirp.c | 33 +++++++++++++++++++++++---------- | 11 | 1 file changed, 1 insertion(+), 2 deletions(-) |
25 | 2 files changed, 25 insertions(+), 12 deletions(-) | ||
26 | 12 | ||
27 | diff --git a/hmp-commands.hx b/hmp-commands.hx | 13 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c |
28 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
29 | --- a/hmp-commands.hx | 15 | --- a/hw/net/igb_core.c |
30 | +++ b/hmp-commands.hx | 16 | +++ b/hw/net/igb_core.c |
31 | @@ -XXX,XX +XXX,XX @@ ETEXI | 17 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, |
32 | { | 18 | trace_e1000e_rx_metadata_l4_cso_disabled(); |
33 | .name = "hostfwd_add", | 19 | } |
34 | .args_type = "arg1:s,arg2:s?,arg3:s?", | 20 | |
35 | - .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport", | 21 | - trace_e1000e_rx_metadata_status_flags(*status_flags); |
36 | + .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport-[guestaddr]:guestport", | 22 | - |
37 | .help = "redirect TCP or UDP connections from host to guest (requires -net user)", | 23 | func_exit: |
38 | .cmd = hmp_hostfwd_add, | 24 | + trace_e1000e_rx_metadata_status_flags(*status_flags); |
39 | }, | 25 | *status_flags = cpu_to_le32(*status_flags); |
40 | @@ -XXX,XX +XXX,XX @@ ETEXI | ||
41 | { | ||
42 | .name = "hostfwd_remove", | ||
43 | .args_type = "arg1:s,arg2:s?,arg3:s?", | ||
44 | - .params = "[vlan_id name] [tcp|udp]:[hostaddr]:hostport", | ||
45 | + .params = "[hub_id name]|[netdev_id] [tcp|udp]:[hostaddr]:hostport", | ||
46 | .help = "remove host-to-guest TCP or UDP redirection", | ||
47 | .cmd = hmp_hostfwd_remove, | ||
48 | }, | ||
49 | diff --git a/net/slirp.c b/net/slirp.c | ||
50 | index XXXXXXX..XXXXXXX 100644 | ||
51 | --- a/net/slirp.c | ||
52 | +++ b/net/slirp.c | ||
53 | @@ -XXX,XX +XXX,XX @@ error: | ||
54 | return -1; | ||
55 | } | 26 | } |
56 | 27 | ||
57 | -static SlirpState *slirp_lookup(Monitor *mon, const char *vlan, | ||
58 | - const char *stack) | ||
59 | +static SlirpState *slirp_lookup(Monitor *mon, const char *hub_id, | ||
60 | + const char *name) | ||
61 | { | ||
62 | - | ||
63 | - if (vlan) { | ||
64 | + if (name) { | ||
65 | NetClientState *nc; | ||
66 | - nc = net_hub_find_client_by_name(strtol(vlan, NULL, 0), stack); | ||
67 | - if (!nc) { | ||
68 | - monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n"); | ||
69 | - return NULL; | ||
70 | + if (hub_id) { | ||
71 | + nc = net_hub_find_client_by_name(strtol(hub_id, NULL, 0), name); | ||
72 | + if (!nc) { | ||
73 | + monitor_printf(mon, "unrecognized (vlan-id, stackname) pair\n"); | ||
74 | + return NULL; | ||
75 | + } | ||
76 | + } else { | ||
77 | + nc = qemu_find_netdev(name); | ||
78 | + if (!nc) { | ||
79 | + monitor_printf(mon, "unrecognized netdev id '%s'\n", name); | ||
80 | + return NULL; | ||
81 | + } | ||
82 | } | ||
83 | if (strcmp(nc->model, "user")) { | ||
84 | monitor_printf(mon, "invalid device specified\n"); | ||
85 | @@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_remove(Monitor *mon, const QDict *qdict) | ||
86 | const char *arg2 = qdict_get_try_str(qdict, "arg2"); | ||
87 | const char *arg3 = qdict_get_try_str(qdict, "arg3"); | ||
88 | |||
89 | - if (arg2) { | ||
90 | + if (arg3) { | ||
91 | s = slirp_lookup(mon, arg1, arg2); | ||
92 | src_str = arg3; | ||
93 | + } else if (arg2) { | ||
94 | + s = slirp_lookup(mon, NULL, arg1); | ||
95 | + src_str = arg2; | ||
96 | } else { | ||
97 | s = slirp_lookup(mon, NULL, NULL); | ||
98 | src_str = arg1; | ||
99 | @@ -XXX,XX +XXX,XX @@ void hmp_hostfwd_add(Monitor *mon, const QDict *qdict) | ||
100 | const char *arg2 = qdict_get_try_str(qdict, "arg2"); | ||
101 | const char *arg3 = qdict_get_try_str(qdict, "arg3"); | ||
102 | |||
103 | - if (arg2) { | ||
104 | + if (arg3) { | ||
105 | s = slirp_lookup(mon, arg1, arg2); | ||
106 | redir_str = arg3; | ||
107 | + } else if (arg2) { | ||
108 | + s = slirp_lookup(mon, NULL, arg1); | ||
109 | + redir_str = arg2; | ||
110 | } else { | ||
111 | s = slirp_lookup(mon, NULL, NULL); | ||
112 | redir_str = arg1; | ||
113 | -- | 28 | -- |
114 | 2.7.4 | 29 | 2.7.4 |
115 | 30 | ||
116 | 31 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | The goto is a bit confusing as it changes the control flow only if L4 | ||
4 | protocol is not recognized. It is also different from e1000e, and | ||
5 | noisy when comparing e1000e and igb. | ||
6 | |||
7 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
8 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
9 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
10 | --- | ||
11 | hw/net/igb_core.c | 2 +- | ||
12 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
13 | |||
14 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/net/igb_core.c | ||
17 | +++ b/hw/net/igb_core.c | ||
18 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, | ||
19 | break; | ||
20 | |||
21 | default: | ||
22 | - goto func_exit; | ||
23 | + break; | ||
24 | } | ||
25 | } else { | ||
26 | trace_e1000e_rx_metadata_l4_cso_disabled(); | ||
27 | -- | ||
28 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Section 7.2.2.3 Advanced Transmit Data Descriptor says: | ||
4 | > For frames that spans multiple descriptors, all fields apart from | ||
5 | > DCMD.EOP, DCMD.RS, DCMD.DEXT, DTALEN, Address and DTYP are valid only | ||
6 | > in the first descriptors and are ignored in the subsequent ones. | ||
7 | |||
8 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
9 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
10 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
11 | --- | ||
12 | hw/net/igb_core.c | 2 +- | ||
13 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/net/igb_core.c | ||
18 | +++ b/hw/net/igb_core.c | ||
19 | @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, | ||
20 | idx = (tx->first_olinfo_status >> 4) & 1; | ||
21 | igb_tx_insert_vlan(core, queue_index, tx, | ||
22 | tx->ctx[idx].vlan_macip_lens >> 16, | ||
23 | - !!(cmd_type_len & E1000_TXD_CMD_VLE)); | ||
24 | + !!(tx->first_cmd_type_len & E1000_TXD_CMD_VLE)); | ||
25 | |||
26 | if (igb_tx_pkt_send(core, tx, queue_index)) { | ||
27 | igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index); | ||
28 | -- | ||
29 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Keeping Tx packet state after the transmit queue is emptied has some | ||
4 | problems: | ||
5 | - The datasheet says the descriptors can be reused after the transmit | ||
6 | queue is emptied, but the Tx packet state may keep references to them. | ||
7 | - The Tx packet state cannot be migrated so it can be reset anytime the | ||
8 | migration happens. | ||
9 | |||
10 | Always reset Tx packet state after the queue is emptied. | ||
11 | |||
12 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
13 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
14 | --- | ||
15 | hw/net/e1000e_core.c | 6 ++---- | ||
16 | 1 file changed, 2 insertions(+), 4 deletions(-) | ||
17 | |||
18 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
19 | index XXXXXXX..XXXXXXX 100644 | ||
20 | --- a/hw/net/e1000e_core.c | ||
21 | +++ b/hw/net/e1000e_core.c | ||
22 | @@ -XXX,XX +XXX,XX @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) | ||
23 | if (!ide || !e1000e_intrmgr_delay_tx_causes(core, &cause)) { | ||
24 | e1000e_set_interrupt_cause(core, cause); | ||
25 | } | ||
26 | + | ||
27 | + net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci, core->owner); | ||
28 | } | ||
29 | |||
30 | static bool | ||
31 | @@ -XXX,XX +XXX,XX @@ e1000e_core_pci_uninit(E1000ECore *core) | ||
32 | qemu_del_vm_change_state_handler(core->vmstate); | ||
33 | |||
34 | for (i = 0; i < E1000E_NUM_QUEUES; i++) { | ||
35 | - net_tx_pkt_reset(core->tx[i].tx_pkt, | ||
36 | - net_tx_pkt_unmap_frag_pci, core->owner); | ||
37 | net_tx_pkt_uninit(core->tx[i].tx_pkt); | ||
38 | } | ||
39 | |||
40 | @@ -XXX,XX +XXX,XX @@ static void e1000e_reset(E1000ECore *core, bool sw) | ||
41 | e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac); | ||
42 | |||
43 | for (i = 0; i < ARRAY_SIZE(core->tx); i++) { | ||
44 | - net_tx_pkt_reset(core->tx[i].tx_pkt, | ||
45 | - net_tx_pkt_unmap_frag_pci, core->owner); | ||
46 | memset(&core->tx[i].props, 0, sizeof(core->tx[i].props)); | ||
47 | core->tx[i].skip_cp = false; | ||
48 | } | ||
49 | -- | ||
50 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Keeping Tx packet state after the transmit queue is emptied is | ||
4 | unreliable as the state can be reset anytime the migration | ||
5 | happens. | ||
6 | |||
7 | Always reset Tx packet state after the queue is emptied. | ||
8 | |||
9 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
10 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
11 | --- | ||
12 | hw/net/vmxnet3.c | 3 ++- | ||
13 | 1 file changed, 2 insertions(+), 1 deletion(-) | ||
14 | |||
15 | diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/net/vmxnet3.c | ||
18 | +++ b/hw/net/vmxnet3.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx) | ||
20 | net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s)); | ||
21 | } | ||
22 | } | ||
23 | + | ||
24 | + net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s)); | ||
25 | } | ||
26 | |||
27 | static inline void | ||
28 | @@ -XXX,XX +XXX,XX @@ static void vmxnet3_deactivate_device(VMXNET3State *s) | ||
29 | { | ||
30 | if (s->device_active) { | ||
31 | VMW_CBPRN("Deactivating vmxnet3..."); | ||
32 | - net_tx_pkt_reset(s->tx_pkt, net_tx_pkt_unmap_frag_pci, PCI_DEVICE(s)); | ||
33 | net_tx_pkt_uninit(s->tx_pkt); | ||
34 | net_rx_pkt_uninit(s->rx_pkt); | ||
35 | s->device_active = false; | ||
36 | -- | ||
37 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
5 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
6 | --- | ||
7 | hw/net/igb_core.c | 4 ++-- | ||
8 | hw/net/igb_regs.h | 32 +++++++++++++++++++++++++++----- | ||
9 | 2 files changed, 29 insertions(+), 7 deletions(-) | ||
10 | |||
11 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/hw/net/igb_core.c | ||
14 | +++ b/hw/net/igb_core.c | ||
15 | @@ -XXX,XX +XXX,XX @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx) | ||
16 | { | ||
17 | if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) { | ||
18 | uint32_t idx = (tx->first_olinfo_status >> 4) & 1; | ||
19 | - uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16; | ||
20 | + uint32_t mss = tx->ctx[idx].mss_l4len_idx >> E1000_ADVTXD_MSS_SHIFT; | ||
21 | if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) { | ||
22 | return false; | ||
23 | } | ||
24 | @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, | ||
25 | if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) { | ||
26 | idx = (tx->first_olinfo_status >> 4) & 1; | ||
27 | igb_tx_insert_vlan(core, queue_index, tx, | ||
28 | - tx->ctx[idx].vlan_macip_lens >> 16, | ||
29 | + tx->ctx[idx].vlan_macip_lens >> IGB_TX_FLAGS_VLAN_SHIFT, | ||
30 | !!(tx->first_cmd_type_len & E1000_TXD_CMD_VLE)); | ||
31 | |||
32 | if (igb_tx_pkt_send(core, tx, queue_index)) { | ||
33 | diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/hw/net/igb_regs.h | ||
36 | +++ b/hw/net/igb_regs.h | ||
37 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_tx_desc { | ||
38 | } wb; | ||
39 | }; | ||
40 | |||
41 | -#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ | ||
42 | -#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ | ||
43 | -#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor Extension (1=Adv) */ | ||
44 | -#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP/UDP Segmentation Enable */ | ||
45 | - | ||
46 | #define E1000_ADVTXD_POTS_IXSM 0x00000100 /* Insert TCP/UDP Checksum */ | ||
47 | #define E1000_ADVTXD_POTS_TXSM 0x00000200 /* Insert TCP/UDP Checksum */ | ||
48 | |||
49 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { | ||
50 | #define IGB_82576_VF_DEV_ID 0x10CA | ||
51 | #define IGB_I350_VF_DEV_ID 0x1520 | ||
52 | |||
53 | +/* VLAN info */ | ||
54 | +#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 | ||
55 | +#define IGB_TX_FLAGS_VLAN_SHIFT 16 | ||
56 | + | ||
57 | /* from igb/e1000_82575.h */ | ||
58 | |||
59 | #define E1000_MRQC_ENABLE_RSS_MQ 0x00000002 | ||
60 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { | ||
61 | #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 | ||
62 | #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 | ||
63 | |||
64 | +/* Adv Transmit Descriptor Config Masks */ | ||
65 | +#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp packet */ | ||
66 | +#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ | ||
67 | +#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ | ||
68 | +#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ | ||
69 | +#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ | ||
70 | +#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ | ||
71 | +#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ | ||
72 | +#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ | ||
73 | +#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ | ||
74 | +#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ | ||
75 | + | ||
76 | +#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ | ||
77 | +#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ | ||
78 | +#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ | ||
79 | +#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ | ||
80 | +#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 packet TYPE of SCTP */ | ||
81 | +/* IPSec Encrypt Enable for ESP */ | ||
82 | +#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ | ||
83 | +#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ | ||
84 | +/* Adv ctxt IPSec SA IDX mask */ | ||
85 | +/* Adv ctxt IPSec ESP len mask */ | ||
86 | + | ||
87 | /* Additional Transmit Descriptor Control definitions */ | ||
88 | #define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */ | ||
89 | |||
90 | -- | ||
91 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | The constants need to be consistent between the PF and VF. | ||
4 | |||
5 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
6 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
7 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/net/igb.c | 10 +++++----- | ||
11 | hw/net/igb_common.h | 8 ++++++++ | ||
12 | hw/net/igbvf.c | 7 ------- | ||
13 | 3 files changed, 13 insertions(+), 12 deletions(-) | ||
14 | |||
15 | diff --git a/hw/net/igb.c b/hw/net/igb.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/net/igb.c | ||
18 | +++ b/hw/net/igb.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp) | ||
20 | |||
21 | pcie_ari_init(pci_dev, 0x150, 1); | ||
22 | |||
23 | - pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, "igbvf", | ||
24 | + pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF, | ||
25 | IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS, | ||
26 | IGB_VF_OFFSET, IGB_VF_STRIDE); | ||
27 | |||
28 | - pcie_sriov_pf_init_vf_bar(pci_dev, 0, | ||
29 | + pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MMIO_BAR_IDX, | ||
30 | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, | ||
31 | - 16 * KiB); | ||
32 | - pcie_sriov_pf_init_vf_bar(pci_dev, 3, | ||
33 | + IGBVF_MMIO_SIZE); | ||
34 | + pcie_sriov_pf_init_vf_bar(pci_dev, IGBVF_MSIX_BAR_IDX, | ||
35 | PCI_BASE_ADDRESS_MEM_TYPE_64 | PCI_BASE_ADDRESS_MEM_PREFETCH, | ||
36 | - 16 * KiB); | ||
37 | + IGBVF_MSIX_SIZE); | ||
38 | |||
39 | igb_init_net_peer(s, pci_dev, macaddr); | ||
40 | |||
41 | diff --git a/hw/net/igb_common.h b/hw/net/igb_common.h | ||
42 | index XXXXXXX..XXXXXXX 100644 | ||
43 | --- a/hw/net/igb_common.h | ||
44 | +++ b/hw/net/igb_common.h | ||
45 | @@ -XXX,XX +XXX,XX @@ | ||
46 | |||
47 | #include "igb_regs.h" | ||
48 | |||
49 | +#define TYPE_IGBVF "igbvf" | ||
50 | + | ||
51 | +#define IGBVF_MMIO_BAR_IDX (0) | ||
52 | +#define IGBVF_MSIX_BAR_IDX (3) | ||
53 | + | ||
54 | +#define IGBVF_MMIO_SIZE (16 * 1024) | ||
55 | +#define IGBVF_MSIX_SIZE (16 * 1024) | ||
56 | + | ||
57 | #define defreg(x) x = (E1000_##x >> 2) | ||
58 | #define defreg_indexed(x, i) x##i = (E1000_##x(i) >> 2) | ||
59 | #define defreg_indexeda(x, i) x##i##_A = (E1000_##x##_A(i) >> 2) | ||
60 | diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c | ||
61 | index XXXXXXX..XXXXXXX 100644 | ||
62 | --- a/hw/net/igbvf.c | ||
63 | +++ b/hw/net/igbvf.c | ||
64 | @@ -XXX,XX +XXX,XX @@ | ||
65 | #include "trace.h" | ||
66 | #include "qapi/error.h" | ||
67 | |||
68 | -#define TYPE_IGBVF "igbvf" | ||
69 | OBJECT_DECLARE_SIMPLE_TYPE(IgbVfState, IGBVF) | ||
70 | |||
71 | -#define IGBVF_MMIO_BAR_IDX (0) | ||
72 | -#define IGBVF_MSIX_BAR_IDX (3) | ||
73 | - | ||
74 | -#define IGBVF_MMIO_SIZE (16 * 1024) | ||
75 | -#define IGBVF_MSIX_SIZE (16 * 1024) | ||
76 | - | ||
77 | struct IgbVfState { | ||
78 | PCIDevice parent_obj; | ||
79 | |||
80 | -- | ||
81 | 2.7.4 | ||
82 | |||
83 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
5 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
6 | --- | ||
7 | hw/net/igb_core.c | 96 +++++++++++++++++++++++++++---------------------------- | ||
8 | 1 file changed, 48 insertions(+), 48 deletions(-) | ||
9 | |||
10 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/hw/net/igb_core.c | ||
13 | +++ b/hw/net/igb_core.c | ||
14 | @@ -XXX,XX +XXX,XX @@ static const uint32_t igb_mac_reg_init[] = { | ||
15 | [VMOLR0 ... VMOLR0 + 7] = 0x2600 | E1000_VMOLR_STRCRC, | ||
16 | [RPLOLR] = E1000_RPLOLR_STRCRC, | ||
17 | [RLPML] = 0x2600, | ||
18 | - [TXCTL0] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
19 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
20 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
21 | - [TXCTL1] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
22 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
23 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
24 | - [TXCTL2] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
25 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
26 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
27 | - [TXCTL3] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
28 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
29 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
30 | - [TXCTL4] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
31 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
32 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
33 | - [TXCTL5] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
34 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
35 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
36 | - [TXCTL6] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
37 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
38 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
39 | - [TXCTL7] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
40 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
41 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
42 | - [TXCTL8] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
43 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
44 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
45 | - [TXCTL9] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
46 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
47 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
48 | - [TXCTL10] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
49 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
50 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
51 | - [TXCTL11] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
52 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
53 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
54 | - [TXCTL12] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
55 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
56 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
57 | - [TXCTL13] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
58 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
59 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
60 | - [TXCTL14] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
61 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
62 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
63 | - [TXCTL15] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
64 | - E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
65 | - E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
66 | + [TXCTL0] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
67 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
68 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
69 | + [TXCTL1] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
70 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
71 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
72 | + [TXCTL2] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
73 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
74 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
75 | + [TXCTL3] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
76 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
77 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
78 | + [TXCTL4] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
79 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
80 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
81 | + [TXCTL5] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
82 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
83 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
84 | + [TXCTL6] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
85 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
86 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
87 | + [TXCTL7] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
88 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
89 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
90 | + [TXCTL8] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
91 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
92 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
93 | + [TXCTL9] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
94 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
95 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
96 | + [TXCTL10] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
97 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
98 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
99 | + [TXCTL11] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
100 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
101 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
102 | + [TXCTL12] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
103 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
104 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
105 | + [TXCTL13] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
106 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
107 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
108 | + [TXCTL14] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
109 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
110 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
111 | + [TXCTL15] = E1000_DCA_TXCTRL_DATA_RRO_EN | | ||
112 | + E1000_DCA_TXCTRL_TX_WB_RO_EN | | ||
113 | + E1000_DCA_TXCTRL_DESC_RRO_EN, | ||
114 | }; | ||
115 | |||
116 | static void igb_reset(IGBCore *core, bool sw) | ||
117 | -- | ||
118 | 2.7.4 | ||
119 | |||
120 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Section 7.3.4.1 says: | ||
4 | > When auto-clear is enabled for an interrupt cause, the EICR bit is | ||
5 | > set when a cause event mapped to this vector occurs. When the EITR | ||
6 | > Counter reaches zero, the MSI-X message is sent on PCIe. Then the | ||
7 | > EICR bit is cleared and enabled to be set by a new cause event | ||
8 | |||
9 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
10 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
11 | --- | ||
12 | hw/net/igb_core.c | 21 ++++++++++++--------- | ||
13 | 1 file changed, 12 insertions(+), 9 deletions(-) | ||
14 | |||
15 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/net/igb_core.c | ||
18 | +++ b/hw/net/igb_core.c | ||
19 | @@ -XXX,XX +XXX,XX @@ igb_lower_legacy_irq(IGBCore *core) | ||
20 | pci_set_irq(core->owner, 0); | ||
21 | } | ||
22 | |||
23 | -static void igb_msix_notify(IGBCore *core, unsigned int vector) | ||
24 | +static void igb_msix_notify(IGBCore *core, unsigned int cause) | ||
25 | { | ||
26 | PCIDevice *dev = core->owner; | ||
27 | uint16_t vfn; | ||
28 | + uint32_t effective_eiac; | ||
29 | + unsigned int vector; | ||
30 | |||
31 | - vfn = 8 - (vector + 2) / IGBVF_MSIX_VEC_NUM; | ||
32 | + vfn = 8 - (cause + 2) / IGBVF_MSIX_VEC_NUM; | ||
33 | if (vfn < pcie_sriov_num_vfs(core->owner)) { | ||
34 | dev = pcie_sriov_get_vf_at_index(core->owner, vfn); | ||
35 | assert(dev); | ||
36 | - vector = (vector + 2) % IGBVF_MSIX_VEC_NUM; | ||
37 | - } else if (vector >= IGB_MSIX_VEC_NUM) { | ||
38 | + vector = (cause + 2) % IGBVF_MSIX_VEC_NUM; | ||
39 | + } else if (cause >= IGB_MSIX_VEC_NUM) { | ||
40 | qemu_log_mask(LOG_GUEST_ERROR, | ||
41 | "igb: Tried to use vector unavailable for PF"); | ||
42 | return; | ||
43 | + } else { | ||
44 | + vector = cause; | ||
45 | } | ||
46 | |||
47 | msix_notify(dev, vector); | ||
48 | + | ||
49 | + trace_e1000e_irq_icr_clear_eiac(core->mac[EICR], core->mac[EIAC]); | ||
50 | + effective_eiac = core->mac[EIAC] & BIT(cause); | ||
51 | + core->mac[EICR] &= ~effective_eiac; | ||
52 | } | ||
53 | |||
54 | static inline void | ||
55 | @@ -XXX,XX +XXX,XX @@ igb_eitr_should_postpone(IGBCore *core, int idx) | ||
56 | static void igb_send_msix(IGBCore *core) | ||
57 | { | ||
58 | uint32_t causes = core->mac[EICR] & core->mac[EIMS]; | ||
59 | - uint32_t effective_eiac; | ||
60 | int vector; | ||
61 | |||
62 | for (vector = 0; vector < IGB_INTR_NUM; ++vector) { | ||
63 | @@ -XXX,XX +XXX,XX @@ static void igb_send_msix(IGBCore *core) | ||
64 | |||
65 | trace_e1000e_irq_msix_notify_vec(vector); | ||
66 | igb_msix_notify(core, vector); | ||
67 | - | ||
68 | - trace_e1000e_irq_icr_clear_eiac(core->mac[EICR], core->mac[EIAC]); | ||
69 | - effective_eiac = core->mac[EIAC] & BIT(vector); | ||
70 | - core->mac[EICR] &= ~effective_eiac; | ||
71 | } | ||
72 | } | ||
73 | } | ||
74 | -- | ||
75 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Rename variable "n" to "causes", which properly represents the content | ||
4 | of the variable. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
8 | --- | ||
9 | hw/net/e1000e_core.c | 18 +++++++++--------- | ||
10 | 1 file changed, 9 insertions(+), 9 deletions(-) | ||
11 | |||
12 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
13 | index XXXXXXX..XXXXXXX 100644 | ||
14 | --- a/hw/net/e1000e_core.c | ||
15 | +++ b/hw/net/e1000e_core.c | ||
16 | @@ -XXX,XX +XXX,XX @@ static ssize_t | ||
17 | e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt, | ||
18 | bool has_vnet) | ||
19 | { | ||
20 | - uint32_t n = 0; | ||
21 | + uint32_t causes = 0; | ||
22 | uint8_t buf[ETH_ZLEN]; | ||
23 | struct iovec min_iov; | ||
24 | size_t size, orig_size; | ||
25 | @@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt, | ||
26 | |||
27 | /* Perform small receive detection (RSRPD) */ | ||
28 | if (total_size < core->mac[RSRPD]) { | ||
29 | - n |= E1000_ICS_SRPD; | ||
30 | + causes |= E1000_ICS_SRPD; | ||
31 | } | ||
32 | |||
33 | /* Perform ACK receive detection */ | ||
34 | if (!(core->mac[RFCTL] & E1000_RFCTL_ACK_DIS) && | ||
35 | (e1000e_is_tcp_ack(core, core->rx_pkt))) { | ||
36 | - n |= E1000_ICS_ACK; | ||
37 | + causes |= E1000_ICS_ACK; | ||
38 | } | ||
39 | |||
40 | /* Check if receive descriptor minimum threshold hit */ | ||
41 | rdmts_hit = e1000e_rx_descr_threshold_hit(core, rxr.i); | ||
42 | - n |= e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit); | ||
43 | + causes |= e1000e_rx_wb_interrupt_cause(core, rxr.i->idx, rdmts_hit); | ||
44 | |||
45 | trace_e1000e_rx_written_to_guest(rxr.i->idx); | ||
46 | } else { | ||
47 | - n |= E1000_ICS_RXO; | ||
48 | + causes |= E1000_ICS_RXO; | ||
49 | retval = 0; | ||
50 | |||
51 | trace_e1000e_rx_not_written_to_guest(rxr.i->idx); | ||
52 | } | ||
53 | |||
54 | - if (!e1000e_intrmgr_delay_rx_causes(core, &n)) { | ||
55 | - trace_e1000e_rx_interrupt_set(n); | ||
56 | - e1000e_set_interrupt_cause(core, n); | ||
57 | + if (!e1000e_intrmgr_delay_rx_causes(core, &causes)) { | ||
58 | + trace_e1000e_rx_interrupt_set(causes); | ||
59 | + e1000e_set_interrupt_cause(core, causes); | ||
60 | } else { | ||
61 | - trace_e1000e_rx_interrupt_delayed(n); | ||
62 | + trace_e1000e_rx_interrupt_delayed(causes); | ||
63 | } | ||
64 | |||
65 | return retval; | ||
66 | -- | ||
67 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Rename variable "n" to "causes", which properly represents the content | ||
4 | of the variable. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/net/igb_core.c | 12 ++++++------ | ||
11 | 1 file changed, 6 insertions(+), 6 deletions(-) | ||
12 | |||
13 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/hw/net/igb_core.c | ||
16 | +++ b/hw/net/igb_core.c | ||
17 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
18 | bool has_vnet, bool *external_tx) | ||
19 | { | ||
20 | uint16_t queues = 0; | ||
21 | - uint32_t n = 0; | ||
22 | + uint32_t causes = 0; | ||
23 | union { | ||
24 | L2Header l2_header; | ||
25 | uint8_t octets[ETH_ZLEN]; | ||
26 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
27 | e1000x_fcs_len(core->mac); | ||
28 | |||
29 | if (!igb_has_rxbufs(core, rxr.i, total_size)) { | ||
30 | - n |= E1000_ICS_RXO; | ||
31 | + causes |= E1000_ICS_RXO; | ||
32 | trace_e1000e_rx_not_written_to_guest(rxr.i->idx); | ||
33 | continue; | ||
34 | } | ||
35 | |||
36 | - n |= E1000_ICR_RXDW; | ||
37 | + causes |= E1000_ICR_RXDW; | ||
38 | |||
39 | igb_rx_fix_l4_csum(core, core->rx_pkt); | ||
40 | igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info); | ||
41 | |||
42 | /* Check if receive descriptor minimum threshold hit */ | ||
43 | if (igb_rx_descr_threshold_hit(core, rxr.i)) { | ||
44 | - n |= E1000_ICS_RXDMT0; | ||
45 | + causes |= E1000_ICS_RXDMT0; | ||
46 | } | ||
47 | |||
48 | core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx); | ||
49 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
50 | trace_e1000e_rx_written_to_guest(rxr.i->idx); | ||
51 | } | ||
52 | |||
53 | - trace_e1000e_rx_interrupt_set(n); | ||
54 | - igb_set_interrupt_cause(core, n); | ||
55 | + trace_e1000e_rx_interrupt_set(causes); | ||
56 | + igb_set_interrupt_cause(core, causes); | ||
57 | |||
58 | return orig_size; | ||
59 | } | ||
60 | -- | ||
61 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | The uses of uint8_t pointers were misleading as they are never accessed | ||
4 | as an array of octets and it even requires more strict alignment to | ||
5 | access as struct eth_header. | ||
6 | |||
7 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
10 | --- | ||
11 | include/net/eth.h | 4 ++-- | ||
12 | net/eth.c | 6 +++--- | ||
13 | 2 files changed, 5 insertions(+), 5 deletions(-) | ||
14 | |||
15 | diff --git a/include/net/eth.h b/include/net/eth.h | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/include/net/eth.h | ||
18 | +++ b/include/net/eth.h | ||
19 | @@ -XXX,XX +XXX,XX @@ eth_get_pkt_tci(const void *p) | ||
20 | |||
21 | size_t | ||
22 | eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
23 | - uint8_t *new_ehdr_buf, | ||
24 | + void *new_ehdr_buf, | ||
25 | uint16_t *payload_offset, uint16_t *tci); | ||
26 | |||
27 | size_t | ||
28 | eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
29 | - uint16_t vet, uint8_t *new_ehdr_buf, | ||
30 | + uint16_t vet, void *new_ehdr_buf, | ||
31 | uint16_t *payload_offset, uint16_t *tci); | ||
32 | |||
33 | uint16_t | ||
34 | diff --git a/net/eth.c b/net/eth.c | ||
35 | index XXXXXXX..XXXXXXX 100644 | ||
36 | --- a/net/eth.c | ||
37 | +++ b/net/eth.c | ||
38 | @@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff, | ||
39 | |||
40 | size_t | ||
41 | eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
42 | - uint8_t *new_ehdr_buf, | ||
43 | + void *new_ehdr_buf, | ||
44 | uint16_t *payload_offset, uint16_t *tci) | ||
45 | { | ||
46 | struct vlan_header vlan_hdr; | ||
47 | - struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; | ||
48 | + struct eth_header *new_ehdr = new_ehdr_buf; | ||
49 | |||
50 | size_t copied = iov_to_buf(iov, iovcnt, iovoff, | ||
51 | new_ehdr, sizeof(*new_ehdr)); | ||
52 | @@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
53 | |||
54 | size_t | ||
55 | eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
56 | - uint16_t vet, uint8_t *new_ehdr_buf, | ||
57 | + uint16_t vet, void *new_ehdr_buf, | ||
58 | uint16_t *payload_offset, uint16_t *tci) | ||
59 | { | ||
60 | struct vlan_header vlan_hdr; | ||
61 | -- | ||
62 | 2.7.4 | ||
63 | |||
64 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | It is possible to have another VLAN tag even if the packet is already | ||
4 | tagged. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
8 | --- | ||
9 | hw/net/net_tx_pkt.c | 16 +++++++--------- | ||
10 | include/net/eth.h | 4 ++-- | ||
11 | net/eth.c | 22 ++++++---------------- | ||
12 | 3 files changed, 15 insertions(+), 27 deletions(-) | ||
13 | |||
14 | diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/net/net_tx_pkt.c | ||
17 | +++ b/hw/net/net_tx_pkt.c | ||
18 | @@ -XXX,XX +XXX,XX @@ struct NetTxPkt { | ||
19 | |||
20 | struct iovec *vec; | ||
21 | |||
22 | - uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN]; | ||
23 | + struct { | ||
24 | + struct eth_header eth; | ||
25 | + struct vlan_header vlan[3]; | ||
26 | + } l2_hdr; | ||
27 | union { | ||
28 | struct ip_header ip; | ||
29 | struct ip6_header ip6; | ||
30 | @@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_build_vheader(struct NetTxPkt *pkt, bool tso_enable, | ||
31 | void net_tx_pkt_setup_vlan_header_ex(struct NetTxPkt *pkt, | ||
32 | uint16_t vlan, uint16_t vlan_ethtype) | ||
33 | { | ||
34 | - bool is_new; | ||
35 | assert(pkt); | ||
36 | |||
37 | eth_setup_vlan_headers(pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_base, | ||
38 | - vlan, vlan_ethtype, &is_new); | ||
39 | + &pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len, | ||
40 | + vlan, vlan_ethtype); | ||
41 | |||
42 | - /* update l2hdrlen */ | ||
43 | - if (is_new) { | ||
44 | - pkt->hdr_len += sizeof(struct vlan_header); | ||
45 | - pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len += | ||
46 | - sizeof(struct vlan_header); | ||
47 | - } | ||
48 | + pkt->hdr_len += sizeof(struct vlan_header); | ||
49 | } | ||
50 | |||
51 | bool net_tx_pkt_add_raw_fragment(struct NetTxPkt *pkt, void *base, size_t len) | ||
52 | diff --git a/include/net/eth.h b/include/net/eth.h | ||
53 | index XXXXXXX..XXXXXXX 100644 | ||
54 | --- a/include/net/eth.h | ||
55 | +++ b/include/net/eth.h | ||
56 | @@ -XXX,XX +XXX,XX @@ eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
57 | uint16_t | ||
58 | eth_get_l3_proto(const struct iovec *l2hdr_iov, int iovcnt, size_t l2hdr_len); | ||
59 | |||
60 | -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, | ||
61 | - uint16_t vlan_ethtype, bool *is_new); | ||
62 | +void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size, | ||
63 | + uint16_t vlan_tag, uint16_t vlan_ethtype); | ||
64 | |||
65 | |||
66 | uint8_t eth_get_gso_type(uint16_t l3_proto, uint8_t *l3_hdr, uint8_t l4proto); | ||
67 | diff --git a/net/eth.c b/net/eth.c | ||
68 | index XXXXXXX..XXXXXXX 100644 | ||
69 | --- a/net/eth.c | ||
70 | +++ b/net/eth.c | ||
71 | @@ -XXX,XX +XXX,XX @@ | ||
72 | #include "net/checksum.h" | ||
73 | #include "net/tap.h" | ||
74 | |||
75 | -void eth_setup_vlan_headers(struct eth_header *ehdr, uint16_t vlan_tag, | ||
76 | - uint16_t vlan_ethtype, bool *is_new) | ||
77 | +void eth_setup_vlan_headers(struct eth_header *ehdr, size_t *ehdr_size, | ||
78 | + uint16_t vlan_tag, uint16_t vlan_ethtype) | ||
79 | { | ||
80 | struct vlan_header *vhdr = PKT_GET_VLAN_HDR(ehdr); | ||
81 | |||
82 | - switch (be16_to_cpu(ehdr->h_proto)) { | ||
83 | - case ETH_P_VLAN: | ||
84 | - case ETH_P_DVLAN: | ||
85 | - /* vlan hdr exists */ | ||
86 | - *is_new = false; | ||
87 | - break; | ||
88 | - | ||
89 | - default: | ||
90 | - /* No VLAN header, put a new one */ | ||
91 | - vhdr->h_proto = ehdr->h_proto; | ||
92 | - ehdr->h_proto = cpu_to_be16(vlan_ethtype); | ||
93 | - *is_new = true; | ||
94 | - break; | ||
95 | - } | ||
96 | + memmove(vhdr + 1, vhdr, *ehdr_size - ETH_HLEN); | ||
97 | vhdr->h_tci = cpu_to_be16(vlan_tag); | ||
98 | + vhdr->h_proto = ehdr->h_proto; | ||
99 | + ehdr->h_proto = cpu_to_be16(vlan_ethtype); | ||
100 | + *ehdr_size += sizeof(*vhdr); | ||
101 | } | ||
102 | |||
103 | uint8_t | ||
104 | -- | ||
105 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | eth_strip_vlan and eth_strip_vlan_ex refer to ehdr_buf as struct | ||
4 | eth_header. Enforce alignment for the structure. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/net/net_rx_pkt.c | 11 +++++++---- | ||
11 | 1 file changed, 7 insertions(+), 4 deletions(-) | ||
12 | |||
13 | diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/hw/net/net_rx_pkt.c | ||
16 | +++ b/hw/net/net_rx_pkt.c | ||
17 | @@ -XXX,XX +XXX,XX @@ | ||
18 | |||
19 | struct NetRxPkt { | ||
20 | struct virtio_net_hdr virt_hdr; | ||
21 | - uint8_t ehdr_buf[sizeof(struct eth_header) + sizeof(struct vlan_header)]; | ||
22 | + struct { | ||
23 | + struct eth_header eth; | ||
24 | + struct vlan_header vlan; | ||
25 | + } ehdr_buf; | ||
26 | struct iovec *vec; | ||
27 | uint16_t vec_len_total; | ||
28 | uint16_t vec_len; | ||
29 | @@ -XXX,XX +XXX,XX @@ net_rx_pkt_pull_data(struct NetRxPkt *pkt, | ||
30 | if (pkt->ehdr_buf_len) { | ||
31 | net_rx_pkt_iovec_realloc(pkt, iovcnt + 1); | ||
32 | |||
33 | - pkt->vec[0].iov_base = pkt->ehdr_buf; | ||
34 | + pkt->vec[0].iov_base = &pkt->ehdr_buf; | ||
35 | pkt->vec[0].iov_len = pkt->ehdr_buf_len; | ||
36 | |||
37 | pkt->tot_len = pllen + pkt->ehdr_buf_len; | ||
38 | @@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, | ||
39 | assert(pkt); | ||
40 | |||
41 | if (strip_vlan) { | ||
42 | - pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, pkt->ehdr_buf, | ||
43 | + pkt->ehdr_buf_len = eth_strip_vlan(iov, iovcnt, iovoff, &pkt->ehdr_buf, | ||
44 | &ploff, &tci); | ||
45 | } else { | ||
46 | pkt->ehdr_buf_len = 0; | ||
47 | @@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, | ||
48 | |||
49 | if (strip_vlan) { | ||
50 | pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, | ||
51 | - pkt->ehdr_buf, | ||
52 | + &pkt->ehdr_buf, | ||
53 | &ploff, &tci); | ||
54 | } else { | ||
55 | pkt->ehdr_buf_len = 0; | ||
56 | -- | ||
57 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | GPIE.Multiple_MSIX is not set by default, and needs to be set to get | ||
4 | interrupts from multiple MSI-X vectors. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | tests/qtest/libqos/igb.c | 1 + | ||
11 | 1 file changed, 1 insertion(+) | ||
12 | |||
13 | diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/tests/qtest/libqos/igb.c | ||
16 | +++ b/tests/qtest/libqos/igb.c | ||
17 | @@ -XXX,XX +XXX,XX @@ static void igb_pci_start_hw(QOSGraphObject *obj) | ||
18 | e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); | ||
19 | |||
20 | /* Enable all interrupts */ | ||
21 | + e1000e_macreg_write(&d->e1000e, E1000_GPIE, E1000_GPIE_MSIX_MODE); | ||
22 | e1000e_macreg_write(&d->e1000e, E1000_IMS, 0xFFFFFFFF); | ||
23 | e1000e_macreg_write(&d->e1000e, E1000_EIMS, 0xFFFFFFFF); | ||
24 | |||
25 | -- | ||
26 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
5 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
6 | --- | ||
7 | hw/net/igb_core.c | 9 +++++++-- | ||
8 | 1 file changed, 7 insertions(+), 2 deletions(-) | ||
9 | |||
10 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
11 | index XXXXXXX..XXXXXXX 100644 | ||
12 | --- a/hw/net/igb_core.c | ||
13 | +++ b/hw/net/igb_core.c | ||
14 | @@ -XXX,XX +XXX,XX @@ igb_update_interrupt_state(IGBCore *core) | ||
15 | |||
16 | icr = core->mac[ICR] & core->mac[IMS]; | ||
17 | |||
18 | - if (msix_enabled(core->owner)) { | ||
19 | + if (core->mac[GPIE] & E1000_GPIE_MSIX_MODE) { | ||
20 | if (icr) { | ||
21 | causes = 0; | ||
22 | if (icr & E1000_ICR_DRSTA) { | ||
23 | @@ -XXX,XX +XXX,XX @@ igb_update_interrupt_state(IGBCore *core) | ||
24 | trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS], | ||
25 | core->mac[ICR], core->mac[IMS]); | ||
26 | |||
27 | - if (msi_enabled(core->owner)) { | ||
28 | + if (msix_enabled(core->owner)) { | ||
29 | + if (icr) { | ||
30 | + trace_e1000e_irq_msix_notify_vec(0); | ||
31 | + msix_notify(core->owner, 0); | ||
32 | + } | ||
33 | + } else if (msi_enabled(core->owner)) { | ||
34 | if (icr) { | ||
35 | msi_notify(core->owner, 0); | ||
36 | } | ||
37 | -- | ||
38 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | e1000e does not support using UDP for RSS hash, but igb does. | ||
4 | |||
5 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
6 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
8 | --- | ||
9 | hw/net/igb_core.c | 16 ++++++++++++++++ | ||
10 | hw/net/igb_regs.h | 3 +++ | ||
11 | 2 files changed, 19 insertions(+) | ||
12 | |||
13 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/hw/net/igb_core.c | ||
16 | +++ b/hw/net/igb_core.c | ||
17 | @@ -XXX,XX +XXX,XX @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt *pkt) | ||
18 | return E1000_MRQ_RSS_TYPE_IPV4TCP; | ||
19 | } | ||
20 | |||
21 | + if (l4hdr_proto == ETH_L4_HDR_PROTO_UDP && | ||
22 | + (core->mac[MRQC] & E1000_MRQC_RSS_FIELD_IPV4_UDP)) { | ||
23 | + return E1000_MRQ_RSS_TYPE_IPV4UDP; | ||
24 | + } | ||
25 | + | ||
26 | if (E1000_MRQC_EN_IPV4(core->mac[MRQC])) { | ||
27 | return E1000_MRQ_RSS_TYPE_IPV4; | ||
28 | } | ||
29 | @@ -XXX,XX +XXX,XX @@ igb_rss_get_hash_type(IGBCore *core, struct NetRxPkt *pkt) | ||
30 | return E1000_MRQ_RSS_TYPE_IPV6TCPEX; | ||
31 | } | ||
32 | |||
33 | + if (l4hdr_proto == ETH_L4_HDR_PROTO_UDP && | ||
34 | + (core->mac[MRQC] & E1000_MRQC_RSS_FIELD_IPV6_UDP)) { | ||
35 | + return E1000_MRQ_RSS_TYPE_IPV6UDP; | ||
36 | + } | ||
37 | + | ||
38 | if (E1000_MRQC_EN_IPV6EX(core->mac[MRQC])) { | ||
39 | return E1000_MRQ_RSS_TYPE_IPV6EX; | ||
40 | } | ||
41 | @@ -XXX,XX +XXX,XX @@ igb_rss_calc_hash(IGBCore *core, struct NetRxPkt *pkt, E1000E_RSSInfo *info) | ||
42 | case E1000_MRQ_RSS_TYPE_IPV6EX: | ||
43 | type = NetPktRssIpV6Ex; | ||
44 | break; | ||
45 | + case E1000_MRQ_RSS_TYPE_IPV4UDP: | ||
46 | + type = NetPktRssIpV4Udp; | ||
47 | + break; | ||
48 | + case E1000_MRQ_RSS_TYPE_IPV6UDP: | ||
49 | + type = NetPktRssIpV6Udp; | ||
50 | + break; | ||
51 | default: | ||
52 | assert(false); | ||
53 | return 0; | ||
54 | diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h | ||
55 | index XXXXXXX..XXXXXXX 100644 | ||
56 | --- a/hw/net/igb_regs.h | ||
57 | +++ b/hw/net/igb_regs.h | ||
58 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { | ||
59 | |||
60 | #define E1000_RSS_QUEUE(reta, hash) (E1000_RETA_VAL(reta, hash) & 0x0F) | ||
61 | |||
62 | +#define E1000_MRQ_RSS_TYPE_IPV4UDP 7 | ||
63 | +#define E1000_MRQ_RSS_TYPE_IPV6UDP 8 | ||
64 | + | ||
65 | #define E1000_STATUS_IOV_MODE 0x00040000 | ||
66 | |||
67 | #define E1000_STATUS_NUM_VFS_SHIFT 14 | ||
68 | -- | ||
69 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
5 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
6 | --- | ||
7 | hw/net/e1000e_core.c | 5 ++++ | ||
8 | hw/net/igb_core.c | 15 +++++++++++- | ||
9 | hw/net/igb_regs.h | 1 + | ||
10 | hw/net/net_rx_pkt.c | 64 ++++++++++++++++++++++++++++++++++++++++++--------- | ||
11 | include/net/eth.h | 4 +++- | ||
12 | include/qemu/crc32c.h | 1 + | ||
13 | net/eth.c | 4 ++++ | ||
14 | util/crc32c.c | 8 +++++++ | ||
15 | 8 files changed, 89 insertions(+), 13 deletions(-) | ||
16 | |||
17 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
18 | index XXXXXXX..XXXXXXX 100644 | ||
19 | --- a/hw/net/e1000e_core.c | ||
20 | +++ b/hw/net/e1000e_core.c | ||
21 | @@ -XXX,XX +XXX,XX @@ e1000e_verify_csum_in_sw(E1000ECore *core, | ||
22 | return; | ||
23 | } | ||
24 | |||
25 | + if (l4hdr_proto != ETH_L4_HDR_PROTO_TCP && | ||
26 | + l4hdr_proto != ETH_L4_HDR_PROTO_UDP) { | ||
27 | + return; | ||
28 | + } | ||
29 | + | ||
30 | if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) { | ||
31 | trace_e1000e_rx_metadata_l4_csum_validation_failed(); | ||
32 | return; | ||
33 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
34 | index XXXXXXX..XXXXXXX 100644 | ||
35 | --- a/hw/net/igb_core.c | ||
36 | +++ b/hw/net/igb_core.c | ||
37 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, | ||
38 | uint16_t *vlan_tag) | ||
39 | { | ||
40 | struct virtio_net_hdr *vhdr; | ||
41 | - bool hasip4, hasip6; | ||
42 | + bool hasip4, hasip6, csum_valid; | ||
43 | EthL4HdrProto l4hdr_proto; | ||
44 | |||
45 | *status_flags = E1000_RXD_STAT_DD; | ||
46 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, | ||
47 | *pkt_info |= E1000_ADVRXD_PKT_UDP; | ||
48 | break; | ||
49 | |||
50 | + case ETH_L4_HDR_PROTO_SCTP: | ||
51 | + *pkt_info |= E1000_ADVRXD_PKT_SCTP; | ||
52 | + break; | ||
53 | + | ||
54 | default: | ||
55 | break; | ||
56 | } | ||
57 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, | ||
58 | |||
59 | if (igb_rx_l4_cso_enabled(core)) { | ||
60 | switch (l4hdr_proto) { | ||
61 | + case ETH_L4_HDR_PROTO_SCTP: | ||
62 | + if (!net_rx_pkt_validate_l4_csum(pkt, &csum_valid)) { | ||
63 | + trace_e1000e_rx_metadata_l4_csum_validation_failed(); | ||
64 | + goto func_exit; | ||
65 | + } | ||
66 | + if (!csum_valid) { | ||
67 | + *status_flags |= E1000_RXDEXT_STATERR_TCPE; | ||
68 | + } | ||
69 | + /* fall through */ | ||
70 | case ETH_L4_HDR_PROTO_TCP: | ||
71 | *status_flags |= E1000_RXD_STAT_TCPCS; | ||
72 | break; | ||
73 | diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h | ||
74 | index XXXXXXX..XXXXXXX 100644 | ||
75 | --- a/hw/net/igb_regs.h | ||
76 | +++ b/hw/net/igb_regs.h | ||
77 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { | ||
78 | #define E1000_ADVRXD_PKT_IP6 BIT(6) | ||
79 | #define E1000_ADVRXD_PKT_TCP BIT(8) | ||
80 | #define E1000_ADVRXD_PKT_UDP BIT(9) | ||
81 | +#define E1000_ADVRXD_PKT_SCTP BIT(10) | ||
82 | |||
83 | static inline uint8_t igb_ivar_entry_rx(uint8_t i) | ||
84 | { | ||
85 | diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c | ||
86 | index XXXXXXX..XXXXXXX 100644 | ||
87 | --- a/hw/net/net_rx_pkt.c | ||
88 | +++ b/hw/net/net_rx_pkt.c | ||
89 | @@ -XXX,XX +XXX,XX @@ | ||
90 | */ | ||
91 | |||
92 | #include "qemu/osdep.h" | ||
93 | +#include "qemu/crc32c.h" | ||
94 | #include "trace.h" | ||
95 | #include "net_rx_pkt.h" | ||
96 | #include "net/checksum.h" | ||
97 | @@ -XXX,XX +XXX,XX @@ _net_rx_pkt_calc_l4_csum(struct NetRxPkt *pkt) | ||
98 | return csum; | ||
99 | } | ||
100 | |||
101 | -bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) | ||
102 | +static bool | ||
103 | +_net_rx_pkt_validate_sctp_sum(struct NetRxPkt *pkt) | ||
104 | { | ||
105 | - uint16_t csum; | ||
106 | + size_t csum_off; | ||
107 | + size_t off = pkt->l4hdr_off; | ||
108 | + size_t vec_len = pkt->vec_len; | ||
109 | + struct iovec *vec; | ||
110 | + uint32_t calculated = 0; | ||
111 | + uint32_t original; | ||
112 | + bool valid; | ||
113 | |||
114 | - trace_net_rx_pkt_l4_csum_validate_entry(); | ||
115 | + for (vec = pkt->vec; vec->iov_len < off; vec++) { | ||
116 | + off -= vec->iov_len; | ||
117 | + vec_len--; | ||
118 | + } | ||
119 | |||
120 | - if (pkt->l4hdr_info.proto != ETH_L4_HDR_PROTO_TCP && | ||
121 | - pkt->l4hdr_info.proto != ETH_L4_HDR_PROTO_UDP) { | ||
122 | - trace_net_rx_pkt_l4_csum_validate_not_xxp(); | ||
123 | + csum_off = off + 8; | ||
124 | + | ||
125 | + if (!iov_to_buf(vec, vec_len, csum_off, &original, sizeof(original))) { | ||
126 | return false; | ||
127 | } | ||
128 | |||
129 | - if (pkt->l4hdr_info.proto == ETH_L4_HDR_PROTO_UDP && | ||
130 | - pkt->l4hdr_info.hdr.udp.uh_sum == 0) { | ||
131 | - trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); | ||
132 | + if (!iov_from_buf(vec, vec_len, csum_off, | ||
133 | + &calculated, sizeof(calculated))) { | ||
134 | return false; | ||
135 | } | ||
136 | |||
137 | + calculated = crc32c(0xffffffff, | ||
138 | + (uint8_t *)vec->iov_base + off, vec->iov_len - off); | ||
139 | + calculated = iov_crc32c(calculated ^ 0xffffffff, vec + 1, vec_len - 1); | ||
140 | + valid = calculated == le32_to_cpu(original); | ||
141 | + iov_from_buf(vec, vec_len, csum_off, &original, sizeof(original)); | ||
142 | + | ||
143 | + return valid; | ||
144 | +} | ||
145 | + | ||
146 | +bool net_rx_pkt_validate_l4_csum(struct NetRxPkt *pkt, bool *csum_valid) | ||
147 | +{ | ||
148 | + uint32_t csum; | ||
149 | + | ||
150 | + trace_net_rx_pkt_l4_csum_validate_entry(); | ||
151 | + | ||
152 | if (pkt->hasip4 && pkt->ip4hdr_info.fragment) { | ||
153 | trace_net_rx_pkt_l4_csum_validate_ip4_fragment(); | ||
154 | return false; | ||
155 | } | ||
156 | |||
157 | - csum = _net_rx_pkt_calc_l4_csum(pkt); | ||
158 | + switch (pkt->l4hdr_info.proto) { | ||
159 | + case ETH_L4_HDR_PROTO_UDP: | ||
160 | + if (pkt->l4hdr_info.hdr.udp.uh_sum == 0) { | ||
161 | + trace_net_rx_pkt_l4_csum_validate_udp_with_no_checksum(); | ||
162 | + return false; | ||
163 | + } | ||
164 | + /* fall through */ | ||
165 | + case ETH_L4_HDR_PROTO_TCP: | ||
166 | + csum = _net_rx_pkt_calc_l4_csum(pkt); | ||
167 | + *csum_valid = ((csum == 0) || (csum == 0xFFFF)); | ||
168 | + break; | ||
169 | + | ||
170 | + case ETH_L4_HDR_PROTO_SCTP: | ||
171 | + *csum_valid = _net_rx_pkt_validate_sctp_sum(pkt); | ||
172 | + break; | ||
173 | |||
174 | - *csum_valid = ((csum == 0) || (csum == 0xFFFF)); | ||
175 | + default: | ||
176 | + trace_net_rx_pkt_l4_csum_validate_not_xxp(); | ||
177 | + return false; | ||
178 | + } | ||
179 | |||
180 | trace_net_rx_pkt_l4_csum_validate_csum(*csum_valid); | ||
181 | |||
182 | diff --git a/include/net/eth.h b/include/net/eth.h | ||
183 | index XXXXXXX..XXXXXXX 100644 | ||
184 | --- a/include/net/eth.h | ||
185 | +++ b/include/net/eth.h | ||
186 | @@ -XXX,XX +XXX,XX @@ struct tcp_hdr { | ||
187 | #define IP_HEADER_VERSION_6 (6) | ||
188 | #define IP_PROTO_TCP (6) | ||
189 | #define IP_PROTO_UDP (17) | ||
190 | +#define IP_PROTO_SCTP (132) | ||
191 | #define IPTOS_ECN_MASK 0x03 | ||
192 | #define IPTOS_ECN(x) ((x) & IPTOS_ECN_MASK) | ||
193 | #define IPTOS_ECN_CE 0x03 | ||
194 | @@ -XXX,XX +XXX,XX @@ typedef struct eth_ip4_hdr_info_st { | ||
195 | typedef enum EthL4HdrProto { | ||
196 | ETH_L4_HDR_PROTO_INVALID, | ||
197 | ETH_L4_HDR_PROTO_TCP, | ||
198 | - ETH_L4_HDR_PROTO_UDP | ||
199 | + ETH_L4_HDR_PROTO_UDP, | ||
200 | + ETH_L4_HDR_PROTO_SCTP | ||
201 | } EthL4HdrProto; | ||
202 | |||
203 | typedef struct eth_l4_hdr_info_st { | ||
204 | diff --git a/include/qemu/crc32c.h b/include/qemu/crc32c.h | ||
205 | index XXXXXXX..XXXXXXX 100644 | ||
206 | --- a/include/qemu/crc32c.h | ||
207 | +++ b/include/qemu/crc32c.h | ||
208 | @@ -XXX,XX +XXX,XX @@ | ||
209 | |||
210 | |||
211 | uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length); | ||
212 | +uint32_t iov_crc32c(uint32_t crc, const struct iovec *iov, size_t iov_cnt); | ||
213 | |||
214 | #endif | ||
215 | diff --git a/net/eth.c b/net/eth.c | ||
216 | index XXXXXXX..XXXXXXX 100644 | ||
217 | --- a/net/eth.c | ||
218 | +++ b/net/eth.c | ||
219 | @@ -XXX,XX +XXX,XX @@ void eth_get_protocols(const struct iovec *iov, size_t iovcnt, size_t iovoff, | ||
220 | *l5hdr_off = *l4hdr_off + sizeof(l4hdr_info->hdr.udp); | ||
221 | } | ||
222 | break; | ||
223 | + | ||
224 | + case IP_PROTO_SCTP: | ||
225 | + l4hdr_info->proto = ETH_L4_HDR_PROTO_SCTP; | ||
226 | + break; | ||
227 | } | ||
228 | } | ||
229 | |||
230 | diff --git a/util/crc32c.c b/util/crc32c.c | ||
231 | index XXXXXXX..XXXXXXX 100644 | ||
232 | --- a/util/crc32c.c | ||
233 | +++ b/util/crc32c.c | ||
234 | @@ -XXX,XX +XXX,XX @@ uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length) | ||
235 | return crc^0xffffffff; | ||
236 | } | ||
237 | |||
238 | +uint32_t iov_crc32c(uint32_t crc, const struct iovec *iov, size_t iov_cnt) | ||
239 | +{ | ||
240 | + while (iov_cnt--) { | ||
241 | + crc = crc32c(crc, iov->iov_base, iov->iov_len) ^ 0xffffffff; | ||
242 | + iov++; | ||
243 | + } | ||
244 | + return crc ^ 0xffffffff; | ||
245 | +} | ||
246 | -- | ||
247 | 2.7.4 | diff view generated by jsdifflib |
1 | From: Thomas Huth <thuth@redhat.com> | 1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> |
---|---|---|---|
2 | 2 | ||
3 | QEMU can emulate hubs to connect NICs and netdevs. This is currently | 3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> |
4 | primarily used for the mis-named 'vlan' feature of the networking | 4 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> |
5 | subsystem. Now the 'vlan' feature has been marked as deprecated, since | ||
6 | its name is rather confusing and the users often rather mis-configure | ||
7 | their network when trying to use it. But while the 'vlan' parameter | ||
8 | should be removed at one point in time, the basic idea of emulating | ||
9 | a hub in QEMU is still good: It's useful for bundling up the output of | ||
10 | multiple NICs into one single l2tp netdev for example. | ||
11 | |||
12 | Now to be able to use the hubport feature without 'vlan's, there is one | ||
13 | missing piece: The possibility to connect a hubport to a netdev, too. | ||
14 | This patch adds this possibility by introducing a new "netdev=..." | ||
15 | parameter to the hubports. | ||
16 | |||
17 | To bundle up the output of multiple NICs into one socket netdev, you can | ||
18 | now run QEMU with these parameters for example: | ||
19 | |||
20 | qemu-system-ppc64 ... -netdev socket,id=s1,connect=:11122 \ | ||
21 | -netdev hubport,hubid=1,id=h1,netdev=s1 \ | ||
22 | -netdev hubport,hubid=1,id=h2 -device e1000,netdev=h2 \ | ||
23 | -netdev hubport,hubid=1,id=h3 -device virtio-net-pci,netdev=h3 | ||
24 | |||
25 | For using the socket netdev, you have got to start another QEMU as the | ||
26 | receiving side first, for example with network dumping enabled: | ||
27 | |||
28 | qemu-system-x86_64 -M isapc -netdev socket,id=s0,listen=:11122 \ | ||
29 | -device ne2k_isa,netdev=s0 \ | ||
30 | -object filter-dump,id=f1,netdev=s0,file=/tmp/dump.dat | ||
31 | |||
32 | After the ppc64 guest tried to boot from both NICs, you can see in the | ||
33 | dump file (using Wireshark, for example), that the output of both NICs | ||
34 | (the e1000 and the virtio-net-pci) has been successfully transferred | ||
35 | via the socket netdev in this case. | ||
36 | |||
37 | Suggested-by: Paolo Bonzini <pbonzini@redhat.com> | ||
38 | Signed-off-by: Thomas Huth <thuth@redhat.com> | ||
39 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 5 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
40 | --- | 6 | --- |
41 | net/hub.c | 27 +++++++++++++++++++++------ | 7 | hw/net/igb_core.c | 12 +++++++----- |
42 | net/hub.h | 3 ++- | 8 | hw/net/net_tx_pkt.c | 18 ++++++++++++++++++ |
43 | net/net.c | 2 +- | 9 | hw/net/net_tx_pkt.h | 8 ++++++++ |
44 | qapi/net.json | 4 +++- | 10 | 3 files changed, 33 insertions(+), 5 deletions(-) |
45 | qemu-options.hx | 8 +++++--- | ||
46 | 5 files changed, 32 insertions(+), 12 deletions(-) | ||
47 | 11 | ||
48 | diff --git a/net/hub.c b/net/hub.c | 12 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c |
49 | index XXXXXXX..XXXXXXX 100644 | 13 | index XXXXXXX..XXXXXXX 100644 |
50 | --- a/net/hub.c | 14 | --- a/hw/net/igb_core.c |
51 | +++ b/net/hub.c | 15 | +++ b/hw/net/igb_core.c |
16 | @@ -XXX,XX +XXX,XX @@ igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx, | ||
17 | static bool | ||
18 | igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx) | ||
19 | { | ||
20 | + uint32_t idx = (tx->first_olinfo_status >> 4) & 1; | ||
21 | + | ||
22 | if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) { | ||
23 | - uint32_t idx = (tx->first_olinfo_status >> 4) & 1; | ||
24 | uint32_t mss = tx->ctx[idx].mss_l4len_idx >> E1000_ADVTXD_MSS_SHIFT; | ||
25 | if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) { | ||
26 | return false; | ||
27 | @@ -XXX,XX +XXX,XX @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx) | ||
28 | return true; | ||
29 | } | ||
30 | |||
31 | - if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) { | ||
32 | - if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) { | ||
33 | - return false; | ||
34 | - } | ||
35 | + if ((tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) && | ||
36 | + !((tx->ctx[idx].type_tucmd_mlhl & E1000_ADVTXD_TUCMD_L4T_SCTP) ? | ||
37 | + net_tx_pkt_update_sctp_checksum(tx->tx_pkt) : | ||
38 | + net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0))) { | ||
39 | + return false; | ||
40 | } | ||
41 | |||
42 | if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) { | ||
43 | diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c | ||
44 | index XXXXXXX..XXXXXXX 100644 | ||
45 | --- a/hw/net/net_tx_pkt.c | ||
46 | +++ b/hw/net/net_tx_pkt.c | ||
52 | @@ -XXX,XX +XXX,XX @@ | 47 | @@ -XXX,XX +XXX,XX @@ |
53 | */ | 48 | */ |
54 | 49 | ||
55 | #include "qemu/osdep.h" | 50 | #include "qemu/osdep.h" |
56 | +#include "qapi/error.h" | 51 | +#include "qemu/crc32c.h" |
57 | #include "monitor/monitor.h" | 52 | #include "net/eth.h" |
58 | #include "net/net.h" | 53 | #include "net/checksum.h" |
59 | #include "clients.h" | 54 | #include "net/tap.h" |
60 | @@ -XXX,XX +XXX,XX @@ static NetClientInfo net_hub_port_info = { | 55 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt) |
61 | .cleanup = net_hub_port_cleanup, | 56 | pkt->virt_hdr.csum_offset, &csum, sizeof(csum)); |
62 | }; | ||
63 | |||
64 | -static NetHubPort *net_hub_port_new(NetHub *hub, const char *name) | ||
65 | +static NetHubPort *net_hub_port_new(NetHub *hub, const char *name, | ||
66 | + NetClientState *hubpeer) | ||
67 | { | ||
68 | NetClientState *nc; | ||
69 | NetHubPort *port; | ||
70 | @@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name) | ||
71 | name = default_name; | ||
72 | } | ||
73 | |||
74 | - nc = qemu_new_net_client(&net_hub_port_info, NULL, "hub", name); | ||
75 | + nc = qemu_new_net_client(&net_hub_port_info, hubpeer, "hub", name); | ||
76 | port = DO_UPCAST(NetHubPort, nc, nc); | ||
77 | port->id = id; | ||
78 | port->hub = hub; | ||
79 | @@ -XXX,XX +XXX,XX @@ static NetHubPort *net_hub_port_new(NetHub *hub, const char *name) | ||
80 | |||
81 | /** | ||
82 | * Create a port on a given hub | ||
83 | + * @hub_id: Number of the hub | ||
84 | * @name: Net client name or NULL for default name. | ||
85 | + * @hubpeer: Peer to use (if "netdev=id" has been specified) | ||
86 | * | ||
87 | * If there is no existing hub with the given id then a new hub is created. | ||
88 | */ | ||
89 | -NetClientState *net_hub_add_port(int hub_id, const char *name) | ||
90 | +NetClientState *net_hub_add_port(int hub_id, const char *name, | ||
91 | + NetClientState *hubpeer) | ||
92 | { | ||
93 | NetHub *hub; | ||
94 | NetHubPort *port; | ||
95 | @@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_add_port(int hub_id, const char *name) | ||
96 | hub = net_hub_new(hub_id); | ||
97 | } | ||
98 | |||
99 | - port = net_hub_port_new(hub, name); | ||
100 | + port = net_hub_port_new(hub, name, hubpeer); | ||
101 | return &port->nc; | ||
102 | } | 57 | } |
103 | 58 | ||
104 | @@ -XXX,XX +XXX,XX @@ NetClientState *net_hub_port_find(int hub_id) | 59 | +bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt) |
105 | } | 60 | +{ |
106 | } | 61 | + uint32_t csum = 0; |
107 | 62 | + struct iovec *pl_start_frag = pkt->vec + NET_TX_PKT_PL_START_FRAG; | |
108 | - nc = net_hub_add_port(hub_id, NULL); | 63 | + |
109 | + nc = net_hub_add_port(hub_id, NULL, NULL); | 64 | + if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) { |
110 | return nc; | 65 | + return false; |
111 | } | ||
112 | |||
113 | @@ -XXX,XX +XXX,XX @@ int net_init_hubport(const Netdev *netdev, const char *name, | ||
114 | NetClientState *peer, Error **errp) | ||
115 | { | ||
116 | const NetdevHubPortOptions *hubport; | ||
117 | + NetClientState *hubpeer = NULL; | ||
118 | |||
119 | assert(netdev->type == NET_CLIENT_DRIVER_HUBPORT); | ||
120 | assert(!peer); | ||
121 | hubport = &netdev->u.hubport; | ||
122 | |||
123 | - net_hub_add_port(hubport->hubid, name); | ||
124 | + if (hubport->has_netdev) { | ||
125 | + hubpeer = qemu_find_netdev(hubport->netdev); | ||
126 | + if (!hubpeer) { | ||
127 | + error_setg(errp, "netdev '%s' not found", hubport->netdev); | ||
128 | + return -1; | ||
129 | + } | ||
130 | + } | 66 | + } |
131 | + | 67 | + |
132 | + net_hub_add_port(hubport->hubid, name, hubpeer); | 68 | + csum = cpu_to_le32(iov_crc32c(0xffffffff, pl_start_frag, pkt->payload_frags)); |
69 | + if (iov_from_buf(pl_start_frag, pkt->payload_frags, 8, &csum, sizeof(csum)) < sizeof(csum)) { | ||
70 | + return false; | ||
71 | + } | ||
133 | + | 72 | + |
134 | return 0; | 73 | + return true; |
135 | } | 74 | +} |
136 | 75 | + | |
137 | diff --git a/net/hub.h b/net/hub.h | 76 | static void net_tx_pkt_calculate_hdr_len(struct NetTxPkt *pkt) |
77 | { | ||
78 | pkt->hdr_len = pkt->vec[NET_TX_PKT_L2HDR_FRAG].iov_len + | ||
79 | diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h | ||
138 | index XXXXXXX..XXXXXXX 100644 | 80 | index XXXXXXX..XXXXXXX 100644 |
139 | --- a/net/hub.h | 81 | --- a/hw/net/net_tx_pkt.h |
140 | +++ b/net/hub.h | 82 | +++ b/hw/net/net_tx_pkt.h |
141 | @@ -XXX,XX +XXX,XX @@ | 83 | @@ -XXX,XX +XXX,XX @@ void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt); |
142 | 84 | void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt); | |
143 | #include "qemu-common.h" | 85 | |
144 | 86 | /** | |
145 | -NetClientState *net_hub_add_port(int hub_id, const char *name); | 87 | + * Calculate the SCTP checksum. |
146 | +NetClientState *net_hub_add_port(int hub_id, const char *name, | 88 | + * |
147 | + NetClientState *hubpeer); | 89 | + * @pkt: packet |
148 | NetClientState *net_hub_find_client_by_name(int hub_id, const char *name); | 90 | + * |
149 | void net_hub_info(Monitor *mon); | 91 | + */ |
150 | void net_hub_check_clients(void); | 92 | +bool net_tx_pkt_update_sctp_checksum(struct NetTxPkt *pkt); |
151 | diff --git a/net/net.c b/net/net.c | 93 | + |
152 | index XXXXXXX..XXXXXXX 100644 | 94 | +/** |
153 | --- a/net/net.c | 95 | * get length of all populated data. |
154 | +++ b/net/net.c | 96 | * |
155 | @@ -XXX,XX +XXX,XX @@ static int net_client_init1(const void *object, bool is_netdev, Error **errp) | 97 | * @pkt: packet |
156 | /* Do not add to a vlan if it's a nic with a netdev= parameter. */ | ||
157 | if (netdev->type != NET_CLIENT_DRIVER_NIC || | ||
158 | !opts->u.nic.has_netdev) { | ||
159 | - peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL); | ||
160 | + peer = net_hub_add_port(net->has_vlan ? net->vlan : 0, NULL, NULL); | ||
161 | } | ||
162 | |||
163 | if (net->has_vlan && !vlan_warned) { | ||
164 | diff --git a/qapi/net.json b/qapi/net.json | ||
165 | index XXXXXXX..XXXXXXX 100644 | ||
166 | --- a/qapi/net.json | ||
167 | +++ b/qapi/net.json | ||
168 | @@ -XXX,XX +XXX,XX @@ | ||
169 | # Connect two or more net clients through a software hub. | ||
170 | # | ||
171 | # @hubid: hub identifier number | ||
172 | +# @netdev: used to connect hub to a netdev instead of a device (since 2.12) | ||
173 | # | ||
174 | # Since: 1.2 | ||
175 | ## | ||
176 | { 'struct': 'NetdevHubPortOptions', | ||
177 | 'data': { | ||
178 | - 'hubid': 'int32' } } | ||
179 | + 'hubid': 'int32', | ||
180 | + '*netdev': 'str' } } | ||
181 | |||
182 | ## | ||
183 | # @NetdevNetmapOptions: | ||
184 | diff --git a/qemu-options.hx b/qemu-options.hx | ||
185 | index XXXXXXX..XXXXXXX 100644 | ||
186 | --- a/qemu-options.hx | ||
187 | +++ b/qemu-options.hx | ||
188 | @@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, | ||
189 | #endif | ||
190 | "-netdev vhost-user,id=str,chardev=dev[,vhostforce=on|off]\n" | ||
191 | " configure a vhost-user network, backed by a chardev 'dev'\n" | ||
192 | - "-netdev hubport,id=str,hubid=n\n" | ||
193 | + "-netdev hubport,id=str,hubid=n[,netdev=nd]\n" | ||
194 | " configure a hub port on QEMU VLAN 'n'\n", QEMU_ARCH_ALL) | ||
195 | DEF("net", HAS_ARG, QEMU_OPTION_net, | ||
196 | "-net nic[,vlan=n][,netdev=nd][,macaddr=mac][,model=type][,name=str][,addr=str][,vectors=v]\n" | ||
197 | @@ -XXX,XX +XXX,XX @@ vde_switch -F -sock /tmp/myswitch | ||
198 | qemu-system-i386 linux.img -net nic -net vde,sock=/tmp/myswitch | ||
199 | @end example | ||
200 | |||
201 | -@item -netdev hubport,id=@var{id},hubid=@var{hubid} | ||
202 | +@item -netdev hubport,id=@var{id},hubid=@var{hubid}[,netdev=@var{nd}] | ||
203 | |||
204 | Create a hub port on QEMU "vlan" @var{hubid}. | ||
205 | |||
206 | The hubport netdev lets you connect a NIC to a QEMU "vlan" instead of a single | ||
207 | netdev. @code{-net} and @code{-device} with parameter @option{vlan} create the | ||
208 | -required hub automatically. | ||
209 | +required hub automatically. Alternatively, you can also connect the hubport | ||
210 | +to another netdev with ID @var{nd} by using the @option{netdev=@var{nd}} | ||
211 | +option. | ||
212 | |||
213 | @item -netdev vhost-user,chardev=@var{id}[,vhostforce=on|off][,queues=n] | ||
214 | |||
215 | -- | 98 | -- |
216 | 2.7.4 | 99 | 2.7.4 |
217 | |||
218 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
5 | --- | ||
6 | hw/net/e1000e_core.c | 3 ++- | ||
7 | hw/net/igb_core.c | 14 ++++++++++++-- | ||
8 | hw/net/net_rx_pkt.c | 15 ++++++--------- | ||
9 | hw/net/net_rx_pkt.h | 19 ++++++++++--------- | ||
10 | include/net/eth.h | 4 ++-- | ||
11 | net/eth.c | 52 +++++++++++++++++++++++++++++++++------------------- | ||
12 | 6 files changed, 65 insertions(+), 42 deletions(-) | ||
13 | |||
14 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/net/e1000e_core.c | ||
17 | +++ b/hw/net/e1000e_core.c | ||
18 | @@ -XXX,XX +XXX,XX @@ e1000e_receive_internal(E1000ECore *core, const struct iovec *iov, int iovcnt, | ||
19 | } | ||
20 | |||
21 | net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, | ||
22 | - e1000x_vlan_enabled(core->mac), core->mac[VET]); | ||
23 | + e1000x_vlan_enabled(core->mac) ? 0 : -1, | ||
24 | + core->mac[VET], 0); | ||
25 | |||
26 | e1000e_rss_parse_packet(core, core->rx_pkt, &rss_info); | ||
27 | e1000e_rx_ring_init(core, &rxr, rss_info.queue); | ||
28 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
29 | index XXXXXXX..XXXXXXX 100644 | ||
30 | --- a/hw/net/igb_core.c | ||
31 | +++ b/hw/net/igb_core.c | ||
32 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
33 | E1000E_RxRing rxr; | ||
34 | E1000E_RSSInfo rss_info; | ||
35 | size_t total_size; | ||
36 | + int strip_vlan_index; | ||
37 | int i; | ||
38 | |||
39 | trace_e1000e_rx_receive_iov(iovcnt); | ||
40 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
41 | |||
42 | igb_rx_ring_init(core, &rxr, i); | ||
43 | |||
44 | + if (!igb_rx_strip_vlan(core, rxr.i)) { | ||
45 | + strip_vlan_index = -1; | ||
46 | + } else if (core->mac[CTRL_EXT] & BIT(26)) { | ||
47 | + strip_vlan_index = 1; | ||
48 | + } else { | ||
49 | + strip_vlan_index = 0; | ||
50 | + } | ||
51 | + | ||
52 | net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs, | ||
53 | - igb_rx_strip_vlan(core, rxr.i), | ||
54 | - core->mac[VET] & 0xffff); | ||
55 | + strip_vlan_index, | ||
56 | + core->mac[VET] & 0xffff, | ||
57 | + core->mac[VET] >> 16); | ||
58 | |||
59 | total_size = net_rx_pkt_get_total_len(core->rx_pkt) + | ||
60 | e1000x_fcs_len(core->mac); | ||
61 | diff --git a/hw/net/net_rx_pkt.c b/hw/net/net_rx_pkt.c | ||
62 | index XXXXXXX..XXXXXXX 100644 | ||
63 | --- a/hw/net/net_rx_pkt.c | ||
64 | +++ b/hw/net/net_rx_pkt.c | ||
65 | @@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, | ||
66 | |||
67 | void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, | ||
68 | const struct iovec *iov, int iovcnt, | ||
69 | - size_t iovoff, bool strip_vlan, | ||
70 | - uint16_t vet) | ||
71 | + size_t iovoff, int strip_vlan_index, | ||
72 | + uint16_t vet, uint16_t vet_ext) | ||
73 | { | ||
74 | uint16_t tci = 0; | ||
75 | uint16_t ploff = iovoff; | ||
76 | assert(pkt); | ||
77 | |||
78 | - if (strip_vlan) { | ||
79 | - pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, vet, | ||
80 | - &pkt->ehdr_buf, | ||
81 | - &ploff, &tci); | ||
82 | - } else { | ||
83 | - pkt->ehdr_buf_len = 0; | ||
84 | - } | ||
85 | + pkt->ehdr_buf_len = eth_strip_vlan_ex(iov, iovcnt, iovoff, | ||
86 | + strip_vlan_index, vet, vet_ext, | ||
87 | + &pkt->ehdr_buf, | ||
88 | + &ploff, &tci); | ||
89 | |||
90 | pkt->tci = tci; | ||
91 | |||
92 | diff --git a/hw/net/net_rx_pkt.h b/hw/net/net_rx_pkt.h | ||
93 | index XXXXXXX..XXXXXXX 100644 | ||
94 | --- a/hw/net/net_rx_pkt.h | ||
95 | +++ b/hw/net/net_rx_pkt.h | ||
96 | @@ -XXX,XX +XXX,XX @@ void net_rx_pkt_attach_iovec(struct NetRxPkt *pkt, | ||
97 | /** | ||
98 | * attach scatter-gather data to rx packet | ||
99 | * | ||
100 | -* @pkt: packet | ||
101 | -* @iov: received data scatter-gather list | ||
102 | -* @iovcnt number of elements in iov | ||
103 | -* @iovoff data start offset in the iov | ||
104 | -* @strip_vlan: should the module strip vlan from data | ||
105 | -* @vet: VLAN tag Ethernet type | ||
106 | +* @pkt: packet | ||
107 | +* @iov: received data scatter-gather list | ||
108 | +* @iovcnt: number of elements in iov | ||
109 | +* @iovoff: data start offset in the iov | ||
110 | +* @strip_vlan_index: index of Q tag if it is to be stripped. negative otherwise. | ||
111 | +* @vet: VLAN tag Ethernet type | ||
112 | +* @vet_ext: outer VLAN tag Ethernet type | ||
113 | * | ||
114 | */ | ||
115 | void net_rx_pkt_attach_iovec_ex(struct NetRxPkt *pkt, | ||
116 | - const struct iovec *iov, int iovcnt, | ||
117 | - size_t iovoff, bool strip_vlan, | ||
118 | - uint16_t vet); | ||
119 | + const struct iovec *iov, int iovcnt, | ||
120 | + size_t iovoff, int strip_vlan_index, | ||
121 | + uint16_t vet, uint16_t vet_ext); | ||
122 | |||
123 | /** | ||
124 | * attach data to rx packet | ||
125 | diff --git a/include/net/eth.h b/include/net/eth.h | ||
126 | index XXXXXXX..XXXXXXX 100644 | ||
127 | --- a/include/net/eth.h | ||
128 | +++ b/include/net/eth.h | ||
129 | @@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
130 | uint16_t *payload_offset, uint16_t *tci); | ||
131 | |||
132 | size_t | ||
133 | -eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
134 | - uint16_t vet, void *new_ehdr_buf, | ||
135 | +eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index, | ||
136 | + uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf, | ||
137 | uint16_t *payload_offset, uint16_t *tci); | ||
138 | |||
139 | uint16_t | ||
140 | diff --git a/net/eth.c b/net/eth.c | ||
141 | index XXXXXXX..XXXXXXX 100644 | ||
142 | --- a/net/eth.c | ||
143 | +++ b/net/eth.c | ||
144 | @@ -XXX,XX +XXX,XX @@ eth_strip_vlan(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
145 | } | ||
146 | |||
147 | size_t | ||
148 | -eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, | ||
149 | - uint16_t vet, void *new_ehdr_buf, | ||
150 | +eth_strip_vlan_ex(const struct iovec *iov, int iovcnt, size_t iovoff, int index, | ||
151 | + uint16_t vet, uint16_t vet_ext, void *new_ehdr_buf, | ||
152 | uint16_t *payload_offset, uint16_t *tci) | ||
153 | { | ||
154 | struct vlan_header vlan_hdr; | ||
155 | - struct eth_header *new_ehdr = (struct eth_header *) new_ehdr_buf; | ||
156 | - | ||
157 | - size_t copied = iov_to_buf(iov, iovcnt, iovoff, | ||
158 | - new_ehdr, sizeof(*new_ehdr)); | ||
159 | - | ||
160 | - if (copied < sizeof(*new_ehdr)) { | ||
161 | - return 0; | ||
162 | - } | ||
163 | + uint16_t *new_ehdr_proto; | ||
164 | + size_t new_ehdr_size; | ||
165 | + size_t copied; | ||
166 | |||
167 | - if (be16_to_cpu(new_ehdr->h_proto) == vet) { | ||
168 | - copied = iov_to_buf(iov, iovcnt, iovoff + sizeof(*new_ehdr), | ||
169 | - &vlan_hdr, sizeof(vlan_hdr)); | ||
170 | + switch (index) { | ||
171 | + case 0: | ||
172 | + new_ehdr_proto = &PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto; | ||
173 | + new_ehdr_size = sizeof(struct eth_header); | ||
174 | + copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size); | ||
175 | + break; | ||
176 | |||
177 | - if (copied < sizeof(vlan_hdr)) { | ||
178 | + case 1: | ||
179 | + new_ehdr_proto = &PKT_GET_VLAN_HDR(new_ehdr_buf)->h_proto; | ||
180 | + new_ehdr_size = sizeof(struct eth_header) + sizeof(struct vlan_header); | ||
181 | + copied = iov_to_buf(iov, iovcnt, iovoff, new_ehdr_buf, new_ehdr_size); | ||
182 | + if (be16_to_cpu(PKT_GET_ETH_HDR(new_ehdr_buf)->h_proto) != vet_ext) { | ||
183 | return 0; | ||
184 | } | ||
185 | + break; | ||
186 | |||
187 | - new_ehdr->h_proto = vlan_hdr.h_proto; | ||
188 | + default: | ||
189 | + return 0; | ||
190 | + } | ||
191 | |||
192 | - *tci = be16_to_cpu(vlan_hdr.h_tci); | ||
193 | - *payload_offset = iovoff + sizeof(*new_ehdr) + sizeof(vlan_hdr); | ||
194 | - return sizeof(struct eth_header); | ||
195 | + if (copied < new_ehdr_size || be16_to_cpu(*new_ehdr_proto) != vet) { | ||
196 | + return 0; | ||
197 | + } | ||
198 | + | ||
199 | + copied = iov_to_buf(iov, iovcnt, iovoff + new_ehdr_size, | ||
200 | + &vlan_hdr, sizeof(vlan_hdr)); | ||
201 | + if (copied < sizeof(vlan_hdr)) { | ||
202 | + return 0; | ||
203 | } | ||
204 | |||
205 | - return 0; | ||
206 | + *new_ehdr_proto = vlan_hdr.h_proto; | ||
207 | + *payload_offset = iovoff + new_ehdr_size + sizeof(vlan_hdr); | ||
208 | + *tci = be16_to_cpu(vlan_hdr.h_tci); | ||
209 | + | ||
210 | + return new_ehdr_size; | ||
211 | } | ||
212 | |||
213 | void | ||
214 | -- | ||
215 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
5 | --- | ||
6 | hw/net/igb_core.c | 23 ++++++++++++++++++----- | ||
7 | 1 file changed, 18 insertions(+), 5 deletions(-) | ||
8 | |||
9 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/hw/net/igb_core.c | ||
12 | +++ b/hw/net/igb_core.c | ||
13 | @@ -XXX,XX +XXX,XX @@ typedef struct IGBTxPktVmdqCallbackContext { | ||
14 | |||
15 | typedef struct L2Header { | ||
16 | struct eth_header eth; | ||
17 | - struct vlan_header vlan; | ||
18 | + struct vlan_header vlan[2]; | ||
19 | } L2Header; | ||
20 | |||
21 | static ssize_t | ||
22 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
23 | uint32_t f, ra[2], *macp, rctl = core->mac[RCTL]; | ||
24 | uint16_t queues = 0; | ||
25 | uint16_t oversized = 0; | ||
26 | - uint16_t vid = be16_to_cpu(l2_header->vlan.h_tci) & VLAN_VID_MASK; | ||
27 | + size_t vlan_num = 0; | ||
28 | int i; | ||
29 | |||
30 | memset(rss_info, 0, sizeof(E1000E_RSSInfo)); | ||
31 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
32 | *external_tx = true; | ||
33 | } | ||
34 | |||
35 | - if (e1000x_is_vlan_packet(ehdr, core->mac[VET] & 0xffff) && | ||
36 | - !e1000x_rx_vlan_filter(core->mac, PKT_GET_VLAN_HDR(ehdr))) { | ||
37 | + if (core->mac[CTRL_EXT] & BIT(26)) { | ||
38 | + if (be16_to_cpu(ehdr->h_proto) == core->mac[VET] >> 16 && | ||
39 | + be16_to_cpu(l2_header->vlan[0].h_proto) == (core->mac[VET] & 0xffff)) { | ||
40 | + vlan_num = 2; | ||
41 | + } | ||
42 | + } else { | ||
43 | + if (be16_to_cpu(ehdr->h_proto) == (core->mac[VET] & 0xffff)) { | ||
44 | + vlan_num = 1; | ||
45 | + } | ||
46 | + } | ||
47 | + | ||
48 | + if (vlan_num && | ||
49 | + !e1000x_rx_vlan_filter(core->mac, l2_header->vlan + vlan_num - 1)) { | ||
50 | return queues; | ||
51 | } | ||
52 | |||
53 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
54 | if (e1000x_vlan_rx_filter_enabled(core->mac)) { | ||
55 | uint16_t mask = 0; | ||
56 | |||
57 | - if (e1000x_is_vlan_packet(ehdr, core->mac[VET] & 0xffff)) { | ||
58 | + if (vlan_num) { | ||
59 | + uint16_t vid = be16_to_cpu(l2_header->vlan[vlan_num - 1].h_tci) & VLAN_VID_MASK; | ||
60 | + | ||
61 | for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { | ||
62 | if ((core->mac[VLVF0 + i] & E1000_VLVF_VLANID_MASK) == vid && | ||
63 | (core->mac[VLVF0 + i] & E1000_VLVF_VLANID_ENABLE)) { | ||
64 | -- | ||
65 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | igb has a configurable size limit for LPE, and uses different limits | ||
4 | depending on whether the packet is treated as a VLAN packet. | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
8 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
9 | --- | ||
10 | hw/net/igb_core.c | 36 +++++++++++++++++++++--------------- | ||
11 | 1 file changed, 21 insertions(+), 15 deletions(-) | ||
12 | |||
13 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
14 | index XXXXXXX..XXXXXXX 100644 | ||
15 | --- a/hw/net/igb_core.c | ||
16 | +++ b/hw/net/igb_core.c | ||
17 | @@ -XXX,XX +XXX,XX @@ igb_rx_l4_cso_enabled(IGBCore *core) | ||
18 | return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD); | ||
19 | } | ||
20 | |||
21 | -static bool | ||
22 | -igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size) | ||
23 | +static bool igb_rx_is_oversized(IGBCore *core, const struct eth_header *ehdr, | ||
24 | + size_t size, size_t vlan_num, | ||
25 | + bool lpe, uint16_t rlpml) | ||
26 | { | ||
27 | - uint16_t pool = qn % IGB_NUM_VM_POOLS; | ||
28 | - bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE); | ||
29 | - int max_ethernet_lpe_size = | ||
30 | - core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK; | ||
31 | - int max_ethernet_vlan_size = 1522; | ||
32 | - | ||
33 | - return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size); | ||
34 | + size_t vlan_header_size = sizeof(struct vlan_header) * vlan_num; | ||
35 | + size_t header_size = sizeof(struct eth_header) + vlan_header_size; | ||
36 | + return lpe ? size + ETH_FCS_LEN > rlpml : size > header_size + ETH_MTU; | ||
37 | } | ||
38 | |||
39 | static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
40 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
41 | uint16_t queues = 0; | ||
42 | uint16_t oversized = 0; | ||
43 | size_t vlan_num = 0; | ||
44 | + bool lpe; | ||
45 | + uint16_t rlpml; | ||
46 | int i; | ||
47 | |||
48 | memset(rss_info, 0, sizeof(E1000E_RSSInfo)); | ||
49 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
50 | } | ||
51 | } | ||
52 | |||
53 | + lpe = !!(core->mac[RCTL] & E1000_RCTL_LPE); | ||
54 | + rlpml = core->mac[RLPML]; | ||
55 | + if (!(core->mac[RCTL] & E1000_RCTL_SBP) && | ||
56 | + igb_rx_is_oversized(core, ehdr, size, vlan_num, lpe, rlpml)) { | ||
57 | + trace_e1000x_rx_oversized(size); | ||
58 | + return queues; | ||
59 | + } | ||
60 | + | ||
61 | if (vlan_num && | ||
62 | !e1000x_rx_vlan_filter(core->mac, l2_header->vlan + vlan_num - 1)) { | ||
63 | return queues; | ||
64 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, | ||
65 | queues &= core->mac[VFRE]; | ||
66 | if (queues) { | ||
67 | for (i = 0; i < IGB_NUM_VM_POOLS; i++) { | ||
68 | - if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) { | ||
69 | + lpe = !!(core->mac[VMOLR0 + i] & E1000_VMOLR_LPE); | ||
70 | + rlpml = core->mac[VMOLR0 + i] & E1000_VMOLR_RLPML_MASK; | ||
71 | + if ((queues & BIT(i)) && | ||
72 | + igb_rx_is_oversized(core, ehdr, size, vlan_num, | ||
73 | + lpe, rlpml)) { | ||
74 | oversized |= BIT(i); | ||
75 | } | ||
76 | } | ||
77 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
78 | iov_to_buf(iov, iovcnt, iov_ofs, &buf, sizeof(buf.l2_header)); | ||
79 | } | ||
80 | |||
81 | - /* Discard oversized packets if !LPE and !SBP. */ | ||
82 | - if (e1000x_is_oversized(core->mac, size)) { | ||
83 | - return orig_size; | ||
84 | - } | ||
85 | - | ||
86 | net_rx_pkt_set_packet_type(core->rx_pkt, | ||
87 | get_eth_packet_type(&buf.l2_header.eth)); | ||
88 | net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs); | ||
89 | -- | ||
90 | 2.7.4 | diff view generated by jsdifflib |
1 | From: Mao Zhongyi <maozy.fnst@cn.fujitsu.com> | 1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> |
---|---|---|---|
2 | 2 | ||
3 | Packet sizes sometimes differ, e.g. when the network is busy. | 3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> |
4 | Even with the same payload size, the TCP protocol cannot | ||
5 | guarantee that the same packet is sent in the same way, | ||
6 | |||
7 | like that: | ||
8 | We send this payload: | ||
9 | ------------------------------ | ||
10 | | header |1|2|3|4|5|6|7|8|9|0| | ||
11 | ------------------------------ | ||
12 | |||
13 | primary: | ||
14 | ppkt1: | ||
15 | ---------------- | ||
16 | | header |1|2|3| | ||
17 | ---------------- | ||
18 | ppkt2: | ||
19 | ------------------------ | ||
20 | | header |4|5|6|7|8|9|0| | ||
21 | ------------------------ | ||
22 | |||
23 | secondary: | ||
24 | spkt1: | ||
25 | ------------------------------ | ||
26 | | header |1|2|3|4|5|6|7|8|9|0| | ||
27 | ------------------------------ | ||
28 | |||
29 | In the original method, ppkt1 and ppkt2 each differ in size from | ||
30 | spkt1, so they can't be compared and a checkpoint is triggered. | ||
31 | |||
32 | I have tested FTP get of 200M and 1G files many times and found that | ||
33 | the performance was less than 1% of native. | ||
34 | |||
35 | Now I have reconstructed the comparison of TCP packets based on the | ||
36 | TCP sequence number. First of all, ppkt1 and spkt1 have the same | ||
37 | starting sequence number, so they can be compared even though their | ||
38 | lengths are different. Then the smaller payload length, that of ppkt1, | ||
39 | is used as the comparison length. If the payloads are the same, send | ||
40 | out ppkt1 and record the offset (the length of the ppkt1 payload) | ||
41 | in spkt1. In the next comparison, ppkt2 and spkt1 can be compared | ||
42 | from the recorded position of spkt1. | ||
43 | |||
44 | like that: | ||
45 | ---------------- | ||
46 | | header |1|2|3| ppkt1 | ||
47 | ---------|-----| | ||
48 | | | | ||
49 | ---------v-----v-------------- | ||
50 | | header |1|2|3|4|5|6|7|8|9|0| spkt1 | ||
51 | ---------------|\------------| | ||
52 | | \offset | | ||
53 | ---------v-------------v | ||
54 | | header |4|5|6|7|8|9|0| ppkt2 | ||
55 | ------------------------ | ||
56 | |||
57 | In this way, the performance can reach 20% of native in my multiple | ||
58 | tests. | ||
59 | |||
60 | Cc: Zhang Chen <zhangckid@gmail.com> | ||
61 | Cc: Li Zhijian <lizhijian@cn.fujitsu.com> | ||
62 | Cc: Jason Wang <jasowang@redhat.com> | ||
63 | |||
64 | Signed-off-by: Mao Zhongyi <maozy.fnst@cn.fujitsu.com> | ||
65 | Signed-off-by: Li Zhijian <lizhijian@cn.fujitsu.com> | ||
66 | Signed-off-by: Zhang Chen <zhangckid@gmail.com> | ||
67 | Reviewed-by: Zhang Chen <zhangckid@gmail.com> | ||
68 | Tested-by: Zhang Chen <zhangckid@gmail.com> | ||
69 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 4 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
70 | --- | 5 | --- |
71 | net/colo-compare.c | 343 +++++++++++++++++++++++++++++++++++------------------ | 6 | hw/net/igb_common.h | 16 ++++--- |
72 | net/colo.c | 9 ++ | 7 | hw/net/igb_core.c | 129 ++++++++++++++++++++++++++++++++++++++-------------- |
73 | net/colo.h | 15 +++ | 8 | hw/net/igb_regs.h | 23 ++++++++++ |
74 | net/trace-events | 2 +- | 9 | 3 files changed, 127 insertions(+), 41 deletions(-) |
75 | 4 files changed, 250 insertions(+), 119 deletions(-) | ||
76 | 10 | ||
77 | diff --git a/net/colo-compare.c b/net/colo-compare.c | 11 | diff --git a/hw/net/igb_common.h b/hw/net/igb_common.h |
78 | index XXXXXXX..XXXXXXX 100644 | 12 | index XXXXXXX..XXXXXXX 100644 |
79 | --- a/net/colo-compare.c | 13 | --- a/hw/net/igb_common.h |
80 | +++ b/net/colo-compare.c | 14 | +++ b/hw/net/igb_common.h |
81 | @@ -XXX,XX +XXX,XX @@ | 15 | @@ -XXX,XX +XXX,XX @@ |
82 | #define COMPARE_READ_LEN_MAX NET_BUFSIZE | 16 | defreg_indexeda(x, 0), defreg_indexeda(x, 1), \ |
83 | #define MAX_QUEUE_SIZE 1024 | 17 | defreg_indexeda(x, 2), defreg_indexeda(x, 3) |
84 | 18 | ||
85 | +#define COLO_COMPARE_FREE_PRIMARY 0x01 | 19 | -#define defregv(x) defreg_indexed(x, 0), defreg_indexed(x, 1), \ |
86 | +#define COLO_COMPARE_FREE_SECONDARY 0x02 | 20 | +#define defreg8(x) defreg_indexed(x, 0), defreg_indexed(x, 1), \ |
87 | + | 21 | defreg_indexed(x, 2), defreg_indexed(x, 3), \ |
88 | /* TODO: Should be configurable */ | 22 | defreg_indexed(x, 4), defreg_indexed(x, 5), \ |
89 | #define REGULAR_PACKET_CHECK_MS 3000 | 23 | defreg_indexed(x, 6), defreg_indexed(x, 7) |
90 | 24 | @@ -XXX,XX +XXX,XX @@ enum { | |
91 | @@ -XXX,XX +XXX,XX @@ static gint seq_sorter(Packet *a, Packet *b, gpointer data) | 25 | defreg(EICS), defreg(EIMS), defreg(EIMC), defreg(EIAM), |
92 | return ntohl(atcp->th_seq) - ntohl(btcp->th_seq); | 26 | defreg(EICR), defreg(IVAR_MISC), defreg(GPIE), |
27 | |||
28 | + defreg(TSYNCRXCFG), defreg8(ETQF), | ||
29 | + | ||
30 | defreg(RXPBS), defregd(RDBAL), defregd(RDBAH), defregd(RDLEN), | ||
31 | defregd(SRRCTL), defregd(RDH), defregd(RDT), | ||
32 | defregd(RXDCTL), defregd(RXCTL), defregd(RQDPC), defreg(RA2), | ||
33 | @@ -XXX,XX +XXX,XX @@ enum { | ||
34 | |||
35 | defreg(VT_CTL), | ||
36 | |||
37 | - defregv(P2VMAILBOX), defregv(V2PMAILBOX), defreg(MBVFICR), defreg(MBVFIMR), | ||
38 | + defreg8(P2VMAILBOX), defreg8(V2PMAILBOX), defreg(MBVFICR), defreg(MBVFIMR), | ||
39 | defreg(VFLRE), defreg(VFRE), defreg(VFTE), defreg(WVBR), | ||
40 | defreg(QDE), defreg(DTXSWC), defreg_indexed(VLVF, 0), | ||
41 | - defregv(VMOLR), defreg(RPLOLR), defregv(VMBMEM), defregv(VMVIR), | ||
42 | + defreg8(VMOLR), defreg(RPLOLR), defreg8(VMBMEM), defreg8(VMVIR), | ||
43 | |||
44 | - defregv(PVTCTRL), defregv(PVTEICS), defregv(PVTEIMS), defregv(PVTEIMC), | ||
45 | - defregv(PVTEIAC), defregv(PVTEIAM), defregv(PVTEICR), defregv(PVFGPRC), | ||
46 | - defregv(PVFGPTC), defregv(PVFGORC), defregv(PVFGOTC), defregv(PVFMPRC), | ||
47 | - defregv(PVFGPRLBC), defregv(PVFGPTLBC), defregv(PVFGORLBC), defregv(PVFGOTLBC), | ||
48 | + defreg8(PVTCTRL), defreg8(PVTEICS), defreg8(PVTEIMS), defreg8(PVTEIMC), | ||
49 | + defreg8(PVTEIAC), defreg8(PVTEIAM), defreg8(PVTEICR), defreg8(PVFGPRC), | ||
50 | + defreg8(PVFGPTC), defreg8(PVFGORC), defreg8(PVFGOTC), defreg8(PVFMPRC), | ||
51 | + defreg8(PVFGPRLBC), defreg8(PVFGPTLBC), defreg8(PVFGORLBC), defreg8(PVFGOTLBC), | ||
52 | |||
53 | defreg(MTA_A), | ||
54 | |||
55 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
56 | index XXXXXXX..XXXXXXX 100644 | ||
57 | --- a/hw/net/igb_core.c | ||
58 | +++ b/hw/net/igb_core.c | ||
59 | @@ -XXX,XX +XXX,XX @@ typedef struct L2Header { | ||
60 | struct vlan_header vlan[2]; | ||
61 | } L2Header; | ||
62 | |||
63 | +typedef struct PTP2 { | ||
64 | + uint8_t message_id_transport_specific; | ||
65 | + uint8_t version_ptp; | ||
66 | + uint16_t message_length; | ||
67 | + uint8_t subdomain_number; | ||
68 | + uint8_t reserved0; | ||
69 | + uint16_t flags; | ||
70 | + uint64_t correction; | ||
71 | + uint8_t reserved1[5]; | ||
72 | + uint8_t source_communication_technology; | ||
73 | + uint32_t source_uuid_lo; | ||
74 | + uint16_t source_uuid_hi; | ||
75 | + uint16_t source_port_id; | ||
76 | + uint16_t sequence_id; | ||
77 | + uint8_t control; | ||
78 | + uint8_t log_message_period; | ||
79 | +} PTP2; | ||
80 | + | ||
81 | static ssize_t | ||
82 | igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
83 | bool has_vnet, bool *external_tx); | ||
84 | @@ -XXX,XX +XXX,XX @@ static bool igb_rx_is_oversized(IGBCore *core, const struct eth_header *ehdr, | ||
85 | return lpe ? size + ETH_FCS_LEN > rlpml : size > header_size + ETH_MTU; | ||
93 | } | 86 | } |
94 | 87 | ||
95 | +static void fill_pkt_tcp_info(void *data, uint32_t *max_ack) | 88 | -static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, |
96 | +{ | 89 | - size_t size, E1000E_RSSInfo *rss_info, |
97 | + Packet *pkt = data; | 90 | - bool *external_tx) |
98 | + struct tcphdr *tcphd; | 91 | +static uint16_t igb_receive_assign(IGBCore *core, const struct iovec *iov, |
99 | + | 92 | + size_t iovcnt, size_t iov_ofs, |
100 | + tcphd = (struct tcphdr *)pkt->transport_header; | 93 | + const L2Header *l2_header, size_t size, |
101 | + | 94 | + E1000E_RSSInfo *rss_info, |
102 | + pkt->tcp_seq = ntohl(tcphd->th_seq); | 95 | + uint16_t *etqf, bool *ts, bool *external_tx) |
103 | + pkt->tcp_ack = ntohl(tcphd->th_ack); | 96 | { |
104 | + *max_ack = *max_ack > pkt->tcp_ack ? *max_ack : pkt->tcp_ack; | 97 | static const int ta_shift[] = { 4, 3, 2, 0 }; |
105 | + pkt->header_size = pkt->transport_header - (uint8_t *)pkt->data | 98 | const struct eth_header *ehdr = &l2_header->eth; |
106 | + + (tcphd->th_off << 2) - pkt->vnet_hdr_len; | 99 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, |
107 | + pkt->payload_size = pkt->size - pkt->header_size; | 100 | uint16_t queues = 0; |
108 | + pkt->seq_end = pkt->tcp_seq + pkt->payload_size; | 101 | uint16_t oversized = 0; |
109 | + pkt->flags = tcphd->th_flags; | 102 | size_t vlan_num = 0; |
110 | +} | 103 | + PTP2 ptp2; |
111 | + | 104 | bool lpe; |
112 | /* | 105 | uint16_t rlpml; |
113 | * Return 1 on success, if return 0 means the | 106 | int i; |
114 | * packet will be dropped | 107 | |
115 | */ | 108 | memset(rss_info, 0, sizeof(E1000E_RSSInfo)); |
116 | -static int colo_insert_packet(GQueue *queue, Packet *pkt) | 109 | + *ts = false; |
117 | +static int colo_insert_packet(GQueue *queue, Packet *pkt, uint32_t *max_ack) | 110 | |
118 | { | 111 | if (external_tx) { |
119 | if (g_queue_get_length(queue) <= MAX_QUEUE_SIZE) { | 112 | *external_tx = true; |
120 | if (pkt->ip->ip_p == IPPROTO_TCP) { | 113 | @@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const L2Header *l2_header, |
121 | + fill_pkt_tcp_info(pkt, max_ack); | 114 | return queues; |
122 | g_queue_insert_sorted(queue, | ||
123 | pkt, | ||
124 | (GCompareDataFunc)seq_sorter, | ||
125 | @@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) | ||
126 | } | 115 | } |
127 | 116 | ||
128 | if (mode == PRIMARY_IN) { | 117 | + for (*etqf = 0; *etqf < 8; (*etqf)++) { |
129 | - if (!colo_insert_packet(&conn->primary_list, pkt)) { | 118 | + if ((core->mac[ETQF0 + *etqf] & E1000_ETQF_FILTER_ENABLE) && |
130 | + if (!colo_insert_packet(&conn->primary_list, pkt, &conn->pack)) { | 119 | + be16_to_cpu(ehdr->h_proto) == (core->mac[ETQF0 + *etqf] & E1000_ETQF_ETYPE_MASK)) { |
131 | error_report("colo compare primary queue size too big," | 120 | + if ((core->mac[ETQF0 + *etqf] & E1000_ETQF_1588) && |
132 | "drop packet"); | 121 | + (core->mac[TSYNCRXCTL] & E1000_TSYNCRXCTL_ENABLED) && |
133 | } | 122 | + !(core->mac[TSYNCRXCTL] & E1000_TSYNCRXCTL_VALID) && |
134 | } else { | 123 | + iov_to_buf(iov, iovcnt, iov_ofs + ETH_HLEN, &ptp2, sizeof(ptp2)) >= sizeof(ptp2) && |
135 | - if (!colo_insert_packet(&conn->secondary_list, pkt)) { | 124 | + (ptp2.version_ptp & 15) == 2 && |
136 | + if (!colo_insert_packet(&conn->secondary_list, pkt, &conn->sack)) { | 125 | + ptp2.message_id_transport_specific == ((core->mac[TSYNCRXCFG] >> 8) & 255)) { |
137 | error_report("colo compare secondary queue size too big," | 126 | + e1000x_timestamp(core->mac, core->timadj, RXSTMPL, RXSTMPH); |
138 | "drop packet"); | 127 | + *ts = true; |
139 | } | 128 | + core->mac[TSYNCRXCTL] |= E1000_TSYNCRXCTL_VALID; |
140 | @@ -XXX,XX +XXX,XX @@ static int packet_enqueue(CompareState *s, int mode, Connection **con) | 129 | + core->mac[RXSATRL] = le32_to_cpu(ptp2.source_uuid_lo); |
141 | return 0; | 130 | + core->mac[RXSATRH] = le16_to_cpu(ptp2.source_uuid_hi) | |
142 | } | 131 | + (le16_to_cpu(ptp2.sequence_id) << 16); |
143 | 132 | + } | |
144 | +static inline bool after(uint32_t seq1, uint32_t seq2) | 133 | + break; |
145 | +{ | ||
146 | + return (int32_t)(seq1 - seq2) > 0; | ||
147 | +} | ||
148 | + | ||
149 | +static void colo_release_primary_pkt(CompareState *s, Packet *pkt) | ||
150 | +{ | ||
151 | + int ret; | ||
152 | + ret = compare_chr_send(s, | ||
153 | + pkt->data, | ||
154 | + pkt->size, | ||
155 | + pkt->vnet_hdr_len); | ||
156 | + if (ret < 0) { | ||
157 | + error_report("colo send primary packet failed"); | ||
158 | + } | ||
159 | + trace_colo_compare_main("packet same and release packet"); | ||
160 | + packet_destroy(pkt, NULL); | ||
161 | +} | ||
162 | + | ||
163 | /* | ||
164 | * The IP packets sent by primary and secondary | ||
165 | * will be compared in here | ||
166 | @@ -XXX,XX +XXX,XX @@ static int colo_compare_packet_payload(Packet *ppkt, | ||
167 | } | ||
168 | |||
169 | /* | ||
170 | - * Called from the compare thread on the primary | ||
171 | - * for compare tcp packet | ||
172 | - * compare_tcp copied from Dr. David Alan Gilbert's branch | ||
173 | - */ | ||
174 | -static int colo_packet_compare_tcp(Packet *spkt, Packet *ppkt) | ||
175 | + * return true means that the payload is consist and | ||
176 | + * need to make the next comparison, false means do | ||
177 | + * the checkpoint | ||
178 | +*/ | ||
179 | +static bool colo_mark_tcp_pkt(Packet *ppkt, Packet *spkt, | ||
180 | + int8_t *mark, uint32_t max_ack) | ||
181 | { | ||
182 | - struct tcphdr *ptcp, *stcp; | ||
183 | - int res; | ||
184 | + *mark = 0; | ||
185 | + | ||
186 | + if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) { | ||
187 | + if (colo_compare_packet_payload(ppkt, spkt, | ||
188 | + ppkt->header_size, spkt->header_size, | ||
189 | + ppkt->payload_size)) { | ||
190 | + *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY; | ||
191 | + return true; | ||
192 | + } | 134 | + } |
193 | + } | 135 | + } |
194 | + if (ppkt->tcp_seq == spkt->tcp_seq && ppkt->seq_end == spkt->seq_end) { | 136 | + |
195 | + if (colo_compare_packet_payload(ppkt, spkt, | 137 | if (vlan_num && |
196 | + ppkt->header_size, spkt->header_size, | 138 | !e1000x_rx_vlan_filter(core->mac, l2_header->vlan + vlan_num - 1)) { |
197 | + ppkt->payload_size)) { | 139 | return queues; |
198 | + *mark = COLO_COMPARE_FREE_SECONDARY | COLO_COMPARE_FREE_PRIMARY; | 140 | @@ -XXX,XX +XXX,XX @@ static void |
199 | + return true; | 141 | igb_build_rx_metadata(IGBCore *core, |
200 | + } | 142 | struct NetRxPkt *pkt, |
143 | bool is_eop, | ||
144 | - const E1000E_RSSInfo *rss_info, | ||
145 | + const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, | ||
146 | uint16_t *pkt_info, uint16_t *hdr_info, | ||
147 | uint32_t *rss, | ||
148 | uint32_t *status_flags, | ||
149 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, | ||
150 | if (pkt_info) { | ||
151 | *pkt_info = rss_info->enabled ? rss_info->type : 0; | ||
152 | |||
153 | - if (hasip4) { | ||
154 | - *pkt_info |= E1000_ADVRXD_PKT_IP4; | ||
155 | - } | ||
156 | + if (etqf < 8) { | ||
157 | + *pkt_info |= (BIT(11) | etqf) << 4; | ||
158 | + } else { | ||
159 | + if (hasip4) { | ||
160 | + *pkt_info |= E1000_ADVRXD_PKT_IP4; | ||
161 | + } | ||
162 | |||
163 | - if (hasip6) { | ||
164 | - *pkt_info |= E1000_ADVRXD_PKT_IP6; | ||
165 | - } | ||
166 | + if (hasip6) { | ||
167 | + *pkt_info |= E1000_ADVRXD_PKT_IP6; | ||
168 | + } | ||
169 | |||
170 | - switch (l4hdr_proto) { | ||
171 | - case ETH_L4_HDR_PROTO_TCP: | ||
172 | - *pkt_info |= E1000_ADVRXD_PKT_TCP; | ||
173 | - break; | ||
174 | + switch (l4hdr_proto) { | ||
175 | + case ETH_L4_HDR_PROTO_TCP: | ||
176 | + *pkt_info |= E1000_ADVRXD_PKT_TCP; | ||
177 | + break; | ||
178 | |||
179 | - case ETH_L4_HDR_PROTO_UDP: | ||
180 | - *pkt_info |= E1000_ADVRXD_PKT_UDP; | ||
181 | - break; | ||
182 | + case ETH_L4_HDR_PROTO_UDP: | ||
183 | + *pkt_info |= E1000_ADVRXD_PKT_UDP; | ||
184 | + break; | ||
185 | |||
186 | - case ETH_L4_HDR_PROTO_SCTP: | ||
187 | - *pkt_info |= E1000_ADVRXD_PKT_SCTP; | ||
188 | - break; | ||
189 | + case ETH_L4_HDR_PROTO_SCTP: | ||
190 | + *pkt_info |= E1000_ADVRXD_PKT_SCTP; | ||
191 | + break; | ||
192 | |||
193 | - default: | ||
194 | - break; | ||
195 | + default: | ||
196 | + break; | ||
197 | + } | ||
198 | } | ||
199 | } | ||
200 | |||
201 | @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, | ||
202 | *hdr_info = 0; | ||
203 | } | ||
204 | |||
205 | + if (ts) { | ||
206 | + *status_flags |= BIT(16); | ||
201 | + } | 207 | + } |
202 | + | 208 | + |
203 | + /* one part of secondary packet payload still need to be compared */ | 209 | /* RX CSO information */ |
204 | + if (!after(ppkt->seq_end, spkt->seq_end)) { | 210 | if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { |
205 | + if (colo_compare_packet_payload(ppkt, spkt, | 211 | trace_e1000e_rx_metadata_ipv6_sum_disabled(); |
206 | + ppkt->header_size + ppkt->offset, | 212 | @@ -XXX,XX +XXX,XX @@ func_exit: |
207 | + spkt->header_size + spkt->offset, | 213 | static inline void |
208 | + ppkt->payload_size - ppkt->offset)) { | 214 | igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, |
209 | + if (!after(ppkt->tcp_ack, max_ack)) { | 215 | struct NetRxPkt *pkt, |
210 | + *mark = COLO_COMPARE_FREE_PRIMARY; | 216 | - const E1000E_RSSInfo *rss_info, |
211 | + spkt->offset += ppkt->payload_size - ppkt->offset; | 217 | + const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, |
212 | + return true; | 218 | uint16_t length) |
213 | + } else { | 219 | { |
214 | + /* secondary guest hasn't ack the data, don't send | 220 | uint32_t status_flags, rss; |
215 | + * out this packet | 221 | @@ -XXX,XX +XXX,XX @@ igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, |
216 | + */ | 222 | desc->csum = 0; |
217 | + return false; | 223 | |
218 | + } | 224 | igb_build_rx_metadata(core, pkt, pkt != NULL, |
219 | + } | 225 | - rss_info, |
220 | + } else { | 226 | + rss_info, etqf, ts, |
221 | + /* primary packet is longer than secondary packet, compare | 227 | NULL, NULL, &rss, |
222 | + * the same part and mark the primary packet offset | 228 | &status_flags, &ip_id, |
223 | + */ | 229 | &desc->special); |
224 | + if (colo_compare_packet_payload(ppkt, spkt, | 230 | @@ -XXX,XX +XXX,XX @@ igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, |
225 | + ppkt->header_size + ppkt->offset, | 231 | static inline void |
226 | + spkt->header_size + spkt->offset, | 232 | igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, |
227 | + spkt->payload_size - spkt->offset)) { | 233 | struct NetRxPkt *pkt, |
228 | + *mark = COLO_COMPARE_FREE_SECONDARY; | 234 | - const E1000E_RSSInfo *rss_info, |
229 | + ppkt->offset += spkt->payload_size - spkt->offset; | 235 | + const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, |
230 | + return true; | 236 | uint16_t length) |
231 | + } | 237 | { |
232 | + } | 238 | memset(&desc->wb, 0, sizeof(desc->wb)); |
233 | 239 | @@ -XXX,XX +XXX,XX @@ igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, | |
234 | - trace_colo_compare_main("compare tcp"); | 240 | desc->wb.upper.length = cpu_to_le16(length); |
235 | + return false; | 241 | |
236 | +} | 242 | igb_build_rx_metadata(core, pkt, pkt != NULL, |
237 | 243 | - rss_info, | |
238 | - ptcp = (struct tcphdr *)ppkt->transport_header; | 244 | + rss_info, etqf, ts, |
239 | - stcp = (struct tcphdr *)spkt->transport_header; | 245 | &desc->wb.lower.lo_dword.pkt_info, |
240 | +static void colo_compare_tcp(CompareState *s, Connection *conn) | 246 | &desc->wb.lower.lo_dword.hdr_info, |
241 | +{ | 247 | &desc->wb.lower.hi_dword.rss, |
242 | + Packet *ppkt = NULL, *spkt = NULL; | 248 | @@ -XXX,XX +XXX,XX @@ igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, |
243 | + int8_t mark; | 249 | |
244 | 250 | static inline void | |
245 | /* | 251 | igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, |
246 | - * The 'identification' field in the IP header is *very* random | 252 | -struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, uint16_t length) |
247 | - * it almost never matches. Fudge this by ignoring differences in | 253 | + struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, |
248 | - * unfragmented packets; they'll normally sort themselves out if different | 254 | + uint16_t etqf, bool ts, uint16_t length) |
249 | - * anyway, and it should recover at the TCP level. | 255 | { |
250 | - * An alternative would be to get both the primary and secondary to rewrite | 256 | if (igb_rx_use_legacy_descriptor(core)) { |
251 | - * somehow; but that would need some sync traffic to sync the state | 257 | - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length); |
252 | - */ | 258 | + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, |
253 | - if (ntohs(ppkt->ip->ip_off) & IP_DF) { | 259 | + etqf, ts, length); |
254 | - spkt->ip->ip_id = ppkt->ip->ip_id; | ||
255 | - /* and the sum will be different if the IDs were different */ | ||
256 | - spkt->ip->ip_sum = ppkt->ip->ip_sum; | ||
257 | + * If ppkt and spkt have the same payload, but ppkt's ACK | ||
258 | + * is greater than spkt's ACK, in this case we can not | ||
259 | + * send the ppkt because it will cause the secondary guest | ||
260 | + * to miss sending some data in the next. Therefore, we | ||
261 | + * record the maximum ACK in the current queue at both | ||
262 | + * primary side and secondary side. Only when the ack is | ||
263 | + * less than the smaller of the two maximum ack, then we | ||
264 | + * can ensure that the packet's payload is acknowledged by | ||
265 | + * primary and secondary. | ||
266 | + */ | ||
267 | + uint32_t min_ack = conn->pack > conn->sack ? conn->sack : conn->pack; | ||
268 | + | ||
269 | +pri: | ||
270 | + if (g_queue_is_empty(&conn->primary_list)) { | ||
271 | + return; | ||
272 | } | ||
273 | + ppkt = g_queue_pop_head(&conn->primary_list); | ||
274 | +sec: | ||
275 | + if (g_queue_is_empty(&conn->secondary_list)) { | ||
276 | + g_queue_push_head(&conn->primary_list, ppkt); | ||
277 | + return; | ||
278 | + } | ||
279 | + spkt = g_queue_pop_head(&conn->secondary_list); | ||
280 | |||
281 | - /* | ||
282 | - * Check tcp header length for tcp option field. | ||
283 | - * th_off > 5 means this tcp packet have options field. | ||
284 | - * The tcp options maybe always different. | ||
285 | - * for example: | ||
286 | - * From RFC 7323. | ||
287 | - * TCP Timestamps option (TSopt): | ||
288 | - * Kind: 8 | ||
289 | - * | ||
290 | - * Length: 10 bytes | ||
291 | - * | ||
292 | - * +-------+-------+---------------------+---------------------+ | ||
293 | - * |Kind=8 | 10 | TS Value (TSval) |TS Echo Reply (TSecr)| | ||
294 | - * +-------+-------+---------------------+---------------------+ | ||
295 | - * 1 1 4 4 | ||
296 | - * | ||
297 | - * In this case the primary guest's timestamp always different with | ||
298 | - * the secondary guest's timestamp. COLO just focus on payload, | ||
299 | - * so we just need skip this field. | ||
300 | - */ | ||
301 | + if (ppkt->tcp_seq == ppkt->seq_end) { | ||
302 | + colo_release_primary_pkt(s, ppkt); | ||
303 | + ppkt = NULL; | ||
304 | + } | ||
305 | |||
306 | - ptrdiff_t ptcp_offset, stcp_offset; | ||
307 | + if (ppkt && conn->compare_seq && !after(ppkt->seq_end, conn->compare_seq)) { | ||
308 | + trace_colo_compare_main("pri: this packet has compared"); | ||
309 | + colo_release_primary_pkt(s, ppkt); | ||
310 | + ppkt = NULL; | ||
311 | + } | ||
312 | |||
313 | - ptcp_offset = ppkt->transport_header - (uint8_t *)ppkt->data | ||
314 | - + (ptcp->th_off << 2) - ppkt->vnet_hdr_len; | ||
315 | - stcp_offset = spkt->transport_header - (uint8_t *)spkt->data | ||
316 | - + (stcp->th_off << 2) - spkt->vnet_hdr_len; | ||
317 | - if (ppkt->size - ptcp_offset == spkt->size - stcp_offset) { | ||
318 | - res = colo_compare_packet_payload(ppkt, spkt, | ||
319 | - ptcp_offset, stcp_offset, | ||
320 | - ppkt->size - ptcp_offset); | ||
321 | + if (spkt->tcp_seq == spkt->seq_end) { | ||
322 | + packet_destroy(spkt, NULL); | ||
323 | + if (!ppkt) { | ||
324 | + goto pri; | ||
325 | + } else { | ||
326 | + goto sec; | ||
327 | + } | ||
328 | } else { | 260 | } else { |
329 | - trace_colo_compare_main("TCP: payload size of packets are different"); | 261 | - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, length); |
330 | - res = -1; | 262 | + igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, |
331 | + if (conn->compare_seq && !after(spkt->seq_end, conn->compare_seq)) { | 263 | + etqf, ts, length); |
332 | + trace_colo_compare_main("sec: this packet has compared"); | ||
333 | + packet_destroy(spkt, NULL); | ||
334 | + if (!ppkt) { | ||
335 | + goto pri; | ||
336 | + } else { | ||
337 | + goto sec; | ||
338 | + } | ||
339 | + } | ||
340 | + if (!ppkt) { | ||
341 | + g_queue_push_head(&conn->secondary_list, spkt); | ||
342 | + goto pri; | ||
343 | + } | ||
344 | } | ||
345 | |||
346 | - if (res != 0 && | ||
347 | - trace_event_get_state_backends(TRACE_COLO_COMPARE_MISCOMPARE)) { | ||
348 | - char pri_ip_src[20], pri_ip_dst[20], sec_ip_src[20], sec_ip_dst[20]; | ||
349 | - | ||
350 | - strcpy(pri_ip_src, inet_ntoa(ppkt->ip->ip_src)); | ||
351 | - strcpy(pri_ip_dst, inet_ntoa(ppkt->ip->ip_dst)); | ||
352 | - strcpy(sec_ip_src, inet_ntoa(spkt->ip->ip_src)); | ||
353 | - strcpy(sec_ip_dst, inet_ntoa(spkt->ip->ip_dst)); | ||
354 | - | ||
355 | - trace_colo_compare_ip_info(ppkt->size, pri_ip_src, | ||
356 | - pri_ip_dst, spkt->size, | ||
357 | - sec_ip_src, sec_ip_dst); | ||
358 | - | ||
359 | - trace_colo_compare_tcp_info("pri tcp packet", | ||
360 | - ntohl(ptcp->th_seq), | ||
361 | - ntohl(ptcp->th_ack), | ||
362 | - res, ptcp->th_flags, | ||
363 | - ppkt->size); | ||
364 | - | ||
365 | - trace_colo_compare_tcp_info("sec tcp packet", | ||
366 | - ntohl(stcp->th_seq), | ||
367 | - ntohl(stcp->th_ack), | ||
368 | - res, stcp->th_flags, | ||
369 | - spkt->size); | ||
370 | + if (colo_mark_tcp_pkt(ppkt, spkt, &mark, min_ack)) { | ||
371 | + trace_colo_compare_tcp_info("pri", | ||
372 | + ppkt->tcp_seq, ppkt->tcp_ack, | ||
373 | + ppkt->header_size, ppkt->payload_size, | ||
374 | + ppkt->offset, ppkt->flags); | ||
375 | + | ||
376 | + trace_colo_compare_tcp_info("sec", | ||
377 | + spkt->tcp_seq, spkt->tcp_ack, | ||
378 | + spkt->header_size, spkt->payload_size, | ||
379 | + spkt->offset, spkt->flags); | ||
380 | + | ||
381 | + if (mark == COLO_COMPARE_FREE_PRIMARY) { | ||
382 | + conn->compare_seq = ppkt->seq_end; | ||
383 | + colo_release_primary_pkt(s, ppkt); | ||
384 | + g_queue_push_head(&conn->secondary_list, spkt); | ||
385 | + goto pri; | ||
386 | + } | ||
387 | + if (mark == COLO_COMPARE_FREE_SECONDARY) { | ||
388 | + conn->compare_seq = spkt->seq_end; | ||
389 | + packet_destroy(spkt, NULL); | ||
390 | + goto sec; | ||
391 | + } | ||
392 | + if (mark == (COLO_COMPARE_FREE_PRIMARY | COLO_COMPARE_FREE_SECONDARY)) { | ||
393 | + conn->compare_seq = ppkt->seq_end; | ||
394 | + colo_release_primary_pkt(s, ppkt); | ||
395 | + packet_destroy(spkt, NULL); | ||
396 | + goto pri; | ||
397 | + } | ||
398 | + } else { | ||
399 | + g_queue_push_head(&conn->primary_list, ppkt); | ||
400 | + g_queue_push_head(&conn->secondary_list, spkt); | ||
401 | |||
402 | qemu_hexdump((char *)ppkt->data, stderr, | ||
403 | "colo-compare ppkt", ppkt->size); | ||
404 | qemu_hexdump((char *)spkt->data, stderr, | ||
405 | "colo-compare spkt", spkt->size); | ||
406 | - } | ||
407 | |||
408 | - return res; | ||
409 | + /* | ||
410 | + * colo_compare_inconsistent_notify(); | ||
411 | + * TODO: notice to checkpoint(); | ||
412 | + */ | ||
413 | + } | ||
414 | } | ||
415 | |||
416 | + | ||
417 | /* | ||
418 | * Called from the compare thread on the primary | ||
419 | * for compare udp packet | ||
420 | @@ -XXX,XX +XXX,XX @@ static void colo_old_packet_check(void *opaque) | ||
421 | (GCompareFunc)colo_old_packet_check_one_conn); | ||
422 | } | ||
423 | |||
424 | -/* | ||
425 | - * Called from the compare thread on the primary | ||
426 | - * for compare packet with secondary list of the | ||
427 | - * specified connection when a new packet was | ||
428 | - * queued to it. | ||
429 | - */ | ||
430 | -static void colo_compare_connection(void *opaque, void *user_data) | ||
431 | +static void colo_compare_packet(CompareState *s, Connection *conn, | ||
432 | + int (*HandlePacket)(Packet *spkt, | ||
433 | + Packet *ppkt)) | ||
434 | { | ||
435 | - CompareState *s = user_data; | ||
436 | - Connection *conn = opaque; | ||
437 | Packet *pkt = NULL; | ||
438 | GList *result = NULL; | ||
439 | - int ret; | ||
440 | |||
441 | while (!g_queue_is_empty(&conn->primary_list) && | ||
442 | !g_queue_is_empty(&conn->secondary_list)) { | ||
443 | pkt = g_queue_pop_head(&conn->primary_list); | ||
444 | - switch (conn->ip_proto) { | ||
445 | - case IPPROTO_TCP: | ||
446 | - result = g_queue_find_custom(&conn->secondary_list, | ||
447 | - pkt, (GCompareFunc)colo_packet_compare_tcp); | ||
448 | - break; | ||
449 | - case IPPROTO_UDP: | ||
450 | - result = g_queue_find_custom(&conn->secondary_list, | ||
451 | - pkt, (GCompareFunc)colo_packet_compare_udp); | ||
452 | - break; | ||
453 | - case IPPROTO_ICMP: | ||
454 | - result = g_queue_find_custom(&conn->secondary_list, | ||
455 | - pkt, (GCompareFunc)colo_packet_compare_icmp); | ||
456 | - break; | ||
457 | - default: | ||
458 | - result = g_queue_find_custom(&conn->secondary_list, | ||
459 | - pkt, (GCompareFunc)colo_packet_compare_other); | ||
460 | - break; | ||
461 | - } | ||
462 | + result = g_queue_find_custom(&conn->secondary_list, | ||
463 | + pkt, (GCompareFunc)HandlePacket); | ||
464 | |||
465 | if (result) { | ||
466 | - ret = compare_chr_send(s, | ||
467 | - pkt->data, | ||
468 | - pkt->size, | ||
469 | - pkt->vnet_hdr_len); | ||
470 | - if (ret < 0) { | ||
471 | - error_report("colo_send_primary_packet failed"); | ||
472 | - } | ||
473 | - trace_colo_compare_main("packet same and release packet"); | ||
474 | + colo_release_primary_pkt(s, pkt); | ||
475 | g_queue_remove(&conn->secondary_list, result->data); | ||
476 | - packet_destroy(pkt, NULL); | ||
477 | } else { | ||
478 | /* | ||
479 | * If one packet arrive late, the secondary_list or | ||
480 | @@ -XXX,XX +XXX,XX @@ static void colo_compare_connection(void *opaque, void *user_data) | ||
481 | } | 264 | } |
482 | } | 265 | } |
483 | 266 | ||
484 | +/* | 267 | @@ -XXX,XX +XXX,XX @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) |
485 | + * Called from the compare thread on the primary | 268 | static void |
486 | + * for compare packet with secondary list of the | 269 | igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, |
487 | + * specified connection when a new packet was | 270 | const E1000E_RxRing *rxr, |
488 | + * queued to it. | 271 | - const E1000E_RSSInfo *rss_info) |
489 | + */ | 272 | + const E1000E_RSSInfo *rss_info, |
490 | +static void colo_compare_connection(void *opaque, void *user_data) | 273 | + uint16_t etqf, bool ts) |
491 | +{ | 274 | { |
492 | + CompareState *s = user_data; | 275 | PCIDevice *d; |
493 | + Connection *conn = opaque; | 276 | dma_addr_t base; |
494 | + | 277 | @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, |
495 | + switch (conn->ip_proto) { | 278 | } |
496 | + case IPPROTO_TCP: | 279 | |
497 | + colo_compare_tcp(s, conn); | 280 | igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, |
498 | + break; | 281 | - rss_info, written); |
499 | + case IPPROTO_UDP: | 282 | + rss_info, etqf, ts, written); |
500 | + colo_compare_packet(s, conn, colo_packet_compare_udp); | 283 | igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); |
501 | + break; | 284 | |
502 | + case IPPROTO_ICMP: | 285 | igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); |
503 | + colo_compare_packet(s, conn, colo_packet_compare_icmp); | 286 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, |
504 | + break; | 287 | size_t iov_ofs = 0; |
505 | + default: | 288 | E1000E_RxRing rxr; |
506 | + colo_compare_packet(s, conn, colo_packet_compare_other); | 289 | E1000E_RSSInfo rss_info; |
507 | + break; | 290 | + uint16_t etqf; |
508 | + } | 291 | + bool ts; |
509 | +} | 292 | size_t total_size; |
510 | + | 293 | int strip_vlan_index; |
511 | static int compare_chr_send(CompareState *s, | 294 | int i; |
512 | const uint8_t *buf, | 295 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, |
513 | uint32_t size, | 296 | get_eth_packet_type(&buf.l2_header.eth)); |
514 | diff --git a/net/colo.c b/net/colo.c | 297 | net_rx_pkt_set_protocols(core->rx_pkt, iov, iovcnt, iov_ofs); |
298 | |||
299 | - queues = igb_receive_assign(core, &buf.l2_header, size, | ||
300 | - &rss_info, external_tx); | ||
301 | + queues = igb_receive_assign(core, iov, iovcnt, iov_ofs, | ||
302 | + &buf.l2_header, size, | ||
303 | + &rss_info, &etqf, &ts, external_tx); | ||
304 | if (!queues) { | ||
305 | trace_e1000e_rx_flt_dropped(); | ||
306 | return orig_size; | ||
307 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
308 | causes |= E1000_ICR_RXDW; | ||
309 | |||
310 | igb_rx_fix_l4_csum(core, core->rx_pkt); | ||
311 | - igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info); | ||
312 | + igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info, etqf, ts); | ||
313 | |||
314 | /* Check if receive descriptor minimum threshold hit */ | ||
315 | if (igb_rx_descr_threshold_hit(core, rxr.i)) { | ||
316 | @@ -XXX,XX +XXX,XX @@ static const readops igb_macreg_readops[] = { | ||
317 | [EIAM] = igb_mac_readreg, | ||
318 | [IVAR0 ... IVAR0 + 7] = igb_mac_readreg, | ||
319 | igb_getreg(IVAR_MISC), | ||
320 | + igb_getreg(TSYNCRXCFG), | ||
321 | + [ETQF0 ... ETQF0 + 7] = igb_mac_readreg, | ||
322 | igb_getreg(VT_CTL), | ||
323 | [P2VMAILBOX0 ... P2VMAILBOX7] = igb_mac_readreg, | ||
324 | [V2PMAILBOX0 ... V2PMAILBOX7] = igb_mac_vfmailbox_read, | ||
325 | @@ -XXX,XX +XXX,XX @@ static const writeops igb_macreg_writeops[] = { | ||
326 | [EIMS] = igb_set_eims, | ||
327 | [IVAR0 ... IVAR0 + 7] = igb_mac_writereg, | ||
328 | igb_putreg(IVAR_MISC), | ||
329 | + igb_putreg(TSYNCRXCFG), | ||
330 | + [ETQF0 ... ETQF0 + 7] = igb_mac_writereg, | ||
331 | igb_putreg(VT_CTL), | ||
332 | [P2VMAILBOX0 ... P2VMAILBOX7] = igb_set_pfmailbox, | ||
333 | [V2PMAILBOX0 ... V2PMAILBOX7] = igb_set_vfmailbox, | ||
334 | diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h | ||
515 | index XXXXXXX..XXXXXXX 100644 | 335 | index XXXXXXX..XXXXXXX 100644 |
516 | --- a/net/colo.c | 336 | --- a/hw/net/igb_regs.h |
517 | +++ b/net/colo.c | 337 | +++ b/hw/net/igb_regs.h |
518 | @@ -XXX,XX +XXX,XX @@ Connection *connection_new(ConnectionKey *key) | 338 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { |
519 | conn->processing = false; | 339 | #define E1000_DCA_TXCTRL_CPUID_SHIFT 24 /* Tx CPUID now in the last byte */ |
520 | conn->offset = 0; | 340 | #define E1000_DCA_RXCTRL_CPUID_SHIFT 24 /* Rx CPUID now in the last byte */ |
521 | conn->syn_flag = 0; | 341 | |
522 | + conn->pack = 0; | 342 | +/* ETQF register bit definitions */ |
523 | + conn->sack = 0; | 343 | +#define E1000_ETQF_FILTER_ENABLE BIT(26) |
524 | g_queue_init(&conn->primary_list); | 344 | +#define E1000_ETQF_1588 BIT(30) |
525 | g_queue_init(&conn->secondary_list); | 345 | +#define E1000_ETQF_IMM_INT BIT(29) |
526 | 346 | +#define E1000_ETQF_QUEUE_ENABLE BIT(31) | |
527 | @@ -XXX,XX +XXX,XX @@ Packet *packet_new(const void *data, int size, int vnet_hdr_len) | 347 | +#define E1000_ETQF_QUEUE_SHIFT 16 |
528 | pkt->size = size; | 348 | +#define E1000_ETQF_QUEUE_MASK 0x00070000 |
529 | pkt->creation_ms = qemu_clock_get_ms(QEMU_CLOCK_HOST); | 349 | +#define E1000_ETQF_ETYPE_MASK 0x0000FFFF |
530 | pkt->vnet_hdr_len = vnet_hdr_len; | 350 | + |
531 | + pkt->tcp_seq = 0; | 351 | #define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof control */ |
532 | + pkt->tcp_ack = 0; | 352 | #define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof control */ |
533 | + pkt->seq_end = 0; | 353 | #define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */ |
534 | + pkt->header_size = 0; | 354 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { |
535 | + pkt->payload_size = 0; | 355 | #define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ |
536 | + pkt->offset = 0; | 356 | #define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ |
537 | + pkt->flags = 0; | 357 | |
538 | 358 | +#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ | |
539 | return pkt; | 359 | + |
540 | } | 360 | +/* Filtering Registers */ |
541 | diff --git a/net/colo.h b/net/colo.h | 361 | +#define E1000_SAQF(_n) (0x5980 + 4 * (_n)) |
542 | index XXXXXXX..XXXXXXX 100644 | 362 | +#define E1000_DAQF(_n) (0x59A0 + 4 * (_n)) |
543 | --- a/net/colo.h | 363 | +#define E1000_SPQF(_n) (0x59C0 + 4 * (_n)) |
544 | +++ b/net/colo.h | 364 | +#define E1000_FTQF(_n) (0x59E0 + 4 * (_n)) |
545 | @@ -XXX,XX +XXX,XX @@ typedef struct Packet { | 365 | +#define E1000_SAQF0 E1000_SAQF(0) |
546 | int64_t creation_ms; | 366 | +#define E1000_DAQF0 E1000_DAQF(0) |
547 | /* Get vnet_hdr_len from filter */ | 367 | +#define E1000_SPQF0 E1000_SPQF(0) |
548 | uint32_t vnet_hdr_len; | 368 | +#define E1000_FTQF0 E1000_FTQF(0) |
549 | + uint32_t tcp_seq; /* sequence number */ | 369 | +#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */ |
550 | + uint32_t tcp_ack; /* acknowledgement number */ | 370 | +#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ |
551 | + /* the sequence number of the last byte of the packet */ | 371 | + |
552 | + uint32_t seq_end; | 372 | #define E1000_RQDPC(_n) (0x0C030 + ((_n) * 0x40)) |
553 | + uint8_t header_size; /* the header length */ | 373 | |
554 | + uint16_t payload_size; /* the payload length */ | 374 | #define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ |
555 | + /* record the payload offset(the length that has been compared) */ | ||
556 | + uint16_t offset; | ||
557 | + uint8_t flags; /* Flags(aka Control bits) */ | ||
558 | } Packet; | ||
559 | |||
560 | typedef struct ConnectionKey { | ||
561 | @@ -XXX,XX +XXX,XX @@ typedef struct Connection { | ||
562 | /* flag to enqueue unprocessed_connections */ | ||
563 | bool processing; | ||
564 | uint8_t ip_proto; | ||
565 | + /* record the sequence number that has been compared */ | ||
566 | + uint32_t compare_seq; | ||
567 | + /* the maximum of acknowledgement number in primary_list queue */ | ||
568 | + uint32_t pack; | ||
569 | + /* the maximum of acknowledgement number in secondary_list queue */ | ||
570 | + uint32_t sack; | ||
571 | /* offset = secondary_seq - primary_seq */ | ||
572 | tcp_seq offset; | ||
573 | /* | ||
574 | diff --git a/net/trace-events b/net/trace-events | ||
575 | index XXXXXXX..XXXXXXX 100644 | ||
576 | --- a/net/trace-events | ||
577 | +++ b/net/trace-events | ||
578 | @@ -XXX,XX +XXX,XX @@ colo_compare_icmp_miscompare(const char *sta, int size) ": %s = %d" | ||
579 | colo_compare_ip_info(int psize, const char *sta, const char *stb, int ssize, const char *stc, const char *std) "ppkt size = %d, ip_src = %s, ip_dst = %s, spkt size = %d, ip_src = %s, ip_dst = %s" | ||
580 | colo_old_packet_check_found(int64_t old_time) "%" PRId64 | ||
581 | colo_compare_miscompare(void) "" | ||
582 | -colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int res, uint32_t flag, int size) "side: %s seq/ack= %u/%u res= %d flags= 0x%x pkt_size: %d\n" | ||
583 | +colo_compare_tcp_info(const char *pkt, uint32_t seq, uint32_t ack, int hdlen, int pdlen, int offset, int flags) "%s: seq/ack= %u/%u hdlen= %d pdlen= %d offset= %d flags=%d\n" | ||
584 | |||
585 | # net/filter-rewriter.c | ||
586 | colo_filter_rewriter_debug(void) "" | ||
587 | -- | 375 | -- |
588 | 2.7.4 | 376 | 2.7.4 |
589 | |||
590 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
5 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
6 | --- | ||
7 | hw/net/igb_core.c | 7 +++++++ | ||
8 | hw/net/igb_regs.h | 3 +++ | ||
9 | 2 files changed, 10 insertions(+) | ||
10 | |||
11 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
12 | index XXXXXXX..XXXXXXX 100644 | ||
13 | --- a/hw/net/igb_core.c | ||
14 | +++ b/hw/net/igb_core.c | ||
15 | @@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core, | ||
16 | tx->ctx[idx].vlan_macip_lens >> IGB_TX_FLAGS_VLAN_SHIFT, | ||
17 | !!(tx->first_cmd_type_len & E1000_TXD_CMD_VLE)); | ||
18 | |||
19 | + if ((tx->first_cmd_type_len & E1000_ADVTXD_MAC_TSTAMP) && | ||
20 | + (core->mac[TSYNCTXCTL] & E1000_TSYNCTXCTL_ENABLED) && | ||
21 | + !(core->mac[TSYNCTXCTL] & E1000_TSYNCTXCTL_VALID)) { | ||
22 | + core->mac[TSYNCTXCTL] |= E1000_TSYNCTXCTL_VALID; | ||
23 | + e1000x_timestamp(core->mac, core->timadj, TXSTMPL, TXSTMPH); | ||
24 | + } | ||
25 | + | ||
26 | if (igb_tx_pkt_send(core, tx, queue_index)) { | ||
27 | igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index); | ||
28 | } | ||
29 | diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/hw/net/igb_regs.h | ||
32 | +++ b/hw/net/igb_regs.h | ||
33 | @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { | ||
34 | /* E1000_EITR_CNT_IGNR is only for 82576 and newer */ | ||
35 | #define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ | ||
36 | |||
37 | +#define E1000_TSYNCTXCTL_VALID 0x00000001 /* tx timestamp valid */ | ||
38 | +#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable tx timestampping */ | ||
39 | + | ||
40 | /* PCI Express Control */ | ||
41 | #define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000 | ||
42 | #define E1000_GCR_CMPL_TMOUT_10ms 0x00001000 | ||
43 | -- | ||
44 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | 1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | |
2 | |||
3 | In MSI-X mode, if there are interrupts already notified but not cleared | ||
4 | and a new interrupt arrives, e1000e incorrectly notifies the notified | ||
5 | ones again along with the new one. | ||
6 | |||
7 | To fix this issue, replace e1000e_update_interrupt_state() with | ||
8 | two new functions: e1000e_raise_interrupts() and | ||
9 | e1000e_lower_interrupts(). These functions not only raise or lower | ||
10 | interrupts, but also perform the register writes which update the | ||
11 | interrupt state. Before performing a register write, these functions | ||
12 | determine the interrupts already raised, and compare them with the | ||
13 | interrupts raised after the register write to determine the interrupts | ||
14 | to notify. | ||
15 | |||
16 | The introduction of these functions made tracepoints which assume that | ||
17 | the caller of e1000e_update_interrupt_state() performs register writes | ||
18 | obsolete. These tracepoints are now removed, and alternative ones are | ||
19 | added to the new functions. | ||
20 | |||
21 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
22 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
23 | --- | ||
24 | hw/net/e1000e_core.c | 153 ++++++++++++++++++++++----------------------------- | ||
25 | hw/net/e1000e_core.h | 2 - | ||
26 | hw/net/trace-events | 2 + | ||
27 | 3 files changed, 69 insertions(+), 88 deletions(-) | ||
28 | |||
29 | diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c | ||
30 | index XXXXXXX..XXXXXXX 100644 | ||
31 | --- a/hw/net/e1000e_core.c | ||
32 | +++ b/hw/net/e1000e_core.c | ||
33 | @@ -XXX,XX +XXX,XX @@ e1000e_intrmgr_on_throttling_timer(void *opaque) | ||
34 | |||
35 | timer->running = false; | ||
36 | |||
37 | - if (msi_enabled(timer->core->owner)) { | ||
38 | - trace_e1000e_irq_msi_notify_postponed(); | ||
39 | - /* Clear msi_causes_pending to fire MSI eventually */ | ||
40 | - timer->core->msi_causes_pending = 0; | ||
41 | - e1000e_set_interrupt_cause(timer->core, 0); | ||
42 | - } else { | ||
43 | - trace_e1000e_irq_legacy_notify_postponed(); | ||
44 | - e1000e_set_interrupt_cause(timer->core, 0); | ||
45 | + if (timer->core->mac[IMS] & timer->core->mac[ICR]) { | ||
46 | + if (msi_enabled(timer->core->owner)) { | ||
47 | + trace_e1000e_irq_msi_notify_postponed(); | ||
48 | + msi_notify(timer->core->owner, 0); | ||
49 | + } else { | ||
50 | + trace_e1000e_irq_legacy_notify_postponed(); | ||
51 | + e1000e_raise_legacy_irq(timer->core); | ||
52 | + } | ||
53 | } | ||
54 | } | ||
55 | |||
56 | @@ -XXX,XX +XXX,XX @@ static void | ||
57 | e1000e_intrmgr_fire_all_timers(E1000ECore *core) | ||
58 | { | ||
59 | int i; | ||
60 | - uint32_t val = e1000e_intmgr_collect_delayed_causes(core); | ||
61 | - | ||
62 | - trace_e1000e_irq_adding_delayed_causes(val, core->mac[ICR]); | ||
63 | - core->mac[ICR] |= val; | ||
64 | |||
65 | if (core->itr.running) { | ||
66 | timer_del(core->itr.timer); | ||
67 | @@ -XXX,XX +XXX,XX @@ void(*e1000e_phyreg_writeops[E1000E_PHY_PAGES][E1000E_PHY_PAGE_SIZE]) | ||
68 | } | ||
69 | }; | ||
70 | |||
71 | -static inline void | ||
72 | -e1000e_clear_ims_bits(E1000ECore *core, uint32_t bits) | ||
73 | -{ | ||
74 | - trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits); | ||
75 | - core->mac[IMS] &= ~bits; | ||
76 | -} | ||
77 | - | ||
78 | static inline bool | ||
79 | e1000e_postpone_interrupt(E1000IntrDelayTimer *timer) | ||
80 | { | ||
81 | @@ -XXX,XX +XXX,XX @@ e1000e_msix_notify_one(E1000ECore *core, uint32_t cause, uint32_t int_cfg) | ||
82 | effective_eiac = core->mac[EIAC] & cause; | ||
83 | |||
84 | core->mac[ICR] &= ~effective_eiac; | ||
85 | - core->msi_causes_pending &= ~effective_eiac; | ||
86 | |||
87 | if (!(core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { | ||
88 | core->mac[IMS] &= ~effective_eiac; | ||
89 | @@ -XXX,XX +XXX,XX @@ e1000e_fix_icr_asserted(E1000ECore *core) | ||
90 | trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]); | ||
91 | } | ||
92 | |||
93 | -static void | ||
94 | -e1000e_send_msi(E1000ECore *core, bool msix) | ||
95 | +static void e1000e_raise_interrupts(E1000ECore *core, | ||
96 | + size_t index, uint32_t causes) | ||
97 | { | ||
98 | - uint32_t causes = core->mac[ICR] & core->mac[IMS] & ~E1000_ICR_ASSERTED; | ||
99 | - | ||
100 | - core->msi_causes_pending &= causes; | ||
101 | - causes ^= core->msi_causes_pending; | ||
102 | - if (causes == 0) { | ||
103 | - return; | ||
104 | - } | ||
105 | - core->msi_causes_pending |= causes; | ||
106 | + bool is_msix = msix_enabled(core->owner); | ||
107 | + uint32_t old_causes = core->mac[IMS] & core->mac[ICR]; | ||
108 | + uint32_t raised_causes; | ||
109 | |||
110 | - if (msix) { | ||
111 | - e1000e_msix_notify(core, causes); | ||
112 | - } else { | ||
113 | - if (!e1000e_itr_should_postpone(core)) { | ||
114 | - trace_e1000e_irq_msi_notify(causes); | ||
115 | - msi_notify(core->owner, 0); | ||
116 | - } | ||
117 | - } | ||
118 | -} | ||
119 | + trace_e1000e_irq_set(index << 2, | ||
120 | + core->mac[index], core->mac[index] | causes); | ||
121 | |||
122 | -static void | ||
123 | -e1000e_update_interrupt_state(E1000ECore *core) | ||
124 | -{ | ||
125 | - bool interrupts_pending; | ||
126 | - bool is_msix = msix_enabled(core->owner); | ||
127 | + core->mac[index] |= causes; | ||
128 | |||
129 | /* Set ICR[OTHER] for MSI-X */ | ||
130 | if (is_msix) { | ||
131 | @@ -XXX,XX +XXX,XX @@ e1000e_update_interrupt_state(E1000ECore *core) | ||
132 | */ | ||
133 | core->mac[ICS] = core->mac[ICR]; | ||
134 | |||
135 | - interrupts_pending = (core->mac[IMS] & core->mac[ICR]) ? true : false; | ||
136 | - if (!interrupts_pending) { | ||
137 | - core->msi_causes_pending = 0; | ||
138 | - } | ||
139 | - | ||
140 | trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS], | ||
141 | core->mac[ICR], core->mac[IMS]); | ||
142 | |||
143 | - if (is_msix || msi_enabled(core->owner)) { | ||
144 | - if (interrupts_pending) { | ||
145 | - e1000e_send_msi(core, is_msix); | ||
146 | - } | ||
147 | - } else { | ||
148 | - if (interrupts_pending) { | ||
149 | - if (!e1000e_itr_should_postpone(core)) { | ||
150 | - e1000e_raise_legacy_irq(core); | ||
151 | - } | ||
152 | + raised_causes = core->mac[IMS] & core->mac[ICR] & ~old_causes; | ||
153 | + if (!raised_causes) { | ||
154 | + return; | ||
155 | + } | ||
156 | + | ||
157 | + if (is_msix) { | ||
158 | + e1000e_msix_notify(core, raised_causes & ~E1000_ICR_ASSERTED); | ||
159 | + } else if (!e1000e_itr_should_postpone(core)) { | ||
160 | + if (msi_enabled(core->owner)) { | ||
161 | + trace_e1000e_irq_msi_notify(raised_causes); | ||
162 | + msi_notify(core->owner, 0); | ||
163 | } else { | ||
164 | - e1000e_lower_legacy_irq(core); | ||
165 | + e1000e_raise_legacy_irq(core); | ||
166 | } | ||
167 | } | ||
168 | } | ||
169 | |||
170 | -static void | ||
171 | -e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) | ||
172 | +static void e1000e_lower_interrupts(E1000ECore *core, | ||
173 | + size_t index, uint32_t causes) | ||
174 | { | ||
175 | - trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); | ||
176 | + trace_e1000e_irq_clear(index << 2, | ||
177 | + core->mac[index], core->mac[index] & ~causes); | ||
178 | |||
179 | - val |= e1000e_intmgr_collect_delayed_causes(core); | ||
180 | - core->mac[ICR] |= val; | ||
181 | + core->mac[index] &= ~causes; | ||
182 | |||
183 | - trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]); | ||
184 | + /* | ||
185 | + * Make sure ICR and ICS registers have the same value. | ||
186 | + * The spec says that the ICS register is write-only. However in practice, | ||
187 | + * on real hardware ICS is readable, and for reads it has the same value as | ||
188 | + * ICR (except that ICS does not have the clear on read behaviour of ICR). | ||
189 | + * | ||
190 | + * The VxWorks PRO/1000 driver uses this behaviour. | ||
191 | + */ | ||
192 | + core->mac[ICS] = core->mac[ICR]; | ||
193 | + | ||
194 | + trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS], | ||
195 | + core->mac[ICR], core->mac[IMS]); | ||
196 | |||
197 | - e1000e_update_interrupt_state(core); | ||
198 | + if (!(core->mac[IMS] & core->mac[ICR]) && | ||
199 | + !msix_enabled(core->owner) && !msi_enabled(core->owner)) { | ||
200 | + e1000e_lower_legacy_irq(core); | ||
201 | + } | ||
202 | +} | ||
203 | + | ||
204 | +static void | ||
205 | +e1000e_set_interrupt_cause(E1000ECore *core, uint32_t val) | ||
206 | +{ | ||
207 | + val |= e1000e_intmgr_collect_delayed_causes(core); | ||
208 | + e1000e_raise_interrupts(core, ICR, val); | ||
209 | } | ||
210 | |||
211 | static inline void | ||
212 | @@ -XXX,XX +XXX,XX @@ e1000e_set_ics(E1000ECore *core, int index, uint32_t val) | ||
213 | static void | ||
214 | e1000e_set_icr(E1000ECore *core, int index, uint32_t val) | ||
215 | { | ||
216 | - uint32_t icr = 0; | ||
217 | if ((core->mac[ICR] & E1000_ICR_ASSERTED) && | ||
218 | (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { | ||
219 | trace_e1000e_irq_icr_process_iame(); | ||
220 | - e1000e_clear_ims_bits(core, core->mac[IAM]); | ||
221 | + e1000e_lower_interrupts(core, IMS, core->mac[IAM]); | ||
222 | } | ||
223 | |||
224 | - icr = core->mac[ICR] & ~val; | ||
225 | /* | ||
226 | * Windows driver expects that the "receive overrun" bit and other | ||
227 | * ones to be cleared when the "Other" bit (#24) is cleared. | ||
228 | */ | ||
229 | - icr = (val & E1000_ICR_OTHER) ? (icr & ~E1000_ICR_OTHER_CAUSES) : icr; | ||
230 | - trace_e1000e_irq_icr_write(val, core->mac[ICR], icr); | ||
231 | - core->mac[ICR] = icr; | ||
232 | - e1000e_update_interrupt_state(core); | ||
233 | + if (val & E1000_ICR_OTHER) { | ||
234 | + val |= E1000_ICR_OTHER_CAUSES; | ||
235 | + } | ||
236 | + e1000e_lower_interrupts(core, ICR, val); | ||
237 | } | ||
238 | |||
239 | static void | ||
240 | e1000e_set_imc(E1000ECore *core, int index, uint32_t val) | ||
241 | { | ||
242 | trace_e1000e_irq_ims_clear_set_imc(val); | ||
243 | - e1000e_clear_ims_bits(core, val); | ||
244 | - e1000e_update_interrupt_state(core); | ||
245 | + e1000e_lower_interrupts(core, IMS, val); | ||
246 | } | ||
247 | |||
248 | static void | ||
249 | @@ -XXX,XX +XXX,XX @@ e1000e_set_ims(E1000ECore *core, int index, uint32_t val) | ||
250 | |||
251 | uint32_t valid_val = val & ims_valid_mask; | ||
252 | |||
253 | - trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val); | ||
254 | - core->mac[IMS] |= valid_val; | ||
255 | - | ||
256 | if ((valid_val & ims_ext_mask) && | ||
257 | (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PBA_CLR) && | ||
258 | msix_enabled(core->owner)) { | ||
259 | @@ -XXX,XX +XXX,XX @@ e1000e_set_ims(E1000ECore *core, int index, uint32_t val) | ||
260 | e1000e_intrmgr_fire_all_timers(core); | ||
261 | } | ||
262 | |||
263 | - e1000e_update_interrupt_state(core); | ||
264 | + e1000e_raise_interrupts(core, IMS, valid_val); | ||
265 | } | ||
266 | |||
267 | static void | ||
268 | @@ -XXX,XX +XXX,XX @@ static uint32_t | ||
269 | e1000e_mac_icr_read(E1000ECore *core, int index) | ||
270 | { | ||
271 | uint32_t ret = core->mac[ICR]; | ||
272 | - trace_e1000e_irq_icr_read_entry(ret); | ||
273 | |||
274 | if (core->mac[IMS] == 0) { | ||
275 | trace_e1000e_irq_icr_clear_zero_ims(); | ||
276 | - core->mac[ICR] = 0; | ||
277 | + e1000e_lower_interrupts(core, ICR, 0xffffffff); | ||
278 | } | ||
279 | |||
280 | if (!msix_enabled(core->owner)) { | ||
281 | trace_e1000e_irq_icr_clear_nonmsix_icr_read(); | ||
282 | - core->mac[ICR] = 0; | ||
283 | + e1000e_lower_interrupts(core, ICR, 0xffffffff); | ||
284 | } | ||
285 | |||
286 | if ((core->mac[ICR] & E1000_ICR_ASSERTED) && | ||
287 | (core->mac[CTRL_EXT] & E1000_CTRL_EXT_IAME)) { | ||
288 | trace_e1000e_irq_icr_clear_iame(); | ||
289 | - core->mac[ICR] = 0; | ||
290 | + e1000e_lower_interrupts(core, ICR, 0xffffffff); | ||
291 | trace_e1000e_irq_icr_process_iame(); | ||
292 | - e1000e_clear_ims_bits(core, core->mac[IAM]); | ||
293 | + e1000e_lower_interrupts(core, IMS, core->mac[IAM]); | ||
294 | } | ||
295 | |||
296 | - trace_e1000e_irq_icr_read_exit(core->mac[ICR]); | ||
297 | - e1000e_update_interrupt_state(core); | ||
298 | return ret; | ||
299 | } | ||
300 | |||
301 | diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h | ||
302 | index XXXXXXX..XXXXXXX 100644 | ||
303 | --- a/hw/net/e1000e_core.h | ||
304 | +++ b/hw/net/e1000e_core.h | ||
305 | @@ -XXX,XX +XXX,XX @@ struct E1000Core { | ||
306 | PCIDevice *owner; | ||
307 | void (*owner_start_recv)(PCIDevice *d); | ||
308 | |||
309 | - uint32_t msi_causes_pending; | ||
310 | - | ||
311 | int64_t timadj; | ||
312 | }; | ||
313 | |||
314 | diff --git a/hw/net/trace-events b/hw/net/trace-events | ||
315 | index XXXXXXX..XXXXXXX 100644 | ||
316 | --- a/hw/net/trace-events | ||
317 | +++ b/hw/net/trace-events | ||
318 | @@ -XXX,XX +XXX,XX @@ e1000e_irq_msix_notify_postponed_vec(int idx) "Sending MSI-X postponed by EITR[% | ||
319 | e1000e_irq_legacy_notify(bool level) "IRQ line state: %d" | ||
320 | e1000e_irq_msix_notify_vec(uint32_t vector) "MSI-X notify vector 0x%x" | ||
321 | e1000e_irq_postponed_by_xitr(uint32_t reg) "Interrupt postponed by [E]ITR register 0x%x" | ||
322 | +e1000e_irq_clear(uint32_t offset, uint32_t old, uint32_t new) "Clearing interrupt register 0x%x: 0x%x --> 0x%x" | ||
323 | +e1000e_irq_set(uint32_t offset, uint32_t old, uint32_t new) "Setting interrupt register 0x%x: 0x%x --> 0x%x" | ||
324 | e1000e_irq_clear_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Clearing IMS bits 0x%x: 0x%x --> 0x%x" | ||
325 | e1000e_irq_set_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Setting IMS bits 0x%x: 0x%x --> 0x%x" | ||
326 | e1000e_irq_fix_icr_asserted(uint32_t new_val) "ICR_ASSERTED bit fixed: 0x%x" | ||
327 | -- | ||
328 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | This follows the corresponding change for e1000e. This fixes: | ||
4 | tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb | ||
5 | |||
6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
8 | --- | ||
9 | hw/net/igb_core.c | 201 +++++++++------------ | ||
10 | hw/net/trace-events | 11 +- | ||
11 | scripts/ci/org.centos/stream/8/x86_64/test-avocado | 1 + | ||
12 | tests/avocado/netdev-ethtool.py | 4 - | ||
13 | 4 files changed, 87 insertions(+), 130 deletions(-) | ||
14 | |||
15 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
16 | index XXXXXXX..XXXXXXX 100644 | ||
17 | --- a/hw/net/igb_core.c | ||
18 | +++ b/hw/net/igb_core.c | ||
19 | @@ -XXX,XX +XXX,XX @@ static ssize_t | ||
20 | igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
21 | bool has_vnet, bool *external_tx); | ||
22 | |||
23 | -static inline void | ||
24 | -igb_set_interrupt_cause(IGBCore *core, uint32_t val); | ||
25 | - | ||
26 | -static void igb_update_interrupt_state(IGBCore *core); | ||
27 | +static void igb_raise_interrupts(IGBCore *core, size_t index, uint32_t causes); | ||
28 | static void igb_reset(IGBCore *core, bool sw); | ||
29 | |||
30 | static inline void | ||
31 | @@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) | ||
32 | } | ||
33 | |||
34 | if (eic) { | ||
35 | - core->mac[EICR] |= eic; | ||
36 | - igb_set_interrupt_cause(core, E1000_ICR_TXDW); | ||
37 | + igb_raise_interrupts(core, EICR, eic); | ||
38 | + igb_raise_interrupts(core, ICR, E1000_ICR_TXDW); | ||
39 | } | ||
40 | |||
41 | net_tx_pkt_reset(txr->tx->tx_pkt, net_tx_pkt_unmap_frag_pci, d); | ||
42 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
43 | { | ||
44 | uint16_t queues = 0; | ||
45 | uint32_t causes = 0; | ||
46 | + uint32_t ecauses = 0; | ||
47 | union { | ||
48 | L2Header l2_header; | ||
49 | uint8_t octets[ETH_ZLEN]; | ||
50 | @@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt, | ||
51 | causes |= E1000_ICS_RXDMT0; | ||
52 | } | ||
53 | |||
54 | - core->mac[EICR] |= igb_rx_wb_eic(core, rxr.i->idx); | ||
55 | + ecauses |= igb_rx_wb_eic(core, rxr.i->idx); | ||
56 | |||
57 | trace_e1000e_rx_written_to_guest(rxr.i->idx); | ||
58 | } | ||
59 | |||
60 | trace_e1000e_rx_interrupt_set(causes); | ||
61 | - igb_set_interrupt_cause(core, causes); | ||
62 | + igb_raise_interrupts(core, EICR, ecauses); | ||
63 | + igb_raise_interrupts(core, ICR, causes); | ||
64 | |||
65 | return orig_size; | ||
66 | } | ||
67 | @@ -XXX,XX +XXX,XX @@ void igb_core_set_link_status(IGBCore *core) | ||
68 | } | ||
69 | |||
70 | if (core->mac[STATUS] != old_status) { | ||
71 | - igb_set_interrupt_cause(core, E1000_ICR_LSC); | ||
72 | + igb_raise_interrupts(core, ICR, E1000_ICR_LSC); | ||
73 | } | ||
74 | } | ||
75 | |||
76 | @@ -XXX,XX +XXX,XX @@ igb_set_rx_control(IGBCore *core, int index, uint32_t val) | ||
77 | } | ||
78 | } | ||
79 | |||
80 | -static inline void | ||
81 | -igb_clear_ims_bits(IGBCore *core, uint32_t bits) | ||
82 | -{ | ||
83 | - trace_e1000e_irq_clear_ims(bits, core->mac[IMS], core->mac[IMS] & ~bits); | ||
84 | - core->mac[IMS] &= ~bits; | ||
85 | -} | ||
86 | - | ||
87 | static inline bool | ||
88 | igb_postpone_interrupt(IGBIntrDelayTimer *timer) | ||
89 | { | ||
90 | @@ -XXX,XX +XXX,XX @@ igb_eitr_should_postpone(IGBCore *core, int idx) | ||
91 | return igb_postpone_interrupt(&core->eitr[idx]); | ||
92 | } | ||
93 | |||
94 | -static void igb_send_msix(IGBCore *core) | ||
95 | +static void igb_send_msix(IGBCore *core, uint32_t causes) | ||
96 | { | ||
97 | - uint32_t causes = core->mac[EICR] & core->mac[EIMS]; | ||
98 | int vector; | ||
99 | |||
100 | for (vector = 0; vector < IGB_INTR_NUM; ++vector) { | ||
101 | @@ -XXX,XX +XXX,XX @@ igb_fix_icr_asserted(IGBCore *core) | ||
102 | trace_e1000e_irq_fix_icr_asserted(core->mac[ICR]); | ||
103 | } | ||
104 | |||
105 | -static void | ||
106 | -igb_update_interrupt_state(IGBCore *core) | ||
107 | +static void igb_raise_interrupts(IGBCore *core, size_t index, uint32_t causes) | ||
108 | { | ||
109 | - uint32_t icr; | ||
110 | - uint32_t causes; | ||
111 | + uint32_t old_causes = core->mac[ICR] & core->mac[IMS]; | ||
112 | + uint32_t old_ecauses = core->mac[EICR] & core->mac[EIMS]; | ||
113 | + uint32_t raised_causes; | ||
114 | + uint32_t raised_ecauses; | ||
115 | uint32_t int_alloc; | ||
116 | |||
117 | - icr = core->mac[ICR] & core->mac[IMS]; | ||
118 | + trace_e1000e_irq_set(index << 2, | ||
119 | + core->mac[index], core->mac[index] | causes); | ||
120 | + | ||
121 | + core->mac[index] |= causes; | ||
122 | |||
123 | if (core->mac[GPIE] & E1000_GPIE_MSIX_MODE) { | ||
124 | - if (icr) { | ||
125 | - causes = 0; | ||
126 | - if (icr & E1000_ICR_DRSTA) { | ||
127 | - int_alloc = core->mac[IVAR_MISC] & 0xff; | ||
128 | - if (int_alloc & E1000_IVAR_VALID) { | ||
129 | - causes |= BIT(int_alloc & 0x1f); | ||
130 | - } | ||
131 | + raised_causes = core->mac[ICR] & core->mac[IMS] & ~old_causes; | ||
132 | + | ||
133 | + if (raised_causes & E1000_ICR_DRSTA) { | ||
134 | + int_alloc = core->mac[IVAR_MISC] & 0xff; | ||
135 | + if (int_alloc & E1000_IVAR_VALID) { | ||
136 | + core->mac[EICR] |= BIT(int_alloc & 0x1f); | ||
137 | } | ||
138 | - /* Check if other bits (excluding the TCP Timer) are enabled. */ | ||
139 | - if (icr & ~E1000_ICR_DRSTA) { | ||
140 | - int_alloc = (core->mac[IVAR_MISC] >> 8) & 0xff; | ||
141 | - if (int_alloc & E1000_IVAR_VALID) { | ||
142 | - causes |= BIT(int_alloc & 0x1f); | ||
143 | - } | ||
144 | - trace_e1000e_irq_add_msi_other(core->mac[EICR]); | ||
145 | + } | ||
146 | + /* Check if other bits (excluding the TCP Timer) are enabled. */ | ||
147 | + if (raised_causes & ~E1000_ICR_DRSTA) { | ||
148 | + int_alloc = (core->mac[IVAR_MISC] >> 8) & 0xff; | ||
149 | + if (int_alloc & E1000_IVAR_VALID) { | ||
150 | + core->mac[EICR] |= BIT(int_alloc & 0x1f); | ||
151 | } | ||
152 | - core->mac[EICR] |= causes; | ||
153 | } | ||
154 | |||
155 | - if ((core->mac[EICR] & core->mac[EIMS])) { | ||
156 | - igb_send_msix(core); | ||
157 | + raised_ecauses = core->mac[EICR] & core->mac[EIMS] & ~old_ecauses; | ||
158 | + if (!raised_ecauses) { | ||
159 | + return; | ||
160 | } | ||
161 | + | ||
162 | + igb_send_msix(core, raised_ecauses); | ||
163 | } else { | ||
164 | igb_fix_icr_asserted(core); | ||
165 | |||
166 | - if (icr) { | ||
167 | - core->mac[EICR] |= (icr & E1000_ICR_DRSTA) | E1000_EICR_OTHER; | ||
168 | - } else { | ||
169 | - core->mac[EICR] &= ~E1000_EICR_OTHER; | ||
170 | + raised_causes = core->mac[ICR] & core->mac[IMS] & ~old_causes; | ||
171 | + if (!raised_causes) { | ||
172 | + return; | ||
173 | } | ||
174 | |||
175 | - trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS], | ||
176 | - core->mac[ICR], core->mac[IMS]); | ||
177 | + core->mac[EICR] |= (raised_causes & E1000_ICR_DRSTA) | E1000_EICR_OTHER; | ||
178 | |||
179 | if (msix_enabled(core->owner)) { | ||
180 | - if (icr) { | ||
181 | - trace_e1000e_irq_msix_notify_vec(0); | ||
182 | - msix_notify(core->owner, 0); | ||
183 | - } | ||
184 | + trace_e1000e_irq_msix_notify_vec(0); | ||
185 | + msix_notify(core->owner, 0); | ||
186 | } else if (msi_enabled(core->owner)) { | ||
187 | - if (icr) { | ||
188 | - msi_notify(core->owner, 0); | ||
189 | - } | ||
190 | + trace_e1000e_irq_msi_notify(raised_causes); | ||
191 | + msi_notify(core->owner, 0); | ||
192 | } else { | ||
193 | - if (icr) { | ||
194 | - igb_raise_legacy_irq(core); | ||
195 | - } else { | ||
196 | - igb_lower_legacy_irq(core); | ||
197 | - } | ||
198 | + igb_raise_legacy_irq(core); | ||
199 | } | ||
200 | } | ||
201 | } | ||
202 | |||
203 | -static void | ||
204 | -igb_set_interrupt_cause(IGBCore *core, uint32_t val) | ||
205 | +static void igb_lower_interrupts(IGBCore *core, size_t index, uint32_t causes) | ||
206 | { | ||
207 | - trace_e1000e_irq_set_cause_entry(val, core->mac[ICR]); | ||
208 | + trace_e1000e_irq_clear(index << 2, | ||
209 | + core->mac[index], core->mac[index] & ~causes); | ||
210 | + | ||
211 | + core->mac[index] &= ~causes; | ||
212 | |||
213 | - core->mac[ICR] |= val; | ||
214 | + trace_e1000e_irq_pending_interrupts(core->mac[ICR] & core->mac[IMS], | ||
215 | + core->mac[ICR], core->mac[IMS]); | ||
216 | |||
217 | - trace_e1000e_irq_set_cause_exit(val, core->mac[ICR]); | ||
218 | + if (!(core->mac[ICR] & core->mac[IMS]) && | ||
219 | + !(core->mac[GPIE] & E1000_GPIE_MSIX_MODE)) { | ||
220 | + core->mac[EICR] &= ~E1000_EICR_OTHER; | ||
221 | |||
222 | - igb_update_interrupt_state(core); | ||
223 | + if (!msix_enabled(core->owner) && !msi_enabled(core->owner)) { | ||
224 | + igb_lower_legacy_irq(core); | ||
225 | + } | ||
226 | + } | ||
227 | } | ||
228 | |||
229 | static void igb_set_eics(IGBCore *core, int index, uint32_t val) | ||
230 | { | ||
231 | bool msix = !!(core->mac[GPIE] & E1000_GPIE_MSIX_MODE); | ||
232 | + uint32_t mask = msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK; | ||
233 | |||
234 | trace_igb_irq_write_eics(val, msix); | ||
235 | - | ||
236 | - core->mac[EICS] |= | ||
237 | - val & (msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK); | ||
238 | - | ||
239 | - /* | ||
240 | - * TODO: Move to igb_update_interrupt_state if EICS is modified in other | ||
241 | - * places. | ||
242 | - */ | ||
243 | - core->mac[EICR] = core->mac[EICS]; | ||
244 | - | ||
245 | - igb_update_interrupt_state(core); | ||
246 | + igb_raise_interrupts(core, EICR, val & mask); | ||
247 | } | ||
248 | |||
249 | static void igb_set_eims(IGBCore *core, int index, uint32_t val) | ||
250 | { | ||
251 | bool msix = !!(core->mac[GPIE] & E1000_GPIE_MSIX_MODE); | ||
252 | + uint32_t mask = msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK; | ||
253 | |||
254 | trace_igb_irq_write_eims(val, msix); | ||
255 | - | ||
256 | - core->mac[EIMS] |= | ||
257 | - val & (msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK); | ||
258 | - | ||
259 | - igb_update_interrupt_state(core); | ||
260 | + igb_raise_interrupts(core, EIMS, val & mask); | ||
261 | } | ||
262 | |||
263 | static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn) | ||
264 | { | ||
265 | uint32_t ent = core->mac[VTIVAR_MISC + vfn]; | ||
266 | + uint32_t causes; | ||
267 | |||
268 | if ((ent & E1000_IVAR_VALID)) { | ||
269 | - core->mac[EICR] |= (ent & 0x3) << (22 - vfn * IGBVF_MSIX_VEC_NUM); | ||
270 | - igb_update_interrupt_state(core); | ||
271 | + causes = (ent & 0x3) << (22 - vfn * IGBVF_MSIX_VEC_NUM); | ||
272 | + igb_raise_interrupts(core, EICR, causes); | ||
273 | } | ||
274 | } | ||
275 | |||
276 | static void mailbox_interrupt_to_pf(IGBCore *core) | ||
277 | { | ||
278 | - igb_set_interrupt_cause(core, E1000_ICR_VMMB); | ||
279 | + igb_raise_interrupts(core, ICR, E1000_ICR_VMMB); | ||
280 | } | ||
281 | |||
282 | static void igb_set_pfmailbox(IGBCore *core, int index, uint32_t val) | ||
283 | @@ -XXX,XX +XXX,XX @@ static void igb_w1c(IGBCore *core, int index, uint32_t val) | ||
284 | static void igb_set_eimc(IGBCore *core, int index, uint32_t val) | ||
285 | { | ||
286 | bool msix = !!(core->mac[GPIE] & E1000_GPIE_MSIX_MODE); | ||
287 | + uint32_t mask = msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK; | ||
288 | |||
289 | - /* Interrupts are disabled via a write to EIMC and reflected in EIMS. */ | ||
290 | - core->mac[EIMS] &= | ||
291 | - ~(val & (msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK)); | ||
292 | + trace_igb_irq_write_eimc(val, msix); | ||
293 | |||
294 | - trace_igb_irq_write_eimc(val, core->mac[EIMS], msix); | ||
295 | - igb_update_interrupt_state(core); | ||
296 | + /* Interrupts are disabled via a write to EIMC and reflected in EIMS. */ | ||
297 | + igb_lower_interrupts(core, EIMS, val & mask); | ||
298 | } | ||
299 | |||
300 | static void igb_set_eiac(IGBCore *core, int index, uint32_t val) | ||
301 | @@ -XXX,XX +XXX,XX @@ static void igb_set_eicr(IGBCore *core, int index, uint32_t val) | ||
302 | * TODO: In IOV mode, only bit zero of this vector is available for the PF | ||
303 | * function. | ||
304 | */ | ||
305 | - core->mac[EICR] &= | ||
306 | - ~(val & (msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK)); | ||
307 | + uint32_t mask = msix ? E1000_EICR_MSIX_MASK : E1000_EICR_LEGACY_MASK; | ||
308 | |||
309 | trace_igb_irq_write_eicr(val, msix); | ||
310 | - igb_update_interrupt_state(core); | ||
311 | + igb_lower_interrupts(core, EICR, val & mask); | ||
312 | } | ||
313 | |||
314 | static void igb_set_vtctrl(IGBCore *core, int index, uint32_t val) | ||
315 | @@ -XXX,XX +XXX,XX @@ igb_autoneg_timer(void *opaque) | ||
316 | |||
317 | igb_update_flowctl_status(core); | ||
318 | /* signal link status change to the guest */ | ||
319 | - igb_set_interrupt_cause(core, E1000_ICR_LSC); | ||
320 | + igb_raise_interrupts(core, ICR, E1000_ICR_LSC); | ||
321 | } | ||
322 | } | ||
323 | |||
324 | @@ -XXX,XX +XXX,XX @@ igb_set_mdic(IGBCore *core, int index, uint32_t val) | ||
325 | core->mac[MDIC] = val | E1000_MDIC_READY; | ||
326 | |||
327 | if (val & E1000_MDIC_INT_EN) { | ||
328 | - igb_set_interrupt_cause(core, E1000_ICR_MDAC); | ||
329 | + igb_raise_interrupts(core, ICR, E1000_ICR_MDAC); | ||
330 | } | ||
331 | } | ||
332 | |||
333 | @@ -XXX,XX +XXX,XX @@ static void | ||
334 | igb_set_ics(IGBCore *core, int index, uint32_t val) | ||
335 | { | ||
336 | trace_e1000e_irq_write_ics(val); | ||
337 | - igb_set_interrupt_cause(core, val); | ||
338 | + igb_raise_interrupts(core, ICR, val); | ||
339 | } | ||
340 | |||
341 | static void | ||
342 | igb_set_imc(IGBCore *core, int index, uint32_t val) | ||
343 | { | ||
344 | trace_e1000e_irq_ims_clear_set_imc(val); | ||
345 | - igb_clear_ims_bits(core, val); | ||
346 | - igb_update_interrupt_state(core); | ||
347 | + igb_lower_interrupts(core, IMS, val); | ||
348 | } | ||
349 | |||
350 | static void | ||
351 | igb_set_ims(IGBCore *core, int index, uint32_t val) | ||
352 | { | ||
353 | - uint32_t valid_val = val & 0x77D4FBFD; | ||
354 | - | ||
355 | - trace_e1000e_irq_set_ims(val, core->mac[IMS], core->mac[IMS] | valid_val); | ||
356 | - core->mac[IMS] |= valid_val; | ||
357 | - igb_update_interrupt_state(core); | ||
358 | + igb_raise_interrupts(core, IMS, val & 0x77D4FBFD); | ||
359 | } | ||
360 | |||
361 | -static void igb_commit_icr(IGBCore *core) | ||
362 | +static void igb_nsicr(IGBCore *core) | ||
363 | { | ||
364 | /* | ||
365 | * If GPIE.NSICR = 0, then the clear of IMS will occur only if at | ||
366 | @@ -XXX,XX +XXX,XX @@ static void igb_commit_icr(IGBCore *core) | ||
367 | */ | ||
368 | if ((core->mac[GPIE] & E1000_GPIE_NSICR) || | ||
369 | (core->mac[IMS] && (core->mac[ICR] & E1000_ICR_INT_ASSERTED))) { | ||
370 | - igb_clear_ims_bits(core, core->mac[IAM]); | ||
371 | + igb_lower_interrupts(core, IMS, core->mac[IAM]); | ||
372 | } | ||
373 | - | ||
374 | - igb_update_interrupt_state(core); | ||
375 | } | ||
376 | |||
377 | static void igb_set_icr(IGBCore *core, int index, uint32_t val) | ||
378 | { | ||
379 | - uint32_t icr = core->mac[ICR] & ~val; | ||
380 | - | ||
381 | - trace_igb_irq_icr_write(val, core->mac[ICR], icr); | ||
382 | - core->mac[ICR] = icr; | ||
383 | - igb_commit_icr(core); | ||
384 | + igb_nsicr(core); | ||
385 | + igb_lower_interrupts(core, ICR, val); | ||
386 | } | ||
387 | |||
388 | static uint32_t | ||
389 | @@ -XXX,XX +XXX,XX @@ static uint32_t | ||
390 | igb_mac_icr_read(IGBCore *core, int index) | ||
391 | { | ||
392 | uint32_t ret = core->mac[ICR]; | ||
393 | - trace_e1000e_irq_icr_read_entry(ret); | ||
394 | |||
395 | if (core->mac[GPIE] & E1000_GPIE_NSICR) { | ||
396 | trace_igb_irq_icr_clear_gpie_nsicr(); | ||
397 | - core->mac[ICR] = 0; | ||
398 | + igb_lower_interrupts(core, ICR, 0xffffffff); | ||
399 | } else if (core->mac[IMS] == 0) { | ||
400 | trace_e1000e_irq_icr_clear_zero_ims(); | ||
401 | - core->mac[ICR] = 0; | ||
402 | + igb_lower_interrupts(core, ICR, 0xffffffff); | ||
403 | } else if (!msix_enabled(core->owner)) { | ||
404 | trace_e1000e_irq_icr_clear_nonmsix_icr_read(); | ||
405 | - core->mac[ICR] = 0; | ||
406 | + igb_lower_interrupts(core, ICR, 0xffffffff); | ||
407 | } | ||
408 | |||
409 | - trace_e1000e_irq_icr_read_exit(core->mac[ICR]); | ||
410 | - igb_commit_icr(core); | ||
411 | + igb_nsicr(core); | ||
412 | return ret; | ||
413 | } | ||
414 | |||
415 | diff --git a/hw/net/trace-events b/hw/net/trace-events | ||
416 | index XXXXXXX..XXXXXXX 100644 | ||
417 | --- a/hw/net/trace-events | ||
418 | +++ b/hw/net/trace-events | ||
419 | @@ -XXX,XX +XXX,XX @@ e1000e_irq_msix_notify_vec(uint32_t vector) "MSI-X notify vector 0x%x" | ||
420 | e1000e_irq_postponed_by_xitr(uint32_t reg) "Interrupt postponed by [E]ITR register 0x%x" | ||
421 | e1000e_irq_clear(uint32_t offset, uint32_t old, uint32_t new) "Clearing interrupt register 0x%x: 0x%x --> 0x%x" | ||
422 | e1000e_irq_set(uint32_t offset, uint32_t old, uint32_t new) "Setting interrupt register 0x%x: 0x%x --> 0x%x" | ||
423 | -e1000e_irq_clear_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Clearing IMS bits 0x%x: 0x%x --> 0x%x" | ||
424 | -e1000e_irq_set_ims(uint32_t bits, uint32_t old_ims, uint32_t new_ims) "Setting IMS bits 0x%x: 0x%x --> 0x%x" | ||
425 | e1000e_irq_fix_icr_asserted(uint32_t new_val) "ICR_ASSERTED bit fixed: 0x%x" | ||
426 | e1000e_irq_add_msi_other(uint32_t new_val) "ICR_OTHER bit added: 0x%x" | ||
427 | e1000e_irq_pending_interrupts(uint32_t pending, uint32_t icr, uint32_t ims) "ICR PENDING: 0x%x (ICR: 0x%x, IMS: 0x%x)" | ||
428 | -e1000e_irq_set_cause_entry(uint32_t val, uint32_t icr) "Going to set IRQ cause 0x%x, ICR: 0x%x" | ||
429 | -e1000e_irq_set_cause_exit(uint32_t val, uint32_t icr) "Set IRQ cause 0x%x, ICR: 0x%x" | ||
430 | -e1000e_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x" | ||
431 | e1000e_irq_write_ics(uint32_t val) "Adding ICR bits 0x%x" | ||
432 | e1000e_irq_icr_process_iame(void) "Clearing IMS bits due to IAME" | ||
433 | e1000e_irq_read_ics(uint32_t ics) "Current ICS: 0x%x" | ||
434 | e1000e_irq_read_ims(uint32_t ims) "Current IMS: 0x%x" | ||
435 | e1000e_irq_icr_clear_nonmsix_icr_read(void) "Clearing ICR on read due to non MSI-X int" | ||
436 | -e1000e_irq_icr_read_entry(uint32_t icr) "Starting ICR read. Current ICR: 0x%x" | ||
437 | -e1000e_irq_icr_read_exit(uint32_t icr) "Ending ICR read. Current ICR: 0x%x" | ||
438 | e1000e_irq_icr_clear_zero_ims(void) "Clearing ICR on read due to zero IMS" | ||
439 | e1000e_irq_icr_clear_iame(void) "Clearing ICR on read due to IAME" | ||
440 | e1000e_irq_iam_clear_eiame(uint32_t iam, uint32_t cause) "Clearing IMS due to EIAME, IAM: 0x%X, cause: 0x%X" | ||
441 | @@ -XXX,XX +XXX,XX @@ e1000e_irq_tidv_fpd_not_running(void) "FPD written while TIDV was not running" | ||
442 | e1000e_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = %u" | ||
443 | e1000e_irq_itr_set(uint32_t val) "ITR = %u" | ||
444 | e1000e_irq_fire_all_timers(uint32_t val) "Firing all delay/throttling timers on all interrupts enable (0x%X written to IMS)" | ||
445 | -e1000e_irq_adding_delayed_causes(uint32_t val, uint32_t icr) "Merging delayed causes 0x%X to ICR 0x%X" | ||
446 | e1000e_irq_msix_pending_clearing(uint32_t cause, uint32_t int_cfg, uint32_t vec) "Clearing MSI-X pending bit for cause 0x%x, IVAR config 0x%x, vector %u" | ||
447 | |||
448 | e1000e_wrn_msix_vec_wrong(uint32_t cause, uint32_t cfg) "Invalid configuration for cause 0x%x: 0x%x" | ||
449 | @@ -XXX,XX +XXX,XX @@ igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint3 | ||
450 | igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" | ||
451 | |||
452 | igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" | ||
453 | -igb_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x" | ||
454 | igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" | ||
455 | igb_irq_read_iam(uint32_t icr) "Current IAM: 0x%x" | ||
456 | igb_irq_write_eics(uint32_t val, bool msix) "Update EICS: 0x%x MSI-X: %d" | ||
457 | igb_irq_write_eims(uint32_t val, bool msix) "Update EIMS: 0x%x MSI-X: %d" | ||
458 | -igb_irq_write_eimc(uint32_t val, uint32_t eims, bool msix) "Update EIMC: 0x%x EIMS: 0x%x MSI-X: %d" | ||
459 | +igb_irq_write_eimc(uint32_t val, bool msix) "Update EIMC: 0x%x MSI-X: %d" | ||
460 | igb_irq_write_eiac(uint32_t val) "Update EIAC: 0x%x" | ||
461 | igb_irq_write_eiam(uint32_t val, bool msix) "Update EIAM: 0x%x MSI-X: %d" | ||
462 | igb_irq_write_eicr(uint32_t val, bool msix) "Update EICR: 0x%x MSI-X: %d" | ||
463 | diff --git a/scripts/ci/org.centos/stream/8/x86_64/test-avocado b/scripts/ci/org.centos/stream/8/x86_64/test-avocado | ||
464 | index XXXXXXX..XXXXXXX 100755 | ||
465 | --- a/scripts/ci/org.centos/stream/8/x86_64/test-avocado | ||
466 | +++ b/scripts/ci/org.centos/stream/8/x86_64/test-avocado | ||
467 | @@ -XXX,XX +XXX,XX @@ make get-vm-images | ||
468 | tests/avocado/cpu_queries.py:QueryCPUModelExpansion.test \ | ||
469 | tests/avocado/empty_cpu_model.py:EmptyCPUModel.test \ | ||
470 | tests/avocado/hotplug_cpu.py:HotPlugCPU.test \ | ||
471 | + tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb \ | ||
472 | tests/avocado/netdev-ethtool.py:NetDevEthtool.test_igb_nomsi \ | ||
473 | tests/avocado/info_usernet.py:InfoUsernet.test_hostfwd \ | ||
474 | tests/avocado/intel_iommu.py:IntelIOMMU.test_intel_iommu \ | ||
475 | diff --git a/tests/avocado/netdev-ethtool.py b/tests/avocado/netdev-ethtool.py | ||
476 | index XXXXXXX..XXXXXXX 100644 | ||
477 | --- a/tests/avocado/netdev-ethtool.py | ||
478 | +++ b/tests/avocado/netdev-ethtool.py | ||
479 | @@ -XXX,XX +XXX,XX @@ def common_test_code(self, netdev, extra_args=None): | ||
480 | # no need to gracefully shutdown, just finish | ||
481 | self.vm.kill() | ||
482 | |||
483 | - # Skip testing for MSI for now. Allegedly it was fixed by: | ||
484 | - # 28e96556ba (igb: Allocate MSI-X vector when testing) | ||
485 | - # but I'm seeing oops in the kernel | ||
486 | - @skip("Kernel bug with MSI enabled") | ||
487 | def test_igb(self): | ||
488 | """ | ||
489 | :avocado: tags=device:igb | ||
490 | -- | ||
491 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | For GPIE.NSICR, Section 7.3.2.1.2 says: | ||
4 | > ICR bits are cleared on register read. If GPIE.NSICR = 0b, then the | ||
5 | > clear on read occurs only if no bit is set in the IMS or at least one | ||
6 | > bit is set in the IMS and there is a true interrupt as reflected in | ||
7 | > ICR.INTA. | ||
8 | |||
9 | e1000e does something similar, though it checks for CTRL_EXT.IAME, which | |
10 | does not exist on igb. | |
11 | |||
12 | Suggested-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech> | ||
13 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
14 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
15 | --- | ||
16 | hw/net/igb_core.c | 2 ++ | ||
17 | 1 file changed, 2 insertions(+) | ||
18 | |||
19 | diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c | ||
20 | index XXXXXXX..XXXXXXX 100644 | ||
21 | --- a/hw/net/igb_core.c | ||
22 | +++ b/hw/net/igb_core.c | ||
23 | @@ -XXX,XX +XXX,XX @@ igb_mac_icr_read(IGBCore *core, int index) | ||
24 | } else if (core->mac[IMS] == 0) { | ||
25 | trace_e1000e_irq_icr_clear_zero_ims(); | ||
26 | igb_lower_interrupts(core, ICR, 0xffffffff); | ||
27 | + } else if (core->mac[ICR] & E1000_ICR_INT_ASSERTED) { | ||
28 | + igb_lower_interrupts(core, ICR, 0xffffffff); | ||
29 | } else if (!msix_enabled(core->owner)) { | ||
30 | trace_e1000e_irq_icr_clear_nonmsix_icr_read(); | ||
31 | igb_lower_interrupts(core, ICR, 0xffffffff); | ||
32 | -- | ||
33 | 2.7.4 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | vmxnet3 has no dependency on PC, and VMware Fusion actually makes it | ||
4 | available on Apple Silicon according to: | ||
5 | https://kb.vmware.com/s/article/90364 | ||
6 | |||
7 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
8 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
9 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
10 | --- | ||
11 | hw/net/Kconfig | 2 +- | ||
12 | 1 file changed, 1 insertion(+), 1 deletion(-) | ||
13 | |||
14 | diff --git a/hw/net/Kconfig b/hw/net/Kconfig | ||
15 | index XXXXXXX..XXXXXXX 100644 | ||
16 | --- a/hw/net/Kconfig | ||
17 | +++ b/hw/net/Kconfig | ||
18 | @@ -XXX,XX +XXX,XX @@ config RTL8139_PCI | ||
19 | |||
20 | config VMXNET3_PCI | ||
21 | bool | ||
22 | - default y if PCI_DEVICES && PC_PCI | ||
23 | + default y if PCI_DEVICES | ||
24 | depends on PCI | ||
25 | |||
26 | config SMC91C111 | ||
27 | -- | ||
28 | 2.7.4 | ||
29 | |||
30 | diff view generated by jsdifflib |
1 | From: Philippe Mathieu-Daudé <f4bug@amsat.org> | 1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> |
---|---|---|---|
2 | 2 | ||
3 | gently asked by his automatic reply :) | 3 | I have made significant changes to the network packet abstractions, so |
4 | add me as a reviewer. | |
4 | 5 | ||
5 | Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> | 6 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> |
7 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
6 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 8 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
7 | --- | 9 | --- |
8 | MAINTAINERS | 8 ++++---- | 10 | MAINTAINERS | 1 + |
9 | 1 file changed, 4 insertions(+), 4 deletions(-) | 11 | 1 file changed, 1 insertion(+) |
10 | 12 | ||
11 | diff --git a/MAINTAINERS b/MAINTAINERS | 13 | diff --git a/MAINTAINERS b/MAINTAINERS |
12 | index XXXXXXX..XXXXXXX 100644 | 14 | index XXXXXXX..XXXXXXX 100644 |
13 | --- a/MAINTAINERS | 15 | --- a/MAINTAINERS |
14 | +++ b/MAINTAINERS | 16 | +++ b/MAINTAINERS |
15 | @@ -XXX,XX +XXX,XX @@ F: hw/scsi/mfi.h | 17 | @@ -XXX,XX +XXX,XX @@ F: tests/qtest/fuzz-megasas-test.c |
16 | F: tests/megasas-test.c | ||
17 | 18 | ||
18 | Network packet abstractions | 19 | Network packet abstractions |
19 | -M: Dmitry Fleytman <dmitry@daynix.com> | 20 | M: Dmitry Fleytman <dmitry.fleytman@gmail.com> |
20 | +M: Dmitry Fleytman <dmitry.fleytman@gmail.com> | 21 | +R: Akihiko Odaki <akihiko.odaki@daynix.com> |
21 | S: Maintained | 22 | S: Maintained |
22 | F: include/net/eth.h | 23 | F: include/net/eth.h |
23 | F: net/eth.c | 24 | F: net/eth.c |
24 | @@ -XXX,XX +XXX,XX @@ F: hw/net/net_rx_pkt* | ||
25 | F: hw/net/net_tx_pkt* | ||
26 | |||
27 | Vmware | ||
28 | -M: Dmitry Fleytman <dmitry@daynix.com> | ||
29 | +M: Dmitry Fleytman <dmitry.fleytman@gmail.com> | ||
30 | S: Maintained | ||
31 | F: hw/net/vmxnet* | ||
32 | F: hw/scsi/vmw_pvscsi* | ||
33 | @@ -XXX,XX +XXX,XX @@ F: hw/mem/nvdimm.c | ||
34 | F: include/hw/mem/nvdimm.h | ||
35 | |||
36 | e1000x | ||
37 | -M: Dmitry Fleytman <dmitry@daynix.com> | ||
38 | +M: Dmitry Fleytman <dmitry.fleytman@gmail.com> | ||
39 | S: Maintained | ||
40 | F: hw/net/e1000x* | ||
41 | |||
42 | e1000e | ||
43 | -M: Dmitry Fleytman <dmitry@daynix.com> | ||
44 | +M: Dmitry Fleytman <dmitry.fleytman@gmail.com> | ||
45 | S: Maintained | ||
46 | F: hw/net/e1000e* | ||
47 | |||
48 | -- | 25 | -- |
49 | 2.7.4 | 26 | 2.7.4 |
50 | 27 | ||
51 | 28 | diff view generated by jsdifflib |
New patch | |||
---|---|---|---|
1 | From: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
1 | 2 | ||
3 | Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com> | ||
4 | Signed-off-by: Jason Wang <jasowang@redhat.com> | ||
5 | --- | ||
6 | docs/system/devices/igb.rst | 12 +++++++----- | ||
7 | 1 file changed, 7 insertions(+), 5 deletions(-) | ||
8 | |||
9 | diff --git a/docs/system/devices/igb.rst b/docs/system/devices/igb.rst | ||
10 | index XXXXXXX..XXXXXXX 100644 | ||
11 | --- a/docs/system/devices/igb.rst | ||
12 | +++ b/docs/system/devices/igb.rst | ||
13 | @@ -XXX,XX +XXX,XX @@ Limitations | ||
14 | =========== | ||
15 | |||
16 | This igb implementation was tested with Linux Test Project [2]_ and Windows HLK | ||
17 | -[3]_ during the initial development. The command used when testing with LTP is: | ||
18 | +[3]_ during the initial development. Later it was also tested with DPDK Test | ||
19 | +Suite [4]_. The command used when testing with LTP is: | ||
20 | |||
21 | .. code-block:: shell | ||
22 | |||
23 | @@ -XXX,XX +XXX,XX @@ This igb implementation was tested with Linux Test Project [2]_ and Windows HLK | ||
24 | |||
25 | Be aware that this implementation lacks many functionalities available with the | ||
26 | actual hardware, and you may experience various failures if you try to use it | ||
27 | -with a different operating system other than Linux and Windows or if you try | ||
28 | -functionalities not covered by the tests. | ||
29 | +with a different operating system other than DPDK, Linux, and Windows or if you | ||
30 | +try functionalities not covered by the tests. | ||
31 | |||
32 | Using igb | ||
33 | ========= | ||
34 | @@ -XXX,XX +XXX,XX @@ Using igb should be nothing different from using another network device. See | ||
35 | :ref:`Network_emulation` in general. | ||
36 | |||
37 | However, you may also need to perform additional steps to activate SR-IOV | ||
38 | -feature on your guest. For Linux, refer to [4]_. | ||
39 | +feature on your guest. For Linux, refer to [5]_. | ||
40 | |||
41 | Developing igb | ||
42 | ============== | ||
43 | @@ -XXX,XX +XXX,XX @@ References | ||
44 | .. [1] https://www.intel.com/content/dam/www/public/us/en/documents/datasheets/82576eb-gigabit-ethernet-controller-datasheet.pdf | ||
45 | .. [2] https://github.com/linux-test-project/ltp | ||
46 | .. [3] https://learn.microsoft.com/en-us/windows-hardware/test/hlk/ | ||
47 | -.. [4] https://docs.kernel.org/PCI/pci-iov-howto.html | ||
48 | +.. [4] https://doc.dpdk.org/dts/gsg/ | ||
49 | +.. [5] https://docs.kernel.org/PCI/pci-iov-howto.html | ||
50 | -- | ||
51 | 2.7.4 | diff view generated by jsdifflib |
1 | From: Thomas Huth <thuth@redhat.com> | 1 | From: Stefan Hajnoczi <stefanha@redhat.com> |
---|---|---|---|
2 | 2 | ||
3 | The vlan concept is marked as deprecated, so we should not use | 3 | If the driver sets large_send_mss to 0 then a divide-by-zero occurs. |
4 | this for examples in the documentation anymore. | 4 | Even if the division wasn't a problem, the for loop that emits MSS-sized |
5 | packets would never terminate. | ||
5 | 6 | ||
6 | Signed-off-by: Thomas Huth <thuth@redhat.com> | 7 | Solve these issues by skipping offloading when large_send_mss=0. |
8 | |||
9 | This issue was found by OSS-Fuzz as part of Alexander Bulekov's device | ||
10 | fuzzing work. The reproducer is: | ||
11 | |||
12 | $ cat << EOF | ./qemu-system-i386 -display none -machine accel=qtest, -m \ | ||
13 | 512M,slots=1,maxmem=0xffff000000000000 -machine q35 -nodefaults -device \ | ||
14 | rtl8139,netdev=net0 -netdev user,id=net0 -device \ | ||
15 | pc-dimm,id=nv1,memdev=mem1,addr=0xb800a64602800000 -object \ | ||
16 | memory-backend-ram,id=mem1,size=2M -qtest stdio | ||
17 | outl 0xcf8 0x80000814 | ||
18 | outl 0xcfc 0xe0000000 | ||
19 | outl 0xcf8 0x80000804 | ||
20 | outw 0xcfc 0x06 | ||
21 | write 0xe0000037 0x1 0x04 | ||
22 | write 0xe00000e0 0x2 0x01 | ||
23 | write 0x1 0x1 0x04 | ||
24 | write 0x3 0x1 0x98 | ||
25 | write 0xa 0x1 0x8c | ||
26 | write 0xb 0x1 0x02 | ||
27 | write 0xc 0x1 0x46 | ||
28 | write 0xd 0x1 0xa6 | ||
29 | write 0xf 0x1 0xb8 | ||
30 | write 0xb800a646028c000c 0x1 0x08 | ||
31 | write 0xb800a646028c000e 0x1 0x47 | ||
32 | write 0xb800a646028c0010 0x1 0x02 | ||
33 | write 0xb800a646028c0017 0x1 0x06 | ||
34 | write 0xb800a646028c0036 0x1 0x80 | ||
35 | write 0xe00000d9 0x1 0x40 | ||
36 | EOF | ||
37 | |||
38 | Buglink: https://gitlab.com/qemu-project/qemu/-/issues/1582 | ||
39 | Closes: https://gitlab.com/qemu-project/qemu/-/issues/1582 | ||
40 | Cc: qemu-stable@nongnu.org | ||
41 | Cc: Peter Maydell <peter.maydell@linaro.org> | ||
42 | Fixes: 6d71357a3b65 ("rtl8139: honor large send MSS value") | ||
43 | Reported-by: Alexander Bulekov <alxndr@bu.edu> | ||
44 | Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> | ||
45 | Tested-by: Alexander Bulekov <alxndr@bu.edu> | ||
46 | Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com> | ||
7 | Signed-off-by: Jason Wang <jasowang@redhat.com> | 47 | Signed-off-by: Jason Wang <jasowang@redhat.com> |
8 | --- | 48 | --- |
9 | qemu-options.hx | 4 ++-- | 49 | hw/net/rtl8139.c | 3 +++ |
10 | 1 file changed, 2 insertions(+), 2 deletions(-) | 50 | 1 file changed, 3 insertions(+) |
11 | 51 | ||
12 | diff --git a/qemu-options.hx b/qemu-options.hx | 52 | diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c |
13 | index XXXXXXX..XXXXXXX 100644 | 53 | index XXXXXXX..XXXXXXX 100644 |
14 | --- a/qemu-options.hx | 54 | --- a/hw/net/rtl8139.c |
15 | +++ b/qemu-options.hx | 55 | +++ b/hw/net/rtl8139.c |
16 | @@ -XXX,XX +XXX,XX @@ qemu-system-i386 linux.img -net nic -net tap | 56 | @@ -XXX,XX +XXX,XX @@ static int rtl8139_cplus_transmit_one(RTL8139State *s) |
17 | #launch a QEMU instance with two NICs, each one connected | 57 | |
18 | #to a TAP device | 58 | int large_send_mss = (txdw0 >> CP_TC_LGSEN_MSS_SHIFT) & |
19 | qemu-system-i386 linux.img \ | 59 | CP_TC_LGSEN_MSS_MASK; |
20 | - -net nic,vlan=0 -net tap,vlan=0,ifname=tap0 \ | 60 | + if (large_send_mss == 0) { |
21 | - -net nic,vlan=1 -net tap,vlan=1,ifname=tap1 | 61 | + goto skip_offload; |
22 | + -netdev tap,id=nd0,ifname=tap0 -device e1000,netdev=nd0 \ | 62 | + } |
23 | + -netdev tap,id=nd1,ifname=tap1 -device rtl8139,netdev=nd1 | 63 | |
24 | @end example | 64 | DPRINTF("+++ C+ mode offloaded task TSO IP data %d " |
25 | 65 | "frame data %d specified MSS=%d\n", | |
26 | @example | ||
27 | -- | 66 | -- |
28 | 2.7.4 | 67 | 2.7.4 |
29 | 68 | ||
30 | 69 | diff view generated by jsdifflib |