The following changes since commit e3debd5e7d0ce031356024878a0a18b9d109354a:

  Merge tag 'pull-request-2023-03-24' of https://gitlab.com/thuth/qemu into staging (2023-03-24 16:08:46 +0000)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to fba7c3b788dfcb99a3f9253f7d99cc0d217d6d3c:

  igb: respect VMVIR and VMOLR for VLAN (2023-03-28 13:10:55 +0800)

----------------------------------------------------------------

----------------------------------------------------------------
Akihiko Odaki (4):
      igb: Save more Tx states
      igb: Fix DMA requester specification for Tx packet
      hw/net/net_tx_pkt: Ignore ECN bit
      hw/net/net_tx_pkt: Align l3_hdr

Sriram Yagnaraman (8):
      MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
      igb: handle PF/VF reset properly
      igb: add ICR_RXDW
      igb: implement VFRE and VFTE registers
      igb: check oversized packets for VMDq
      igb: respect E1000_VMOLR_RSSE
      igb: implement VF Tx and Rx stats
      igb: respect VMVIR and VMOLR for VLAN

 MAINTAINERS          |   1 +
 hw/net/e1000e_core.c |   6 +-
 hw/net/e1000x_regs.h |   4 +
 hw/net/igb.c         |  26 ++++--
 hw/net/igb_core.c    | 256 ++++++++++++++++++++++++++++++++++++++-------------
 hw/net/igb_core.h    |   9 +-
 hw/net/igb_regs.h    |   6 ++
 hw/net/net_tx_pkt.c  |  30 +++---
 hw/net/net_tx_pkt.h  |   3 +-
 hw/net/trace-events  |   2 +
 hw/net/vmxnet3.c     |   4 +-
 11 files changed, 254 insertions(+), 93 deletions(-)

From: Akihiko Odaki <akihiko.odaki@daynix.com>

The current implementation of igb uses only part of an advanced Tx
context descriptor and first data descriptor because it misses some
features and sniffs the traits of the packet instead of respecting the
packet type specified in the descriptor. However, we will certainly
need the entire Tx context descriptor when we update igb to respect
these ignored fields. Save the entire context descriptor and first
data descriptor except the buffer address to prepare for such a change.

This also introduces the distinction of contexts with different
indexes, which exists in igb but not in e1000e.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
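Reviewer note, not part of the commit: a minimal sketch of the descriptor
layout and index selection this patch models. The struct fields match the
VMSTATE entries below; the standalone helper name is made up for
illustration.

    #include <stdint.h>

    /* Advanced Tx context descriptor, as saved by this patch. */
    struct e1000_adv_tx_context_desc {
        uint32_t vlan_macip_lens;
        uint32_t seqnum_seed;
        uint32_t type_tucmd_mlhl;
        uint32_t mss_l4len_idx;
    };

    /* igb caches two contexts per Tx queue; both context and data
     * descriptors carry an IDX field (bit 4 as used here) that picks
     * which cached context applies. */
    static inline uint32_t adv_tx_ctx_index(uint32_t mss_l4len_idx)
    {
        return (mss_l4len_idx >> 4) & 1;
    }
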
 hw/net/igb.c      | 26 +++++++++++++++++++-------
 hw/net/igb_core.c | 39 +++++++++++++++++++--------------------
 hw/net/igb_core.h |  8 +++-----
 3 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/hw/net/igb.c b/hw/net/igb.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -XXX,XX +XXX,XX @@ static int igb_post_load(void *opaque, int version_id)
     return igb_core_post_load(&s->core);
 }

-static const VMStateDescription igb_vmstate_tx = {
-    .name = "igb-tx",
+static const VMStateDescription igb_vmstate_tx_ctx = {
+    .name = "igb-tx-ctx",
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(vlan, struct igb_tx),
-        VMSTATE_UINT16(mss, struct igb_tx),
-        VMSTATE_BOOL(tse, struct igb_tx),
-        VMSTATE_BOOL(ixsm, struct igb_tx),
-        VMSTATE_BOOL(txsm, struct igb_tx),
+        VMSTATE_UINT32(vlan_macip_lens, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(type_tucmd_mlhl, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription igb_vmstate_tx = {
+    .name = "igb-tx",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
+                             struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
+        VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
         VMSTATE_BOOL(first, struct igb_tx),
         VMSTATE_BOOL(skip_cp, struct igb_tx),
         VMSTATE_END_OF_LIST()
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
-    if (tx->tse) {
-        if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
+    if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
+        uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
+        uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
+        if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
             return false;
         }

@@ -XXX,XX +XXX,XX @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
         return true;
     }

-    if (tx->txsm) {
+    if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
         if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
             return false;
         }
     }

-    if (tx->ixsm) {
+    if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
         net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
     }

@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
 {
     struct e1000_adv_tx_context_desc *tx_ctx_desc;
     uint32_t cmd_type_len;
-    uint32_t olinfo_status;
+    uint32_t idx;
     uint64_t buffer_addr;
     uint16_t length;

@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
         E1000_ADVTXD_DTYP_DATA) {
         /* advanced transmit data descriptor */
         if (tx->first) {
-            olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
-
-            tx->tse = !!(cmd_type_len & E1000_ADVTXD_DCMD_TSE);
-            tx->ixsm = !!(olinfo_status & E1000_ADVTXD_POTS_IXSM);
-            tx->txsm = !!(olinfo_status & E1000_ADVTXD_POTS_TXSM);
-
+            tx->first_cmd_type_len = cmd_type_len;
+            tx->first_olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
             tx->first = false;
         }
     } else if ((cmd_type_len & E1000_ADVTXD_DTYP_CTXT) ==
               E1000_ADVTXD_DTYP_CTXT) {
         /* advanced transmit context descriptor */
         tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
-        tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
-        tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
+        idx = (le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 4) & 1;
+        tx->ctx[idx].vlan_macip_lens = le32_to_cpu(tx_ctx_desc->vlan_macip_lens);
+        tx->ctx[idx].seqnum_seed = le32_to_cpu(tx_ctx_desc->seqnum_seed);
+        tx->ctx[idx].type_tucmd_mlhl = le32_to_cpu(tx_ctx_desc->type_tucmd_mlhl);
+        tx->ctx[idx].mss_l4len_idx = le32_to_cpu(tx_ctx_desc->mss_l4len_idx);
         return;
     } else {
         /* unknown descriptor type */
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
     if (cmd_type_len & E1000_TXD_CMD_EOP) {
         if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
             if (cmd_type_len & E1000_TXD_CMD_VLE) {
-                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
-                                                core->mac[VET] & 0xffff);
+                idx = (tx->first_olinfo_status >> 4) & 1;
+                uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
+                uint16_t vet = core->mac[VET] & 0xffff;
+                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
             }
             if (igb_tx_pkt_send(core, tx, queue_index)) {
                 igb_on_tx_done_update_stats(core, tx->tx_pkt);
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
         tx = &core->tx[i];
         net_tx_pkt_reset(tx->tx_pkt);
-        tx->vlan = 0;
-        tx->mss = 0;
-        tx->tse = false;
-        tx->ixsm = false;
-        tx->txsm = false;
+        memset(tx->ctx, 0, sizeof(tx->ctx));
         tx->first = true;
         tx->skip_cp = false;
     }
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.h
+++ b/hw/net/igb_core.h
@@ -XXX,XX +XXX,XX @@ struct IGBCore {
     QEMUTimer *autoneg_timer;

     struct igb_tx {
-        uint16_t vlan;  /* VLAN Tag */
-        uint16_t mss;   /* Maximum Segment Size */
-        bool tse;       /* TCP/UDP Segmentation Enable */
-        bool ixsm;      /* Insert IP Checksum */
-        bool txsm;      /* Insert TCP/UDP Checksum */
+        struct e1000_adv_tx_context_desc ctx[2];
+        uint32_t first_cmd_type_len;
+        uint32_t first_olinfo_status;

         bool first;
         bool skip_cp;
--
2.7.4

From: Akihiko Odaki <akihiko.odaki@daynix.com>

igb used to specify the PF as DMA requester when reading Tx packets.
This caused Tx requests from VFs to be performed on the address space
of the PF, defeating the purpose of SR-IOV. Add some logic to change
the requester depending on the queue, which can be assigned to a VF.

Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
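Reviewer note, not part of the commit: a rough sketch of the idea, assuming
QEMU's "hw/pci/pci.h". pci_dma_read() is QEMU's standard per-device DMA
helper; the wrapper name below is hypothetical.

    /* Issuing the Tx buffer read through the PCI function that owns
     * the queue makes the access go through that function's DMA
     * address space (including any IOMMU mappings set up for the VF),
     * rather than always using the PF's address space. */
    static inline void tx_buf_read_sketch(PCIDevice *requester,
                                          dma_addr_t addr, void *buf,
                                          dma_addr_t len)
    {
        pci_dma_read(requester, addr, buf, len);
    }
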
 hw/net/e1000e_core.c |  6 +++---
 hw/net/igb_core.c    | 13 ++++++++-----
 hw/net/net_tx_pkt.c  |  3 ++-
 hw/net/net_tx_pkt.h  |  3 ++-
 hw/net/vmxnet3.c     |  4 ++--
 5 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core,
         }

         tx->skip_cp = false;
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, core->owner);

         tx->sum_needed = 0;
         tx->cptse = 0;
@@ -XXX,XX +XXX,XX @@ e1000e_core_pci_uninit(E1000ECore *core)
     qemu_del_vm_change_state_handler(core->vmstate);

     for (i = 0; i < E1000E_NUM_QUEUES; i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
     }

@@ -XXX,XX +XXX,XX @@ static void e1000e_reset(E1000ECore *core, bool sw)
     e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);

     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
         memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
         core->tx[i].skip_cp = false;
     }
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)

 static void
 igb_process_tx_desc(IGBCore *core,
+                    PCIDevice *dev,
                     struct igb_tx *tx,
                     union e1000_adv_tx_desc *tx_desc,
                     int queue_index)
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,

         tx->first = true;
         tx->skip_cp = false;
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, dev);
     }
 }

@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
         d = core->owner;
     }

+    net_tx_pkt_reset(txr->tx->tx_pkt, d);
+
     while (!igb_ring_empty(core, txi)) {
         base = igb_ring_head_descr(core, txi);

@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
         trace_e1000e_tx_descr((void *)(intptr_t)desc.read.buffer_addr,
                               desc.read.cmd_type_len, desc.wb.status);

-        igb_process_tx_desc(core, txr->tx, &desc, txi->idx);
+        igb_process_tx_desc(core, d, txr->tx, &desc, txi->idx);
         igb_ring_advance(core, txi, 1);
         eic |= igb_txdesc_writeback(core, base, &desc, txi);
     }
@@ -XXX,XX +XXX,XX @@ igb_core_pci_realize(IGBCore *core,
     core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);

     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
+        net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
     }

     net_rx_pkt_init(&core->rx_pkt);
@@ -XXX,XX +XXX,XX @@ igb_core_pci_uninit(IGBCore *core)
     qemu_del_vm_change_state_handler(core->vmstate);

     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
     }

@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)

     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
         tx = &core->tx[i];
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, NULL);
         memset(tx->ctx, 0, sizeof(tx->ctx));
         tx->first = true;
         tx->skip_cp = false;
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
 #endif
 }

-void net_tx_pkt_reset(struct NetTxPkt *pkt)
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
 {
     int i;

@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
                           pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
         }
     }
+    pkt->pci_dev = pci_dev;
     pkt->raw_frags = 0;

     pkt->hdr_len = 0;
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
  * reset tx packet private context (needed to be called between packets)
  *
  * @pkt:            packet
+ * @dev:            PCI device processing the next packet
  *
  */
-void net_tx_pkt_reset(struct NetTxPkt *pkt);
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);

 /**
  * Send packet to qemu. handles sw offloads if vhdr is not supported.
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
             vmxnet3_complete_packet(s, qidx, txd_idx);
             s->tx_sop = true;
             s->skip_current_tx_pkt = false;
-            net_tx_pkt_reset(s->tx_pkt);
+            net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
         }
     }
 }
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
 {
     if (s->device_active) {
         VMW_CBPRN("Deactivating vmxnet3...");
-        net_tx_pkt_reset(s->tx_pkt);
+        net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
         net_tx_pkt_uninit(s->tx_pkt);
         net_rx_pkt_uninit(s->rx_pkt);
         s->device_active = false;
--
2.7.4

From: Akihiko Odaki <akihiko.odaki@daynix.com>

No segmentation should be performed if the gso type is
VIRTIO_NET_HDR_GSO_NONE, even if the ECN bit is set.

Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1544
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
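Reviewer note, not part of the commit: a self-contained illustration of the
masking. The constant values follow the virtio specification; the helper
name is made up.

    #include <stdint.h>

    #define VIRTIO_NET_HDR_GSO_NONE 0x00
    #define VIRTIO_NET_HDR_GSO_ECN  0x80

    /* GSO_NONE with the ECN bit set still means "do not segment",
     * so ECN must be masked off before comparing against GSO_NONE. */
    static inline int wants_segmentation(uint8_t gso_type)
    {
        return (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) != VIRTIO_NET_HDR_GSO_NONE;
    }
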
 hw/net/net_tx_pkt.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
 {
     assert(pkt);

+    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
+
     /*
      * Since underlying infrastructure does not support IP datagrams longer
      * than 64K we should drop such packets and don't even try to send
      */
-    if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
+    if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
         if (pkt->payload_len >
             ETH_MAX_IP_DGRAM_LEN -
             pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
         }
     }

-    if (offload || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
+    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
         if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
             net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                   pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
--
2.7.4

From: Akihiko Odaki <akihiko.odaki@daynix.com>

Align the l3_hdr member of NetTxPkt by defining it as a union of
ip_header, ip6_header, and an array of octets.

Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1544
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
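Reviewer note, not part of the commit: a compile-time illustration of why
the union helps. The struct is a stand-in, not QEMU's ip_header.

    #include <stdint.h>

    struct ip_hdr_standin {
        uint32_t fields[5];
    };

    /* A bare uint8_t array only guarantees byte alignment, so casting
     * it to a struct pointer is undefined behaviour and may trap on
     * strict-alignment hosts.  A union adopts the strictest alignment
     * of all its members, making the accesses through .ip/.ip6 safe. */
    union l3_hdr_standin {
        struct ip_hdr_standin ip;
        uint8_t octets[65535];
    };

    _Static_assert(_Alignof(union l3_hdr_standin) ==
                   _Alignof(struct ip_hdr_standin),
                   "union is aligned for its struct member");
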
 hw/net/net_tx_pkt.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -XXX,XX +XXX,XX @@ struct NetTxPkt {
     struct iovec *vec;

     uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
-    uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN];
+    union {
+        struct ip_header ip;
+        struct ip6_header ip6;
+        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
+    } l3_hdr;

     uint32_t payload_len;

@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
 {
     uint16_t csum;
     assert(pkt);
-    struct ip_header *ip_hdr;
-    ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;

-    ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
+    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
         pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);

-    ip_hdr->ip_sum = 0;
-    csum = net_raw_checksum((uint8_t *)ip_hdr,
+    pkt->l3_hdr.ip.ip_sum = 0;
+    csum = net_raw_checksum(pkt->l3_hdr.octets,
         pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
-    ip_hdr->ip_sum = cpu_to_be16(csum);
+    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
 }

 void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
 {
     struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
     if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
-        struct ip6_header *ip6 = (struct ip6_header *) pkt->l3_hdr;
         /*
          * TODO: if qemu would support >64K packets - add jumbo option check
          * something like that:
          * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
          */
-        if (ip6->ip6_plen == 0) {
+        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
             if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
-                ip6->ip6_plen = htons(pkt->payload_len);
+                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
             }
             /*
              * TODO: if qemu would support >64K packets
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

I would like to review and be informed of changes to the igb device.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index XXXXXXX..XXXXXXX 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -XXX,XX +XXX,XX @@ F: tests/qtest/libqos/e1000e.*

 igb
 M: Akihiko Odaki <akihiko.odaki@daynix.com>
+R: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
 S: Maintained
 F: docs/system/devices/igb.rst
 F: hw/net/igb*
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Use PFRSTD to reset the RSTI bit for VFs, and raise the VFLRE interrupt
when a VF is reset.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
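Reviewer note, not part of the commit: the RSTI/RSTD handshake from the VF
driver's point of view, as this patch models it. The mailbox constants match
igb_regs.h; the polling helper is illustrative only.

    #include <stdint.h>

    #define E1000_V2PMAILBOX_RSTI 0x00000040 /* PF reset in progress */
    #define E1000_V2PMAILBOX_RSTD 0x00000080 /* PF reset done */

    /* A VF sees RSTI while the PF is resetting; once the PF driver
     * writes CTRL_EXT.PFRSTD, RSTI is cleared and RSTD is set for
     * every VF, and the VF may bring its queues back up. */
    static void vf_wait_pf_reset_done(volatile const uint32_t *v2p_mailbox)
    {
        while (!(*v2p_mailbox & E1000_V2PMAILBOX_RSTD)) {
            /* spin until the PF signals reset done */
        }
    }
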
 hw/net/igb_core.c   | 38 ++++++++++++++++++++++++++------------
 hw/net/igb_regs.h   |  3 +++
 hw/net/trace-events |  2 ++
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ static void igb_set_eims(IGBCore *core, int index, uint32_t val)
     igb_update_interrupt_state(core);
 }

-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
-{
-    /* TODO: Reset of the queue enable and the interrupt registers of the VF. */
-
-    core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
-    core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
-}
-
 static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
 {
     uint32_t ent = core->mac[VTIVAR_MISC + vfn];
@@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
     }
 }

+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
+{
+    /* disable Rx and Tx for the VF*/
+    core->mac[VFTE] &= ~BIT(vfn);
+    core->mac[VFRE] &= ~BIT(vfn);
+    /* indicate VF reset to PF */
+    core->mac[VFLRE] |= BIT(vfn);
+    /* VFLRE and mailbox use the same interrupt cause */
+    mailbox_interrupt_to_pf(core);
+}
+
 static void igb_w1c(IGBCore *core, int index, uint32_t val)
 {
     core->mac[index] &= ~val;
@@ -XXX,XX +XXX,XX @@ igb_set_status(IGBCore *core, int index, uint32_t val)
 static void
 igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
 {
-    trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
-                                     !!(val & E1000_CTRL_EXT_SPD_BYPS));
-
-    /* TODO: PFRSTD */
+    trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+                                  !!(val & E1000_CTRL_EXT_SPD_BYPS),
+                                  !!(val & E1000_CTRL_EXT_PFRSTD));

     /* Zero self-clearing bits */
     val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
     core->mac[CTRL_EXT] = val;
+
+    if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
+        for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+            core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
+            core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
+        }
+    }
 }

 static void
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)

     e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);

+    for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+        /* Set RSTI, so VF can identify a PF reset is in progress */
+        core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTI;
+    }
+
     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
         tx = &core->tx[i];
         net_tx_pkt_reset(tx->tx_pkt, NULL);
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {

 /* from igb/e1000_defines.h */

+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD 0x00004000
+
 #define E1000_IVAR_VALID 0x80
 #define E1000_GPIE_NSICR 0x00000001
 #define E1000_GPIE_MSIX_MODE 0x00000010
diff --git a/hw/net/trace-events b/hw/net/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -XXX,XX +XXX,XX @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED"
 igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
 igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"

+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
+
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
 igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

IGB uses the RXDW ICR bit to indicate that an rx descriptor has been
written back. This is the same bit as RXT0 in older hardware.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/e1000x_regs.h | 4 ++++
10
hw/net/igb_core.c | 2 +-
11
2 files changed, 5 insertions(+), 1 deletion(-)
12
13
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/net/e1000x_regs.h
16
+++ b/hw/net/e1000x_regs.h
17
@@ -XXX,XX +XXX,XX @@
18
#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */
19
#define E1000_ICR_RXO 0x00000040 /* rx overrun */
20
#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */
21
+#define E1000_ICR_RXDW 0x00000080 /* rx desc written back */
22
#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */
23
#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */
24
#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
25
@@ -XXX,XX +XXX,XX @@
26
#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
27
#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */
28
#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
29
+#define E1000_ICS_RXDW E1000_ICR_RXDW /* rx desc written back */
30
#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */
31
#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
32
#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
33
@@ -XXX,XX +XXX,XX @@
34
#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
35
#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */
36
#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
37
+#define E1000_IMS_RXDW E1000_ICR_RXDW /* rx desc written back */
38
#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */
39
#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
40
#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
41
@@ -XXX,XX +XXX,XX @@
42
#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
43
#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */
44
#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */
45
+#define E1000_IMC_RXDW E1000_ICR_RXDW /* rx desc written back */
46
#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */
47
#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
48
#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
49
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/hw/net/igb_core.c
52
+++ b/hw/net/igb_core.c
53
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
54
continue;
55
}
56
57
- n |= E1000_ICR_RXT0;
58
+ n |= E1000_ICR_RXDW;
59
60
igb_rx_fix_l4_csum(core, core->rx_pkt);
61
igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
62
--
63
2.7.4
diff view generated by jsdifflib
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Also introduce:
- Checks for RXDCTL/TXDCTL queue enable bits
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
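Reviewer note, not part of the commit: a sketch of the queue/pool layout the
new checks rely on. igb's 16 queues serve 8 VM pools, so queue n and queue
n + 8 belong to pool n % 8; the helper name is made up.

    #include <stdint.h>

    #define IGB_NUM_VM_POOLS 8

    static inline uint16_t queue_to_pool(uint16_t qn)
    {
        return qn % IGB_NUM_VM_POOLS;
    }

    /* As igb_tx_enabled() below expresses it, a Tx queue is usable
     * only if the global enable (TCTL.EN), the per-pool enable (the
     * pool's VFTE bit, in VMDq mode) and the per-queue enable
     * (TXDCTL.QUEUE_ENABLE) all agree; Rx is symmetric with RCTL,
     * VFRE and RXDCTL. */
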
 hw/net/igb_core.c | 38 +++++++++++++++++++++++++++++++-------
 hw/net/igb_core.h |  1 +
 hw/net/igb_regs.h |  3 +++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
     return igb_tx_wb_eic(core, txi->idx);
 }

+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+    bool vmdq = core->mac[MRQC] & 1;
+    uint16_t qn = txi->idx;
+    uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+    return (core->mac[TCTL] & E1000_TCTL_EN) &&
+        (!vmdq || core->mac[VFTE] & BIT(pool)) &&
+        (core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
 static void
 igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 {
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
     const E1000E_RingInfo *txi = txr->i;
     uint32_t eic = 0;

-    /* TODO: check if the queue itself is enabled too. */
-    if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+    if (!igb_tx_enabled(core, txi)) {
         trace_e1000e_tx_disabled();
         return;
     }
@@ -XXX,XX +XXX,XX @@ igb_can_receive(IGBCore *core)

     for (i = 0; i < IGB_NUM_QUEUES; i++) {
         E1000E_RxRing rxr;
+        if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+            continue;
+        }

         igb_rx_ring_init(core, &rxr, i);
         if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,

     if (core->mac[MRQC] & 1) {
         if (is_broadcast_ether_addr(ehdr->h_dest)) {
-            for (i = 0; i < 8; i++) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                 if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
                     queues |= BIT(i);
                 }
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
             f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
             f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
             if (macp[f >> 5] & (1 << (f & 0x1f))) {
-                for (i = 0; i < 8; i++) {
+                for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                     if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
                         queues |= BIT(i);
                     }
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
                 }
             }
         } else {
-            for (i = 0; i < 8; i++) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                 if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
                     mask |= BIT(i);
                 }
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
             queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
         }

+        queues &= core->mac[VFRE];
         igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
         if (rss_info->queue & 1) {
             queues <<= 8;
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
             e1000x_fcs_len(core->mac);

         for (i = 0; i < IGB_NUM_QUEUES; i++) {
-            if (!(queues & BIT(i))) {
+            if (!(queues & BIT(i)) ||
+                !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
                 continue;
             }

@@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)

 static void igb_vf_reset(IGBCore *core, uint16_t vfn)
 {
+    uint16_t qn0 = vfn;
+    uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
     /* disable Rx and Tx for the VF*/
-    core->mac[VFTE] &= ~BIT(vfn);
+    core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+    core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+    core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+    core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
     core->mac[VFRE] &= ~BIT(vfn);
+    core->mac[VFTE] &= ~BIT(vfn);
     /* indicate VF reset to PF */
     core->mac[VFLRE] |= BIT(vfn);
     /* VFLRE and mailbox use the same interrupt cause */
@@ -XXX,XX +XXX,XX @@ igb_phy_reg_init[] = {
 static const uint32_t igb_mac_reg_init[] = {
     [LEDCTL] = 2 | (3 << 8) | BIT(15) | (6 << 16) | (7 << 24),
     [EEMNGCTL] = BIT(31),
+    [TXDCTL0] = E1000_TXDCTL_QUEUE_ENABLE,
     [RXDCTL0] = E1000_RXDCTL_QUEUE_ENABLE | (1 << 16),
     [RXDCTL1] = 1 << 16,
     [RXDCTL2] = 1 << 16,
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.h
+++ b/hw/net/igb_core.h
@@ -XXX,XX +XXX,XX @@
 #define IGB_MSIX_VEC_NUM (10)
 #define IGBVF_MSIX_VEC_NUM (3)
 #define IGB_NUM_QUEUES (16)
+#define IGB_NUM_VM_POOLS (8)

 typedef struct IGBCore IGBCore;

diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
 #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000

+/* Additional Transmit Descriptor Control definitions */
+#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */
+
 /* Additional Receive Descriptor Control definitions */
 #define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */

--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
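Reviewer note, not part of the commit: the per-pool size check in isolation,
mirroring igb_rx_is_oversized() below. The VMOLR constant values follow the
Linux igb defines; the helper name is made up.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define E1000_VMOLR_RLPML_MASK 0x00003fff /* long packet max length */
    #define E1000_VMOLR_LPE        0x00010000 /* accept long packets */

    static bool pool_rx_oversized(uint32_t vmolr, size_t size)
    {
        /* With LPE set, the pool accepts frames up to its RLPML value;
         * otherwise only a standard VLAN-tagged frame (1522 octets). */
        size_t limit = (vmolr & E1000_VMOLR_LPE) ?
                       (vmolr & E1000_VMOLR_RLPML_MASK) : 1522;
        return size > limit;
    }
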
 hw/net/igb_core.c | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_rx_l4_cso_enabled(IGBCore *core)
     return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
 }

+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+    uint16_t pool = qn % IGB_NUM_VM_POOLS;
+    bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+    int max_ethernet_lpe_size =
+        core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+    int max_ethernet_vlan_size = 1522;
+
+    return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
 static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
-                                   E1000E_RSSInfo *rss_info, bool *external_tx)
+                                   size_t size, E1000E_RSSInfo *rss_info,
+                                   bool *external_tx)
 {
     static const int ta_shift[] = { 4, 3, 2, 0 };
     uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
     uint16_t queues = 0;
+    uint16_t oversized = 0;
     uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
     bool accepted = false;
     int i;
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
         }

         queues &= core->mac[VFRE];
-        igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
-        if (rss_info->queue & 1) {
-            queues <<= 8;
+        if (queues) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+                if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+                    oversized |= BIT(i);
+                }
+            }
+            /* 8.19.37 increment ROC if packet is oversized for all queues */
+            if (oversized == queues) {
+                trace_e1000x_rx_oversized(size);
+                e1000x_inc_reg_if_not_full(core->mac, ROC);
+            }
+            queues &= ~oversized;
+        }
+
+        if (queues) {
+            igb_rss_parse_packet(core, core->rx_pkt,
+                                 external_tx != NULL, rss_info);
+            if (rss_info->queue & 1) {
+                queues <<= 8;
+            }
         }
     } else {
         switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
                                e1000x_vlan_enabled(core->mac),
                                core->mac[VET] & 0xffff);

-    queues = igb_receive_assign(core, ehdr, &rss_info, external_tx);
+    queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
     if (!queues) {
         trace_e1000e_rx_flt_dropped();
         return orig_size;
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

RSS for VFs is only enabled if VMOLR[n].RSSE is set.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
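Reviewer note, not part of the commit: the queue numbering behind the
"PQn = VFn + VQn*8" comment (datasheet Sec 8.26.1). The helper name is made
up.

    #include <stdint.h>

    #define IGB_NUM_VM_POOLS 8

    /* In VMDq+RSS mode each pool owns two queues: RSS selects virtual
     * queue 0 or 1, and the physical queue is pool + vq * 8.  The hunk
     * below only promotes a pool's bit to its upper queue when that
     * pool actually has VMOLR.RSSE set. */
    static inline uint16_t physical_queue(uint16_t pool, uint16_t vq)
    {
        return pool + vq * IGB_NUM_VM_POOLS;
    }
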
 hw/net/igb_core.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
         if (queues) {
             igb_rss_parse_packet(core, core->rx_pkt,
                                  external_tx != NULL, rss_info);
+            /* Sec 8.26.1: PQn = VFn + VQn*8 */
             if (rss_info->queue & 1) {
-                queues <<= 8;
+                for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+                    if ((queues & BIT(i)) &&
+                        (core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+                        queues |= BIT(i + IGB_NUM_VM_POOLS);
+                        queues &= ~BIT(i);
+                    }
+                }
             }
         }
     } else {
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Please note that the loopback counters for VM-to-VM traffic are not
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
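Reviewer note, not part of the commit: how the per-VF statistics registers
are laid out, as the (pool * 64) indexing in the patch suggests. The helper
name is made up.

    #include <stdint.h>

    #define IGB_NUM_VM_POOLS 8

    /* Each pool has its own bank of counters spaced 64 register words
     * apart, e.g. PVFGPTC1 = PVFGPTC0 + 64, so an index into the
     * mac[] register array is base + pool * 64. */
    static inline uint32_t pvf_reg(uint32_t base, uint16_t pool)
    {
        return base + (pool % IGB_NUM_VM_POOLS) * 64;
    }
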
 hw/net/igb_core.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int queue_index)
 }

 static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
 {
     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
                                     PTC1023, PTC1522 };
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
     core->mac[GPTC] = core->mac[TPT];
     core->mac[GOTCL] = core->mac[TOTL];
     core->mac[GOTCH] = core->mac[TOTH];
+
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+        core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+        core->mac[PVFGPTC0 + (pool * 64)]++;
+    }
 }

 static void
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
                 net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
             }
             if (igb_tx_pkt_send(core, tx, queue_index)) {
-                igb_on_tx_done_update_stats(core, tx->tx_pkt);
+                igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
             }
         }

@@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core,
 }

 static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+                    size_t data_size, size_t data_fcs_size)
 {
     e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);

@@ -XXX,XX +XXX,XX @@ igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
     default:
         break;
     }
+
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+        core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+        core->mac[PVFGPRC0 + (pool * 64)]++;
+        if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+            core->mac[PVFMPRC0 + (pool * 64)]++;
+        }
+    }
 }

 static inline bool
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,

     } while (desc_offset < total_size);

-    igb_update_rx_stats(core, size, total_size);
+    igb_update_rx_stats(core, rxi, size, total_size);
 }

 static inline void
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Add support for stripping/inserting VLAN for VFs.

The CSUM calculation had to be moved back into the for loop, since
packet data is pulled inside the loop based on the strip-VLAN decision
for every VF.

net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead
for igb; work for a future patch.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
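Reviewer note, not part of the commit: the per-pool Tx VLAN-insertion
policy in isolation, mirroring igb_tx_insert_vlan() below. The VMVIR
constant values follow the Linux igb defines; the helper name is made up.

    #include <stdbool.h>
    #include <stdint.h>

    #define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* always use default VLAN */
    #define E1000_VMVIR_VLANA_NEVER   0x80000000 /* never insert VLAN tag */

    /* The descriptor's VLE request can be overridden per pool: VMVIR
     * may force the pool's default VLAN or forbid insertion entirely.
     * Returns whether to insert and, if so, which VLAN tag to use. */
    static bool resolve_tx_vlan(uint32_t vmvir, bool vle, uint16_t *vlan)
    {
        if (vmvir & E1000_VMVIR_VLANA_DEFAULT) {
            *vlan = vmvir & 0xffff;
            return true;
        }
        if (vmvir & E1000_VMVIR_VLANA_NEVER) {
            return false;
        }
        return vle;
    }
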
 hw/net/igb_core.c | 62 +++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
     info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
 }

+static void
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
+                   uint16_t vlan, bool insert_vlan)
+{
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+        if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+            /* always insert default VLAN */
+            insert_vlan = true;
+            vlan = core->mac[VMVIR0 + pool] & 0xffff;
+        } else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+            insert_vlan = false;
+        }
+    }
+
+    if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
+        net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
+            core->mac[VET] & 0xffff);
+    }
+}
+
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,

     if (cmd_type_len & E1000_TXD_CMD_EOP) {
         if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
-            if (cmd_type_len & E1000_TXD_CMD_VLE) {
-                idx = (tx->first_olinfo_status >> 4) & 1;
-                uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
-                uint16_t vet = core->mac[VET] & 0xffff;
-                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
-            }
+            idx = (tx->first_olinfo_status >> 4) & 1;
+            igb_tx_insert_vlan(core, queue_index, tx,
+                               tx->ctx[idx].vlan_macip_lens >> 16,
+                               !!(cmd_type_len & E1000_TXD_CMD_VLE));
+
             if (igb_tx_pkt_send(core, tx, queue_index)) {
                 igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
             }
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     igb_update_rx_stats(core, rxi, size, total_size);
 }

+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+        /* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+        return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+                core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+                core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+    }
+
+    return e1000x_vlan_enabled(core->mac);
+}
+
 static inline void
 igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
 {
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,

     ehdr = PKT_GET_ETH_HDR(filter_buf);
     net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
-
-    net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
-                               e1000x_vlan_enabled(core->mac),
-                               core->mac[VET] & 0xffff);
+    net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);

     queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
     if (!queues) {
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
         return orig_size;
     }

-    total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
-        e1000x_fcs_len(core->mac);
-
     for (i = 0; i < IGB_NUM_QUEUES; i++) {
         if (!(queues & BIT(i)) ||
             !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,

         igb_rx_ring_init(core, &rxr, i);

+        net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+                                   igb_rx_strip_vlan(core, rxr.i),
+                                   core->mac[VET] & 0xffff);
+
+        total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+            e1000x_fcs_len(core->mac);
+
         if (!igb_has_rxbufs(core, rxr.i, total_size)) {
             n |= E1000_ICS_RXO;
             trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
--
2.7.4