The following changes since commit 352998df1c53b366413690d95b35f76d0721ebed:

  Merge tag 'i2c-20220314' of https://github.com/philmd/qemu into staging (2022-03-14 14:39:33 +0000)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to 12a195fa343aae2ead1301ce04727bd0ae25eb15:

  vdpa: Expose VHOST_F_LOG_ALL on SVQ (2022-03-15 13:57:44 +0800)

----------------------------------------------------------------

Changes since V2:

- fix the compile error

----------------------------------------------------------------
Eugenio Pérez (14):
      vhost: Add VhostShadowVirtqueue
      vhost: Add Shadow VirtQueue kick forwarding capabilities
      vhost: Add Shadow VirtQueue call forwarding capabilities
      vhost: Add vhost_svq_valid_features to shadow vq
      virtio: Add vhost_svq_get_vring_addr
      vdpa: adapt vhost_ops callbacks to svq
      vhost: Shadow virtqueue buffers forwarding
      util: Add iova_tree_alloc_map
      util: add iova_tree_find_iova
      vhost: Add VhostIOVATree
      vdpa: Add custom IOTLB translations to SVQ
      vdpa: Adapt vhost_vdpa_get_vring_base to SVQ
      vdpa: Never set log_base addr if SVQ is enabled
      vdpa: Expose VHOST_F_LOG_ALL on SVQ

Jason Wang (1):
      virtio-net: fix map leaking on error during receive

 hw/net/virtio-net.c                |   1 +
 hw/virtio/meson.build              |   2 +-
 hw/virtio/vhost-iova-tree.c        | 110 +++++++
 hw/virtio/vhost-iova-tree.h        |  27 ++
 hw/virtio/vhost-shadow-virtqueue.c | 636 +++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  87 +++++
 hw/virtio/vhost-vdpa.c             | 522 +++++++++++++++++++++++++++++-
 include/hw/virtio/vhost-vdpa.h     |   8 +
 include/qemu/iova-tree.h           |  38 ++-
 util/iova-tree.c                   | 170 ++++++++++
 10 files changed, 1584 insertions(+), 17 deletions(-)
 create mode 100644 hw/virtio/vhost-iova-tree.c
 create mode 100644 hw/virtio/vhost-iova-tree.h
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.h

Commit bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
tries to fix the use after free of the sg by caching the virtqueue
elements in an array and unmapping them at once after receiving the
packets, but it forgot to unmap the cached elements on error, which
will lead to leaking of mappings and other unexpected results.

Fix this by detaching the cached elements on error. This addresses
CVE-2022-26353.

Reported-by: Victor Tom <vv474172261@gmail.com>
Cc: qemu-stable@nongnu.org
Fixes: CVE-2022-26353
Fixes: bedd7e93d0196 ("virtio-net: fix use after unmap/free for sg")
Reviewed-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
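
The essence of the fix, pulled out of the hunk below for readability
(same variable names as the receive path; everything else elided):

    err:
        for (j = 0; j < i; j++) {
            /* Unmap the element's in-flight buffers first... */
            virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
            /* ...then release the cached element itself. */
            g_free(elems[j]);
        }

Without the detach, the mappings created for the cached elements were
simply leaked on the error path.
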
 hw/net/virtio-net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/virtio-net.c
+++ b/hw/net/virtio-net.c
@@ -XXX,XX +XXX,XX @@ static ssize_t virtio_net_receive_rcu(NetClientState *nc, const uint8_t *buf,
 
 err:
     for (j = 0; j < i; j++) {
+        virtqueue_detach_element(q->rx_vq, elems[j], lens[j]);
         g_free(elems[j]);
     }
 
-- 
2.7.4

From: Eugenio Pérez <eperezma@redhat.com>

Vhost shadow virtqueue (SVQ) is an intermediate jump for virtqueue
notifications and buffers, allowing qemu to track them. While qemu is
forwarding the buffers and virtqueue changes, it is able to commit the
memory that is being dirtied, the same way regular qemu VirtIO devices
do.

This commit only exposes basic SVQ allocation and free. Next patches of
the series add functionality like notifications and buffers forwarding.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
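
The ownership model is worth noting: vhost_svq_new() builds the SVQ in a
g_autofree pointer and only g_steal_pointer()s it out on success, so a
partially built object can never leak. A minimal usage sketch of the new
API (example_make_svq() is hypothetical; the real caller arrives later
in the series):

    #include "hw/virtio/vhost-shadow-virtqueue.h"

    static VhostShadowVirtqueue *example_make_svq(void)
    {
        /* Freed automatically through vhost_svq_free() unless stolen */
        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();

        if (!svq) {
            return NULL; /* vhost_svq_new already reported the error */
        }
        /* Transfer ownership to the caller; autoptr cleanup is skipped */
        return g_steal_pointer(&svq);
    }
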
 hw/virtio/meson.build              |  2 +-
 hw/virtio/vhost-shadow-virtqueue.c | 62 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h | 28 +++++++++++++++++
 3 files changed, 91 insertions(+), 1 deletion(-)
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.c
 create mode 100644 hw/virtio/vhost-shadow-virtqueue.h

diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
 
 virtio_ss = ss.source_set()
 virtio_ss.add(files('virtio.c'))
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c'))
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
 virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost shadow virtqueue
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
+
+#include "qemu/error-report.h"
+
+/**
+ * Creates vhost shadow virtqueue, and instructs the vhost device to use the
+ * shadow methods and file descriptors.
+ *
+ * Returns the new virtqueue or NULL.
+ *
+ * In case of error, reason is reported through error_report.
+ */
+VhostShadowVirtqueue *vhost_svq_new(void)
+{
+    g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
+    int r;
+
+    r = event_notifier_init(&svq->hdev_kick, 0);
+    if (r != 0) {
+        error_report("Couldn't create kick event notifier: %s (%d)",
+                     g_strerror(errno), errno);
+        goto err_init_hdev_kick;
+    }
+
+    r = event_notifier_init(&svq->hdev_call, 0);
+    if (r != 0) {
+        error_report("Couldn't create call event notifier: %s (%d)",
+                     g_strerror(errno), errno);
+        goto err_init_hdev_call;
+    }
+
+    return g_steal_pointer(&svq);
+
+err_init_hdev_call:
+    event_notifier_cleanup(&svq->hdev_kick);
+
+err_init_hdev_kick:
+    return NULL;
+}
+
+/**
+ * Free the resources of the shadow virtqueue.
+ *
+ * @pvq: gpointer to SVQ so it can be used by autofree functions.
+ */
+void vhost_svq_free(gpointer pvq)
+{
+    VhostShadowVirtqueue *vq = pvq;
+    event_notifier_cleanup(&vq->hdev_kick);
+    event_notifier_cleanup(&vq->hdev_call);
+    g_free(vq);
+}
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost shadow virtqueue
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#ifndef VHOST_SHADOW_VIRTQUEUE_H
+#define VHOST_SHADOW_VIRTQUEUE_H
+
+#include "qemu/event_notifier.h"
+
+/* Shadow virtqueue to relay notifications */
+typedef struct VhostShadowVirtqueue {
+    /* Shadow kick notifier, sent to vhost */
+    EventNotifier hdev_kick;
+    /* Shadow call notifier, sent to vhost */
+    EventNotifier hdev_call;
+} VhostShadowVirtqueue;
+
+VhostShadowVirtqueue *vhost_svq_new(void);
+
+void vhost_svq_free(gpointer vq);
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
+
+#endif
-- 
2.7.4

From: Eugenio Pérez <eperezma@redhat.com>

In this mode no buffer forwarding will be performed in SVQ mode: Qemu
will just forward the guest's kicks to the device.

Host memory notifiers regions are left out for simplicity, and they will
not be addressed in this series.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c |  55 ++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  14 ++++
 hw/virtio/vhost-vdpa.c             | 144 ++++++++++++++++++++++++++++++++++++-
 include/hw/virtio/vhost-vdpa.h     |   4 ++
 4 files changed, 215 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 
 #include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "linux-headers/linux/vhost.h"
+
+/**
+ * Forward guest notifications.
+ *
+ * @n: guest kick event notifier, the one that guest set to notify svq.
+ */
+static void vhost_handle_guest_kick(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
+    event_notifier_test_and_clear(n);
+    event_notifier_set(&svq->hdev_kick);
+}
+
+/**
+ * Set a new file descriptor for the guest to kick the SVQ and notify for avail
+ *
+ * @svq: The svq
+ * @svq_kick_fd: The svq kick fd
+ *
+ * Note that the SVQ will never close the old file descriptor.
+ */
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
+{
+    EventNotifier *svq_kick = &svq->svq_kick;
+    bool poll_stop = VHOST_FILE_UNBIND != event_notifier_get_fd(svq_kick);
+    bool poll_start = svq_kick_fd != VHOST_FILE_UNBIND;
+
+    if (poll_stop) {
+        event_notifier_set_handler(svq_kick, NULL);
+    }
+
+    /*
+     * event_notifier_set_handler already checks for guest's notifications if
+     * they arrive at the new file descriptor in the switch, so there is no
+     * need to explicitly check for them.
+     */
+    if (poll_start) {
+        event_notifier_init_fd(svq_kick, svq_kick_fd);
+        event_notifier_set(svq_kick);
+        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
+    }
+}
+
+/**
+ * Stop the shadow virtqueue operation.
+ * @svq: Shadow Virtqueue
+ */
+void vhost_svq_stop(VhostShadowVirtqueue *svq)
+{
+    event_notifier_set_handler(&svq->svq_kick, NULL);
+}
 
 /**
  * Creates vhost shadow virtqueue, and instructs the vhost device to use the
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
         goto err_init_hdev_call;
     }
 
+    event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
     return g_steal_pointer(&svq);
 
 err_init_hdev_call:
@@ -XXX,XX +XXX,XX @@ err_init_hdev_kick:
 void vhost_svq_free(gpointer pvq)
 {
     VhostShadowVirtqueue *vq = pvq;
+    vhost_svq_stop(vq);
     event_notifier_cleanup(&vq->hdev_kick);
     event_notifier_cleanup(&vq->hdev_call);
     g_free(vq);
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
     EventNotifier hdev_kick;
     /* Shadow call notifier, sent to vhost */
     EventNotifier hdev_call;
+
+    /*
+     * Borrowed virtqueue's guest to host notifier. To borrow it in this event
+     * notifier allows to recover the VhostShadowVirtqueue from the event loop
+     * easily. If we use the VirtQueue's one, we don't have an easy way to
+     * retrieve VhostShadowVirtqueue.
+     *
+     * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
+     */
+    EventNotifier svq_kick;
 } VhostShadowVirtqueue;
 
+void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
+
+void vhost_svq_stop(VhostShadowVirtqueue *svq);
+
 VhostShadowVirtqueue *vhost_svq_new(void);
 
 void vhost_svq_free(gpointer vq);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost.h"
 #include "hw/virtio/vhost-backend.h"
 #include "hw/virtio/virtio-net.h"
+#include "hw/virtio/vhost-shadow-virtqueue.h"
 #include "hw/virtio/vhost-vdpa.h"
 #include "exec/address-spaces.h"
 #include "qemu/main-loop.h"
 #include "cpu.h"
 #include "trace.h"
 #include "qemu-common.h"
+#include "qapi/error.h"
 
 /*
  * Return one past the end of the end of section. Be careful with uint64_t
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
     return v->index != 0;
 }
 
+static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
+                               Error **errp)
+{
+    g_autoptr(GPtrArray) shadow_vqs = NULL;
+
+    if (!v->shadow_vqs_enabled) {
+        return 0;
+    }
+
+    shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
+    for (unsigned n = 0; n < hdev->nvqs; ++n) {
+        g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
+
+        if (unlikely(!svq)) {
+            error_setg(errp, "Cannot create svq %u", n);
+            return -1;
+        }
+        g_ptr_array_add(shadow_vqs, g_steal_pointer(&svq));
+    }
+
+    v->shadow_vqs = g_steal_pointer(&shadow_vqs);
+    return 0;
+}
+
 static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
 {
     struct vhost_vdpa *v;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
     dev->opaque =  opaque ;
     v->listener = vhost_vdpa_memory_listener;
     v->msg_type = VHOST_IOTLB_MSG_V2;
+    ret = vhost_vdpa_init_svq(dev, v, errp);
+    if (ret) {
+        goto err;
+    }
 
     vhost_vdpa_get_iova_range(v);
 
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp)
                                VIRTIO_CONFIG_S_DRIVER);
 
     return 0;
+
+err:
+    ram_block_discard_disable(false);
+    return ret;
 }
 
 static void vhost_vdpa_host_notifier_uninit(struct vhost_dev *dev,
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_host_notifiers_uninit(struct vhost_dev *dev, int n)
 
 static void vhost_vdpa_host_notifiers_init(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int i;
 
+    if (v->shadow_vqs_enabled) {
+        /* FIXME SVQ is not compatible with host notifiers mr */
+        return;
+    }
+
     for (i = dev->vq_index; i < dev->vq_index + dev->nvqs; i++) {
         if (vhost_vdpa_host_notifier_init(dev, i)) {
             goto err;
@@ -XXX,XX +XXX,XX @@ err:
     return;
 }
 
+static void vhost_vdpa_svq_cleanup(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    size_t idx;
+
+    if (!v->shadow_vqs) {
+        return;
+    }
+
+    for (idx = 0; idx < v->shadow_vqs->len; ++idx) {
+        vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, idx));
+    }
+    g_ptr_array_free(v->shadow_vqs, true);
+}
+
 static int vhost_vdpa_cleanup(struct vhost_dev *dev)
 {
     struct vhost_vdpa *v;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev)
     trace_vhost_vdpa_cleanup(dev, v);
     vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     memory_listener_unregister(&v->listener);
+    vhost_vdpa_svq_cleanup(dev);
 
     dev->opaque = NULL;
     ram_block_discard_disable(false);
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_device_id(struct vhost_dev *dev,
     return ret;
 }
 
+static void vhost_vdpa_reset_svq(struct vhost_vdpa *v)
+{
+    if (!v->shadow_vqs_enabled) {
+        return;
+    }
+
+    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        vhost_svq_stop(svq);
+    }
+}
+
 static int vhost_vdpa_reset_device(struct vhost_dev *dev)
 {
+    struct vhost_vdpa *v = dev->opaque;
     int ret;
     uint8_t status = 0;
 
+    vhost_vdpa_reset_svq(v);
+
     ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status);
     trace_vhost_vdpa_reset_device(dev, status);
     return ret;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
     return ret;
 }
 
+static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+}
+
+/**
+ * Set the shadow virtqueue descriptors to the device
+ *
+ * @dev: The vhost device model
+ * @svq: The shadow virtqueue
+ * @idx: The index of the virtqueue in the vhost device
+ * @errp: Error
+ */
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
+                                 VhostShadowVirtqueue *svq, unsigned idx,
+                                 Error **errp)
+{
+    struct vhost_vring_file file = {
+        .index = dev->vq_index + idx,
+    };
+    const EventNotifier *event_notifier = &svq->hdev_kick;
+    int r;
+
+    file.fd = event_notifier_get_fd(event_notifier);
+    r = vhost_vdpa_set_vring_dev_kick(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device kick fd");
+    }
+
+    return r == 0;
+}
+
+static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    Error *err = NULL;
+    unsigned i;
+
+    if (!v->shadow_vqs) {
+        return true;
+    }
+
+    for (i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
+        if (unlikely(!ok)) {
+            error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+            return false;
+        }
+    }
+
+    return true;
+}
+
 static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
 {
     struct vhost_vdpa *v = dev->opaque;
+    bool ok;
     trace_vhost_vdpa_dev_start(dev, started);
 
     if (started) {
         vhost_vdpa_host_notifiers_init(dev);
+        ok = vhost_vdpa_svqs_start(dev);
+        if (unlikely(!ok)) {
+            return -1;
+        }
         vhost_vdpa_set_vring_ready(dev);
     } else {
         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
-    trace_vhost_vdpa_set_vring_kick(dev, file->index, file->fd);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
+    struct vhost_vdpa *v = dev->opaque;
+    int vdpa_idx = file->index - dev->vq_index;
+
+    if (v->shadow_vqs_enabled) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+        vhost_svq_set_svq_kick_fd(svq, file->fd);
+        return 0;
+    } else {
+        return vhost_vdpa_set_vring_dev_kick(dev, file);
+    }
 }
 
 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
index XXXXXXX..XXXXXXX 100644
--- a/include/hw/virtio/vhost-vdpa.h
+++ b/include/hw/virtio/vhost-vdpa.h
@@ -XXX,XX +XXX,XX @@
 #ifndef HW_VIRTIO_VHOST_VDPA_H
 #define HW_VIRTIO_VHOST_VDPA_H
 
+#include <gmodule.h>
+
 #include "hw/virtio/virtio.h"
 #include "standard-headers/linux/vhost_types.h"
 
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
     bool iotlb_batch_begin_sent;
     MemoryListener listener;
     struct vhost_vdpa_iova_range iova_range;
+    bool shadow_vqs_enabled;
+    GPtrArray *shadow_vqs;
     struct vhost_dev *dev;
     VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
 } VhostVDPA;
-- 
2.7.4

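The guest-to-device half of that relay is small enough to restate on its
own. A self-contained sketch of the same pattern using raw eventfds
(illustrative only -- the patch above uses QEMU's EventNotifier and its
main-loop handlers, not direct read()/write() calls):

    #include <stdint.h>
    #include <unistd.h>

    /* Both fds are eventfds. Drain one guest notification and re-raise
     * it on the fd the vhost device polls -- the whole job of
     * vhost_handle_guest_kick() above. */
    static void relay_kick(int guest_kick_fd, int hdev_kick_fd)
    {
        uint64_t cnt = 0;

        /* Clear the guest's kick (event_notifier_test_and_clear) */
        if (read(guest_kick_fd, &cnt, sizeof(cnt)) != sizeof(cnt)) {
            return; /* spurious wakeup: nothing to forward */
        }

        /* Forward it to the device (event_notifier_set) */
        cnt = 1;
        if (write(hdev_kick_fd, &cnt, sizeof(cnt)) != sizeof(cnt)) {
            /* an 8-byte eventfd write only fails on counter overflow;
             * nothing sensible to do in a sketch */
        }
    }
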
From: Eugenio Pérez <eperezma@redhat.com>

This will make qemu aware of the device's used buffers, allowing it to
write the guest memory with its contents if needed.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
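
For orientation, this is the notification plumbing once kick and call
forwarding are both in place (a schematic only; buffer forwarding is
added later in the series, so data still flows through the guest's
vring at this point):

    guest --(svq_kick fd)--> SVQ --(hdev_kick fd)--> device    [avail]
    guest <--(svq_call fd)-- SVQ <--(hdev_call fd)-- device    [used]
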
 hw/virtio/vhost-shadow-virtqueue.c | 38 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  4 ++++
 hw/virtio/vhost-vdpa.c             | 31 +++++++++++++++++++++++++++++--
 3 files changed, 71 insertions(+), 2 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@ static void vhost_handle_guest_kick(EventNotifier *n)
 }
 
 /**
+ * Forward vhost notifications
+ *
+ * @n: hdev call event notifier, the one that device set to notify svq.
+ */
+static void vhost_svq_handle_call(EventNotifier *n)
+{
+    VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
+                                             hdev_call);
+    event_notifier_test_and_clear(n);
+    event_notifier_set(&svq->svq_call);
+}
+
+/**
+ * Set the call notifier for the SVQ to call the guest
+ *
+ * @svq: Shadow virtqueue
+ * @call_fd: call notifier
+ *
+ * Called on BQL context.
+ */
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
+{
+    if (call_fd == VHOST_FILE_UNBIND) {
+        /*
+         * Fail event_notifier_set if called handling device call.
+         *
+         * SVQ still needs device notifications, since it needs to keep
+         * forwarding used buffers even with the unbind.
+         */
+        memset(&svq->svq_call, 0, sizeof(svq->svq_call));
+    } else {
+        event_notifier_init_fd(&svq->svq_call, call_fd);
+    }
+}
+
+/**
  * Set a new file descriptor for the guest to kick the SVQ and notify for avail
  *
  * @svq: The svq
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
     }
 
     event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
+    event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
     return g_steal_pointer(&svq);
 
 err_init_hdev_call:
@@ -XXX,XX +XXX,XX @@ void vhost_svq_free(gpointer pvq)
     VhostShadowVirtqueue *vq = pvq;
     vhost_svq_stop(vq);
     event_notifier_cleanup(&vq->hdev_kick);
+    event_notifier_set_handler(&vq->hdev_call, NULL);
     event_notifier_cleanup(&vq->hdev_call);
     g_free(vq);
 }
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
      * So shadow virtqueue must not clean it, or we would lose VirtQueue one.
      */
     EventNotifier svq_kick;
+
+    /* Guest's call notifier, where the SVQ calls guest. */
+    EventNotifier svq_call;
 } VhostShadowVirtqueue;
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
+void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
     return vhost_vdpa_call(dev, VHOST_SET_VRING_KICK, file);
 }
 
+static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
+                                         struct vhost_vring_file *file)
+{
+    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+}
+
 /**
  * Set the shadow virtqueue descriptors to the device
  *
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
  * @svq: The shadow virtqueue
  * @idx: The index of the virtqueue in the vhost device
  * @errp: Error
+ *
+ * Note that this function does not rewind kick file descriptor if cannot set
+ * call one.
  */
 static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
                                  VhostShadowVirtqueue *svq, unsigned idx,
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
     r = vhost_vdpa_set_vring_dev_kick(dev, &file);
     if (unlikely(r != 0)) {
         error_setg_errno(errp, -r, "Can't set device kick fd");
+        return false;
+    }
+
+    event_notifier = &svq->hdev_call;
+    file.fd = event_notifier_get_fd(event_notifier);
+    r = vhost_vdpa_set_vring_dev_call(dev, &file);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Can't set device call fd");
     }
 
     return r == 0;
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_kick(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
                                      struct vhost_vring_file *file)
 {
-    trace_vhost_vdpa_set_vring_call(dev, file->index, file->fd);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        int vdpa_idx = file->index - dev->vq_index;
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, vdpa_idx);
+
+        vhost_svq_set_svq_call_fd(svq, file->fd);
+        return 0;
+    } else {
+        return vhost_vdpa_set_vring_dev_call(dev, file);
+    }
 }
 
 static int vhost_vdpa_get_features(struct vhost_dev *dev,
-- 
2.7.4

From: Eugenio Pérez <eperezma@redhat.com>

This allows SVQ to negotiate features with the guest and the device. For
the device, SVQ is a driver. While this function bypasses all
non-transport features, it needs to disable the features that SVQ does
not support when forwarding buffers. This includes packed vq layout,
indirect descriptors or event idx.

Future changes can add support to offer more features to the guest,
since the use of VirtQueue gives this for free. This is left out at the
moment for simplicity.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 hw/virtio/vhost-shadow-virtqueue.c | 44 ++++++++++++++++++++++++++++++++++++++
 hw/virtio/vhost-shadow-virtqueue.h |  2 ++
 hw/virtio/vhost-vdpa.c             | 15 +++++++++++++
 3 files changed, 61 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
 #include "hw/virtio/vhost-shadow-virtqueue.h"
 
 #include "qemu/error-report.h"
+#include "qapi/error.h"
 #include "qemu/main-loop.h"
 #include "linux-headers/linux/vhost.h"
 
 /**
+ * Validate the transport device features that both guests can use with the SVQ
+ * and SVQs can use with the device.
+ *
+ * @dev_features: The features
+ * @errp: Error pointer
+ */
+bool vhost_svq_valid_features(uint64_t features, Error **errp)
+{
+    bool ok = true;
+    uint64_t svq_features = features;
+
+    for (uint64_t b = VIRTIO_TRANSPORT_F_START; b <= VIRTIO_TRANSPORT_F_END;
+         ++b) {
+        switch (b) {
+        case VIRTIO_F_ANY_LAYOUT:
+            continue;
+
+        case VIRTIO_F_ACCESS_PLATFORM:
+            /* SVQ trust in the host's IOMMU to translate addresses */
+        case VIRTIO_F_VERSION_1:
+            /* SVQ trust that the guest vring is little endian */
+            if (!(svq_features & BIT_ULL(b))) {
+                svq_features |= BIT_ULL(b);
+                ok = false;
+            }
+            continue;
+
+        default:
+            if (svq_features & BIT_ULL(b)) {
+                svq_features &= ~BIT_ULL(b);
+                ok = false;
+            }
+        }
+    }
+
+    if (!ok) {
+        error_setg(errp, "SVQ Invalid device feature flags, offer: 0x%"PRIx64
+                         ", ok: 0x%"PRIx64, features, svq_features);
+    }
+    return ok;
+}
+
+/**
  * Forward guest notifications.
  *
  * @n: guest kick event notifier, the one that guest set to notify svq.
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
     EventNotifier svq_call;
 } VhostShadowVirtqueue;
 
+bool vhost_svq_valid_features(uint64_t features, Error **errp);
+
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
 
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
                                Error **errp)
 {
     g_autoptr(GPtrArray) shadow_vqs = NULL;
+    uint64_t dev_features, svq_features;
+    int r;
+    bool ok;
 
     if (!v->shadow_vqs_enabled) {
         return 0;
     }
 
+    r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
+    if (r != 0) {
+        error_setg_errno(errp, -r, "Can't get vdpa device features");
+        return r;
+    }
+
+    svq_features = dev_features;
+    ok = vhost_svq_valid_features(svq_features, errp);
+    if (unlikely(!ok)) {
+        return -1;
+    }
+
     shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
     for (unsigned n = 0; n < hdev->nvqs; ++n) {
         g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
-- 
2.7.4

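A concrete example of how the validation above behaves (the caller code
is hypothetical; only vhost_svq_valid_features() comes from the patch):

    uint64_t features = BIT_ULL(VIRTIO_F_VERSION_1) |
                        BIT_ULL(VIRTIO_F_RING_PACKED);
    Error *err = NULL;

    if (!vhost_svq_valid_features(features, &err)) {
        /*
         * Fails: VIRTIO_F_RING_PACKED is a transport feature that SVQ
         * cannot forward, so the error reports the offered mask next to
         * an acceptable one (RING_PACKED cleared, ACCESS_PLATFORM set).
         */
        error_report_err(err);
    }
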
From: Eugenio Pérez <eperezma@redhat.com>

This reports the shadow virtqueue addresses from qemu's virtual address
space.

Since these will be different from the guest's vaddr, but the device can
access them, SVQ takes special care with their alignment and the absence
of garbage data. It assumes that the IOMMU works at host_page_size
granularity for that.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
hw/virtio/vhost-shadow-virtqueue.c | 29 +++++++++++++++++++++++++++++
hw/virtio/vhost-shadow-virtqueue.h | 9 +++++++++
2 files changed, 38 insertions(+)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
 }
 
 /**
+ * Get the shadow vq vring address.
+ * @svq: Shadow virtqueue
+ * @addr: Destination to store address
+ */
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
+                              struct vhost_vring_addr *addr)
+{
+    addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
+    addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
+    addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
+}
+
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
+{
+    size_t desc_size = sizeof(vring_desc_t) * svq->vring.num;
+    size_t avail_size = offsetof(vring_avail_t, ring) +
+                                             sizeof(uint16_t) * svq->vring.num;
+
+    return ROUND_UP(desc_size + avail_size, qemu_real_host_page_size);
+}
+
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq)
+{
+    size_t used_size = offsetof(vring_used_t, ring) +
+                                    sizeof(vring_used_elem_t) * svq->vring.num;
+    return ROUND_UP(used_size, qemu_real_host_page_size);
+}
+
+/**
  * Set a new file descriptor for the guest to kick the SVQ and notify for avail
  *
  * @svq: The svq
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@
 #define VHOST_SHADOW_VIRTQUEUE_H
 
 #include "qemu/event_notifier.h"
+#include "hw/virtio/virtio.h"
+#include "standard-headers/linux/vhost_types.h"
 
 /* Shadow virtqueue to relay notifications */
 typedef struct VhostShadowVirtqueue {
+    /* Shadow vring */
+    struct vring vring;
+
     /* Shadow kick notifier, sent to vhost */
     EventNotifier hdev_kick;
     /* Shadow call notifier, sent to vhost */
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp);
 
 void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd);
 void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd);
+void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
+                              struct vhost_vring_addr *addr);
+size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
+size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
 
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
-- 
2.7.4

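To see what the two area-size helpers above return, here is a standalone
sketch with the split-ring layouts written out by hand. It assumes a
256-entry ring, no event idx suffix, and 4 KiB host pages; none of it is
QEMU code:

    #include <stddef.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint64_t addr; uint32_t len;
                     uint16_t flags; uint16_t next; } vring_desc_t;
    typedef struct { uint16_t flags; uint16_t idx;
                     uint16_t ring[]; } vring_avail_t;
    typedef struct { uint32_t id; uint32_t len; } vring_used_elem_t;
    typedef struct { uint16_t flags; uint16_t idx;
                     vring_used_elem_t ring[]; } vring_used_t;

    #define ROUND_UP(n, a) (((n) + (a) - 1) / (a) * (a))

    int main(void)
    {
        const size_t num = 256, page = 4096;

        /* Driver area: descriptor table plus avail ring, page aligned */
        size_t driver = ROUND_UP(sizeof(vring_desc_t) * num +
                                 offsetof(vring_avail_t, ring) +
                                 sizeof(uint16_t) * num, page);
        /* Device area: used ring, page aligned */
        size_t device = ROUND_UP(offsetof(vring_used_t, ring) +
                                 sizeof(vring_used_elem_t) * num, page);

        /* 4096 + 516 rounds up to 8192; 2052 rounds up to 4096 */
        printf("driver area %zu, device area %zu\n", driver, device);
        return 0;
    }
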
From: Eugenio Pérez <eperezma@redhat.com>

First half of the buffers forwarding part, preparing the vhost-vdpa
callbacks so they can offer SVQ. QEMU cannot enable SVQ yet, so this is
effectively dead code for the moment, but it helps to reduce patch size.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
hw/virtio/vhost-vdpa.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
1 file changed, 41 insertions(+), 7 deletions(-)

diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_get_config(struct vhost_dev *dev, uint8_t *config,
     return ret;
 }
 
+static int vhost_vdpa_set_dev_vring_base(struct vhost_dev *dev,
+                                         struct vhost_vring_state *ring)
+{
+    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+}
+
 static int vhost_vdpa_set_vring_dev_kick(struct vhost_dev *dev,
                                          struct vhost_vring_file *file)
 {
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_call(struct vhost_dev *dev,
     return vhost_vdpa_call(dev, VHOST_SET_VRING_CALL, file);
 }
 
+static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
+                                         struct vhost_vring_addr *addr)
+{
+    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
+                                    addr->desc_user_addr, addr->used_user_addr,
+                                    addr->avail_user_addr,
+                                    addr->log_guest_addr);
+
+    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+
+}
+
 /**
  * Set the shadow virtqueue descriptors to the device
  *
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
 static int vhost_vdpa_set_vring_addr(struct vhost_dev *dev,
                                      struct vhost_vring_addr *addr)
 {
-    trace_vhost_vdpa_set_vring_addr(dev, addr->index, addr->flags,
-                                    addr->desc_user_addr, addr->used_user_addr,
-                                    addr->avail_user_addr,
-                                    addr->log_guest_addr);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_ADDR, addr);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        /*
+         * Device vring addr was set at device start. SVQ base is handled by
+         * VirtQueue code.
+         */
+        return 0;
+    }
+
+    return vhost_vdpa_set_vring_dev_addr(dev, addr);
 }
 
 static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_num(struct vhost_dev *dev,
 static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
                                      struct vhost_vring_state *ring)
 {
-    trace_vhost_vdpa_set_vring_base(dev, ring->index, ring->num);
-    return vhost_vdpa_call(dev, VHOST_SET_VRING_BASE, ring);
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (v->shadow_vqs_enabled) {
+        /*
+         * Device vring base was set at device start. SVQ base is handled by
+         * VirtQueue code.
+         */
+        return 0;
+    }
+
+    return vhost_vdpa_set_dev_vring_base(dev, ring);
 }
 
 static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
-- 
2.7.4

From: Eugenio Pérez <eperezma@redhat.com>

Initial version of the shadow virtqueue that actually forwards buffers.
There is no iommu support at the moment, and that will be addressed in
future patches of this series. Since all vhost-vdpa devices use a forced
IOMMU, this means that SVQ is not usable at this point of the series on
any device.

For simplicity it only supports modern devices, which expect the vring
in little endian, with a split ring and no event idx or indirect
descriptors. Support for those will not be added in this series.

It reuses the VirtQueue code for the device part. The driver part is
based on Linux's virtio_ring driver, but with stripped-down functionality
and optimizations so it's easier to review.

However, forwarding buffers has some particular pieces: one of the most
unexpected is that a guest's buffer can expand through more than one
descriptor in SVQ. While this is handled gracefully by qemu's emulated
virtio devices, it may cause an unexpected SVQ queue-full condition. This
patch also solves that by checking for the condition at both guest kicks
and device calls. The code may be more elegant in the future if the SVQ
code runs in its own iocontext.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
hw/virtio/vhost-shadow-virtqueue.c | 352 ++++++++++++++++++++++++++++++-
hw/virtio/vhost-shadow-virtqueue.h | 26 +++
hw/virtio/vhost-vdpa.c | 155 +++++++++++++-
3 files changed, 522 insertions(+), 11 deletions(-)

diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.c
+++ b/hw/virtio/vhost-shadow-virtqueue.c
@@ -XXX,XX +XXX,XX @@
 #include "qemu/error-report.h"
 #include "qapi/error.h"
 #include "qemu/main-loop.h"
+#include "qemu/log.h"
+#include "qemu/memalign.h"
 #include "linux-headers/linux/vhost.h"
 
 /**
@@ -XXX,XX +XXX,XX @@ bool vhost_svq_valid_features(uint64_t features, Error **errp)
 }
 
 /**
- * Forward guest notifications.
+ * Number of descriptors that the SVQ can make available from the guest.
+ *
+ * @svq: The svq
+ */
+static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
+{
+    return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
+}
+
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
+                                    const struct iovec *iovec, size_t num,
+                                    bool more_descs, bool write)
+{
+    uint16_t i = svq->free_head, last = svq->free_head;
+    unsigned n;
+    uint16_t flags = write ? cpu_to_le16(VRING_DESC_F_WRITE) : 0;
+    vring_desc_t *descs = svq->vring.desc;
+
+    if (num == 0) {
+        return;
+    }
+
+    for (n = 0; n < num; n++) {
+        if (more_descs || (n + 1 < num)) {
+            descs[i].flags = flags | cpu_to_le16(VRING_DESC_F_NEXT);
+        } else {
+            descs[i].flags = flags;
+        }
+        descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
+        descs[i].len = cpu_to_le32(iovec[n].iov_len);
+
+        last = i;
+        i = cpu_to_le16(descs[i].next);
+    }
+
+    svq->free_head = le16_to_cpu(descs[last].next);
+}
+
+static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
+                                VirtQueueElement *elem, unsigned *head)
+{
+    unsigned avail_idx;
+    vring_avail_t *avail = svq->vring.avail;
+
+    *head = svq->free_head;
+
+    /* We need some descriptors here */
+    if (unlikely(!elem->out_num && !elem->in_num)) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+                      "Guest provided element with no descriptors");
+        return false;
+    }
+
+    vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
+                            false);
+    vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
+
+    /*
+     * Put the entry in the available array (but don't update avail->idx until
+     * they do sync).
+     */
+    avail_idx = svq->shadow_avail_idx & (svq->vring.num - 1);
+    avail->ring[avail_idx] = cpu_to_le16(*head);
+    svq->shadow_avail_idx++;
+
+    /* Update the avail index after write the descriptor */
+    smp_wmb();
+    avail->idx = cpu_to_le16(svq->shadow_avail_idx);
+
+    return true;
+}
+
+static bool vhost_svq_add(VhostShadowVirtqueue *svq, VirtQueueElement *elem)
+{
+    unsigned qemu_head;
+    bool ok = vhost_svq_add_split(svq, elem, &qemu_head);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    svq->ring_id_maps[qemu_head] = elem;
+    return true;
+}
+
+static void vhost_svq_kick(VhostShadowVirtqueue *svq)
+{
+    /*
+     * We need to expose the available array entries before checking the used
+     * flags
+     */
+    smp_mb();
+    if (svq->vring.used->flags & VRING_USED_F_NO_NOTIFY) {
+        return;
+    }
+
+    event_notifier_set(&svq->hdev_kick);
+}
+
+/**
+ * Forward available buffers.
+ *
+ * @svq: Shadow VirtQueue
+ *
+ * Note that this function does not guarantee that all guest's available
+ * buffers are available to the device in SVQ avail ring. The guest may have
+ * exposed a GPA / GIOVA contiguous buffer, but it may not be contiguous in
+ * qemu vaddr.
+ *
+ * If that happens, guest's kick notifications will be disabled until the
+ * device uses some buffers.
+ */
+static void vhost_handle_guest_kick(VhostShadowVirtqueue *svq)
+{
+    /* Clear event notifier */
+    event_notifier_test_and_clear(&svq->svq_kick);
+
+    /* Forward to the device as many available buffers as possible */
+    do {
+        virtio_queue_set_notification(svq->vq, false);
+
+        while (true) {
+            VirtQueueElement *elem;
+            bool ok;
+
+            if (svq->next_guest_avail_elem) {
+                elem = g_steal_pointer(&svq->next_guest_avail_elem);
+            } else {
+                elem = virtqueue_pop(svq->vq, sizeof(*elem));
+            }
+
+            if (!elem) {
+                break;
+            }
+
+            if (elem->out_num + elem->in_num > vhost_svq_available_slots(svq)) {
+                /*
+                 * This condition is possible since a contiguous buffer in GPA
+                 * does not imply a contiguous buffer in qemu's VA
+                 * scatter-gather segments. If that happens, the buffer exposed
+                 * to the device needs to be a chain of descriptors at this
+                 * moment.
+                 *
+                 * SVQ cannot hold more available buffers if we are here:
+                 * queue the current guest descriptor and ignore further kicks
+                 * until some elements are used.
+                 */
+                svq->next_guest_avail_elem = elem;
+                return;
+            }
+
+            ok = vhost_svq_add(svq, elem);
+            if (unlikely(!ok)) {
+                /* VQ is broken, just return and ignore any other kicks */
+                return;
+            }
+            vhost_svq_kick(svq);
+        }
+
+        virtio_queue_set_notification(svq->vq, true);
+    } while (!virtio_queue_empty(svq->vq));
+}
+
+/**
+ * Handle guest's kick.
  *
  * @n: guest kick event notifier, the one that guest set to notify svq.
  */
-static void vhost_handle_guest_kick(EventNotifier *n)
+static void vhost_handle_guest_kick_notifier(EventNotifier *n)
 {
     VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue, svq_kick);
     event_notifier_test_and_clear(n);
-    event_notifier_set(&svq->hdev_kick);
+    vhost_handle_guest_kick(svq);
+}
+
+static bool vhost_svq_more_used(VhostShadowVirtqueue *svq)
+{
+    if (svq->last_used_idx != svq->shadow_used_idx) {
+        return true;
+    }
+
+    svq->shadow_used_idx = cpu_to_le16(svq->vring.used->idx);
+
+    return svq->last_used_idx != svq->shadow_used_idx;
 }
 
 /**
- * Forward vhost notifications
+ * Enable vhost device calls after disable them.
+ *
+ * @svq: The svq
+ *
+ * It returns false if there are pending used buffers from the vhost device,
+ * avoiding the possible races between SVQ checking for more work and enabling
+ * callbacks. True if SVQ used vring has no more pending buffers.
+ */
+static bool vhost_svq_enable_notification(VhostShadowVirtqueue *svq)
+{
+    svq->vring.avail->flags &= ~cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
+    /* Make sure the flag is written before the read of used_idx */
+    smp_mb();
+    return !vhost_svq_more_used(svq);
+}
+
+static void vhost_svq_disable_notification(VhostShadowVirtqueue *svq)
+{
+    svq->vring.avail->flags |= cpu_to_le16(VRING_AVAIL_F_NO_INTERRUPT);
+}
+
+static VirtQueueElement *vhost_svq_get_buf(VhostShadowVirtqueue *svq,
+                                           uint32_t *len)
+{
+    vring_desc_t *descs = svq->vring.desc;
+    const vring_used_t *used = svq->vring.used;
+    vring_used_elem_t used_elem;
+    uint16_t last_used;
+
+    if (!vhost_svq_more_used(svq)) {
+        return NULL;
+    }
+
+    /* Only get used array entries after they have been exposed by dev */
+    smp_rmb();
+    last_used = svq->last_used_idx & (svq->vring.num - 1);
+    used_elem.id = le32_to_cpu(used->ring[last_used].id);
+    used_elem.len = le32_to_cpu(used->ring[last_used].len);
+
+    svq->last_used_idx++;
+    if (unlikely(used_elem.id >= svq->vring.num)) {
+        qemu_log_mask(LOG_GUEST_ERROR, "Device %s says index %u is used",
+                      svq->vdev->name, used_elem.id);
+        return NULL;
+    }
+
+    if (unlikely(!svq->ring_id_maps[used_elem.id])) {
+        qemu_log_mask(LOG_GUEST_ERROR,
+            "Device %s says index %u is used, but it was not available",
+            svq->vdev->name, used_elem.id);
+        return NULL;
+    }
+
+    descs[used_elem.id].next = svq->free_head;
+    svq->free_head = used_elem.id;
+
+    *len = used_elem.len;
+    return g_steal_pointer(&svq->ring_id_maps[used_elem.id]);
+}
+
+static void vhost_svq_flush(VhostShadowVirtqueue *svq,
+                            bool check_for_avail_queue)
+{
+    VirtQueue *vq = svq->vq;
+
+    /* Forward as many used buffers as possible. */
+    do {
+        unsigned i = 0;
+
+        vhost_svq_disable_notification(svq);
+        while (true) {
+            uint32_t len;
+            g_autofree VirtQueueElement *elem = vhost_svq_get_buf(svq, &len);
+            if (!elem) {
+                break;
+            }
+
+            if (unlikely(i >= svq->vring.num)) {
+                qemu_log_mask(LOG_GUEST_ERROR,
+                         "More than %u used buffers obtained in a %u size SVQ",
+                         i, svq->vring.num);
+                virtqueue_fill(vq, elem, len, i);
+                virtqueue_flush(vq, i);
+                return;
+            }
+            virtqueue_fill(vq, elem, len, i++);
+        }
+
+        virtqueue_flush(vq, i);
+        event_notifier_set(&svq->svq_call);
+
+        if (check_for_avail_queue && svq->next_guest_avail_elem) {
+            /*
+             * Avail ring was full when vhost_svq_flush was called, so it's a
+             * good moment to make more descriptors available if possible.
+             */
+            vhost_handle_guest_kick(svq);
+        }
+    } while (!vhost_svq_enable_notification(svq));
+}
+
+/**
+ * Forward used buffers.
  *
  * @n: hdev call event notifier, the one that device set to notify svq.
+ *
+ * Note that we are not making any buffers available in the loop, there is no
+ * way that it runs more than virtqueue size times.
  */
 static void vhost_svq_handle_call(EventNotifier *n)
 {
     VhostShadowVirtqueue *svq = container_of(n, VhostShadowVirtqueue,
                                              hdev_call);
     event_notifier_test_and_clear(n);
-    event_notifier_set(&svq->svq_call);
+    vhost_svq_flush(svq, true);
 }
 
 /**
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
     if (poll_start) {
         event_notifier_init_fd(svq_kick, svq_kick_fd);
         event_notifier_set(svq_kick);
-        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick);
+        event_notifier_set_handler(svq_kick, vhost_handle_guest_kick_notifier);
+    }
+}
+
+/**
+ * Start the shadow virtqueue operation.
+ *
+ * @svq: Shadow Virtqueue
+ * @vdev: VirtIO device
+ * @vq: Virtqueue to shadow
+ */
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
+                     VirtQueue *vq)
+{
+    size_t desc_size, driver_size, device_size;
+
+    svq->next_guest_avail_elem = NULL;
+    svq->shadow_avail_idx = 0;
+    svq->shadow_used_idx = 0;
+    svq->last_used_idx = 0;
+    svq->vdev = vdev;
+    svq->vq = vq;
+
+    svq->vring.num = virtio_queue_get_num(vdev, virtio_get_queue_index(vq));
+    driver_size = vhost_svq_driver_area_size(svq);
+    device_size = vhost_svq_device_area_size(svq);
+    svq->vring.desc = qemu_memalign(qemu_real_host_page_size, driver_size);
+    desc_size = sizeof(vring_desc_t) * svq->vring.num;
+    svq->vring.avail = (void *)((char *)svq->vring.desc + desc_size);
+    memset(svq->vring.desc, 0, driver_size);
+    svq->vring.used = qemu_memalign(qemu_real_host_page_size, device_size);
+    memset(svq->vring.used, 0, device_size);
+    svq->ring_id_maps = g_new0(VirtQueueElement *, svq->vring.num);
+    for (unsigned i = 0; i < svq->vring.num - 1; i++) {
+        svq->vring.desc[i].next = cpu_to_le16(i + 1);
     }
 }
 
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_kick_fd(VhostShadowVirtqueue *svq, int svq_kick_fd)
 void vhost_svq_stop(VhostShadowVirtqueue *svq)
 {
     event_notifier_set_handler(&svq->svq_kick, NULL);
+    g_autofree VirtQueueElement *next_avail_elem = NULL;
+
+    if (!svq->vq) {
+        return;
+    }
+
+    /* Send all pending used descriptors to guest */
+    vhost_svq_flush(svq, false);
+
+    for (unsigned i = 0; i < svq->vring.num; ++i) {
+        g_autofree VirtQueueElement *elem = NULL;
+        elem = g_steal_pointer(&svq->ring_id_maps[i]);
+        if (elem) {
+            virtqueue_detach_element(svq->vq, elem, 0);
+        }
+    }
+
+    next_avail_elem = g_steal_pointer(&svq->next_guest_avail_elem);
+    if (next_avail_elem) {
+        virtqueue_detach_element(svq->vq, next_avail_elem, 0);
+    }
+    svq->vq = NULL;
+    g_free(svq->ring_id_maps);
+    qemu_vfree(svq->vring.desc);
+    qemu_vfree(svq->vring.used);
 }
 
 /**
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-shadow-virtqueue.h
+++ b/hw/virtio/vhost-shadow-virtqueue.h
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
 
     /* Guest's call notifier, where the SVQ calls guest. */
     EventNotifier svq_call;
+
+    /* Virtio queue shadowing */
+    VirtQueue *vq;
+
+    /* Virtio device */
+    VirtIODevice *vdev;
+
+    /* Map for use the guest's descriptors */
+    VirtQueueElement **ring_id_maps;
+
+    /* Next VirtQueue element that guest made available */
+    VirtQueueElement *next_guest_avail_elem;
+
+    /* Next head to expose to the device */
+    uint16_t shadow_avail_idx;
+
+    /* Next free descriptor */
+    uint16_t free_head;
+
+    /* Last seen used idx */
+    uint16_t shadow_used_idx;
+
+    /* Next head to consume from the device */
+    uint16_t last_used_idx;
 } VhostShadowVirtqueue;
 
 bool vhost_svq_valid_features(uint64_t features, Error **errp);
@@ -XXX,XX +XXX,XX @@ void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
 size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq);
 size_t vhost_svq_device_area_size(const VhostShadowVirtqueue *svq);
 
+void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
+                     VirtQueue *vq);
 void vhost_svq_stop(VhostShadowVirtqueue *svq);
 
 VhostShadowVirtqueue *vhost_svq_new(void);
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/vhost-vdpa.c
+++ b/hw/virtio/vhost-vdpa.c
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_dev_addr(struct vhost_dev *dev,
  * Note that this function does not rewind kick file descriptor if cannot set
  * call one.
  */
-static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
-                                 VhostShadowVirtqueue *svq, unsigned idx,
-                                 Error **errp)
+static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
+                                  VhostShadowVirtqueue *svq, unsigned idx,
+                                  Error **errp)
 {
     struct vhost_vring_file file = {
         .index = dev->vq_index + idx,
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
     r = vhost_vdpa_set_vring_dev_kick(dev, &file);
     if (unlikely(r != 0)) {
         error_setg_errno(errp, -r, "Can't set device kick fd");
-        return false;
+        return r;
     }
 
     event_notifier = &svq->hdev_call;
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
         error_setg_errno(errp, -r, "Can't set device call fd");
     }
 
+    return r;
+}
+
+/**
+ * Unmap a SVQ area in the device
+ */
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
+                                      hwaddr size)
+{
+    int r;
+
+    size = ROUND_UP(size, qemu_real_host_page_size);
+    r = vhost_vdpa_dma_unmap(v, iova, size);
+    return r == 0;
+}
+
+static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
+                                       const VhostShadowVirtqueue *svq)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    struct vhost_vring_addr svq_addr;
+    size_t device_size = vhost_svq_device_area_size(svq);
+    size_t driver_size = vhost_svq_driver_area_size(svq);
+    bool ok;
+
+    vhost_svq_get_vring_addr(svq, &svq_addr);
+
+    ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
+    if (unlikely(!ok)) {
+        return false;
+    }
+
+    return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
+}
+
+/**
+ * Map the shadow virtqueue rings in the device
+ *
+ * @dev: The vhost device
+ * @svq: The shadow virtqueue
+ * @addr: Assigned IOVA addresses
+ * @errp: Error pointer
+ */
+static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
+                                     const VhostShadowVirtqueue *svq,
+                                     struct vhost_vring_addr *addr,
+                                     Error **errp)
+{
+    struct vhost_vdpa *v = dev->opaque;
+    size_t device_size = vhost_svq_device_area_size(svq);
+    size_t driver_size = vhost_svq_driver_area_size(svq);
+    int r;
+
+    ERRP_GUARD();
+    vhost_svq_get_vring_addr(svq, addr);
+
+    r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
+                           (void *)(uintptr_t)addr->desc_user_addr, true);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Cannot create vq driver region: ");
+        return false;
+    }
+
+    r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
+                           (void *)(intptr_t)addr->used_user_addr, false);
+    if (unlikely(r != 0)) {
+        error_setg_errno(errp, -r, "Cannot create vq device region: ");
+    }
+
+    return r == 0;
+}
+
+static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
+                                 VhostShadowVirtqueue *svq, unsigned idx,
+                                 Error **errp)
+{
+    uint16_t vq_index = dev->vq_index + idx;
+    struct vhost_vring_state s = {
+        .index = vq_index,
+    };
+    int r;
+
+    r = vhost_vdpa_set_dev_vring_base(dev, &s);
+    if (unlikely(r)) {
+        error_setg_errno(errp, -r, "Cannot set vring base");
+        return false;
+    }
+
+    r = vhost_vdpa_svq_set_fds(dev, svq, idx, errp);
     return r == 0;
 }
 
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev)
     }
 
     for (i = 0; i < v->shadow_vqs->len; ++i) {
+        VirtQueue *vq = virtio_get_queue(dev->vdev, dev->vq_index + i);
         VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        struct vhost_vring_addr addr = {
+            .index = i,
+        };
+        int r;
         bool ok = vhost_vdpa_svq_setup(dev, svq, i, &err);
         if (unlikely(!ok)) {
-            error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+            goto err;
+        }
+
+        vhost_svq_start(svq, dev->vdev, vq);
+        ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err);
+        if (unlikely(!ok)) {
+            goto err_map;
+        }
+
+        /* Override vring GPA set by vhost subsystem */
+        r = vhost_vdpa_set_vring_dev_addr(dev, &addr);
+        if (unlikely(r != 0)) {
+            error_setg_errno(&err, -r, "Cannot set device address");
+            goto err_set_addr;
+        }
+    }
+
+    return true;
+
+err_set_addr:
+    vhost_vdpa_svq_unmap_rings(dev, g_ptr_array_index(v->shadow_vqs, i));
+
+err_map:
+    vhost_svq_stop(g_ptr_array_index(v->shadow_vqs, i));
+
+err:
+    error_reportf_err(err, "Cannot setup SVQ %u: ", i);
+    for (unsigned j = 0; j < i; ++j) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, j);
+        vhost_vdpa_svq_unmap_rings(dev, svq);
+        vhost_svq_stop(svq);
+    }
+
+    return false;
+}
+
+static bool vhost_vdpa_svqs_stop(struct vhost_dev *dev)
+{
+    struct vhost_vdpa *v = dev->opaque;
+
+    if (!v->shadow_vqs) {
+        return true;
+    }
+
+    for (unsigned i = 0; i < v->shadow_vqs->len; ++i) {
+        VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs, i);
+        bool ok = vhost_vdpa_svq_unmap_rings(dev, svq);
+        if (unlikely(!ok)) {
+            return false;
+        }
+    }
+
+    return true;
+}
 
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
         }
         vhost_vdpa_set_vring_ready(dev);
     } else {
+        ok = vhost_vdpa_svqs_stop(dev);
+        if (unlikely(!ok)) {
+            return -1;
+        }
         vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs);
     }
 
-- 
2.7.4

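A toy illustration of the slot accounting that drives the queue-full
handling described in the patch above. The names mirror the patch, but
this is a standalone sketch, not QEMU code:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors vhost_svq_available_slots(): free entries in the shadow ring */
    static uint16_t available_slots(uint16_t num, uint16_t shadow_avail_idx,
                                    uint16_t shadow_used_idx)
    {
        return num - (shadow_avail_idx - shadow_used_idx);
    }

    int main(void)
    {
        uint16_t num = 8;             /* tiny ring for the example */
        uint16_t avail = 6, used = 0; /* six chains in flight, none used */
        unsigned needed = 3 + 2;      /* element with 3 out + 2 in segments */

        if (needed > available_slots(num, avail, used)) {
            /*
             * SVQ would park the element in next_guest_avail_elem and
             * ignore further kicks until the device uses some buffers.
             */
            printf("queue full: need %u, have %u\n", needed,
                   available_slots(num, avail, used));
        }
        return 0;
    }
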
From: Eugenio Pérez <eperezma@redhat.com>

This iova tree function allows looking for a hole in allocated regions
and returns a totally new translation for a given translated address.

Its main use is to allow devices to access the qemu address space,
remapping the guest's address space into a new iova space to which qemu
can add chunks of addresses.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Reviewed-by: Peter Xu <peterx@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
include/qemu/iova-tree.h | 18 +++++++
util/iova-tree.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 154 insertions(+)

diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/iova-tree.h
+++ b/include/qemu/iova-tree.h
@@ -XXX,XX +XXX,XX @@
 #define IOVA_OK           (0)
 #define IOVA_ERR_INVALID  (-1) /* Invalid parameters */
 #define IOVA_ERR_OVERLAP  (-2) /* IOVA range overlapped */
+#define IOVA_ERR_NOMEM    (-3) /* Cannot allocate */
 
 typedef struct IOVATree IOVATree;
 typedef struct DMAMap {
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova);
 void iova_tree_foreach(IOVATree *tree, iova_tree_iterator iterator);
 
 /**
+ * iova_tree_alloc_map:
+ *
+ * @tree: the iova tree to allocate from
+ * @map: the new map (as translated addr & size) to allocate in the iova region
+ * @iova_begin: the minimum address of the allocation
+ * @iova_end: the maximum addressable direction of the allocation
+ *
+ * Allocates a new region of a given size, between iova_min and iova_max.
+ *
+ * Return: Same as iova_tree_insert, but cannot overlap and can return error if
+ * iova tree is out of free contiguous range. The caller gets the assigned iova
+ * in map->iova.
+ */
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
+                        hwaddr iova_end);
+
+/**
  * iova_tree_destroy:
  *
  * @tree: the iova tree to destroy
diff --git a/util/iova-tree.c b/util/iova-tree.c
index XXXXXXX..XXXXXXX 100644
--- a/util/iova-tree.c
+++ b/util/iova-tree.c
@@ -XXX,XX +XXX,XX @@ struct IOVATree {
     GTree *tree;
 };
 
+/* Args to pass to iova_tree_alloc foreach function. */
+struct IOVATreeAllocArgs {
+    /* Size of the desired allocation */
+    size_t new_size;
+
+    /* The minimum address allowed in the allocation */
+    hwaddr iova_begin;
+
+    /* Map at the left of the hole, can be NULL if "this" is first one */
+    const DMAMap *prev;
+
+    /* Map at the right of the hole, can be NULL if "prev" is the last one */
+    const DMAMap *this;
+
+    /* If found, we fill in the IOVA here */
+    hwaddr iova_result;
+
+    /* Whether have we found a valid IOVA */
+    bool iova_found;
+};
+
+/**
+ * Iterate args to the next hole
+ *
+ * @args: The alloc arguments
+ * @next: The next mapping in the tree. Can be NULL to signal the last one
+ */
+static void iova_tree_alloc_args_iterate(struct IOVATreeAllocArgs *args,
+                                         const DMAMap *next)
+{
+    args->prev = args->this;
+    args->this = next;
+}
+
 static int iova_tree_compare(gconstpointer a, gconstpointer b, gpointer data)
 {
     const DMAMap *m1 = a, *m2 = b;
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map)
     return IOVA_OK;
 }
 
+/**
+ * Try to find an unallocated IOVA range between prev and this elements.
+ *
+ * @args: Arguments to allocation
+ *
+ * Cases:
+ *
+ * (1) !prev, !this: No entries allocated, always succeed
+ *
+ * (2) !prev, this: We're iterating at the 1st element.
+ *
+ * (3) prev, !this: We're iterating at the last element.
+ *
+ * (4) prev, this: this is the most common case, we'll try to find a hole
+ * between "prev" and "this" mapping.
+ *
+ * Note that this function assumes the last valid iova is HWADDR_MAX, but it
+ * searches linearly so it's easy to discard the result if it's not the case.
+ */
+static void iova_tree_alloc_map_in_hole(struct IOVATreeAllocArgs *args)
+{
+    const DMAMap *prev = args->prev, *this = args->this;
+    uint64_t hole_start, hole_last;
+
+    if (this && this->iova + this->size < args->iova_begin) {
+        return;
+    }
+
+    hole_start = MAX(prev ? prev->iova + prev->size + 1 : 0, args->iova_begin);
+    hole_last = this ? this->iova : HWADDR_MAX;
+
+    if (hole_last - hole_start > args->new_size) {
+        args->iova_result = hole_start;
+        args->iova_found = true;
+    }
+}
+
+/**
+ * Foreach dma node in the tree, compare if there is a hole with its previous
+ * node (or minimum iova address allowed) and the node.
+ *
+ * @key: Node iterating
+ * @value: Node iterating
+ * @pargs: Struct to communicate with the outside world
+ *
+ * Return: false to keep iterating, true if needs break.
+ */
+static gboolean iova_tree_alloc_traverse(gpointer key, gpointer value,
+                                         gpointer pargs)
+{
+    struct IOVATreeAllocArgs *args = pargs;
+    DMAMap *node = value;
+
+    assert(key == value);
+
+    iova_tree_alloc_args_iterate(args, node);
+    iova_tree_alloc_map_in_hole(args);
+    return args->iova_found;
+}
+
+int iova_tree_alloc_map(IOVATree *tree, DMAMap *map, hwaddr iova_begin,
+                        hwaddr iova_last)
+{
+    struct IOVATreeAllocArgs args = {
+        .new_size = map->size,
+        .iova_begin = iova_begin,
+    };
+
+    if (unlikely(iova_last < iova_begin)) {
+        return IOVA_ERR_INVALID;
+    }
+
+    /*
+     * Find a valid hole for the mapping
+     *
+     * Assuming low iova_begin, so no need to do a binary search to
+     * locate the first node.
+     *
+     * TODO: Replace all this with g_tree_node_first/next/last when available
+     * (from glib since 2.68). To do it with g_tree_foreach complicates the
+     * code a lot.
+     *
+     */
+    g_tree_foreach(tree->tree, iova_tree_alloc_traverse, &args);
+    if (!args.iova_found) {
+        /*
+         * Either tree is empty or the last hole is still not checked.
+         * g_tree_foreach does not compare (last, iova_last] range, so we check
+         * it here.
+         */
+        iova_tree_alloc_args_iterate(&args, NULL);
+        iova_tree_alloc_map_in_hole(&args);
+    }
+
+    if (!args.iova_found || args.iova_result + map->size > iova_last) {
+        return IOVA_ERR_NOMEM;
+    }
+
+    map->iova = args.iova_result;
+    return iova_tree_insert(tree, map);
+}
+
 void iova_tree_destroy(IOVATree *tree)
 {
     g_tree_destroy(tree->tree);
-- 
2.7.4

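A minimal usage sketch of the new allocator, assuming it is linked against
util/iova-tree.c. The map_buffer() helper is hypothetical, written only to
show the calling convention:

    /* Reserve a device-visible IOVA range for a qemu buffer */
    static int map_buffer(IOVATree *tree, void *buf, size_t size)
    {
        DMAMap map = {
            .translated_addr = (hwaddr)(uintptr_t)buf,
            .size = size - 1,         /* DMAMap ranges include the last byte */
            .perm = IOMMU_RW,
        };
        int r = iova_tree_alloc_map(tree, &map, 0x1000, HWADDR_MAX);

        if (r != IOVA_OK) {
            return r;                 /* IOVA_ERR_NOMEM if no hole fits */
        }
        /* map.iova now holds the allocated iova for this buffer */
        return IOVA_OK;
    }
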
From: Eugenio Pérez <eperezma@redhat.com>

This function does the reverse operation of iova_tree_find: it looks for
a mapping that matches a translated address, so we can translate back.

This has linear complexity instead of logarithmic, but it supports
overlapping HVAs. Future developments could reduce it.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
include/qemu/iova-tree.h | 20 +++++++++++++++++++-
util/iova-tree.c | 34 ++++++++++++++++++++++++++++++++++
2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/include/qemu/iova-tree.h b/include/qemu/iova-tree.h
index XXXXXXX..XXXXXXX 100644
--- a/include/qemu/iova-tree.h
+++ b/include/qemu/iova-tree.h
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
 * @tree: the iova tree to search from
 * @map: the mapping to search
 *
- * Search for a mapping in the iova tree that overlaps with the
+ * Search for a mapping in the iova tree that iova overlaps with the
 * mapping range specified. Only the first found mapping will be
 * returned.
 *
@@ -XXX,XX +XXX,XX @@ int iova_tree_remove(IOVATree *tree, const DMAMap *map);
 const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map);
 
 /**
+ * iova_tree_find_iova:
+ *
+ * @tree: the iova tree to search from
+ * @map: the mapping to search
+ *
+ * Search for a mapping in the iova tree that translated_addr overlaps with the
+ * mapping range specified. Only the first found mapping will be
+ * returned.
+ *
+ * Return: DMAMap pointer if found, or NULL if not found. Note that
+ * the returned DMAMap pointer is maintained internally. User should
+ * only read the content but never modify or free the content. Also,
+ * user is responsible to make sure the pointer is valid (say, no
+ * concurrent deletion in progress).
+ */
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map);
+
+/**
  * iova_tree_find_address:
  *
  * @tree: the iova tree to search from
diff --git a/util/iova-tree.c b/util/iova-tree.c
index XXXXXXX..XXXXXXX 100644
--- a/util/iova-tree.c
+++ b/util/iova-tree.c
@@ -XXX,XX +XXX,XX @@ struct IOVATreeAllocArgs {
     bool iova_found;
 };
 
+typedef struct IOVATreeFindIOVAArgs {
+    const DMAMap *needle;
+    const DMAMap *result;
+} IOVATreeFindIOVAArgs;
+
 /**
  * Iterate args to the next hole
  *
@@ -XXX,XX +XXX,XX @@ const DMAMap *iova_tree_find(const IOVATree *tree, const DMAMap *map)
     return g_tree_lookup(tree->tree, map);
 }
 
+static gboolean iova_tree_find_address_iterator(gpointer key, gpointer value,
+                                                gpointer data)
+{
+    const DMAMap *map = key;
+    IOVATreeFindIOVAArgs *args = data;
+    const DMAMap *needle;
+
+    g_assert(key == value);
+
+    needle = args->needle;
+    if (map->translated_addr + map->size < needle->translated_addr ||
+        needle->translated_addr + needle->size < map->translated_addr) {
+        return false;
+    }
+
+    args->result = map;
+    return true;
+}
+
+const DMAMap *iova_tree_find_iova(const IOVATree *tree, const DMAMap *map)
+{
+    IOVATreeFindIOVAArgs args = {
+        .needle = map,
+    };
+
+    g_tree_foreach(tree->tree, iova_tree_find_address_iterator, &args);
+    return args.result;
+}
+
 const DMAMap *iova_tree_find_address(const IOVATree *tree, hwaddr iova)
 {
     const DMAMap map = { .iova = iova, .size = 0 };
-- 
2.7.4

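Correspondingly, a sketch of the reverse lookup this patch enables. The
vaddr_to_iova() helper is hypothetical; it assumes the tree already holds
a mapping covering the address:

    /* Translate a qemu vaddr back to the device-visible IOVA */
    static bool vaddr_to_iova(const IOVATree *tree, void *addr, hwaddr *iova)
    {
        const DMAMap needle = {
            .translated_addr = (hwaddr)(uintptr_t)addr,
            .size = 0,                             /* single-byte probe */
        };
        const DMAMap *found = iova_tree_find_iova(tree, &needle);

        if (!found) {
            return false;
        }
        *iova = found->iova +
                (needle.translated_addr - found->translated_addr);
        return true;
    }
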
From: Eugenio Pérez <eperezma@redhat.com>

This tree is able to look for a translated address from an IOVA address.

At first glance it is similar to util/iova-tree. However, SVQ working on
devices with limited IOVA space needs more capabilities, like allocating
IOVA chunks or performing reverse translations (qemu addresses to iova).

The allocation capability ("assign a free IOVA address to this chunk of
memory in qemu's address space") allows the shadow virtqueue to create a
new address space that is not restricted by the guest's addressable one,
so we can allocate the shadow vqs' vrings outside of it.

It duplicates the tree so it can search efficiently in both directions,
and it will signal an overlap if the iova or the translated address is
present in either tree.

Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
hw/virtio/meson.build | 2 +-
hw/virtio/vhost-iova-tree.c | 110 ++++++++++++++++++++++++++++++++++++++++++++
hw/virtio/vhost-iova-tree.h | 27 +++++++++++
3 files changed, 138 insertions(+), 1 deletion(-)
create mode 100644 hw/virtio/vhost-iova-tree.c
create mode 100644 hw/virtio/vhost-iova-tree.h

diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
index XXXXXXX..XXXXXXX 100644
--- a/hw/virtio/meson.build
+++ b/hw/virtio/meson.build
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
 
 virtio_ss = ss.source_set()
 virtio_ss.add(files('virtio.c'))
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
 virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
 virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
new file mode 100644
index XXXXXXX..XXXXXXX
--- /dev/null
+++ b/hw/virtio/vhost-iova-tree.c
@@ -XXX,XX +XXX,XX @@
+/*
+ * vhost software live migration iova tree
+ *
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/iova-tree.h"
+#include "vhost-iova-tree.h"
+
+#define iova_min_addr qemu_real_host_page_size
+
+/**
+ * VhostIOVATree, able to:
+ * - Translate iova address
+ * - Reverse translate iova address (from translated to iova)
+ * - Allocate IOVA regions for translated range (linear operation)
+ */
+struct VhostIOVATree {
+    /* First addressable iova address in the device */
+    uint64_t iova_first;
+
+    /* Last addressable iova address in the device */
+    uint64_t iova_last;
+
+    /* IOVA address to qemu memory maps. */
+    IOVATree *iova_taddr_map;
+};
+
+/**
+ * Create a new IOVA tree
+ *
+ * Returns the new IOVA tree
+ */
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
+{
+    VhostIOVATree *tree = g_new(VhostIOVATree, 1);
+
+    /* Some devices do not like 0 addresses */
+    tree->iova_first = MAX(iova_first, iova_min_addr);
+    tree->iova_last = iova_last;
+
+    tree->iova_taddr_map = iova_tree_new();
+    return tree;
+}
+
+/**
+ * Delete an iova tree
+ */
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
+{
+    iova_tree_destroy(iova_tree->iova_taddr_map);
+    g_free(iova_tree);
+}
+
+/**
+ * Find the IOVA address stored from a memory address
+ *
+ * @tree: The iova tree
+ * @map: The map with the memory address
+ *
+ * Return the stored mapping, or NULL if not found.
+ */
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
+                                        const DMAMap *map)
+{
+    return iova_tree_find_iova(tree->iova_taddr_map, map);
+}
+
+/**
+ * Allocate a new mapping
+ *
+ * @tree: The iova tree
+ * @map: The iova map
+ *
+ * Returns:
+ * - IOVA_OK if the map fits in the container
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
+ * - IOVA_ERR_NOMEM if tree cannot allocate more space.
+ *
+ * It returns assignated iova in map->iova if return value is VHOST_DMA_MAP_OK.
28
45
diff --git a/tests/qtest/Makefile.include b/tests/qtest/Makefile.include
29
diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build
46
index XXXXXXX..XXXXXXX 100644
30
index XXXXXXX..XXXXXXX 100644
47
--- a/tests/qtest/Makefile.include
31
--- a/hw/virtio/meson.build
48
+++ b/tests/qtest/Makefile.include
32
+++ b/hw/virtio/meson.build
49
@@ -XXX,XX +XXX,XX @@ qos-test-obj-y += tests/qtest/es1370-test.o
33
@@ -XXX,XX +XXX,XX @@ softmmu_ss.add(when: 'CONFIG_ALL', if_true: files('vhost-stub.c'))
50
qos-test-obj-y += tests/qtest/ipoctal232-test.o
34
51
qos-test-obj-y += tests/qtest/megasas-test.o
35
virtio_ss = ss.source_set()
52
qos-test-obj-y += tests/qtest/ne2000-test.o
36
virtio_ss.add(files('virtio.c'))
53
+qos-test-obj-y += tests/qtest/tulip-test.o
37
-virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c'))
54
qos-test-obj-y += tests/qtest/nvme-test.o
38
+virtio_ss.add(when: 'CONFIG_VHOST', if_true: files('vhost.c', 'vhost-backend.c', 'vhost-shadow-virtqueue.c', 'vhost-iova-tree.c'))
55
qos-test-obj-y += tests/qtest/pca9552-test.o
39
virtio_ss.add(when: 'CONFIG_VHOST_USER', if_true: files('vhost-user.c'))
56
qos-test-obj-y += tests/qtest/pci-test.o
40
virtio_ss.add(when: 'CONFIG_VHOST_VDPA', if_true: files('vhost-vdpa.c'))
57
diff --git a/tests/qtest/tulip-test.c b/tests/qtest/tulip-test.c
41
virtio_ss.add(when: 'CONFIG_VIRTIO_BALLOON', if_true: files('virtio-balloon.c'))
42
diff --git a/hw/virtio/vhost-iova-tree.c b/hw/virtio/vhost-iova-tree.c
58
new file mode 100644
43
new file mode 100644
59
index XXXXXXX..XXXXXXX
44
index XXXXXXX..XXXXXXX
60
--- /dev/null
45
--- /dev/null
61
+++ b/tests/qtest/tulip-test.c
46
+++ b/hw/virtio/vhost-iova-tree.c
62
@@ -XXX,XX +XXX,XX @@
47
@@ -XXX,XX +XXX,XX @@
63
+/*
48
+/*
64
+ * QTest testcase for DEC/Intel Tulip 21143
49
+ * vhost software live migration iova tree
65
+ *
50
+ *
66
+ * Copyright (c) 2020 Li Qiang <liq3ea@gmail.com>
51
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
52
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
67
+ *
53
+ *
68
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
54
+ * SPDX-License-Identifier: GPL-2.0-or-later
69
+ * See the COPYING file in the top-level directory.
70
+ */
55
+ */
71
+
56
+
72
+#include "qemu/osdep.h"
57
+#include "qemu/osdep.h"
73
+#include "libqtest.h"
58
+#include "qemu/iova-tree.h"
74
+#include "qemu/module.h"
59
+#include "vhost-iova-tree.h"
75
+#include "libqos/qgraph.h"
76
+#include "libqos/pci.h"
77
+#include "qemu/bitops.h"
78
+#include "hw/net/tulip.h"
79
+
60
+
80
+typedef struct QTulip_pci QTulip_pci;
61
+#define iova_min_addr qemu_real_host_page_size
81
+
62
+
82
+struct QTulip_pci {
63
+/**
83
+ QOSGraphObject obj;
64
+ * VhostIOVATree, able to:
84
+ QPCIDevice dev;
65
+ * - Translate iova address
66
+ * - Reverse translate iova address (from translated to iova)
67
+ * - Allocate IOVA regions for translated range (linear operation)
68
+ */
69
+struct VhostIOVATree {
70
+ /* First addressable iova address in the device */
71
+ uint64_t iova_first;
72
+
73
+ /* Last addressable iova address in the device */
74
+ uint64_t iova_last;
75
+
76
+ /* IOVA address to qemu memory maps. */
77
+ IOVATree *iova_taddr_map;
85
+};
78
+};
86
+
79
+
87
+static void *tulip_pci_get_driver(void *obj, const char *interface)
80
+/**
81
+ * Create a new IOVA tree
82
+ *
83
+ * Returns the new IOVA tree
84
+ */
85
+VhostIOVATree *vhost_iova_tree_new(hwaddr iova_first, hwaddr iova_last)
88
+{
86
+{
89
+ QTulip_pci *tulip_pci = obj;
87
+ VhostIOVATree *tree = g_new(VhostIOVATree, 1);
90
+
88
+
91
+ if (!g_strcmp0(interface, "pci-device")) {
89
+ /* Some devices do not like 0 addresses */
92
+ return &tulip_pci->dev;
90
+ tree->iova_first = MAX(iova_first, iova_min_addr);
91
+ tree->iova_last = iova_last;
92
+
93
+ tree->iova_taddr_map = iova_tree_new();
94
+ return tree;
95
+}
96
+
97
+/**
98
+ * Delete an iova tree
99
+ */
100
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree)
101
+{
102
+ iova_tree_destroy(iova_tree->iova_taddr_map);
103
+ g_free(iova_tree);
104
+}
105
+
106
+/**
107
+ * Find the IOVA address stored from a memory address
108
+ *
109
+ * @tree: The iova tree
110
+ * @map: The map with the memory address
111
+ *
112
+ * Return the stored mapping, or NULL if not found.
113
+ */
114
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *tree,
115
+ const DMAMap *map)
116
+{
117
+ return iova_tree_find_iova(tree->iova_taddr_map, map);
118
+}
119
+
120
+/**
121
+ * Allocate a new mapping
122
+ *
123
+ * @tree: The iova tree
124
+ * @map: The iova map
125
+ *
126
+ * Returns:
127
+ * - IOVA_OK if the map fits in the container
128
+ * - IOVA_ERR_INVALID if the map does not make sense (like size overflow)
129
+ * - IOVA_ERR_NOMEM if the tree cannot allocate more space.
130
+ *
131
+ * It returns the assigned iova in map->iova if the return value is IOVA_OK.
132
+ */
133
+int vhost_iova_tree_map_alloc(VhostIOVATree *tree, DMAMap *map)
134
+{
135
+ /* Some vhost devices do not like addr 0. Skip first page */
136
+ hwaddr iova_first = tree->iova_first ?: qemu_real_host_page_size;
137
+
138
+ if (map->translated_addr + map->size < map->translated_addr ||
139
+ map->perm == IOMMU_NONE) {
140
+ return IOVA_ERR_INVALID;
93
+ }
141
+ }
94
+
142
+
95
+ fprintf(stderr, "%s not present in tulip_pci\n", interface);
143
+ /* Allocate a node in IOVA address */
96
+ g_assert_not_reached();
144
+ return iova_tree_alloc_map(tree->iova_taddr_map, map, iova_first,
145
+ tree->iova_last);
97
+}
146
+}
98
+
147
+
99
+static void *tulip_pci_create(void *pci_bus, QGuestAllocator *alloc, void *addr)
148
+/**
149
+ * Remove existing mappings from iova tree
150
+ *
151
+ * @iova_tree: The vhost iova tree
152
+ * @map: The map to remove
153
+ */
154
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map)
100
+{
155
+{
101
+ QTulip_pci *tulip_pci = g_new0(QTulip_pci, 1);
156
+ iova_tree_remove(iova_tree->iova_taddr_map, map);
102
+ QPCIBus *bus = pci_bus;
157
+}
158
diff --git a/hw/virtio/vhost-iova-tree.h b/hw/virtio/vhost-iova-tree.h
159
new file mode 100644
160
index XXXXXXX..XXXXXXX
161
--- /dev/null
162
+++ b/hw/virtio/vhost-iova-tree.h
163
@@ -XXX,XX +XXX,XX @@
164
+/*
165
+ * vhost software live migration iova tree
166
+ *
167
+ * SPDX-FileCopyrightText: Red Hat, Inc. 2021
168
+ * SPDX-FileContributor: Author: Eugenio Pérez <eperezma@redhat.com>
169
+ *
170
+ * SPDX-License-Identifier: GPL-2.0-or-later
171
+ */
103
+
172
+
104
+ qpci_device_init(&tulip_pci->dev, bus, addr);
173
+#ifndef HW_VIRTIO_VHOST_IOVA_TREE_H
105
+ tulip_pci->obj.get_driver = tulip_pci_get_driver;
174
+#define HW_VIRTIO_VHOST_IOVA_TREE_H
106
+
175
+
107
+ return &tulip_pci->obj;
176
+#include "qemu/iova-tree.h"
108
+}
177
+#include "exec/memory.h"
109
+
178
+
110
+static void tulip_large_tx(void *obj, void *data, QGuestAllocator *alloc)
179
+typedef struct VhostIOVATree VhostIOVATree;
111
+{
112
+ QTulip_pci *tulip_pci = obj;
113
+ QPCIDevice *dev = &tulip_pci->dev;
114
+ QPCIBar bar;
115
+ struct tulip_descriptor context;
116
+ char guest_data[4096];
117
+ uint64_t context_pa;
118
+ uint64_t guest_pa;
119
+
180
+
120
+ qpci_device_enable(dev);
181
+VhostIOVATree *vhost_iova_tree_new(uint64_t iova_first, uint64_t iova_last);
121
+ bar = qpci_iomap(dev, 0, NULL);
182
+void vhost_iova_tree_delete(VhostIOVATree *iova_tree);
122
+ context_pa = guest_alloc(alloc, sizeof(context));
183
+G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostIOVATree, vhost_iova_tree_delete);
123
+ guest_pa = guest_alloc(alloc, 4096);
124
+ memset(guest_data, 'A', sizeof(guest_data));
125
+ context.status = TDES0_OWN;
126
+ context.control = TDES1_BUF2_SIZE_MASK << TDES1_BUF2_SIZE_SHIFT |
127
+ TDES1_BUF1_SIZE_MASK << TDES1_BUF1_SIZE_SHIFT;
128
+ context.buf_addr2 = guest_pa;
129
+ context.buf_addr1 = guest_pa;
130
+
184
+
131
+ qtest_memwrite(dev->bus->qts, context_pa, &context, sizeof(context));
185
+const DMAMap *vhost_iova_tree_find_iova(const VhostIOVATree *iova_tree,
132
+ qtest_memwrite(dev->bus->qts, guest_pa, guest_data, sizeof(guest_data));
186
+ const DMAMap *map);
133
+ qpci_io_writel(dev, bar, 0x20, context_pa);
187
+int vhost_iova_tree_map_alloc(VhostIOVATree *iova_tree, DMAMap *map);
134
+ qpci_io_writel(dev, bar, 0x30, CSR6_ST);
188
+void vhost_iova_tree_remove(VhostIOVATree *iova_tree, const DMAMap *map);
135
+ guest_free(alloc, context_pa);
136
+ guest_free(alloc, guest_pa);
137
+}
138
+
189
+
139
+static void tulip_register_nodes(void)
190
+#endif
140
+{
141
+ QOSGraphEdgeOptions opts = {
142
+ .extra_device_opts = "addr=04.0",
143
+ };
144
+ add_qpci_address(&opts, &(QPCIAddress) { .devfn = QPCI_DEVFN(4, 0) });
145
+
146
+ qos_node_create_driver("tulip", tulip_pci_create);
147
+ qos_node_consumes("tulip", "pci-bus", &opts);
148
+ qos_node_produces("tulip", "pci-device");
149
+
150
+ qos_add_test("tulip_large_tx", "tulip", tulip_large_tx, NULL);
151
+}
152
+
153
+libqos_init(tulip_register_nodes);
154
--
191
--
155
2.5.0
192
2.7.4
156
193
157
194
diff view generated by jsdifflib
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The CanBusClientInfo::can_receive handler returns whether the
3
Use the translations added in VhostIOVATree in SVQ.
4
device can or cannot receive new frames. Make it obvious by
4
5
returning a boolean type.
5
Only introduce usage here, not allocation and deallocation. As with
6
6
previous patches, we use the dead code paths of shadow_vqs_enabled to
7
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
7
avoid committing too many changes at once. These paths are impossible to take
8
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
8
at the moment.
9
Reviewed-by: Cédric Le Goater <clg@kaod.org>
9
10
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
11
Acked-by: Michael S. Tsirkin <mst@redhat.com>
10
Signed-off-by: Jason Wang <jasowang@redhat.com>
12
Signed-off-by: Jason Wang <jasowang@redhat.com>
11
---
13
---
12
hw/net/allwinner-sun8i-emac.c | 2 +-
14
hw/virtio/vhost-shadow-virtqueue.c | 86 +++++++++++++++++++++++---
13
hw/net/can/can_sja1000.c | 8 ++++----
15
hw/virtio/vhost-shadow-virtqueue.h | 6 +-
14
hw/net/can/can_sja1000.h | 2 +-
16
hw/virtio/vhost-vdpa.c | 122 +++++++++++++++++++++++++++++++------
15
include/net/can_emu.h | 2 +-
17
include/hw/virtio/vhost-vdpa.h | 3 +
16
net/can/can_socketcan.c | 4 ++--
18
4 files changed, 187 insertions(+), 30 deletions(-)
17
5 files changed, 9 insertions(+), 9 deletions(-)
19
18
20
diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c
19
diff --git a/hw/net/allwinner-sun8i-emac.c b/hw/net/allwinner-sun8i-emac.c
20
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
21
--- a/hw/net/allwinner-sun8i-emac.c
22
--- a/hw/virtio/vhost-shadow-virtqueue.c
22
+++ b/hw/net/allwinner-sun8i-emac.c
23
+++ b/hw/virtio/vhost-shadow-virtqueue.c
23
@@ -XXX,XX +XXX,XX @@ static void allwinner_sun8i_emac_flush_desc(FrameDescriptor *desc,
24
@@ -XXX,XX +XXX,XX @@ static uint16_t vhost_svq_available_slots(const VhostShadowVirtqueue *svq)
24
cpu_physical_memory_write(phys_addr, desc, sizeof(*desc));
25
return svq->vring.num - (svq->shadow_avail_idx - svq->shadow_used_idx);
25
}
26
}
26
27
27
-static int allwinner_sun8i_emac_can_receive(NetClientState *nc)
28
-static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
28
+static bool allwinner_sun8i_emac_can_receive(NetClientState *nc)
29
+/**
29
{
30
+ * Translate addresses between the qemu's virtual address and the SVQ IOVA
30
AwSun8iEmacState *s = qemu_get_nic_opaque(nc);
31
+ *
31
FrameDescriptor desc;
32
+ * @svq: Shadow VirtQueue
32
diff --git a/hw/net/can/can_sja1000.c b/hw/net/can/can_sja1000.c
33
+ * @vaddr: Translated IOVA addresses
34
+ * @iovec: Source qemu's VA addresses
35
+ * @num: Length of iovec and minimum length of vaddr
36
+ */
37
+static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq,
38
+ hwaddr *addrs, const struct iovec *iovec,
39
+ size_t num)
40
+{
41
+ if (num == 0) {
42
+ return true;
43
+ }
44
+
45
+ for (size_t i = 0; i < num; ++i) {
46
+ DMAMap needle = {
47
+ .translated_addr = (hwaddr)(uintptr_t)iovec[i].iov_base,
48
+ .size = iovec[i].iov_len,
49
+ };
50
+ Int128 needle_last, map_last;
51
+ size_t off;
52
+
53
+ const DMAMap *map = vhost_iova_tree_find_iova(svq->iova_tree, &needle);
54
+ /*
55
+ * Map cannot be NULL since iova map contains all guest space and
56
+ * qemu already has a physical address mapped
57
+ */
58
+ if (unlikely(!map)) {
59
+ qemu_log_mask(LOG_GUEST_ERROR,
60
+ "Invalid address 0x%"HWADDR_PRIx" given by guest",
61
+ needle.translated_addr);
62
+ return false;
63
+ }
64
+
65
+ off = needle.translated_addr - map->translated_addr;
66
+ addrs[i] = map->iova + off;
67
+
68
+ needle_last = int128_add(int128_make64(needle.translated_addr),
69
+ int128_make64(iovec[i].iov_len));
70
+ map_last = int128_make64(map->translated_addr + map->size);
71
+ if (unlikely(int128_gt(needle_last, map_last))) {
72
+ qemu_log_mask(LOG_GUEST_ERROR,
73
+ "Guest buffer expands over iova range");
74
+ return false;
75
+ }
76
+ }
77
+
78
+ return true;
79
+}
80
+
81
+static void vhost_vring_write_descs(VhostShadowVirtqueue *svq, hwaddr *sg,
82
const struct iovec *iovec, size_t num,
83
bool more_descs, bool write)
84
{
85
@@ -XXX,XX +XXX,XX @@ static void vhost_vring_write_descs(VhostShadowVirtqueue *svq,
86
} else {
87
descs[i].flags = flags;
88
}
89
- descs[i].addr = cpu_to_le64((hwaddr)(intptr_t)iovec[n].iov_base);
90
+ descs[i].addr = cpu_to_le64(sg[n]);
91
descs[i].len = cpu_to_le32(iovec[n].iov_len);
92
93
last = i;
94
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
95
{
96
unsigned avail_idx;
97
vring_avail_t *avail = svq->vring.avail;
98
+ bool ok;
99
+ g_autofree hwaddr *sgs = g_new(hwaddr, MAX(elem->out_num, elem->in_num));
100
101
*head = svq->free_head;
102
103
@@ -XXX,XX +XXX,XX @@ static bool vhost_svq_add_split(VhostShadowVirtqueue *svq,
104
return false;
105
}
106
107
- vhost_vring_write_descs(svq, elem->out_sg, elem->out_num, elem->in_num > 0,
108
- false);
109
- vhost_vring_write_descs(svq, elem->in_sg, elem->in_num, false, true);
110
+ ok = vhost_svq_translate_addr(svq, sgs, elem->out_sg, elem->out_num);
111
+ if (unlikely(!ok)) {
112
+ return false;
113
+ }
114
+ vhost_vring_write_descs(svq, sgs, elem->out_sg, elem->out_num,
115
+ elem->in_num > 0, false);
116
+
117
+
118
+ ok = vhost_svq_translate_addr(svq, sgs, elem->in_sg, elem->in_num);
119
+ if (unlikely(!ok)) {
120
+ return false;
121
+ }
122
+
123
+ vhost_vring_write_descs(svq, sgs, elem->in_sg, elem->in_num, false, true);
124
125
/*
126
* Put the entry in the available array (but don't update avail->idx until
127
@@ -XXX,XX +XXX,XX @@ void vhost_svq_set_svq_call_fd(VhostShadowVirtqueue *svq, int call_fd)
128
void vhost_svq_get_vring_addr(const VhostShadowVirtqueue *svq,
129
struct vhost_vring_addr *addr)
130
{
131
- addr->desc_user_addr = (uint64_t)(intptr_t)svq->vring.desc;
132
- addr->avail_user_addr = (uint64_t)(intptr_t)svq->vring.avail;
133
- addr->used_user_addr = (uint64_t)(intptr_t)svq->vring.used;
134
+ addr->desc_user_addr = (uint64_t)(uintptr_t)svq->vring.desc;
135
+ addr->avail_user_addr = (uint64_t)(uintptr_t)svq->vring.avail;
136
+ addr->used_user_addr = (uint64_t)(uintptr_t)svq->vring.used;
137
}
138
139
size_t vhost_svq_driver_area_size(const VhostShadowVirtqueue *svq)
140
@@ -XXX,XX +XXX,XX @@ void vhost_svq_stop(VhostShadowVirtqueue *svq)
141
* Creates vhost shadow virtqueue, and instructs the vhost device to use the
142
* shadow methods and file descriptors.
143
*
144
+ * @iova_tree: Tree to perform descriptors translations
145
+ *
146
* Returns the new virtqueue or NULL.
147
*
148
* In case of error, reason is reported through error_report.
149
*/
150
-VhostShadowVirtqueue *vhost_svq_new(void)
151
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree)
152
{
153
g_autofree VhostShadowVirtqueue *svq = g_new0(VhostShadowVirtqueue, 1);
154
int r;
155
@@ -XXX,XX +XXX,XX @@ VhostShadowVirtqueue *vhost_svq_new(void)
156
157
event_notifier_init_fd(&svq->svq_kick, VHOST_FILE_UNBIND);
158
event_notifier_set_handler(&svq->hdev_call, vhost_svq_handle_call);
159
+ svq->iova_tree = iova_tree;
160
return g_steal_pointer(&svq);
161
162
err_init_hdev_call:
163
diff --git a/hw/virtio/vhost-shadow-virtqueue.h b/hw/virtio/vhost-shadow-virtqueue.h
33
index XXXXXXX..XXXXXXX 100644
164
index XXXXXXX..XXXXXXX 100644
34
--- a/hw/net/can/can_sja1000.c
165
--- a/hw/virtio/vhost-shadow-virtqueue.h
35
+++ b/hw/net/can/can_sja1000.c
166
+++ b/hw/virtio/vhost-shadow-virtqueue.h
36
@@ -XXX,XX +XXX,XX @@ uint64_t can_sja_mem_read(CanSJA1000State *s, hwaddr addr, unsigned size)
167
@@ -XXX,XX +XXX,XX @@
37
return temp;
168
#include "qemu/event_notifier.h"
169
#include "hw/virtio/virtio.h"
170
#include "standard-headers/linux/vhost_types.h"
171
+#include "hw/virtio/vhost-iova-tree.h"
172
173
/* Shadow virtqueue to relay notifications */
174
typedef struct VhostShadowVirtqueue {
175
@@ -XXX,XX +XXX,XX @@ typedef struct VhostShadowVirtqueue {
176
/* Virtio device */
177
VirtIODevice *vdev;
178
179
+ /* IOVA mapping */
180
+ VhostIOVATree *iova_tree;
181
+
182
/* Map for use the guest's descriptors */
183
VirtQueueElement **ring_id_maps;
184
185
@@ -XXX,XX +XXX,XX @@ void vhost_svq_start(VhostShadowVirtqueue *svq, VirtIODevice *vdev,
186
VirtQueue *vq);
187
void vhost_svq_stop(VhostShadowVirtqueue *svq);
188
189
-VhostShadowVirtqueue *vhost_svq_new(void);
190
+VhostShadowVirtqueue *vhost_svq_new(VhostIOVATree *iova_tree);
191
192
void vhost_svq_free(gpointer vq);
193
G_DEFINE_AUTOPTR_CLEANUP_FUNC(VhostShadowVirtqueue, vhost_svq_free);
194
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
195
index XXXXXXX..XXXXXXX 100644
196
--- a/hw/virtio/vhost-vdpa.c
197
+++ b/hw/virtio/vhost-vdpa.c
198
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener,
199
vaddr, section->readonly);
200
201
llsize = int128_sub(llend, int128_make64(iova));
202
+ if (v->shadow_vqs_enabled) {
203
+ DMAMap mem_region = {
204
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
205
+ .size = int128_get64(llsize) - 1,
206
+ .perm = IOMMU_ACCESS_FLAG(true, section->readonly),
207
+ };
208
+
209
+ int r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region);
210
+ if (unlikely(r != IOVA_OK)) {
211
+ error_report("Can't allocate a mapping (%d)", r);
212
+ goto fail;
213
+ }
214
+
215
+ iova = mem_region.iova;
216
+ }
217
218
vhost_vdpa_iotlb_batch_begin_once(v);
219
ret = vhost_vdpa_dma_map(v, iova, int128_get64(llsize),
220
@@ -XXX,XX +XXX,XX @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener,
221
222
llsize = int128_sub(llend, int128_make64(iova));
223
224
+ if (v->shadow_vqs_enabled) {
225
+ const DMAMap *result;
226
+ const void *vaddr = memory_region_get_ram_ptr(section->mr) +
227
+ section->offset_within_region +
228
+ (iova - section->offset_within_address_space);
229
+ DMAMap mem_region = {
230
+ .translated_addr = (hwaddr)(uintptr_t)vaddr,
231
+ .size = int128_get64(llsize) - 1,
232
+ };
233
+
234
+ result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region);
235
+ iova = result->iova;
236
+ vhost_iova_tree_remove(v->iova_tree, &mem_region);
237
+ }
238
vhost_vdpa_iotlb_batch_begin_once(v);
239
ret = vhost_vdpa_dma_unmap(v, iova, int128_get64(llsize));
240
if (ret) {
241
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
242
243
shadow_vqs = g_ptr_array_new_full(hdev->nvqs, vhost_svq_free);
244
for (unsigned n = 0; n < hdev->nvqs; ++n) {
245
- g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new();
246
+ g_autoptr(VhostShadowVirtqueue) svq = vhost_svq_new(v->iova_tree);
247
248
if (unlikely(!svq)) {
249
error_setg(errp, "Cannot create svq %u", n);
250
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_svq_set_fds(struct vhost_dev *dev,
251
/**
252
* Unmap a SVQ area in the device
253
*/
254
-static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr iova,
255
- hwaddr size)
256
+static bool vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v,
257
+ const DMAMap *needle)
258
{
259
+ const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, needle);
260
+ hwaddr size;
261
int r;
262
263
- size = ROUND_UP(size, qemu_real_host_page_size);
264
- r = vhost_vdpa_dma_unmap(v, iova, size);
265
+ if (unlikely(!result)) {
266
+ error_report("Unable to find SVQ address to unmap");
267
+ return false;
268
+ }
269
+
270
+ size = ROUND_UP(result->size, qemu_real_host_page_size);
271
+ r = vhost_vdpa_dma_unmap(v, result->iova, size);
272
return r == 0;
38
}
273
}
39
274
40
-int can_sja_can_receive(CanBusClientState *client)
275
static bool vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev,
41
+bool can_sja_can_receive(CanBusClientState *client)
276
const VhostShadowVirtqueue *svq)
42
{
277
{
43
CanSJA1000State *s = container_of(client, CanSJA1000State, bus_client);
278
+ DMAMap needle = {};
44
279
struct vhost_vdpa *v = dev->opaque;
45
if (s->clock & 0x80) { /* PeliCAN Mode */
280
struct vhost_vring_addr svq_addr;
46
if (s->mode & 0x01) { /* reset mode. */
281
- size_t device_size = vhost_svq_device_area_size(svq);
47
- return 0;
282
- size_t driver_size = vhost_svq_driver_area_size(svq);
48
+ return false;
283
bool ok;
49
}
284
50
} else { /* BasicCAN mode */
285
vhost_svq_get_vring_addr(svq, &svq_addr);
51
if (s->control & 0x01) {
286
52
- return 0;
287
- ok = vhost_vdpa_svq_unmap_ring(v, svq_addr.desc_user_addr, driver_size);
53
+ return false;
288
+ needle.translated_addr = svq_addr.desc_user_addr;
54
}
289
+ ok = vhost_vdpa_svq_unmap_ring(v, &needle);
290
if (unlikely(!ok)) {
291
return false;
55
}
292
}
56
293
57
- return 1; /* always return 1, when operation mode */
294
- return vhost_vdpa_svq_unmap_ring(v, svq_addr.used_user_addr, device_size);
58
+ return true; /* always return true, when operation mode */
295
+ needle.translated_addr = svq_addr.used_user_addr;
296
+ return vhost_vdpa_svq_unmap_ring(v, &needle);
297
+}
298
+
299
+/**
300
+ * Map the SVQ area in the device
301
+ *
302
+ * @v: Vhost-vdpa device
303
+ * @needle: The area to search iova
304
+ * @errorp: Error pointer
305
+ */
306
+static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle,
307
+ Error **errp)
308
+{
309
+ int r;
310
+
311
+ r = vhost_iova_tree_map_alloc(v->iova_tree, needle);
312
+ if (unlikely(r != IOVA_OK)) {
313
+ error_setg(errp, "Cannot allocate iova (%d)", r);
314
+ return false;
315
+ }
316
+
317
+ r = vhost_vdpa_dma_map(v, needle->iova, needle->size + 1,
318
+ (void *)(uintptr_t)needle->translated_addr,
319
+ needle->perm == IOMMU_RO);
320
+ if (unlikely(r != 0)) {
321
+ error_setg_errno(errp, -r, "Cannot map region to device");
322
+ vhost_iova_tree_remove(v->iova_tree, needle);
323
+ }
324
+
325
+ return r == 0;
59
}
326
}
60
327
61
ssize_t can_sja_receive(CanBusClientState *client, const qemu_can_frame *frames,
328
/**
62
diff --git a/hw/net/can/can_sja1000.h b/hw/net/can/can_sja1000.h
329
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_svq_map_rings(struct vhost_dev *dev,
330
struct vhost_vring_addr *addr,
331
Error **errp)
332
{
333
+ DMAMap device_region, driver_region;
334
+ struct vhost_vring_addr svq_addr;
335
struct vhost_vdpa *v = dev->opaque;
336
size_t device_size = vhost_svq_device_area_size(svq);
337
size_t driver_size = vhost_svq_driver_area_size(svq);
338
- int r;
339
+ size_t avail_offset;
340
+ bool ok;
341
342
ERRP_GUARD();
343
- vhost_svq_get_vring_addr(svq, addr);
344
+ vhost_svq_get_vring_addr(svq, &svq_addr);
345
346
- r = vhost_vdpa_dma_map(v, addr->desc_user_addr, driver_size,
347
- (void *)(uintptr_t)addr->desc_user_addr, true);
348
- if (unlikely(r != 0)) {
349
- error_setg_errno(errp, -r, "Cannot create vq driver region: ");
350
+ driver_region = (DMAMap) {
351
+ .translated_addr = svq_addr.desc_user_addr,
352
+ .size = driver_size - 1,
353
+ .perm = IOMMU_RO,
354
+ };
355
+ ok = vhost_vdpa_svq_map_ring(v, &driver_region, errp);
356
+ if (unlikely(!ok)) {
357
+ error_prepend(errp, "Cannot create vq driver region: ");
358
return false;
359
}
360
+ addr->desc_user_addr = driver_region.iova;
361
+ avail_offset = svq_addr.avail_user_addr - svq_addr.desc_user_addr;
362
+ addr->avail_user_addr = driver_region.iova + avail_offset;
363
364
- r = vhost_vdpa_dma_map(v, addr->used_user_addr, device_size,
365
- (void *)(intptr_t)addr->used_user_addr, false);
366
- if (unlikely(r != 0)) {
367
- error_setg_errno(errp, -r, "Cannot create vq device region: ");
368
+ device_region = (DMAMap) {
369
+ .translated_addr = svq_addr.used_user_addr,
370
+ .size = device_size - 1,
371
+ .perm = IOMMU_RW,
372
+ };
373
+ ok = vhost_vdpa_svq_map_ring(v, &device_region, errp);
374
+ if (unlikely(!ok)) {
375
+ error_prepend(errp, "Cannot create vq device region: ");
376
+ vhost_vdpa_svq_unmap_ring(v, &driver_region);
377
}
378
+ addr->used_user_addr = device_region.iova;
379
380
- return r == 0;
381
+ return ok;
382
}
383
384
static bool vhost_vdpa_svq_setup(struct vhost_dev *dev,
385
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
63
index XXXXXXX..XXXXXXX 100644
386
index XXXXXXX..XXXXXXX 100644
64
--- a/hw/net/can/can_sja1000.h
387
--- a/include/hw/virtio/vhost-vdpa.h
65
+++ b/hw/net/can/can_sja1000.h
388
+++ b/include/hw/virtio/vhost-vdpa.h
66
@@ -XXX,XX +XXX,XX @@ void can_sja_disconnect(CanSJA1000State *s);
389
@@ -XXX,XX +XXX,XX @@
67
390
68
int can_sja_init(CanSJA1000State *s, qemu_irq irq);
391
#include <gmodule.h>
69
392
70
-int can_sja_can_receive(CanBusClientState *client);
393
+#include "hw/virtio/vhost-iova-tree.h"
71
+bool can_sja_can_receive(CanBusClientState *client);
394
#include "hw/virtio/virtio.h"
72
395
#include "standard-headers/linux/vhost_types.h"
73
ssize_t can_sja_receive(CanBusClientState *client,
396
74
const qemu_can_frame *frames, size_t frames_cnt);
397
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
75
diff --git a/include/net/can_emu.h b/include/net/can_emu.h
398
MemoryListener listener;
76
index XXXXXXX..XXXXXXX 100644
399
struct vhost_vdpa_iova_range iova_range;
77
--- a/include/net/can_emu.h
400
bool shadow_vqs_enabled;
78
+++ b/include/net/can_emu.h
401
+ /* IOVA mapping used by the Shadow Virtqueue */
79
@@ -XXX,XX +XXX,XX @@ typedef struct CanBusClientState CanBusClientState;
402
+ VhostIOVATree *iova_tree;
80
typedef struct CanBusState CanBusState;
403
GPtrArray *shadow_vqs;
81
404
struct vhost_dev *dev;
82
typedef struct CanBusClientInfo {
405
VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX];
83
- int (*can_receive)(CanBusClientState *);
84
+ bool (*can_receive)(CanBusClientState *);
85
ssize_t (*receive)(CanBusClientState *,
86
const struct qemu_can_frame *frames, size_t frames_cnt);
87
} CanBusClientInfo;
88
diff --git a/net/can/can_socketcan.c b/net/can/can_socketcan.c
89
index XXXXXXX..XXXXXXX 100644
90
--- a/net/can/can_socketcan.c
91
+++ b/net/can/can_socketcan.c
92
@@ -XXX,XX +XXX,XX @@ static void can_host_socketcan_read(void *opaque)
93
}
94
}
95
96
-static int can_host_socketcan_can_receive(CanBusClientState *client)
97
+static bool can_host_socketcan_can_receive(CanBusClientState *client)
98
{
99
- return 1;
100
+ return true;
101
}
102
103
static ssize_t can_host_socketcan_receive(CanBusClientState *client,
104
--
406
--
105
2.5.0
407
2.7.4
106
408
107
409
diff view generated by jsdifflib
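The translation done by vhost_svq_translate_addr() above is plain offset
arithmetic inside one mapping returned by the IOVA tree; an illustration
with invented addresses:

    /* Illustration only: all values are made up */
    DMAMap map = {
        .translated_addr = 0x7f0000000000,  /* qemu VA where the chunk starts */
        .size            = 0xfffff,         /* 1 MiB chunk, inclusive size */
        .iova            = 0x100000,        /* IOVA assigned by the tree */
    };
    hwaddr qemu_va = 0x7f0000000200;        /* iov_base of one guest buffer */
    hwaddr off     = qemu_va - map.translated_addr;  /* 0x200 */
    hwaddr iova    = map.iova + off;        /* 0x100200, what the device gets */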
1
From: Philippe Mathieu-Daudé <f4bug@amsat.org>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The command is 32-bit, but we only load its lower 16 bits with
3
This is needed to achieve migration, so the destination can restore its
4
the 'get_uint16(s->scb + 2)' call, so the upper 16 bits are always zero.
4
index.
5
5
6
Once shifted by 16, the command bits match the status bits:
6
Setting the base to the last used idx, so the destination will see as available all
7
the entries that the device did not use, including the ones still
8
being processed.
7
9
8
- Command
10
This is ok for networking, but other kinds of devices might have
9
Bit 31 ACK-CX Acknowledges that the CU completed an Action Command.
11
problems with these retransmissions.
10
Bit 30 ACK-FR Acknowledges that the RU received a frame.
11
Bit 29 ACK-CNA Acknowledges that the Command Unit became not active.
12
Bit 28 ACK-RNR Acknowledges that the Receive Unit became not ready.
13
12
14
- Status
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
15
Bit 15 CX The CU finished executing a command with its I(interrupt) bit set.
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
16
Bit 14 FR The RU finished receiving a frame.
17
Bit 13 CNA The Command Unit left the Active state.
18
Bit 12 RNR The Receive Unit left the Ready state.
19
20
Add the SCB_COMMAND_ACK_MASK definition to simplify the code.
21
22
This fixes Coverity 1419392 (CONSTANT_EXPRESSION_RESULT):
23
24
/hw/net/i82596.c: 352 in examine_scb()
25
346 cuc = (command >> 8) & 0x7;
26
347 ruc = (command >> 4) & 0x7;
27
348 DBG(printf("MAIN COMMAND %04x cuc %02x ruc %02x\n", command, cuc, ruc));
28
349 /* and clear the scb command word */
29
350 set_uint16(s->scb + 2, 0);
30
351
31
>>> CID 1419392: (CONSTANT_EXPRESSION_RESULT)
32
>>> "command & (2147483648UL /* 1UL << 31 */)" is always 0 regardless of the values of its operands. This occurs as the logical operand of "if".
33
352 if (command & BIT(31)) /* ACK-CX */
34
353 s->scb_status &= ~SCB_STATUS_CX;
35
>>> CID 1419392: (CONSTANT_EXPRESSION_RESULT)
36
>>> "command & (1073741824UL /* 1UL << 30 */)" is always 0 regardless of the values of its operands. This occurs as the logical operand of "if".
37
354 if (command & BIT(30)) /*ACK-FR */
38
355 s->scb_status &= ~SCB_STATUS_FR;
39
>>> CID 1419392: (CONSTANT_EXPRESSION_RESULT)
40
>>> "command & (536870912UL /* 1UL << 29 */)" is always 0 regardless of the values of its operands. This occurs as the logical operand of "if".
41
356 if (command & BIT(29)) /*ACK-CNA */
42
357 s->scb_status &= ~SCB_STATUS_CNA;
43
>>> CID 1419392: (CONSTANT_EXPRESSION_RESULT)
44
>>> "command & (268435456UL /* 1UL << 28 */)" is always 0 regardless of the values of its operands. This occurs as the logical operand of "if".
45
358 if (command & BIT(28)) /*ACK-RNR */
46
359 s->scb_status &= ~SCB_STATUS_RNR;
47
48
Fixes: Coverity CID 1419392 (commit 376b851909)
49
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
50
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
51
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
52
---
16
---
53
hw/net/i82596.c | 12 ++++--------
17
hw/virtio/vhost-vdpa.c | 17 +++++++++++++++++
54
1 file changed, 4 insertions(+), 8 deletions(-)
18
1 file changed, 17 insertions(+)
55
19
56
diff --git a/hw/net/i82596.c b/hw/net/i82596.c
20
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
57
index XXXXXXX..XXXXXXX 100644
21
index XXXXXXX..XXXXXXX 100644
58
--- a/hw/net/i82596.c
22
--- a/hw/virtio/vhost-vdpa.c
59
+++ b/hw/net/i82596.c
23
+++ b/hw/virtio/vhost-vdpa.c
60
@@ -XXX,XX +XXX,XX @@
24
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_base(struct vhost_dev *dev,
61
#define SCB_STATUS_CNA 0x2000 /* CU left active state */
25
static int vhost_vdpa_get_vring_base(struct vhost_dev *dev,
62
#define SCB_STATUS_RNR 0x1000 /* RU left active state */
26
struct vhost_vring_state *ring)
63
27
{
64
+#define SCB_COMMAND_ACK_MASK \
28
+ struct vhost_vdpa *v = dev->opaque;
65
+ (SCB_STATUS_CX | SCB_STATUS_FR | SCB_STATUS_CNA | SCB_STATUS_RNR)
29
int ret;
30
31
+ if (v->shadow_vqs_enabled) {
32
+ VhostShadowVirtqueue *svq = g_ptr_array_index(v->shadow_vqs,
33
+ ring->index);
66
+
34
+
67
#define CU_IDLE 0
35
+ /*
68
#define CU_SUSPENDED 1
36
+ * Setting base as last used idx, so destination will see as available
69
#define CU_ACTIVE 2
37
+ * all the entries that the device did not use, including the in-flight
70
@@ -XXX,XX +XXX,XX @@ static void examine_scb(I82596State *s)
38
+ * processing ones.
71
/* and clear the scb command word */
39
+ *
72
set_uint16(s->scb + 2, 0);
40
+ * TODO: This is ok for networking, but other kinds of devices might
73
41
+ * have problems with these retransmissions.
74
- if (command & BIT(31)) /* ACK-CX */
42
+ */
75
- s->scb_status &= ~SCB_STATUS_CX;
43
+ ring->num = svq->last_used_idx;
76
- if (command & BIT(30)) /*ACK-FR */
44
+ return 0;
77
- s->scb_status &= ~SCB_STATUS_FR;
45
+ }
78
- if (command & BIT(29)) /*ACK-CNA */
46
+
79
- s->scb_status &= ~SCB_STATUS_CNA;
47
ret = vhost_vdpa_call(dev, VHOST_GET_VRING_BASE, ring);
80
- if (command & BIT(28)) /*ACK-RNR */
48
trace_vhost_vdpa_get_vring_base(dev, ring->index, ring->num);
81
- s->scb_status &= ~SCB_STATUS_RNR;
49
return ret;
82
+ s->scb_status &= ~(command & SCB_COMMAND_ACK_MASK);
83
84
switch (cuc) {
85
case 0: /* no change */
86
--
50
--
87
2.5.0
51
2.7.4
88
52
89
53
diff view generated by jsdifflib
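The arithmetic behind SCB_COMMAND_ACK_MASK above: get_uint16() returns a
16-bit word, so the ACK bits documented at positions 31..28 of the full
command actually sit at 15..12, which are exactly the status bit positions,
and one mask-and-clear acknowledges all four at once. A self-contained
illustration (values as documented in the commit message):

    #define SCB_STATUS_CX   0x8000  /* bit 15, pairs with ACK-CX  (bit 31) */
    #define SCB_STATUS_FR   0x4000  /* bit 14, pairs with ACK-FR  (bit 30) */
    #define SCB_STATUS_CNA  0x2000  /* bit 13, pairs with ACK-CNA (bit 29) */
    #define SCB_STATUS_RNR  0x1000  /* bit 12, pairs with ACK-RNR (bit 28) */
    #define SCB_COMMAND_ACK_MASK \
        (SCB_STATUS_CX | SCB_STATUS_FR | SCB_STATUS_CNA | SCB_STATUS_RNR)

    uint16_t command    = 0xA000;   /* ACK-CX | ACK-CNA in the 16-bit word */
    uint16_t scb_status = 0xF000;   /* CX, FR, CNA and RNR all pending */
    scb_status &= ~(command & SCB_COMMAND_ACK_MASK);
    /* scb_status == 0x5000: CX and CNA cleared, FR and RNR still pending */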
1
From: Peter Maydell <peter.maydell@linaro.org>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
The i82596_receive() function attempts to pass the guest a buffer
3
Setting the log address would make the device start reporting invalid
4
which is effectively the concatenation of the data it is passed and a
4
dirty memory because the SVQ vrings are located in qemu's memory.
5
4 byte CRC value. However, rather than implementing this as "write
6
the data; then write the CRC" it instead bumps the length value of
7
the data by 4, and writes 4 extra bytes from beyond the end of the
8
buffer, which it then overwrites with the CRC. It also assumed that
9
we could always fit all four bytes of the CRC into the final receive
10
buffer, which might not be true if the CRC needs to be split over two
11
receive buffers.
12
5
13
Calculate separately how many bytes we need to transfer into the
6
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
guest's receive buffer from the source buffer, and how many we need
7
Acked-by: Michael S. Tsirkin <mst@redhat.com>
15
to transfer from the CRC word.
16
17
We add a count 'bufsz' of the number of bytes left in the source
18
buffer, which we use purely to assert() that we don't overrun.
19
20
Spotted by Coverity (CID 1419396) for the specific case when we end
21
up using a local array as the source buffer.
22
23
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
24
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
Signed-off-by: Jason Wang <jasowang@redhat.com>
25
---
9
---
26
hw/net/i82596.c | 44 +++++++++++++++++++++++++++++++++++---------
10
hw/virtio/vhost-vdpa.c | 3 ++-
27
1 file changed, 35 insertions(+), 9 deletions(-)
11
1 file changed, 2 insertions(+), 1 deletion(-)
28
12
29
diff --git a/hw/net/i82596.c b/hw/net/i82596.c
13
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
30
index XXXXXXX..XXXXXXX 100644
14
index XXXXXXX..XXXXXXX 100644
31
--- a/hw/net/i82596.c
15
--- a/hw/virtio/vhost-vdpa.c
32
+++ b/hw/net/i82596.c
16
+++ b/hw/virtio/vhost-vdpa.c
33
@@ -XXX,XX +XXX,XX @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz)
17
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started)
34
uint32_t rfd_p;
18
static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base,
35
uint32_t rbd;
19
struct vhost_log *log)
36
uint16_t is_broadcast = 0;
20
{
37
- size_t len = sz;
21
- if (vhost_vdpa_one_time_request(dev)) {
38
+ size_t len = sz; /* length of data for guest (including CRC) */
22
+ struct vhost_vdpa *v = dev->opaque;
39
+ size_t bufsz = sz; /* length of data in buf */
23
+ if (v->shadow_vqs_enabled || vhost_vdpa_one_time_request(dev)) {
40
uint32_t crc;
24
return 0;
41
uint8_t *crc_ptr;
42
uint8_t buf1[MIN_BUF_SIZE + VLAN_HLEN];
43
@@ -XXX,XX +XXX,XX @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz)
44
if (len < MIN_BUF_SIZE) {
45
len = MIN_BUF_SIZE;
46
}
47
+ bufsz = len;
48
}
25
}
49
26
50
/* Calculate the ethernet checksum (4 bytes) */
51
@@ -XXX,XX +XXX,XX @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz)
52
while (len) {
53
uint16_t buffer_size, num;
54
uint32_t rba;
55
+ size_t bufcount, crccount;
56
57
/* printf("Receive: rbd is %08x\n", rbd); */
58
buffer_size = get_uint16(rbd + 12);
59
@@ -XXX,XX +XXX,XX @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz)
60
}
61
rba = get_uint32(rbd + 8);
62
/* printf("rba is 0x%x\n", rba); */
63
- address_space_write(&address_space_memory, rba,
64
- MEMTXATTRS_UNSPECIFIED, buf, num);
65
- rba += num;
66
- buf += num;
67
- len -= num;
68
- if (len == 0) { /* copy crc */
69
- address_space_write(&address_space_memory, rba - 4,
70
- MEMTXATTRS_UNSPECIFIED, crc_ptr, 4);
71
+ /*
72
+ * Calculate how many bytes we want from buf[] and how many
73
+ * from the CRC.
74
+ */
75
+ if ((len - num) >= 4) {
76
+ /* The whole guest buffer, we haven't hit the CRC yet */
77
+ bufcount = num;
78
+ } else {
79
+ /* All that's left of buf[] */
80
+ bufcount = len - 4;
81
+ }
82
+ crccount = num - bufcount;
83
+
84
+ if (bufcount > 0) {
85
+ /* Still some of the actual data buffer to transfer */
86
+ assert(bufsz >= bufcount);
87
+ bufsz -= bufcount;
88
+ address_space_write(&address_space_memory, rba,
89
+ MEMTXATTRS_UNSPECIFIED, buf, bufcount);
90
+ rba += bufcount;
91
+ buf += bufcount;
92
+ len -= bufcount;
93
+ }
94
+
95
+ /* Write as much of the CRC as fits */
96
+ if (crccount > 0) {
97
+ address_space_write(&address_space_memory, rba,
98
+ MEMTXATTRS_UNSPECIFIED, crc_ptr, crccount);
99
+ rba += crccount;
100
+ crc_ptr += crccount;
101
+ len -= crccount;
102
}
103
104
num |= 0x4000; /* set F BIT */
105
--
27
--
106
2.5.0
28
2.7.4
107
29
108
30
diff view generated by jsdifflib
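A worked example of the bufcount/crccount split above, with invented sizes:
6 bytes remain for the guest (2 data bytes plus the 4-byte CRC) and the
current receive buffer accepts num = 4 bytes:

    size_t len = 6, num = 4;    /* invented: 2 data bytes + CRC, 4-byte buffer */
    size_t bufcount, crccount;

    if ((len - num) >= 4) {     /* 2 >= 4 is false: this buffer reaches the CRC */
        bufcount = num;
    } else {
        bufcount = len - 4;     /* 2, all that is left of buf[] */
    }
    crccount = num - bufcount;  /* 2 CRC bytes fit in this buffer */
    /* 2 data + 2 CRC bytes are written here; the last 2 CRC bytes spill
     * into the next receive buffer, the case the old code mishandled. */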
1
From: Philippe Mathieu-Daudé <philmd@redhat.com>
1
From: Eugenio Pérez <eperezma@redhat.com>
2
2
3
We will modify this code in the next commit. Clean it up
3
SVQ is able to log the dirty bits by itself, so let's use it to avoid
4
first to avoid checkpatch.pl errors.
4
blocking migration.
5
5
6
Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
6
Also, ignore setting and clearing VHOST_F_LOG_ALL in set_features if SVQ is
7
Reviewed-by: Alistair Francis <alistair.francis@wdc.com>
7
enabled. Even if the device supports it, the reports would be nonsense
8
Reviewed-by: Cédric Le Goater <clg@kaod.org>
8
because the SVQ memory lives in QEMU's address space.
9
10
The log region is still allocated. Future changes might skip that, but
11
this series is already long enough.
12
13
Signed-off-by: Eugenio Pérez <eperezma@redhat.com>
14
Acked-by: Michael S. Tsirkin <mst@redhat.com>
9
Signed-off-by: Jason Wang <jasowang@redhat.com>
15
Signed-off-by: Jason Wang <jasowang@redhat.com>
10
---
16
---
11
hw/net/rtl8139.c | 10 ++++++----
17
hw/virtio/vhost-vdpa.c | 39 +++++++++++++++++++++++++++++++++++----
12
1 file changed, 6 insertions(+), 4 deletions(-)
18
include/hw/virtio/vhost-vdpa.h | 1 +
19
2 files changed, 36 insertions(+), 4 deletions(-)
13
20
14
diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c
21
diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c
15
index XXXXXXX..XXXXXXX 100644
22
index XXXXXXX..XXXXXXX 100644
16
--- a/hw/net/rtl8139.c
23
--- a/hw/virtio/vhost-vdpa.c
17
+++ b/hw/net/rtl8139.c
24
+++ b/hw/virtio/vhost-vdpa.c
18
@@ -XXX,XX +XXX,XX @@ static int rtl8139_can_receive(NetClientState *nc)
25
@@ -XXX,XX +XXX,XX @@ static bool vhost_vdpa_one_time_request(struct vhost_dev *dev)
19
int avail;
26
return v->index != 0;
20
27
}
21
/* Receive (drop) packets if card is disabled. */
28
22
- if (!s->clock_enabled)
29
+static int vhost_vdpa_get_dev_features(struct vhost_dev *dev,
23
- return 1;
30
+ uint64_t *features)
24
- if (!rtl8139_receiver_enabled(s))
31
+{
25
- return 1;
32
+ int ret;
26
+ if (!s->clock_enabled) {
33
+
27
+ return 1;
34
+ ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
35
+ trace_vhost_vdpa_get_features(dev, *features);
36
+ return ret;
37
+}
38
+
39
static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
40
Error **errp)
41
{
42
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v,
43
return 0;
44
}
45
46
- r = hdev->vhost_ops->vhost_get_features(hdev, &dev_features);
47
+ r = vhost_vdpa_get_dev_features(hdev, &dev_features);
48
if (r != 0) {
49
error_setg_errno(errp, -r, "Can't get vdpa device features");
50
return r;
51
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_mem_table(struct vhost_dev *dev,
52
static int vhost_vdpa_set_features(struct vhost_dev *dev,
53
uint64_t features)
54
{
55
+ struct vhost_vdpa *v = dev->opaque;
56
int ret;
57
58
if (vhost_vdpa_one_time_request(dev)) {
59
return 0;
60
}
61
62
+ if (v->shadow_vqs_enabled) {
63
+ if ((v->acked_features ^ features) == BIT_ULL(VHOST_F_LOG_ALL)) {
64
+ /*
65
+ * QEMU is just trying to enable or disable logging. SVQ handles
66
+ * this sepparately, so no need to forward this.
67
+ */
68
+ v->acked_features = features;
69
+ return 0;
70
+ }
71
+
72
+ v->acked_features = features;
73
+
74
+ /* We must not ack _F_LOG if SVQ is enabled */
75
+ features &= ~BIT_ULL(VHOST_F_LOG_ALL);
28
+ }
76
+ }
29
+ if (!rtl8139_receiver_enabled(s)) {
77
+
30
+ return 1;
78
trace_vhost_vdpa_set_features(dev, features);
79
ret = vhost_vdpa_call(dev, VHOST_SET_FEATURES, &features);
80
if (ret) {
81
@@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_set_vring_call(struct vhost_dev *dev,
82
static int vhost_vdpa_get_features(struct vhost_dev *dev,
83
uint64_t *features)
84
{
85
- int ret;
86
+ struct vhost_vdpa *v = dev->opaque;
87
+ int ret = vhost_vdpa_get_dev_features(dev, features);
88
+
89
+ if (ret == 0 && v->shadow_vqs_enabled) {
90
+ /* Add SVQ logging capabilities */
91
+ *features |= BIT_ULL(VHOST_F_LOG_ALL);
31
+ }
92
+ }
32
93
33
if (rtl8139_cp_receiver_enabled(s) && rtl8139_cp_rx_valid(s)) {
94
- ret = vhost_vdpa_call(dev, VHOST_GET_FEATURES, features);
34
/* ??? Flow control not implemented in c+ mode.
95
- trace_vhost_vdpa_get_features(dev, *features);
96
return ret;
97
}
98
99
diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h
100
index XXXXXXX..XXXXXXX 100644
101
--- a/include/hw/virtio/vhost-vdpa.h
102
+++ b/include/hw/virtio/vhost-vdpa.h
103
@@ -XXX,XX +XXX,XX @@ typedef struct vhost_vdpa {
104
bool iotlb_batch_begin_sent;
105
MemoryListener listener;
106
struct vhost_vdpa_iova_range iova_range;
107
+ uint64_t acked_features;
108
bool shadow_vqs_enabled;
109
/* IOVA mapping used by the Shadow Virtqueue */
110
VhostIOVATree *iova_tree;
35
--
111
--
36
2.5.0
112
2.7.4
37
113
38
114
diff view generated by jsdifflib
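The XOR test in vhost_vdpa_set_features() above isolates exactly the feature
bits that changed, so comparing the result against BIT_ULL(VHOST_F_LOG_ALL)
detects "only the logging bit flipped". An illustration with invented
feature words:

    uint64_t acked    = BIT_ULL(VIRTIO_F_VERSION_1);       /* previously acked */
    uint64_t features = acked | BIT_ULL(VHOST_F_LOG_ALL);  /* toggle logging */

    /* XOR keeps only the differing bits */
    assert((acked ^ features) == BIT_ULL(VHOST_F_LOG_ALL));

    /* any additional change breaks the equality, so it is forwarded */
    features |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
    assert((acked ^ features) != BIT_ULL(VHOST_F_LOG_ALL));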