:p
atchew
Login
The following changes since commit cfe68ae025f704f336d7dd3d1903ce37b445831d: Merge remote-tracking branch 'remotes/vivier2/tags/linux-user-for-5.0-pull-request' into staging (2020-03-26 20:55:54 +0000) are available in the git repository at: https://github.com/jasowang/qemu.git tags/net-pull-request for you to fetch changes up to f3b364f4f77fcb24cec468f518bf5e093dc27cb7: hw/net/allwinner-sun8i-emac.c: Fix REG_ADDR_HIGH/LOW reads (2020-03-27 18:59:47 +0800) ---------------------------------------------------------------- ---------------------------------------------------------------- Andrew Melnychenko (1): Fixed integer overflow in e1000e Peter Maydell (2): hw/net/i82596.c: Avoid reading off end of buffer in i82596_receive() hw/net/allwinner-sun8i-emac.c: Fix REG_ADDR_HIGH/LOW reads Philippe Mathieu-Daudé (7): hw/net/i82596: Correct command bitmask (CID 1419392) hw/net/e1000e_core: Let e1000e_can_receive() return a boolean hw/net/smc91c111: Let smc91c111_can_receive() return a boolean hw/net/rtl8139: Simplify if/else statement hw/net/rtl8139: Update coding style to make checkpatch.pl happy hw/net: Make NetCanReceive() return a boolean hw/net/can: Make CanBusClientInfo::can_receive() return a boolean Prasad J Pandit (1): net: tulip: check frame size and r/w data length Zhang Chen (2): net/colo-compare.c: Expose "compare_timeout" to users net/colo-compare.c: Expose "expired_scan_cycle" to users hw/net/allwinner-sun8i-emac.c | 12 ++---- hw/net/allwinner_emac.c | 2 +- hw/net/cadence_gem.c | 8 ++-- hw/net/can/can_sja1000.c | 8 ++-- hw/net/can/can_sja1000.h | 2 +- hw/net/dp8393x.c | 8 ++-- hw/net/e1000.c | 2 +- hw/net/e1000e.c | 4 +- hw/net/e1000e_core.c | 2 +- hw/net/e1000e_core.h | 2 +- hw/net/ftgmac100.c | 6 +-- hw/net/i82596.c | 66 ++++++++++++++++++++---------- hw/net/i82596.h | 2 +- hw/net/imx_fec.c | 2 +- hw/net/opencores_eth.c | 5 +-- hw/net/rtl8139.c | 22 +++++----- hw/net/smc91c111.c | 10 ++--- hw/net/spapr_llan.c | 4 +- hw/net/sungem.c | 6 +-- hw/net/sunhme.c | 4 +- hw/net/tulip.c | 36 ++++++++++++---- hw/net/virtio-net.c | 10 ++--- hw/net/xilinx_ethlite.c | 2 +- include/net/can_emu.h | 2 +- include/net/net.h | 2 +- net/can/can_socketcan.c | 4 +- net/colo-compare.c | 95 ++++++++++++++++++++++++++++++++++++++++--- net/filter-buffer.c | 2 +- net/hub.c | 6 +-- qemu-options.hx | 10 +++-- 30 files changed, 235 insertions(+), 111 deletions(-)
From: Philippe Mathieu-Daudé <f4bug@amsat.org> The command is 32-bit, but we are loading the 16 upper bits with the 'get_uint16(s->scb + 2)' call. Once shifted by 16, the command bits match the status bits: - Command Bit 31 ACK-CX Acknowledges that the CU completed an Action Command. Bit 30 ACK-FR Acknowledges that the RU received a frame. Bit 29 ACK-CNA Acknowledges that the Command Unit became not active. Bit 28 ACK-RNR Acknowledges that the Receive Unit became not ready. - Status Bit 15 CX The CU finished executing a command with its I(interrupt) bit set. Bit 14 FR The RU finished receiving a frame. Bit 13 CNA The Command Unit left the Active state. Bit 12 RNR The Receive Unit left the Ready state. Add the SCB_COMMAND_ACK_MASK definition to simplify the code. This fixes Coverity 1419392 (CONSTANT_EXPRESSION_RESULT): /hw/net/i82596.c: 352 in examine_scb() 346 cuc = (command >> 8) & 0x7; 347 ruc = (command >> 4) & 0x7; 348 DBG(printf("MAIN COMMAND %04x cuc %02x ruc %02x\n", command, cuc, ruc)); 349 /* and clear the scb command word */ 350 set_uint16(s->scb + 2, 0); 351 >>> CID 1419392: (CONSTANT_EXPRESSION_RESULT) >>> "command & (2147483648UL /* 1UL << 31 */)" is always 0 regardless of the values of its operands. This occurs as the logical operand of "if". 352 if (command & BIT(31)) /* ACK-CX */ 353 s->scb_status &= ~SCB_STATUS_CX; >>> CID 1419392: (CONSTANT_EXPRESSION_RESULT) >>> "command & (1073741824UL /* 1UL << 30 */)" is always 0 regardless of the values of its operands. This occurs as the logical operand of "if". 354 if (command & BIT(30)) /*ACK-FR */ 355 s->scb_status &= ~SCB_STATUS_FR; >>> CID 1419392: (CONSTANT_EXPRESSION_RESULT) >>> "command & (536870912UL /* 1UL << 29 */)" is always 0 regardless of the values of its operands. This occurs as the logical operand of "if". 356 if (command & BIT(29)) /*ACK-CNA */ 357 s->scb_status &= ~SCB_STATUS_CNA; >>> CID 1419392: (CONSTANT_EXPRESSION_RESULT) >>> "command & (268435456UL /* 1UL << 28 */)" is always 0 regardless of the values of its operands. This occurs as the logical operand of "if". 358 if (command & BIT(28)) /*ACK-RNR */ 359 s->scb_status &= ~SCB_STATUS_RNR; Fixes: Covertiy CID 1419392 (commit 376b851909) Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org> Reviewed-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/i82596.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hw/net/i82596.c b/hw/net/i82596.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/i82596.c +++ b/hw/net/i82596.c @@ -XXX,XX +XXX,XX @@ #define SCB_STATUS_CNA 0x2000 /* CU left active state */ #define SCB_STATUS_RNR 0x1000 /* RU left active state */ +#define SCB_COMMAND_ACK_MASK \ + (SCB_STATUS_CX | SCB_STATUS_FR | SCB_STATUS_CNA | SCB_STATUS_RNR) + #define CU_IDLE 0 #define CU_SUSPENDED 1 #define CU_ACTIVE 2 @@ -XXX,XX +XXX,XX @@ static void examine_scb(I82596State *s) /* and clear the scb command word */ set_uint16(s->scb + 2, 0); - if (command & BIT(31)) /* ACK-CX */ - s->scb_status &= ~SCB_STATUS_CX; - if (command & BIT(30)) /*ACK-FR */ - s->scb_status &= ~SCB_STATUS_FR; - if (command & BIT(29)) /*ACK-CNA */ - s->scb_status &= ~SCB_STATUS_CNA; - if (command & BIT(28)) /*ACK-RNR */ - s->scb_status &= ~SCB_STATUS_RNR; + s->scb_status &= ~(command & SCB_COMMAND_ACK_MASK); switch (cuc) { case 0: /* no change */ -- 2.5.0
From: Peter Maydell <peter.maydell@linaro.org> The i82596_receive() function attempts to pass the guest a buffer which is effectively the concatenation of the data it is passed and a 4 byte CRC value. However, rather than implementing this as "write the data; then write the CRC" it instead bumps the length value of the data by 4, and writes 4 extra bytes from beyond the end of the buffer, which it then overwrites with the CRC. It also assumed that we could always fit all four bytes of the CRC into the final receive buffer, which might not be true if the CRC needs to be split over two receive buffers. Calculate separately how many bytes we need to transfer into the guest's receive buffer from the source buffer, and how many we need to transfer from the CRC work. We add a count 'bufsz' of the number of bytes left in the source buffer, which we use purely to assert() that we don't overrun. Spotted by Coverity (CID 1419396) for the specific case when we end up using a local array as the source buffer. Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/i82596.c | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/hw/net/i82596.c b/hw/net/i82596.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/i82596.c +++ b/hw/net/i82596.c @@ -XXX,XX +XXX,XX @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz) uint32_t rfd_p; uint32_t rbd; uint16_t is_broadcast = 0; - size_t len = sz; + size_t len = sz; /* length of data for guest (including CRC) */ + size_t bufsz = sz; /* length of data in buf */ uint32_t crc; uint8_t *crc_ptr; uint8_t buf1[MIN_BUF_SIZE + VLAN_HLEN]; @@ -XXX,XX +XXX,XX @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz) if (len < MIN_BUF_SIZE) { len = MIN_BUF_SIZE; } + bufsz = len; } /* Calculate the ethernet checksum (4 bytes) */ @@ -XXX,XX +XXX,XX @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz) while (len) { uint16_t buffer_size, num; uint32_t rba; + size_t bufcount, crccount; /* printf("Receive: rbd is %08x\n", rbd); */ buffer_size = get_uint16(rbd + 12); @@ -XXX,XX +XXX,XX @@ ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t sz) } rba = get_uint32(rbd + 8); /* printf("rba is 0x%x\n", rba); */ - address_space_write(&address_space_memory, rba, - MEMTXATTRS_UNSPECIFIED, buf, num); - rba += num; - buf += num; - len -= num; - if (len == 0) { /* copy crc */ - address_space_write(&address_space_memory, rba - 4, - MEMTXATTRS_UNSPECIFIED, crc_ptr, 4); + /* + * Calculate how many bytes we want from buf[] and how many + * from the CRC. + */ + if ((len - num) >= 4) { + /* The whole guest buffer, we haven't hit the CRC yet */ + bufcount = num; + } else { + /* All that's left of buf[] */ + bufcount = len - 4; + } + crccount = num - bufcount; + + if (bufcount > 0) { + /* Still some of the actual data buffer to transfer */ + bufsz -= bufcount; + assert(bufsz >= 0); + address_space_write(&address_space_memory, rba, + MEMTXATTRS_UNSPECIFIED, buf, bufcount); + rba += bufcount; + buf += bufcount; + len -= bufcount; + } + + /* Write as much of the CRC as fits */ + if (crccount > 0) { + address_space_write(&address_space_memory, rba, + MEMTXATTRS_UNSPECIFIED, crc_ptr, crccount); + rba += crccount; + crc_ptr += crccount; + len -= crccount; } num |= 0x4000; /* set F BIT */ -- 2.5.0
From: Andrew Melnychenko <andrew@daynix.com> Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=1737400 Fixed setting max_queue_num if there are no peers in NICConf. qemu_new_nic() creates NICState with 1 NetClientState(index 0) without peers, set max_queue_num to 0 - It prevents undefined behavior and possible crashes, especially during pcie hotplug. Fixes: 6f3fbe4ed06 ("net: Introduce e1000e device emulation") Signed-off-by: Andrew Melnychenko <andrew@daynix.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Dmitry Fleytman <dmitry.fleytman@gmail.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/e1000e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -XXX,XX +XXX,XX @@ e1000e_init_net_peer(E1000EState *s, PCIDevice *pci_dev, uint8_t *macaddr) s->nic = qemu_new_nic(&net_e1000e_info, &s->conf, object_get_typename(OBJECT(s)), dev->id, s); - s->core.max_queue_num = s->conf.peers.queues - 1; + s->core.max_queue_num = s->conf.peers.queues ? s->conf.peers.queues - 1 : 0; trace_e1000e_mac_set_permanent(MAC_ARG(macaddr)); memcpy(s->core.permanent_mac, macaddr, sizeof(s->core.permanent_mac)); -- 2.5.0
From: Philippe Mathieu-Daudé <philmd@redhat.com> The e1000e_can_receive() function simply returns a boolean value. Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Paolo Bonzini <pbonzini@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/e1000e_core.c | 2 +- hw/net/e1000e_core.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -XXX,XX +XXX,XX @@ e1000e_start_recv(E1000ECore *core) } } -int +bool e1000e_can_receive(E1000ECore *core) { int i; diff --git a/hw/net/e1000e_core.h b/hw/net/e1000e_core.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000e_core.h +++ b/hw/net/e1000e_core.h @@ -XXX,XX +XXX,XX @@ e1000e_core_set_link_status(E1000ECore *core); void e1000e_core_pci_uninit(E1000ECore *core); -int +bool e1000e_can_receive(E1000ECore *core); ssize_t -- 2.5.0
From: Philippe Mathieu-Daudé <philmd@redhat.com> The smc91c111_can_receive() function simply returns a boolean value. Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/smc91c111.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/smc91c111.c +++ b/hw/net/smc91c111.c @@ -XXX,XX +XXX,XX @@ static void smc91c111_update(smc91c111_state *s) qemu_set_irq(s->irq, level); } -static int smc91c111_can_receive(smc91c111_state *s) +static bool smc91c111_can_receive(smc91c111_state *s) { if ((s->rcr & RCR_RXEN) == 0 || (s->rcr & RCR_SOFT_RST)) { - return 1; + return true; } if (s->allocated == (1 << NUM_PACKETS) - 1 || s->rx_fifo_len == NUM_PACKETS) { - return 0; + return false; } - return 1; + return true; } static inline void smc91c111_flush_queued_packets(smc91c111_state *s) -- 2.5.0
From: Philippe Mathieu-Daudé <philmd@redhat.com> Rewrite: if (E) { return A; } else { return B; } /* EOF */ } as: if (E) { return A; } return B; } Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/rtl8139.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -XXX,XX +XXX,XX @@ static int rtl8139_can_receive(NetClientState *nc) /* ??? Flow control not implemented in c+ mode. This is a hack to work around slirp deficiencies anyway. */ return 1; - } else { - avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr, - s->RxBufferSize); - return (avail == 0 || avail >= 1514 || (s->IntrMask & RxOverflow)); } + + avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr, + s->RxBufferSize); + return avail == 0 || avail >= 1514 || (s->IntrMask & RxOverflow); } static ssize_t rtl8139_do_receive(NetClientState *nc, const uint8_t *buf, size_t size_, int do_interrupt) -- 2.5.0
From: Philippe Mathieu-Daudé <philmd@redhat.com> We will modify this code in the next commit. Clean it up first to avoid checkpatch.pl errors. Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/rtl8139.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -XXX,XX +XXX,XX @@ static int rtl8139_can_receive(NetClientState *nc) int avail; /* Receive (drop) packets if card is disabled. */ - if (!s->clock_enabled) - return 1; - if (!rtl8139_receiver_enabled(s)) - return 1; + if (!s->clock_enabled) { + return 1; + } + if (!rtl8139_receiver_enabled(s)) { + return 1; + } if (rtl8139_cp_receiver_enabled(s) && rtl8139_cp_rx_valid(s)) { /* ??? Flow control not implemented in c+ mode. -- 2.5.0
From: Philippe Mathieu-Daudé <philmd@redhat.com> The NetCanReceive handler return whether the device can or can not receive new packets. Make it obvious by returning a boolean type. Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Acked-by: David Gibson <david@gibson.dropbear.id.au> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/allwinner_emac.c | 2 +- hw/net/cadence_gem.c | 8 ++++---- hw/net/dp8393x.c | 8 +++----- hw/net/e1000.c | 2 +- hw/net/e1000e.c | 2 +- hw/net/ftgmac100.c | 6 +++--- hw/net/i82596.c | 10 +++++----- hw/net/i82596.h | 2 +- hw/net/imx_fec.c | 2 +- hw/net/opencores_eth.c | 5 ++--- hw/net/rtl8139.c | 8 ++++---- hw/net/smc91c111.c | 2 +- hw/net/spapr_llan.c | 4 ++-- hw/net/sungem.c | 6 +++--- hw/net/sunhme.c | 4 ++-- hw/net/virtio-net.c | 10 +++++----- hw/net/xilinx_ethlite.c | 2 +- include/net/net.h | 2 +- net/filter-buffer.c | 2 +- net/hub.c | 6 +++--- 20 files changed, 45 insertions(+), 48 deletions(-) diff --git a/hw/net/allwinner_emac.c b/hw/net/allwinner_emac.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/allwinner_emac.c +++ b/hw/net/allwinner_emac.c @@ -XXX,XX +XXX,XX @@ static uint32_t fifo8_pop_word(Fifo8 *fifo) return ret; } -static int aw_emac_can_receive(NetClientState *nc) +static bool aw_emac_can_receive(NetClientState *nc) { AwEmacState *s = qemu_get_nic_opaque(nc); diff --git a/hw/net/cadence_gem.c b/hw/net/cadence_gem.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/cadence_gem.c +++ b/hw/net/cadence_gem.c @@ -XXX,XX +XXX,XX @@ static void phy_update_link(CadenceGEMState *s) } } -static int gem_can_receive(NetClientState *nc) +static bool gem_can_receive(NetClientState *nc) { CadenceGEMState *s; int i; @@ -XXX,XX +XXX,XX @@ static int gem_can_receive(NetClientState *nc) s->can_rx_state = 1; DB_PRINT("can't receive - no enable\n"); } - return 0; + return false; } for (i = 0; i < s->num_priority_queues; i++) { @@ -XXX,XX +XXX,XX @@ static int gem_can_receive(NetClientState *nc) s->can_rx_state = 2; DB_PRINT("can't receive - all the buffer descriptors are busy\n"); } - return 0; + return false; } if (s->can_rx_state != 0) { s->can_rx_state = 0; DB_PRINT("can receive\n"); } - return 1; + return true; } /* diff --git a/hw/net/dp8393x.c b/hw/net/dp8393x.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/dp8393x.c +++ b/hw/net/dp8393x.c @@ -XXX,XX +XXX,XX @@ static void dp8393x_do_stop_timer(dp8393xState *s) dp8393x_update_wt_regs(s); } -static int dp8393x_can_receive(NetClientState *nc); +static bool dp8393x_can_receive(NetClientState *nc); static void dp8393x_do_receiver_enable(dp8393xState *s) { @@ -XXX,XX +XXX,XX @@ static void dp8393x_watchdog(void *opaque) dp8393x_update_irq(s); } -static int dp8393x_can_receive(NetClientState *nc) +static bool dp8393x_can_receive(NetClientState *nc) { dp8393xState *s = qemu_get_nic_opaque(nc); - if (!(s->regs[SONIC_CR] & SONIC_CR_RXEN)) - return 0; - return 1; + return !!(s->regs[SONIC_CR] & SONIC_CR_RXEN); } static int dp8393x_receive_filter(dp8393xState *s, const uint8_t * buf, diff --git a/hw/net/e1000.c b/hw/net/e1000.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000.c +++ b/hw/net/e1000.c @@ -XXX,XX +XXX,XX @@ static bool e1000_has_rxbufs(E1000State *s, size_t total_size) return total_size <= bufs * s->rxbuf_size; } -static int +static bool e1000_can_receive(NetClientState *nc) { E1000State *s = qemu_get_nic_opaque(nc); diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps io_ops = { }, }; -static int +static bool e1000e_nc_can_receive(NetClientState *nc) { E1000EState *s = qemu_get_nic_opaque(nc); diff --git a/hw/net/ftgmac100.c b/hw/net/ftgmac100.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/ftgmac100.c +++ b/hw/net/ftgmac100.c @@ -XXX,XX +XXX,XX @@ static void ftgmac100_do_tx(FTGMAC100State *s, uint32_t tx_ring, ftgmac100_update_irq(s); } -static int ftgmac100_can_receive(NetClientState *nc) +static bool ftgmac100_can_receive(NetClientState *nc) { FTGMAC100State *s = FTGMAC100(qemu_get_nic_opaque(nc)); FTGMAC100Desc bd; if ((s->maccr & (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN)) != (FTGMAC100_MACCR_RXDMA_EN | FTGMAC100_MACCR_RXMAC_EN)) { - return 0; + return false; } if (ftgmac100_read_bd(&bd, s->rx_descriptor)) { - return 0; + return false; } return !(bd.des0 & FTGMAC100_RXDES0_RXPKT_RDY); } diff --git a/hw/net/i82596.c b/hw/net/i82596.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/i82596.c +++ b/hw/net/i82596.c @@ -XXX,XX +XXX,XX @@ void i82596_h_reset(void *opaque) i82596_s_reset(s); } -int i82596_can_receive(NetClientState *nc) +bool i82596_can_receive(NetClientState *nc) { I82596State *s = qemu_get_nic_opaque(nc); if (s->rx_status == RX_SUSPENDED) { - return 0; + return false; } if (!s->lnkst) { - return 0; + return false; } if (USE_TIMER && !timer_pending(s->flush_queue_timer)) { - return 1; + return true; } - return 1; + return true; } #define MIN_BUF_SIZE 60 diff --git a/hw/net/i82596.h b/hw/net/i82596.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/i82596.h +++ b/hw/net/i82596.h @@ -XXX,XX +XXX,XX @@ void i82596_ioport_writel(void *opaque, uint32_t addr, uint32_t val); uint32_t i82596_ioport_readl(void *opaque, uint32_t addr); uint32_t i82596_bcr_readw(I82596State *s, uint32_t rap); ssize_t i82596_receive(NetClientState *nc, const uint8_t *buf, size_t size_); -int i82596_can_receive(NetClientState *nc); +bool i82596_can_receive(NetClientState *nc); void i82596_set_link_status(NetClientState *nc); void i82596_common_init(DeviceState *dev, I82596State *s, NetClientInfo *info); extern const VMStateDescription vmstate_i82596; diff --git a/hw/net/imx_fec.c b/hw/net/imx_fec.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/imx_fec.c +++ b/hw/net/imx_fec.c @@ -XXX,XX +XXX,XX @@ static void imx_eth_write(void *opaque, hwaddr offset, uint64_t value, imx_eth_update(s); } -static int imx_eth_can_receive(NetClientState *nc) +static bool imx_eth_can_receive(NetClientState *nc) { IMXFECState *s = IMX_FEC(qemu_get_nic_opaque(nc)); diff --git a/hw/net/opencores_eth.c b/hw/net/opencores_eth.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/opencores_eth.c +++ b/hw/net/opencores_eth.c @@ -XXX,XX +XXX,XX @@ static void open_eth_reset(void *opaque) open_eth_set_link_status(qemu_get_queue(s->nic)); } -static int open_eth_can_receive(NetClientState *nc) +static bool open_eth_can_receive(NetClientState *nc) { OpenEthState *s = qemu_get_nic_opaque(nc); - return GET_REGBIT(s, MODER, RXEN) && - (s->regs[TX_BD_NUM] < 0x80); + return GET_REGBIT(s, MODER, RXEN) && (s->regs[TX_BD_NUM] < 0x80); } static ssize_t open_eth_receive(NetClientState *nc, diff --git a/hw/net/rtl8139.c b/hw/net/rtl8139.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/rtl8139.c +++ b/hw/net/rtl8139.c @@ -XXX,XX +XXX,XX @@ static bool rtl8139_cp_rx_valid(RTL8139State *s) return !(s->RxRingAddrLO == 0 && s->RxRingAddrHI == 0); } -static int rtl8139_can_receive(NetClientState *nc) +static bool rtl8139_can_receive(NetClientState *nc) { RTL8139State *s = qemu_get_nic_opaque(nc); int avail; /* Receive (drop) packets if card is disabled. */ if (!s->clock_enabled) { - return 1; + return true; } if (!rtl8139_receiver_enabled(s)) { - return 1; + return true; } if (rtl8139_cp_receiver_enabled(s) && rtl8139_cp_rx_valid(s)) { /* ??? Flow control not implemented in c+ mode. This is a hack to work around slirp deficiencies anyway. */ - return 1; + return true; } avail = MOD2(s->RxBufferSize + s->RxBufPtr - s->RxBufAddr, diff --git a/hw/net/smc91c111.c b/hw/net/smc91c111.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/smc91c111.c +++ b/hw/net/smc91c111.c @@ -XXX,XX +XXX,XX @@ static void smc91c111_writefn(void *opaque, hwaddr addr, } } -static int smc91c111_can_receive_nc(NetClientState *nc) +static bool smc91c111_can_receive_nc(NetClientState *nc) { smc91c111_state *s = qemu_get_nic_opaque(nc); diff --git a/hw/net/spapr_llan.c b/hw/net/spapr_llan.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/spapr_llan.c +++ b/hw/net/spapr_llan.c @@ -XXX,XX +XXX,XX @@ typedef struct SpaprVioVlan { RxBufPool *rx_pool[RX_MAX_POOLS]; /* Receive buffer descriptor pools */ } SpaprVioVlan; -static int spapr_vlan_can_receive(NetClientState *nc) +static bool spapr_vlan_can_receive(NetClientState *nc) { SpaprVioVlan *dev = qemu_get_nic_opaque(nc); - return (dev->isopen && dev->rx_bufs > 0); + return dev->isopen && dev->rx_bufs > 0; } /** diff --git a/hw/net/sungem.c b/hw/net/sungem.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/sungem.c +++ b/hw/net/sungem.c @@ -XXX,XX +XXX,XX @@ static bool sungem_rx_full(SunGEMState *s, uint32_t kick, uint32_t done) return kick == ((done + 1) & s->rx_mask); } -static int sungem_can_receive(NetClientState *nc) +static bool sungem_can_receive(NetClientState *nc) { SunGEMState *s = qemu_get_nic_opaque(nc); uint32_t kick, done, rxdma_cfg, rxmac_cfg; @@ -XXX,XX +XXX,XX @@ static int sungem_can_receive(NetClientState *nc) /* If MAC disabled, can't receive */ if ((rxmac_cfg & MAC_RXCFG_ENAB) == 0) { trace_sungem_rx_mac_disabled(); - return 0; + return false; } if ((rxdma_cfg & RXDMA_CFG_ENABLE) == 0) { trace_sungem_rx_txdma_disabled(); - return 0; + return false; } /* Check RX availability */ diff --git a/hw/net/sunhme.c b/hw/net/sunhme.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/sunhme.c +++ b/hw/net/sunhme.c @@ -XXX,XX +XXX,XX @@ static void sunhme_transmit(SunHMEState *s) sunhme_update_irq(s); } -static int sunhme_can_receive(NetClientState *nc) +static bool sunhme_can_receive(NetClientState *nc) { SunHMEState *s = qemu_get_nic_opaque(nc); - return s->macregs[HME_MACI_RXCFG >> 2] & HME_MAC_RXCFG_ENABLE; + return !!(s->macregs[HME_MACI_RXCFG >> 2] & HME_MAC_RXCFG_ENABLE); } static void sunhme_link_status_changed(NetClientState *nc) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -XXX,XX +XXX,XX @@ static void virtio_net_handle_rx(VirtIODevice *vdev, VirtQueue *vq) qemu_flush_queued_packets(qemu_get_subqueue(n->nic, queue_index)); } -static int virtio_net_can_receive(NetClientState *nc) +static bool virtio_net_can_receive(NetClientState *nc) { VirtIONet *n = qemu_get_nic_opaque(nc); VirtIODevice *vdev = VIRTIO_DEVICE(n); VirtIONetQueue *q = virtio_net_get_subqueue(nc); if (!vdev->vm_running) { - return 0; + return false; } if (nc->queue_index >= n->curr_queues) { - return 0; + return false; } if (!virtio_queue_ready(q->rx_vq) || !(vdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) { - return 0; + return false; } - return 1; + return true; } static int virtio_net_has_buffers(VirtIONetQueue *q, int bufsize) diff --git a/hw/net/xilinx_ethlite.c b/hw/net/xilinx_ethlite.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/xilinx_ethlite.c +++ b/hw/net/xilinx_ethlite.c @@ -XXX,XX +XXX,XX @@ static const MemoryRegionOps eth_ops = { } }; -static int eth_can_rx(NetClientState *nc) +static bool eth_can_rx(NetClientState *nc) { struct xlx_ethlite *s = qemu_get_nic_opaque(nc); unsigned int rxbase = s->rxbuf * (0x800 / 4); diff --git a/include/net/net.h b/include/net/net.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -XXX,XX +XXX,XX @@ typedef struct NICConf { /* Net clients */ typedef void (NetPoll)(NetClientState *, bool enable); -typedef int (NetCanReceive)(NetClientState *); +typedef bool (NetCanReceive)(NetClientState *); typedef ssize_t (NetReceive)(NetClientState *, const uint8_t *, size_t); typedef ssize_t (NetReceiveIOV)(NetClientState *, const struct iovec *, int); typedef void (NetCleanup) (NetClientState *); diff --git a/net/filter-buffer.c b/net/filter-buffer.c index XXXXXXX..XXXXXXX 100644 --- a/net/filter-buffer.c +++ b/net/filter-buffer.c @@ -XXX,XX +XXX,XX @@ static ssize_t filter_buffer_receive_iov(NetFilterState *nf, * the filter can still accept packets until its internal queue is full. * For example: * For some reason, receiver could not receive more packets - * (.can_receive() returns zero). Without a filter, at most one packet + * (.can_receive() returns false). Without a filter, at most one packet * will be queued in incoming queue and sender's poll will be disabled * unit its sent_cb() was called. With a filter, it will keep receiving * the packets without caring about the receiver. This is suboptimal. diff --git a/net/hub.c b/net/hub.c index XXXXXXX..XXXXXXX 100644 --- a/net/hub.c +++ b/net/hub.c @@ -XXX,XX +XXX,XX @@ static NetHub *net_hub_new(int id) return hub; } -static int net_hub_port_can_receive(NetClientState *nc) +static bool net_hub_port_can_receive(NetClientState *nc) { NetHubPort *port; NetHubPort *src_port = DO_UPCAST(NetHubPort, nc, nc); @@ -XXX,XX +XXX,XX @@ static int net_hub_port_can_receive(NetClientState *nc) } if (qemu_can_send_packet(&port->nc)) { - return 1; + return true; } } - return 0; + return false; } static ssize_t net_hub_port_receive(NetClientState *nc, -- 2.5.0
From: Philippe Mathieu-Daudé <philmd@redhat.com> The CanBusClientInfo::can_receive handler return whether the device can or can not receive new frames. Make it obvious by returning a boolean type. Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com> Reviewed-by: Alistair Francis <alistair.francis@wdc.com> Reviewed-by: Cédric Le Goater <clg@kaod.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/can/can_sja1000.c | 8 ++++---- hw/net/can/can_sja1000.h | 2 +- include/net/can_emu.h | 2 +- net/can/can_socketcan.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hw/net/can/can_sja1000.c b/hw/net/can/can_sja1000.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/can/can_sja1000.c +++ b/hw/net/can/can_sja1000.c @@ -XXX,XX +XXX,XX @@ uint64_t can_sja_mem_read(CanSJA1000State *s, hwaddr addr, unsigned size) return temp; } -int can_sja_can_receive(CanBusClientState *client) +bool can_sja_can_receive(CanBusClientState *client) { CanSJA1000State *s = container_of(client, CanSJA1000State, bus_client); if (s->clock & 0x80) { /* PeliCAN Mode */ if (s->mode & 0x01) { /* reset mode. */ - return 0; + return false; } } else { /* BasicCAN mode */ if (s->control & 0x01) { - return 0; + return false; } } - return 1; /* always return 1, when operation mode */ + return true; /* always return true, when operation mode */ } ssize_t can_sja_receive(CanBusClientState *client, const qemu_can_frame *frames, diff --git a/hw/net/can/can_sja1000.h b/hw/net/can/can_sja1000.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/can/can_sja1000.h +++ b/hw/net/can/can_sja1000.h @@ -XXX,XX +XXX,XX @@ void can_sja_disconnect(CanSJA1000State *s); int can_sja_init(CanSJA1000State *s, qemu_irq irq); -int can_sja_can_receive(CanBusClientState *client); +bool can_sja_can_receive(CanBusClientState *client); ssize_t can_sja_receive(CanBusClientState *client, const qemu_can_frame *frames, size_t frames_cnt); diff --git a/include/net/can_emu.h b/include/net/can_emu.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/can_emu.h +++ b/include/net/can_emu.h @@ -XXX,XX +XXX,XX @@ typedef struct CanBusClientState CanBusClientState; typedef struct CanBusState CanBusState; typedef struct CanBusClientInfo { - int (*can_receive)(CanBusClientState *); + bool (*can_receive)(CanBusClientState *); ssize_t (*receive)(CanBusClientState *, const struct qemu_can_frame *frames, size_t frames_cnt); } CanBusClientInfo; diff --git a/net/can/can_socketcan.c b/net/can/can_socketcan.c index XXXXXXX..XXXXXXX 100644 --- a/net/can/can_socketcan.c +++ b/net/can/can_socketcan.c @@ -XXX,XX +XXX,XX @@ static void can_host_socketcan_read(void *opaque) } } -static int can_host_socketcan_can_receive(CanBusClientState *client) +static bool can_host_socketcan_can_receive(CanBusClientState *client) { - return 1; + return true; } static ssize_t can_host_socketcan_receive(CanBusClientState *client, -- 2.5.0
From: Zhang Chen <chen.zhang@intel.com> The "compare_timeout" determines the maximum time to hold the primary net packet. This patch expose the "compare_timeout", make user have ability to adjest the value according to application scenarios. QMP command demo: { "execute": "qom-get", "arguments": { "path": "/objects/comp0", "property": "compare_timeout" } } { "execute": "qom-set", "arguments": { "path": "/objects/comp0", "property": "compare_timeout", "value": 5000} } Signed-off-by: Zhang Chen <chen.zhang@intel.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- net/colo-compare.c | 47 +++++++++++++++++++++++++++++++++++++++++++++-- qemu-options.hx | 8 +++++--- 2 files changed, 50 insertions(+), 5 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index XXXXXXX..XXXXXXX 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -XXX,XX +XXX,XX @@ static NotifierList colo_compare_notifiers = /* TODO: Should be configurable */ #define REGULAR_PACKET_CHECK_MS 3000 +#define DEFAULT_TIME_OUT_MS 3000 static QemuMutex event_mtx; static QemuCond event_complete_cond; @@ -XXX,XX +XXX,XX @@ typedef struct CompareState { SocketReadState sec_rs; SocketReadState notify_rs; bool vnet_hdr; + uint32_t compare_timeout; /* * Record the connection that through the NIC @@ -XXX,XX +XXX,XX @@ static int colo_old_packet_check_one_conn(Connection *conn, CompareState *s) { GList *result = NULL; - int64_t check_time = REGULAR_PACKET_CHECK_MS; result = g_queue_find_custom(&conn->primary_list, - &check_time, + &s->compare_timeout, (GCompareFunc)colo_old_packet_check_one); if (result) { @@ -XXX,XX +XXX,XX @@ static void compare_set_notify_dev(Object *obj, const char *value, Error **errp) s->notify_dev = g_strdup(value); } +static void compare_get_timeout(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + uint32_t value = s->compare_timeout; + + visit_type_uint32(v, name, &value, errp); +} + +static void compare_set_timeout(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' requires a positive value", + object_get_typename(obj), name); + goto out; + } + s->compare_timeout = value; + +out: + error_propagate(errp, local_err); +} + static void compare_pri_rs_finalize(SocketReadState *pri_rs) { CompareState *s = container_of(pri_rs, CompareState, pri_rs); @@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) return; } + if (!s->compare_timeout) { + /* Set default value to 3000 MS */ + s->compare_timeout = DEFAULT_TIME_OUT_MS; + } + if (find_and_check_chardev(&chr, s->pri_indev, errp) || !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) { return; @@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj) compare_get_notify_dev, compare_set_notify_dev, NULL); + object_property_add(obj, "compare_timeout", "uint32", + compare_get_timeout, + compare_set_timeout, NULL, NULL, NULL); + s->vnet_hdr = false; object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr, compare_set_vnet_hdr, NULL); diff --git a/qemu-options.hx b/qemu-options.hx index XXXXXXX..XXXXXXX 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -XXX,XX +XXX,XX @@ SRST stored. The file format is libpcap, so it can be analyzed with tools such as tcpdump or Wireshark. - ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id]`` + ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}]`` Colo-compare gets packet from primary\_inchardevid and secondary\_inchardevid, than compare primary packet with secondary packet. If the packets are same, we will output @@ -XXX,XX +XXX,XX @@ SRST outdevchardevid. In order to improve efficiency, we need to put the task of comparison in another thread. If it has the vnet\_hdr\_support flag, colo compare will send/recv packet with - vnet\_hdr\_len. If you want to use Xen COLO, will need the - notify\_dev to notify Xen colo-frame to do checkpoint. + vnet\_hdr\_len. Then compare\_timeout=@var{ms} determines the + maximum delay colo-compare wait for the packet. + If you want to use Xen COLO, will need the notify\_dev to + notify Xen colo-frame to do checkpoint. we must use it with the help of filter-mirror and filter-redirector. -- 2.5.0
From: Zhang Chen <chen.zhang@intel.com> The "expired_scan_cycle" determines period of scanning expired primary node net packets. Signed-off-by: Zhang Chen <chen.zhang@intel.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- net/colo-compare.c | 48 +++++++++++++++++++++++++++++++++++++++++++++--- qemu-options.hx | 4 +++- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/net/colo-compare.c b/net/colo-compare.c index XXXXXXX..XXXXXXX 100644 --- a/net/colo-compare.c +++ b/net/colo-compare.c @@ -XXX,XX +XXX,XX @@ static NotifierList colo_compare_notifiers = #define COLO_COMPARE_FREE_PRIMARY 0x01 #define COLO_COMPARE_FREE_SECONDARY 0x02 -/* TODO: Should be configurable */ #define REGULAR_PACKET_CHECK_MS 3000 #define DEFAULT_TIME_OUT_MS 3000 @@ -XXX,XX +XXX,XX @@ typedef struct CompareState { SocketReadState notify_rs; bool vnet_hdr; uint32_t compare_timeout; + uint32_t expired_scan_cycle; /* * Record the connection that through the NIC @@ -XXX,XX +XXX,XX @@ static void check_old_packet_regular(void *opaque) /* if have old packet we will notify checkpoint */ colo_old_packet_check(s); timer_mod(s->packet_check_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + - REGULAR_PACKET_CHECK_MS); + s->expired_scan_cycle); } /* Public API, Used for COLO frame to notify compare event */ @@ -XXX,XX +XXX,XX @@ static void colo_compare_timer_init(CompareState *s) SCALE_MS, check_old_packet_regular, s); timer_mod(s->packet_check_timer, qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL) + - REGULAR_PACKET_CHECK_MS); + s->expired_scan_cycle); } static void colo_compare_timer_del(CompareState *s) @@ -XXX,XX +XXX,XX @@ out: error_propagate(errp, local_err); } +static void compare_get_expired_scan_cycle(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + uint32_t value = s->expired_scan_cycle; + + visit_type_uint32(v, name, &value, errp); +} + +static void compare_set_expired_scan_cycle(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + CompareState *s = COLO_COMPARE(obj); + Error *local_err = NULL; + uint32_t value; + + visit_type_uint32(v, name, &value, &local_err); + if (local_err) { + goto out; + } + if (!value) { + error_setg(&local_err, "Property '%s.%s' requires a positive value", + object_get_typename(obj), name); + goto out; + } + s->expired_scan_cycle = value; + +out: + error_propagate(errp, local_err); +} + static void compare_pri_rs_finalize(SocketReadState *pri_rs) { CompareState *s = container_of(pri_rs, CompareState, pri_rs); @@ -XXX,XX +XXX,XX @@ static void colo_compare_complete(UserCreatable *uc, Error **errp) s->compare_timeout = DEFAULT_TIME_OUT_MS; } + if (!s->expired_scan_cycle) { + /* Set default value to 3000 MS */ + s->expired_scan_cycle = REGULAR_PACKET_CHECK_MS; + } + if (find_and_check_chardev(&chr, s->pri_indev, errp) || !qemu_chr_fe_init(&s->chr_pri_in, chr, errp)) { return; @@ -XXX,XX +XXX,XX @@ static void colo_compare_init(Object *obj) compare_get_timeout, compare_set_timeout, NULL, NULL, NULL); + object_property_add(obj, "expired_scan_cycle", "uint32", + compare_get_expired_scan_cycle, + compare_set_expired_scan_cycle, NULL, NULL, NULL); + s->vnet_hdr = false; object_property_add_bool(obj, "vnet_hdr_support", compare_get_vnet_hdr, compare_set_vnet_hdr, NULL); diff --git a/qemu-options.hx b/qemu-options.hx index XXXXXXX..XXXXXXX 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -XXX,XX +XXX,XX @@ SRST stored. The file format is libpcap, so it can be analyzed with tools such as tcpdump or Wireshark. - ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}]`` + ``-object colo-compare,id=id,primary_in=chardevid,secondary_in=chardevid,outdev=chardevid,iothread=id[,vnet_hdr_support][,notify_dev=id][,compare_timeout=@var{ms}][,expired_scan_cycle=@var{ms}`` Colo-compare gets packet from primary\_inchardevid and secondary\_inchardevid, than compare primary packet with secondary packet. If the packets are same, we will output @@ -XXX,XX +XXX,XX @@ SRST vnet\_hdr\_support flag, colo compare will send/recv packet with vnet\_hdr\_len. Then compare\_timeout=@var{ms} determines the maximum delay colo-compare wait for the packet. + The expired\_scan\_cycle=@var{ms} to set the period of scanning + expired primary node network packets. If you want to use Xen COLO, will need the notify\_dev to notify Xen colo-frame to do checkpoint. -- 2.5.0
From: Prasad J Pandit <pjp@fedoraproject.org> Tulip network driver while copying tx/rx buffers does not check frame size against r/w data length. This may lead to OOB buffer access. Add check to avoid it. Limit iterations over descriptors to avoid potential infinite loop issue in tulip_xmit_list_update. Reported-by: Li Qiang <pangpei.lq@antfin.com> Reported-by: Ziming Zhang <ezrakiez@gmail.com> Reported-by: Jason Wang <jasowang@redhat.com> Tested-by: Li Qiang <liq3ea@gmail.com> Reviewed-by: Li Qiang <liq3ea@gmail.com> Signed-off-by: Prasad J Pandit <pjp@fedoraproject.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/tulip.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/hw/net/tulip.c b/hw/net/tulip.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/tulip.c +++ b/hw/net/tulip.c @@ -XXX,XX +XXX,XX @@ static void tulip_copy_rx_bytes(TULIPState *s, struct tulip_descriptor *desc) } else { len = s->rx_frame_len; } + + if (s->rx_frame_len + len > sizeof(s->rx_frame)) { + return; + } pci_dma_write(&s->dev, desc->buf_addr1, s->rx_frame + (s->rx_frame_size - s->rx_frame_len), len); s->rx_frame_len -= len; @@ -XXX,XX +XXX,XX @@ static void tulip_copy_rx_bytes(TULIPState *s, struct tulip_descriptor *desc) } else { len = s->rx_frame_len; } + + if (s->rx_frame_len + len > sizeof(s->rx_frame)) { + return; + } pci_dma_write(&s->dev, desc->buf_addr2, s->rx_frame + (s->rx_frame_size - s->rx_frame_len), len); s->rx_frame_len -= len; @@ -XXX,XX +XXX,XX @@ static ssize_t tulip_receive(TULIPState *s, const uint8_t *buf, size_t size) trace_tulip_receive(buf, size); - if (size < 14 || size > 2048 || s->rx_frame_len || tulip_rx_stopped(s)) { + if (size < 14 || size > sizeof(s->rx_frame) - 4 + || s->rx_frame_len || tulip_rx_stopped(s)) { return 0; } @@ -XXX,XX +XXX,XX @@ static ssize_t tulip_receive_nc(NetClientState *nc, return tulip_receive(qemu_get_nic_opaque(nc), buf, size); } - static NetClientInfo net_tulip_info = { .type = NET_CLIENT_DRIVER_NIC, .size = sizeof(NICState), @@ -XXX,XX +XXX,XX @@ static void tulip_tx(TULIPState *s, struct tulip_descriptor *desc) if ((s->csr[6] >> CSR6_OM_SHIFT) & CSR6_OM_MASK) { /* Internal or external Loopback */ tulip_receive(s, s->tx_frame, s->tx_frame_len); - } else { + } else if (s->tx_frame_len <= sizeof(s->tx_frame)) { qemu_send_packet(qemu_get_queue(s->nic), s->tx_frame, s->tx_frame_len); } @@ -XXX,XX +XXX,XX @@ static void tulip_tx(TULIPState *s, struct tulip_descriptor *desc) } } -static void tulip_copy_tx_buffers(TULIPState *s, struct tulip_descriptor *desc) +static int tulip_copy_tx_buffers(TULIPState *s, struct tulip_descriptor *desc) { int len1 = (desc->control >> TDES1_BUF1_SIZE_SHIFT) & TDES1_BUF1_SIZE_MASK; int len2 = (desc->control >> TDES1_BUF2_SIZE_SHIFT) & TDES1_BUF2_SIZE_MASK; + if (s->tx_frame_len + len1 > sizeof(s->tx_frame)) { + return -1; + } if (len1) { pci_dma_read(&s->dev, desc->buf_addr1, s->tx_frame + s->tx_frame_len, len1); s->tx_frame_len += len1; } + if (s->tx_frame_len + len2 > sizeof(s->tx_frame)) { + return -1; + } if (len2) { pci_dma_read(&s->dev, desc->buf_addr2, s->tx_frame + s->tx_frame_len, len2); s->tx_frame_len += len2; } desc->status = (len1 + len2) ? 0 : 0x7fffffff; + + return 0; } static void tulip_setup_filter_addr(TULIPState *s, uint8_t *buf, int n) @@ -XXX,XX +XXX,XX @@ static uint32_t tulip_ts(TULIPState *s) static void tulip_xmit_list_update(TULIPState *s) { +#define TULIP_DESC_MAX 128 + uint8_t i = 0; struct tulip_descriptor desc; if (tulip_ts(s) != CSR5_TS_SUSPENDED) { return; } - for (;;) { + for (i = 0; i < TULIP_DESC_MAX; i++) { tulip_desc_read(s, s->current_tx_desc, &desc); tulip_dump_tx_descriptor(s, &desc); @@ -XXX,XX +XXX,XX @@ static void tulip_xmit_list_update(TULIPState *s) s->tx_frame_len = 0; } - tulip_copy_tx_buffers(s, &desc); - - if (desc.control & TDES1_LS) { - tulip_tx(s, &desc); + if (!tulip_copy_tx_buffers(s, &desc)) { + if (desc.control & TDES1_LS) { + tulip_tx(s, &desc); + } } } tulip_desc_write(s, s->current_tx_desc, &desc); -- 2.5.0
From: Peter Maydell <peter.maydell@linaro.org> Coverity points out (CID 1421926) that the read code for REG_ADDR_HIGH reads off the end of the buffer, because it does a 32-bit read from byte 4 of a 6-byte buffer. The code also has an endianness issue for both REG_ADDR_HIGH and REG_ADDR_LOW, because it will do the wrong thing on a big-endian host. Rewrite the read code to use ldl_le_p() and lduw_le_p() to fix this; the write code is not incorrect, but for consistency we make it use stl_le_p() and stw_le_p(). Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Tested-by: Niek Linnenbank <nieklinnenbank@gmail.com> Reviewed-by: Niek Linnenbank <nieklinnenbank@gmail.com> Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/allwinner-sun8i-emac.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/hw/net/allwinner-sun8i-emac.c b/hw/net/allwinner-sun8i-emac.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/allwinner-sun8i-emac.c +++ b/hw/net/allwinner-sun8i-emac.c @@ -XXX,XX +XXX,XX @@ static uint64_t allwinner_sun8i_emac_read(void *opaque, hwaddr offset, value = s->mii_data; break; case REG_ADDR_HIGH: /* MAC Address High */ - value = *(((uint32_t *) (s->conf.macaddr.a)) + 1); + value = lduw_le_p(s->conf.macaddr.a + 4); break; case REG_ADDR_LOW: /* MAC Address Low */ - value = *(uint32_t *) (s->conf.macaddr.a); + value = ldl_le_p(s->conf.macaddr.a); break; case REG_TX_DMA_STA: /* Transmit DMA Status */ break; @@ -XXX,XX +XXX,XX @@ static void allwinner_sun8i_emac_write(void *opaque, hwaddr offset, s->mii_data = value; break; case REG_ADDR_HIGH: /* MAC Address High */ - s->conf.macaddr.a[4] = (value & 0xff); - s->conf.macaddr.a[5] = (value & 0xff00) >> 8; + stw_le_p(s->conf.macaddr.a + 4, value); break; case REG_ADDR_LOW: /* MAC Address Low */ - s->conf.macaddr.a[0] = (value & 0xff); - s->conf.macaddr.a[1] = (value & 0xff00) >> 8; - s->conf.macaddr.a[2] = (value & 0xff0000) >> 16; - s->conf.macaddr.a[3] = (value & 0xff000000) >> 24; + stl_le_p(s->conf.macaddr.a, value); break; case REG_TX_DMA_STA: /* Transmit DMA Status */ case REG_TX_CUR_DESC: /* Transmit Current Descriptor */ -- 2.5.0
The following changes since commit 6dffbe36af79e26a4d23f94a9a1c1201de99c261: Merge tag 'migration-20230215-pull-request' of https://gitlab.com/juan.quintela/qemu into staging (2023-02-16 13:09:51 +0000) are available in the git repository at: https://github.com/jasowang/qemu.git tags/net-pull-request for you to fetch changes up to 525ae115222f0b0b6de7f9665976f640d18c200a: vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check (2023-02-17 13:31:33 +0800) ---------------------------------------------------------------- Changes since V2: - drop patch hw/net/can/xlnx-zynqmp-can: fix assertion failures in transfer_fifo() Changes since V1: - Fix the wrong guest error detection in xlnx-zynqmp-can ---------------------------------------------------------------- Christian Svensson (1): net: Increase L2TPv3 buffer to fit jumboframes Eugenio Pérez (1): vdpa: fix VHOST_BACKEND_F_IOTLB_ASID flag check Fiona Ebner (1): hw/net/vmxnet3: allow VMXNET3_MAX_MTU itself as a value Joelle van Dyne (1): vmnet: stop recieving events when VM is stopped Laurent Vivier (1): net: stream: add a new option to automatically reconnect Qiang Liu (1): hw/net/lan9118: log [read|write]b when mode_16bit is enabled rather than abort Thomas Huth (3): net: Move the code to collect available NIC models to a separate function net: Restore printing of the help text with "-nic help" net: Replace "Supported NIC models" with "Available NIC models" hw/net/lan9118.c | 17 ++++---- hw/net/vmxnet3.c | 2 +- hw/pci/pci.c | 29 +------------ include/net/net.h | 14 ++++++ net/l2tpv3.c | 2 +- net/net.c | 50 ++++++++++++++++++++-- net/stream.c | 53 ++++++++++++++++++++++- net/vhost-vdpa.c | 2 +- net/vmnet-common.m | 48 +++++++++++++++------ net/vmnet_int.h | 2 + qapi/net.json | 7 ++- qemu-options.hx | 6 +-- tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++++++++++ 13 files changed, 272 insertions(+), 61 deletions(-)
From: Thomas Huth <thuth@redhat.com> The code that collects the available NIC models is not really specific to PCI anymore and will be required in the next patch, too, so let's move this into a new separate function in net.c instead. Signed-off-by: Thomas Huth <thuth@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/pci/pci.c | 29 +---------------------------- include/net/net.h | 14 ++++++++++++++ net/net.c | 34 ++++++++++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 28 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index XXXXXXX..XXXXXXX 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -XXX,XX +XXX,XX @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, const char *default_devaddr) { const char *devaddr = nd->devaddr ? nd->devaddr : default_devaddr; - GSList *list; GPtrArray *pci_nic_models; PCIBus *bus; PCIDevice *pci_dev; @@ -XXX,XX +XXX,XX @@ PCIDevice *pci_nic_init_nofail(NICInfo *nd, PCIBus *rootbus, nd->model = g_strdup("virtio-net-pci"); } - list = object_class_get_list_sorted(TYPE_PCI_DEVICE, false); - pci_nic_models = g_ptr_array_new(); - while (list) { - DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, list->data, - TYPE_DEVICE); - GSList *next; - if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) && - dc->user_creatable) { - const char *name = object_class_get_name(list->data); - /* - * A network device might also be something else than a NIC, see - * e.g. the "rocker" device. Thus we have to look for the "netdev" - * property, too. Unfortunately, some devices like virtio-net only - * create this property during instance_init, so we have to create - * a temporary instance here to be able to check it. - */ - Object *obj = object_new_with_class(OBJECT_CLASS(dc)); - if (object_property_find(obj, "netdev")) { - g_ptr_array_add(pci_nic_models, (gpointer)name); - } - object_unref(obj); - } - next = list->next; - g_slist_free_1(list); - list = next; - } - g_ptr_array_add(pci_nic_models, NULL); + pci_nic_models = qemu_get_nic_models(TYPE_PCI_DEVICE); if (qemu_show_nic_models(nd->model, (const char **)pci_nic_models->pdata)) { exit(0); diff --git a/include/net/net.h b/include/net/net.h index XXXXXXX..XXXXXXX 100644 --- a/include/net/net.h +++ b/include/net/net.h @@ -XXX,XX +XXX,XX @@ void net_socket_rs_init(SocketReadState *rs, bool vnet_hdr); NetClientState *qemu_get_peer(NetClientState *nc, int queue_index); +/** + * qemu_get_nic_models: + * @device_type: Defines which devices should be taken into consideration + * (e.g. TYPE_DEVICE for all devices, or TYPE_PCI_DEVICE for PCI) + * + * Get an array of pointers to names of NIC devices that are available in + * the QEMU binary. The array is terminated with a NULL pointer entry. + * The caller is responsible for freeing the memory when it is not required + * anymore, e.g. with g_ptr_array_free(..., true). + * + * Returns: Pointer to the array that contains the pointers to the names. + */ +GPtrArray *qemu_get_nic_models(const char *device_type); + /* NIC info */ #define MAX_NICS 8 diff --git a/net/net.c b/net/net.c index XXXXXXX..XXXXXXX 100644 --- a/net/net.c +++ b/net/net.c @@ -XXX,XX +XXX,XX @@ static int nic_get_free_idx(void) return -1; } +GPtrArray *qemu_get_nic_models(const char *device_type) +{ + GPtrArray *nic_models = g_ptr_array_new(); + GSList *list = object_class_get_list_sorted(device_type, false); + + while (list) { + DeviceClass *dc = OBJECT_CLASS_CHECK(DeviceClass, list->data, + TYPE_DEVICE); + GSList *next; + if (test_bit(DEVICE_CATEGORY_NETWORK, dc->categories) && + dc->user_creatable) { + const char *name = object_class_get_name(list->data); + /* + * A network device might also be something else than a NIC, see + * e.g. the "rocker" device. Thus we have to look for the "netdev" + * property, too. Unfortunately, some devices like virtio-net only + * create this property during instance_init, so we have to create + * a temporary instance here to be able to check it. + */ + Object *obj = object_new_with_class(OBJECT_CLASS(dc)); + if (object_property_find(obj, "netdev")) { + g_ptr_array_add(nic_models, (gpointer)name); + } + object_unref(obj); + } + next = list->next; + g_slist_free_1(list); + list = next; + } + g_ptr_array_add(nic_models, NULL); + + return nic_models; +} + int qemu_show_nic_models(const char *arg, const char *const *models) { int i; -- 2.7.4
From: Thomas Huth <thuth@redhat.com> Running QEMU with "-nic help" used to work in QEMU 5.2 and earlier versions (it showed the available netdev backends), but this feature got broken during some refactoring in version 6.0. Let's restore the old behavior, and while we're at it, let's also print the available NIC models here now since this option can be used to configure both, netdev backend and model in one go. Fixes: ad6f932fe8 ("net: do not exit on "netdev_add help" monitor command") Signed-off-by: Thomas Huth <thuth@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- net/net.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/net.c b/net/net.c index XXXXXXX..XXXXXXX 100644 --- a/net/net.c +++ b/net/net.c @@ -XXX,XX +XXX,XX @@ static int net_param_nic(void *dummy, QemuOpts *opts, Error **errp) const char *type; type = qemu_opt_get(opts, "type"); - if (type && g_str_equal(type, "none")) { - return 0; /* Nothing to do, default_net is cleared in vl.c */ + if (type) { + if (g_str_equal(type, "none")) { + return 0; /* Nothing to do, default_net is cleared in vl.c */ + } + if (is_help_option(type)) { + GPtrArray *nic_models = qemu_get_nic_models(TYPE_DEVICE); + show_netdevs(); + printf("\n"); + qemu_show_nic_models(type, (const char **)nic_models->pdata); + g_ptr_array_free(nic_models, true); + exit(0); + } } idx = nic_get_free_idx(); -- 2.7.4
From: Thomas Huth <thuth@redhat.com> Just because a NIC model is compiled into the QEMU binary does not necessary mean that it can be used with each and every machine. So let's rather talk about "available" models instead of "supported" models, just to avoid confusion. Reviewed-by: Claudio Fontana <cfontana@suse.de> Signed-off-by: Thomas Huth <thuth@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- net/net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/net.c b/net/net.c index XXXXXXX..XXXXXXX 100644 --- a/net/net.c +++ b/net/net.c @@ -XXX,XX +XXX,XX @@ int qemu_show_nic_models(const char *arg, const char *const *models) return 0; } - printf("Supported NIC models:\n"); + printf("Available NIC models:\n"); for (i = 0 ; models[i]; i++) { printf("%s\n", models[i]); } -- 2.7.4
From: Qiang Liu <cyruscyliu@gmail.com> This patch replaces hw_error to guest error log for [read|write]b accesses when mode_16bit is enabled. This avoids aborting qemu. Fixes: 1248f8d4cbc3 ("hw/lan9118: Add basic 16-bit mode support.") Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1433 Reported-by: Qiang Liu <cyruscyliu@gmail.com> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Signed-off-by: Qiang Liu <cyruscyliu@gmail.com> Suggested-by: Philippe Mathieu-Daudé <philmd@linaro.org> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/lan9118.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/hw/net/lan9118.c b/hw/net/lan9118.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/lan9118.c +++ b/hw/net/lan9118.c @@ -XXX,XX +XXX,XX @@ #include "migration/vmstate.h" #include "net/net.h" #include "net/eth.h" -#include "hw/hw.h" #include "hw/irq.h" #include "hw/net/lan9118.h" #include "hw/ptimer.h" @@ -XXX,XX +XXX,XX @@ #ifdef DEBUG_LAN9118 #define DPRINTF(fmt, ...) \ do { printf("lan9118: " fmt , ## __VA_ARGS__); } while (0) -#define BADF(fmt, ...) \ -do { hw_error("lan9118: error: " fmt , ## __VA_ARGS__);} while (0) #else #define DPRINTF(fmt, ...) do {} while(0) -#define BADF(fmt, ...) \ -do { fprintf(stderr, "lan9118: error: " fmt , ## __VA_ARGS__);} while (0) #endif /* The tx and rx fifo ports are a range of aliased 32-bit registers */ @@ -XXX,XX +XXX,XX @@ static uint32_t do_phy_read(lan9118_state *s, int reg) case 30: /* Interrupt mask */ return s->phy_int_mask; default: - BADF("PHY read reg %d\n", reg); + qemu_log_mask(LOG_GUEST_ERROR, + "do_phy_read: PHY read reg %d\n", reg); return 0; } } @@ -XXX,XX +XXX,XX @@ static void do_phy_write(lan9118_state *s, int reg, uint32_t val) phy_update_irq(s); break; default: - BADF("PHY write reg %d = 0x%04x\n", reg, val); + qemu_log_mask(LOG_GUEST_ERROR, + "do_phy_write: PHY write reg %d = 0x%04x\n", reg, val); } } @@ -XXX,XX +XXX,XX @@ static void lan9118_16bit_mode_write(void *opaque, hwaddr offset, return; } - hw_error("lan9118_write: Bad size 0x%x\n", size); + qemu_log_mask(LOG_GUEST_ERROR, + "lan9118_16bit_mode_write: Bad size 0x%x\n", size); } static uint64_t lan9118_readl(void *opaque, hwaddr offset, @@ -XXX,XX +XXX,XX @@ static uint64_t lan9118_16bit_mode_read(void *opaque, hwaddr offset, return lan9118_readl(opaque, offset, size); } - hw_error("lan9118_read: Bad size 0x%x\n", size); + qemu_log_mask(LOG_GUEST_ERROR, + "lan9118_16bit_mode_read: Bad size 0x%x\n", size); return 0; } -- 2.7.4
From: Fiona Ebner <f.ebner@proxmox.com> Currently, VMXNET3_MAX_MTU itself (being 9000) is not considered a valid value for the MTU, but a guest running ESXi 7.0 might try to set it and fail the assert [0]. In the Linux kernel, dev->max_mtu itself is a valid value for the MTU and for the vmxnet3 driver it's 9000, so a guest running Linux will also fail the assert when trying to set an MTU of 9000. VMXNET3_MAX_MTU and s->mtu don't seem to be used in relation to buffer allocations/accesses, so allowing the upper limit itself as a value should be fine. [0]: https://forum.proxmox.com/threads/114011/ Fixes: d05dcd94ae ("net: vmxnet3: validate configuration values during activate (CVE-2021-20203)") Signed-off-by: Fiona Ebner <f.ebner@proxmox.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- hw/net/vmxnet3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/vmxnet3.c +++ b/hw/net/vmxnet3.c @@ -XXX,XX +XXX,XX @@ static void vmxnet3_activate_device(VMXNET3State *s) vmxnet3_setup_rx_filtering(s); /* Cache fields from shared memory */ s->mtu = VMXNET3_READ_DRV_SHARED32(d, s->drv_shmem, devRead.misc.mtu); - assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu < VMXNET3_MAX_MTU); + assert(VMXNET3_MIN_MTU <= s->mtu && s->mtu <= VMXNET3_MAX_MTU); VMW_CFPRN("MTU is %u", s->mtu); s->max_rx_frags = -- 2.7.4
From: Christian Svensson <blue@cmd.nu> Increase the allocated buffer size to fit larger packets. Given that jumboframes can commonly be up to 9000 bytes the closest suitable value seems to be 16 KiB. Tested by running qemu towards a Linux L2TPv3 endpoint and pushing jumboframe traffic through the interfaces. Signed-off-by: Christian Svensson <blue@cmd.nu> Signed-off-by: Jason Wang <jasowang@redhat.com> --- net/l2tpv3.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/l2tpv3.c b/net/l2tpv3.c index XXXXXXX..XXXXXXX 100644 --- a/net/l2tpv3.c +++ b/net/l2tpv3.c @@ -XXX,XX +XXX,XX @@ */ #define BUFFER_ALIGN sysconf(_SC_PAGESIZE) -#define BUFFER_SIZE 2048 +#define BUFFER_SIZE 16384 #define IOVSIZE 2 #define MAX_L2TPV3_MSGCNT 64 #define MAX_L2TPV3_IOVCNT (MAX_L2TPV3_MSGCNT * IOVSIZE) -- 2.7.4
From: Joelle van Dyne <j@getutm.app> When the VM is stopped using the HMP command "stop", soon the handler will stop reading from the vmnet interface. This causes a flood of `VMNET_INTERFACE_PACKETS_AVAILABLE` events to arrive and puts the host CPU at 100%. We fix this by removing the event handler from vmnet when the VM is no longer in a running state and restore it when we return to a running state. Signed-off-by: Joelle van Dyne <j@getutm.app> Signed-off-by: Jason Wang <jasowang@redhat.com> --- net/vmnet-common.m | 48 +++++++++++++++++++++++++++++++++++------------- net/vmnet_int.h | 2 ++ 2 files changed, 37 insertions(+), 13 deletions(-) diff --git a/net/vmnet-common.m b/net/vmnet-common.m index XXXXXXX..XXXXXXX 100644 --- a/net/vmnet-common.m +++ b/net/vmnet-common.m @@ -XXX,XX +XXX,XX @@ #include "clients.h" #include "qemu/error-report.h" #include "qapi/error.h" +#include "sysemu/runstate.h" #include <vmnet/vmnet.h> #include <dispatch/dispatch.h> @@ -XXX,XX +XXX,XX @@ static void vmnet_bufs_init(VmnetState *s) } } +/** + * Called on state change to un-register/re-register handlers + */ +static void vmnet_vm_state_change_cb(void *opaque, bool running, RunState state) +{ + VmnetState *s = opaque; + + if (running) { + vmnet_interface_set_event_callback( + s->vmnet_if, + VMNET_INTERFACE_PACKETS_AVAILABLE, + s->if_queue, + ^(interface_event_t event_id, xpc_object_t event) { + assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE); + /* + * This function is being called from a non qemu thread, so + * we only schedule a BH, and do the rest of the io completion + * handling from vmnet_send_bh() which runs in a qemu context. + */ + qemu_bh_schedule(s->send_bh); + }); + } else { + vmnet_interface_set_event_callback( + s->vmnet_if, + VMNET_INTERFACE_PACKETS_AVAILABLE, + NULL, + NULL); + } +} int vmnet_if_create(NetClientState *nc, xpc_object_t if_desc, @@ -XXX,XX +XXX,XX @@ int vmnet_if_create(NetClientState *nc, s->packets_send_current_pos = 0; s->packets_send_end_pos = 0; - vmnet_interface_set_event_callback( - s->vmnet_if, - VMNET_INTERFACE_PACKETS_AVAILABLE, - s->if_queue, - ^(interface_event_t event_id, xpc_object_t event) { - assert(event_id == VMNET_INTERFACE_PACKETS_AVAILABLE); - /* - * This function is being called from a non qemu thread, so - * we only schedule a BH, and do the rest of the io completion - * handling from vmnet_send_bh() which runs in a qemu context. - */ - qemu_bh_schedule(s->send_bh); - }); + vmnet_vm_state_change_cb(s, 1, RUN_STATE_RUNNING); + + s->change = qemu_add_vm_change_state_handler(vmnet_vm_state_change_cb, s); return 0; } @@ -XXX,XX +XXX,XX @@ void vmnet_cleanup_common(NetClientState *nc) return; } + vmnet_vm_state_change_cb(s, 0, RUN_STATE_SHUTDOWN); + qemu_del_vm_change_state_handler(s->change); if_stopped_sem = dispatch_semaphore_create(0); vmnet_stop_interface( s->vmnet_if, diff --git a/net/vmnet_int.h b/net/vmnet_int.h index XXXXXXX..XXXXXXX 100644 --- a/net/vmnet_int.h +++ b/net/vmnet_int.h @@ -XXX,XX +XXX,XX @@ typedef struct VmnetState { int packets_send_end_pos; struct iovec iov_buf[VMNET_PACKETS_LIMIT]; + + VMChangeStateEntry *change; } VmnetState; const char *vmnet_status_map_str(vmnet_return_t status); -- 2.7.4
From: Laurent Vivier <lvivier@redhat.com> In stream mode, if the server shuts down there is currently no way to reconnect the client to a new server without removing the NIC device and the netdev backend (or to reboot). This patch introduces a reconnect option that specifies a delay to try to reconnect with the same parameters. Add a new test in qtest to test the reconnect option and the connect/disconnect events. Signed-off-by: Laurent Vivier <lvivier@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- net/stream.c | 53 ++++++++++++++++++++++- qapi/net.json | 7 ++- qemu-options.hx | 6 +-- tests/qtest/netdev-socket.c | 101 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 162 insertions(+), 5 deletions(-) diff --git a/net/stream.c b/net/stream.c index XXXXXXX..XXXXXXX 100644 --- a/net/stream.c +++ b/net/stream.c @@ -XXX,XX +XXX,XX @@ #include "io/channel-socket.h" #include "io/net-listener.h" #include "qapi/qapi-events-net.h" +#include "qapi/qapi-visit-sockets.h" +#include "qapi/clone-visitor.h" typedef struct NetStreamState { NetClientState nc; @@ -XXX,XX +XXX,XX @@ typedef struct NetStreamState { guint ioc_write_tag; SocketReadState rs; unsigned int send_index; /* number of bytes sent*/ + uint32_t reconnect; + guint timer_tag; + SocketAddress *addr; } NetStreamState; static void net_stream_listen(QIONetListener *listener, QIOChannelSocket *cioc, void *opaque); +static void net_stream_arm_reconnect(NetStreamState *s); static gboolean net_stream_writable(QIOChannel *ioc, GIOCondition condition, @@ -XXX,XX +XXX,XX @@ static gboolean net_stream_send(QIOChannel *ioc, qemu_set_info_str(&s->nc, "%s", ""); qapi_event_send_netdev_stream_disconnected(s->nc.name); + net_stream_arm_reconnect(s); return G_SOURCE_REMOVE; } @@ -XXX,XX +XXX,XX @@ static gboolean net_stream_send(QIOChannel *ioc, static void net_stream_cleanup(NetClientState *nc) { NetStreamState *s = DO_UPCAST(NetStreamState, nc, nc); + if (s->timer_tag) { + g_source_remove(s->timer_tag); + s->timer_tag = 0; + } + if (s->addr) { + qapi_free_SocketAddress(s->addr); + s->addr = NULL; + } if (s->ioc) { if (QIO_CHANNEL_SOCKET(s->ioc)->fd != -1) { if (s->ioc_read_tag) { @@ -XXX,XX +XXX,XX @@ static void net_stream_client_connected(QIOTask *task, gpointer opaque) error: object_unref(OBJECT(s->ioc)); s->ioc = NULL; + net_stream_arm_reconnect(s); +} + +static gboolean net_stream_reconnect(gpointer data) +{ + NetStreamState *s = data; + QIOChannelSocket *sioc; + + s->timer_tag = 0; + + sioc = qio_channel_socket_new(); + s->ioc = QIO_CHANNEL(sioc); + qio_channel_socket_connect_async(sioc, s->addr, + net_stream_client_connected, s, + NULL, NULL); + return G_SOURCE_REMOVE; +} + +static void net_stream_arm_reconnect(NetStreamState *s) +{ + if (s->reconnect && s->timer_tag == 0) { + s->timer_tag = g_timeout_add_seconds(s->reconnect, + net_stream_reconnect, s); + } } static int net_stream_client_init(NetClientState *peer, const char *model, const char *name, SocketAddress *addr, + uint32_t reconnect, Error **errp) { NetStreamState *s; @@ -XXX,XX +XXX,XX @@ static int net_stream_client_init(NetClientState *peer, s->ioc = QIO_CHANNEL(sioc); s->nc.link_down = true; + s->reconnect = reconnect; + if (reconnect) { + s->addr = QAPI_CLONE(SocketAddress, addr); + } qio_channel_socket_connect_async(sioc, addr, net_stream_client_connected, s, NULL, NULL); @@ -XXX,XX +XXX,XX @@ int net_init_stream(const Netdev *netdev, const char *name, sock = &netdev->u.stream; if (!sock->has_server || !sock->server) { - return net_stream_client_init(peer, "stream", name, sock->addr, errp); + return net_stream_client_init(peer, "stream", name, sock->addr, + sock->has_reconnect ? sock->reconnect : 0, + errp); + } + if (sock->has_reconnect) { + error_setg(errp, "'reconnect' option is incompatible with " + "socket in server mode"); + return -1; } return net_stream_server_init(peer, "stream", name, sock->addr, errp); } diff --git a/qapi/net.json b/qapi/net.json index XXXXXXX..XXXXXXX 100644 --- a/qapi/net.json +++ b/qapi/net.json @@ -XXX,XX +XXX,XX @@ # @addr: socket address to listen on (server=true) # or connect to (server=false) # @server: create server socket (default: false) +# @reconnect: For a client socket, if a socket is disconnected, +# then attempt a reconnect after the given number of seconds. +# Setting this to zero disables this function. (default: 0) +# (since 8.0) # # Only SocketAddress types 'unix', 'inet' and 'fd' are supported. # @@ -XXX,XX +XXX,XX @@ { 'struct': 'NetdevStreamOptions', 'data': { 'addr': 'SocketAddress', - '*server': 'bool' } } + '*server': 'bool', + '*reconnect': 'uint32' } } ## # @NetdevDgramOptions: diff --git a/qemu-options.hx b/qemu-options.hx index XXXXXXX..XXXXXXX 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -XXX,XX +XXX,XX @@ DEF("netdev", HAS_ARG, QEMU_OPTION_netdev, "-netdev socket,id=str[,fd=h][,udp=host:port][,localaddr=host:port]\n" " configure a network backend to connect to another network\n" " using an UDP tunnel\n" - "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off]\n" - "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off]\n" - "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor\n" + "-netdev stream,id=str[,server=on|off],addr.type=inet,addr.host=host,addr.port=port[,to=maxport][,numeric=on|off][,keep-alive=on|off][,mptcp=on|off][,addr.ipv4=on|off][,addr.ipv6=on|off][,reconnect=seconds]\n" + "-netdev stream,id=str[,server=on|off],addr.type=unix,addr.path=path[,abstract=on|off][,tight=on|off][,reconnect=seconds]\n" + "-netdev stream,id=str[,server=on|off],addr.type=fd,addr.str=file-descriptor[,reconnect=seconds]\n" " configure a network backend to connect to another network\n" " using a socket connection in stream mode.\n" "-netdev dgram,id=str,remote.type=inet,remote.host=maddr,remote.port=port[,local.type=inet,local.host=addr]\n" diff --git a/tests/qtest/netdev-socket.c b/tests/qtest/netdev-socket.c index XXXXXXX..XXXXXXX 100644 --- a/tests/qtest/netdev-socket.c +++ b/tests/qtest/netdev-socket.c @@ -XXX,XX +XXX,XX @@ #include <glib/gstdio.h> #include "../unit/socket-helpers.h" #include "libqtest.h" +#include "qapi/qmp/qstring.h" +#include "qemu/sockets.h" +#include "qapi/qobject-input-visitor.h" +#include "qapi/qapi-visit-sockets.h" #define CONNECTION_TIMEOUT 60 @@ -XXX,XX +XXX,XX @@ static void test_stream_inet_ipv4(void) qtest_quit(qts0); } +static void wait_stream_connected(QTestState *qts, const char *id, + SocketAddress **addr) +{ + QDict *resp, *data; + QString *qstr; + QObject *obj; + Visitor *v = NULL; + + resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_CONNECTED"); + g_assert_nonnull(resp); + data = qdict_get_qdict(resp, "data"); + g_assert_nonnull(data); + + qstr = qobject_to(QString, qdict_get(data, "netdev-id")); + g_assert_nonnull(data); + + g_assert(!strcmp(qstring_get_str(qstr), id)); + + obj = qdict_get(data, "addr"); + + v = qobject_input_visitor_new(obj); + visit_type_SocketAddress(v, NULL, addr, NULL); + visit_free(v); + qobject_unref(resp); +} + +static void wait_stream_disconnected(QTestState *qts, const char *id) +{ + QDict *resp, *data; + QString *qstr; + + resp = qtest_qmp_eventwait_ref(qts, "NETDEV_STREAM_DISCONNECTED"); + g_assert_nonnull(resp); + data = qdict_get_qdict(resp, "data"); + g_assert_nonnull(data); + + qstr = qobject_to(QString, qdict_get(data, "netdev-id")); + g_assert_nonnull(data); + + g_assert(!strcmp(qstring_get_str(qstr), id)); + qobject_unref(resp); +} + +static void test_stream_inet_reconnect(void) +{ + QTestState *qts0, *qts1; + int port; + SocketAddress *addr; + + port = inet_get_free_port(false); + qts0 = qtest_initf("-nodefaults -M none " + "-netdev stream,id=st0,server=true,addr.type=inet," + "addr.ipv4=on,addr.ipv6=off," + "addr.host=127.0.0.1,addr.port=%d", port); + + EXPECT_STATE(qts0, "st0: index=0,type=stream,\r\n", 0); + + qts1 = qtest_initf("-nodefaults -M none " + "-netdev stream,server=false,id=st0,addr.type=inet," + "addr.ipv4=on,addr.ipv6=off,reconnect=1," + "addr.host=127.0.0.1,addr.port=%d", port); + + wait_stream_connected(qts0, "st0", &addr); + g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); + g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); + qapi_free_SocketAddress(addr); + + /* kill server */ + qtest_quit(qts0); + + /* check client has been disconnected */ + wait_stream_disconnected(qts1, "st0"); + + /* restart server */ + qts0 = qtest_initf("-nodefaults -M none " + "-netdev stream,id=st0,server=true,addr.type=inet," + "addr.ipv4=on,addr.ipv6=off," + "addr.host=127.0.0.1,addr.port=%d", port); + + /* wait connection events*/ + wait_stream_connected(qts0, "st0", &addr); + g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); + g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); + qapi_free_SocketAddress(addr); + + wait_stream_connected(qts1, "st0", &addr); + g_assert_cmpint(addr->type, ==, SOCKET_ADDRESS_TYPE_INET); + g_assert_cmpstr(addr->u.inet.host, ==, "127.0.0.1"); + g_assert_cmpint(atoi(addr->u.inet.port), ==, port); + qapi_free_SocketAddress(addr); + + qtest_quit(qts1); + qtest_quit(qts0); +} + static void test_stream_inet_ipv6(void) { QTestState *qts0, *qts1; @@ -XXX,XX +XXX,XX @@ int main(int argc, char **argv) #ifndef _WIN32 qtest_add_func("/netdev/dgram/mcast", test_dgram_mcast); #endif + qtest_add_func("/netdev/stream/inet/reconnect", + test_stream_inet_reconnect); } if (has_ipv6) { qtest_add_func("/netdev/stream/inet/ipv6", test_stream_inet_ipv6); -- 2.7.4
From: Eugenio Pérez <eperezma@redhat.com> VHOST_BACKEND_F_IOTLB_ASID is the feature bit, not the bitmask. Since the device under test also provided VHOST_BACKEND_F_IOTLB_MSG_V2 and VHOST_BACKEND_F_IOTLB_BATCH, this went unnoticed. Fixes: c1a1008685 ("vdpa: always start CVQ in SVQ mode if possible") Signed-off-by: Eugenio Pérez <eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin <mst@redhat.com> Acked-by: Jason Wang <jasowang@redhat.com> Signed-off-by: Jason Wang <jasowang@redhat.com> --- net/vhost-vdpa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index XXXXXXX..XXXXXXX 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -XXX,XX +XXX,XX @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) g_strerror(errno), errno); return -1; } - if (!(backend_features & VHOST_BACKEND_F_IOTLB_ASID) || + if (!(backend_features & BIT_ULL(VHOST_BACKEND_F_IOTLB_ASID)) || !vhost_vdpa_net_valid_svq_features(v->dev->features, NULL)) { return 0; } -- 2.7.4