The following changes since commit e3debd5e7d0ce031356024878a0a18b9d109354a:

  Merge tag 'pull-request-2023-03-24' of https://gitlab.com/thuth/qemu into staging (2023-03-24 16:08:46 +0000)

are available in the git repository at:

  https://github.com/jasowang/qemu.git tags/net-pull-request

for you to fetch changes up to fba7c3b788dfcb99a3f9253f7d99cc0d217d6d3c:

  igb: respect VMVIR and VMOLR for VLAN (2023-03-28 13:10:55 +0800)

----------------------------------------------------------------

----------------------------------------------------------------
Akihiko Odaki (4):
      igb: Save more Tx states
      igb: Fix DMA requester specification for Tx packet
      hw/net/net_tx_pkt: Ignore ECN bit
      hw/net/net_tx_pkt: Align l3_hdr

Sriram Yagnaraman (8):
      MAINTAINERS: Add Sriram Yagnaraman as a igb reviewer
      igb: handle PF/VF reset properly
      igb: add ICR_RXDW
      igb: implement VFRE and VFTE registers
      igb: check oversized packets for VMDq
      igb: respect E1000_VMOLR_RSSE
      igb: implement VF Tx and Rx stats
      igb: respect VMVIR and VMOLR for VLAN

 MAINTAINERS          |   1 +
 hw/net/e1000e_core.c |   6 +-
 hw/net/e1000x_regs.h |   4 +
 hw/net/igb.c         |  26 ++++--
 hw/net/igb_core.c    | 256 ++++++++++++++++++++++++++++++++++++++-------------
 hw/net/igb_core.h    |   9 +-
 hw/net/igb_regs.h    |   6 ++
 hw/net/net_tx_pkt.c  |  30 +++---
 hw/net/net_tx_pkt.h  |   3 +-
 hw/net/trace-events  |   2 +
 hw/net/vmxnet3.c     |   4 +-
 11 files changed, 254 insertions(+), 93 deletions(-)

From: Akihiko Odaki <akihiko.odaki@daynix.com>

The current implementation of igb uses only part of an advanced Tx
context descriptor and first data descriptor because it misses some
features and sniffs the traits of the packet instead of respecting the
packet type specified in the descriptor. However, we will certainly
need the entire Tx context descriptor when we update igb to respect
these ignored fields. Save the entire context descriptor and first
data descriptor except the buffer address to prepare for such a change.

This also introduces the distinction of contexts with different
indexes, which exists in igb but not in e1000e.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Reviewed-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
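Reviewer note, not part of the commit: a minimal sketch of the descriptor
layout and index selection this patch models. The struct fields match the
VMSTATE entries below; the standalone helper name is made up for
illustration.

    #include <stdint.h>

    /* Advanced Tx context descriptor, as saved by this patch. */
    struct e1000_adv_tx_context_desc {
        uint32_t vlan_macip_lens;
        uint32_t seqnum_seed;
        uint32_t type_tucmd_mlhl;
        uint32_t mss_l4len_idx;
    };

    /* igb caches two contexts per Tx queue; both context and data
     * descriptors carry an IDX field (bit 4 as used here) that picks
     * which cached context applies. */
    static inline uint32_t adv_tx_ctx_index(uint32_t mss_l4len_idx)
    {
        return (mss_l4len_idx >> 4) & 1;
    }
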
 hw/net/igb.c      | 26 +++++++++++++++++++-------
 hw/net/igb_core.c | 39 +++++++++++++++++++--------------------
 hw/net/igb_core.h |  8 +++-----
 3 files changed, 41 insertions(+), 32 deletions(-)

diff --git a/hw/net/igb.c b/hw/net/igb.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb.c
+++ b/hw/net/igb.c
@@ -XXX,XX +XXX,XX @@ static int igb_post_load(void *opaque, int version_id)
     return igb_core_post_load(&s->core);
 }

-static const VMStateDescription igb_vmstate_tx = {
-    .name = "igb-tx",
+static const VMStateDescription igb_vmstate_tx_ctx = {
+    .name = "igb-tx-ctx",
     .version_id = 1,
     .minimum_version_id = 1,
     .fields = (VMStateField[]) {
-        VMSTATE_UINT16(vlan, struct igb_tx),
-        VMSTATE_UINT16(mss, struct igb_tx),
-        VMSTATE_BOOL(tse, struct igb_tx),
-        VMSTATE_BOOL(ixsm, struct igb_tx),
-        VMSTATE_BOOL(txsm, struct igb_tx),
+        VMSTATE_UINT32(vlan_macip_lens, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(seqnum_seed, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(type_tucmd_mlhl, struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(mss_l4len_idx, struct e1000_adv_tx_context_desc),
+        VMSTATE_END_OF_LIST()
+    }
+};
+
+static const VMStateDescription igb_vmstate_tx = {
+    .name = "igb-tx",
+    .version_id = 2,
+    .minimum_version_id = 2,
+    .fields = (VMStateField[]) {
+        VMSTATE_STRUCT_ARRAY(ctx, struct igb_tx, 2, 0, igb_vmstate_tx_ctx,
+                             struct e1000_adv_tx_context_desc),
+        VMSTATE_UINT32(first_cmd_type_len, struct igb_tx),
+        VMSTATE_UINT32(first_olinfo_status, struct igb_tx),
         VMSTATE_BOOL(first, struct igb_tx),
         VMSTATE_BOOL(skip_cp, struct igb_tx),
         VMSTATE_END_OF_LIST()
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
-    if (tx->tse) {
-        if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, tx->mss)) {
+    if (tx->first_cmd_type_len & E1000_ADVTXD_DCMD_TSE) {
+        uint32_t idx = (tx->first_olinfo_status >> 4) & 1;
+        uint32_t mss = tx->ctx[idx].mss_l4len_idx >> 16;
+        if (!net_tx_pkt_build_vheader(tx->tx_pkt, true, true, mss)) {
             return false;
         }

@@ -XXX,XX +XXX,XX @@ igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
         return true;
     }

-    if (tx->txsm) {
+    if (tx->first_olinfo_status & E1000_ADVTXD_POTS_TXSM) {
         if (!net_tx_pkt_build_vheader(tx->tx_pkt, false, true, 0)) {
             return false;
         }
     }

-    if (tx->ixsm) {
+    if (tx->first_olinfo_status & E1000_ADVTXD_POTS_IXSM) {
         net_tx_pkt_update_ip_hdr_checksum(tx->tx_pkt);
     }

@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
 {
     struct e1000_adv_tx_context_desc *tx_ctx_desc;
     uint32_t cmd_type_len;
-    uint32_t olinfo_status;
+    uint32_t idx;
     uint64_t buffer_addr;
     uint16_t length;

@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
         E1000_ADVTXD_DTYP_DATA) {
         /* advanced transmit data descriptor */
         if (tx->first) {
-            olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
-
-            tx->tse = !!(cmd_type_len & E1000_ADVTXD_DCMD_TSE);
-            tx->ixsm = !!(olinfo_status & E1000_ADVTXD_POTS_IXSM);
-            tx->txsm = !!(olinfo_status & E1000_ADVTXD_POTS_TXSM);
-
+            tx->first_cmd_type_len = cmd_type_len;
+            tx->first_olinfo_status = le32_to_cpu(tx_desc->read.olinfo_status);
             tx->first = false;
         }
     } else if ((cmd_type_len & E1000_ADVTXD_DTYP_CTXT) ==
               E1000_ADVTXD_DTYP_CTXT) {
         /* advanced transmit context descriptor */
         tx_ctx_desc = (struct e1000_adv_tx_context_desc *)tx_desc;
-        tx->vlan = le32_to_cpu(tx_ctx_desc->vlan_macip_lens) >> 16;
-        tx->mss = le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 16;
+        idx = (le32_to_cpu(tx_ctx_desc->mss_l4len_idx) >> 4) & 1;
+        tx->ctx[idx].vlan_macip_lens = le32_to_cpu(tx_ctx_desc->vlan_macip_lens);
+        tx->ctx[idx].seqnum_seed = le32_to_cpu(tx_ctx_desc->seqnum_seed);
+        tx->ctx[idx].type_tucmd_mlhl = le32_to_cpu(tx_ctx_desc->type_tucmd_mlhl);
+        tx->ctx[idx].mss_l4len_idx = le32_to_cpu(tx_ctx_desc->mss_l4len_idx);
         return;
     } else {
         /* unknown descriptor type */
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
     if (cmd_type_len & E1000_TXD_CMD_EOP) {
         if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
             if (cmd_type_len & E1000_TXD_CMD_VLE) {
-                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, tx->vlan,
-                                                core->mac[VET] & 0xffff);
+                idx = (tx->first_olinfo_status >> 4) & 1;
+                uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
+                uint16_t vet = core->mac[VET] & 0xffff;
+                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
             }
             if (igb_tx_pkt_send(core, tx, queue_index)) {
                 igb_on_tx_done_update_stats(core, tx->tx_pkt);
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)
     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
         tx = &core->tx[i];
         net_tx_pkt_reset(tx->tx_pkt);
-        tx->vlan = 0;
-        tx->mss = 0;
-        tx->tse = false;
-        tx->ixsm = false;
-        tx->txsm = false;
+        memset(tx->ctx, 0, sizeof(tx->ctx));
         tx->first = true;
         tx->skip_cp = false;
     }
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.h
+++ b/hw/net/igb_core.h
@@ -XXX,XX +XXX,XX @@ struct IGBCore {
     QEMUTimer *autoneg_timer;

     struct igb_tx {
-        uint16_t vlan;  /* VLAN Tag */
-        uint16_t mss;   /* Maximum Segment Size */
-        bool tse;       /* TCP/UDP Segmentation Enable */
-        bool ixsm;      /* Insert IP Checksum */
-        bool txsm;      /* Insert TCP/UDP Checksum */
+        struct e1000_adv_tx_context_desc ctx[2];
+        uint32_t first_cmd_type_len;
+        uint32_t first_olinfo_status;

         bool first;
         bool skip_cp;
--
2.7.4

From: Akihiko Odaki <akihiko.odaki@daynix.com>

igb used to specify the PF as DMA requester when reading Tx packets.
This caused Tx requests from VFs to be performed on the address space
of the PF, defeating the purpose of SR-IOV. Add some logic to change
the requester depending on the queue, which can be assigned to a VF.

Fixes: 3a977deebe ("Intrdocue igb device emulation")
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
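Reviewer note, not part of the commit: a rough sketch of the idea, assuming
QEMU's "hw/pci/pci.h". pci_dma_read() is QEMU's standard per-device DMA
helper; the wrapper name below is hypothetical.

    /* Issuing the Tx buffer read through the PCI function that owns
     * the queue makes the access go through that function's DMA
     * address space (including any IOMMU mappings set up for the VF),
     * rather than always using the PF's address space. */
    static inline void tx_buf_read_sketch(PCIDevice *requester,
                                          dma_addr_t addr, void *buf,
                                          dma_addr_t len)
    {
        pci_dma_read(requester, addr, buf, len);
    }
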
 hw/net/e1000e_core.c |  6 +++---
 hw/net/igb_core.c    | 13 ++++++++-----
 hw/net/net_tx_pkt.c  |  3 ++-
 hw/net/net_tx_pkt.h  |  3 ++-
 hw/net/vmxnet3.c     |  4 ++--
 5 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/e1000e_core.c
+++ b/hw/net/e1000e_core.c
@@ -XXX,XX +XXX,XX @@ e1000e_process_tx_desc(E1000ECore *core,
         }

         tx->skip_cp = false;
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, core->owner);

         tx->sum_needed = 0;
         tx->cptse = 0;
@@ -XXX,XX +XXX,XX @@ e1000e_core_pci_uninit(E1000ECore *core)
     qemu_del_vm_change_state_handler(core->vmstate);

     for (i = 0; i < E1000E_NUM_QUEUES; i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
     }

@@ -XXX,XX +XXX,XX @@ static void e1000e_reset(E1000ECore *core, bool sw)
     e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);

     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, core->owner);
         memset(&core->tx[i].props, 0, sizeof(core->tx[i].props));
         core->tx[i].skip_cp = false;
     }
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)

 static void
 igb_process_tx_desc(IGBCore *core,
+                    PCIDevice *dev,
                     struct igb_tx *tx,
                     union e1000_adv_tx_desc *tx_desc,
                     int queue_index)
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,

         tx->first = true;
         tx->skip_cp = false;
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, dev);
     }
 }

@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
         d = core->owner;
     }

+    net_tx_pkt_reset(txr->tx->tx_pkt, d);
+
     while (!igb_ring_empty(core, txi)) {
         base = igb_ring_head_descr(core, txi);

@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
         trace_e1000e_tx_descr((void *)(intptr_t)desc.read.buffer_addr,
                               desc.read.cmd_type_len, desc.wb.status);

-        igb_process_tx_desc(core, txr->tx, &desc, txi->idx);
+        igb_process_tx_desc(core, d, txr->tx, &desc, txi->idx);
         igb_ring_advance(core, txi, 1);
         eic |= igb_txdesc_writeback(core, base, &desc, txi);
     }
@@ -XXX,XX +XXX,XX @@ igb_core_pci_realize(IGBCore *core,
     core->vmstate = qemu_add_vm_change_state_handler(igb_vm_state_change, core);

     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        net_tx_pkt_init(&core->tx[i].tx_pkt, core->owner, E1000E_MAX_TX_FRAGS);
+        net_tx_pkt_init(&core->tx[i].tx_pkt, NULL, E1000E_MAX_TX_FRAGS);
     }

     net_rx_pkt_init(&core->rx_pkt);
@@ -XXX,XX +XXX,XX @@ igb_core_pci_uninit(IGBCore *core)
     qemu_del_vm_change_state_handler(core->vmstate);

     for (i = 0; i < IGB_NUM_QUEUES; i++) {
-        net_tx_pkt_reset(core->tx[i].tx_pkt);
+        net_tx_pkt_reset(core->tx[i].tx_pkt, NULL);
         net_tx_pkt_uninit(core->tx[i].tx_pkt);
     }

@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)

     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
         tx = &core->tx[i];
-        net_tx_pkt_reset(tx->tx_pkt);
+        net_tx_pkt_reset(tx->tx_pkt, NULL);
         memset(tx->ctx, 0, sizeof(tx->ctx));
         tx->first = true;
         tx->skip_cp = false;
diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt)
 #endif
 }

-void net_tx_pkt_reset(struct NetTxPkt *pkt)
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *pci_dev)
 {
     int i;

@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_reset(struct NetTxPkt *pkt)
                           pkt->raw[i].iov_len, DMA_DIRECTION_TO_DEVICE, 0);
         }
     }
+    pkt->pci_dev = pci_dev;
     pkt->raw_frags = 0;

     pkt->hdr_len = 0;
diff --git a/hw/net/net_tx_pkt.h b/hw/net/net_tx_pkt.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/net_tx_pkt.h
+++ b/hw/net/net_tx_pkt.h
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_dump(struct NetTxPkt *pkt);
  * reset tx packet private context (needed to be called between packets)
  *
  * @pkt:            packet
+ * @dev:            PCI device processing the next packet
  *
  */
-void net_tx_pkt_reset(struct NetTxPkt *pkt);
+void net_tx_pkt_reset(struct NetTxPkt *pkt, PCIDevice *dev);

 /**
  * Send packet to qemu. handles sw offloads if vhdr is not supported.
diff --git a/hw/net/vmxnet3.c b/hw/net/vmxnet3.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/vmxnet3.c
+++ b/hw/net/vmxnet3.c
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_process_tx_queue(VMXNET3State *s, int qidx)
             vmxnet3_complete_packet(s, qidx, txd_idx);
             s->tx_sop = true;
             s->skip_current_tx_pkt = false;
-            net_tx_pkt_reset(s->tx_pkt);
+            net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
         }
     }
 }
@@ -XXX,XX +XXX,XX @@ static void vmxnet3_deactivate_device(VMXNET3State *s)
 {
     if (s->device_active) {
         VMW_CBPRN("Deactivating vmxnet3...");
-        net_tx_pkt_reset(s->tx_pkt);
+        net_tx_pkt_reset(s->tx_pkt, PCI_DEVICE(s));
         net_tx_pkt_uninit(s->tx_pkt);
         net_rx_pkt_uninit(s->rx_pkt);
         s->device_active = false;
--
2.7.4

From: Akihiko Odaki <akihiko.odaki@daynix.com>

No segmentation should be performed if the gso type is
VIRTIO_NET_HDR_GSO_NONE, even if the ECN bit is set.

Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1544
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
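Reviewer note, not part of the commit: a self-contained illustration of the
masking. The constant values follow the virtio specification; the helper
name is made up.

    #include <stdint.h>

    #define VIRTIO_NET_HDR_GSO_NONE 0x00
    #define VIRTIO_NET_HDR_GSO_ECN  0x80

    /* GSO_NONE with the ECN bit set still means "do not segment",
     * so ECN must be masked off before comparing against GSO_NONE. */
    static inline int wants_segmentation(uint8_t gso_type)
    {
        return (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) != VIRTIO_NET_HDR_GSO_NONE;
    }
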
 hw/net/net_tx_pkt.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
 {
     assert(pkt);

+    uint8_t gso_type = pkt->virt_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN;
+
     /*
      * Since underlying infrastructure does not support IP datagrams longer
      * than 64K we should drop such packets and don't even try to send
      */
-    if (VIRTIO_NET_HDR_GSO_NONE != pkt->virt_hdr.gso_type) {
+    if (VIRTIO_NET_HDR_GSO_NONE != gso_type) {
         if (pkt->payload_len >
             ETH_MAX_IP_DGRAM_LEN -
             pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len) {
@@ -XXX,XX +XXX,XX @@ bool net_tx_pkt_send_custom(struct NetTxPkt *pkt, bool offload,
         }
     }

-    if (offload || pkt->virt_hdr.gso_type == VIRTIO_NET_HDR_GSO_NONE) {
+    if (offload || gso_type == VIRTIO_NET_HDR_GSO_NONE) {
         if (!offload && pkt->virt_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
             net_tx_pkt_do_sw_csum(pkt, &pkt->vec[NET_TX_PKT_L2HDR_FRAG],
                                   pkt->payload_frags + NET_TX_PKT_PL_START_FRAG - 1,
--
2.7.4

From: Akihiko Odaki <akihiko.odaki@daynix.com>

Align the l3_hdr member of NetTxPkt by defining it as a union of
ip_header, ip6_header, and an array of octets.

Fixes: e263cd49c7 ("Packet abstraction for VMWARE network devices")
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1544
Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
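Reviewer note, not part of the commit: a compile-time illustration of why
the union helps. The struct is a stand-in, not QEMU's ip_header.

    #include <stdint.h>

    struct ip_hdr_standin {
        uint32_t fields[5];
    };

    /* A bare uint8_t array only guarantees byte alignment, so casting
     * it to a struct pointer is undefined behaviour and may trap on
     * strict-alignment hosts.  A union adopts the strictest alignment
     * of all its members, making the accesses through .ip/.ip6 safe. */
    union l3_hdr_standin {
        struct ip_hdr_standin ip;
        uint8_t octets[65535];
    };

    _Static_assert(_Alignof(union l3_hdr_standin) ==
                   _Alignof(struct ip_hdr_standin),
                   "union is aligned for its struct member");
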
 hw/net/net_tx_pkt.c | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/hw/net/net_tx_pkt.c b/hw/net/net_tx_pkt.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/net_tx_pkt.c
+++ b/hw/net/net_tx_pkt.c
@@ -XXX,XX +XXX,XX @@ struct NetTxPkt {
     struct iovec *vec;

     uint8_t l2_hdr[ETH_MAX_L2_HDR_LEN];
-    uint8_t l3_hdr[ETH_MAX_IP_DGRAM_LEN];
+    union {
+        struct ip_header ip;
+        struct ip6_header ip6;
+        uint8_t octets[ETH_MAX_IP_DGRAM_LEN];
+    } l3_hdr;

     uint32_t payload_len;

@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_update_ip_hdr_checksum(struct NetTxPkt *pkt)
 {
     uint16_t csum;
     assert(pkt);
-    struct ip_header *ip_hdr;
-    ip_hdr = pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_base;

-    ip_hdr->ip_len = cpu_to_be16(pkt->payload_len +
+    pkt->l3_hdr.ip.ip_len = cpu_to_be16(pkt->payload_len +
         pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);

-    ip_hdr->ip_sum = 0;
-    csum = net_raw_checksum((uint8_t *)ip_hdr,
+    pkt->l3_hdr.ip.ip_sum = 0;
+    csum = net_raw_checksum(pkt->l3_hdr.octets,
         pkt->vec[NET_TX_PKT_L3HDR_FRAG].iov_len);
-    ip_hdr->ip_sum = cpu_to_be16(csum);
+    pkt->l3_hdr.ip.ip_sum = cpu_to_be16(csum);
 }

 void net_tx_pkt_update_ip_checksums(struct NetTxPkt *pkt)
@@ -XXX,XX +XXX,XX @@ void net_tx_pkt_fix_ip6_payload_len(struct NetTxPkt *pkt)
 {
     struct iovec *l2 = &pkt->vec[NET_TX_PKT_L2HDR_FRAG];
     if (eth_get_l3_proto(l2, 1, l2->iov_len) == ETH_P_IPV6) {
-        struct ip6_header *ip6 = (struct ip6_header *) pkt->l3_hdr;
         /*
          * TODO: if qemu would support >64K packets - add jumbo option check
          * something like that:
          * 'if (ip6->ip6_plen == 0 && !has_jumbo_option(ip6)) {'
          */
-        if (ip6->ip6_plen == 0) {
+        if (pkt->l3_hdr.ip6.ip6_plen == 0) {
             if (pkt->payload_len <= ETH_MAX_IP_DGRAM_LEN) {
-                ip6->ip6_plen = htons(pkt->payload_len);
+                pkt->l3_hdr.ip6.ip6_plen = htons(pkt->payload_len);
             }
             /*
              * TODO: if qemu would support >64K packets
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

I would like to review and be informed of changes to the igb device.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index XXXXXXX..XXXXXXX 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -XXX,XX +XXX,XX @@ F: tests/qtest/libqos/e1000e.*

 igb
 M: Akihiko Odaki <akihiko.odaki@daynix.com>
+R: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
 S: Maintained
 F: docs/system/devices/igb.rst
 F: hw/net/igb*
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Use PFRSTD to reset the RSTI bit for VFs, and raise the VFLRE interrupt
when a VF is reset.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
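Reviewer note, not part of the commit: the RSTI/RSTD handshake from the VF
driver's point of view, as this patch models it. The mailbox constants match
igb_regs.h; the polling helper is illustrative only.

    #include <stdint.h>

    #define E1000_V2PMAILBOX_RSTI 0x00000040 /* PF reset in progress */
    #define E1000_V2PMAILBOX_RSTD 0x00000080 /* PF reset done */

    /* A VF sees RSTI while the PF is resetting; once the PF driver
     * writes CTRL_EXT.PFRSTD, RSTI is cleared and RSTD is set for
     * every VF, and the VF may bring its queues back up. */
    static void vf_wait_pf_reset_done(volatile const uint32_t *v2p_mailbox)
    {
        while (!(*v2p_mailbox & E1000_V2PMAILBOX_RSTD)) {
            /* spin until the PF signals reset done */
        }
    }
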
 hw/net/igb_core.c   | 38 ++++++++++++++++++++++++++------------
 hw/net/igb_regs.h   |  3 +++
 hw/net/trace-events |  2 ++
 3 files changed, 31 insertions(+), 12 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ static void igb_set_eims(IGBCore *core, int index, uint32_t val)
     igb_update_interrupt_state(core);
 }

-static void igb_vf_reset(IGBCore *core, uint16_t vfn)
-{
-    /* TODO: Reset of the queue enable and the interrupt registers of the VF. */
-
-    core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
-    core->mac[V2PMAILBOX0 + vfn] = E1000_V2PMAILBOX_RSTD;
-}
-
 static void mailbox_interrupt_to_vf(IGBCore *core, uint16_t vfn)
 {
     uint32_t ent = core->mac[VTIVAR_MISC + vfn];
@@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)
     }
 }

+static void igb_vf_reset(IGBCore *core, uint16_t vfn)
+{
+    /* disable Rx and Tx for the VF*/
+    core->mac[VFTE] &= ~BIT(vfn);
+    core->mac[VFRE] &= ~BIT(vfn);
+    /* indicate VF reset to PF */
+    core->mac[VFLRE] |= BIT(vfn);
+    /* VFLRE and mailbox use the same interrupt cause */
+    mailbox_interrupt_to_pf(core);
+}
+
 static void igb_w1c(IGBCore *core, int index, uint32_t val)
 {
     core->mac[index] &= ~val;
@@ -XXX,XX +XXX,XX @@ igb_set_status(IGBCore *core, int index, uint32_t val)
 static void
 igb_set_ctrlext(IGBCore *core, int index, uint32_t val)
 {
-    trace_e1000e_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
-                                     !!(val & E1000_CTRL_EXT_SPD_BYPS));
-
-    /* TODO: PFRSTD */
+    trace_igb_link_set_ext_params(!!(val & E1000_CTRL_EXT_ASDCHK),
+                                  !!(val & E1000_CTRL_EXT_SPD_BYPS),
+                                  !!(val & E1000_CTRL_EXT_PFRSTD));

     /* Zero self-clearing bits */
     val &= ~(E1000_CTRL_EXT_ASDCHK | E1000_CTRL_EXT_EE_RST);
     core->mac[CTRL_EXT] = val;
+
+    if (core->mac[CTRL_EXT] & E1000_CTRL_EXT_PFRSTD) {
+        for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+            core->mac[V2PMAILBOX0 + vfn] &= ~E1000_V2PMAILBOX_RSTI;
+            core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTD;
+        }
+    }
 }

 static void
@@ -XXX,XX +XXX,XX @@ static void igb_reset(IGBCore *core, bool sw)

     e1000x_reset_mac_addr(core->owner_nic, core->mac, core->permanent_mac);

+    for (int vfn = 0; vfn < IGB_MAX_VF_FUNCTIONS; vfn++) {
+        /* Set RSTI, so VF can identify a PF reset is in progress */
+        core->mac[V2PMAILBOX0 + vfn] |= E1000_V2PMAILBOX_RSTI;
+    }
+
     for (i = 0; i < ARRAY_SIZE(core->tx); i++) {
         tx = &core->tx[i];
         net_tx_pkt_reset(tx->tx_pkt, NULL);
diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {

 /* from igb/e1000_defines.h */

+/* Physical Func Reset Done Indication */
+#define E1000_CTRL_EXT_PFRSTD 0x00004000
+
 #define E1000_IVAR_VALID 0x80
 #define E1000_GPIE_NSICR 0x00000001
 #define E1000_GPIE_MSIX_MODE 0x00000010
diff --git a/hw/net/trace-events b/hw/net/trace-events
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/trace-events
+++ b/hw/net/trace-events
@@ -XXX,XX +XXX,XX @@ igb_core_mdic_read_unhandled(uint32_t addr) "MDIC READ: PHY[%u] UNHANDLED"
 igb_core_mdic_write(uint32_t addr, uint32_t data) "MDIC WRITE: PHY[%u] = 0x%x"
 igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED"

+igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d"
+
 igb_rx_desc_buff_size(uint32_t b) "buffer size: %u"
 igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u"
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

IGB uses the RXDW ICR bit to indicate that an rx descriptor has been
written back. This is the same bit as RXT0 in older hardware.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
8
---
9
hw/net/e1000x_regs.h | 4 ++++
10
hw/net/igb_core.c | 2 +-
11
2 files changed, 5 insertions(+), 1 deletion(-)
12
13
diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h
14
index XXXXXXX..XXXXXXX 100644
15
--- a/hw/net/e1000x_regs.h
16
+++ b/hw/net/e1000x_regs.h
17
@@ -XXX,XX +XXX,XX @@
18
#define E1000_ICR_RXDMT0 0x00000010 /* rx desc min. threshold (0) */
19
#define E1000_ICR_RXO 0x00000040 /* rx overrun */
20
#define E1000_ICR_RXT0 0x00000080 /* rx timer intr (ring 0) */
21
+#define E1000_ICR_RXDW 0x00000080 /* rx desc written back */
22
#define E1000_ICR_MDAC 0x00000200 /* MDIO access complete */
23
#define E1000_ICR_RXCFG 0x00000400 /* RX /c/ ordered set */
24
#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */
25
@@ -XXX,XX +XXX,XX @@
26
#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
27
#define E1000_ICS_RXO E1000_ICR_RXO /* rx overrun */
28
#define E1000_ICS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
29
+#define E1000_ICS_RXDW E1000_ICR_RXDW /* rx desc written back */
30
#define E1000_ICS_MDAC E1000_ICR_MDAC /* MDIO access complete */
31
#define E1000_ICS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
32
#define E1000_ICS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
33
@@ -XXX,XX +XXX,XX @@
34
#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
35
#define E1000_IMS_RXO E1000_ICR_RXO /* rx overrun */
36
#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* rx timer intr */
37
+#define E1000_IMS_RXDW E1000_ICR_RXDW /* rx desc written back */
38
#define E1000_IMS_MDAC E1000_ICR_MDAC /* MDIO access complete */
39
#define E1000_IMS_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
40
#define E1000_IMS_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
41
@@ -XXX,XX +XXX,XX @@
42
#define E1000_IMC_RXDMT0 E1000_ICR_RXDMT0 /* rx desc min. threshold */
43
#define E1000_IMC_RXO E1000_ICR_RXO /* rx overrun */
44
#define E1000_IMC_RXT0 E1000_ICR_RXT0 /* rx timer intr */
45
+#define E1000_IMC_RXDW E1000_ICR_RXDW /* rx desc written back */
46
#define E1000_IMC_MDAC E1000_ICR_MDAC /* MDIO access complete */
47
#define E1000_IMC_RXCFG E1000_ICR_RXCFG /* RX /c/ ordered set */
48
#define E1000_IMC_GPI_EN0 E1000_ICR_GPI_EN0 /* GP Int 0 */
49
diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
50
index XXXXXXX..XXXXXXX 100644
51
--- a/hw/net/igb_core.c
52
+++ b/hw/net/igb_core.c
53
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
54
continue;
55
}
56
57
- n |= E1000_ICR_RXT0;
58
+ n |= E1000_ICR_RXDW;
59
60
igb_rx_fix_l4_csum(core, core->rx_pkt);
61
igb_write_packet_to_guest(core, core->rx_pkt, &rxr, &rss_info);
62
--
63
2.7.4
diff view generated by jsdifflib
From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Also introduce:
- Checks for RXDCTL/TXDCTL queue enable bits
- IGB_NUM_VM_POOLS enum (Sec 1.5: Table 1-7)

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
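Reviewer note, not part of the commit: a sketch of the queue/pool layout the
new checks rely on. igb's 16 queues serve 8 VM pools, so queue n and queue
n + 8 belong to pool n % 8; the helper name is made up.

    #include <stdint.h>

    #define IGB_NUM_VM_POOLS 8

    static inline uint16_t queue_to_pool(uint16_t qn)
    {
        return qn % IGB_NUM_VM_POOLS;
    }

    /* As igb_tx_enabled() below expresses it, a Tx queue is usable
     * only if the global enable (TCTL.EN), the per-pool enable (the
     * pool's VFTE bit, in VMDq mode) and the per-queue enable
     * (TXDCTL.QUEUE_ENABLE) all agree; Rx is symmetric with RCTL,
     * VFRE and RXDCTL. */
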
 hw/net/igb_core.c | 38 +++++++++++++++++++++++++++++++-------
 hw/net/igb_core.h |  1 +
 hw/net/igb_regs.h |  3 +++
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base,
     return igb_tx_wb_eic(core, txi->idx);
 }

+static inline bool
+igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi)
+{
+    bool vmdq = core->mac[MRQC] & 1;
+    uint16_t qn = txi->idx;
+    uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+    return (core->mac[TCTL] & E1000_TCTL_EN) &&
+        (!vmdq || core->mac[VFTE] & BIT(pool)) &&
+        (core->mac[TXDCTL0 + (qn * 16)] & E1000_TXDCTL_QUEUE_ENABLE);
+}
+
 static void
 igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
 {
@@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr)
     const E1000E_RingInfo *txi = txr->i;
     uint32_t eic = 0;

-    /* TODO: check if the queue itself is enabled too. */
-    if (!(core->mac[TCTL] & E1000_TCTL_EN)) {
+    if (!igb_tx_enabled(core, txi)) {
         trace_e1000e_tx_disabled();
         return;
     }
@@ -XXX,XX +XXX,XX @@ igb_can_receive(IGBCore *core)

     for (i = 0; i < IGB_NUM_QUEUES; i++) {
         E1000E_RxRing rxr;
+        if (!(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
+            continue;
+        }

         igb_rx_ring_init(core, &rxr, i);
         if (igb_ring_enabled(core, rxr.i) && igb_has_rxbufs(core, rxr.i, 1)) {
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,

     if (core->mac[MRQC] & 1) {
         if (is_broadcast_ether_addr(ehdr->h_dest)) {
-            for (i = 0; i < 8; i++) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                 if (core->mac[VMOLR0 + i] & E1000_VMOLR_BAM) {
                     queues |= BIT(i);
                 }
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
             f = ta_shift[(rctl >> E1000_RCTL_MO_SHIFT) & 3];
             f = (((ehdr->h_dest[5] << 8) | ehdr->h_dest[4]) >> f) & 0xfff;
             if (macp[f >> 5] & (1 << (f & 0x1f))) {
-                for (i = 0; i < 8; i++) {
+                for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                     if (core->mac[VMOLR0 + i] & E1000_VMOLR_ROMPE) {
                         queues |= BIT(i);
                     }
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
                 }
             }
         } else {
-            for (i = 0; i < 8; i++) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
                 if (core->mac[VMOLR0 + i] & E1000_VMOLR_AUPE) {
                     mask |= BIT(i);
                 }
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
             queues = BIT(def_pl >> E1000_VT_CTL_DEFAULT_POOL_SHIFT);
         }

+        queues &= core->mac[VFRE];
         igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
         if (rss_info->queue & 1) {
             queues <<= 8;
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
             e1000x_fcs_len(core->mac);

         for (i = 0; i < IGB_NUM_QUEUES; i++) {
-            if (!(queues & BIT(i))) {
+            if (!(queues & BIT(i)) ||
+                !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
                 continue;
             }

@@ -XXX,XX +XXX,XX @@ static void igb_set_vfmailbox(IGBCore *core, int index, uint32_t val)

 static void igb_vf_reset(IGBCore *core, uint16_t vfn)
 {
+    uint16_t qn0 = vfn;
+    uint16_t qn1 = vfn + IGB_NUM_VM_POOLS;
+
     /* disable Rx and Tx for the VF*/
-    core->mac[VFTE] &= ~BIT(vfn);
+    core->mac[RXDCTL0 + (qn0 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+    core->mac[RXDCTL0 + (qn1 * 16)] &= ~E1000_RXDCTL_QUEUE_ENABLE;
+    core->mac[TXDCTL0 + (qn0 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
+    core->mac[TXDCTL0 + (qn1 * 16)] &= ~E1000_TXDCTL_QUEUE_ENABLE;
     core->mac[VFRE] &= ~BIT(vfn);
+    core->mac[VFTE] &= ~BIT(vfn);
     /* indicate VF reset to PF */
     core->mac[VFLRE] |= BIT(vfn);
     /* VFLRE and mailbox use the same interrupt cause */
@@ -XXX,XX +XXX,XX @@ igb_phy_reg_init[] = {
 static const uint32_t igb_mac_reg_init[] = {
     [LEDCTL] = 2 | (3 << 8) | BIT(15) | (6 << 16) | (7 << 24),
     [EEMNGCTL] = BIT(31),
+    [TXDCTL0] = E1000_TXDCTL_QUEUE_ENABLE,
     [RXDCTL0] = E1000_RXDCTL_QUEUE_ENABLE | (1 << 16),
     [RXDCTL1] = 1 << 16,
     [RXDCTL2] = 1 << 16,
diff --git a/hw/net/igb_core.h b/hw/net/igb_core.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.h
+++ b/hw/net/igb_core.h
@@ -XXX,XX +XXX,XX @@
 #define IGB_MSIX_VEC_NUM (10)
 #define IGBVF_MSIX_VEC_NUM (3)
 #define IGB_NUM_QUEUES (16)
+#define IGB_NUM_VM_POOLS (8)

 typedef struct IGBCore IGBCore;

diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_regs.h
+++ b/hw/net/igb_regs.h
@@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc {
 #define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000
 #define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000

+/* Additional Transmit Descriptor Control definitions */
+#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Tx Queue */
+
 /* Additional Receive Descriptor Control definitions */
 #define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Enable specific Rx Queue */

--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
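Reviewer note, not part of the commit: the per-pool size check in isolation,
mirroring igb_rx_is_oversized() below. The VMOLR constant values follow the
Linux igb defines; the helper name is made up.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    #define E1000_VMOLR_RLPML_MASK 0x00003fff /* long packet max length */
    #define E1000_VMOLR_LPE        0x00010000 /* accept long packets */

    static bool pool_rx_oversized(uint32_t vmolr, size_t size)
    {
        /* With LPE set, the pool accepts frames up to its RLPML value;
         * otherwise only a standard VLAN-tagged frame (1522 octets). */
        size_t limit = (vmolr & E1000_VMOLR_LPE) ?
                       (vmolr & E1000_VMOLR_RLPML_MASK) : 1522;
        return size > limit;
    }
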
 hw/net/igb_core.c | 41 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 36 insertions(+), 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_rx_l4_cso_enabled(IGBCore *core)
     return !!(core->mac[RXCSUM] & E1000_RXCSUM_TUOFLD);
 }

+static bool
+igb_rx_is_oversized(IGBCore *core, uint16_t qn, size_t size)
+{
+    uint16_t pool = qn % IGB_NUM_VM_POOLS;
+    bool lpe = !!(core->mac[VMOLR0 + pool] & E1000_VMOLR_LPE);
+    int max_ethernet_lpe_size =
+        core->mac[VMOLR0 + pool] & E1000_VMOLR_RLPML_MASK;
+    int max_ethernet_vlan_size = 1522;
+
+    return size > (lpe ? max_ethernet_lpe_size : max_ethernet_vlan_size);
+}
+
 static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
-                                   E1000E_RSSInfo *rss_info, bool *external_tx)
+                                   size_t size, E1000E_RSSInfo *rss_info,
+                                   bool *external_tx)
 {
     static const int ta_shift[] = { 4, 3, 2, 0 };
     uint32_t f, ra[2], *macp, rctl = core->mac[RCTL];
     uint16_t queues = 0;
+    uint16_t oversized = 0;
     uint16_t vid = lduw_be_p(&PKT_GET_VLAN_HDR(ehdr)->h_tci) & VLAN_VID_MASK;
     bool accepted = false;
     int i;
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
         }

         queues &= core->mac[VFRE];
-        igb_rss_parse_packet(core, core->rx_pkt, external_tx != NULL, rss_info);
-        if (rss_info->queue & 1) {
-            queues <<= 8;
+        if (queues) {
+            for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+                if ((queues & BIT(i)) && igb_rx_is_oversized(core, i, size)) {
+                    oversized |= BIT(i);
+                }
+            }
+            /* 8.19.37 increment ROC if packet is oversized for all queues */
+            if (oversized == queues) {
+                trace_e1000x_rx_oversized(size);
+                e1000x_inc_reg_if_not_full(core->mac, ROC);
+            }
+            queues &= ~oversized;
+        }
+
+        if (queues) {
+            igb_rss_parse_packet(core, core->rx_pkt,
+                                 external_tx != NULL, rss_info);
+            if (rss_info->queue & 1) {
+                queues <<= 8;
+            }
         }
     } else {
         switch (net_rx_pkt_get_packet_type(core->rx_pkt)) {
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
                                e1000x_vlan_enabled(core->mac),
                                core->mac[VET] & 0xffff);

-    queues = igb_receive_assign(core, ehdr, &rss_info, external_tx);
+    queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
     if (!queues) {
         trace_e1000e_rx_flt_dropped();
         return orig_size;
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

RSS for VFs is only enabled if VMOLR[n].RSSE is set.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
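Reviewer note, not part of the commit: the queue numbering behind the
"PQn = VFn + VQn*8" comment (datasheet Sec 8.26.1). The helper name is made
up.

    #include <stdint.h>

    #define IGB_NUM_VM_POOLS 8

    /* In VMDq+RSS mode each pool owns two queues: RSS selects virtual
     * queue 0 or 1, and the physical queue is pool + vq * 8.  The hunk
     * below only promotes a pool's bit to its upper queue when that
     * pool actually has VMOLR.RSSE set. */
    static inline uint16_t physical_queue(uint16_t pool, uint16_t vq)
    {
        return pool + vq * IGB_NUM_VM_POOLS;
    }
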
 hw/net/igb_core.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ static uint16_t igb_receive_assign(IGBCore *core, const struct eth_header *ehdr,
         if (queues) {
             igb_rss_parse_packet(core, core->rx_pkt,
                                  external_tx != NULL, rss_info);
+            /* Sec 8.26.1: PQn = VFn + VQn*8 */
             if (rss_info->queue & 1) {
-                queues <<= 8;
+                for (i = 0; i < IGB_NUM_VM_POOLS; i++) {
+                    if ((queues & BIT(i)) &&
+                        (core->mac[VMOLR0 + i] & E1000_VMOLR_RSSE)) {
+                        queues |= BIT(i + IGB_NUM_VM_POOLS);
+                        queues &= ~BIT(i);
+                    }
+                }
             }
         }
     } else {
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Please note that the loopback counters for VM-to-VM traffic are not
implemented yet: VFGOTLBC, VFGPTLBC, VFGORLBC and VFGPRLBC.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
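Reviewer note, not part of the commit: how the per-VF statistics registers
are laid out, as the (pool * 64) indexing in the patch suggests. The helper
name is made up.

    #include <stdint.h>

    #define IGB_NUM_VM_POOLS 8

    /* Each pool has its own bank of counters spaced 64 register words
     * apart, e.g. PVFGPTC1 = PVFGPTC0 + 64, so an index into the
     * mac[] register array is base + pool * 64. */
    static inline uint32_t pvf_reg(uint32_t base, uint16_t pool)
    {
        return base + (pool % IGB_NUM_VM_POOLS) * 64;
    }
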
 hw/net/igb_core.c | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_tx_pkt_send(IGBCore *core, struct igb_tx *tx, int queue_index)
 }

 static void
-igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
+igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt, int qn)
 {
     static const int PTCregs[6] = { PTC64, PTC127, PTC255, PTC511,
                                     PTC1023, PTC1522 };
@@ -XXX,XX +XXX,XX @@ igb_on_tx_done_update_stats(IGBCore *core, struct NetTxPkt *tx_pkt)
     core->mac[GPTC] = core->mac[TPT];
     core->mac[GOTCL] = core->mac[TOTL];
     core->mac[GOTCH] = core->mac[TOTH];
+
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+        core->mac[PVFGOTC0 + (pool * 64)] += tot_len;
+        core->mac[PVFGPTC0 + (pool * 64)]++;
+    }
 }

 static void
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,
                 net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
             }
             if (igb_tx_pkt_send(core, tx, queue_index)) {
-                igb_on_tx_done_update_stats(core, tx->tx_pkt);
+                igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
             }
         }

@@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core,
 }

 static void
-igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
+igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi,
+                    size_t data_size, size_t data_fcs_size)
 {
     e1000x_update_rx_total_stats(core->mac, data_size, data_fcs_size);

@@ -XXX,XX +XXX,XX @@ igb_update_rx_stats(IGBCore *core, size_t data_size, size_t data_fcs_size)
     default:
         break;
     }
+
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+
+        core->mac[PVFGORC0 + (pool * 64)] += data_size + 4;
+        core->mac[PVFGPRC0 + (pool * 64)]++;
+        if (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) {
+            core->mac[PVFMPRC0 + (pool * 64)]++;
+        }
+    }
 }

 static inline bool
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,

     } while (desc_offset < total_size);

-    igb_update_rx_stats(core, size, total_size);
+    igb_update_rx_stats(core, rxi, size, total_size);
 }

 static inline void
--
2.7.4

From: Sriram Yagnaraman <sriram.yagnaraman@est.tech>

Add support for stripping/inserting VLAN for VFs.

The CSUM calculation had to be moved back into the for loop, since
packet data is pulled inside the loop based on the strip-VLAN decision
for every VF.

net_rx_pkt_fix_l4_csum should be extended to accept a buffer instead
for igb; work for a future patch.

Signed-off-by: Sriram Yagnaraman <sriram.yagnaraman@est.tech>
Signed-off-by: Jason Wang <jasowang@redhat.com>
---
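Reviewer note, not part of the commit: the per-pool Tx VLAN-insertion
policy in isolation, mirroring igb_tx_insert_vlan() below. The VMVIR
constant values follow the Linux igb defines; the helper name is made up.

    #include <stdbool.h>
    #include <stdint.h>

    #define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* always use default VLAN */
    #define E1000_VMVIR_VLANA_NEVER   0x80000000 /* never insert VLAN tag */

    /* The descriptor's VLE request can be overridden per pool: VMVIR
     * may force the pool's default VLAN or forbid insertion entirely.
     * Returns whether to insert and, if so, which VLAN tag to use. */
    static bool resolve_tx_vlan(uint32_t vmvir, bool vle, uint16_t *vlan)
    {
        if (vmvir & E1000_VMVIR_VLANA_DEFAULT) {
            *vlan = vmvir & 0xffff;
            return true;
        }
        if (vmvir & E1000_VMVIR_VLANA_NEVER) {
            return false;
        }
        return vle;
    }
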
 hw/net/igb_core.c | 62 +++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_rss_parse_packet(IGBCore *core, struct NetRxPkt *pkt, bool tx,
     info->queue = E1000_RSS_QUEUE(&core->mac[RETA], info->hash);
 }

+static void
+igb_tx_insert_vlan(IGBCore *core, uint16_t qn, struct igb_tx *tx,
+                   uint16_t vlan, bool insert_vlan)
+{
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = qn % IGB_NUM_VM_POOLS;
+
+        if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_DEFAULT) {
+            /* always insert default VLAN */
+            insert_vlan = true;
+            vlan = core->mac[VMVIR0 + pool] & 0xffff;
+        } else if (core->mac[VMVIR0 + pool] & E1000_VMVIR_VLANA_NEVER) {
+            insert_vlan = false;
+        }
+    }
+
+    if (insert_vlan && e1000x_vlan_enabled(core->mac)) {
+        net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan,
+            core->mac[VET] & 0xffff);
+    }
+}
+
 static bool
 igb_setup_tx_offloads(IGBCore *core, struct igb_tx *tx)
 {
@@ -XXX,XX +XXX,XX @@ igb_process_tx_desc(IGBCore *core,

     if (cmd_type_len & E1000_TXD_CMD_EOP) {
         if (!tx->skip_cp && net_tx_pkt_parse(tx->tx_pkt)) {
-            if (cmd_type_len & E1000_TXD_CMD_VLE) {
-                idx = (tx->first_olinfo_status >> 4) & 1;
-                uint16_t vlan = tx->ctx[idx].vlan_macip_lens >> 16;
-                uint16_t vet = core->mac[VET] & 0xffff;
-                net_tx_pkt_setup_vlan_header_ex(tx->tx_pkt, vlan, vet);
-            }
+            idx = (tx->first_olinfo_status >> 4) & 1;
+            igb_tx_insert_vlan(core, queue_index, tx,
+                               tx->ctx[idx].vlan_macip_lens >> 16,
+                               !!(cmd_type_len & E1000_TXD_CMD_VLE));
+
             if (igb_tx_pkt_send(core, tx, queue_index)) {
                 igb_on_tx_done_update_stats(core, tx->tx_pkt, queue_index);
             }
@@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt,
     igb_update_rx_stats(core, rxi, size, total_size);
 }

+static bool
+igb_rx_strip_vlan(IGBCore *core, const E1000E_RingInfo *rxi)
+{
+    if (core->mac[MRQC] & 1) {
+        uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS;
+        /* Sec 7.10.3.8: CTRL.VME is ignored, only VMOLR/RPLOLR is used */
+        return (net_rx_pkt_get_packet_type(core->rx_pkt) == ETH_PKT_MCAST) ?
+                core->mac[RPLOLR] & E1000_RPLOLR_STRVLAN :
+                core->mac[VMOLR0 + pool] & E1000_VMOLR_STRVLAN;
+    }
+
+    return e1000x_vlan_enabled(core->mac);
+}
+
 static inline void
 igb_rx_fix_l4_csum(IGBCore *core, struct NetRxPkt *pkt)
 {
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,

     ehdr = PKT_GET_ETH_HDR(filter_buf);
     net_rx_pkt_set_packet_type(core->rx_pkt, get_eth_packet_type(ehdr));
-
-    net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
-                               e1000x_vlan_enabled(core->mac),
-                               core->mac[VET] & 0xffff);
+    net_rx_pkt_set_protocols(core->rx_pkt, filter_buf, size);

     queues = igb_receive_assign(core, ehdr, size, &rss_info, external_tx);
     if (!queues) {
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,
         return orig_size;
     }

-    total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
-        e1000x_fcs_len(core->mac);
-
     for (i = 0; i < IGB_NUM_QUEUES; i++) {
         if (!(queues & BIT(i)) ||
             !(core->mac[RXDCTL0 + (i * 16)] & E1000_RXDCTL_QUEUE_ENABLE)) {
@@ -XXX,XX +XXX,XX @@ igb_receive_internal(IGBCore *core, const struct iovec *iov, int iovcnt,

         igb_rx_ring_init(core, &rxr, i);

+        net_rx_pkt_attach_iovec_ex(core->rx_pkt, iov, iovcnt, iov_ofs,
+                                   igb_rx_strip_vlan(core, rxr.i),
+                                   core->mac[VET] & 0xffff);
+
+        total_size = net_rx_pkt_get_total_len(core->rx_pkt) +
+            e1000x_fcs_len(core->mac);
+
         if (!igb_has_rxbufs(core, rxr.i, total_size)) {
             n |= E1000_ICS_RXO;
             trace_e1000e_rx_not_written_to_guest(rxr.i->idx);
--
2.7.4