Packet-split descriptors are used by Linux VF driver for MTU values from 2048 upwards. Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> --- hw/net/e1000x_regs.h | 1 + hw/net/igb_core.c | 701 ++++++++++++++++++++++++++++++----------- hw/net/igb_regs.h | 18 ++ hw/net/trace-events | 4 +- tests/qtest/igb-test.c | 4 + 5 files changed, 542 insertions(+), 186 deletions(-) diff --git a/hw/net/e1000x_regs.h b/hw/net/e1000x_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000x_regs.h +++ b/hw/net/e1000x_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_rx_desc_packet_split { /* Receive Checksum Control bits */ #define E1000_RXCSUM_IPOFLD 0x100 /* IP Checksum Offload Enable */ #define E1000_RXCSUM_TUOFLD 0x200 /* TCP/UDP Checksum Offload Enable */ +#define E1000_RXCSUM_IPPCSE 0x1000 /* IP Payload Checksum enable */ #define E1000_RXCSUM_PCSD 0x2000 /* Packet Checksum Disable */ #define E1000_RING_DESC_LEN (16) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_rx_use_legacy_descriptor(IGBCore *core) return false; } +static uint32_t +igb_rx_queue_desctyp_get(IGBCore *core, int qidx) +{ + return (core->mac[E1000_SRRCTL(qidx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK) >> + E1000_SRRCTL_DESCTYPE_OFFSET; +} + +static inline bool +igb_rx_use_ps_descriptor(IGBCore *core, int qidx) +{ + uint32_t desctyp = igb_rx_queue_desctyp_get(core, qidx); + return desctyp == 2 || desctyp == 5; +} + static inline bool igb_rss_enabled(IGBCore *core) { @@ -XXX,XX +XXX,XX @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r) return e1000x_rxbufsize(core->mac[RCTL]); } +static size_t +igb_calc_rxdesclen(IGBCore *core) +{ + size_t rxdesc_size; + + if (igb_rx_use_legacy_descriptor(core)) { + rxdesc_size = sizeof(struct e1000_rx_desc); + } else { + rxdesc_size = sizeof(union e1000_adv_rx_desc); + } + + trace_e1000e_rx_desc_len(rxdesc_size); + return rxdesc_size; +} + static bool igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size) { @@ -XXX,XX +XXX,XX @@ igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size) uint32_t bufsize = igb_rxbufsize(core, r); trace_e1000e_rx_has_buffers(r->idx, bufs, total_size, bufsize); - - return total_size <= bufs / (core->rx_desc_len / E1000_MIN_RX_DESC_LEN) * - bufsize; + return total_size <= bufs / (igb_calc_rxdesclen(core) / + E1000_MIN_RX_DESC_LEN) * bufsize; } void @@ -XXX,XX +XXX,XX @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, } static inline void -igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, - hwaddr *buff_addr) +igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc, + hwaddr *buff_addr) { *buff_addr = le64_to_cpu(desc->read.pkt_addr); } +static inline void +igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc, + hwaddr *buff_addr) +{ + buff_addr[0] = le64_to_cpu(desc->read.hdr_addr); + buff_addr[1] = le64_to_cpu(desc->read.pkt_addr); +} + static inline void igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, - hwaddr *buff_addr) + hwaddr (*buff_addr)[MAX_PS_BUFFERS], + int qidx) { if (igb_rx_use_legacy_descriptor(core)) { - igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr); - } else { - igb_read_adv_rx_descr(core, &desc->adv, buff_addr); + igb_read_lgcy_rx_descr(core, &desc->legacy, &(*buff_addr)[1]); + (*buff_addr)[0] = 0; + return; } + + /* advanced header split descriptor */ + if (igb_rx_use_ps_descriptor(core, qidx)) { + 
igb_read_adv_rx_split_buf_descr(core, &desc->adv, &(*buff_addr)[0]); + return; + } + + /* descriptor replication modes not supported */ + assert(igb_rx_queue_desctyp_get(core, qidx) == 1); + + /* advanced single buffer descriptor */ + igb_read_adv_rx_single_buf_descr(core, &desc->adv, &(*buff_addr)[1]); + (*buff_addr)[0] = 0; } static void @@ -XXX,XX +XXX,XX @@ igb_verify_csum_in_sw(IGBCore *core, } static void -igb_build_rx_metadata(IGBCore *core, - struct NetRxPkt *pkt, - bool is_eop, - const E1000E_RSSInfo *rss_info, - uint16_t *pkt_info, uint16_t *hdr_info, - uint32_t *rss, - uint32_t *status_flags, - uint16_t *ip_id, - uint16_t *vlan_tag) +igb_build_rx_metadata_common(IGBCore *core, + struct NetRxPkt *pkt, + bool is_eop, + uint32_t *status_flags, + uint16_t *vlan_tag) { struct virtio_net_hdr *vhdr; bool hasip4, hasip6; EthL4HdrProto l4hdr_proto; - uint32_t pkt_type; *status_flags = E1000_RXD_STAT_DD; /* No additional metadata needed for non-EOP descriptors */ - /* TODO: EOP apply only to status so don't skip whole function. */ if (!is_eop) { goto func_exit; } @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, trace_e1000e_rx_metadata_vlan(*vlan_tag); } - /* Packet parsing results */ - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { - if (rss_info->enabled) { - *rss = cpu_to_le32(rss_info->hash); - trace_igb_rx_metadata_rss(*rss); - } - } else if (hasip4) { - *status_flags |= E1000_RXD_STAT_IPIDV; - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); - trace_e1000e_rx_metadata_ip_id(*ip_id); - } - - if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) { - *status_flags |= E1000_RXD_STAT_ACK; - trace_e1000e_rx_metadata_ack(); - } - - if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { - trace_e1000e_rx_metadata_ipv6_filtering_disabled(); - pkt_type = E1000_RXD_PKT_MAC; - } else if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP || - l4hdr_proto == ETH_L4_HDR_PROTO_UDP) { - pkt_type = hasip4 ? E1000_RXD_PKT_IP4_XDP : E1000_RXD_PKT_IP6_XDP; - } else if (hasip4 || hasip6) { - pkt_type = hasip4 ? 
E1000_RXD_PKT_IP4 : E1000_RXD_PKT_IP6; - } else { - pkt_type = E1000_RXD_PKT_MAC; - } - - trace_e1000e_rx_metadata_pkt_type(pkt_type); - - if (pkt_info) { - if (rss_info->enabled) { - *pkt_info = rss_info->type; - } - - *pkt_info |= (pkt_type << 4); - } else { - *status_flags |= E1000_RXD_PKT_TYPE(pkt_type); - } - - if (hdr_info) { - *hdr_info = 0; - } - /* RX CSO information */ if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { trace_e1000e_rx_metadata_ipv6_sum_disabled(); @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, trace_e1000e_rx_metadata_l4_cso_disabled(); } - trace_e1000e_rx_metadata_status_flags(*status_flags); - func_exit: + trace_e1000e_rx_metadata_status_flags(*status_flags); *status_flags = cpu_to_le32(*status_flags); } static inline void -igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, +igb_write_lgcy_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, uint16_t length) { - uint32_t status_flags, rss; - uint16_t ip_id; + uint32_t status_flags; + struct e1000_rx_desc *d = &desc->legacy; assert(!rss_info->enabled); - desc->length = cpu_to_le16(length); - desc->csum = 0; - igb_build_rx_metadata(core, pkt, pkt != NULL, - rss_info, - NULL, NULL, &rss, - &status_flags, &ip_id, - &desc->special); - desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); - desc->status = (uint8_t) le32_to_cpu(status_flags); + memset(d, 0, sizeof(*d)); + d->length = cpu_to_le16(length); + igb_build_rx_metadata_common(core, pkt, pkt != NULL, + &status_flags, + &d->special); + + d->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); + d->status = (uint8_t) le32_to_cpu(status_flags); +} + +typedef struct { + bool sph; + bool hbo; + size_t descriptor_hdr_len; +} igb_split_descriptor_data; + +static inline bool +igb_rx_ps_descriptor_split_always(IGBCore *core, int qidx) +{ + uint32_t desctyp = igb_rx_queue_desctyp_get(core, qidx); + return desctyp == 5; +} + +static uint16_t +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt) +{ + uint16_t pkt_type = 0; + bool hasip4, hasip6, issctp = false; + EthL4HdrProto l4hdr_proto; + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); + pkt_type |= ip6hdr_info->has_ext_hdrs ? 
IGB_RX_DESC_PKT_TYPE_IPV6E : + IGB_RX_DESC_PKT_TYPE_IPV6; + issctp = ip6hdr_info->l4proto == IPPROTO_SCTP; + } else if (hasip4) { + pkt_type = IGB_RX_DESC_PKT_TYPE_IPV4; + eth_ip4_hdr_info *ip4hdr_info = net_rx_pkt_get_ip4_info(pkt); + issctp = ip4hdr_info->ip4_hdr.ip_p == IPPROTO_SCTP; + } + + if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP) { + pkt_type |= IGB_RX_DESC_PKT_TYPE_TCP; + } else if (l4hdr_proto == ETH_L4_HDR_PROTO_UDP) { + pkt_type |= IGB_RX_DESC_PKT_TYPE_UDP; + } else if (issctp) { + pkt_type |= IGB_RX_DESC_PKT_TYPE_SCTP; + } + + return pkt_type; } static inline void -igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, +igb_write_adv_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, uint16_t length) { - memset(&desc->wb, 0, sizeof(desc->wb)); + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + uint16_t rss_type = 0, pkt_type; + bool eop = (pkt != NULL); + union e1000_adv_rx_desc *d = &desc->adv; + memset(&d->wb, 0, sizeof(d->wb)); - desc->wb.upper.length = cpu_to_le16(length); + d->wb.upper.length = cpu_to_le16(length); + igb_build_rx_metadata_common(core, pkt, eop, + &d->wb.upper.status_error, + &d->wb.upper.vlan); - igb_build_rx_metadata(core, pkt, pkt != NULL, - rss_info, - &desc->wb.lower.lo_dword.pkt_info, - &desc->wb.lower.lo_dword.hdr_info, - &desc->wb.lower.hi_dword.rss, - &desc->wb.upper.status_error, - &desc->wb.lower.hi_dword.csum_ip.ip_id, - &desc->wb.upper.vlan); + if (!eop) { + return; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) && rss_info->enabled) { + d->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); + rss_type = rss_info->type; + trace_igb_rx_metadata_rss(d->wb.lower.hi_dword.rss, rss_type); + } else if (!(core->mac[RXCSUM] & E1000_RXCSUM_PCSD) && + core->mac[RXCSUM] & E1000_RXCSUM_IPPCSE && + hasip4) { + d->wb.lower.hi_dword.csum_ip.ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); + trace_e1000e_rx_metadata_ip_id(d->wb.lower.hi_dword.csum_ip.ip_id); + } + + pkt_type = igb_rx_desc_get_packet_type(core, pkt); + trace_e1000e_rx_metadata_pkt_type(pkt_type); + d->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4)); } static inline void -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, -struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, uint16_t length) +igb_write_adv_ps_split_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + igb_split_descriptor_data *ps_desc_data, + uint16_t(*written)[MAX_PS_BUFFERS]) +{ + size_t pkt_len; + bool split_always; + + size_t hdr_len = ps_desc_data->descriptor_hdr_len; + union e1000_adv_rx_desc *d = &desc->adv; + + split_always = igb_rx_ps_descriptor_split_always(core, rss_info->queue); + if (!split_always) { + if ((!ps_desc_data->sph && !ps_desc_data->hbo) || + ( ps_desc_data->sph && ps_desc_data->hbo)) { + pkt_len = (*written)[0] + (*written)[1]; + } else { + assert(!ps_desc_data->hbo); + pkt_len = (*written)[1]; + } + } else { + pkt_len = (*written)[1]; + } + + igb_write_adv_rx_descr(core, desc, pkt, rss_info, pkt_len); + + d->wb.lower.lo_dword.hdr_info = (hdr_len << RX_DESC_ADV_HDR_LEN_OFFSET) & + RX_DESC_ADV_HDR_LEN_MASK; + d->wb.lower.lo_dword.hdr_info |= ps_desc_data->sph ? RX_DESC_ADV_HDR_SPH + : 0; + d->wb.upper.status_error |= ps_desc_data->hbo ? 
+ RX_DESC_ADV_ST_ERR_HBO_OFFSET : 0; +} + +static inline void +igb_write_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + igb_split_descriptor_data *ps_desc_data, + uint16_t(*written)[MAX_PS_BUFFERS], + int qidx) { if (igb_rx_use_legacy_descriptor(core)) { - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length); + igb_write_lgcy_rx_descr(core, desc, pkt, rss_info, (*written)[1]); + } else if (igb_rx_use_ps_descriptor(core, qidx)) { + igb_write_adv_ps_split_rx_descr(core, desc, pkt, rss_info, + ps_desc_data, written); } else { - igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, length); + igb_write_adv_rx_descr(core, desc, pkt, rss_info, (*written)[1]); } } @@ -XXX,XX +XXX,XX @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr, } } -static void -igb_write_to_rx_buffers(IGBCore *core, - PCIDevice *d, - hwaddr ba, - uint16_t *written, - const char *data, - dma_addr_t data_len) -{ - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); - pci_dma_write(d, ba + *written, data, data_len); - *written += data_len; -} - static void igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, size_t data_size, size_t data_fcs_size) @@ -XXX,XX +XXX,XX @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } +static uint32_t +igb_get_queue_rx_desc_header_buf_size(IGBCore *core, int qidx) +{ + uint32_t srr_size = (core->mac[E1000_SRRCTL(qidx) >> 2] & + E1000_SRRCTL_BSIZEHDR_MASK) >> 8; + return srr_size * 64; +} + +static bool +igb_do_ps(IGBCore *core, + int qidx, + struct NetRxPkt *pkt, + size_t *hdr_len, + igb_split_descriptor_data *ps_desc_data) +{ + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + bool fragment; + bool split_always; + size_t bheader_size; + size_t total_pkt_len; + + if (!igb_rx_use_ps_descriptor(core, qidx)) { + return false; + } + + memset(ps_desc_data, 0, sizeof(igb_split_descriptor_data)); + + total_pkt_len = net_rx_pkt_get_total_len(pkt); + bheader_size = igb_get_queue_rx_desc_header_buf_size(core, qidx); + split_always = igb_rx_ps_descriptor_split_always(core, qidx); + if (split_always && total_pkt_len <= bheader_size) { + *hdr_len = total_pkt_len; + ps_desc_data->descriptor_hdr_len = total_pkt_len; + return true; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if (hasip4) { + fragment = net_rx_pkt_get_ip4_info(pkt)->fragment; + } else if (hasip6) { + fragment = net_rx_pkt_get_ip6_info(pkt)->fragment; + } else { + ps_desc_data->descriptor_hdr_len = bheader_size; + goto header_cant_be_decoded_or_bigger_than_header_buffer; + } + + if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) { + ps_desc_data->descriptor_hdr_len = bheader_size; + goto header_cant_be_decoded_or_bigger_than_header_buffer; + } + + /* no header splitting for SCTP */ + if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP || + l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) { + *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt); + } else { + *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt); + } + + ps_desc_data->sph = true; + ps_desc_data->descriptor_hdr_len = *hdr_len; + + if (*hdr_len > bheader_size) { + ps_desc_data->hbo = true; + goto header_cant_be_decoded_or_bigger_than_header_buffer; + } + + return true; + +header_cant_be_decoded_or_bigger_than_header_buffer: + if (split_always) { + *hdr_len = bheader_size; + return true; + } + + return false; +} + +typedef struct igb_ba_state_st { + 
uint16_t written[MAX_PS_BUFFERS]; + uint8_t cur_idx; +} igb_ba_state; + +typedef struct { + size_t size; + size_t total_size; + size_t ps_hdr_len; + size_t desc_size; + size_t desc_offset; + uint32_t rx_desc_packet_buf_size; + uint32_t rx_desc_header_buf_size; + size_t iov_ofs; + bool do_ps; + bool is_first; + struct iovec *iov; + igb_ba_state bastate; + hwaddr ba[MAX_PS_BUFFERS]; +} igb_packet_dma_state; + +static inline void +igb_truncate_to_descriptor_size(igb_packet_dma_state *pdma_st, size_t *size) +{ + if (pdma_st->do_ps && pdma_st->is_first) { + if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) { + *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len; + } + } else { + if (*size > pdma_st->rx_desc_packet_buf_size) { + *size = pdma_st->rx_desc_packet_buf_size; + } + } +} + +static inline void +igb_write_hdr_to_rx_buffers(IGBCore *core, + PCIDevice *d, + hwaddr (*ba)[MAX_PS_BUFFERS], + igb_ba_state *bastate, + uint32_t rx_desc_header_buf_size, + const char *data, + dma_addr_t data_len) +{ + assert(data_len <= rx_desc_header_buf_size - bastate->written[0]); + pci_dma_write(d, (*ba)[0] + bastate->written[0], data, data_len); + bastate->written[0] += data_len; + bastate->cur_idx = 1; +} + +static void +igb_write_packet_hdr_to_descr_addr(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + igb_packet_dma_state *pdma_st, + size_t *copy_size) +{ + size_t iov_copy; + size_t ps_hdr_copied = 0; + + if (!pdma_st->is_first) { + /* Leave buffer 0 of each descriptor except first */ + /* empty */ + igb_write_hdr_to_rx_buffers(core, d, &pdma_st->ba, &pdma_st->bastate, + pdma_st->rx_desc_header_buf_size, + NULL, 0); + return; + } + + do { + iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied, + pdma_st->iov->iov_len - pdma_st->iov_ofs); + + igb_write_hdr_to_rx_buffers(core, d, &pdma_st->ba, + &pdma_st->bastate, + pdma_st->rx_desc_header_buf_size, + pdma_st->iov->iov_base, + iov_copy); + + *copy_size -= iov_copy; + ps_hdr_copied += iov_copy; + + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } while (ps_hdr_copied < pdma_st->ps_hdr_len); + + pdma_st->is_first = false; +} + +static void +igb_write_to_rx_buffers(IGBCore *core, + PCIDevice *d, + hwaddr (*ba)[MAX_PS_BUFFERS], + igb_ba_state *bastate, + uint32_t cur_buf_len, + const char *data, + dma_addr_t data_len) +{ + while (data_len > 0) { + assert(bastate->cur_idx < MAX_PS_BUFFERS); + + uint32_t cur_buf_bytes_left = cur_buf_len - + bastate->written[bastate->cur_idx]; + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); + + trace_igb_rx_desc_buff_write(bastate->cur_idx, + (*ba)[bastate->cur_idx], + bastate->written[bastate->cur_idx], + data, + bytes_to_write); + + pci_dma_write(d, + (*ba)[bastate->cur_idx] + + bastate->written[bastate->cur_idx], + data, bytes_to_write); + + bastate->written[bastate->cur_idx] += bytes_to_write; + data += bytes_to_write; + data_len -= bytes_to_write; + + if (bastate->written[bastate->cur_idx] == cur_buf_len) { + bastate->cur_idx++; + } + } +} + +static void +igb_write_packet_payload_to_descr_addr(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + igb_packet_dma_state *pdma_st, + size_t *copy_size) +{ + static const uint32_t fcs_pad; + size_t iov_copy; + + /* Copy packet payload */ + while (*copy_size) { + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); + igb_write_to_rx_buffers(core, d, &pdma_st->ba, &pdma_st->bastate, + pdma_st->rx_desc_packet_buf_size, + 
pdma_st->iov->iov_base + pdma_st->iov_ofs, + iov_copy); + + *copy_size -= iov_copy; + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } + + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { + /* Simulate FCS checksum presence in the last descriptor */ + igb_write_to_rx_buffers(core, d, &pdma_st->ba, &pdma_st->bastate, + pdma_st->rx_desc_packet_buf_size, + (const char *) &fcs_pad, + e1000x_fcs_len(core->mac)); + } +} + +static void +igb_write_packet_to_descr_addr(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + igb_packet_dma_state *pdma_st) +{ + size_t copy_size; + + if (!(pdma_st->ba)[1]) { + /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + return; + } + + if (pdma_st->desc_offset >= pdma_st->size) { + return; + } + + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); + copy_size = pdma_st->size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, ©_size); + + /* For PS mode copy the packet header first */ + if (pdma_st->do_ps) { + igb_write_packet_hdr_to_descr_addr(core, pkt, d, pdma_st, ©_size); + } else { + pdma_st->bastate.cur_idx = 1; + } + + igb_write_packet_payload_to_descr_addr(core, pkt, d, pdma_st, ©_size); +} + static void igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, const E1000E_RxRing *rxr, @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, PCIDevice *d; dma_addr_t base; union e1000_rx_desc_union desc; - size_t desc_size; - size_t desc_offset = 0; - size_t iov_ofs = 0; - - struct iovec *iov = net_rx_pkt_get_iovec(pkt); - size_t size = net_rx_pkt_get_total_len(pkt); - size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000E_RingInfo *rxi = rxr->i; - size_t bufsize = igb_rxbufsize(core, rxi); - + const E1000E_RingInfo *rxi; + size_t rx_desc_len; + igb_split_descriptor_data ps_desc_data; + + igb_packet_dma_state pdma_st = {0}; + pdma_st.is_first = true; + pdma_st.size = net_rx_pkt_get_total_len(pkt); + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac); + + rxi = rxr->i; + rx_desc_len = igb_calc_rxdesclen(core); + pdma_st.rx_desc_packet_buf_size = + igb_rxbufsize(core, rxi); + pdma_st.rx_desc_header_buf_size = + igb_get_queue_rx_desc_header_buf_size(core, rxi->idx); + pdma_st.iov = net_rx_pkt_get_iovec(pkt); d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); if (!d) { d = core->owner; } + pdma_st.do_ps = igb_do_ps(core, rxi->idx, pkt, + &pdma_st.ps_hdr_len, + &ps_desc_data); + do { - hwaddr ba; - uint16_t written = 0; + memset(&pdma_st.bastate, 0, sizeof(igb_ba_state)); bool is_last = false; - desc_size = total_size - desc_offset; - - if (desc_size > bufsize) { - desc_size = bufsize; - } - if (igb_ring_empty(core, rxi)) { return; } base = igb_ring_head_descr(core, rxi); + pci_dma_read(d, base, &desc, rx_desc_len); + trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); - pci_dma_read(d, base, &desc, core->rx_desc_len); - - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); - - igb_read_rx_descr(core, &desc, &ba); - - if (ba) { - if (desc_offset < size) { - static const uint32_t fcs_pad; - size_t iov_copy; - size_t copy_size = size - desc_offset; - if (copy_size > bufsize) { - copy_size = bufsize; - } - - /* Copy packet payload */ - while (copy_size) { - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); - - igb_write_to_rx_buffers(core, d, 
ba, &written, - iov->iov_base + iov_ofs, iov_copy); - - copy_size -= iov_copy; - iov_ofs += iov_copy; - if (iov_ofs == iov->iov_len) { - iov++; - iov_ofs = 0; - } - } + igb_read_rx_descr(core, &desc, &pdma_st.ba, rxi->idx); - if (desc_offset + desc_size >= total_size) { - /* Simulate FCS checksum presence in the last descriptor */ - igb_write_to_rx_buffers(core, d, ba, &written, - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); - } - } - } else { /* as per intel docs; skip descriptors with null buf addr */ - trace_e1000e_rx_null_descriptor(); - } - desc_offset += desc_size; - if (desc_offset >= total_size) { + igb_write_packet_to_descr_addr(core, pkt, d, &pdma_st); + pdma_st.desc_offset += pdma_st.desc_size; + if (pdma_st.desc_offset >= pdma_st.total_size) { is_last = true; } - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, - rss_info, written); - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); + igb_write_rx_descr(core, &desc, + is_last ? pkt : NULL, + rss_info, + &ps_desc_data, + &pdma_st.bastate.written, + rxi->idx); + pci_dma_write(d, base, &desc, rx_desc_len); + igb_ring_advance(core, rxi, + rx_desc_len / E1000_MIN_RX_DESC_LEN); + } while (pdma_st.desc_offset < pdma_st.total_size); - igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); - - } while (desc_offset < total_size); - - igb_update_rx_stats(core, rxi, size, total_size); + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); } static bool @@ -XXX,XX +XXX,XX @@ igb_set_rfctl(IGBCore *core, int index, uint32_t val) core->mac[RFCTL] = val; } -static void -igb_calc_rxdesclen(IGBCore *core) -{ - if (igb_rx_use_legacy_descriptor(core)) { - core->rx_desc_len = sizeof(struct e1000_rx_desc); - } else { - core->rx_desc_len = sizeof(union e1000_adv_rx_desc); - } - trace_e1000e_rx_desc_len(core->rx_desc_len); -} - static void igb_set_rx_control(IGBCore *core, int index, uint32_t val) { diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { } wb; /* writeback */ }; +#define IGB_MAX_PS_BUFFERS 2 +#define IGB_MAX_RX_DESC_LEN (sizeof(uint64_t) * MAX_PS_BUFFERS) + +#define IGB_RX_DESC_PKT_TYPE_IPV4 BIT(0) +#define IGB_RX_DESC_PKT_TYPE_IPV4E BIT(1) +#define IGB_RX_DESC_PKT_TYPE_IPV6 BIT(2) +#define IGB_RX_DESC_PKT_TYPE_IPV6E BIT(3) +#define IGB_RX_DESC_PKT_TYPE_TCP BIT(4) +#define IGB_RX_DESC_PKT_TYPE_UDP BIT(5) +#define IGB_RX_DESC_PKT_TYPE_SCTP BIT(6) + +#define RX_DESC_ADV_HDR_LEN_OFFSET (21 - 16) +#define RX_DESC_ADV_HDR_LEN_MASK ((BIT(10) - 1) << \ + RX_DESC_ADV_HDR_LEN_OFFSET) +#define RX_DESC_ADV_HDR_SPH BIT(15) +#define RX_DESC_ADV_ST_ERR_HBO_OFFSET BIT(3 + 20) + /* from igb/e1000_phy.h */ /* IGP01E1000 Specific Registers */ @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 +#define E1000_SRRCTL_DESCTYPE_OFFSET 25 #define E1000_SRRCTL_DROP_EN 0x80000000 #define E1000_SRRCTL_BSIZEPKT_MASK 0x0000007F diff --git a/hw/net/trace-events b/hw/net/trace-events index XXXXXXX..XXXXXXX 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -XXX,XX +XXX,XX @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" 
igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" -igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer #%u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" igb_irq_icr_write(uint32_t bits, uint32_t old_icr, uint32_t new_icr) "Clearing ICR bits 0x%x: 0x%x --> 0x%x" diff --git a/tests/qtest/igb-test.c b/tests/qtest/igb-test.c index XXXXXXX..XXXXXXX 100644 --- a/tests/qtest/igb-test.c +++ b/tests/qtest/igb-test.c @@ -XXX,XX +XXX,XX @@ static void igb_receive_verify(QE1000E *d, int *test_sockets, QGuestAllocator *a char buffer[64]; int ret; + /* Set supported descriptor mode */ + uint32_t srrctl0 = e1000e_macreg_read(d, E1000_SRRCTL(0)); + e1000e_macreg_write(d, E1000_SRRCTL(0), srrctl0 | BIT(25)); + /* Send a dummy packet to device's socket*/ ret = iov_send(test_sockets[0], iov, 2, 0, sizeof(len) + sizeof(packet)); g_assert_cmpint(ret, == , sizeof(packet) + sizeof(len)); -- 2.25.1
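For reference, the SRRCTL decoding this patch depends on can be exercised standalone. A minimal sketch, assuming BSIZEHDR is a 6-bit field at bit 8 (the patch itself only shows the >> 8 shift and the 64-byte granularity) and using a made-up register value:

#include <stdint.h>
#include <stdio.h>

#define SRRCTL_DESCTYPE_MASK   0x0E000000 /* as in the patch */
#define SRRCTL_DESCTYPE_OFFSET 25
#define SRRCTL_BSIZEHDR_MASK   0x00003F00 /* assumed field width */

int main(void)
{
    /* made-up value: DESCTYPE = 5 (header split always), BSIZEHDR = 4 */
    uint32_t srrctl = (5u << SRRCTL_DESCTYPE_OFFSET) | (4u << 8);

    uint32_t desctype = (srrctl & SRRCTL_DESCTYPE_MASK) >> SRRCTL_DESCTYPE_OFFSET;
    uint32_t hdr_buf  = ((srrctl & SRRCTL_BSIZEHDR_MASK) >> 8) * 64;

    /* igb_rx_use_ps_descriptor() treats types 2 and 5 as packet split */
    printf("desctype=%u, header buffer=%u bytes\n", desctype, hdr_buf);
    return 0;
}

With these values igb_do_ps() would split off any parsed header that fits in the 256-byte header buffer, and the split-always mode would additionally place short packets entirely into buffer 0.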
Based-on: <20230523024339.50875-1-akihiko.odaki@daynix.com>
("[PATCH v5 00/48] igb: Fix for DPDK")

Purposes of this series of patches:
* introduce support for packet-split RX descriptors. This feature is
  used by the Linux VF driver for MTU values from 2048 upwards.
* refactor RX descriptor handling in preparation for packet-split RX
  descriptor support
* fix descriptor flags handling

In addition to addressing comments from the previous review, endianness
issues in igb_write_adv_ps_rx_descr were fixed.

Tomasz Dzieciol (7):
  igb: remove TCP ACK detection
  igb: rename E1000E_RingInfo_st
  igb: RX descriptors guest writing refactoring
  igb: RX payload guest writing refactoring
  igb: add IPv6 extended headers traffic detection
  igb: packet-split descriptors support
  e1000e: rename e1000e_ba_state and e1000e_write_hdr_to_rx_buffers

 hw/net/e1000e_core.c     |  78 +++--
 hw/net/igb_core.c        | 740 ++++++++++++++++++++++++++++-----------
 hw/net/igb_regs.h        |  20 +-
 hw/net/trace-events      |   6 +-
 tests/qtest/libqos/igb.c |   5 +
 5 files changed, 602 insertions(+), 247 deletions(-)

-- 
2.25.1
TCP ACK detection is no longer present in igb; remove the leftover code.

Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com>
---
 hw/net/igb_core.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c
index XXXXXXX..XXXXXXX 100644
--- a/hw/net/igb_core.c
+++ b/hw/net/igb_core.c
@@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core,
         trace_e1000e_rx_metadata_ip_id(*ip_id);
     }
 
-    if (l4hdr_proto == ETH_L4_HDR_PROTO_TCP && net_rx_pkt_is_tcp_ack(pkt)) {
-        *status_flags |= E1000_RXD_STAT_ACK;
-        trace_e1000e_rx_metadata_ack();
-    }
-
     if (pkt_info) {
         *pkt_info = rss_info->enabled ? rss_info->type : 0;

-- 
2.25.1
Rename E1000E_RingInfo_st and E1000E_RingInfo according to qemu typdefs guide. Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> --- hw/net/e1000e_core.c | 34 +++++++++++++++++----------------- hw/net/igb_core.c | 42 +++++++++++++++++++++--------------------- 2 files changed, 38 insertions(+), 38 deletions(-) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -XXX,XX +XXX,XX @@ e1000e_txdesc_writeback(E1000ECore *core, dma_addr_t base, return e1000e_tx_wb_interrupt_cause(core, queue_idx); } -typedef struct E1000E_RingInfo_st { +typedef struct E1000ERingInfo { int dbah; int dbal; int dlen; int dh; int dt; int idx; -} E1000E_RingInfo; +} E1000ERingInfo; static inline bool -e1000e_ring_empty(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_empty(E1000ECore *core, const E1000ERingInfo *r) { return core->mac[r->dh] == core->mac[r->dt] || core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN; } static inline uint64_t -e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_base(E1000ECore *core, const E1000ERingInfo *r) { uint64_t bah = core->mac[r->dbah]; uint64_t bal = core->mac[r->dbal]; @@ -XXX,XX +XXX,XX @@ e1000e_ring_base(E1000ECore *core, const E1000E_RingInfo *r) } static inline uint64_t -e1000e_ring_head_descr(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_head_descr(E1000ECore *core, const E1000ERingInfo *r) { return e1000e_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh]; } static inline void -e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count) +e1000e_ring_advance(E1000ECore *core, const E1000ERingInfo *r, uint32_t count) { core->mac[r->dh] += count; @@ -XXX,XX +XXX,XX @@ e1000e_ring_advance(E1000ECore *core, const E1000E_RingInfo *r, uint32_t count) } static inline uint32_t -e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_free_descr_num(E1000ECore *core, const E1000ERingInfo *r) { trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen], core->mac[r->dh], core->mac[r->dt]); @@ -XXX,XX +XXX,XX @@ e1000e_ring_free_descr_num(E1000ECore *core, const E1000E_RingInfo *r) } static inline bool -e1000e_ring_enabled(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_enabled(E1000ECore *core, const E1000ERingInfo *r) { return core->mac[r->dlen] > 0; } static inline uint32_t -e1000e_ring_len(E1000ECore *core, const E1000E_RingInfo *r) +e1000e_ring_len(E1000ECore *core, const E1000ERingInfo *r) { return core->mac[r->dlen]; } typedef struct E1000E_TxRing_st { - const E1000E_RingInfo *i; + const E1000ERingInfo *i; struct e1000e_tx *tx; } E1000E_TxRing; @@ -XXX,XX +XXX,XX @@ e1000e_mq_queue_idx(int base_reg_idx, int reg_idx) static inline void e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx) { - static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + static const E1000ERingInfo i[E1000E_NUM_QUEUES] = { { TDBAH, TDBAL, TDLEN, TDH, TDT, 0 }, { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 } }; @@ -XXX,XX +XXX,XX @@ e1000e_tx_ring_init(E1000ECore *core, E1000E_TxRing *txr, int idx) } typedef struct E1000E_RxRing_st { - const E1000E_RingInfo *i; + const E1000ERingInfo *i; } E1000E_RxRing; static inline void e1000e_rx_ring_init(E1000ECore *core, E1000E_RxRing *rxr, int idx) { - static const E1000E_RingInfo i[E1000E_NUM_QUEUES] = { + static const E1000ERingInfo i[E1000E_NUM_QUEUES] = { { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 }, { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 
} }; @@ -XXX,XX +XXX,XX @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) dma_addr_t base; struct e1000_tx_desc desc; bool ide = false; - const E1000E_RingInfo *txi = txr->i; + const E1000ERingInfo *txi = txr->i; uint32_t cause = E1000_ICS_TXQE; if (!(core->mac[TCTL] & E1000_TCTL_EN)) { @@ -XXX,XX +XXX,XX @@ e1000e_start_xmit(E1000ECore *core, const E1000E_TxRing *txr) } static bool -e1000e_has_rxbufs(E1000ECore *core, const E1000E_RingInfo *r, +e1000e_has_rxbufs(E1000ECore *core, const E1000ERingInfo *r, size_t total_size) { uint32_t bufs = e1000e_ring_free_descr_num(core, r); @@ -XXX,XX +XXX,XX @@ e1000e_update_rx_stats(E1000ECore *core, size_t pkt_size, size_t pkt_fcs_size) } static inline bool -e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000E_RingInfo *rxi) +e1000e_rx_descr_threshold_hit(E1000ECore *core, const E1000ERingInfo *rxi) { return e1000e_ring_free_descr_num(core, rxi) == e1000e_ring_len(core, rxi) >> core->rxbuf_min_shift; @@ -XXX,XX +XXX,XX @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, struct iovec *iov = net_rx_pkt_get_iovec(pkt); size_t size = net_rx_pkt_get_total_len(pkt); size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000E_RingInfo *rxi; + const E1000ERingInfo *rxi; size_t ps_hdr_len = 0; bool do_ps = e1000e_do_ps(core, pkt, &ps_hdr_len); bool is_first = true; diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx) return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0; } -typedef struct E1000E_RingInfo_st { +typedef struct E1000ERingInfo { int dbah; int dbal; int dlen; int dh; int dt; int idx; -} E1000E_RingInfo; +} E1000ERingInfo; static inline bool -igb_ring_empty(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_empty(IGBCore *core, const E1000ERingInfo *r) { return core->mac[r->dh] == core->mac[r->dt] || core->mac[r->dt] >= core->mac[r->dlen] / E1000_RING_DESC_LEN; } static inline uint64_t -igb_ring_base(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_base(IGBCore *core, const E1000ERingInfo *r) { uint64_t bah = core->mac[r->dbah]; uint64_t bal = core->mac[r->dbal]; @@ -XXX,XX +XXX,XX @@ igb_ring_base(IGBCore *core, const E1000E_RingInfo *r) } static inline uint64_t -igb_ring_head_descr(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_head_descr(IGBCore *core, const E1000ERingInfo *r) { return igb_ring_base(core, r) + E1000_RING_DESC_LEN * core->mac[r->dh]; } static inline void -igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count) +igb_ring_advance(IGBCore *core, const E1000ERingInfo *r, uint32_t count) { core->mac[r->dh] += count; @@ -XXX,XX +XXX,XX @@ igb_ring_advance(IGBCore *core, const E1000E_RingInfo *r, uint32_t count) } static inline uint32_t -igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_free_descr_num(IGBCore *core, const E1000ERingInfo *r) { trace_e1000e_ring_free_space(r->idx, core->mac[r->dlen], core->mac[r->dh], core->mac[r->dt]); @@ -XXX,XX +XXX,XX @@ igb_ring_free_descr_num(IGBCore *core, const E1000E_RingInfo *r) } static inline bool -igb_ring_enabled(IGBCore *core, const E1000E_RingInfo *r) +igb_ring_enabled(IGBCore *core, const E1000ERingInfo *r) { return core->mac[r->dlen] > 0; } typedef struct IGB_TxRing_st { - const E1000E_RingInfo *i; + const E1000ERingInfo *i; struct igb_tx *tx; } IGB_TxRing; @@ -XXX,XX +XXX,XX @@ igb_mq_queue_idx(int base_reg_idx, int reg_idx) static 
inline void igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx) { - static const E1000E_RingInfo i[IGB_NUM_QUEUES] = { + static const E1000ERingInfo i[IGB_NUM_QUEUES] = { { TDBAH0, TDBAL0, TDLEN0, TDH0, TDT0, 0 }, { TDBAH1, TDBAL1, TDLEN1, TDH1, TDT1, 1 }, { TDBAH2, TDBAL2, TDLEN2, TDH2, TDT2, 2 }, @@ -XXX,XX +XXX,XX @@ igb_tx_ring_init(IGBCore *core, IGB_TxRing *txr, int idx) } typedef struct E1000E_RxRing_st { - const E1000E_RingInfo *i; + const E1000ERingInfo *i; } E1000E_RxRing; static inline void igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx) { - static const E1000E_RingInfo i[IGB_NUM_QUEUES] = { + static const E1000ERingInfo i[IGB_NUM_QUEUES] = { { RDBAH0, RDBAL0, RDLEN0, RDH0, RDT0, 0 }, { RDBAH1, RDBAL1, RDLEN1, RDH1, RDT1, 1 }, { RDBAH2, RDBAL2, RDLEN2, RDH2, RDT2, 2 }, @@ -XXX,XX +XXX,XX @@ igb_rx_ring_init(IGBCore *core, E1000E_RxRing *rxr, int idx) static uint32_t igb_txdesc_writeback(IGBCore *core, dma_addr_t base, union e1000_adv_tx_desc *tx_desc, - const E1000E_RingInfo *txi) + const E1000ERingInfo *txi) { PCIDevice *d; uint32_t cmd_type_len = le32_to_cpu(tx_desc->read.cmd_type_len); @@ -XXX,XX +XXX,XX @@ igb_txdesc_writeback(IGBCore *core, dma_addr_t base, } static inline bool -igb_tx_enabled(IGBCore *core, const E1000E_RingInfo *txi) +igb_tx_enabled(IGBCore *core, const E1000ERingInfo *txi) { bool vmdq = core->mac[MRQC] & 1; uint16_t qn = txi->idx; @@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) PCIDevice *d; dma_addr_t base; union e1000_adv_tx_desc desc; - const E1000E_RingInfo *txi = txr->i; + const E1000ERingInfo *txi = txr->i; uint32_t eic = 0; if (!igb_tx_enabled(core, txi)) { @@ -XXX,XX +XXX,XX @@ igb_start_xmit(IGBCore *core, const IGB_TxRing *txr) } static uint32_t -igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r) +igb_rxbufsize(IGBCore *core, const E1000ERingInfo *r) { uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; uint32_t bsizepkt = srrctl & E1000_SRRCTL_BSIZEPKT_MASK; @@ -XXX,XX +XXX,XX @@ igb_rxbufsize(IGBCore *core, const E1000E_RingInfo *r) } static bool -igb_has_rxbufs(IGBCore *core, const E1000E_RingInfo *r, size_t total_size) +igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size) { uint32_t bufs = igb_ring_free_descr_num(core, r); uint32_t bufsize = igb_rxbufsize(core, r); @@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core, } static void -igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, +igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi, size_t pkt_size, size_t pkt_fcs_size) { eth_pkt_types_e pkt_type = net_rx_pkt_get_packet_type(core->rx_pkt); @@ -XXX,XX +XXX,XX @@ igb_update_rx_stats(IGBCore *core, const E1000E_RingInfo *rxi, } static inline bool -igb_rx_descr_threshold_hit(IGBCore *core, const E1000E_RingInfo *rxi) +igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi) { return igb_ring_free_descr_num(core, rxi) == ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, struct iovec *iov = net_rx_pkt_get_iovec(pkt); size_t size = net_rx_pkt_get_total_len(pkt); size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000E_RingInfo *rxi = rxr->i; + const E1000ERingInfo *rxi = rxr->i; size_t bufsize = igb_rxbufsize(core, rxi); d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, } static bool -igb_rx_strip_vlan(IGBCore *core, const 
E1000E_RingInfo *rxi) +igb_rx_strip_vlan(IGBCore *core, const E1000ERingInfo *rxi) { if (core->mac[MRQC] & 1) { uint16_t pool = rxi->idx % IGB_NUM_VM_POOLS; -- 2.25.1
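The rename follows QEMU's coding style for structured types (docs/devel/style.rst): a CamelCase typedef whose spelled-out tag, when present, matches the typedef name. A minimal before/after sketch (fields trimmed):

typedef struct E1000E_RingInfo_st {   /* before: tag differs from typedef */
    int dh, dt, idx;
} E1000E_RingInfo;

typedef struct E1000ERingInfo {       /* after: one CamelCase name */
    int dh, dt, idx;
} E1000ERingInfo;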
Refactoring is done in preparation for support of multiple advanced descriptors RX modes, especially packet-split modes. Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> --- hw/net/igb_core.c | 170 +++++++++++++++++++++++--------------------- hw/net/igb_regs.h | 10 +-- hw/net/trace-events | 4 +- 3 files changed, 96 insertions(+), 88 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_verify_csum_in_sw(IGBCore *core, } static void -igb_build_rx_metadata(IGBCore *core, - struct NetRxPkt *pkt, - bool is_eop, - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, - uint16_t *pkt_info, uint16_t *hdr_info, - uint32_t *rss, - uint32_t *status_flags, - uint16_t *ip_id, - uint16_t *vlan_tag) +igb_build_rx_metadata_common(IGBCore *core, + struct NetRxPkt *pkt, + bool is_eop, + uint32_t *status_flags, + uint16_t *vlan_tag) { struct virtio_net_hdr *vhdr; bool hasip4, hasip6, csum_valid; @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, *status_flags = E1000_RXD_STAT_DD; /* No additional metadata needed for non-EOP descriptors */ - /* TODO: EOP apply only to status so don't skip whole function. */ if (!is_eop) { goto func_exit; } @@ -XXX,XX +XXX,XX @@ igb_build_rx_metadata(IGBCore *core, trace_e1000e_rx_metadata_vlan(*vlan_tag); } - /* Packet parsing results */ - if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { - if (rss_info->enabled) { - *rss = cpu_to_le32(rss_info->hash); - trace_igb_rx_metadata_rss(*rss); - } - } else if (hasip4) { - *status_flags |= E1000_RXD_STAT_IPIDV; - *ip_id = cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); - trace_e1000e_rx_metadata_ip_id(*ip_id); - } - - if (pkt_info) { - *pkt_info = rss_info->enabled ? 
rss_info->type : 0; - - if (etqf < 8) { - *pkt_info |= (BIT(11) | etqf) << 4; - } else { - if (hasip4) { - *pkt_info |= E1000_ADVRXD_PKT_IP4; - } - - if (hasip6) { - *pkt_info |= E1000_ADVRXD_PKT_IP6; - } - - switch (l4hdr_proto) { - case ETH_L4_HDR_PROTO_TCP: - *pkt_info |= E1000_ADVRXD_PKT_TCP; - break; - - case ETH_L4_HDR_PROTO_UDP: - *pkt_info |= E1000_ADVRXD_PKT_UDP; - break; - - case ETH_L4_HDR_PROTO_SCTP: - *pkt_info |= E1000_ADVRXD_PKT_SCTP; - break; - - default: - break; - } - } - } - - if (hdr_info) { - *hdr_info = 0; - } - - if (ts) { - *status_flags |= BIT(16); - } - /* RX CSO information */ if (hasip6 && (core->mac[RFCTL] & E1000_RFCTL_IPV6_XSUM_DIS)) { trace_e1000e_rx_metadata_ipv6_sum_disabled(); @@ -XXX,XX +XXX,XX @@ func_exit: static inline void igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, struct NetRxPkt *pkt, - const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, + const E1000E_RSSInfo *rss_info, uint16_t length) { - uint32_t status_flags, rss; - uint16_t ip_id; + uint32_t status_flags; assert(!rss_info->enabled); + + memset(desc, 0, sizeof(*desc)); desc->length = cpu_to_le16(length); - desc->csum = 0; + igb_build_rx_metadata_common(core, pkt, pkt != NULL, + &status_flags, + &desc->special); - igb_build_rx_metadata(core, pkt, pkt != NULL, - rss_info, etqf, ts, - NULL, NULL, &rss, - &status_flags, &ip_id, - &desc->special); desc->errors = (uint8_t) (le32_to_cpu(status_flags) >> 24); desc->status = (uint8_t) le32_to_cpu(status_flags); } +static uint16_t +igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) +{ + uint16_t pkt_type; + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + + if (etqf < 8) { + pkt_type = BIT(11) | etqf; + return pkt_type; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { + pkt_type = E1000_ADVRXD_PKT_IP6; + } else if (hasip4) { + pkt_type = E1000_ADVRXD_PKT_IP4; + } else { + pkt_type = 0; + } + + switch (l4hdr_proto) { + case ETH_L4_HDR_PROTO_TCP: + pkt_type |= E1000_ADVRXD_PKT_TCP; + break; + case ETH_L4_HDR_PROTO_UDP: + pkt_type |= E1000_ADVRXD_PKT_UDP; + break; + case ETH_L4_HDR_PROTO_SCTP: + pkt_type |= E1000_ADVRXD_PKT_SCTP; + break; + default: + break; + } + + return pkt_type; +} + static inline void igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, uint16_t etqf, bool ts, uint16_t length) { + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + uint16_t rss_type = 0, pkt_type; + bool eop = (pkt != NULL); + uint32_t adv_desc_status_error = 0; memset(&desc->wb, 0, sizeof(desc->wb)); desc->wb.upper.length = cpu_to_le16(length); + igb_build_rx_metadata_common(core, pkt, eop, + &desc->wb.upper.status_error, + &desc->wb.upper.vlan); + + if (!eop) { + return; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if ((core->mac[RXCSUM] & E1000_RXCSUM_PCSD) != 0) { + if (rss_info->enabled) { + desc->wb.lower.hi_dword.rss = cpu_to_le32(rss_info->hash); + rss_type = rss_info->type; + trace_igb_rx_metadata_rss(desc->wb.lower.hi_dword.rss, rss_type); + } + } else if (hasip4) { + adv_desc_status_error |= E1000_RXD_STAT_IPIDV; + desc->wb.lower.hi_dword.csum_ip.ip_id = + cpu_to_le16(net_rx_pkt_get_ip_id(pkt)); + trace_e1000e_rx_metadata_ip_id( + desc->wb.lower.hi_dword.csum_ip.ip_id); + } + + if (ts) { + adv_desc_status_error |= BIT(16); + } - igb_build_rx_metadata(core, pkt, pkt != NULL, - rss_info, etqf, ts, - 
&desc->wb.lower.lo_dword.pkt_info, - &desc->wb.lower.lo_dword.hdr_info, - &desc->wb.lower.hi_dword.rss, - &desc->wb.upper.status_error, - &desc->wb.lower.hi_dword.csum_ip.ip_id, - &desc->wb.upper.vlan); + pkt_type = igb_rx_desc_get_packet_type(core, pkt, etqf); + trace_e1000e_rx_metadata_pkt_type(pkt_type); + desc->wb.lower.lo_dword.pkt_info = cpu_to_le16(rss_type | (pkt_type << 4)); + desc->wb.upper.status_error |= cpu_to_le32(adv_desc_status_error); } static inline void @@ -XXX,XX +XXX,XX @@ igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, uint16_t etqf, bool ts, uint16_t length) { if (igb_rx_use_legacy_descriptor(core)) { - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, - etqf, ts, length); + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length); } else { igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, etqf, ts, length); diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { #define E1000_STATUS_NUM_VFS_SHIFT 14 -#define E1000_ADVRXD_PKT_IP4 BIT(4) -#define E1000_ADVRXD_PKT_IP6 BIT(6) -#define E1000_ADVRXD_PKT_TCP BIT(8) -#define E1000_ADVRXD_PKT_UDP BIT(9) -#define E1000_ADVRXD_PKT_SCTP BIT(10) +#define E1000_ADVRXD_PKT_IP4 BIT(0) +#define E1000_ADVRXD_PKT_IP6 BIT(2) +#define E1000_ADVRXD_PKT_TCP BIT(4) +#define E1000_ADVRXD_PKT_UDP BIT(5) +#define E1000_ADVRXD_PKT_SCTP BIT(6) static inline uint8_t igb_ivar_entry_rx(uint8_t i) { diff --git a/hw/net/trace-events b/hw/net/trace-events index XXXXXXX..XXXXXXX 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -XXX,XX +XXX,XX @@ igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) " igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" -igb_rx_metadata_rss(uint32_t rss) "RSS data: 0x%X" +igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" igb_irq_icr_clear_gpie_nsicr(void) "Clearing ICR on read due to GPIE.NSICR enabled" igb_irq_set_iam(uint32_t icr) "Update IAM: 0x%x" @@ -XXX,XX +XXX,XX @@ igb_irq_eitr_set(uint32_t eitr_num, uint32_t val) "EITR[%u] = 0x%x" igb_set_pfmailbox(uint32_t vf_num, uint32_t val) "PFMailbox[%d]: 0x%x" igb_set_vfmailbox(uint32_t vf_num, uint32_t val) "VFMailbox[%d]: 0x%x" +igb_wrn_rx_desc_modes_not_supp(int desc_type) "Not supported descriptor type: %d" + # igbvf.c igbvf_wrn_io_addr_unknown(uint64_t addr) "IO unknown register 0x%"PRIx64 -- 2.25.1
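A note on why the E1000_ADVRXD_PKT_* values move down by four bits: the packet type is now kept as a 0-based field of its own and shifted into place exactly once when pkt_info is packed, with the RSS type in the low nibble. A standalone sketch with made-up field values:

#include <stdint.h>
#include <stdio.h>

#define ADVRXD_PKT_IP4 (1u << 0) /* new 0-based values from this patch */
#define ADVRXD_PKT_TCP (1u << 4)

int main(void)
{
    uint16_t rss_type = 1; /* example value for rss_info->type */
    uint16_t pkt_type = ADVRXD_PKT_IP4 | ADVRXD_PKT_TCP;

    /* same packing as igb_write_adv_rx_descr(): bits 3:0 RSS type,
     * bits 4 and up packet type */
    uint16_t pkt_info = rss_type | (pkt_type << 4);

    printf("pkt_info=0x%04x\n", pkt_info); /* prints 0x0111 */
    return 0;
}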
Refactoring is done in preparation for support of multiple advanced descriptors RX modes, especially packet-split modes. Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> --- hw/net/e1000e_core.c | 18 ++-- hw/net/igb_core.c | 214 +++++++++++++++++++++++++-------------- tests/qtest/libqos/igb.c | 5 + 3 files changed, 151 insertions(+), 86 deletions(-) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -XXX,XX +XXX,XX @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, } static void -e1000e_write_to_rx_buffers(E1000ECore *core, - hwaddr ba[MAX_PS_BUFFERS], - e1000e_ba_state *bastate, - const char *data, - dma_addr_t data_len) +e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, + hwaddr ba[MAX_PS_BUFFERS], + e1000e_ba_state *bastate, + const char *data, + dma_addr_t data_len) { while (data_len > 0) { uint32_t cur_buf_len = core->rxbuf_sizes[bastate->cur_idx]; @@ -XXX,XX +XXX,XX @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, while (copy_size) { iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); - e1000e_write_to_rx_buffers(core, ba, &bastate, - iov->iov_base + iov_ofs, iov_copy); + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, + iov->iov_base + + iov_ofs, + iov_copy); copy_size -= iov_copy; iov_ofs += iov_copy; @@ -XXX,XX +XXX,XX @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, if (desc_offset + desc_size >= total_size) { /* Simulate FCS checksum presence in the last descriptor */ - e1000e_write_to_rx_buffers(core, ba, &bastate, + e1000e_write_payload_frag_to_rx_buffers(core, ba, &bastate, (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); } } diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_has_rxbufs(IGBCore *core, const E1000ERingInfo *r, size_t total_size) bufsize; } +static uint32_t +igb_rxhdrbufsize(IGBCore *core, const E1000ERingInfo *r) +{ + uint32_t srrctl = core->mac[E1000_SRRCTL(r->idx) >> 2]; + return (srrctl & E1000_SRRCTL_BSIZEHDRSIZE_MASK) >> + E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; +} + void igb_start_recv(IGBCore *core) { @@ -XXX,XX +XXX,XX @@ igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, *buff_addr = le64_to_cpu(desc->read.pkt_addr); } +typedef struct IGBPacketRxDMAState { + size_t size; + size_t total_size; + size_t ps_hdr_len; + size_t desc_size; + size_t desc_offset; + uint32_t rx_desc_packet_buf_size; + uint32_t rx_desc_header_buf_size; + struct iovec *iov; + size_t iov_ofs; + bool is_first; + uint16_t written; + hwaddr ba; +} IGBPacketRxDMAState; + static inline void igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, hwaddr *buff_addr) @@ -XXX,XX +XXX,XX @@ igb_pci_dma_write_rx_desc(IGBCore *core, PCIDevice *dev, dma_addr_t addr, } } -static void -igb_write_to_rx_buffers(IGBCore *core, - PCIDevice *d, - hwaddr ba, - uint16_t *written, - const char *data, - dma_addr_t data_len) -{ - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); - pci_dma_write(d, ba + *written, data, data_len); - *written += data_len; -} - static void igb_update_rx_stats(IGBCore *core, const E1000ERingInfo *rxi, size_t pkt_size, size_t pkt_fcs_size) @@ -XXX,XX +XXX,XX @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi) ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } +static void +igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t 
*size) +{ + if (*size > pdma_st->rx_desc_packet_buf_size) { + *size = pdma_st->rx_desc_packet_buf_size; + } +} + +static void +igb_write_payload_frag_to_rx_buffers(IGBCore *core, + PCIDevice *d, + hwaddr ba, + uint16_t *written, + uint32_t cur_buf_len, + const char *data, + dma_addr_t data_len) +{ + trace_igb_rx_desc_buff_write(ba, *written, data, data_len); + pci_dma_write(d, ba + *written, data, data_len); + *written += data_len; +} + +static void +igb_write_payload_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st, + size_t *copy_size) +{ + static const uint32_t fcs_pad; + size_t iov_copy; + + /* Copy packet payload */ + while (*copy_size) { + iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); + igb_write_payload_frag_to_rx_buffers(core, d, + pdma_st->ba, + &pdma_st->written, + pdma_st->rx_desc_packet_buf_size, + pdma_st->iov->iov_base + + pdma_st->iov_ofs, + iov_copy); + + *copy_size -= iov_copy; + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } + + if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { + /* Simulate FCS checksum presence in the last descriptor */ + igb_write_payload_frag_to_rx_buffers(core, d, + pdma_st->ba, + &pdma_st->written, + pdma_st->rx_desc_packet_buf_size, + (const char *) &fcs_pad, + e1000x_fcs_len(core->mac)); + } +} + +static void +igb_write_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st) +{ + size_t copy_size; + + if (!pdma_st->ba) { + /* as per intel docs; skip descriptors with null buf addr */ + trace_e1000e_rx_null_descriptor(); + return; + } + + if (pdma_st->desc_offset >= pdma_st->size) { + return; + } + + pdma_st->desc_size = pdma_st->total_size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); + copy_size = pdma_st->size - pdma_st->desc_offset; + igb_truncate_to_descriptor_size(pdma_st, ©_size); + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, ©_size); +} + static void igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, const E1000E_RxRing *rxr, @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, PCIDevice *d; dma_addr_t base; union e1000_rx_desc_union desc; - size_t desc_size; - size_t desc_offset = 0; - size_t iov_ofs = 0; - - struct iovec *iov = net_rx_pkt_get_iovec(pkt); - size_t size = net_rx_pkt_get_total_len(pkt); - size_t total_size = size + e1000x_fcs_len(core->mac); - const E1000ERingInfo *rxi = rxr->i; - size_t bufsize = igb_rxbufsize(core, rxi); - + const E1000ERingInfo *rxi; + size_t rx_desc_len; + + IGBPacketRxDMAState pdma_st = {0}; + pdma_st.is_first = true; + pdma_st.size = net_rx_pkt_get_total_len(pkt); + pdma_st.total_size = pdma_st.size + e1000x_fcs_len(core->mac); + + rxi = rxr->i; + rx_desc_len = core->rx_desc_len; + pdma_st.rx_desc_packet_buf_size = igb_rxbufsize(core, rxi); + pdma_st.rx_desc_header_buf_size = igb_rxhdrbufsize(core, rxi); + pdma_st.iov = net_rx_pkt_get_iovec(pkt); d = pcie_sriov_get_vf_at_index(core->owner, rxi->idx % 8); if (!d) { d = core->owner; } do { - hwaddr ba; - uint16_t written = 0; + pdma_st.written = 0; bool is_last = false; - desc_size = total_size - desc_offset; - - if (desc_size > bufsize) { - desc_size = bufsize; - } - if (igb_ring_empty(core, rxi)) { return; } base = igb_ring_head_descr(core, rxi); + pci_dma_read(d, base, &desc, rx_desc_len); + trace_e1000e_rx_descr(rxi->idx, base, 
rx_desc_len); - pci_dma_read(d, base, &desc, core->rx_desc_len); - - trace_e1000e_rx_descr(rxi->idx, base, core->rx_desc_len); - - igb_read_rx_descr(core, &desc, &ba); - - if (ba) { - if (desc_offset < size) { - static const uint32_t fcs_pad; - size_t iov_copy; - size_t copy_size = size - desc_offset; - if (copy_size > bufsize) { - copy_size = bufsize; - } - - /* Copy packet payload */ - while (copy_size) { - iov_copy = MIN(copy_size, iov->iov_len - iov_ofs); - - igb_write_to_rx_buffers(core, d, ba, &written, - iov->iov_base + iov_ofs, iov_copy); + igb_read_rx_descr(core, &desc, &pdma_st.ba); - copy_size -= iov_copy; - iov_ofs += iov_copy; - if (iov_ofs == iov->iov_len) { - iov++; - iov_ofs = 0; - } - } - - if (desc_offset + desc_size >= total_size) { - /* Simulate FCS checksum presence in the last descriptor */ - igb_write_to_rx_buffers(core, d, ba, &written, - (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); - } - } - } else { /* as per intel docs; skip descriptors with null buf addr */ - trace_e1000e_rx_null_descriptor(); - } - desc_offset += desc_size; - if (desc_offset >= total_size) { + igb_write_to_rx_buffers(core, pkt, d, &pdma_st); + pdma_st.desc_offset += pdma_st.desc_size; + if (pdma_st.desc_offset >= pdma_st.total_size) { is_last = true; } igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, - rss_info, etqf, ts, written); - igb_pci_dma_write_rx_desc(core, d, base, &desc, core->rx_desc_len); - - igb_ring_advance(core, rxi, core->rx_desc_len / E1000_MIN_RX_DESC_LEN); - - } while (desc_offset < total_size); + rss_info, etqf, ts, pdma_st.written); + igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len); + igb_ring_advance(core, rxi, + rx_desc_len / E1000_MIN_RX_DESC_LEN); + } while (pdma_st.desc_offset < pdma_st.total_size); - igb_update_rx_stats(core, rxi, size, total_size); + igb_update_rx_stats(core, rxi, pdma_st.size, pdma_st.total_size); } static bool diff --git a/tests/qtest/libqos/igb.c b/tests/qtest/libqos/igb.c index XXXXXXX..XXXXXXX 100644 --- a/tests/qtest/libqos/igb.c +++ b/tests/qtest/libqos/igb.c @@ -XXX,XX +XXX,XX @@ static void igb_pci_start_hw(QOSGraphObject *obj) E1000_RAH_AV | E1000_RAH_POOL_1 | le16_to_cpu(*(uint16_t *)(address + 4))); + /* Set supported receive descriptor mode */ + e1000e_macreg_write(&d->e1000e, + E1000_SRRCTL(0), + E1000_SRRCTL_DESCTYPE_ADV_ONEBUF); + /* Enable receive */ e1000e_macreg_write(&d->e1000e, E1000_RFCTL, E1000_RFCTL_EXTEN); e1000e_macreg_write(&d->e1000e, E1000_RCTL, E1000_RCTL_EN); -- 2.25.1
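The payload path now walks the packet iovec under control of IGBPacketRxDMAState: the remaining payload is truncated to one descriptor's buffer size, then copied fragment by fragment while the iov cursor advances. A standalone sketch of that loop, with memcpy() standing in for pci_dma_write(), made-up sizes, and the FCS padding step omitted:

#include <stdio.h>
#include <string.h>
#include <sys/uio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
    char src1[] = "abcdefgh", src2[] = "ijklmnop";
    struct iovec iov[] = { { src1, 8 }, { src2, 8 } };
    struct iovec *cur = iov;
    size_t iov_ofs = 0, remaining = 16, buf_size = 6;
    char guest[32]; /* stands in for the guest RX buffers */
    size_t guest_ofs = 0;

    while (remaining) {
        size_t copy_size = MIN(remaining, buf_size); /* one descriptor */
        size_t written = 0;

        while (copy_size) {
            size_t n = MIN(copy_size, cur->iov_len - iov_ofs);
            memcpy(guest + guest_ofs, (char *)cur->iov_base + iov_ofs, n);
            guest_ofs += n;
            written += n;
            copy_size -= n;
            remaining -= n;
            iov_ofs += n;
            if (iov_ofs == cur->iov_len) { /* fragment used up */
                cur++;
                iov_ofs = 0;
            }
        }
        printf("descriptor done, written=%zu\n", written);
    }
    guest[guest_ofs] = '\0';
    printf("guest sees: %s\n", guest);
    return 0;
}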
Report the IPv6-with-extension-headers packet type (E1000_ADVRXD_PKT_IP6E) in advanced RX descriptor metadata when a received IPv6 packet carries extension headers. Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> --- hw/net/igb_core.c | 4 +++- hw/net/igb_regs.h | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); if (hasip6 && !(core->mac[RFCTL] & E1000_RFCTL_IPV6_DIS)) { - pkt_type = E1000_ADVRXD_PKT_IP6; + eth_ip6_hdr_info *ip6hdr_info = net_rx_pkt_get_ip6_info(pkt); + pkt_type = ip6hdr_info->has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : + E1000_ADVRXD_PKT_IP6; } else if (hasip4) { pkt_type = E1000_ADVRXD_PKT_IP4; } else { diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { #define E1000_ADVRXD_PKT_IP4 BIT(0) #define E1000_ADVRXD_PKT_IP6 BIT(2) +#define E1000_ADVRXD_PKT_IP6E BIT(3) #define E1000_ADVRXD_PKT_TCP BIT(4) #define E1000_ADVRXD_PKT_UDP BIT(5) #define E1000_ADVRXD_PKT_SCTP BIT(6) -- 2.25.1
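The change only affects which packet-type bit lands in the descriptor write-back. A standalone sketch of the selection, with the E1000_ADVRXD_PKT_* values copied from the igb_regs.h hunk; ip_pkt_type is a hypothetical helper, and the E1000_RFCTL_IPV6_DIS gating done by the real code is omitted here:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1U << (n))

/* Values copied from hw/net/igb_regs.h after this patch. */
#define E1000_ADVRXD_PKT_IP4  BIT(0)
#define E1000_ADVRXD_PKT_IP6  BIT(2)
#define E1000_ADVRXD_PKT_IP6E BIT(3)

/* Hypothetical helper: IPv6 packets with extension headers now report
 * IP6E instead of IP6; IPv4 is unchanged. */
static uint16_t ip_pkt_type(bool hasip4, bool hasip6, bool has_ext_hdrs)
{
    if (hasip6) {
        return has_ext_hdrs ? E1000_ADVRXD_PKT_IP6E : E1000_ADVRXD_PKT_IP6;
    }
    return hasip4 ? E1000_ADVRXD_PKT_IP4 : 0;
}

int main(void)
{
    printf("0x%x\n", ip_pkt_type(false, true, true));  /* 0x8 -> IP6E */
    printf("0x%x\n", ip_pkt_type(false, true, false)); /* 0x4 -> IP6  */
    return 0;
}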
Packet-split descriptors are used by the Linux VF driver for MTU values from 2048 upwards. Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> --- hw/net/igb_core.c | 357 ++++++++++++++++++++++++++++++++++++++------ hw/net/igb_regs.h | 9 ++ hw/net/trace-events | 2 +- 3 files changed, 325 insertions(+), 43 deletions(-) diff --git a/hw/net/igb_core.c b/hw/net/igb_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_core.c +++ b/hw/net/igb_core.c @@ -XXX,XX +XXX,XX @@ igb_rx_use_legacy_descriptor(IGBCore *core) return false; } +typedef struct E1000ERingInfo { + int dbah; + int dbal; + int dlen; + int dh; + int dt; + int idx; +} E1000ERingInfo; + +static uint32_t +igb_rx_queue_desctyp_get(IGBCore *core, const E1000ERingInfo *r) +{ + return core->mac[E1000_SRRCTL(r->idx) >> 2] & E1000_SRRCTL_DESCTYPE_MASK; +} + +static bool +igb_rx_use_ps_descriptor(IGBCore *core, const E1000ERingInfo *r) +{ + uint32_t desctyp = igb_rx_queue_desctyp_get(core, r); + return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT || + desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; +} + static inline bool igb_rss_enabled(IGBCore *core) { @@ -XXX,XX +XXX,XX @@ static uint32_t igb_rx_wb_eic(IGBCore *core, int queue_idx) return (ent & E1000_IVAR_VALID) ? BIT(ent & 0x1f) : 0; } -typedef struct E1000ERingInfo { - int dbah; - int dbal; - int dlen; - int dh; - int dt; - int idx; -} E1000ERingInfo; - static inline bool igb_ring_empty(IGBCore *core, const E1000ERingInfo *r) { @@ -XXX,XX +XXX,XX @@ igb_read_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, } static inline void -igb_read_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, - hwaddr *buff_addr) +igb_read_adv_rx_single_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc, + hwaddr *buff_addr) { *buff_addr = le64_to_cpu(desc->read.pkt_addr); } +static inline void +igb_read_adv_rx_split_buf_descr(IGBCore *core, union e1000_adv_rx_desc *desc, + hwaddr *buff_addr) +{ + buff_addr[0] = le64_to_cpu(desc->read.hdr_addr); + buff_addr[1] = le64_to_cpu(desc->read.pkt_addr); +} + +typedef struct IGBBAState { + uint16_t written[IGB_MAX_PS_BUFFERS]; + uint8_t cur_idx; +} IGBBAState; + typedef struct IGBPacketRxDMAState { size_t size; size_t total_size; @@ -XXX,XX +XXX,XX @@ typedef struct IGBPacketRxDMAState { uint32_t rx_desc_header_buf_size; struct iovec *iov; size_t iov_ofs; + bool do_ps; bool is_first; - uint16_t written; - hwaddr ba; + IGBBAState bastate; + hwaddr ba[IGB_MAX_PS_BUFFERS]; } IGBPacketRxDMAState; static inline void -igb_read_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, - hwaddr *buff_addr) +igb_read_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, + IGBPacketRxDMAState *pdma_st, + const E1000ERingInfo *r) { + uint32_t desc_type; + if (igb_rx_use_legacy_descriptor(core)) { - igb_read_lgcy_rx_descr(core, &desc->legacy, buff_addr); - } else { - igb_read_adv_rx_descr(core, &desc->adv, buff_addr); + igb_read_lgcy_rx_descr(core, &desc->legacy, &pdma_st->ba[1]); + pdma_st->ba[0] = 0; + return; + } + + /* advanced header split descriptor */ + if (igb_rx_use_ps_descriptor(core, r)) { + igb_read_adv_rx_split_buf_descr(core, &desc->adv, &pdma_st->ba[0]); + return; + } + + /* descriptor replication modes not supported */ + desc_type = igb_rx_queue_desctyp_get(core, r); + if (desc_type != E1000_SRRCTL_DESCTYPE_ADV_ONEBUF) { + trace_igb_wrn_rx_desc_modes_not_supp(desc_type); } + + /* advanced single buffer descriptor */ + igb_read_adv_rx_single_buf_descr(core, &desc->adv, &pdma_st->ba[1]); + pdma_st->ba[0] = 0; } static void @@ -XXX,XX
+XXX,XX @@ igb_write_lgcy_rx_descr(IGBCore *core, struct e1000_rx_desc *desc, desc->status = (uint8_t) le32_to_cpu(status_flags); } +static bool +igb_rx_ps_descriptor_split_always(IGBCore *core, const E1000ERingInfo *r) +{ + uint32_t desctyp = igb_rx_queue_desctyp_get(core, r); + return desctyp == E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS; +} + static uint16_t igb_rx_desc_get_packet_type(IGBCore *core, struct NetRxPkt *pkt, uint16_t etqf) { @@ -XXX,XX +XXX,XX @@ igb_write_adv_rx_descr(IGBCore *core, union e1000_adv_rx_desc *desc, desc->wb.upper.status_error |= cpu_to_le32(adv_desc_status_error); } +typedef struct IGBSplitDescriptorData { + bool sph; + bool hbo; + size_t hdr_len; +} IGBSplitDescriptorData; + static inline void -igb_write_rx_descr(IGBCore *core, union e1000_rx_desc_union *desc, - struct NetRxPkt *pkt, const E1000E_RSSInfo *rss_info, - uint16_t etqf, bool ts, uint16_t length) +igb_write_adv_ps_rx_descr(IGBCore *core, + union e1000_adv_rx_desc *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + const E1000ERingInfo *r, + uint16_t etqf, + bool ts, + IGBSplitDescriptorData *ps_desc_data, + IGBPacketRxDMAState *pdma_st) +{ + size_t pkt_len; + size_t hdr_len = ps_desc_data->hdr_len; + uint16_t hdr_info = 0; + + if (pdma_st->do_ps) { + pkt_len = pdma_st->bastate.written[1]; + } else { + pkt_len = pdma_st->bastate.written[0] + pdma_st->bastate.written[1]; + } + + igb_write_adv_rx_descr(core, desc, pkt, rss_info, etqf, ts, pkt_len); + + hdr_info = (hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) & + E1000_ADVRXD_ADV_HDR_LEN_MASK; + hdr_info |= ps_desc_data->sph ? E1000_ADVRXD_HDR_SPH : 0; + desc->wb.lower.lo_dword.hdr_info = cpu_to_le16(hdr_info); + + desc->wb.upper.status_error |= cpu_to_le32( + ps_desc_data->hbo ? E1000_ADVRXD_ST_ERR_HBO_OFFSET : 0); +} + +static inline void +igb_write_rx_descr(IGBCore *core, + union e1000_rx_desc_union *desc, + struct NetRxPkt *pkt, + const E1000E_RSSInfo *rss_info, + uint16_t etqf, + bool ts, + IGBSplitDescriptorData *ps_desc_data, + IGBPacketRxDMAState *pdma_st, + const E1000ERingInfo *r) { if (igb_rx_use_legacy_descriptor(core)) { - igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, length); + igb_write_lgcy_rx_descr(core, &desc->legacy, pkt, rss_info, + pdma_st->bastate.written[1]); + } else if (igb_rx_use_ps_descriptor(core, r)) { + igb_write_adv_ps_rx_descr(core, &desc->adv, pkt, rss_info, r, etqf, ts, + ps_desc_data, pdma_st); } else { igb_write_adv_rx_descr(core, &desc->adv, pkt, rss_info, - etqf, ts, length); + etqf, ts, pdma_st->bastate.written[1]); } } @@ -XXX,XX +XXX,XX @@ igb_rx_descr_threshold_hit(IGBCore *core, const E1000ERingInfo *rxi) ((core->mac[E1000_SRRCTL(rxi->idx) >> 2] >> 20) & 31) * 16; } +static bool +igb_do_ps(IGBCore *core, + const E1000ERingInfo *r, + struct NetRxPkt *pkt, + size_t *hdr_len, + IGBSplitDescriptorData *ps_desc_data) +{ + bool hasip4, hasip6; + EthL4HdrProto l4hdr_proto; + bool fragment; + bool split_always; + size_t bheader_size; + size_t total_pkt_len; + + if (!igb_rx_use_ps_descriptor(core, r)) { + return false; + } + + memset(ps_desc_data, 0, sizeof(IGBSplitDescriptorData)); + + total_pkt_len = net_rx_pkt_get_total_len(pkt); + bheader_size = igb_rxhdrbufsize(core, r); + split_always = igb_rx_ps_descriptor_split_always(core, r); + if (split_always && total_pkt_len <= bheader_size) { + *hdr_len = total_pkt_len; + ps_desc_data->hdr_len = total_pkt_len; + return true; + } + + net_rx_pkt_get_protocols(pkt, &hasip4, &hasip6, &l4hdr_proto); + + if (hasip4) { + fragment = 
net_rx_pkt_get_ip4_info(pkt)->fragment; + } else if (hasip6) { + fragment = net_rx_pkt_get_ip6_info(pkt)->fragment; + } else { + ps_desc_data->hdr_len = bheader_size; + goto header_not_handled; + } + + if (fragment && (core->mac[RFCTL] & E1000_RFCTL_IPFRSP_DIS)) { + ps_desc_data->hdr_len = bheader_size; + goto header_not_handled; + } + + /* no header splitting for SCTP */ + if (!fragment && (l4hdr_proto == ETH_L4_HDR_PROTO_UDP || + l4hdr_proto == ETH_L4_HDR_PROTO_TCP)) { + *hdr_len = net_rx_pkt_get_l5_hdr_offset(pkt); + } else { + *hdr_len = net_rx_pkt_get_l4_hdr_offset(pkt); + } + + ps_desc_data->sph = true; + ps_desc_data->hdr_len = *hdr_len; + + if (*hdr_len > bheader_size) { + ps_desc_data->hbo = true; + goto header_not_handled; + } + + return true; + +header_not_handled: + if (split_always) { + *hdr_len = bheader_size; + return true; + } + + return false; +} + static void igb_truncate_to_descriptor_size(IGBPacketRxDMAState *pdma_st, size_t *size) { - if (*size > pdma_st->rx_desc_packet_buf_size) { - *size = pdma_st->rx_desc_packet_buf_size; + if (pdma_st->do_ps && pdma_st->is_first) { + if (*size > pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len) { + *size = pdma_st->rx_desc_packet_buf_size + pdma_st->ps_hdr_len; + } + } else { + if (*size > pdma_st->rx_desc_packet_buf_size) { + *size = pdma_st->rx_desc_packet_buf_size; + } } } +static inline void +igb_write_hdr_frag_to_rx_buffers(IGBCore *core, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st, + const char *data, + dma_addr_t data_len) +{ + assert(data_len <= pdma_st->rx_desc_header_buf_size - + pdma_st->bastate.written[0]); + pci_dma_write(d, + pdma_st->ba[0] + pdma_st->bastate.written[0], + data, data_len); + pdma_st->bastate.written[0] += data_len; + pdma_st->bastate.cur_idx = 1; +} + +static void +igb_write_header_to_rx_buffers(IGBCore *core, + struct NetRxPkt *pkt, + PCIDevice *d, + IGBPacketRxDMAState *pdma_st, + size_t *copy_size) +{ + size_t iov_copy; + size_t ps_hdr_copied = 0; + + if (!pdma_st->is_first) { + /* Leave buffer 0 of each descriptor except first */ + /* empty */ + pdma_st->bastate.cur_idx = 1; + return; + } + + do { + iov_copy = MIN(pdma_st->ps_hdr_len - ps_hdr_copied, + pdma_st->iov->iov_len - pdma_st->iov_ofs); + + igb_write_hdr_frag_to_rx_buffers(core, d, pdma_st, + pdma_st->iov->iov_base, + iov_copy); + + *copy_size -= iov_copy; + ps_hdr_copied += iov_copy; + + pdma_st->iov_ofs += iov_copy; + if (pdma_st->iov_ofs == pdma_st->iov->iov_len) { + pdma_st->iov++; + pdma_st->iov_ofs = 0; + } + } while (ps_hdr_copied < pdma_st->ps_hdr_len); + + pdma_st->is_first = false; +} + static void igb_write_payload_frag_to_rx_buffers(IGBCore *core, PCIDevice *d, - hwaddr ba, - uint16_t *written, - uint32_t cur_buf_len, + IGBPacketRxDMAState *pdma_st, const char *data, dma_addr_t data_len) { - trace_igb_rx_desc_buff_write(ba, *written, data, data_len); - pci_dma_write(d, ba + *written, data, data_len); - *written += data_len; + while (data_len > 0) { + assert(pdma_st->bastate.cur_idx < IGB_MAX_PS_BUFFERS); + + uint32_t cur_buf_bytes_left = + pdma_st->rx_desc_packet_buf_size - + pdma_st->bastate.written[pdma_st->bastate.cur_idx]; + uint32_t bytes_to_write = MIN(data_len, cur_buf_bytes_left); + + trace_igb_rx_desc_buff_write( + pdma_st->bastate.cur_idx, + pdma_st->ba[pdma_st->bastate.cur_idx], + pdma_st->bastate.written[pdma_st->bastate.cur_idx], + data, + bytes_to_write); + + pci_dma_write(d, + pdma_st->ba[pdma_st->bastate.cur_idx] + + pdma_st->bastate.written[pdma_st->bastate.cur_idx], + data, bytes_to_write); + + 
pdma_st->bastate.written[pdma_st->bastate.cur_idx] += bytes_to_write; + data += bytes_to_write; + data_len -= bytes_to_write; + + if (pdma_st->bastate.written[pdma_st->bastate.cur_idx] == + pdma_st->rx_desc_packet_buf_size) { + pdma_st->bastate.cur_idx++; + } + } } static void @@ -XXX,XX +XXX,XX @@ igb_write_payload_to_rx_buffers(IGBCore *core, while (*copy_size) { iov_copy = MIN(*copy_size, pdma_st->iov->iov_len - pdma_st->iov_ofs); igb_write_payload_frag_to_rx_buffers(core, d, - pdma_st->ba, - &pdma_st->written, - pdma_st->rx_desc_packet_buf_size, + pdma_st, pdma_st->iov->iov_base + pdma_st->iov_ofs, iov_copy); @@ -XXX,XX +XXX,XX @@ igb_write_payload_to_rx_buffers(IGBCore *core, if (pdma_st->desc_offset + pdma_st->desc_size >= pdma_st->total_size) { /* Simulate FCS checksum presence in the last descriptor */ igb_write_payload_frag_to_rx_buffers(core, d, - pdma_st->ba, - &pdma_st->written, - pdma_st->rx_desc_packet_buf_size, + pdma_st, (const char *) &fcs_pad, e1000x_fcs_len(core->mac)); } @@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core, { size_t copy_size; - if (!pdma_st->ba) { + if (!(pdma_st->ba)[1] || (pdma_st->do_ps && !(pdma_st->ba[0]))) { /* as per intel docs; skip descriptors with null buf addr */ trace_e1000e_rx_null_descriptor(); return; @@ -XXX,XX +XXX,XX @@ igb_write_to_rx_buffers(IGBCore *core, igb_truncate_to_descriptor_size(pdma_st, &pdma_st->desc_size); copy_size = pdma_st->size - pdma_st->desc_offset; igb_truncate_to_descriptor_size(pdma_st, &copy_size); + + /* For PS mode copy the packet header first */ + if (pdma_st->do_ps) { + igb_write_header_to_rx_buffers(core, pkt, d, pdma_st, &copy_size); + } else { + pdma_st->bastate.cur_idx = 1; + } + igb_write_payload_to_rx_buffers(core, pkt, d, pdma_st, &copy_size); } @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, union e1000_rx_desc_union desc; const E1000ERingInfo *rxi; size_t rx_desc_len; + IGBSplitDescriptorData ps_desc_data; IGBPacketRxDMAState pdma_st = {0}; pdma_st.is_first = true; @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, d = core->owner; } + pdma_st.do_ps = igb_do_ps(core, rxi, pkt, + &pdma_st.ps_hdr_len, + &ps_desc_data); + do { - pdma_st.written = 0; + memset(&pdma_st.bastate, 0, sizeof(IGBBAState)); bool is_last = false; if (igb_ring_empty(core, rxi)) { @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, pci_dma_read(d, base, &desc, rx_desc_len); trace_e1000e_rx_descr(rxi->idx, base, rx_desc_len); - igb_read_rx_descr(core, &desc, &pdma_st.ba); + igb_read_rx_descr(core, &desc, &pdma_st, rxi); igb_write_to_rx_buffers(core, pkt, d, &pdma_st); pdma_st.desc_offset += pdma_st.desc_size; @@ -XXX,XX +XXX,XX @@ igb_write_packet_to_guest(IGBCore *core, struct NetRxPkt *pkt, is_last = true; } - igb_write_rx_descr(core, &desc, is_last ? core->rx_pkt : NULL, - rss_info, etqf, ts, pdma_st.written); + igb_write_rx_descr(core, &desc, + is_last ?
pkt : NULL, + rss_info, + etqf, ts, + &ps_desc_data, + &pdma_st, + rxi); igb_pci_dma_write_rx_desc(core, d, base, &desc, rx_desc_len); igb_ring_advance(core, rxi, rx_desc_len / E1000_MIN_RX_DESC_LEN); diff --git a/hw/net/igb_regs.h b/hw/net/igb_regs.h index XXXXXXX..XXXXXXX 100644 --- a/hw/net/igb_regs.h +++ b/hw/net/igb_regs.h @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { #define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 #define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ #define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 #define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 #define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 #define E1000_SRRCTL_DROP_EN 0x80000000 @@ -XXX,XX +XXX,XX @@ union e1000_adv_rx_desc { #define E1000_ADVRXD_PKT_UDP BIT(5) #define E1000_ADVRXD_PKT_SCTP BIT(6) +#define IGB_MAX_PS_BUFFERS 2 + +#define E1000_ADVRXD_HDR_LEN_OFFSET (21 - 16) +#define E1000_ADVRXD_ADV_HDR_LEN_MASK ((BIT(10) - 1) << \ + E1000_ADVRXD_HDR_LEN_OFFSET) +#define E1000_ADVRXD_HDR_SPH BIT(15) +#define E1000_ADVRXD_ST_ERR_HBO_OFFSET BIT(3 + 20) + static inline uint8_t igb_ivar_entry_rx(uint8_t i) { return i < 8 ? i * 4 : (i - 8) * 4 + 2; diff --git a/hw/net/trace-events b/hw/net/trace-events index XXXXXXX..XXXXXXX 100644 --- a/hw/net/trace-events +++ b/hw/net/trace-events @@ -XXX,XX +XXX,XX @@ igb_core_mdic_write_unhandled(uint32_t addr) "MDIC WRITE: PHY[%u] UNHANDLED" igb_link_set_ext_params(bool asd_check, bool speed_select_bypass, bool pfrstd) "Set extended link params: ASD check: %d, Speed select bypass: %d, PF reset done: %d" igb_rx_desc_buff_size(uint32_t b) "buffer size: %u" -igb_rx_desc_buff_write(uint64_t addr, uint16_t offset, const void* source, uint32_t len) "addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" +igb_rx_desc_buff_write(uint8_t idx, uint64_t addr, uint16_t offset, const void* source, uint32_t len) "buffer %u, addr: 0x%"PRIx64", offset: %u, from: %p, length: %u" igb_rx_metadata_rss(uint32_t rss, uint16_t rss_pkt_type) "RSS data: rss: 0x%X, rss_pkt_type: 0x%X" -- 2.25.1
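The write-back composed in igb_write_adv_ps_rx_descr() above packs the parsed header length into bits 5..14 of hdr_info and sets the SPH bit (15) when the header was actually split out. A standalone sketch with the constants copied from the igb_regs.h hunk; make_hdr_info is a hypothetical helper, not part of the patch:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BIT(n) (1U << (n))

/* Values copied from the hw/net/igb_regs.h hunk above. */
#define E1000_ADVRXD_HDR_LEN_OFFSET   (21 - 16)
#define E1000_ADVRXD_ADV_HDR_LEN_MASK ((BIT(10) - 1) << \
                                       E1000_ADVRXD_HDR_LEN_OFFSET)
#define E1000_ADVRXD_HDR_SPH          BIT(15)

/* Hypothetical helper repeating the hdr_info composition: header
 * length in bits 5..14, SPH flag in bit 15. */
static uint16_t make_hdr_info(size_t hdr_len, bool sph)
{
    uint16_t hdr_info = (hdr_len << E1000_ADVRXD_HDR_LEN_OFFSET) &
                        E1000_ADVRXD_ADV_HDR_LEN_MASK;

    return hdr_info | (sph ? E1000_ADVRXD_HDR_SPH : 0);
}

int main(void)
{
    /* 54-byte Ethernet + IPv4 + TCP header, split performed: 0x86c0. */
    printf("0x%04x\n", make_hdr_info(54, true));
    return 0;
}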
Rename e1000e_ba_state and e1000e_write_hdr_to_rx_buffers for consistency with IGB. Signed-off-by: Tomasz Dzieciol <t.dzieciol@partner.samsung.com> --- hw/net/e1000e_core.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/hw/net/e1000e_core.c b/hw/net/e1000e_core.c index XXXXXXX..XXXXXXX 100644 --- a/hw/net/e1000e_core.c +++ b/hw/net/e1000e_core.c @@ -XXX,XX +XXX,XX @@ e1000e_pci_dma_write_rx_desc(E1000ECore *core, dma_addr_t addr, } } -typedef struct e1000e_ba_state_st { +typedef struct E1000EBAState { uint16_t written[MAX_PS_BUFFERS]; uint8_t cur_idx; -} e1000e_ba_state; +} E1000EBAState; static inline void -e1000e_write_hdr_to_rx_buffers(E1000ECore *core, - hwaddr ba[MAX_PS_BUFFERS], - e1000e_ba_state *bastate, - const char *data, - dma_addr_t data_len) +e1000e_write_hdr_frag_to_rx_buffers(E1000ECore *core, + hwaddr ba[MAX_PS_BUFFERS], + E1000EBAState *bastate, + const char *data, + dma_addr_t data_len) { assert(data_len <= core->rxbuf_sizes[0] - bastate->written[0]); @@ -XXX,XX +XXX,XX @@ e1000e_write_hdr_to_rx_buffers(E1000ECore *core, static void e1000e_write_payload_frag_to_rx_buffers(E1000ECore *core, hwaddr ba[MAX_PS_BUFFERS], - e1000e_ba_state *bastate, + E1000EBAState *bastate, const char *data, dma_addr_t data_len) { @@ -XXX,XX +XXX,XX @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, do { hwaddr ba[MAX_PS_BUFFERS]; - e1000e_ba_state bastate = { { 0 } }; + E1000EBAState bastate = { { 0 } }; bool is_last = false; desc_size = total_size - desc_offset; @@ -XXX,XX +XXX,XX @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, iov_copy = MIN(ps_hdr_len - ps_hdr_copied, iov->iov_len - iov_ofs); - e1000e_write_hdr_to_rx_buffers(core, ba, &bastate, - iov->iov_base, iov_copy); + e1000e_write_hdr_frag_to_rx_buffers(core, ba, + &bastate, + iov->iov_base, + iov_copy); copy_size -= iov_copy; ps_hdr_copied += iov_copy; @@ -XXX,XX +XXX,XX @@ e1000e_write_packet_to_guest(E1000ECore *core, struct NetRxPkt *pkt, } else { /* Leave buffer 0 of each descriptor except first */ /* empty as per spec 7.1.5.1 */ - e1000e_write_hdr_to_rx_buffers(core, ba, &bastate, - NULL, 0); + e1000e_write_hdr_frag_to_rx_buffers(core, ba, &bastate, + NULL, 0); } } -- 2.25.1