Populate the RX and AGG rings with packet buffers while these
rings are being initialized.
Signed-off-by: Bhargava Marreddy <bhargava.marreddy@broadcom.com>
Reviewed-by: Vikas Gupta <vikas.gupta@broadcom.com>
Reviewed-by: Rajashekar Hudumula <rajashekar.hudumula@broadcom.com>
---
.../net/ethernet/broadcom/bnge/bnge_netdev.c | 223 ++++++++++++++++++
1 file changed, 223 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
index 77bd8f6ce39..ee7cf8596cd 100644
--- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
+++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
@@ -257,6 +257,76 @@ static bool bnge_separate_head_pool(struct bnge_rx_ring_info *rxr)
return rxr->need_head_pool || PAGE_SIZE > BNGE_RX_PAGE_SIZE;
}
+static void bnge_free_one_rx_ring(struct bnge_net *bn,
+ struct bnge_rx_ring_info *rxr)
+{
+ int i, max_idx;
+
+ if (!rxr->rx_buf_ring)
+ return;
+
+ max_idx = bn->rx_nr_pages * RX_DESC_CNT;
+
+ for (i = 0; i < max_idx; i++) {
+ struct bnge_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i];
+ void *data = rx_buf->data;
+
+ if (!data)
+ continue;
+
+ rx_buf->data = NULL;
+ page_pool_free_va(rxr->head_pool, data, true);
+ }
+}
+
+static void bnge_free_one_rx_agg_ring(struct bnge_net *bn,
+ struct bnge_rx_ring_info *rxr)
+{
+ int i, max_idx;
+
+ if (!rxr->rx_agg_buf_ring)
+ return;
+
+ max_idx = bn->rx_agg_nr_pages * RX_DESC_CNT;
+
+ for (i = 0; i < max_idx; i++) {
+ struct bnge_sw_rx_agg_bd *rx_agg_buf = &rxr->rx_agg_buf_ring[i];
+ netmem_ref netmem = rx_agg_buf->netmem;
+
+ if (!netmem)
+ continue;
+
+ rx_agg_buf->netmem = 0;
+ __clear_bit(i, rxr->rx_agg_bmap);
+
+ page_pool_recycle_direct_netmem(rxr->page_pool, netmem);
+ }
+}
+
+static void bnge_free_one_rx_pkt_mem(struct bnge_net *bn,
+ struct bnge_rx_ring_info *rxr)
+{
+ bnge_free_one_rx_ring(bn, rxr);
+ bnge_free_one_rx_agg_ring(bn, rxr);
+}
+
+static void bnge_free_rx_pkt_bufs(struct bnge_net *bn)
+{
+ struct bnge_dev *bd = bn->bd;
+ int i;
+
+ if (!bn->rx_ring)
+ return;
+
+ for (i = 0; i < bd->rx_nr_rings; i++)
+ bnge_free_one_rx_pkt_mem(bn, &bn->rx_ring[i]);
+}
+
+static void bnge_free_pkts_mem(struct bnge_net *bn)
+{
+ bnge_free_rx_pkt_bufs(bn);
+}
+
static void bnge_free_rx_rings(struct bnge_net *bn)
{
struct bnge_dev *bd = bn->bd;
@@ -737,6 +807,156 @@ static void bnge_init_nq_tree(struct bnge_net *bn)
}
}
+static netmem_ref __bnge_alloc_rx_netmem(struct bnge_net *bn,
+ dma_addr_t *mapping,
+ struct bnge_rx_ring_info *rxr,
+ unsigned int *offset,
+ gfp_t gfp)
+{
+ netmem_ref netmem;
+
+ if (PAGE_SIZE > BNGE_RX_PAGE_SIZE) {
+ netmem = page_pool_alloc_frag_netmem(rxr->page_pool, offset,
+ BNGE_RX_PAGE_SIZE, gfp);
+ } else {
+ netmem = page_pool_alloc_netmems(rxr->page_pool, gfp);
+ *offset = 0;
+ }
+ if (!netmem)
+ return 0;
+
+ *mapping = page_pool_get_dma_addr_netmem(netmem) + *offset;
+ return netmem;
+}
+
+static u8 *__bnge_alloc_rx_frag(struct bnge_net *bn, dma_addr_t *mapping,
+ struct bnge_rx_ring_info *rxr,
+ gfp_t gfp)
+{
+ unsigned int offset;
+ struct page *page;
+
+ page = page_pool_alloc_frag(rxr->head_pool, &offset,
+ bn->rx_buf_size, gfp);
+ if (!page)
+ return NULL;
+
+ *mapping = page_pool_get_dma_addr(page) + bn->rx_dma_offset + offset;
+ return page_address(page) + offset;
+}
+
+static int bnge_alloc_rx_data(struct bnge_net *bn,
+ struct bnge_rx_ring_info *rxr,
+ u16 prod, gfp_t gfp)
+{
+ struct bnge_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[RING_RX(bn, prod)];
+ struct rx_bd *rxbd;
+ dma_addr_t mapping;
+ u8 *data;
+
+ rxbd = &rxr->rx_desc_ring[RX_RING(bn, prod)][RX_IDX(prod)];
+ data = __bnge_alloc_rx_frag(bn, &mapping, rxr, gfp);
+ if (!data)
+ return -ENOMEM;
+
+ rx_buf->data = data;
+ rx_buf->data_ptr = data + bn->rx_offset;
+ rx_buf->mapping = mapping;
+
+ rxbd->rx_bd_haddr = cpu_to_le64(mapping);
+
+ return 0;
+}
+
+static void bnge_alloc_one_rx_pkt_mem(struct bnge_net *bn,
+ struct bnge_rx_ring_info *rxr,
+ int ring_nr)
+{
+ u32 prod;
+ int i;
+
+ prod = rxr->rx_prod;
+ for (i = 0; i < bn->rx_ring_size; i++) {
+ if (bnge_alloc_rx_data(bn, rxr, prod, GFP_KERNEL)) {
+ netdev_warn(bn->netdev, "init'ed rx ring %d with %d/%d skbs only\n",
+ ring_nr, i, bn->rx_ring_size);
+ break;
+ }
+ prod = NEXT_RX(prod);
+ }
+ rxr->rx_prod = prod;
+}
+
+static u16 bnge_find_next_agg_idx(struct bnge_rx_ring_info *rxr, u16 idx)
+{
+ u16 next, max = rxr->rx_agg_bmap_size;
+
+ next = find_next_zero_bit(rxr->rx_agg_bmap, max, idx);
+ if (next >= max)
+ next = find_first_zero_bit(rxr->rx_agg_bmap, max);
+ return next;
+}
+
+static int bnge_alloc_rx_netmem(struct bnge_net *bn,
+ struct bnge_rx_ring_info *rxr,
+ u16 prod, gfp_t gfp)
+{
+ struct bnge_sw_rx_agg_bd *rx_agg_buf;
+ u16 sw_prod = rxr->rx_sw_agg_prod;
+ unsigned int offset = 0;
+ struct rx_bd *rxbd;
+ dma_addr_t mapping;
+ netmem_ref netmem;
+
+ rxbd = &rxr->rx_agg_desc_ring[RX_AGG_RING(bn, prod)][RX_IDX(prod)];
+ netmem = __bnge_alloc_rx_netmem(bn, &mapping, rxr, &offset, gfp);
+ if (!netmem)
+ return -ENOMEM;
+
+ if (unlikely(test_bit(sw_prod, rxr->rx_agg_bmap)))
+ sw_prod = bnge_find_next_agg_idx(rxr, sw_prod);
+
+ __set_bit(sw_prod, rxr->rx_agg_bmap);
+ rx_agg_buf = &rxr->rx_agg_buf_ring[sw_prod];
+ rxr->rx_sw_agg_prod = RING_RX_AGG(bn, NEXT_RX_AGG(sw_prod));
+
+ rx_agg_buf->netmem = netmem;
+ rx_agg_buf->offset = offset;
+ rx_agg_buf->mapping = mapping;
+ rxbd->rx_bd_haddr = cpu_to_le64(mapping);
+ rxbd->rx_bd_opaque = sw_prod;
+ return 0;
+}
+
+static void bnge_alloc_one_rx_ring_netmem(struct bnge_net *bn,
+ struct bnge_rx_ring_info *rxr,
+ int ring_nr)
+{
+ u32 prod;
+ int i;
+
+ prod = rxr->rx_agg_prod;
+ for (i = 0; i < bn->rx_agg_ring_size; i++) {
+ if (bnge_alloc_rx_netmem(bn, rxr, prod, GFP_KERNEL)) {
+ netdev_warn(bn->netdev, "init'ed rx agg ring %d with %d/%d pages only\n",
+ ring_nr, i, bn->rx_agg_ring_size);
+ break;
+ }
+ prod = NEXT_RX_AGG(prod);
+ }
+ rxr->rx_agg_prod = prod;
+}
+
+static void bnge_alloc_one_rx_ring(struct bnge_net *bn, int ring_nr)
+{
+ struct bnge_rx_ring_info *rxr = &bn->rx_ring[ring_nr];
+
+ bnge_alloc_one_rx_pkt_mem(bn, rxr, ring_nr);
+
+ if (bnge_is_agg_reqd(bn->bd))
+ bnge_alloc_one_rx_ring_netmem(bn, rxr, ring_nr);
+}
+
static void bnge_init_rxbd_pages(struct bnge_ring_struct *ring, u32 type)
{
struct rx_bd **rx_desc_ring;
@@ -799,6 +1019,8 @@ static void bnge_init_one_rx_ring(struct bnge_net *bn, int ring_nr)
&rxr->bnapi->napi);
bnge_init_one_rx_agg_ring_rxbd(bn, rxr);
+
+ bnge_alloc_one_rx_ring(bn, ring_nr);
}
static void bnge_init_rx_rings(struct bnge_net *bn)
@@ -1106,6 +1328,7 @@ static void bnge_close_core(struct bnge_net *bn)
struct bnge_dev *bd = bn->bd;
clear_bit(BNGE_STATE_OPEN, &bd->state);
+ bnge_free_pkts_mem(bn);
bnge_free_irq(bn);
bnge_del_napi(bn);
--
2.47.3
On Fri, 12 Sep 2025 01:05:01 +0530 Bhargava Marreddy wrote:
> +static void bnge_alloc_one_rx_pkt_mem(struct bnge_net *bn,
> +				      struct bnge_rx_ring_info *rxr,
> +				      int ring_nr)
> +{
> +	u32 prod;
> +	int i;
> +
> +	prod = rxr->rx_prod;
> +	for (i = 0; i < bn->rx_ring_size; i++) {
> +		if (bnge_alloc_rx_data(bn, rxr, prod, GFP_KERNEL)) {
> +			netdev_warn(bn->netdev, "init'ed rx ring %d with %d/%d skbs only\n",
> +				    ring_nr, i, bn->rx_ring_size);
> +			break;
> +		}
> +		prod = NEXT_RX(prod);
> +	}
> +	rxr->rx_prod = prod;

You should have some sort of minimal fill level of the Rx rings.
Right now ndo_open will succeed even when Rx rings are completely empty.
Looks like you made even more functions void since v6, this is going in
the wrong direction. Most drivers actually expect the entire ring to be
filled. You can have a partial fill, but knowing bnxt I'm worried the
driver will actually never try to fill the rings back up.
--
pw-bot: cr
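One possible shape for such a fill-level check -- this is only an illustrative
sketch, not code from the series; the int return type and the BNGE_RX_MIN_FILL
threshold are invented here, while the helpers it calls come from the patch
above:

/* Hypothetical sketch: report whether the ring reached a minimum fill
 * level so the caller can fail ndo_open instead of running empty.
 */
static int bnge_alloc_one_rx_pkt_mem(struct bnge_net *bn,
				     struct bnge_rx_ring_info *rxr,
				     int ring_nr)
{
	u32 prod = rxr->rx_prod;
	int i;

	for (i = 0; i < bn->rx_ring_size; i++) {
		if (bnge_alloc_rx_data(bn, rxr, prod, GFP_KERNEL))
			break;
		prod = NEXT_RX(prod);
	}
	rxr->rx_prod = prod;

	/* BNGE_RX_MIN_FILL is a made-up threshold for illustration only. */
	if (i < BNGE_RX_MIN_FILL) {
		netdev_err(bn->netdev,
			   "rx ring %d: only allocated %d/%d buffers\n",
			   ring_nr, i, bn->rx_ring_size);
		return -ENOMEM;
	}
	return 0;
}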
On Mon, Sep 15, 2025 at 2:01 AM Jakub Kicinski <kuba@kernel.org> wrote:
>
> On Fri, 12 Sep 2025 01:05:01 +0530 Bhargava Marreddy wrote:
> > +static void bnge_alloc_one_rx_pkt_mem(struct bnge_net *bn,
> > +				      struct bnge_rx_ring_info *rxr,
> > +				      int ring_nr)
> > +{
> > +	u32 prod;
> > +	int i;
> > +
> > +	prod = rxr->rx_prod;
> > +	for (i = 0; i < bn->rx_ring_size; i++) {
> > +		if (bnge_alloc_rx_data(bn, rxr, prod, GFP_KERNEL)) {
> > +			netdev_warn(bn->netdev, "init'ed rx ring %d with %d/%d skbs only\n",
> > +				    ring_nr, i, bn->rx_ring_size);
> > +			break;
> > +		}
> > +		prod = NEXT_RX(prod);
> > +	}
> > +	rxr->rx_prod = prod;
>
> You should have some sort of minimal fill level of the Rx rings.
> Right now ndo_open will succeed even when Rx rings are completely empty.
> Looks like you made even more functions void since v6, this is going in

I changed those functions to void only because in this patchset they
can’t fail.

> the wrong direction. Most drivers actually expect the entire ring to be
> filled. You can have a partial fill, but knowing bnxt I'm worried the
> driver will actually never try to fill the rings back up.

I believe the driver should return an error if any buffer allocation
fails and handle the unwinding accordingly. What do you think?

Thanks,
Bhargava Marreddy

> --
> pw-bot: cr
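For comparison, a rough sketch of that error-propagation-and-unwind approach;
bnge_alloc_rx_pkt_bufs() does not exist in the posted series, and
bnge_alloc_one_rx_ring() is assumed here to have been converted to return an
int:

/* Hypothetical top-level fill routine: stop on the first failure and
 * release everything allocated so far, reusing the free helpers from
 * the patch above (they skip entries that were never populated).
 */
static int bnge_alloc_rx_pkt_bufs(struct bnge_net *bn)
{
	struct bnge_dev *bd = bn->bd;
	int i, rc;

	for (i = 0; i < bd->rx_nr_rings; i++) {
		/* bnge_alloc_one_rx_ring() is assumed to return an int here. */
		rc = bnge_alloc_one_rx_ring(bn, i);
		if (rc) {
			bnge_free_rx_pkt_bufs(bn);
			return rc;
		}
	}
	return 0;
}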
On Mon, 15 Sep 2025 23:26:07 +0530 Bhargava Chenna Marreddy wrote:
> > You should have some sort of minimal fill level of the Rx rings.
> > Right now ndo_open will succeed even when Rx rings are completely empty.
> > Looks like you made even more functions void since v6, this is going in
> I changed those functions to void only because in this patchset they
> can’t fail.
> > the wrong direction. Most drivers actually expect the entire ring to be
> > filled. You can have a partial fill, but knowing bnxt I'm worried the
> > driver will actually never try to fill the rings back up.
> I believe the driver should return an error if any buffer allocation
> fails and handle the unwinding accordingly.

Yes, that's also my preference. I think allowing Rx buffer lists to not
be completely filled is okay if the driver author prefers that, but in
that case there needs to be some minimal "fill level" which makes the
device operational.

Speaking of Rx fill -- bnxt drops packets when it can't allocate
a replacement buffer. This used to be the recommended way of handling
allocation failures years ago. In modern drivers I believe it's better
to let the queue run dry and have a watchdog / service task which
periodically checks for complete depletion and kicks NAPI in. Getting
constantly interrupted with new packets when the machine is trying to
recover from a hard OOM is not very helpful.

That's just a future note, I don't think this series itself contains
much of the Rx path.
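A loose illustration of that "let the ring run dry and refill from a service
task" idea, entirely hypothetical for bnge; the rx_refill_work member, the
bnge_rx_ring_empty() helper and the one-second period are all invented for
the sketch:

/* Hypothetical service task: if an Rx ring has run completely dry,
 * schedule its NAPI instance so the poll loop can attempt a refill,
 * then re-arm the delayed work.
 */
static void bnge_rx_refill_work(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct bnge_net *bn = container_of(dwork, struct bnge_net,
					   rx_refill_work);
	struct bnge_dev *bd = bn->bd;
	int i;

	for (i = 0; i < bd->rx_nr_rings; i++) {
		struct bnge_rx_ring_info *rxr = &bn->rx_ring[i];

		/* bnge_rx_ring_empty() stands in for a real depletion check. */
		if (bnge_rx_ring_empty(rxr))
			napi_schedule(&rxr->bnapi->napi);
	}

	schedule_delayed_work(&bn->rx_refill_work, HZ);
}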