Implement NDO_QUEUE_RX_BUF_SIZE and take the rx buf size from the memory
providers.
Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
---
drivers/net/ethernet/broadcom/bnxt/bnxt.c | 34 +++++++++++++++++++++++
drivers/net/ethernet/broadcom/bnxt/bnxt.h | 1 +
2 files changed, 35 insertions(+)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index e9840165c7d0..0eff527c267b 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -15932,16 +15932,46 @@ static const struct netdev_stat_ops bnxt_stat_ops = {
.get_base_stats = bnxt_get_base_stats,
};
+static ssize_t bnxt_get_rx_buf_size(struct bnxt *bp, int rxq_idx)
+{
+ struct netdev_rx_queue *rxq = __netif_get_rx_queue(bp->dev, rxq_idx);
+ size_t rx_buf_size;
+
+ rx_buf_size = rxq->mp_params.rx_buf_len;
+ if (!rx_buf_size)
+ return BNXT_RX_PAGE_SIZE;
+
+ /* Older chips need MSS calc so rx_buf_len is not supported,
+ * but we don't set queue ops for them so we should never get here.
+ */
+ if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
+ return -EINVAL;
+
+ if (!is_power_of_2(rx_buf_size))
+ return -ERANGE;
+
+ if (rx_buf_size < BNXT_RX_PAGE_SIZE ||
+ rx_buf_size > BNXT_MAX_RX_PAGE_SIZE)
+ return -ERANGE;
+
+ return rx_buf_size;
+}
+
static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
{
struct bnxt_rx_ring_info *rxr, *clone;
struct bnxt *bp = netdev_priv(dev);
struct bnxt_ring_struct *ring;
+ ssize_t rx_buf_size;
int rc;
if (!bp->rx_ring)
return -ENETDOWN;
+ rx_buf_size = bnxt_get_rx_buf_size(bp, idx);
+ if (rx_buf_size < 0)
+ return rx_buf_size;
+
rxr = &bp->rx_ring[idx];
clone = qmem;
memcpy(clone, rxr, sizeof(*rxr));
@@ -15953,6 +15983,7 @@ static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
clone->rx_sw_agg_prod = 0;
clone->rx_next_cons = 0;
clone->need_head_pool = false;
+ clone->rx_page_size = rx_buf_size;
rc = bnxt_alloc_rx_page_pool(bp, clone, rxr->page_pool->p.nid);
if (rc)
@@ -16079,6 +16110,8 @@ static void bnxt_copy_rx_ring(struct bnxt *bp,
src_ring = &src->rx_agg_ring_struct;
src_rmem = &src_ring->ring_mem;
+ dst->rx_page_size = src->rx_page_size;
+
WARN_ON(dst_rmem->nr_pages != src_rmem->nr_pages);
WARN_ON(dst_rmem->page_size != src_rmem->page_size);
WARN_ON(dst_rmem->flags != src_rmem->flags);
@@ -16231,6 +16264,7 @@ static const struct netdev_queue_mgmt_ops bnxt_queue_mgmt_ops = {
.ndo_queue_mem_free = bnxt_queue_mem_free,
.ndo_queue_start = bnxt_queue_start,
.ndo_queue_stop = bnxt_queue_stop,
+ .supported_params = NDO_QUEUE_RX_BUF_SIZE,
};
static void bnxt_remove_one(struct pci_dev *pdev)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 4c880a9fba92..d245eefbbdda 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -760,6 +760,7 @@ struct nqe_cn {
#endif
#define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT)
+#define BNXT_MAX_RX_PAGE_SIZE BIT(15)
#define BNXT_MAX_MTU 9500
--
2.52.0
On Sun, 30 Nov 2025 23:35:22 +0000 Pavel Begunkov wrote:
> +static ssize_t bnxt_get_rx_buf_size(struct bnxt *bp, int rxq_idx)
> +{
> + struct netdev_rx_queue *rxq = __netif_get_rx_queue(bp->dev, rxq_idx);
> + size_t rx_buf_size;
> +
> + rx_buf_size = rxq->mp_params.rx_buf_len;
> + if (!rx_buf_size)
> + return BNXT_RX_PAGE_SIZE;
I'd like to retain my cfg objects in the queue API, if you don't mind.
I guess we just need a way for drivers to fill in the defaults and
then plumb them into the ops.
When drivers implement the logic to consolidate the configuration from
different APIs into the effective one they inevitably diverge in their
interpretations :/ We should keep it in the core from the start and
present to the driver the final queue config.
> + /* Older chips need MSS calc so rx_buf_len is not supported,
> + * but we don't set queue ops for them so we should never get here.
> + */
> + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS))
> + return -EINVAL;
> +
> + if (!is_power_of_2(rx_buf_size))
> + return -ERANGE;
> +
> + if (rx_buf_size < BNXT_RX_PAGE_SIZE ||
> + rx_buf_size > BNXT_MAX_RX_PAGE_SIZE)
> + return -ERANGE;
> +
> + return rx_buf_size;
> +}
> +
> static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
> {
> struct bnxt_rx_ring_info *rxr, *clone;
> struct bnxt *bp = netdev_priv(dev);
> struct bnxt_ring_struct *ring;
> + ssize_t rx_buf_size;
> int rc;
>
> if (!bp->rx_ring)
> return -ENETDOWN;
>
> + rx_buf_size = bnxt_get_rx_buf_size(bp, idx);
> + if (rx_buf_size < 0)
> + return rx_buf_size;
Does this survive full ring reconfig? IIRC the large changes to the NIC
config (like changing ring sizes) free and reallocate all rings in bnxt,
but due to "historic reasons?" they don't go thru the queue ops.
On 12/2/25 18:58, Jakub Kicinski wrote:
> On Sun, 30 Nov 2025 23:35:22 +0000 Pavel Begunkov wrote:
>> +static ssize_t bnxt_get_rx_buf_size(struct bnxt *bp, int rxq_idx)
>> +{
>> + struct netdev_rx_queue *rxq = __netif_get_rx_queue(bp->dev, rxq_idx);
>> + size_t rx_buf_size;
>> +
>> + rx_buf_size = rxq->mp_params.rx_buf_len;
>> + if (!rx_buf_size)
>> + return BNXT_RX_PAGE_SIZE;
>
> I'd like to retain my cfg objects in the queue API, if you don't mind.
> I guess we just need a way for drivers to fill in the defaults and
> then plumb them into the ops.
It was problematic, I wanted to split it into more digestible chunks.
My main problem is that it was not really optional and could break
drivers that don't even care about this qcfg len option but allow
setting it device-wide via ethtool, and I won't even have a way to
test them.
Maybe there is a way to strip down qcfg and only apply it to marked
queue api enabled drivers for now, and then extend the idea in
the future. E.g.
set 1) optional and for qapi drivers only
set 2) patch up all qapi drivers and make it mandatory
set 3) convert all other drivers that set the length.
I can take a look at implementing 1) in this series. It should help
to keep complexity manageable.
...
>> static int bnxt_queue_mem_alloc(struct net_device *dev, void *qmem, int idx)
>> {
>> struct bnxt_rx_ring_info *rxr, *clone;
>> struct bnxt *bp = netdev_priv(dev);
>> struct bnxt_ring_struct *ring;
>> + ssize_t rx_buf_size;
>> int rc;
>>
>> if (!bp->rx_ring)
>> return -ENETDOWN;
>>
>> + rx_buf_size = bnxt_get_rx_buf_size(bp, idx);
>> + if (rx_buf_size < 0)
>> + return rx_buf_size;
>
> Does this survive full ring reconfig? IIRC the large changes to the NIC
> config (like changing ring sizes) free and reallocate all rings in bnxt,
> but due to "historic reasons?" they don't go thru the queue ops.
I'll check when I'm back from lpc, but I was coming from an assumption
that the qcfg series was doing it right, and I believe only the restart
path was looking up the set len value. I'll double check.
--
Pavel Begunkov
On Thu, 11 Dec 2025 01:39:25 +0000 Pavel Begunkov wrote:
> On 12/2/25 18:58, Jakub Kicinski wrote:
> > On Sun, 30 Nov 2025 23:35:22 +0000 Pavel Begunkov wrote:
> >> +static ssize_t bnxt_get_rx_buf_size(struct bnxt *bp, int rxq_idx)
> >> +{
> >> + struct netdev_rx_queue *rxq = __netif_get_rx_queue(bp->dev, rxq_idx);
> >> + size_t rx_buf_size;
> >> +
> >> + rx_buf_size = rxq->mp_params.rx_buf_len;
> >> + if (!rx_buf_size)
> >> + return BNXT_RX_PAGE_SIZE;
> >
> > I'd like to retain my cfg objects in the queue API, if you don't mind.
> > I guess we just need a way for drivers to fill in the defaults and
> > then plumb them into the ops.
>
> It was problematic, I wanted to split it into more digestible chunks.
> My main problem is that it was not really optional and could break
> drivers that don't even care about this qcfg len option but allow
> setting it device-wide via ethtool, and I won't even have a way to
> test them.
>
> Maybe there is a way to strip down qcfg and only apply it to marked
> queue api enabled drivers for now, and then extend the idea in
> the future. E.g.
Yes, I mean a stripped down version, since we're not shadowing the
ethtool knob any more the full set of changes I had will be too much.
Off the top of my head I think we'd need to retain:
- the qcfg struct passed as an argument to the queue callbacks
(drivers other than bnxt won't use it which is okay since they don't
set .supported_params)
- the ability to conjure the qcfg struct for any given queue by the
driver at any time (netdev_queue_config())
- probably the callback to fill in the defaults so that the driver
doesn't have to check "is the value set by the user" explicitly
© 2016 - 2026 Red Hat, Inc.