From nobody Sun May 24 18:41:10 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-alma10-1.taild15c8.ts.net [100.103.45.18]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BC8D333F8BE; Fri, 22 May 2026 11:32:39 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=100.103.45.18 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779449562; cv=none; b=NxtdHdkkuwXFlPO/6DAMHZKVLgpOKrrWdaobRkDo9tca1hcTDOuB4FktJwjrdStlOLzQWTTDOmKKJrDyWQ0xHh8wOCaUDkb7OEfPc67sFt4TpWOyP+TaDqbFFQKqbMrl0rY0QdpFYnOtyIQQjPyumB4xZ4ViXJbKbt16bgD3ROY= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779449562; c=relaxed/simple; bh=UpuDhCtO1abQsM/sljVoZYRmS4qrJGRqJuhHDNEtMlM=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=YGtqSYwHRR0V3Ngz7bSQnitdexZocjYyCwo07m+giiosmTAGtTWW4zkfleLPp1e0Rk+02rmbcv/RAKynyDHn4PMspOyXQP7xQg/GjQCsglNdXR1GsOt1z1P3dxTxivd90JJ7oNMwR3hCzwYlkPkEWREEetmJ5otQmMZeO9OrRVg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Re+t7wup; arc=none smtp.client-ip=100.103.45.18 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Re+t7wup" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E7BF61F00A3F; Fri, 22 May 2026 11:32:35 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=kernel.org; s=k20260515; t=1779449558; bh=A1/jHdabRmrVtf8CjfHpK5fQ1bWAk/qm0GAv69dLX+s=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=Re+t7wupo7auz9uokeFuCF2XB2REMlWo4KriB8U0OzGnsxTcBwaPqYiewREnRG8QA ie62tKtZpqmfzlbcoLSxaqyJ8C8rkQOgFsp+bKQr1NnXh4XB2vvav2FF4Dxk83is5O tFfNrZ+nVdDGD77ibFHmuBlejojQ7iI35FtlQo3GVGk63HfcDv2X+qJwoh5hVTJo2P BkTGDvrdo69TKGLFmAFkzjb4+kPJ0WaW4LzlPa+knHuDUcPdIyuu5BttfUZi9MZndf UTBv5ohZRjPwYMifCihGCbxLSTRu/AsD0Pb+HGmpiSf1a5ZYK8CSX6aS/bE0uh11l2 s3Q7GPVYpV2Xg== From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= To: Alexander Duyck , Jakub Kicinski , kernel-team@meta.com, Andrew Lunn , "David S. Miller" , Eric Dumazet , Paolo Abeni , Shuah Khan , netdev@vger.kernel.org Cc: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= , Jacob Keller , Mohsin Bashir , "Mike Marciniszyn (Meta)" , Pavel Begunkov , linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Subject: [PATCH net-next 1/3] fbnic: Track BDQ fragment geometry per ring Date: Fri, 22 May 2026 13:32:20 +0200 Message-ID: <20260522113225.241337-2-bjorn@kernel.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260522113225.241337-1-bjorn@kernel.org> References: <20260522113225.241337-1-bjorn@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Fbnic programs BDQs in 4 KiB fragments, but the driver has so far decoded buffer IDs using PAGE_SIZE-derived constants. That works while HPQ and PPQ both use PAGE_SIZE buffers, but it makes the fragment layout global even though the layout really belongs to the queue. Store the fragment shift on each BDQ and use it when programming buffer descriptors and decoding receive completions. HPQ and PPQ still get the same PAGE_SIZE-derived value, so this does not change behavior yet. This prepares PPQ to use a larger io_uring zcrx buffer size without changing the HPQ layout. Signed-off-by: Bj=C3=B6rn T=C3=B6pel --- drivers/net/ethernet/meta/fbnic/fbnic_csr.h | 29 ++------ .../net/ethernet/meta/fbnic/fbnic_debugfs.c | 5 +- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 68 ++++++++++++------- drivers/net/ethernet/meta/fbnic/fbnic_txrx.h | 6 ++ 4 files changed, 58 insertions(+), 50 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethe= rnet/meta/fbnic/fbnic_csr.h index 64b958df7774..0ff972f8febc 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -109,17 +109,13 @@ enum { =20 /* Rx Buffer Descriptor Format * - * The layout of this can vary depending on the page size of the system. + * Buffer descriptors describe 4 KiB BDQ fragments. A BDQ buffer may be one + * fragment, or a power-of-two number of fragments. * - * If the page size is 4K then the layout will simply consist of ID for - * the 16 most significant bits, and the lower 46 are essentially the page - * address with the lowest 12 bits being reserved 0 due to the fact that - * a page will be aligned. - * - * If the page size is larger than 4K then the lower n bits of the ID and - * page address will be reserved for the fragment ID. This fragment will - * be 4K in size and will be used to index both the DMA address and the ID - * by the same amount. + * The address field stores the 4 KiB-aligned DMA address. The ID field st= ores + * the software buffer ID, with the low n bits used as the fragment ID whe= n a + * buffer spans multiple 4 KiB fragments. The driver increments both the + * address and ID by one fragment for each descriptor belonging to a buffe= r. */ #define FBNIC_BD_DESC_ADDR_MASK DESC_GENMASK(45, 12) #define FBNIC_BD_DESC_ID_MASK DESC_GENMASK(63, 48) @@ -127,16 +123,6 @@ enum { (FBNIC_BD_DESC_ADDR_MASK & ~(FBNIC_BD_DESC_ADDR_MASK - 1)) #define FBNIC_BD_FRAG_COUNT \ (PAGE_SIZE / FBNIC_BD_FRAG_SIZE) -#define FBNIC_BD_FRAG_ADDR_MASK \ - (FBNIC_BD_DESC_ADDR_MASK & \ - ~(FBNIC_BD_DESC_ADDR_MASK * FBNIC_BD_FRAG_COUNT)) -#define FBNIC_BD_FRAG_ID_MASK \ - (FBNIC_BD_DESC_ID_MASK & \ - ~(FBNIC_BD_DESC_ID_MASK * FBNIC_BD_FRAG_COUNT)) -#define FBNIC_BD_PAGE_ADDR_MASK \ - (FBNIC_BD_DESC_ADDR_MASK & ~FBNIC_BD_FRAG_ADDR_MASK) -#define FBNIC_BD_PAGE_ID_MASK \ - (FBNIC_BD_DESC_ID_MASK & ~FBNIC_BD_FRAG_ID_MASK) =20 /* Rx Completion Queue Descriptors */ #define FBNIC_RCD_TYPE_MASK DESC_GENMASK(62, 61) @@ -151,9 +137,6 @@ enum { =20 /* Address/Length Completion Descriptors */ #define FBNIC_RCD_AL_BUFF_ID_MASK DESC_GENMASK(15, 0) -#define FBNIC_RCD_AL_BUFF_FRAG_MASK (FBNIC_BD_FRAG_COUNT - 1) -#define FBNIC_RCD_AL_BUFF_PAGE_MASK \ - (FBNIC_RCD_AL_BUFF_ID_MASK & ~FBNIC_RCD_AL_BUFF_FRAG_MASK) #define FBNIC_RCD_AL_BUFF_LEN_MASK DESC_GENMASK(28, 16) #define FBNIC_RCD_AL_BUFF_OFF_MASK DESC_GENMASK(43, 32) #define FBNIC_RCD_AL_PAGE_FIN DESC_BIT(60) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c b/drivers/net/= ethernet/meta/fbnic/fbnic_debugfs.c index 3c4563c8f403..1cd9dbab423b 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_debugfs.c @@ -181,8 +181,8 @@ static int fbnic_dbg_tcq_desc_seq_show(struct seq_file = *s, void *v) static int fbnic_dbg_bdq_desc_seq_show(struct seq_file *s, void *v) { struct fbnic_ring *ring =3D s->private; + unsigned int desc_count, i; char hdr[80]; - int i; =20 /* Generate header on first entry */ fbnic_dbg_ring_show(s); @@ -197,7 +197,8 @@ static int fbnic_dbg_bdq_desc_seq_show(struct seq_file = *s, void *v) return 0; } =20 - for (i =3D 0; i < (ring->size_mask + 1) * FBNIC_BD_FRAG_COUNT; i++) { + desc_count =3D (ring->size_mask + 1) * fbnic_bdq_frag_count(ring); + for (i =3D 0; i < desc_count; i++) { u64 bd =3D le64_to_cpu(ring->desc[i]); =20 seq_printf(s, "%04x %#04llx %#014llx\n", i, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/eth= ernet/meta/fbnic/fbnic_txrx.c index 9cd85a0d0c3a..9a9675d04c16 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -870,19 +870,31 @@ static void fbnic_clean_bdq(struct fbnic_ring *ring, = unsigned int hw_head, ring->head =3D head; } =20 +static u16 fbnic_rcd_bdq_idx(const struct fbnic_ring *bdq, u64 rcd) +{ + return FIELD_GET(FBNIC_RCD_AL_BUFF_ID_MASK, rcd) >> bdq->frag_shift; +} + +static unsigned int fbnic_rcd_frag_offset(const struct fbnic_ring *bdq, + u64 rcd) +{ + return (FIELD_GET(FBNIC_RCD_AL_BUFF_ID_MASK, rcd) & + (fbnic_bdq_frag_count(bdq) - 1)) * FBNIC_BD_FRAG_SIZE; +} + static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netme= m) { - __le64 *bdq_desc =3D &bdq->desc[id * FBNIC_BD_FRAG_COUNT]; + u16 frag_count =3D fbnic_bdq_frag_count(bdq); + __le64 *bdq_desc =3D &bdq->desc[id * frag_count]; dma_addr_t dma =3D page_pool_get_dma_addr_netmem(netmem); - u64 bd, i =3D FBNIC_BD_FRAG_COUNT; + u64 bd, i =3D frag_count; =20 - bd =3D (FBNIC_BD_PAGE_ADDR_MASK & dma) | - FIELD_PREP(FBNIC_BD_PAGE_ID_MASK, id); + bd =3D (FBNIC_BD_DESC_ADDR_MASK & dma) | + FIELD_PREP(FBNIC_BD_DESC_ID_MASK, (u64)id << bdq->frag_shift); =20 - /* In the case that a page size is larger than 4K we will map a - * single page to multiple fragments. The fragments will be - * FBNIC_BD_FRAG_COUNT in size and the lower n bits will be use - * to indicate the individual fragment IDs. + /* In the case that the buffer is larger than 4K we will map it + * to multiple fragments. The lower n bits will be used to + * indicate the individual fragment IDs. */ do { *bdq_desc =3D cpu_to_le64(bd); @@ -927,7 +939,7 @@ static void fbnic_fill_bdq(struct fbnic_ring *bdq) /* Force DMA writes to flush before writing to tail */ dma_wmb(); =20 - writel(i * FBNIC_BD_FRAG_COUNT, bdq->doorbell); + writel(i * fbnic_bdq_frag_count(bdq), bdq->doorbell); } } =20 @@ -958,7 +970,8 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector = *nv, u64 rcd, struct fbnic_pkt_buff *pkt, struct fbnic_q_triad *qt) { - unsigned int hdr_pg_idx =3D FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); + struct fbnic_ring *hpq =3D &qt->sub0; + unsigned int hdr_pg_idx =3D fbnic_rcd_bdq_idx(hpq, rcd); unsigned int hdr_pg_off =3D FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); struct page *page =3D fbnic_page_pool_get_head(qt, hdr_pg_idx); unsigned int len =3D FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); @@ -976,8 +989,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector = *nv, u64 rcd, headroom =3D hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD; frame_sz =3D hdr_pg_end - hdr_pg_start; xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq); - hdr_pg_start +=3D (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * - FBNIC_BD_FRAG_SIZE; + hdr_pg_start +=3D fbnic_rcd_frag_offset(hpq, rcd); =20 /* Sync DMA buffer */ dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page), @@ -998,7 +1010,8 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector= *nv, u64 rcd, struct fbnic_pkt_buff *pkt, struct fbnic_q_triad *qt) { - unsigned int pg_idx =3D FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); + struct fbnic_ring *ppq =3D &qt->sub1; + unsigned int pg_idx =3D fbnic_rcd_bdq_idx(ppq, rcd); unsigned int pg_off =3D FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); unsigned int len =3D FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); netmem_ref netmem =3D fbnic_page_pool_get_data(qt, pg_idx); @@ -1008,12 +1021,11 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vec= tor *nv, u64 rcd, truesize =3D FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ? FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128); =20 - pg_off +=3D (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * - FBNIC_BD_FRAG_SIZE; + pg_off +=3D fbnic_rcd_frag_offset(ppq, rcd); =20 /* Sync DMA buffer */ - page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem, - pg_off, truesize); + page_pool_dma_sync_netmem_for_cpu(ppq->page_pool, netmem, pg_off, + truesize); =20 added =3D xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize); if (unlikely(!added)) { @@ -1256,12 +1268,12 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector= *nv, =20 switch (FIELD_GET(FBNIC_RCD_TYPE_MASK, rcd)) { case FBNIC_RCD_TYPE_HDR_AL: - head0 =3D FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); + head0 =3D fbnic_rcd_bdq_idx(&qt->sub0, rcd); fbnic_pkt_prepare(nv, rcd, pkt, qt); =20 break; case FBNIC_RCD_TYPE_PAY_AL: - head1 =3D FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); + head1 =3D fbnic_rcd_bdq_idx(&qt->sub1, rcd); fbnic_add_rx_frag(nv, rcd, pkt, qt); =20 break; @@ -1609,6 +1621,7 @@ static void fbnic_ring_init(struct fbnic_ring *ring, = u32 __iomem *doorbell, ring->doorbell =3D doorbell; ring->q_idx =3D q_idx; ring->flags =3D flags; + ring->frag_shift =3D ilog2(FBNIC_BD_FRAG_COUNT); ring->deferred_head =3D -1; } =20 @@ -1890,15 +1903,18 @@ static int fbnic_alloc_rx_ring_desc(struct fbnic_ne= t *fbn, size_t desc_size =3D sizeof(*rxr->desc); u32 rxq_size; size_t size; + u16 frag_count; =20 switch (rxr->doorbell - fbnic_ring_csr_base(rxr)) { case FBNIC_QUEUE_BDQ_HPQ_TAIL: - rxq_size =3D fbn->hpq_size / FBNIC_BD_FRAG_COUNT; - desc_size *=3D FBNIC_BD_FRAG_COUNT; + frag_count =3D fbnic_bdq_frag_count(rxr); + rxq_size =3D fbn->hpq_size / frag_count; + desc_size *=3D frag_count; break; case FBNIC_QUEUE_BDQ_PPQ_TAIL: - rxq_size =3D fbn->ppq_size / FBNIC_BD_FRAG_COUNT; - desc_size *=3D FBNIC_BD_FRAG_COUNT; + frag_count =3D fbnic_bdq_frag_count(rxr); + rxq_size =3D fbn->ppq_size / frag_count; + desc_size *=3D frag_count; break; case FBNIC_QUEUE_RCQ_HEAD: rxq_size =3D fbn->rcq_size; @@ -2564,7 +2580,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, = struct fbnic_ring *ppq) hpq->tail =3D 0; hpq->head =3D 0; =20 - log_size =3D fls(hpq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT); + log_size =3D fls(hpq->size_mask) + hpq->frag_shift; =20 /* Store descriptor ring address and size */ fbnic_ring_wr32(hpq, FBNIC_QUEUE_BDQ_HPQ_BAL, lower_32_bits(hpq->dma)); @@ -2576,7 +2592,7 @@ static void fbnic_enable_bdq(struct fbnic_ring *hpq, = struct fbnic_ring *ppq) if (!ppq->size_mask) goto write_ctl; =20 - log_size =3D fls(ppq->size_mask) + ilog2(FBNIC_BD_FRAG_COUNT); + log_size =3D fls(ppq->size_mask) + ppq->frag_shift; =20 /* Add enabling of PPQ to BDQ control */ bdq_ctl |=3D FBNIC_QUEUE_BDQ_CTL_PPQ_ENABLE; @@ -2845,8 +2861,10 @@ static int fbnic_queue_mem_alloc(struct net_device *= dev, =20 fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx, real->sub0.flags); + qt->sub0.frag_shift =3D real->sub0.frag_shift; fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx, real->sub1.flags); + qt->sub1.frag_shift =3D real->sub1.frag_shift; fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx, real->cmpl.flags); =20 diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/eth= ernet/meta/fbnic/fbnic_txrx.h index e03c9d2c38dc..332cd0e29e15 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -121,6 +121,7 @@ struct fbnic_ring { u16 size_mask; /* Size of ring in descriptors - 1 */ u8 q_idx; /* Logical netdev ring index */ u8 flags; /* Ring flags (FBNIC_RING_F_*) */ + u8 frag_shift; /* BDQ: ilog2(buf_size / 4096) */ =20 u32 head, tail; /* Head/Tail of ring */ =20 @@ -162,6 +163,11 @@ struct fbnic_napi_vector { =20 extern const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops; =20 +static inline u16 fbnic_bdq_frag_count(const struct fbnic_ring *bdq) +{ + return 1U << bdq->frag_shift; +} + netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev); netdev_features_t fbnic_features_check(struct sk_buff *skb, struct net_device *dev, --=20 2.53.0 From nobody Sun May 24 18:41:10 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-alma10-1.taild15c8.ts.net [100.103.45.18]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4E6793D4131; Fri, 22 May 2026 11:32:42 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=100.103.45.18 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779449565; cv=none; b=VXzZYWq3mtmsA7e1Udw/t93MWvS9lpSYP7v1dZ6jLLCcD3ltMmpJUfJNYJ/p6Ov8p2t6768+yzvt+ACUhSnV3PXHsHnbwCW2hGhw7hnOnrD4L8riAgWx1+zn5WS3sURaZN04ee6o5vJH/gMVvrC4mCrpITgEfC7fyhpyv/K0VMI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779449565; c=relaxed/simple; bh=ku8DBQRCIbZKc8iQag9r7zooL+B7U2bRLgO4QQyao7I=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=K479C5yeUc+aA7e5YfZaJtZWjClTLoUGXSE6MOTDfS6WzXR38D0hFgXShFHgYC3Ur+21CDRO6sWPt7UqwXi0lMJb6u7Q4ngvMLbOcTHrO9LxcAnk8coPcSKSe52JB42RzYBKkA91znnfZJFNSOBH7Fv+y7ukRoZc26VhUAp77CE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ivb3r9bC; arc=none smtp.client-ip=100.103.45.18 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ivb3r9bC" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 557701F00ADE; Fri, 22 May 2026 11:32:39 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=kernel.org; s=k20260515; t=1779449562; bh=6zikfVjQRcLYEUZCe+ZKJoOY4gEgswqSMq69Lf0Fe6Y=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=ivb3r9bCklaWI/M/ALbmUOQ9wjap6gYT+UXVQwSb0CoEILiudN1RSof17JJ4t4CZC Dv48zLDiHVqbijBKFeu1AVpnuhIgjB00tox4QyUTAdTxHAsX87sj4CmtNaqaGYvXJk zIAgAvwNEsm4ITQhUnzfs12xrvyKHrcqpvrJvlRGM8c/37QufLKa6dNS3Hx4kD47d/ Nj2mtn/F5Z9qXnxNCxe+vVWRur4Ejq9jNpGPg2/sGJKJNzJLAIH3/uSol9qfAXes5I 5FYEA6Y6cXpvwo+OunlfDbtLxF8hHir+LceQkjOdwt15dQUXHjwWzo9IvY8AS+lIqh TwUzClvjftHEA== From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= To: Alexander Duyck , Jakub Kicinski , kernel-team@meta.com, Andrew Lunn , "David S. Miller" , Eric Dumazet , Paolo Abeni , Shuah Khan , netdev@vger.kernel.org Cc: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= , Jacob Keller , Mohsin Bashir , "Mike Marciniszyn (Meta)" , Pavel Begunkov , linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Subject: [PATCH net-next 2/3] fbnic: Support larger zcrx receive buffers Date: Fri, 22 May 2026 13:32:21 +0200 Message-ID: <20260522113225.241337-3-bjorn@kernel.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260522113225.241337-1-bjorn@kernel.org> References: <20260522113225.241337-1-bjorn@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable io_uring zcrx can provide receive buffers larger than PAGE_SIZE through QCFG_RX_PAGE_SIZE. Advertise the parameter and use the configured size when creating the PPQ page pool. The NIC still consumes PPQ buffers as 4 KiB BDQ fragments. For larger zcrx buffers, allocate the page pool with the requested order and set the PPQ fragment shift from rx_page_size, so one net_iov can cover multiple hardware fragments. The core validates the zcrx request and checks that the imported memory can be represented as rx_buf_len-sized DMA chunks. Fbnic still has to validate the rendered queue configuration against its own BDQ geometry: larger receive buffers consume multiple 4 KiB PPQ entries, and the PPQ must retain usable depth after that expansion. Use the rendered per-queue rx_page_size on the normal open path as well. This preserves a memory-provider binding made while the netdev is down instead of falling back to the default PPQ geometry on open. Signed-off-by: Bj=C3=B6rn T=C3=B6pel --- drivers/net/ethernet/meta/fbnic/fbnic_txrx.c | 102 +++++++++++++++++-- 1 file changed, 94 insertions(+), 8 deletions(-) diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/eth= ernet/meta/fbnic/fbnic_txrx.c index 9a9675d04c16..57b3277fcd4e 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -1559,9 +1559,62 @@ void fbnic_free_napi_vectors(struct fbnic_net *fbn) fbnic_free_napi_vector(fbn, fbn->napi[i]); } =20 +static u32 fbnic_qcfg_rx_page_size(const struct netdev_queue_config *qcfg) +{ + return qcfg->rx_page_size ?: PAGE_SIZE; +} + +static u32 fbnic_rx_page_frag_count(u32 rx_page_size) +{ + return rx_page_size / FBNIC_BD_FRAG_SIZE; +} + +static u8 fbnic_rx_page_frag_shift(u32 rx_page_size) +{ + return ilog2(fbnic_rx_page_frag_count(rx_page_size)); +} + +static int fbnic_validate_rx_page_size(struct fbnic_net *fbn, u32 rx_page_= size, + struct netlink_ext_ack *extack) +{ + u32 frag_count, ppq_bufs; + + if (!is_power_of_2(rx_page_size)) { + NL_SET_ERR_MSG_MOD(extack, + "rx_page_size must be a power of 2"); + return -EINVAL; + } + + if (rx_page_size < PAGE_SIZE) { + NL_SET_ERR_MSG_MOD(extack, + "rx_page_size must be at least PAGE_SIZE"); + return -EINVAL; + } + + if (!IS_ALIGNED(rx_page_size, FBNIC_BD_FRAG_SIZE)) { + NL_SET_ERR_MSG_MOD(extack, + "rx_page_size must be 4K aligned"); + return -EINVAL; + } + + frag_count =3D fbnic_rx_page_frag_count(rx_page_size); + ppq_bufs =3D fbn->ppq_size / frag_count; + /* The PPQ is sized in 4K hardware fragments, but the software ring + * has one entry per page-pool allocation. Keep at least two entries so + * empty/full ring accounting still leaves one postable buffer. + */ + if (ppq_bufs < 2) { + NL_SET_ERR_MSG_MOD(extack, + "rx_page_size leaves too few PPQ buffers"); + return -EINVAL; + } + + return 0; +} + static int fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt, - unsigned int rxq_idx) + unsigned int rxq_idx, u32 rx_page_size) { struct page_pool_params pp_params =3D { .order =3D 0, @@ -1596,6 +1649,8 @@ fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, stru= ct fbnic_q_triad *qt, =20 qt->sub0.page_pool =3D pp; if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) { + pp_params.order =3D ilog2(rx_page_size) - PAGE_SHIFT; + pp_params.max_len =3D rx_page_size; pp_params.flags |=3D PP_FLAG_ALLOW_UNREADABLE_NETMEM; pp_params.dma_dir =3D DMA_FROM_DEVICE; =20 @@ -2018,12 +2073,19 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic= _net *fbn, =20 static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn, struct fbnic_napi_vector *nv, - struct fbnic_q_triad *qt) + struct fbnic_q_triad *qt, + u32 rx_page_size) { struct device *dev =3D fbn->netdev->dev.parent; int err; =20 - err =3D fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx); + err =3D fbnic_validate_rx_page_size(fbn, rx_page_size, NULL); + if (err) + return err; + + qt->sub1.frag_shift =3D fbnic_rx_page_frag_shift(rx_page_size); + + err =3D fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx, rx_page_size); if (err) return err; =20 @@ -2087,7 +2149,13 @@ static int fbnic_alloc_nv_resources(struct fbnic_net= *fbn, =20 /* Allocate Rx Resources */ for (j =3D 0; j < nv->rxt_count; j++, i++) { - err =3D fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]); + struct netdev_queue_config qcfg; + u32 rx_page_size; + + netdev_queue_config(fbn->netdev, nv->qt[i].cmpl.q_idx, &qcfg); + rx_page_size =3D fbnic_qcfg_rx_page_size(&qcfg); + err =3D fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i], + rx_page_size); if (err) goto free_qt_resources; } @@ -2852,9 +2920,16 @@ static int fbnic_queue_mem_alloc(struct net_device *= dev, const struct fbnic_q_triad *real; struct fbnic_q_triad *qt =3D qmem; struct fbnic_napi_vector *nv; + u32 rx_page_size =3D fbnic_qcfg_rx_page_size(qcfg); + int err; =20 - if (!netif_running(dev)) - return fbnic_alloc_qt_page_pools(fbn, qt, idx); + if (!netif_running(dev)) { + err =3D fbnic_validate_rx_page_size(fbn, rx_page_size, NULL); + if (err) + return err; + + return fbnic_alloc_qt_page_pools(fbn, qt, idx, rx_page_size); + } =20 real =3D container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl); nv =3D fbn->napi[idx % fbn->num_napi]; @@ -2864,11 +2939,20 @@ static int fbnic_queue_mem_alloc(struct net_device = *dev, qt->sub0.frag_shift =3D real->sub0.frag_shift; fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx, real->sub1.flags); - qt->sub1.frag_shift =3D real->sub1.frag_shift; fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx, real->cmpl.flags); =20 - return fbnic_alloc_rx_qt_resources(fbn, nv, qt); + return fbnic_alloc_rx_qt_resources(fbn, nv, qt, rx_page_size); +} + +static int fbnic_validate_qcfg(struct net_device *dev, + struct netdev_queue_config *qcfg, + struct netlink_ext_ack *extack) +{ + struct fbnic_net *fbn =3D netdev_priv(dev); + + return fbnic_validate_rx_page_size(fbn, fbnic_qcfg_rx_page_size(qcfg), + extack); } =20 static void fbnic_queue_mem_free(struct net_device *dev, void *qmem) @@ -2970,4 +3054,6 @@ const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_o= ps =3D { .ndo_queue_mem_free =3D fbnic_queue_mem_free, .ndo_queue_start =3D fbnic_queue_start, .ndo_queue_stop =3D fbnic_queue_stop, + .ndo_validate_qcfg =3D fbnic_validate_qcfg, + .supported_params =3D QCFG_RX_PAGE_SIZE, }; --=20 2.53.0 From nobody Sun May 24 18:41:10 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-alma10-1.taild15c8.ts.net [100.103.45.18]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 161D43E5EDA; Fri, 22 May 2026 11:32:45 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=100.103.45.18 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779449567; cv=none; b=J7lpTzXVaqLOwkPaywwdZ9ghbeOFWyVDqrV/oE4xMtv+jSZfi/wgfCKqnwtYdFhTdPOhhba56vG665pK4y9nvcPKlsonnb90ZE0Rpu73qZhjxYvNf5JQ8O7sAb5o0Z8Y86xLOUGGcH7kx2B/dhtbNVxXFDnpjS/PDoo4Mqffntw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1779449567; c=relaxed/simple; bh=R1KMAI23+7i1Wc6O1oXgl53R9JvNI3FJ8hKBnXBkFx0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version:Content-Type; b=tW8RzPOL9TLkYczXM+48GiHYUvlBqUD0LfxnQHott4OPzKjjWlKk1R4Gv5936BSPIKdaKwSKlTuL3rL97ZpE0IlzG/rMzrGtNyYUahGqTzIEgMK+VOKahJ6JO6wEmGlPgHOax6r4LtyidjROb5RbhMHXFat2/YJrg6WRz2Zq5KE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Xstujc3T; arc=none smtp.client-ip=100.103.45.18 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Xstujc3T" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B74451F000E9; Fri, 22 May 2026 11:32:42 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=kernel.org; s=k20260515; t=1779449565; bh=FA0JT2MU1cjq+IOgE1QiCa+xKqbwI7TFJcp+fEW0hS0=; h=From:To:Cc:Subject:Date:In-Reply-To:References; b=Xstujc3TsCHLHgbRC/WvNwHYPcgN/PSk36AA87VL0Rsvsp4wx4sJ55AaVTr29sb4T 8KduXT1yFtgS2ysVH+wgWqYzPRPA/1wvpi+eXpMu6oxOPLLvoLJ+gFWEJHDdoK83YE gO4/HHM/L2ZYYhBrPG4qkbEUUCcaUx77UKPKZ1aoY0cr/QvGNesNDS7lXDvUCyb5ah v3SldAS8hyB8M/3IGNbbUtnmPiaBjDjkn5PZR+jMSbjxRQhEBkNBzs/wVMzAUzPxUI a+U3zqpSFFUmv+4h6aXuJ9F9V/zIG9AVGsZ9B/O7yrgHnphGjAcxRKbtwkGh7zH51X vjYHZ2vioPCkQ== From: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= To: Alexander Duyck , Jakub Kicinski , kernel-team@meta.com, Andrew Lunn , "David S. Miller" , Eric Dumazet , Paolo Abeni , Shuah Khan , netdev@vger.kernel.org Cc: =?UTF-8?q?Bj=C3=B6rn=20T=C3=B6pel?= , Jacob Keller , Mohsin Bashir , "Mike Marciniszyn (Meta)" , Pavel Begunkov , linux-kernel@vger.kernel.org, linux-kselftest@vger.kernel.org Subject: [PATCH net-next 3/3] selftests: drv-net: Add zcrx payload offset check Date: Fri, 22 May 2026 13:32:22 +0200 Message-ID: <20260522113225.241337-4-bjorn@kernel.org> X-Mailer: git-send-email 2.54.0 In-Reply-To: <20260522113225.241337-1-bjorn@kernel.org> References: <20260522113225.241337-1-bjorn@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Add an optional iou-zcrx receiver check for payload CQE offsets. With -F, the receiver fails if no zero-copy receive CQE lands at or beyond the requested offset within an rx_buf_len-sized buffer. This is useful for manual driver testing where the driver is expected to split a larger zcrx buffer into smaller hardware receive fragments. Do not wire it into the generic large-chunk test, since different drivers may legitimately return different CQE boundaries. Signed-off-by: Bj=C3=B6rn T=C3=B6pel --- .../selftests/drivers/net/hw/iou-zcrx.c | 28 +++++++++++++++++-- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/test= ing/selftests/drivers/net/hw/iou-zcrx.c index 240d13dbc54e..0fb0410aaada 100644 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c @@ -85,6 +85,8 @@ static int cfg_send_size =3D SEND_SIZE; static struct sockaddr_in6 cfg_addr; static unsigned int cfg_rx_buf_len; static bool cfg_dry_run; +static bool cfg_check_payload_offset, cfg_seen_payload_offset; +static unsigned int cfg_min_payload_offset; =20 static char *payload; static void *area_ptr; @@ -298,6 +300,13 @@ static void process_recvzc(struct io_uring *ring, stru= ct io_uring_cqe *cqe) mask =3D (1ULL << IORING_ZCRX_AREA_SHIFT) - 1; data =3D (char *)area_ptr + (rcqe->off & mask); =20 + if (cfg_check_payload_offset) { + unsigned int rx_buf_len =3D cfg_rx_buf_len ?: page_size; + + if ((rcqe->off & mask) % rx_buf_len >=3D cfg_min_payload_offset) + cfg_seen_payload_offset =3D true; + } + for (i =3D 0; i < n; i++) { if (*(data + i) !=3D payload[(received + i)]) error(1, 0, "payload mismatch at %d", i); @@ -374,6 +383,9 @@ static void run_server(void) =20 if (!stop) error(1, 0, "test failed\n"); + if (cfg_check_payload_offset && !cfg_seen_payload_offset) + error(1, 0, "no payload CQE at offset >=3D %u\n", + cfg_min_payload_offset); } =20 static void run_client(void) @@ -406,8 +418,11 @@ static void run_client(void) =20 static void usage(const char *filepath) { - error(1, 0, "Usage: %s (-4|-6) (-s|-c) -h -p " - "-l -i -q", filepath); + error(1, 0, + "Usage: %s (-4|-6) (-s|-c) -h -p\n" + "\t-l -i -q\n" + "\t[-x] [-F] [-d]\n", + filepath); } =20 static void parse_opts(int argc, char **argv) @@ -425,7 +440,7 @@ static void parse_opts(int argc, char **argv) usage(argv[0]); cfg_payload_len =3D max_payload_len; =20 - while ((c =3D getopt(argc, argv, "sch:p:l:i:q:o:z:x:d")) !=3D -1) { + while ((c =3D getopt(argc, argv, "sch:p:l:i:q:o:z:x:F:d")) !=3D -1) { switch (c) { case 's': if (cfg_client) @@ -463,6 +478,10 @@ static void parse_opts(int argc, char **argv) case 'x': cfg_rx_buf_len =3D page_size * strtoul(optarg, NULL, 0); break; + case 'F': + cfg_check_payload_offset =3D true; + cfg_min_payload_offset =3D strtoul(optarg, NULL, 0); + break; case 'd': cfg_dry_run =3D true; break; @@ -484,6 +503,9 @@ static void parse_opts(int argc, char **argv) =20 if (cfg_payload_len > max_payload_len) error(1, 0, "-l: payload exceeds max (%d)", max_payload_len); + if (cfg_check_payload_offset && + cfg_min_payload_offset >=3D (cfg_rx_buf_len ?: page_size)) + error(1, 0, "-F: offset outside rx_buf_len"); } =20 int main(int argc, char **argv) --=20 2.53.0