Update RX data path to allocate and use RX queue DMA buffers with
proper size based on potentially various MTU sizes.
Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
---
V3:
Refactored to multiple patches for readability. Suggested by Jacob Keller.
V2:
Refactored to multiple patches for readability. Suggested by Yunsheng Lin.
---
drivers/net/ethernet/microsoft/mana/mana_en.c | 38 ++++++++++++++-----
include/net/mana/mana.h | 7 ++++
2 files changed, 35 insertions(+), 10 deletions(-)
diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 911954ff84ee..8e7fa6e9c3b5 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1185,10 +1185,10 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
}
-static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
- struct xdp_buff *xdp)
+static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
+ uint pkt_len, struct xdp_buff *xdp)
{
- struct sk_buff *skb = napi_build_skb(buf_va, PAGE_SIZE);
+ struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);
if (!skb)
return NULL;
@@ -1196,11 +1196,12 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
if (xdp->data_hard_start) {
skb_reserve(skb, xdp->data - xdp->data_hard_start);
skb_put(skb, xdp->data_end - xdp->data);
- } else {
- skb_reserve(skb, XDP_PACKET_HEADROOM);
- skb_put(skb, pkt_len);
+ return skb;
}
+ skb_reserve(skb, rxq->headroom);
+ skb_put(skb, pkt_len);
+
return skb;
}
@@ -1233,7 +1234,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
if (act != XDP_PASS && act != XDP_TX)
goto drop_xdp;
- skb = mana_build_skb(buf_va, pkt_len, &xdp);
+ skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);
if (!skb)
goto drop;
@@ -1301,6 +1302,14 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
if (rxq->xdp_save_va) {
va = rxq->xdp_save_va;
rxq->xdp_save_va = NULL;
+ } else if (rxq->alloc_size > PAGE_SIZE) {
+ if (is_napi)
+ va = napi_alloc_frag(rxq->alloc_size);
+ else
+ va = netdev_alloc_frag(rxq->alloc_size);
+
+ if (!va)
+ return NULL;
} else {
page = dev_alloc_page();
if (!page)
@@ -1309,7 +1318,7 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
va = page_to_virt(page);
}
- *da = dma_map_single(dev, va + XDP_PACKET_HEADROOM, rxq->datasize,
+ *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
DMA_FROM_DEVICE);
if (dma_mapping_error(dev, *da)) {
@@ -1732,7 +1741,7 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
u32 buf_idx;
int ret;
- WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);
+ WARN_ON(rxq->datasize == 0);
*rxq_size = 0;
*cq_size = 0;
@@ -1788,6 +1797,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
struct gdma_dev *gd = apc->ac->gdma_dev;
struct mana_obj_spec wq_spec;
struct mana_obj_spec cq_spec;
+ unsigned int mtu = ndev->mtu;
struct gdma_queue_spec spec;
struct mana_cq *cq = NULL;
struct gdma_context *gc;
@@ -1807,7 +1817,15 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
rxq->rxq_idx = rxq_idx;
rxq->rxobj = INVALID_MANA_HANDLE;
- rxq->datasize = ALIGN(ETH_FRAME_LEN, 64);
+ rxq->datasize = ALIGN(mtu + ETH_HLEN, 64);
+
+ if (mtu > MANA_XDP_MTU_MAX) {
+ rxq->alloc_size = mtu + MANA_RXBUF_PAD;
+ rxq->headroom = 0;
+ } else {
+ rxq->alloc_size = mtu + MANA_RXBUF_PAD + XDP_PACKET_HEADROOM;
+ rxq->headroom = XDP_PACKET_HEADROOM;
+ }
err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
if (err)
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 037bcabf6b98..fee99d704281 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -291,6 +291,11 @@ struct mana_recv_buf_oob {
struct gdma_posted_wqe_info wqe_inf;
};
+#define MANA_RXBUF_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+ + ETH_HLEN)
+
+#define MANA_XDP_MTU_MAX (PAGE_SIZE - MANA_RXBUF_PAD - XDP_PACKET_HEADROOM)
+
struct mana_rxq {
struct gdma_queue *gdma_rq;
/* Cache the gdma receive queue id */
@@ -300,6 +305,8 @@ struct mana_rxq {
u32 rxq_idx;
u32 datasize;
+ u32 alloc_size;
+ u32 headroom;
mana_handle_t rxobj;
--
2.25.1
On Wed, 12 Apr 2023 14:16:02 -0700 Haiyang Zhang wrote:
> + } else if (rxq->alloc_size > PAGE_SIZE) {
> + if (is_napi)
> + va = napi_alloc_frag(rxq->alloc_size);
Allocating frag larger than a page is not safe.
Frag allocator falls back to allocating single pages, doesn't it?
> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Friday, April 14, 2023 10:06 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui
> <decui@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Paul Rosswurm
> <paulros@microsoft.com>; olaf@aepfle.de; vkuznets@redhat.com;
> davem@davemloft.net; wei.liu@kernel.org; edumazet@google.com;
> pabeni@redhat.com; leon@kernel.org; Long Li <longli@microsoft.com>;
> ssengar@linux.microsoft.com; linux-rdma@vger.kernel.org;
> daniel@iogearbox.net; john.fastabend@gmail.com; bpf@vger.kernel.org;
> ast@kernel.org; Ajay Sharma <sharmaajay@microsoft.com>;
> hawk@kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle
> various MTU sizes
>
> On Wed, 12 Apr 2023 14:16:02 -0700 Haiyang Zhang wrote:
> > + } else if (rxq->alloc_size > PAGE_SIZE) {
> > + if (is_napi)
> > + va = napi_alloc_frag(rxq->alloc_size);
>
> Allocating frag larger than a page is not safe.
I saw other drivers doing this - use napi_alloc_frag for size bigger than a page.
And it returns compound page. Why it's not safe? Should we use other allocator
when need compound pages?
> Frag allocator falls back to allocating single pages, doesn't it?
Actually I checked it. Compound page is still returned for size smaller than PAGE_SIZE,
so I used single page allocation for that.
Thanks,
- Haiyang
On Sat, 15 Apr 2023 14:25:29 +0000 Haiyang Zhang wrote:
> > Allocating frag larger than a page is not safe.
>
> I saw other drivers doing this - use napi_alloc_frag for size bigger than a page.
> And it returns compound page. Why it's not safe? Should we use other allocator
> when need compound pages?

I believe so. There was a thread about this within the last year.
Someone was trying to fix the page frag allocator to not fall back
to order 0 pages in case of failure if requested size is > PAGE_SIZE.
But there was push back and folks were saying that it's simply not
a case supported by the frag allocator. 🤷️

> > Frag allocator falls back to allocating single pages, doesn't it?
>
> Actually I checked it. Compound page is still returned for size smaller than PAGE_SIZE,
> so I used single page allocation for that.

https://elixir.bootlin.com/linux/v6.3-rc6/source/mm/page_alloc.c#L5723

Jumbo frames should really be supported as scatter transfers,
if possible.
> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Monday, April 17, 2023 1:52 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui
> <decui@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Paul Rosswurm
> <paulros@microsoft.com>; olaf@aepfle.de; vkuznets@redhat.com;
> davem@davemloft.net; wei.liu@kernel.org; edumazet@google.com;
> pabeni@redhat.com; leon@kernel.org; Long Li <longli@microsoft.com>;
> ssengar@linux.microsoft.com; linux-rdma@vger.kernel.org;
> daniel@iogearbox.net; john.fastabend@gmail.com; bpf@vger.kernel.org;
> ast@kernel.org; Ajay Sharma <sharmaajay@microsoft.com>;
> hawk@kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle
> various MTU sizes
>
> On Sat, 15 Apr 2023 14:25:29 +0000 Haiyang Zhang wrote:
> > > Allocating frag larger than a page is not safe.
> >
> > I saw other drivers doing this - use napi_alloc_frag for size bigger than a page.
> > And it returns compound page. Why it's not safe? Should we use other allocator
> > when need compound pages?
>
> I believe so. There was a thread about this within the last year.
> Someone was trying to fix the page frag allocator to not fall back
> to order 0 pages in case of failure if requested size is > PAGE_SIZE.
> But there was push back and folks were saying that it's simply not
> a case supported by the frag allocator. 🤷️

Thanks, I will use other allocator for compound pages.

> > > Frag allocator falls back to allocating single pages, doesn't it?
> >
> > Actually I checked it. Compound page is still returned for size smaller than PAGE_SIZE,
> > so I used single page allocation for that.
>
> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir.bootlin.com%2Flinux%2Fv6.3-rc6%2Fsource%2Fmm%2Fpage_alloc.c%23L5723&data=05%7C01%7Chaiyangz%40microsoft.com%7C00ca9f15ae314a4aa2ee08db3f6c8699%7C72f988bf86f141af91ab2d7cd011db47%7C1%7C0%7C638173507608724670%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=87QqFbWrxUBMtqYpC397nlQxOJfU7lkt2%2FKAOGUjzjw%3D&reserved=0
>
> Jumbo frames should really be supported as scatter transfers,
> if possible.

Our HW has much bigger overhead for scatter transfer on RX, so I use compound
page.

Thanks,
- Haiyang
© 2016 - 2026 Red Hat, Inc.