[PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle various MTU sizes

Haiyang Zhang posted 4 patches 2 years, 10 months ago
[PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle various MTU sizes
Posted by Haiyang Zhang 2 years, 10 months ago
Update RX data path to allocate and use RX queue DMA buffers with
proper size based on potentially various MTU sizes.

Signed-off-by: Haiyang Zhang <haiyangz@microsoft.com>
Reviewed-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
---
V3:
Refactored into multiple patches for readability. Suggested by Jacob Keller.

V2:
Refactored into multiple patches for readability. Suggested by Yunsheng Lin.

---
 drivers/net/ethernet/microsoft/mana/mana_en.c | 38 ++++++++++++++-----
 include/net/mana/mana.h                       |  7 ++++
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c
index 911954ff84ee..8e7fa6e9c3b5 100644
--- a/drivers/net/ethernet/microsoft/mana/mana_en.c
+++ b/drivers/net/ethernet/microsoft/mana/mana_en.c
@@ -1185,10 +1185,10 @@ static void mana_post_pkt_rxq(struct mana_rxq *rxq)
 	WARN_ON_ONCE(recv_buf_oob->wqe_inf.wqe_size_in_bu != 1);
 }
 
-static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
-				      struct xdp_buff *xdp)
+static struct sk_buff *mana_build_skb(struct mana_rxq *rxq, void *buf_va,
+				      uint pkt_len, struct xdp_buff *xdp)
 {
-	struct sk_buff *skb = napi_build_skb(buf_va, PAGE_SIZE);
+	struct sk_buff *skb = napi_build_skb(buf_va, rxq->alloc_size);
 
 	if (!skb)
 		return NULL;
@@ -1196,11 +1196,12 @@ static struct sk_buff *mana_build_skb(void *buf_va, uint pkt_len,
 	if (xdp->data_hard_start) {
 		skb_reserve(skb, xdp->data - xdp->data_hard_start);
 		skb_put(skb, xdp->data_end - xdp->data);
-	} else {
-		skb_reserve(skb, XDP_PACKET_HEADROOM);
-		skb_put(skb, pkt_len);
+		return skb;
 	}
 
+	skb_reserve(skb, rxq->headroom);
+	skb_put(skb, pkt_len);
+
 	return skb;
 }
 
@@ -1233,7 +1234,7 @@ static void mana_rx_skb(void *buf_va, struct mana_rxcomp_oob *cqe,
 	if (act != XDP_PASS && act != XDP_TX)
 		goto drop_xdp;
 
-	skb = mana_build_skb(buf_va, pkt_len, &xdp);
+	skb = mana_build_skb(rxq, buf_va, pkt_len, &xdp);
 
 	if (!skb)
 		goto drop;
@@ -1301,6 +1302,14 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
 	if (rxq->xdp_save_va) {
 		va = rxq->xdp_save_va;
 		rxq->xdp_save_va = NULL;
+	} else if (rxq->alloc_size > PAGE_SIZE) {
+		if (is_napi)
+			va = napi_alloc_frag(rxq->alloc_size);
+		else
+			va = netdev_alloc_frag(rxq->alloc_size);
+
+		if (!va)
+			return NULL;
 	} else {
 		page = dev_alloc_page();
 		if (!page)
@@ -1309,7 +1318,7 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev,
 		va = page_to_virt(page);
 	}
 
-	*da = dma_map_single(dev, va + XDP_PACKET_HEADROOM, rxq->datasize,
+	*da = dma_map_single(dev, va + rxq->headroom, rxq->datasize,
 			     DMA_FROM_DEVICE);
 
 	if (dma_mapping_error(dev, *da)) {
@@ -1732,7 +1741,7 @@ static int mana_alloc_rx_wqe(struct mana_port_context *apc,
 	u32 buf_idx;
 	int ret;
 
-	WARN_ON(rxq->datasize == 0 || rxq->datasize > PAGE_SIZE);
+	WARN_ON(rxq->datasize == 0);
 
 	*rxq_size = 0;
 	*cq_size = 0;
@@ -1788,6 +1797,7 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	struct gdma_dev *gd = apc->ac->gdma_dev;
 	struct mana_obj_spec wq_spec;
 	struct mana_obj_spec cq_spec;
+	unsigned int mtu = ndev->mtu;
 	struct gdma_queue_spec spec;
 	struct mana_cq *cq = NULL;
 	struct gdma_context *gc;
@@ -1807,7 +1817,15 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc,
 	rxq->rxq_idx = rxq_idx;
 	rxq->rxobj = INVALID_MANA_HANDLE;
 
-	rxq->datasize = ALIGN(ETH_FRAME_LEN, 64);
+	rxq->datasize = ALIGN(mtu + ETH_HLEN, 64);
+
+	if (mtu > MANA_XDP_MTU_MAX) {
+		rxq->alloc_size = mtu + MANA_RXBUF_PAD;
+		rxq->headroom = 0;
+	} else {
+		rxq->alloc_size = mtu + MANA_RXBUF_PAD + XDP_PACKET_HEADROOM;
+		rxq->headroom = XDP_PACKET_HEADROOM;
+	}
 
 	err = mana_alloc_rx_wqe(apc, rxq, &rq_size, &cq_size);
 	if (err)
diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index 037bcabf6b98..fee99d704281 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -291,6 +291,11 @@ struct mana_recv_buf_oob {
 	struct gdma_posted_wqe_info wqe_inf;
 };
 
+#define MANA_RXBUF_PAD (SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) \
+			+ ETH_HLEN)
+
+#define MANA_XDP_MTU_MAX (PAGE_SIZE - MANA_RXBUF_PAD - XDP_PACKET_HEADROOM)
+
 struct mana_rxq {
 	struct gdma_queue *gdma_rq;
 	/* Cache the gdma receive queue id */
@@ -300,6 +305,8 @@ struct mana_rxq {
 	u32 rxq_idx;
 
 	u32 datasize;
+	u32 alloc_size;
+	u32 headroom;
 
 	mana_handle_t rxobj;
 
-- 
2.25.1
Re: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle various MTU sizes
Posted by Jakub Kicinski 2 years, 10 months ago
On Wed, 12 Apr 2023 14:16:02 -0700 Haiyang Zhang wrote:
> +	} else if (rxq->alloc_size > PAGE_SIZE) {
> +		if (is_napi)
> +			va = napi_alloc_frag(rxq->alloc_size);

Allocating frag larger than a page is not safe.
Frag allocator falls back to allocating single pages, doesn't it?
RE: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle various MTU sizes
Posted by Haiyang Zhang 2 years, 10 months ago

> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Friday, April 14, 2023 10:06 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui
> <decui@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Paul Rosswurm
> <paulros@microsoft.com>; olaf@aepfle.de; vkuznets@redhat.com;
> davem@davemloft.net; wei.liu@kernel.org; edumazet@google.com;
> pabeni@redhat.com; leon@kernel.org; Long Li <longli@microsoft.com>;
> ssengar@linux.microsoft.com; linux-rdma@vger.kernel.org;
> daniel@iogearbox.net; john.fastabend@gmail.com; bpf@vger.kernel.org;
> ast@kernel.org; Ajay Sharma <sharmaajay@microsoft.com>;
> hawk@kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle
> various MTU sizes
> 
> On Wed, 12 Apr 2023 14:16:02 -0700 Haiyang Zhang wrote:
> > +	} else if (rxq->alloc_size > PAGE_SIZE) {
> > +		if (is_napi)
> > +			va = napi_alloc_frag(rxq->alloc_size);
> 
> Allocating frag larger than a page is not safe.

 I saw other drivers doing this - use napi_alloc_frag for size bigger than a page.
And it returns compound page. Why it's not safe? Should we use other allocator
when need compound pages?

> Frag allocator falls back to allocating single pages, doesn't it?

Actually I checked it. Compound page is still returned for size smaller than PAGE_SIZE,
so I used single page allocation for that.

Thanks,
- Haiyang
Re: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle various MTU sizes
Posted by Jakub Kicinski 2 years, 9 months ago
On Sat, 15 Apr 2023 14:25:29 +0000 Haiyang Zhang wrote:
> > Allocating frag larger than a page is not safe.  
> 
>  I saw other drivers doing this - use napi_alloc_frag for size bigger than a page.
> And it returns compound page. Why it's not safe? Should we use other allocator
> when need compound pages?

I believe so. There was a thread about this within the last year.
Someone was trying to fix the page frag allocator to not fall back
to order 0 pages in case of failure if requested size is > PAGE_SIZE.
But there was push back and folks were saying that it's simply not 
a case supported by the frag allocator. 🤷️

> > Frag allocator falls back to allocating single pages, doesn't it?  
> 
> Actually I checked it. Compound page is still returned for size smaller than PAGE_SIZE,
> so I used single page allocation for that.

https://elixir.bootlin.com/linux/v6.3-rc6/source/mm/page_alloc.c#L5723

Jumbo frames should really be supported as scatter transfers, 
if possible.
RE: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle various MTU sizes
Posted by Haiyang Zhang 2 years, 9 months ago

> -----Original Message-----
> From: Jakub Kicinski <kuba@kernel.org>
> Sent: Monday, April 17, 2023 1:52 PM
> To: Haiyang Zhang <haiyangz@microsoft.com>
> Cc: linux-hyperv@vger.kernel.org; netdev@vger.kernel.org; Dexuan Cui
> <decui@microsoft.com>; KY Srinivasan <kys@microsoft.com>; Paul Rosswurm
> <paulros@microsoft.com>; olaf@aepfle.de; vkuznets@redhat.com;
> davem@davemloft.net; wei.liu@kernel.org; edumazet@google.com;
> pabeni@redhat.com; leon@kernel.org; Long Li <longli@microsoft.com>;
> ssengar@linux.microsoft.com; linux-rdma@vger.kernel.org;
> daniel@iogearbox.net; john.fastabend@gmail.com; bpf@vger.kernel.org;
> ast@kernel.org; Ajay Sharma <sharmaajay@microsoft.com>;
> hawk@kernel.org; linux-kernel@vger.kernel.org
> Subject: Re: [PATCH V3,net-next, 3/4] net: mana: Enable RX path to handle
> various MTU sizes
> 
> On Sat, 15 Apr 2023 14:25:29 +0000 Haiyang Zhang wrote:
> > > Allocating frag larger than a page is not safe.
> >
> >  I saw other drivers doing this - use napi_alloc_frag for size bigger than a
> page.
> > And it returns compound page. Why it's not safe? Should we use other
> allocator
> > when need compound pages?
> 
> I believe so. There was a thread about this within the last year.
> Someone was trying to fix the page frag allocator to not fall back
> to order 0 pages in case of failure if requested size is > PAGE_SIZE.
> But there was push back and folks were saying that it's simply not
> a case supported by the frag allocator. 🤷️

Thanks, I will use another allocator for compound pages.

> 
> > > Frag allocator falls back to allocating single pages, doesn't it?
> >
> > Actually I checked it. Compound page is still returned for size smaller than
> PAGE_SIZE,
> > so I used single page allocation for that.
> 
> https://nam06.safelinks.protection.outlook.com/?url=https%3A%2F%2Felixir
> .bootlin.com%2Flinux%2Fv6.3-
> rc6%2Fsource%2Fmm%2Fpage_alloc.c%23L5723&data=05%7C01%7Chaiyan
> gz%40microsoft.com%7C00ca9f15ae314a4aa2ee08db3f6c8699%7C72f988
> bf86f141af91ab2d7cd011db47%7C1%7C0%7C638173507608724670%7C
> Unknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJB
> TiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=87QqFbWrxU
> BMtqYpC397nlQxOJfU7lkt2%2FKAOGUjzjw%3D&reserved=0
> 
> Jumbo frames should really be supported as scatter transfers,
> if possible.

Our HW has much bigger overhead for scatter transfers on RX, so I use compound
pages.

Thanks,
- Haiyang