[PATCH net-next 1/6] mm/page_alloc: modify page_frag_alloc_align() to accept align as an argument

Yunsheng Lin posted 6 patches 8 months, 2 weeks ago
There is a newer version of this series
[PATCH net-next 1/6] mm/page_alloc: modify page_frag_alloc_align() to accept align as an argument
Posted by Yunsheng Lin 8 months, 2 weeks ago
napi_alloc_frag_align() and netdev_alloc_frag_align() accept
align as an argument, and they are thin wrappers around the
__napi_alloc_frag_align() and __netdev_alloc_frag_align()
APIs, doing the align to align_mask conversion so that
page_frag_alloc_align() can be called with the mask it
expects.

As __napi_alloc_frag_align() and __netdev_alloc_frag_align()
are only used by the above thin wrappers, it makes more sense
to drop the align to align_mask conversion and have the
wrappers call page_frag_alloc_align() directly. Doing so also
avoids the confusion of napi_alloc_frag_align() taking align
as an argument while page_frag_alloc_align() takes align_mask,
even though both carry the 'align' suffix.

Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
CC: Alexander Duyck <alexander.duyck@gmail.com>
---
 include/linux/gfp.h    |  4 ++--
 include/linux/skbuff.h | 22 ++++------------------
 mm/page_alloc.c        |  6 ++++--
 net/core/skbuff.c      | 14 +++++++-------
 4 files changed, 17 insertions(+), 29 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index de292a007138..bbd75976541e 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -314,12 +314,12 @@ struct page_frag_cache;
 extern void __page_frag_cache_drain(struct page *page, unsigned int count);
 extern void *page_frag_alloc_align(struct page_frag_cache *nc,
 				   unsigned int fragsz, gfp_t gfp_mask,
-				   unsigned int align_mask);
+				   unsigned int align);
 
 static inline void *page_frag_alloc(struct page_frag_cache *nc,
 			     unsigned int fragsz, gfp_t gfp_mask)
 {
-	return page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
+	return page_frag_alloc_align(nc, fragsz, gfp_mask, 1);
 }
 
 extern void page_frag_free(void *addr);
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a5ae952454c8..c0a3b44ef5da 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3192,7 +3192,7 @@ static inline void skb_queue_purge(struct sk_buff_head *list)
 unsigned int skb_rbtree_purge(struct rb_root *root);
 void skb_errqueue_purge(struct sk_buff_head *list);
 
-void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
+void *netdev_alloc_frag_align(unsigned int fragsz, unsigned int align);
 
 /**
  * netdev_alloc_frag - allocate a page fragment
@@ -3203,14 +3203,7 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
  */
 static inline void *netdev_alloc_frag(unsigned int fragsz)
 {
-	return __netdev_alloc_frag_align(fragsz, ~0u);
-}
-
-static inline void *netdev_alloc_frag_align(unsigned int fragsz,
-					    unsigned int align)
-{
-	WARN_ON_ONCE(!is_power_of_2(align));
-	return __netdev_alloc_frag_align(fragsz, -align);
+	return netdev_alloc_frag_align(fragsz, 1);
 }
 
 struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length,
@@ -3270,18 +3263,11 @@ static inline void skb_free_frag(void *addr)
 	page_frag_free(addr);
 }
 
-void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask);
+void *napi_alloc_frag_align(unsigned int fragsz, unsigned int align);
 
 static inline void *napi_alloc_frag(unsigned int fragsz)
 {
-	return __napi_alloc_frag_align(fragsz, ~0u);
-}
-
-static inline void *napi_alloc_frag_align(unsigned int fragsz,
-					  unsigned int align)
-{
-	WARN_ON_ONCE(!is_power_of_2(align));
-	return __napi_alloc_frag_align(fragsz, -align);
+	return napi_alloc_frag_align(fragsz, 1);
 }
 
 struct sk_buff *__napi_alloc_skb(struct napi_struct *napi,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 37ca4f4b62bf..9a16305cf985 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4718,12 +4718,14 @@ EXPORT_SYMBOL(__page_frag_cache_drain);
 
 void *page_frag_alloc_align(struct page_frag_cache *nc,
 		      unsigned int fragsz, gfp_t gfp_mask,
-		      unsigned int align_mask)
+		      unsigned int align)
 {
 	unsigned int size = PAGE_SIZE;
 	struct page *page;
 	int offset;
 
+	WARN_ON_ONCE(!is_power_of_2(align));
+
 	if (unlikely(!nc->va)) {
 refill:
 		page = __page_frag_cache_refill(nc, gfp_mask);
@@ -4782,7 +4784,7 @@ void *page_frag_alloc_align(struct page_frag_cache *nc,
 	}
 
 	nc->pagecnt_bias--;
-	offset &= align_mask;
+	offset &= -align;
 	nc->offset = offset;
 
 	return nc->va + offset;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 12d22c0b8551..84c29a48f1a8 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -291,17 +291,17 @@ void napi_get_frags_check(struct napi_struct *napi)
 	local_bh_enable();
 }
 
-void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
+void *napi_alloc_frag_align(unsigned int fragsz, unsigned int align)
 {
 	struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
 
 	fragsz = SKB_DATA_ALIGN(fragsz);
 
-	return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
+	return page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align);
 }
-EXPORT_SYMBOL(__napi_alloc_frag_align);
+EXPORT_SYMBOL(napi_alloc_frag_align);
 
-void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
+void *netdev_alloc_frag_align(unsigned int fragsz, unsigned int align)
 {
 	void *data;
 
@@ -309,18 +309,18 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
 	if (in_hardirq() || irqs_disabled()) {
 		struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache);
 
-		data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align_mask);
+		data = page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, align);
 	} else {
 		struct napi_alloc_cache *nc;
 
 		local_bh_disable();
 		nc = this_cpu_ptr(&napi_alloc_cache);
-		data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align_mask);
+		data = page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, align);
 		local_bh_enable();
 	}
 	return data;
 }
-EXPORT_SYMBOL(__netdev_alloc_frag_align);
+EXPORT_SYMBOL(netdev_alloc_frag_align);
 
 static struct sk_buff *napi_skb_cache_get(void)
 {
-- 
2.33.0
Re: [PATCH net-next 1/6] mm/page_alloc: modify page_frag_alloc_align() to accept align as an argument
Posted by Alexander H Duyck 8 months, 2 weeks ago
On Wed, 2024-01-03 at 17:56 +0800, Yunsheng Lin wrote:
> napi_alloc_frag_align() and netdev_alloc_frag_align() accept
> align as an argument, and they are thin wrappers around the
> __napi_alloc_frag_align() and __netdev_alloc_frag_align()
> APIs, doing the align to align_mask conversion so that
> page_frag_alloc_align() can be called with the mask it
> expects.
> 
> As __napi_alloc_frag_align() and __netdev_alloc_frag_align()
> are only used by the above thin wrappers, it makes more sense
> to drop the align to align_mask conversion and have the
> wrappers call page_frag_alloc_align() directly. Doing so also
> avoids the confusion of napi_alloc_frag_align() taking align
> as an argument while page_frag_alloc_align() takes align_mask,
> even though both carry the 'align' suffix.
> 
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> CC: Alexander Duyck <alexander.duyck@gmail.com>

This patch overlooks much of the main reason for having the wrappers.
By having the in-line wrapper and passing the argument as a mask we can
avoid having to verify the alignment value during execution time since
it can usually be handled during compile time.
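
For a power-of-two align, -align is just the mask ~(align - 1), so when
align is a compile-time constant the inline wrapper can boil down to a
single constant AND with no run-time check left. A rough userspace sketch
of that arithmetic (only illustrating the identity, not the kernel code
itself):

#include <assert.h>
#include <stdio.h>

/* Round offset down to a multiple of align (align a power of two). */
static unsigned int align_down(unsigned int offset, unsigned int align)
{
	return offset & -align;	/* same as offset & ~(align - 1) */
}

int main(void)
{
	unsigned int align = 64;	/* e.g. a cacheline-sized alignment */

	assert(-align == ~(align - 1));
	/* prints "0 64 128" */
	printf("%u %u %u\n", align_down(3, align),
	       align_down(64, align), align_down(130, align));
	return 0;
}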

By moving it into the function itself we are adding additional CPU
overhead per page as we will have to go through and validate the
alignment value for every single page instead of when the driver using
the function is compiled.

The overhead may not seem like much, but when you are having to deal
with it per page and you are processing pages at millions per second it
can quickly start to add up.

This is essentially a code cleanup at the cost of some amount of
performance.
Re: [PATCH net-next 1/6] mm/page_alloc: modify page_frag_alloc_align() to accept align as an argument
Posted by Yunsheng Lin 8 months, 2 weeks ago
On 2024/1/5 23:28, Alexander H Duyck wrote:
> On Wed, 2024-01-03 at 17:56 +0800, Yunsheng Lin wrote:
>> napi_alloc_frag_align() and netdev_alloc_frag_align() accept
>> align as an argument, and they are thin wrappers around the
>> __napi_alloc_frag_align() and __netdev_alloc_frag_align()
>> APIs, doing the align to align_mask conversion so that
>> page_frag_alloc_align() can be called with the mask it
>> expects.
>>
>> As __napi_alloc_frag_align() and __netdev_alloc_frag_align()
>> are only used by the above thin wrappers, it makes more sense
>> to drop the align to align_mask conversion and have the
>> wrappers call page_frag_alloc_align() directly. Doing so also
>> avoids the confusion of napi_alloc_frag_align() taking align
>> as an argument while page_frag_alloc_align() takes align_mask,
>> even though both carry the 'align' suffix.
>>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>> CC: Alexander Duyck <alexander.duyck@gmail.com>
> 
> This patch overlooks much of the main reason for having the wrappers.
> By having the in-line wrapper and passing the argument as a mask we can
> avoid having to verify the alignment value during execution time since
> it can usually be handled during compile time.

When it can not be handled at compile time, we might end up with a bigger
executable size for the kernel, right? Doesn't that defeat some of the
purpose of inlining?

But judging from the callers of those APIs, it does seem that compile-time
handling is the usual case here. As you suggested, I will add a
__page_frag_alloc_align() which is passed the align_mask the original
function expected, something like the sketch below.
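
Roughly like this, keeping the mask-based function out of line and doing
the align to align_mask conversion in an inline wrapper (only a sketch at
this point, names and placement may still change):

extern void *__page_frag_alloc_align(struct page_frag_cache *nc,
				     unsigned int fragsz, gfp_t gfp_mask,
				     unsigned int align_mask);

static inline void *page_frag_alloc_align(struct page_frag_cache *nc,
					  unsigned int fragsz, gfp_t gfp_mask,
					  unsigned int align)
{
	/* With a constant align the check and -align fold at compile time. */
	WARN_ON_ONCE(!is_power_of_2(align));
	return __page_frag_alloc_align(nc, fragsz, gfp_mask, -align);
}

static inline void *page_frag_alloc(struct page_frag_cache *nc,
				    unsigned int fragsz, gfp_t gfp_mask)
{
	return __page_frag_alloc_align(nc, fragsz, gfp_mask, ~0u);
}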

> 
> By moving it into the function itself we are adding additional CPU
> overhead per page as we will have to go through and validate the
> alignment value for every single page instead of when the driver using
> the function is compiled.
> 
> The overhead may not seem like much, but when you are having to deal
> with it per page and you are processing pages at millions per second it
> can quickly start to add up.
> 
> This is essentially a code cleanup at the cost of some amount of
> performance.
> 