[PATCH net-next v19 06/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'

Yunsheng Lin posted 14 patches 1 month, 4 weeks ago
There is a newer version of this series
[PATCH net-next v19 06/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'
Posted by Yunsheng Lin 1 month, 4 weeks ago
Currently there is one 'struct page_frag' for every 'struct
sock' and 'struct task_struct', we are about to replace the
'struct page_frag' with 'struct page_frag_cache' for them.
Before begin the replacing, we need to ensure the size of
'struct page_frag_cache' is not bigger than the size of
'struct page_frag', as there may be tens of thousands of
'struct sock' and 'struct task_struct' instances in the
system.

By or'ing the page order & pfmemalloc with lower bits of
'va' instead of using 'u16' or 'u32' for page size and 'u8'
for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
And page address & pfmemalloc & order is unchanged for the
same page in the same 'page_frag_cache' instance, it makes
sense to fit them together.

After this patch, the size of 'struct page_frag_cache' should be
the same as the size of 'struct page_frag'.

CC: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/mm_types_task.h   | 19 +++++----
 include/linux/page_frag_cache.h | 26 +++++++++++-
 mm/page_frag_cache.c            | 75 +++++++++++++++++++++++----------
 3 files changed, 88 insertions(+), 32 deletions(-)

diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 0ac6daebdd5c..a82aa80c0ba4 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -47,18 +47,21 @@ struct page_frag {
 #define PAGE_FRAG_CACHE_MAX_SIZE	__ALIGN_MASK(32768, ~PAGE_MASK)
 #define PAGE_FRAG_CACHE_MAX_ORDER	get_order(PAGE_FRAG_CACHE_MAX_SIZE)
 struct page_frag_cache {
-	void *va;
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+	/* encoded_page consists of the virtual address, pfmemalloc bit and
+	 * order of a page.
+	 */
+	unsigned long encoded_page;
+
+	/* we maintain a pagecount bias, so that we dont dirty cache line
+	 * containing page->_refcount every time we allocate a fragment.
+	 */
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
 	__u16 offset;
-	__u16 size;
+	__u16 pagecnt_bias;
 #else
 	__u32 offset;
+	__u32 pagecnt_bias;
 #endif
-	/* we maintain a pagecount bias, so that we dont dirty cache line
-	 * containing page->_refcount every time we allocate a fragment.
-	 */
-	unsigned int		pagecnt_bias;
-	bool pfmemalloc;
 };
 
 /* Track pages that require TLB flushes */
diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 0a52f7a179c8..75aaad6eaea2 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -3,18 +3,40 @@
 #ifndef _LINUX_PAGE_FRAG_CACHE_H
 #define _LINUX_PAGE_FRAG_CACHE_H
 
+#include <linux/bits.h>
 #include <linux/log2.h>
 #include <linux/mm_types_task.h>
 #include <linux/types.h>
 
+#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
+/* Use a full byte here to enable assembler optimization as the shift
+ * operation is usually expecting a byte.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK		GENMASK(7, 0)
+#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT	8
+#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT		BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
+#else
+/* Compiler should be able to figure out we don't read things as any value
+ * ANDed with 0 is 0.
+ */
+#define PAGE_FRAG_CACHE_ORDER_MASK		0
+#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT	0
+#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT		BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
+#endif
+
+static inline bool page_frag_encoded_page_pfmemalloc(unsigned long encoded_page)
+{
+	return !!(encoded_page & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
+}
+
 static inline void page_frag_cache_init(struct page_frag_cache *nc)
 {
-	nc->va = NULL;
+	nc->encoded_page = 0;
 }
 
 static inline bool page_frag_cache_is_pfmemalloc(struct page_frag_cache *nc)
 {
-	return !!nc->pfmemalloc;
+	return page_frag_encoded_page_pfmemalloc(nc->encoded_page);
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc);
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
index 4c8e04379cb3..cf9375a81a64 100644
--- a/mm/page_frag_cache.c
+++ b/mm/page_frag_cache.c
@@ -12,6 +12,7 @@
  * be used in the "frags" portion of skb_shared_info.
  */
 
+#include <linux/build_bug.h>
 #include <linux/export.h>
 #include <linux/gfp_types.h>
 #include <linux/init.h>
@@ -19,9 +20,41 @@
 #include <linux/page_frag_cache.h>
 #include "internal.h"
 
+static unsigned long page_frag_encode_page(struct page *page, unsigned int order,
+					   bool pfmemalloc)
+{
+	BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_FRAG_CACHE_ORDER_MASK);
+	BUILD_BUG_ON(PAGE_FRAG_CACHE_PFMEMALLOC_BIT >= PAGE_SIZE);
+
+	return (unsigned long)page_address(page) |
+		(order & PAGE_FRAG_CACHE_ORDER_MASK) |
+		((unsigned long)pfmemalloc << PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT);
+}
+
+static unsigned long page_frag_encoded_page_order(unsigned long encoded_page)
+{
+	return encoded_page & PAGE_FRAG_CACHE_ORDER_MASK;
+}
+
+static void *page_frag_encoded_page_address(unsigned long encoded_page)
+{
+	return (void *)(encoded_page & PAGE_MASK);
+}
+
+static struct page *page_frag_encoded_page_ptr(unsigned long encoded_page)
+{
+	return virt_to_page((void *)encoded_page);
+}
+
+static unsigned int page_frag_cache_page_size(unsigned long encoded_page)
+{
+	return PAGE_SIZE << page_frag_encoded_page_order(encoded_page);
+}
+
 static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 					     gfp_t gfp_mask)
 {
+	unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
 	struct page *page = NULL;
 	gfp_t gfp = gfp_mask;
 
@@ -30,23 +63,26 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
 		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
 	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
 				PAGE_FRAG_CACHE_MAX_ORDER);
-	nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
 #endif
-	if (unlikely(!page))
+	if (unlikely(!page)) {
 		page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+		order = 0;
+	}
 
-	nc->va = page ? page_address(page) : NULL;
+	nc->encoded_page = page ?
+		page_frag_encode_page(page, order, page_is_pfmemalloc(page)) : 0;
 
 	return page;
 }
 
 void page_frag_cache_drain(struct page_frag_cache *nc)
 {
-	if (!nc->va)
+	if (!nc->encoded_page)
 		return;
 
-	__page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
-	nc->va = NULL;
+	__page_frag_cache_drain(page_frag_encoded_page_ptr(nc->encoded_page),
+				nc->pagecnt_bias);
+	nc->encoded_page = 0;
 }
 EXPORT_SYMBOL(page_frag_cache_drain);
 
@@ -63,35 +99,29 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
 			      unsigned int fragsz, gfp_t gfp_mask,
 			      unsigned int align_mask)
 {
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-	unsigned int size = nc->size;
-#else
-	unsigned int size = PAGE_SIZE;
-#endif
-	unsigned int offset;
+	unsigned long encoded_page = nc->encoded_page;
+	unsigned int size, offset;
 	struct page *page;
 
-	if (unlikely(!nc->va)) {
+	if (unlikely(!encoded_page)) {
 refill:
 		page = __page_frag_cache_refill(nc, gfp_mask);
 		if (!page)
 			return NULL;
 
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
-		/* if size can vary use size else just use PAGE_SIZE */
-		size = nc->size;
-#endif
+		encoded_page = nc->encoded_page;
+
 		/* Even if we own the page, we do not use atomic_set().
 		 * This would break get_page_unless_zero() users.
 		 */
 		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
 
 		/* reset page count bias and offset to start of new frag */
-		nc->pfmemalloc = page_is_pfmemalloc(page);
 		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
 		nc->offset = 0;
 	}
 
+	size = page_frag_cache_page_size(encoded_page);
 	offset = __ALIGN_KERNEL_MASK(nc->offset, ~align_mask);
 	if (unlikely(offset + fragsz > size)) {
 		if (unlikely(fragsz > PAGE_SIZE)) {
@@ -107,13 +137,14 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
 			return NULL;
 		}
 
-		page = virt_to_page(nc->va);
+		page = page_frag_encoded_page_ptr(encoded_page);
 
 		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
 			goto refill;
 
-		if (unlikely(nc->pfmemalloc)) {
-			free_unref_page(page, compound_order(page));
+		if (unlikely(page_frag_encoded_page_pfmemalloc(encoded_page))) {
+			free_unref_page(page,
+					page_frag_encoded_page_order(encoded_page));
 			goto refill;
 		}
 
@@ -128,7 +159,7 @@ void *__page_frag_alloc_align(struct page_frag_cache *nc,
 	nc->pagecnt_bias--;
 	nc->offset = offset + fragsz;
 
-	return nc->va + offset;
+	return page_frag_encoded_page_address(encoded_page) + offset;
 }
 EXPORT_SYMBOL(__page_frag_alloc_align);
 
-- 
2.34.1
Re: [PATCH net-next v19 06/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'
Posted by Alexander Duyck 1 month, 3 weeks ago
On Tue, Oct 1, 2024 at 12:59 AM Yunsheng Lin <yunshenglin0825@gmail.com> wrote:
>
> Currently there is one 'struct page_frag' for every 'struct
> sock' and 'struct task_struct', we are about to replace the
> 'struct page_frag' with 'struct page_frag_cache' for them.
> Before begin the replacing, we need to ensure the size of
> 'struct page_frag_cache' is not bigger than the size of
> 'struct page_frag', as there may be tens of thousands of
> 'struct sock' and 'struct task_struct' instances in the
> system.
>
> By or'ing the page order & pfmemalloc with lower bits of
> 'va' instead of using 'u16' or 'u32' for page size and 'u8'
> for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
> And page address & pfmemalloc & order is unchanged for the
> same page in the same 'page_frag_cache' instance, it makes
> sense to fit them together.
>
> After this patch, the size of 'struct page_frag_cache' should be
> the same as the size of 'struct page_frag'.
>
> CC: Alexander Duyck <alexander.duyck@gmail.com>
> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> ---
>  include/linux/mm_types_task.h   | 19 +++++----
>  include/linux/page_frag_cache.h | 26 +++++++++++-
>  mm/page_frag_cache.c            | 75 +++++++++++++++++++++++----------
>  3 files changed, 88 insertions(+), 32 deletions(-)
>
> diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
> index 0ac6daebdd5c..a82aa80c0ba4 100644
> --- a/include/linux/mm_types_task.h
> +++ b/include/linux/mm_types_task.h
> @@ -47,18 +47,21 @@ struct page_frag {
>  #define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
>  #define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
>  struct page_frag_cache {
> -       void *va;
> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +       /* encoded_page consists of the virtual address, pfmemalloc bit and
> +        * order of a page.
> +        */
> +       unsigned long encoded_page;
> +
> +       /* we maintain a pagecount bias, so that we dont dirty cache line
> +        * containing page->_refcount every time we allocate a fragment.
> +        */
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
>         __u16 offset;
> -       __u16 size;
> +       __u16 pagecnt_bias;
>  #else
>         __u32 offset;
> +       __u32 pagecnt_bias;
>  #endif
> -       /* we maintain a pagecount bias, so that we dont dirty cache line
> -        * containing page->_refcount every time we allocate a fragment.
> -        */
> -       unsigned int            pagecnt_bias;
> -       bool pfmemalloc;
>  };
>
>  /* Track pages that require TLB flushes */
> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> index 0a52f7a179c8..75aaad6eaea2 100644
> --- a/include/linux/page_frag_cache.h
> +++ b/include/linux/page_frag_cache.h
> @@ -3,18 +3,40 @@
>  #ifndef _LINUX_PAGE_FRAG_CACHE_H
>  #define _LINUX_PAGE_FRAG_CACHE_H
>
> +#include <linux/bits.h>
>  #include <linux/log2.h>
>  #include <linux/mm_types_task.h>
>  #include <linux/types.h>
>
> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> +/* Use a full byte here to enable assembler optimization as the shift
> + * operation is usually expecting a byte.
> + */
> +#define PAGE_FRAG_CACHE_ORDER_MASK             GENMASK(7, 0)
> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT       8
> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
> +#else
> +/* Compiler should be able to figure out we don't read things as any value
> + * ANDed with 0 is 0.
> + */
> +#define PAGE_FRAG_CACHE_ORDER_MASK             0
> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT       0
> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
> +#endif
> +

Minor nit on this. You probably only need to have
PAGE_FRAG_CACHE_ORDER_SHIFT defined in the ifdef. The PFMEMALLOC bit
code is the same in both so you could pull it out.

Also depending on how you defined it you could just define the
PFMEMALLOC_BIT as the ORDER_MASK + 1.
Re: [PATCH net-next v19 06/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'
Posted by Yunsheng Lin 1 month, 3 weeks ago
On 10/4/2024 6:40 AM, Alexander Duyck wrote:
> On Tue, Oct 1, 2024 at 12:59 AM Yunsheng Lin <yunshenglin0825@gmail.com> wrote:
>>
>> Currently there is one 'struct page_frag' for every 'struct
>> sock' and 'struct task_struct', we are about to replace the
>> 'struct page_frag' with 'struct page_frag_cache' for them.
>> Before begin the replacing, we need to ensure the size of
>> 'struct page_frag_cache' is not bigger than the size of
>> 'struct page_frag', as there may be tens of thousands of
>> 'struct sock' and 'struct task_struct' instances in the
>> system.
>>
>> By or'ing the page order & pfmemalloc with lower bits of
>> 'va' instead of using 'u16' or 'u32' for page size and 'u8'
>> for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
>> And page address & pfmemalloc & order is unchanged for the
>> same page in the same 'page_frag_cache' instance, it makes
>> sense to fit them together.
>>
>> After this patch, the size of 'struct page_frag_cache' should be
>> the same as the size of 'struct page_frag'.
>>
>> CC: Alexander Duyck <alexander.duyck@gmail.com>
>> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
>> ---
>>   include/linux/mm_types_task.h   | 19 +++++----
>>   include/linux/page_frag_cache.h | 26 +++++++++++-
>>   mm/page_frag_cache.c            | 75 +++++++++++++++++++++++----------
>>   3 files changed, 88 insertions(+), 32 deletions(-)
>>
>> diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
>> index 0ac6daebdd5c..a82aa80c0ba4 100644
>> --- a/include/linux/mm_types_task.h
>> +++ b/include/linux/mm_types_task.h
>> @@ -47,18 +47,21 @@ struct page_frag {
>>   #define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
>>   #define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
>>   struct page_frag_cache {
>> -       void *va;
>> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>> +       /* encoded_page consists of the virtual address, pfmemalloc bit and
>> +        * order of a page.
>> +        */
>> +       unsigned long encoded_page;
>> +
>> +       /* we maintain a pagecount bias, so that we dont dirty cache line
>> +        * containing page->_refcount every time we allocate a fragment.
>> +        */
>> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
>>          __u16 offset;
>> -       __u16 size;
>> +       __u16 pagecnt_bias;
>>   #else
>>          __u32 offset;
>> +       __u32 pagecnt_bias;
>>   #endif
>> -       /* we maintain a pagecount bias, so that we dont dirty cache line
>> -        * containing page->_refcount every time we allocate a fragment.
>> -        */
>> -       unsigned int            pagecnt_bias;
>> -       bool pfmemalloc;
>>   };
>>
>>   /* Track pages that require TLB flushes */
>> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
>> index 0a52f7a179c8..75aaad6eaea2 100644
>> --- a/include/linux/page_frag_cache.h
>> +++ b/include/linux/page_frag_cache.h
>> @@ -3,18 +3,40 @@
>>   #ifndef _LINUX_PAGE_FRAG_CACHE_H
>>   #define _LINUX_PAGE_FRAG_CACHE_H
>>
>> +#include <linux/bits.h>
>>   #include <linux/log2.h>
>>   #include <linux/mm_types_task.h>
>>   #include <linux/types.h>
>>
>> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>> +/* Use a full byte here to enable assembler optimization as the shift
>> + * operation is usually expecting a byte.
>> + */
>> +#define PAGE_FRAG_CACHE_ORDER_MASK             GENMASK(7, 0)
>> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT       8
>> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
>> +#else
>> +/* Compiler should be able to figure out we don't read things as any value
>> + * ANDed with 0 is 0.
>> + */
>> +#define PAGE_FRAG_CACHE_ORDER_MASK             0
>> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT       0
>> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
>> +#endif
>> +
> 
> Minor nit on this. You probably only need to have
> PAGE_FRAG_CACHE_ORDER_SHIFT defined in the ifdef. The PFMEMALLOC bit

I guess you meant PAGE_FRAG_CACHE_ORDER_MASK here instead of
PAGE_FRAG_CACHE_ORDER_SHIFT, as the ORDER_SHIFT is always
zero?

> code is the same in both so you could pull it out.
> 
> Also depending on how you defined it you could just define the
> PFMEMALLOC_BIT as the ORDER_MASK + 1.

But the PFMEMALLOC_SHIFT still need to be defined as it is used in
page_frag_encode_page(), right? I am not sure if I understand what is
the point of defining the PFMEMALLOC_BIT as the ORDER_MASK + 1 instead
of defining the PFMEMALLOC_BIT as BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT) 
here.

Re: [PATCH net-next v19 06/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc'
Posted by Alexander Duyck 1 month, 3 weeks ago
On Sat, Oct 5, 2024 at 6:06 AM Yunsheng Lin <yunshenglin0825@gmail.com> wrote:
>
> On 10/4/2024 6:40 AM, Alexander Duyck wrote:
> > On Tue, Oct 1, 2024 at 12:59 AM Yunsheng Lin <yunshenglin0825@gmail.com> wrote:
> >>
> >> Currently there is one 'struct page_frag' for every 'struct
> >> sock' and 'struct task_struct', we are about to replace the
> >> 'struct page_frag' with 'struct page_frag_cache' for them.
> >> Before begin the replacing, we need to ensure the size of
> >> 'struct page_frag_cache' is not bigger than the size of
> >> 'struct page_frag', as there may be tens of thousands of
> >> 'struct sock' and 'struct task_struct' instances in the
> >> system.
> >>
> >> By or'ing the page order & pfmemalloc with lower bits of
> >> 'va' instead of using 'u16' or 'u32' for page size and 'u8'
> >> for pfmemalloc, we are able to avoid 3 or 5 bytes space waste.
> >> And page address & pfmemalloc & order is unchanged for the
> >> same page in the same 'page_frag_cache' instance, it makes
> >> sense to fit them together.
> >>
> >> After this patch, the size of 'struct page_frag_cache' should be
> >> the same as the size of 'struct page_frag'.
> >>
> >> CC: Alexander Duyck <alexander.duyck@gmail.com>
> >> Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
> >> ---
> >>   include/linux/mm_types_task.h   | 19 +++++----
> >>   include/linux/page_frag_cache.h | 26 +++++++++++-
> >>   mm/page_frag_cache.c            | 75 +++++++++++++++++++++++----------
> >>   3 files changed, 88 insertions(+), 32 deletions(-)
> >>
> >> diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
> >> index 0ac6daebdd5c..a82aa80c0ba4 100644
> >> --- a/include/linux/mm_types_task.h
> >> +++ b/include/linux/mm_types_task.h
> >> @@ -47,18 +47,21 @@ struct page_frag {
> >>   #define PAGE_FRAG_CACHE_MAX_SIZE       __ALIGN_MASK(32768, ~PAGE_MASK)
> >>   #define PAGE_FRAG_CACHE_MAX_ORDER      get_order(PAGE_FRAG_CACHE_MAX_SIZE)
> >>   struct page_frag_cache {
> >> -       void *va;
> >> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> >> +       /* encoded_page consists of the virtual address, pfmemalloc bit and
> >> +        * order of a page.
> >> +        */
> >> +       unsigned long encoded_page;
> >> +
> >> +       /* we maintain a pagecount bias, so that we dont dirty cache line
> >> +        * containing page->_refcount every time we allocate a fragment.
> >> +        */
> >> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
> >>          __u16 offset;
> >> -       __u16 size;
> >> +       __u16 pagecnt_bias;
> >>   #else
> >>          __u32 offset;
> >> +       __u32 pagecnt_bias;
> >>   #endif
> >> -       /* we maintain a pagecount bias, so that we dont dirty cache line
> >> -        * containing page->_refcount every time we allocate a fragment.
> >> -        */
> >> -       unsigned int            pagecnt_bias;
> >> -       bool pfmemalloc;
> >>   };
> >>
> >>   /* Track pages that require TLB flushes */
> >> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
> >> index 0a52f7a179c8..75aaad6eaea2 100644
> >> --- a/include/linux/page_frag_cache.h
> >> +++ b/include/linux/page_frag_cache.h
> >> @@ -3,18 +3,40 @@
> >>   #ifndef _LINUX_PAGE_FRAG_CACHE_H
> >>   #define _LINUX_PAGE_FRAG_CACHE_H
> >>
> >> +#include <linux/bits.h>
> >>   #include <linux/log2.h>
> >>   #include <linux/mm_types_task.h>
> >>   #include <linux/types.h>
> >>
> >> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
> >> +/* Use a full byte here to enable assembler optimization as the shift
> >> + * operation is usually expecting a byte.
> >> + */
> >> +#define PAGE_FRAG_CACHE_ORDER_MASK             GENMASK(7, 0)
> >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT       8
> >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
> >> +#else
> >> +/* Compiler should be able to figure out we don't read things as any value
> >> + * ANDed with 0 is 0.
> >> + */
> >> +#define PAGE_FRAG_CACHE_ORDER_MASK             0
> >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT       0
> >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT         BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
> >> +#endif
> >> +
> >
> > Minor nit on this. You probably only need to have
> > PAGE_FRAG_CACHE_ORDER_SHIFT defined in the ifdef. The PFMEMALLOC bit
>
> I guess you meant PAGE_FRAG_CACHE_ORDER_MASK here instead of
> PAGE_FRAG_CACHE_ORDER_SHIFT, as the ORDER_SHIFT is always
> zero?

Yes.

> > code is the same in both so you could pull it out.
> >
> > Also depending on how you defined it you could just define the
> > PFMEMALLOC_BIT as the ORDER_MASK + 1.
>
> But the PFMEMALLOC_SHIFT still need to be defined as it is used in
> page_frag_encode_page(), right? I am not sure if I understand what is
> the point of defining the PFMEMALLOC_BIT as the ORDER_MASK + 1 instead
> of defining the PFMEMALLOC_BIT as BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT)
> here.

Actually the shift probably isn't needed. Since it is a single bit
value you could just use a multiply by the bit and it would accomplish
the same thing as the shift and would likely be converted to the same
assembler code.