During page isolation, the original migratetype is overwritten, since
MIGRATE_* are enums and stored in pageblock bitmaps. Change
MIGRATE_ISOLATE to be stored a standalone bit, PB_migrate_isolate, like
PB_migrate_skip, so that migratetype is not lost during pageblock
isolation. pageblock bits needs to be word aligned, so expand
the number of pageblock bits from 4 to 8 and make PB_migrate_isolate bit 7.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
include/linux/mmzone.h | 15 ++++++++------
include/linux/pageblock-flags.h | 9 ++++++++-
mm/page_alloc.c | 36 ++++++++++++++++++++++++++++++++-
mm/page_isolation.c | 11 ++++++++++
4 files changed, 63 insertions(+), 8 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b19a98c20de8..7ef01fe148ce 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -106,14 +106,17 @@ static inline bool migratetype_is_mergeable(int mt)
extern int page_group_by_mobility_disabled;
-#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
+#ifdef CONFIG_MEMORY_ISOLATION
+#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
+#else
+#define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
+#endif
+
+unsigned long get_pageblock_migratetype(const struct page *page);
-#define get_pageblock_migratetype(page) \
- get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
+#define folio_migratetype(folio) \
+ get_pageblock_migratetype(&folio->page)
-#define folio_migratetype(folio) \
- get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \
- MIGRATETYPE_MASK)
struct free_area {
struct list_head free_list[MIGRATE_TYPES];
unsigned long nr_free;
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index 0c4963339f0b..00040e7df8c8 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -20,7 +20,10 @@ enum pageblock_bits {
PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
/* 3 bits required for migrate types */
PB_migrate_skip,/* If set the block is skipped by compaction */
-
+#ifdef CONFIG_MEMORY_ISOLATION
+ PB_migrate_isolate = 7, /* If set the block is isolated */
+ /* set it to 7 to make pageblock bit word aligned */
+#endif
/*
* Assume the bits will always align on a word. If this assumption
* changes then get/set pageblock needs updating.
@@ -28,6 +31,10 @@ enum pageblock_bits {
NR_PAGEBLOCK_BITS
};
+#ifdef CONFIG_MEMORY_ISOLATION
+#define PB_migrate_isolate_bit BIT(PB_migrate_isolate)
+#endif
+
#if defined(CONFIG_PAGE_BLOCK_ORDER)
#define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
#else
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c77592b22256..04e301fb4879 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -381,10 +381,31 @@ unsigned long get_pfnblock_flags_mask(const struct page *page,
return (word >> bitidx) & mask;
}
+unsigned long get_pageblock_migratetype(const struct page *page)
+{
+ unsigned long flags;
+
+ flags = get_pfnblock_flags_mask(page, page_to_pfn(page),
+ MIGRATETYPE_MASK);
+#ifdef CONFIG_MEMORY_ISOLATION
+ if (flags & PB_migrate_isolate_bit)
+ return MIGRATE_ISOLATE;
+#endif
+ return flags;
+}
+
static __always_inline int get_pfnblock_migratetype(const struct page *page,
unsigned long pfn)
{
- return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
+ unsigned long flags;
+
+ flags = get_pfnblock_flags_mask(page, pfn,
+ MIGRATETYPE_MASK);
+#ifdef CONFIG_MEMORY_ISOLATION
+ if (flags & PB_migrate_isolate_bit)
+ return MIGRATE_ISOLATE;
+#endif
+ return flags;
}
/**
@@ -402,8 +423,14 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
unsigned long bitidx, word_bitidx;
unsigned long word;
+#ifdef CONFIG_MEMORY_ISOLATION
+ BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
+ /* extra one for MIGRATE_ISOLATE */
+ BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits) + 1);
+#else
BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
+#endif
bitmap = get_pageblock_bitmap(page, pfn);
bitidx = pfn_to_bitidx(page, pfn);
@@ -426,6 +453,13 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
migratetype < MIGRATE_PCPTYPES))
migratetype = MIGRATE_UNMOVABLE;
+#ifdef CONFIG_MEMORY_ISOLATION
+ if (migratetype == MIGRATE_ISOLATE) {
+ set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
+ page_to_pfn(page), PB_migrate_isolate_bit);
+ return;
+ }
+#endif
set_pfnblock_flags_mask(page, (unsigned long)migratetype,
page_to_pfn(page), MIGRATETYPE_MASK);
}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index b2fc5266e3d2..751e21f6d85e 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -15,6 +15,17 @@
#define CREATE_TRACE_POINTS
#include <trace/events/page_isolation.h>
+static inline bool __maybe_unused get_pageblock_isolate(struct page *page)
+{
+ return get_pfnblock_flags_mask(page, page_to_pfn(page),
+ PB_migrate_isolate_bit);
+}
+static inline void clear_pageblock_isolate(struct page *page)
+{
+ set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
+ PB_migrate_isolate_bit);
+}
+
/*
* This function checks whether the range [start_pfn, end_pfn) includes
* unmovable pages or not. The range must fall into a single pageblock and
--
2.47.2
On 09.05.25 22:01, Zi Yan wrote:
> During page isolation, the original migratetype is overwritten, since
> MIGRATE_* are enums and stored in pageblock bitmaps. Change
> MIGRATE_ISOLATE to be stored a standalone bit, PB_migrate_isolate, like
> PB_migrate_skip, so that migratetype is not lost during pageblock
> isolation. pageblock bits needs to be word aligned, so expand
> the number of pageblock bits from 4 to 8 and make PB_migrate_isolate bit 7.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> include/linux/mmzone.h | 15 ++++++++------
> include/linux/pageblock-flags.h | 9 ++++++++-
> mm/page_alloc.c | 36 ++++++++++++++++++++++++++++++++-
> mm/page_isolation.c | 11 ++++++++++
> 4 files changed, 63 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index b19a98c20de8..7ef01fe148ce 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -106,14 +106,17 @@ static inline bool migratetype_is_mergeable(int mt)
>
> extern int page_group_by_mobility_disabled;
>
> -#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
> +#ifdef CONFIG_MEMORY_ISOLATION
> +#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
> +#else
> +#define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
> +#endif
> +
> +unsigned long get_pageblock_migratetype(const struct page *page);
>
> -#define get_pageblock_migratetype(page) \
> - get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
> +#define folio_migratetype(folio) \
> + get_pageblock_migratetype(&folio->page)
>
> -#define folio_migratetype(folio) \
> - get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \
> - MIGRATETYPE_MASK)
> struct free_area {
> struct list_head free_list[MIGRATE_TYPES];
> unsigned long nr_free;
> diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
> index 0c4963339f0b..00040e7df8c8 100644
> --- a/include/linux/pageblock-flags.h
> +++ b/include/linux/pageblock-flags.h
> @@ -20,7 +20,10 @@ enum pageblock_bits {
> PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
> /* 3 bits required for migrate types */
> PB_migrate_skip,/* If set the block is skipped by compaction */
> -
> +#ifdef CONFIG_MEMORY_ISOLATION
> + PB_migrate_isolate = 7, /* If set the block is isolated */
> + /* set it to 7 to make pageblock bit word aligned */
I think what we want to do here is align NR_PAGEBLOCK_BITS up to 4 bits
at relevant places. Or go to the next power-of-2.
Could we simply to that using something like
#ifdef CONFIG_MEMORY_ISOLATION
PB_migrate_isolate, /* If set the block is isolated */
#endif
__NR_PAGEBLOCK_BITS
};
/* We always want the bits to be a power of 2. */
#define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS))
Would something like that work?
> +#endif
> /*
> * Assume the bits will always align on a word. If this assumption
> * changes then get/set pageblock needs updating.
> @@ -28,6 +31,10 @@ enum pageblock_bits {
> NR_PAGEBLOCK_BITS
> };>
> +#ifdef CONFIG_MEMORY_ISOLATION
> +#define PB_migrate_isolate_bit BIT(PB_migrate_isolate)
> +#endif
> +
I assume we should first change users ot "1 << (PB_migrate_skip)" to
PB_migrate_skip_bit to keep it similar.
> #if defined(CONFIG_PAGE_BLOCK_ORDER)
> #define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
> #else
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index c77592b22256..04e301fb4879 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -381,10 +381,31 @@ unsigned long get_pfnblock_flags_mask(const struct page *page,
> return (word >> bitidx) & mask;
> }
>
> +unsigned long get_pageblock_migratetype(const struct page *page)
> +{
> + unsigned long flags;
> +
> + flags = get_pfnblock_flags_mask(page, page_to_pfn(page),
> + MIGRATETYPE_MASK);
When calling functions, we usually indent up to the beginning of the
parameters. Same for the other cases below.
... or just exceed the 80 chars a bit in this case. :)
> +#ifdef CONFIG_MEMORY_ISOLATION
> + if (flags & PB_migrate_isolate_bit)
> + return MIGRATE_ISOLATE;
> +#endif
> + return flags;
> +}
> +
> static __always_inline int get_pfnblock_migratetype(const struct page *page,
> unsigned long pfn)
> {
> - return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
> + unsigned long flags;
> +
> + flags = get_pfnblock_flags_mask(page, pfn,
> + MIGRATETYPE_MASK);
This should fit into a single line.
> +#ifdef CONFIG_MEMORY_ISOLATION
> + if (flags & PB_migrate_isolate_bit)
> + return MIGRATE_ISOLATE;
> +#endif
If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could
you ever get PB_migrate_isolate_bit?
I think what we should do is
1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
2) Remove the mask parameter
3) Perform the masking in all callers.
Maybe, we should convert set_pfnblock_flags_mask() to
void set_clear_pfnblock_flags(struct page *page, unsigned long
set_flags, unsigned long clear_flags);
And better, splitting it up (or providing helpers)
set_pfnblock_flags(struct page *page, unsigned long flags);
clear_pfnblock_flags(struct page *page, unsigned long flags);
This implies some more code cleanups first that make the code easier to
extend.
> + return flags;
> }
>
> /**
> @@ -402,8 +423,14 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
> unsigned long bitidx, word_bitidx;
> unsigned long word;
>
> +#ifdef CONFIG_MEMORY_ISOLATION
> + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
> + /* extra one for MIGRATE_ISOLATE */
> + BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits) + 1);
> +#else
> BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
> BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
> +#endif
>
> bitmap = get_pageblock_bitmap(page, pfn);
> bitidx = pfn_to_bitidx(page, pfn);
> @@ -426,6 +453,13 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
> migratetype < MIGRATE_PCPTYPES))
> migratetype = MIGRATE_UNMOVABLE;
>
> +#ifdef CONFIG_MEMORY_ISOLATION
> + if (migratetype == MIGRATE_ISOLATE) {
> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
> + page_to_pfn(page), PB_migrate_isolate_bit);
> + return;
> + }
> +#endif
> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
> page_to_pfn(page), MIGRATETYPE_MASK);
> }
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index b2fc5266e3d2..751e21f6d85e 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -15,6 +15,17 @@
> #define CREATE_TRACE_POINTS
> #include <trace/events/page_isolation.h>
>
> +static inline bool __maybe_unused get_pageblock_isolate(struct page *page)
> +{
> + return get_pfnblock_flags_mask(page, page_to_pfn(page),
> + PB_migrate_isolate_bit);
> +}
> +static inline void clear_pageblock_isolate(struct page *page)
> +{
> + set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
> + PB_migrate_isolate_bit);
> +}
Should these reside in include/linux/pageblock-flags.h, just like the
CONFIG_COMPACTION "skip" variants?
--
Cheers,
David / dhildenb
On 19 May 2025, at 4:08, David Hildenbrand wrote:
> On 09.05.25 22:01, Zi Yan wrote:
>> During page isolation, the original migratetype is overwritten, since
>> MIGRATE_* are enums and stored in pageblock bitmaps. Change
>> MIGRATE_ISOLATE to be stored a standalone bit, PB_migrate_isolate, like
>> PB_migrate_skip, so that migratetype is not lost during pageblock
>> isolation. pageblock bits needs to be word aligned, so expand
>> the number of pageblock bits from 4 to 8 and make PB_migrate_isolate bit 7.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> include/linux/mmzone.h | 15 ++++++++------
>> include/linux/pageblock-flags.h | 9 ++++++++-
>> mm/page_alloc.c | 36 ++++++++++++++++++++++++++++++++-
>> mm/page_isolation.c | 11 ++++++++++
>> 4 files changed, 63 insertions(+), 8 deletions(-)
>>
>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>> index b19a98c20de8..7ef01fe148ce 100644
>> --- a/include/linux/mmzone.h
>> +++ b/include/linux/mmzone.h
>> @@ -106,14 +106,17 @@ static inline bool migratetype_is_mergeable(int mt)
>> extern int page_group_by_mobility_disabled;
>> -#define MIGRATETYPE_MASK ((1UL << PB_migratetype_bits) - 1)
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> +#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
>> +#else
>> +#define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
>> +#endif
>> +
>> +unsigned long get_pageblock_migratetype(const struct page *page);
>> -#define get_pageblock_migratetype(page) \
>> - get_pfnblock_flags_mask(page, page_to_pfn(page), MIGRATETYPE_MASK)
>> +#define folio_migratetype(folio) \
>> + get_pageblock_migratetype(&folio->page)
>> -#define folio_migratetype(folio) \
>> - get_pfnblock_flags_mask(&folio->page, folio_pfn(folio), \
>> - MIGRATETYPE_MASK)
>> struct free_area {
>> struct list_head free_list[MIGRATE_TYPES];
>> unsigned long nr_free;
>> diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
>> index 0c4963339f0b..00040e7df8c8 100644
>> --- a/include/linux/pageblock-flags.h
>> +++ b/include/linux/pageblock-flags.h
>> @@ -20,7 +20,10 @@ enum pageblock_bits {
>> PB_migrate_end = PB_migrate + PB_migratetype_bits - 1,
>> /* 3 bits required for migrate types */
>> PB_migrate_skip,/* If set the block is skipped by compaction */
>> -
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + PB_migrate_isolate = 7, /* If set the block is isolated */
>> + /* set it to 7 to make pageblock bit word aligned */
>
> I think what we want to do here is align NR_PAGEBLOCK_BITS up to 4 bits at relevant places. Or go to the next power-of-2.
>
> Could we simply to that using something like
>
> #ifdef CONFIG_MEMORY_ISOLATION
> PB_migrate_isolate, /* If set the block is isolated */
> #endif
> __NR_PAGEBLOCK_BITS
> };
>
> /* We always want the bits to be a power of 2. */
> #define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS))
>
>
> Would something like that work?
Yes, it builds and boots on x86_64 for MEMROY_ISOLATION and !MEMORY_ISOLATION.
Will add this change.
>
>> +#endif
>> /*
>> * Assume the bits will always align on a word. If this assumption
>> * changes then get/set pageblock needs updating.
>> @@ -28,6 +31,10 @@ enum pageblock_bits {
>> NR_PAGEBLOCK_BITS
>> };>
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> +#define PB_migrate_isolate_bit BIT(PB_migrate_isolate)
>> +#endif
>> +
>
> I assume we should first change users ot "1 << (PB_migrate_skip)" to PB_migrate_skip_bit to keep it similar.
Will add this.
>
>> #if defined(CONFIG_PAGE_BLOCK_ORDER)
>> #define PAGE_BLOCK_ORDER CONFIG_PAGE_BLOCK_ORDER
>> #else
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index c77592b22256..04e301fb4879 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -381,10 +381,31 @@ unsigned long get_pfnblock_flags_mask(const struct page *page,
>> return (word >> bitidx) & mask;
>> }
>> +unsigned long get_pageblock_migratetype(const struct page *page)
>> +{
>> + unsigned long flags;
>> +
>> + flags = get_pfnblock_flags_mask(page, page_to_pfn(page),
>> + MIGRATETYPE_MASK);
>
> When calling functions, we usually indent up to the beginning of the parameters. Same for the other cases below.
OK, will follow this. I was confused, since I see various indentations across files.
There is .clang-format and clang-format indeed indent parameters like you said,
then I will use clang-format.
>
> ... or just exceed the 80 chars a bit in this case. :)
>
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + if (flags & PB_migrate_isolate_bit)
>> + return MIGRATE_ISOLATE;
>> +#endif
>> + return flags;
>> +}
>> +
>> static __always_inline int get_pfnblock_migratetype(const struct page *page,
>> unsigned long pfn)
>> {
>> - return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
>> + unsigned long flags;
>> +
>> + flags = get_pfnblock_flags_mask(page, pfn,
>> + MIGRATETYPE_MASK);
>
> This should fit into a single line.
Sure.
>
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + if (flags & PB_migrate_isolate_bit)
>> + return MIGRATE_ISOLATE;
>> +#endif
>
> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
so it gets PB_migrate_isolate_bit.
>
>
> I think what we should do is
>
> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>
> 2) Remove the mask parameter
>
> 3) Perform the masking in all callers.
get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
get PB_migrate_skip. I do not think we want to include PB_migrate_skip
in the mask to confuse readers.
>
>
>
> Maybe, we should convert set_pfnblock_flags_mask() to
>
> void set_clear_pfnblock_flags(struct page *page, unsigned long
> set_flags, unsigned long clear_flags);
>
> And better, splitting it up (or providing helpers)
>
> set_pfnblock_flags(struct page *page, unsigned long flags);
> clear_pfnblock_flags(struct page *page, unsigned long flags);
>
>
> This implies some more code cleanups first that make the code easier to extend.
>
The same due to PB_migrate_skip.
Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
internal APIs by prepending "__". They are only used by the new
{get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
WDYT?
>> + return flags;
>> }
>> /**
>> @@ -402,8 +423,14 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
>> unsigned long bitidx, word_bitidx;
>> unsigned long word;
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
>> + /* extra one for MIGRATE_ISOLATE */
>> + BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits) + 1);
>> +#else
>> BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
>> BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
>> +#endif
>> bitmap = get_pageblock_bitmap(page, pfn);
>> bitidx = pfn_to_bitidx(page, pfn);
>> @@ -426,6 +453,13 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
>> migratetype < MIGRATE_PCPTYPES))
>> migratetype = MIGRATE_UNMOVABLE;
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + if (migratetype == MIGRATE_ISOLATE) {
>> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>> + page_to_pfn(page), PB_migrate_isolate_bit);
>> + return;
>> + }
>> +#endif
>> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
>> page_to_pfn(page), MIGRATETYPE_MASK);
>> }
>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>> index b2fc5266e3d2..751e21f6d85e 100644
>> --- a/mm/page_isolation.c
>> +++ b/mm/page_isolation.c
>> @@ -15,6 +15,17 @@
>> #define CREATE_TRACE_POINTS
>> #include <trace/events/page_isolation.h>
>> +static inline bool __maybe_unused get_pageblock_isolate(struct page *page)
>> +{
>> + return get_pfnblock_flags_mask(page, page_to_pfn(page),
>> + PB_migrate_isolate_bit);
>> +}
>> +static inline void clear_pageblock_isolate(struct page *page)
>> +{
>> + set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
>> + PB_migrate_isolate_bit);
>> +}
>
> Should these reside in include/linux/pageblock-flags.h, just like the
> CONFIG_COMPACTION "skip" variants?
They are only used inside mm/page_isolation.c, so I would leave them
here until other users come out.
--
Best Regards,
Yan, Zi
>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>> + if (flags & PB_migrate_isolate_bit)
>>> + return MIGRATE_ISOLATE;
>>> +#endif
>>
>> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
>
> MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
> so it gets PB_migrate_isolate_bit.
>
Oh ... that's confusing.
>>
>>
>> I think what we should do is
>>
>> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>>
>> 2) Remove the mask parameter
>>
>> 3) Perform the masking in all callers.
>
> get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
> get PB_migrate_skip. I do not think we want to include PB_migrate_skip
> in the mask to confuse readers.
The masking will be handled in the caller.
So get_pageblock_skip() would essentially do a
return get_pfnblock_flags() & PB_migrate_skip_bit;
etc.
>
>>
>>
>>
>> Maybe, we should convert set_pfnblock_flags_mask() to
>>
>> void set_clear_pfnblock_flags(struct page *page, unsigned long
>> set_flags, unsigned long clear_flags);
>>
>> And better, splitting it up (or providing helpers)
>>
>> set_pfnblock_flags(struct page *page, unsigned long flags);
>> clear_pfnblock_flags(struct page *page, unsigned long flags);
>>
>>
>> This implies some more code cleanups first that make the code easier to extend.
>>
>
> The same due to PB_migrate_skip.
>
> Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
> internal APIs by prepending "__". They are only used by the new
> {get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
> Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
>
> WDYT?
In general, lgtm. I just hope we can avoid the "_mask" part and just
handle it in these functions directly?
>
>>> + return flags;
>>> }
>>> /**
>>> @@ -402,8 +423,14 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
>>> unsigned long bitidx, word_bitidx;
>>> unsigned long word;
>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>> + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
>>> + /* extra one for MIGRATE_ISOLATE */
>>> + BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits) + 1);
>>> +#else
>>> BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
>>> BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
>>> +#endif
>>> bitmap = get_pageblock_bitmap(page, pfn);
>>> bitidx = pfn_to_bitidx(page, pfn);
>>> @@ -426,6 +453,13 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
>>> migratetype < MIGRATE_PCPTYPES))
>>> migratetype = MIGRATE_UNMOVABLE;
>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>> + if (migratetype == MIGRATE_ISOLATE) {
>>> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>>> + page_to_pfn(page), PB_migrate_isolate_bit);
>>> + return;
>>> + }
>>> +#endif
>>> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
>>> page_to_pfn(page), MIGRATETYPE_MASK);
>>> }
>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>>> index b2fc5266e3d2..751e21f6d85e 100644
>>> --- a/mm/page_isolation.c
>>> +++ b/mm/page_isolation.c
>>> @@ -15,6 +15,17 @@
>>> #define CREATE_TRACE_POINTS
>>> #include <trace/events/page_isolation.h>
>>> +static inline bool __maybe_unused get_pageblock_isolate(struct page *page)
>>> +{
>>> + return get_pfnblock_flags_mask(page, page_to_pfn(page),
>>> + PB_migrate_isolate_bit);
>>> +}
>>> +static inline void clear_pageblock_isolate(struct page *page)
>>> +{
>>> + set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
>>> + PB_migrate_isolate_bit);
>>> +}
>>
>> Should these reside in include/linux/pageblock-flags.h, just like the
>> CONFIG_COMPACTION "skip" variants?
>
> They are only used inside mm/page_isolation.c, so I would leave them
> here until other users come out.
get_pageblock_skip() and friends are also only used in mm/compaction.c.
Having these simple wrapper as inline functions in the same header
should make it consistent.
... and avoid tricks like "__maybe_unused" here :)
--
Cheers,
David / dhildenb
On 19 May 2025, at 12:42, David Hildenbrand wrote:
>>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>>> + if (flags & PB_migrate_isolate_bit)
>>>> + return MIGRATE_ISOLATE;
>>>> +#endif
>>>
>>> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
>>
>> MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
>> so it gets PB_migrate_isolate_bit.
>>
>
> Oh ... that's confusing.
>
>>>
>>>
>>> I think what we should do is
>>>
>>> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>>>
>>> 2) Remove the mask parameter
>>>
>>> 3) Perform the masking in all callers.
>>
>> get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
>> get PB_migrate_skip. I do not think we want to include PB_migrate_skip
>> in the mask to confuse readers.
>
> The masking will be handled in the caller.
>
> So get_pageblock_skip() would essentially do a
>
> return get_pfnblock_flags() & PB_migrate_skip_bit;
>
> etc.
>
>>
>>>
>>>
>>>
>>> Maybe, we should convert set_pfnblock_flags_mask() to
>>>
>>> void set_clear_pfnblock_flags(struct page *page, unsigned long
>>> set_flags, unsigned long clear_flags);
>>>
>>> And better, splitting it up (or providing helpers)
>>>
>>> set_pfnblock_flags(struct page *page, unsigned long flags);
>>> clear_pfnblock_flags(struct page *page, unsigned long flags);
>>>
>>>
>>> This implies some more code cleanups first that make the code easier to extend.
>>>
>>
>> The same due to PB_migrate_skip.
>>
>> Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
>> internal APIs by prepending "__". They are only used by the new
>> {get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
>> Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
>>
>> WDYT?
>
> In general, lgtm. I just hope we can avoid the "_mask" part and just handle it in these functions directly?
After implementing {get, set, clear}_pfnblock_flags(), I find that
get_pfnblock_flags() is easy like you wrote above, but set and clear are not,
since migratetype and skip/isolate bits are in the same word, meaning
I will need to first read them out, change the field, then write them back.
But it will cause inconsistency if there is a parallel writer to the same
word. So for set and clear, mask is required.
I can try to implement {get, set, clear}_pfnblock_bits(page,pfn, bits) to
only handle standalone bits by using the given @bits as the mask and
{set,get}_pageblock_migratetype() still use the mask.
WDYT?
--
Best Regards,
Yan, Zi
On 21.05.25 13:16, Zi Yan wrote:
> On 19 May 2025, at 12:42, David Hildenbrand wrote:
>
>>>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>>>> + if (flags & PB_migrate_isolate_bit)
>>>>> + return MIGRATE_ISOLATE;
>>>>> +#endif
>>>>
>>>> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
>>>
>>> MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
>>> so it gets PB_migrate_isolate_bit.
>>>
>>
>> Oh ... that's confusing.
>>
>>>>
>>>>
>>>> I think what we should do is
>>>>
>>>> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>>>>
>>>> 2) Remove the mask parameter
>>>>
>>>> 3) Perform the masking in all callers.
>>>
>>> get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
>>> get PB_migrate_skip. I do not think we want to include PB_migrate_skip
>>> in the mask to confuse readers.
>>
>> The masking will be handled in the caller.
>>
>> So get_pageblock_skip() would essentially do a
>>
>> return get_pfnblock_flags() & PB_migrate_skip_bit;
>>
>> etc.
>>
>>>
>>>>
>>>>
>>>>
>>>> Maybe, we should convert set_pfnblock_flags_mask() to
>>>>
>>>> void set_clear_pfnblock_flags(struct page *page, unsigned long
>>>> set_flags, unsigned long clear_flags);
>>>>
>>>> And better, splitting it up (or providing helpers)
>>>>
>>>> set_pfnblock_flags(struct page *page, unsigned long flags);
>>>> clear_pfnblock_flags(struct page *page, unsigned long flags);
>>>>
>>>>
>>>> This implies some more code cleanups first that make the code easier to extend.
>>>>
>>>
>>> The same due to PB_migrate_skip.
>>>
>>> Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
>>> internal APIs by prepending "__". They are only used by the new
>>> {get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
>>> Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
>>>
>>> WDYT?
>>
>> In general, lgtm. I just hope we can avoid the "_mask" part and just handle it in these functions directly?
>
> After implementing {get, set, clear}_pfnblock_flags(), I find that
> get_pfnblock_flags() is easy like you wrote above, but set and clear are not,
> since migratetype and skip/isolate bits are in the same word, meaning
> I will need to first read them out, change the field, then write them back.
Like existing set_pfnblock_flags_mask() I guess, with the try_cmpxchg()
loop.
> But it will cause inconsistency if there is a parallel writer to the same
> word. So for set and clear, mask is required.
>
> I can try to implement {get, set, clear}_pfnblock_bits(page,pfn, bits) to
> only handle standalone bits by using the given @bits as the mask and
> {set,get}_pageblock_migratetype() still use the mask.
We'd still have to do the try_cmpxchg() when dealing with multiple bits,
right?
For single bits, we could just use set_bit() etc.
--
Cheers,
David / dhildenb
On 21 May 2025, at 7:57, David Hildenbrand wrote:
> On 21.05.25 13:16, Zi Yan wrote:
>> On 19 May 2025, at 12:42, David Hildenbrand wrote:
>>
>>>>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>>>>> + if (flags & PB_migrate_isolate_bit)
>>>>>> + return MIGRATE_ISOLATE;
>>>>>> +#endif
>>>>>
>>>>> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
>>>>
>>>> MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
>>>> so it gets PB_migrate_isolate_bit.
>>>>
>>>
>>> Oh ... that's confusing.
>>>
>>>>>
>>>>>
>>>>> I think what we should do is
>>>>>
>>>>> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>>>>>
>>>>> 2) Remove the mask parameter
>>>>>
>>>>> 3) Perform the masking in all callers.
>>>>
>>>> get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
>>>> get PB_migrate_skip. I do not think we want to include PB_migrate_skip
>>>> in the mask to confuse readers.
>>>
>>> The masking will be handled in the caller.
>>>
>>> So get_pageblock_skip() would essentially do a
>>>
>>> return get_pfnblock_flags() & PB_migrate_skip_bit;
>>>
>>> etc.
>>>
>>>>
>>>>>
>>>>>
>>>>>
>>>>> Maybe, we should convert set_pfnblock_flags_mask() to
>>>>>
>>>>> void set_clear_pfnblock_flags(struct page *page, unsigned long
>>>>> set_flags, unsigned long clear_flags);
>>>>>
>>>>> And better, splitting it up (or providing helpers)
>>>>>
>>>>> set_pfnblock_flags(struct page *page, unsigned long flags);
>>>>> clear_pfnblock_flags(struct page *page, unsigned long flags);
>>>>>
>>>>>
>>>>> This implies some more code cleanups first that make the code easier to extend.
>>>>>
>>>>
>>>> The same due to PB_migrate_skip.
>>>>
>>>> Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
>>>> internal APIs by prepending "__". They are only used by the new
>>>> {get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
>>>> Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
>>>>
>>>> WDYT?
>>>
>>> In general, lgtm. I just hope we can avoid the "_mask" part and just handle it in these functions directly?
>>
>> After implementing {get, set, clear}_pfnblock_flags(), I find that
>> get_pfnblock_flags() is easy like you wrote above, but set and clear are not,
>> since migratetype and skip/isolate bits are in the same word, meaning
>> I will need to first read them out, change the field, then write them back.
>
> Like existing set_pfnblock_flags_mask() I guess, with the try_cmpxchg() loop.
Are you saying I duplicate the code in set_pfnblock_flags_mask() to implement
set_pfnblock_flags()? Or just replace set_pfnblock_flags_mask() entirely?
>
>> But it will cause inconsistency if there is a parallel writer to the same
>> word. So for set and clear, mask is required.
>>
>> I can try to implement {get, set, clear}_pfnblock_bits(page,pfn, bits) to
>> only handle standalone bits by using the given @bits as the mask and
>> {set,get}_pageblock_migratetype() still use the mask.
>
> We'd still have to do the try_cmpxchg() when dealing with multiple bits, right?
>
> For single bits, we could just use set_bit() etc.
Mel moved from set_bit() to try_cmpxchg() a word for performance reason. I am
not sure we want to move back.
--
Best Regards,
Yan, Zi
On 21.05.25 14:00, Zi Yan wrote:
> On 21 May 2025, at 7:57, David Hildenbrand wrote:
>
>> On 21.05.25 13:16, Zi Yan wrote:
>>> On 19 May 2025, at 12:42, David Hildenbrand wrote:
>>>
>>>>>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>>>>>> + if (flags & PB_migrate_isolate_bit)
>>>>>>> + return MIGRATE_ISOLATE;
>>>>>>> +#endif
>>>>>>
>>>>>> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
>>>>>
>>>>> MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
>>>>> so it gets PB_migrate_isolate_bit.
>>>>>
>>>>
>>>> Oh ... that's confusing.
>>>>
>>>>>>
>>>>>>
>>>>>> I think what we should do is
>>>>>>
>>>>>> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>>>>>>
>>>>>> 2) Remove the mask parameter
>>>>>>
>>>>>> 3) Perform the masking in all callers.
>>>>>
>>>>> get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
>>>>> get PB_migrate_skip. I do not think we want to include PB_migrate_skip
>>>>> in the mask to confuse readers.
>>>>
>>>> The masking will be handled in the caller.
>>>>
>>>> So get_pageblock_skip() would essentially do a
>>>>
>>>> return get_pfnblock_flags() & PB_migrate_skip_bit;
>>>>
>>>> etc.
>>>>
>>>>>
>>>>>>
>>>>>>
>>>>>>
>>>>>> Maybe, we should convert set_pfnblock_flags_mask() to
>>>>>>
>>>>>> void set_clear_pfnblock_flags(struct page *page, unsigned long
>>>>>> set_flags, unsigned long clear_flags);
>>>>>>
>>>>>> And better, splitting it up (or providing helpers)
>>>>>>
>>>>>> set_pfnblock_flags(struct page *page, unsigned long flags);
>>>>>> clear_pfnblock_flags(struct page *page, unsigned long flags);
>>>>>>
>>>>>>
>>>>>> This implies some more code cleanups first that make the code easier to extend.
>>>>>>
>>>>>
>>>>> The same due to PB_migrate_skip.
>>>>>
>>>>> Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
>>>>> internal APIs by prepending "__". They are only used by the new
>>>>> {get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
>>>>> Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
>>>>>
>>>>> WDYT?
>>>>
>>>> In general, lgtm. I just hope we can avoid the "_mask" part and just handle it in these functions directly?
>>>
>>> After implementing {get, set, clear}_pfnblock_flags(), I find that
>>> get_pfnblock_flags() is easy like you wrote above, but set and clear are not,
>>> since migratetype and skip/isolate bits are in the same word, meaning
>>> I will need to first read them out, change the field, then write them back.
>>
>> Like existing set_pfnblock_flags_mask() I guess, with the try_cmpxchg() loop.
>
> Are you saying I duplicate the code in set_pfnblock_flags_mask() to implement
> set_pfnblock_flags()? Or just replace set_pfnblock_flags_mask() entirely?
The latter as possible.
>
>>
>>> But it will cause inconsistency if there is a parallel writer to the same
>>> word. So for set and clear, mask is required.
>>>
>>> I can try to implement {get, set, clear}_pfnblock_bits(page,pfn, bits) to
>>> only handle standalone bits by using the given @bits as the mask and
>>> {set,get}_pageblock_migratetype() still use the mask.
>>
>> We'd still have to do the try_cmpxchg() when dealing with multiple bits, right?
>>
>> For single bits, we could just use set_bit() etc.
>
> Mel moved from set_bit() to try_cmpxchg() a word for performance reason. I am
> not sure we want to move back.
In e58469bafd05 we moved from multiple set_bit etc to a cmpxchange.
- for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
- if (flags & value)
- __set_bit(bitidx + start_bitidx, bitmap);
- else
- __clear_bit(bitidx + start_bitidx, bitmap);
However, when only setting/clearing a single bit (e.g., isolated),
set_bit etc should be much cheaper.
For multiple bits, the existing try_cmpxchg should be kept IMHO.
--
Cheers,
David / dhildenb
On 21 May 2025, at 8:11, David Hildenbrand wrote:
> On 21.05.25 14:00, Zi Yan wrote:
>> On 21 May 2025, at 7:57, David Hildenbrand wrote:
>>
>>> On 21.05.25 13:16, Zi Yan wrote:
>>>> On 19 May 2025, at 12:42, David Hildenbrand wrote:
>>>>
>>>>>>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>>>>>>> + if (flags & PB_migrate_isolate_bit)
>>>>>>>> + return MIGRATE_ISOLATE;
>>>>>>>> +#endif
>>>>>>>
>>>>>>> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
>>>>>>
>>>>>> MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
>>>>>> so it gets PB_migrate_isolate_bit.
>>>>>>
>>>>>
>>>>> Oh ... that's confusing.
>>>>>
>>>>>>>
>>>>>>>
>>>>>>> I think what we should do is
>>>>>>>
>>>>>>> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>>>>>>>
>>>>>>> 2) Remove the mask parameter
>>>>>>>
>>>>>>> 3) Perform the masking in all callers.
>>>>>>
>>>>>> get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
>>>>>> get PB_migrate_skip. I do not think we want to include PB_migrate_skip
>>>>>> in the mask to confuse readers.
>>>>>
>>>>> The masking will be handled in the caller.
>>>>>
>>>>> So get_pageblock_skip() would essentially do a
>>>>>
>>>>> return get_pfnblock_flags() & PB_migrate_skip_bit;
>>>>>
>>>>> etc.
>>>>>
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> Maybe, we should convert set_pfnblock_flags_mask() to
>>>>>>>
>>>>>>> void set_clear_pfnblock_flags(struct page *page, unsigned long
>>>>>>> set_flags, unsigned long clear_flags);
>>>>>>>
>>>>>>> And better, splitting it up (or providing helpers)
>>>>>>>
>>>>>>> set_pfnblock_flags(struct page *page, unsigned long flags);
>>>>>>> clear_pfnblock_flags(struct page *page, unsigned long flags);
>>>>>>>
>>>>>>>
>>>>>>> This implies some more code cleanups first that make the code easier to extend.
>>>>>>>
>>>>>>
>>>>>> The same due to PB_migrate_skip.
>>>>>>
>>>>>> Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
>>>>>> internal APIs by prepending "__". They are only used by the new
>>>>>> {get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
>>>>>> Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
>>>>>>
>>>>>> WDYT?
>>>>>
>>>>> In general, lgtm. I just hope we can avoid the "_mask" part and just handle it in these functions directly?
>>>>
>>>> After implementing {get, set, clear}_pfnblock_flags(), I find that
>>>> get_pfnblock_flags() is easy like you wrote above, but set and clear are not,
>>>> since migratetype and skip/isolate bits are in the same word, meaning
>>>> I will need to first read them out, change the field, then write them back.
>>>
>>> Like existing set_pfnblock_flags_mask() I guess, with the try_cmpxchg() loop.
>>
>> Are you saying I duplicate the code in set_pfnblock_flags_mask() to implement
>> set_pfnblock_flags()? Or just replace set_pfnblock_flags_mask() entirely?
>
> The latter as possible.
>
>>
>>>
>>>> But it will cause inconsistency if there is a parallel writer to the same
>>>> word. So for set and clear, mask is required.
>>>>
>>>> I can try to implement {get, set, clear}_pfnblock_bits(page,pfn, bits) to
>>>> only handle standalone bits by using the given @bits as the mask and
>>>> {set,get}_pageblock_migratetype() still use the mask.
>>>
>>> We'd still have to do the try_cmpxchg() when dealing with multiple bits, right?
>>>
>>> For single bits, we could just use set_bit() etc.
>>
>> Mel moved from set_bit() to try_cmpxchg() a word for performance reason. I am
>> not sure we want to move back.
>
> In e58469bafd05 we moved from multiple set_bit etc to a cmpxchange.
>
> - for (; start_bitidx <= end_bitidx; start_bitidx++, value <<= 1)
> - if (flags & value)
> - __set_bit(bitidx + start_bitidx, bitmap);
> - else
> - __clear_bit(bitidx + start_bitidx, bitmap);
>
>
> However, when only setting/clearing a single bit (e.g., isolated), set_bit etc should be much cheaper.
>
> For multiple bits, the existing try_cmpxchg should be kept IMHO.
Yes, I was thinking about that too. Let me do that as a standalone cleanup series
first, then resend this one afterwards.
--
Best Regards,
Yan, Zi
On 19 May 2025, at 12:42, David Hildenbrand wrote:
>>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>>> + if (flags & PB_migrate_isolate_bit)
>>>> + return MIGRATE_ISOLATE;
>>>> +#endif
>>>
>>> If you call get_pfnblock_flags_mask() with MIGRATETYPE_MASK, how could you ever get PB_migrate_isolate_bit?
>>
>> MIGRATETYPE_MASK is ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit),
>> so it gets PB_migrate_isolate_bit.
>>
>
> Oh ... that's confusing.
>
>>>
>>>
>>> I think what we should do is
>>>
>>> 1) Rename get_pfnblock_flags_mask() to get_pfnblock_flags()
>>>
>>> 2) Remove the mask parameter
>>>
>>> 3) Perform the masking in all callers.
>>
>> get_pfnblock_flags_mask() is also used by get_pageblock_skip() to
>> get PB_migrate_skip. I do not think we want to include PB_migrate_skip
>> in the mask to confuse readers.
>
> The masking will be handled in the caller.
>
> So get_pageblock_skip() would essentially do a
>
> return get_pfnblock_flags() & PB_migrate_skip_bit;
>
> etc.
Got it. Sounds good to me. Will do this.
>
>>
>>>
>>>
>>>
>>> Maybe, we should convert set_pfnblock_flags_mask() to
>>>
>>> void set_clear_pfnblock_flags(struct page *page, unsigned long
>>> set_flags, unsigned long clear_flags);
>>>
>>> And better, splitting it up (or providing helpers)
>>>
>>> set_pfnblock_flags(struct page *page, unsigned long flags);
>>> clear_pfnblock_flags(struct page *page, unsigned long flags);
>>>
>>>
>>> This implies some more code cleanups first that make the code easier to extend.
>>>
>>
>> The same due to PB_migrate_skip.
>>
>> Based on your suggestion, we could make {set,get}_pfnblock_flags_mask()
>> internal APIs by prepending "__". They are only used by the new
>> {get, set, clear}_pfnblock_flags() and {get, set, clear}_pageblock_{skip, isolate}().
>> Then use {get, set, clear}_pfnblock_flags() for all migratetype operations.
>>
>> WDYT?
>
> In general, lgtm. I just hope we can avoid the "_mask" part and just handle it in these functions directly?
Sounds good to me. Will put this and
"#define NR_PAGEBLOCK_BITS (roundup_pow_of_two(__NR_PAGEBLOCK_BITS))"
in a cleanup patch before Patch 1.
>
>>
>>>> + return flags;
>>>> }
>>>> /**
>>>> @@ -402,8 +423,14 @@ void set_pfnblock_flags_mask(struct page *page, unsigned long flags,
>>>> unsigned long bitidx, word_bitidx;
>>>> unsigned long word;
>>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>>> + BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 8);
>>>> + /* extra one for MIGRATE_ISOLATE */
>>>> + BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits) + 1);
>>>> +#else
>>>> BUILD_BUG_ON(NR_PAGEBLOCK_BITS != 4);
>>>> BUILD_BUG_ON(MIGRATE_TYPES > (1 << PB_migratetype_bits));
>>>> +#endif
>>>> bitmap = get_pageblock_bitmap(page, pfn);
>>>> bitidx = pfn_to_bitidx(page, pfn);
>>>> @@ -426,6 +453,13 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
>>>> migratetype < MIGRATE_PCPTYPES))
>>>> migratetype = MIGRATE_UNMOVABLE;
>>>> +#ifdef CONFIG_MEMORY_ISOLATION
>>>> + if (migratetype == MIGRATE_ISOLATE) {
>>>> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>>>> + page_to_pfn(page), PB_migrate_isolate_bit);
>>>> + return;
>>>> + }
>>>> +#endif
>>>> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
>>>> page_to_pfn(page), MIGRATETYPE_MASK);
>>>> }
>>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>>>> index b2fc5266e3d2..751e21f6d85e 100644
>>>> --- a/mm/page_isolation.c
>>>> +++ b/mm/page_isolation.c
>>>> @@ -15,6 +15,17 @@
>>>> #define CREATE_TRACE_POINTS
>>>> #include <trace/events/page_isolation.h>
>>>> +static inline bool __maybe_unused get_pageblock_isolate(struct page *page)
>>>> +{
>>>> + return get_pfnblock_flags_mask(page, page_to_pfn(page),
>>>> + PB_migrate_isolate_bit);
>>>> +}
>>>> +static inline void clear_pageblock_isolate(struct page *page)
>>>> +{
>>>> + set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
>>>> + PB_migrate_isolate_bit);
>>>> +}
>>>
>>> Should these reside in include/linux/pageblock-flags.h, just like the
>>> CONFIG_COMPACTION "skip" variants?
>>
>> They are only used inside mm/page_isolation.c, so I would leave them
>> here until other users come out.
>
> get_pageblock_skip() and friends are also only used in mm/compaction.c.
>
> Having these simple wrapper as inline functions in the same header should make it consistent.
>
> ... and avoid tricks like "__maybe_unused" here :)
OK, will do this.
--
Best Regards,
Yan, Zi
Hi Zi,
I hope you don't mind me jumping in on a late revision like this...
On Fri May 9, 2025 at 8:01 PM UTC, Zi Yan wrote:
> During page isolation, the original migratetype is overwritten, since
> MIGRATE_* are enums and stored in pageblock bitmaps. Change
> MIGRATE_ISOLATE to be stored a standalone bit, PB_migrate_isolate, like
> PB_migrate_skip, so that migratetype is not lost during pageblock
> isolation. pageblock bits needs to be word aligned, so expand
> the number of pageblock bits from 4 to 8 and make PB_migrate_isolate bit 7.
Forgive my ignorance but can you help me confirm if I'm following this -
Do you just mean that NR_PAGEBLOCK_BITS must divide the word size? Or is
there something else going on here?
> +#ifdef CONFIG_MEMORY_ISOLATION
> + PB_migrate_isolate = 7, /* If set the block is isolated */
> + /* set it to 7 to make pageblock bit word aligned */
> +#endif
I feel I'm always just asking for commentary so please feel free to
complain if this is annoying. But I think it would be worth adding the
context from the commit message into the code here (or somewhere else),
e.g:
/*
* Page isolation is represented with a separate bit, so that the other
* bits can store the migratetype that the block had before it was
* isolated.
*/
Just adding in that detail about the intent will help readers a lot IMO.
>
> +unsigned long get_pageblock_migratetype(const struct page *page)
> +{
> + unsigned long flags;
> +
> + flags = get_pfnblock_flags_mask(page, page_to_pfn(page),
> + MIGRATETYPE_MASK);
> +#ifdef CONFIG_MEMORY_ISOLATION
> + if (flags & PB_migrate_isolate_bit)
> + return MIGRATE_ISOLATE;
> +#endif
> + return flags;
> +}
Can we just do get_pageblock_migratetype(page, page_to_pfn(page)) here?
> static __always_inline int get_pfnblock_migratetype(const struct page *page,
> unsigned long pfn)
> {
> - return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
> + unsigned long flags;
> +
> + flags = get_pfnblock_flags_mask(page, pfn,
> + MIGRATETYPE_MASK);
> +#ifdef CONFIG_MEMORY_ISOLATION
> + if (flags & PB_migrate_isolate_bit)
> + return MIGRATE_ISOLATE;
> +#endif
> + return flags;
> }
On 13 May 2025, at 7:32, Brendan Jackman wrote:
> Hi Zi,
>
> I hope you don't mind me jumping in on a late revision like this...
Right on time. :)
>
> On Fri May 9, 2025 at 8:01 PM UTC, Zi Yan wrote:
>> During page isolation, the original migratetype is overwritten, since
>> MIGRATE_* are enums and stored in pageblock bitmaps. Change
>> MIGRATE_ISOLATE to be stored a standalone bit, PB_migrate_isolate, like
>> PB_migrate_skip, so that migratetype is not lost during pageblock
>> isolation. pageblock bits needs to be word aligned, so expand
>> the number of pageblock bits from 4 to 8 and make PB_migrate_isolate bit 7.
>
> Forgive my ignorance but can you help me confirm if I'm following this -
> Do you just mean that NR_PAGEBLOCK_BITS must divide the word size? Or is
> there something else going on here?
You are right. NR_PAGEBLOCK_BITS must divide the word size. I will fix
the commit log in the next version.
>
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + PB_migrate_isolate = 7, /* If set the block is isolated */
>> + /* set it to 7 to make pageblock bit word aligned */
I will fix this comment too.
>> +#endif
>
> I feel I'm always just asking for commentary so please feel free to
> complain if this is annoying. But I think it would be worth adding the
> context from the commit message into the code here (or somewhere else),
> e.g:
>
> /*
> * Page isolation is represented with a separate bit, so that the other
> * bits can store the migratetype that the block had before it was
> * isolated.
> */
>
> Just adding in that detail about the intent will help readers a lot IMO.
Sure. Will add it.
>
>>
>> +unsigned long get_pageblock_migratetype(const struct page *page)
>> +{
>> + unsigned long flags;
>> +
>> + flags = get_pfnblock_flags_mask(page, page_to_pfn(page),
>> + MIGRATETYPE_MASK);
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + if (flags & PB_migrate_isolate_bit)
>> + return MIGRATE_ISOLATE;
>> +#endif
>> + return flags;
>> +}
>
> Can we just do get_pageblock_migratetype(page, page_to_pfn(page)) here?
Based on my observation, the callers all have page and pfn, so using the
current implementation would save a call to page_to_pfn(). I can add a comment
to this function.
/*
* Use get_pageblock_migratetype() if caller already has both @page and @pfn
* to save a call to page_to_pfn().
*/
>
>> static __always_inline int get_pfnblock_migratetype(const struct page *page,
>> unsigned long pfn)
>> {
>> - return get_pfnblock_flags_mask(page, pfn, MIGRATETYPE_MASK);
>> + unsigned long flags;
>> +
>> + flags = get_pfnblock_flags_mask(page, pfn,
>> + MIGRATETYPE_MASK);
>> +#ifdef CONFIG_MEMORY_ISOLATION
>> + if (flags & PB_migrate_isolate_bit)
>> + return MIGRATE_ISOLATE;
>> +#endif
>> + return flags;
>> }
Best Regards,
Yan, Zi
© 2016 - 2025 Red Hat, Inc.