Since migratetype is no longer overwritten during pageblock isolation,
moving pageblocks to and from MIGRATE_ISOLATE no longer needs migratetype.
Add MIGRATETYPE_NO_ISO_MASK to allow reading the before-isolation migratetype
when a pageblock is isolated. It is used by move_freepages_block_isolate().
Add pageblock_isolate_and_move_free_pages() and
pageblock_unisolate_and_move_free_pages() to be explicit about the page
isolation operations. Both share the common code in
__move_freepages_block_isolate(), which is renamed from
move_freepages_block_isolate().
Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types,
so that one should use set_pageblock_isolate() to isolate pageblocks.
Two consequential changes:
1. move pageblock migratetype code out of __move_freepages_block().
2. in online_pages() from mm/memory_hotplug.c, move_pfn_range_to_zone() is
called with MIGRATE_MOVABLE instead of MIGRATE_ISOLATE and all affected
pageblocks are isolated afterwards. Otherwise, all online pageblocks
will have non-determined migratetype.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
include/linux/mmzone.h | 4 +-
include/linux/page-isolation.h | 5 ++-
mm/memory_hotplug.c | 7 +++-
mm/page_alloc.c | 73 +++++++++++++++++++++++++---------
mm/page_isolation.c | 27 ++++++++-----
5 files changed, 82 insertions(+), 34 deletions(-)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 7ef01fe148ce..f66895456974 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -107,8 +107,10 @@ static inline bool migratetype_is_mergeable(int mt)
extern int page_group_by_mobility_disabled;
#ifdef CONFIG_MEMORY_ISOLATION
-#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
+#define MIGRATETYPE_NO_ISO_MASK (BIT(PB_migratetype_bits) - 1)
+#define MIGRATETYPE_MASK (MIGRATETYPE_NO_ISO_MASK | PB_migrate_isolate_bit)
#else
+#define MIGRATETYPE_NO_ISO_MASK MIGRATETYPE_MASK
#define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
#endif
diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 898bb788243b..b0a2af0a5357 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -26,9 +26,10 @@ static inline bool is_migrate_isolate(int migratetype)
#define REPORT_FAILURE 0x2
void set_pageblock_migratetype(struct page *page, int migratetype);
+void set_pageblock_isolate(struct page *page);
-bool move_freepages_block_isolate(struct zone *zone, struct page *page,
- int migratetype);
+bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
+bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
int migratetype, int flags);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b1caedbade5b..c86c47bba019 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1178,6 +1178,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
const int nid = zone_to_nid(zone);
int ret;
struct memory_notify arg;
+ unsigned long isol_pfn;
/*
* {on,off}lining is constrained to full memory sections (or more
@@ -1192,7 +1193,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
/* associate pfn range with the zone */
- move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
+ move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_MOVABLE);
+ for (isol_pfn = pfn;
+ isol_pfn < pfn + nr_pages;
+ isol_pfn += pageblock_nr_pages)
+ set_pageblock_isolate(pfn_to_page(isol_pfn));
arg.start_pfn = pfn;
arg.nr_pages = nr_pages;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 04e301fb4879..cfd37b2d992e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -454,11 +454,9 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
migratetype = MIGRATE_UNMOVABLE;
#ifdef CONFIG_MEMORY_ISOLATION
- if (migratetype == MIGRATE_ISOLATE) {
- set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
- page_to_pfn(page), PB_migrate_isolate_bit);
- return;
- }
+	if (migratetype == MIGRATE_ISOLATE) {
+		VM_WARN(1,
+			"Use set_pageblock_isolate() for pageblock isolation");
+		return;
+	}
#endif
set_pfnblock_flags_mask(page, (unsigned long)migratetype,
page_to_pfn(page), MIGRATETYPE_MASK);
@@ -1819,8 +1817,8 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
#endif
/*
- * Change the type of a block and move all its free pages to that
- * type's freelist.
+ * Move all free pages of a block to new type's freelist. Caller needs to
+ * change the block type.
*/
static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
int old_mt, int new_mt)
@@ -1852,8 +1850,6 @@ static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
pages_moved += 1 << order;
}
- set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
-
return pages_moved;
}
@@ -1911,11 +1907,16 @@ static int move_freepages_block(struct zone *zone, struct page *page,
int old_mt, int new_mt)
{
unsigned long start_pfn;
+ int res;
if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
return -1;
- return __move_freepages_block(zone, start_pfn, old_mt, new_mt);
+ res = __move_freepages_block(zone, start_pfn, old_mt, new_mt);
+ set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
+
+	return res;
}
#ifdef CONFIG_MEMORY_ISOLATION
@@ -1943,11 +1944,17 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
return start_pfn;
}
+static inline void toggle_pageblock_isolate(struct page *page, bool isolate)
+{
+ set_pfnblock_flags_mask(page, (isolate << PB_migrate_isolate),
+ page_to_pfn(page), PB_migrate_isolate_bit);
+}
+
/**
- * move_freepages_block_isolate - move free pages in block for page isolation
+ * __move_freepages_block_isolate - move free pages in block for page isolation
* @zone: the zone
* @page: the pageblock page
- * @migratetype: migratetype to set on the pageblock
+ * @isolate: to isolate the given pageblock or unisolate it
*
* This is similar to move_freepages_block(), but handles the special
* case encountered in page isolation, where the block of interest
@@ -1962,10 +1969,15 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
*
* Returns %true if pages could be moved, %false otherwise.
*/
-bool move_freepages_block_isolate(struct zone *zone, struct page *page,
- int migratetype)
+static bool __move_freepages_block_isolate(struct zone *zone,
+ struct page *page, bool isolate)
{
unsigned long start_pfn, pfn;
+ int from_mt;
+ int to_mt;
+
+ if (isolate == (get_pageblock_migratetype(page) == MIGRATE_ISOLATE))
+ return false;
if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
return false;
@@ -1982,7 +1994,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
del_page_from_free_list(buddy, zone, order,
get_pfnblock_migratetype(buddy, pfn));
- set_pageblock_migratetype(page, migratetype);
+ toggle_pageblock_isolate(page, isolate);
split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
return true;
}
@@ -1993,16 +2005,38 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
del_page_from_free_list(page, zone, order,
get_pfnblock_migratetype(page, pfn));
- set_pageblock_migratetype(page, migratetype);
+ toggle_pageblock_isolate(page, isolate);
split_large_buddy(zone, page, pfn, order, FPI_NONE);
return true;
}
move:
- __move_freepages_block(zone, start_pfn,
- get_pfnblock_migratetype(page, start_pfn),
- migratetype);
+ /* use MIGRATETYPE_NO_ISO_MASK to get the non-isolate migratetype */
+ if (isolate) {
+ from_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
+ MIGRATETYPE_NO_ISO_MASK);
+ to_mt = MIGRATE_ISOLATE;
+ } else {
+ from_mt = MIGRATE_ISOLATE;
+ to_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
+ MIGRATETYPE_NO_ISO_MASK);
+ }
+
+ __move_freepages_block(zone, start_pfn, from_mt, to_mt);
+ toggle_pageblock_isolate(pfn_to_page(start_pfn), isolate);
+
return true;
}
+
+bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page)
+{
+ return __move_freepages_block_isolate(zone, page, true);
+}
+
+bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page)
+{
+ return __move_freepages_block_isolate(zone, page, false);
+}
+
#endif /* CONFIG_MEMORY_ISOLATION */
static void change_pageblock_range(struct page *pageblock_page,
@@ -2194,6 +2228,7 @@ try_to_claim_block(struct zone *zone, struct page *page,
if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
page_group_by_mobility_disabled) {
__move_freepages_block(zone, start_pfn, block_type, start_type);
+ set_pageblock_migratetype(pfn_to_page(start_pfn), start_type);
return __rmqueue_smallest(zone, order, start_type);
}
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 751e21f6d85e..4571940f14db 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -25,6 +25,12 @@ static inline void clear_pageblock_isolate(struct page *page)
set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
PB_migrate_isolate_bit);
}
+void set_pageblock_isolate(struct page *page)
+{
+ set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
+ page_to_pfn(page),
+ PB_migrate_isolate_bit);
+}
/*
* This function checks whether the range [start_pfn, end_pfn) includes
@@ -199,7 +205,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
migratetype, isol_flags);
if (!unmovable) {
- if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) {
+ if (!pageblock_isolate_and_move_free_pages(zone, page)) {
spin_unlock_irqrestore(&zone->lock, flags);
return -EBUSY;
}
@@ -220,7 +226,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
return -EBUSY;
}
-static void unset_migratetype_isolate(struct page *page, int migratetype)
+static void unset_migratetype_isolate(struct page *page)
{
struct zone *zone;
unsigned long flags;
@@ -273,10 +279,10 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
* Isolating this block already succeeded, so this
* should not fail on zone boundaries.
*/
- WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype));
+ WARN_ON_ONCE(!pageblock_unisolate_and_move_free_pages(zone, page));
} else {
- set_pageblock_migratetype(page, migratetype);
- __putback_isolated_page(page, order, migratetype);
+ clear_pageblock_isolate(page);
+ __putback_isolated_page(page, order, get_pageblock_migratetype(page));
}
zone->nr_isolate_pageblock--;
out:
@@ -394,7 +400,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
if (PageBuddy(page)) {
int order = buddy_order(page);
- /* move_freepages_block_isolate() handled this */
+ /* pageblock_isolate_and_move_free_pages() handled this */
VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn);
pfn += 1UL << order;
@@ -444,7 +450,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
failed:
/* restore the original migratetype */
if (!skip_isolation)
- unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
+ unset_migratetype_isolate(pfn_to_page(isolate_pageblock));
return -EBUSY;
}
@@ -515,7 +521,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
ret = isolate_single_pageblock(isolate_end, flags, true,
skip_isolation, migratetype);
if (ret) {
- unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
+ unset_migratetype_isolate(pfn_to_page(isolate_start));
return ret;
}
@@ -528,8 +534,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
start_pfn, end_pfn)) {
undo_isolate_page_range(isolate_start, pfn, migratetype);
unset_migratetype_isolate(
- pfn_to_page(isolate_end - pageblock_nr_pages),
- migratetype);
+ pfn_to_page(isolate_end - pageblock_nr_pages));
return -EBUSY;
}
}
@@ -559,7 +564,7 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
page = __first_valid_page(pfn, pageblock_nr_pages);
if (!page || !is_migrate_isolate_page(page))
continue;
- unset_migratetype_isolate(page, migratetype);
+ unset_migratetype_isolate(page);
}
}
/*
--
2.47.2
On 09.05.25 22:01, Zi Yan wrote:
> Since migratetype is no longer overwritten during pageblock isolation,
> moving pageblocks to and from MIGRATE_ISOLATE no longer needs migratetype.
>
> Add MIGRATETYPE_NO_ISO_MASK to allow read before-isolation migratetype
> when a pageblock is isolated. It is used by move_freepages_block_isolate().
>
> Add pageblock_isolate_and_move_free_pages() and
> pageblock_unisolate_and_move_free_pages() to be explicit about the page
> isolation operations. Both share the common code in
> __move_freepages_block_isolate(), which is renamed from
> move_freepages_block_isolate().
>
> Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types,
> so that one should use set_pageblock_isolate() to isolate pageblocks.
>
> Two consequential changes:
> 1. move pageblock migratetype code out of __move_freepages_block().
> 2. in online_pages() from mm/memory_hotplug.c, move_pfn_range_to_zone() is
> called with MIGRATE_MOVABLE instead of MIGRATE_ISOLATE and all affected
> pageblocks are isolated afterwards. Otherwise, all online pageblocks
> will have non-determined migratetype.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> include/linux/mmzone.h | 4 +-
> include/linux/page-isolation.h | 5 ++-
> mm/memory_hotplug.c | 7 +++-
> mm/page_alloc.c | 73 +++++++++++++++++++++++++---------
> mm/page_isolation.c | 27 ++++++++-----
> 5 files changed, 82 insertions(+), 34 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 7ef01fe148ce..f66895456974 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -107,8 +107,10 @@ static inline bool migratetype_is_mergeable(int mt)
> extern int page_group_by_mobility_disabled;
>
> #ifdef CONFIG_MEMORY_ISOLATION
> -#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
> +#define MIGRATETYPE_NO_ISO_MASK (BIT(PB_migratetype_bits) - 1)
> +#define MIGRATETYPE_MASK (MIGRATETYPE_NO_ISO_MASK | PB_migrate_isolate_bit)
> #else
> +#define MIGRATETYPE_NO_ISO_MASK MIGRATETYPE_MASK
> #define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
> #endif
>
> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> index 898bb788243b..b0a2af0a5357 100644
> --- a/include/linux/page-isolation.h
> +++ b/include/linux/page-isolation.h
> @@ -26,9 +26,10 @@ static inline bool is_migrate_isolate(int migratetype)
> #define REPORT_FAILURE 0x2
>
> void set_pageblock_migratetype(struct page *page, int migratetype);
> +void set_pageblock_isolate(struct page *page);
>
> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
> - int migratetype);
> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
>
> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> int migratetype, int flags);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index b1caedbade5b..c86c47bba019 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1178,6 +1178,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
> const int nid = zone_to_nid(zone);
> int ret;
> struct memory_notify arg;
> + unsigned long isol_pfn;
>
> /*
> * {on,off}lining is constrained to full memory sections (or more
> @@ -1192,7 +1193,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>
>
> /* associate pfn range with the zone */
> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_MOVABLE);
> + for (isol_pfn = pfn;
> + isol_pfn < pfn + nr_pages;
> + isol_pfn += pageblock_nr_pages)
> + set_pageblock_isolate(pfn_to_page(isol_pfn));
Can we move that all the way into memmap_init_range(), where we do the
set_pageblock_migratetype()?
The MIGRATE_UNMOVABLE in mhp_init_memmap_on_memory() is likely fine: all
pages in that pageblock will be used for the memmap. Everything is unmovable,
but no free pages so ... nobody cares? :)
diff --git a/mm/internal.h b/mm/internal.h
index 6b8ed20177432..bc102846fcf1f 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -821,7 +821,7 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
int nid, bool exact_nid);
void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
- unsigned long, enum meminit_context, struct vmem_altmap *, int);
+ unsigned long, enum meminit_context, struct vmem_altmap *, bool);
#if defined CONFIG_COMPACTION || defined CONFIG_CMA
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index b1caedbade5b1..4b2cf20ad21fb 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -764,13 +764,13 @@ static inline void section_taint_zone_device(unsigned long pfn)
* and resizing the pgdat/zone data to span the added pages. After this
* call, all affected pages are PageOffline().
*
- * All aligned pageblocks are initialized to the specified migratetype
- * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
- * zone stats (e.g., nr_isolate_pageblock) are touched.
+ * All aligned pageblocks are initialized to MIGRATE_MOVABLE, and are isolated
+ * if requested. Besides setting the migratetype, no related zone stats (e.g.,
+ * nr_isolate_pageblock) are touched.
*/
void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
unsigned long nr_pages,
- struct vmem_altmap *altmap, int migratetype)
+ struct vmem_altmap *altmap, bool isolate)
{
struct pglist_data *pgdat = zone->zone_pgdat;
int nid = pgdat->node_id;
@@ -802,7 +802,7 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
* are reserved so nobody should be touching them so we should be safe
*/
memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
- MEMINIT_HOTPLUG, altmap, migratetype);
+ MEMINIT_HOTPLUG, altmap, isolate);
set_zone_contiguous(zone);
}
@@ -1127,7 +1127,7 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
if (mhp_off_inaccessible)
page_init_poison(pfn_to_page(pfn), sizeof(struct page) * nr_pages);
- move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);
+ move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, false);
for (i = 0; i < nr_pages; i++) {
struct page *page = pfn_to_page(pfn + i);
@@ -1192,7 +1192,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
/* associate pfn range with the zone */
- move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
+ move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, true);
arg.start_pfn = pfn;
arg.nr_pages = nr_pages;
diff --git a/mm/memremap.c b/mm/memremap.c
index c417c843e9b1f..e47f6809f254b 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -254,7 +254,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
PHYS_PFN(range_len(range)), params->altmap,
- MIGRATE_MOVABLE);
+ false);
}
mem_hotplug_done();
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 1c5444e188f82..041106fc524be 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -867,14 +867,14 @@ static void __init init_unavailable_range(unsigned long spfn,
* up by memblock_free_all() once the early boot process is
* done. Non-atomic initialization, single-pass.
*
- * All aligned pageblocks are initialized to the specified migratetype
- * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
- * zone stats (e.g., nr_isolate_pageblock) are touched.
+ * All aligned pageblocks are initialized to MIGRATE_MOVABLE, and are isolated
+ * if requested. Besides setting the migratetype, no related zone stats (e.g.,
+ * nr_isolate_pageblock) are touched.
*/
void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn, unsigned long zone_end_pfn,
enum meminit_context context,
- struct vmem_altmap *altmap, int migratetype)
+ struct vmem_altmap *altmap, bool isolate)
{
unsigned long pfn, end_pfn = start_pfn + size;
struct page *page;
@@ -931,7 +931,9 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
* over the place during system boot.
*/
if (pageblock_aligned(pfn)) {
- set_pageblock_migratetype(page, migratetype);
+ set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ if (isolate)
+ set_pageblock_isolate(page);
cond_resched();
}
pfn++;
@@ -954,7 +956,7 @@ static void __init memmap_init_zone_range(struct zone *zone,
return;
memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn,
- zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+ zone_end_pfn, MEMINIT_EARLY, NULL, false);
if (*hole_pfn < start_pfn)
init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
--
2.49.0
As an alternative, add a second "isolate" parameter and make sure that migratetype is
never MIGRATE_ISOLATE.
[...]
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -25,6 +25,12 @@ static inline void clear_pageblock_isolate(struct page *page)
> set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
> PB_migrate_isolate_bit);
> }
> +void set_pageblock_isolate(struct page *page)
> +{
> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
> + page_to_pfn(page),
> + PB_migrate_isolate_bit);
> +}
Probably better placed in the previous patch, and in the header (see comment to #1).
--
Cheers,
David / dhildenb
On 19 May 2025, at 4:21, David Hildenbrand wrote:
> On 09.05.25 22:01, Zi Yan wrote:
>> Since migratetype is no longer overwritten during pageblock isolation,
>> moving pageblocks to and from MIGRATE_ISOLATE no longer needs migratetype.
>>
>> Add MIGRATETYPE_NO_ISO_MASK to allow read before-isolation migratetype
>> when a pageblock is isolated. It is used by move_freepages_block_isolate().
>>
>> Add pageblock_isolate_and_move_free_pages() and
>> pageblock_unisolate_and_move_free_pages() to be explicit about the page
>> isolation operations. Both share the common code in
>> __move_freepages_block_isolate(), which is renamed from
>> move_freepages_block_isolate().
>>
>> Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types,
>> so that one should use set_pageblock_isolate() to isolate pageblocks.
>>
>> Two consequential changes:
>> 1. move pageblock migratetype code out of __move_freepages_block().
>> 2. in online_pages() from mm/memory_hotplug.c, move_pfn_range_to_zone() is
>> called with MIGRATE_MOVABLE instead of MIGRATE_ISOLATE and all affected
>> pageblocks are isolated afterwards. Otherwise, all online pageblocks
>> will have non-determined migratetype.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> include/linux/mmzone.h | 4 +-
>> include/linux/page-isolation.h | 5 ++-
>> mm/memory_hotplug.c | 7 +++-
>> mm/page_alloc.c | 73 +++++++++++++++++++++++++---------
>> mm/page_isolation.c | 27 ++++++++-----
>> 5 files changed, 82 insertions(+), 34 deletions(-)
>>
>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>> index 7ef01fe148ce..f66895456974 100644
>> --- a/include/linux/mmzone.h
>> +++ b/include/linux/mmzone.h
>> @@ -107,8 +107,10 @@ static inline bool migratetype_is_mergeable(int mt)
>> extern int page_group_by_mobility_disabled;
>> #ifdef CONFIG_MEMORY_ISOLATION
>> -#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
>> +#define MIGRATETYPE_NO_ISO_MASK (BIT(PB_migratetype_bits) - 1)
>> +#define MIGRATETYPE_MASK (MIGRATETYPE_NO_ISO_MASK | PB_migrate_isolate_bit)
>> #else
>> +#define MIGRATETYPE_NO_ISO_MASK MIGRATETYPE_MASK
>> #define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
>> #endif
>> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
>> index 898bb788243b..b0a2af0a5357 100644
>> --- a/include/linux/page-isolation.h
>> +++ b/include/linux/page-isolation.h
>> @@ -26,9 +26,10 @@ static inline bool is_migrate_isolate(int migratetype)
>> #define REPORT_FAILURE 0x2
>> void set_pageblock_migratetype(struct page *page, int migratetype);
>> +void set_pageblock_isolate(struct page *page);
>> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>> - int migratetype);
>> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
>> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
>> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> int migratetype, int flags);
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index b1caedbade5b..c86c47bba019 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1178,6 +1178,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>> const int nid = zone_to_nid(zone);
>> int ret;
>> struct memory_notify arg;
>> + unsigned long isol_pfn;
>> /*
>> * {on,off}lining is constrained to full memory sections (or more
>> @@ -1192,7 +1193,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>> /* associate pfn range with the zone */
>> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
>> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_MOVABLE);
>> + for (isol_pfn = pfn;
>> + isol_pfn < pfn + nr_pages;
>> + isol_pfn += pageblock_nr_pages)
>> + set_pageblock_isolate(pfn_to_page(isol_pfn));
>
> Can we move that all the way into memmap_init_range(), where we do the
> set_pageblock_migratetype()?
>
> The MIGRATE_UNMOVABLE in mhp_init_memmap_on_memory() is likely fine: all
> pages in that pageblock will be used for the memmap. Everything is unmovable,
> but no free pages so ... nobody cares? :)
My approach is similar, but adds a new init_pageblock_migratetype() like the
one below. Then, I added "bool isolate" instead of replacing the existing
"int migratetype". The advantage is that it saves a call to
set_pfnblock_flags_mask() for each pageblock. Like the alternative
you suggested below.
+void __meminit init_pageblock_migratetype(struct page *page, int migratetype,
+ bool isolate)
+{
+ if (unlikely(page_group_by_mobility_disabled &&
+ migratetype < MIGRATE_PCPTYPES))
+ migratetype = MIGRATE_UNMOVABLE;
+
+#ifdef CONFIG_MEMORY_ISOLATION
+ if (migratetype == MIGRATE_ISOLATE) {
+ VM_WARN(1,
+ "Set isolate=true to isolate pageblock with a migratetype");
+ return;
+ }
+ if (isolate)
+ migratetype |= PB_migrate_isolate_bit;
+#endif
+ set_pfnblock_flags_mask(page, (unsigned long)migratetype,
+ page_to_pfn(page), MIGRATETYPE_MASK);
+}
+
>
>
> diff --git a/mm/internal.h b/mm/internal.h
> index 6b8ed20177432..bc102846fcf1f 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -821,7 +821,7 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
> int nid, bool exact_nid);
> void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
> - unsigned long, enum meminit_context, struct vmem_altmap *, int);
> + unsigned long, enum meminit_context, struct vmem_altmap *, bool);
> #if defined CONFIG_COMPACTION || defined CONFIG_CMA
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index b1caedbade5b1..4b2cf20ad21fb 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -764,13 +764,13 @@ static inline void section_taint_zone_device(unsigned long pfn)
> * and resizing the pgdat/zone data to span the added pages. After this
> * call, all affected pages are PageOffline().
> *
> - * All aligned pageblocks are initialized to the specified migratetype
> - * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
> - * zone stats (e.g., nr_isolate_pageblock) are touched.
> + * All aligned pageblocks are initialized to MIGRATE_MOVABLE, and are isolated
> + * if requested. Besides setting the migratetype, no related zone stats (e.g.,
> + * nr_isolate_pageblock) are touched.
> */
> void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
> unsigned long nr_pages,
> - struct vmem_altmap *altmap, int migratetype)
> + struct vmem_altmap *altmap, bool isolate)
> {
> struct pglist_data *pgdat = zone->zone_pgdat;
> int nid = pgdat->node_id;
> @@ -802,7 +802,7 @@ void move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
> * are reserved so nobody should be touching them so we should be safe
> */
> memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
> - MEMINIT_HOTPLUG, altmap, migratetype);
> + MEMINIT_HOTPLUG, altmap, isolate);
> set_zone_contiguous(zone);
> }
> @@ -1127,7 +1127,7 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
> if (mhp_off_inaccessible)
> page_init_poison(pfn_to_page(pfn), sizeof(struct page) * nr_pages);
> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);
> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, false);
> for (i = 0; i < nr_pages; i++) {
> struct page *page = pfn_to_page(pfn + i);
> @@ -1192,7 +1192,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
> /* associate pfn range with the zone */
> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, true);
> arg.start_pfn = pfn;
> arg.nr_pages = nr_pages;
> diff --git a/mm/memremap.c b/mm/memremap.c
> index c417c843e9b1f..e47f6809f254b 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -254,7 +254,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
> zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
> move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
> PHYS_PFN(range_len(range)), params->altmap,
> - MIGRATE_MOVABLE);
> + false);
> }
> mem_hotplug_done();
> diff --git a/mm/mm_init.c b/mm/mm_init.c
> index 1c5444e188f82..041106fc524be 100644
> --- a/mm/mm_init.c
> +++ b/mm/mm_init.c
> @@ -867,14 +867,14 @@ static void __init init_unavailable_range(unsigned long spfn,
> * up by memblock_free_all() once the early boot process is
> * done. Non-atomic initialization, single-pass.
> *
> - * All aligned pageblocks are initialized to the specified migratetype
> - * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
> - * zone stats (e.g., nr_isolate_pageblock) are touched.
> + * All aligned pageblocks are initialized to MIGRATE_MOVABLE, and are isolated
> + * if requested. Besides setting the migratetype, no related zone stats (e.g.,
> + * nr_isolate_pageblock) are touched.
> */
> void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
> unsigned long start_pfn, unsigned long zone_end_pfn,
> enum meminit_context context,
> - struct vmem_altmap *altmap, int migratetype)
> + struct vmem_altmap *altmap, bool isolate)
> {
> unsigned long pfn, end_pfn = start_pfn + size;
> struct page *page;
> @@ -931,7 +931,9 @@ void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone
> * over the place during system boot.
> */
> if (pageblock_aligned(pfn)) {
> - set_pageblock_migratetype(page, migratetype);
> + set_pageblock_migratetype(page, MIGRATE_MOVABLE);
> + if (isolate)
> + set_pageblock_isolate(page, isolate)
> cond_resched();
> }
> pfn++;
> @@ -954,7 +956,7 @@ static void __init memmap_init_zone_range(struct zone *zone,
> return;
> memmap_init_range(end_pfn - start_pfn, nid, zone_id, start_pfn,
> - zone_end_pfn, MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
> + zone_end_pfn, MEMINIT_EARLY, NULL, false);
> if (*hole_pfn < start_pfn)
> init_unavailable_range(*hole_pfn, start_pfn, zone_id, nid);
> --
> 2.49.0
>
>
>
> As an alterantive, a second "isolate" parameter and make sure that migratetype is
> never MIGRATE_ISOLATE.
>
> [...]
>
>> --- a/mm/page_isolation.c
>> +++ b/mm/page_isolation.c
>> @@ -25,6 +25,12 @@ static inline void clear_pageblock_isolate(struct page *page)
>> set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
>> PB_migrate_isolate_bit);
>> }
>> +void set_pageblock_isolate(struct page *page)
>> +{
>> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>> + page_to_pfn(page),
>> + PB_migrate_isolate_bit);
>> +}
>
> Probably better placed in the previous patch, and in the header (see comment to #1).
Sure.
--
Best Regards,
Yan, Zi
On 20.05.25 01:06, Zi Yan wrote:
> On 19 May 2025, at 4:21, David Hildenbrand wrote:
>
>> On 09.05.25 22:01, Zi Yan wrote:
>>> Since migratetype is no longer overwritten during pageblock isolation,
>>> moving pageblocks to and from MIGRATE_ISOLATE no longer needs migratetype.
>>>
>>> Add MIGRATETYPE_NO_ISO_MASK to allow read before-isolation migratetype
>>> when a pageblock is isolated. It is used by move_freepages_block_isolate().
>>>
>>> Add pageblock_isolate_and_move_free_pages() and
>>> pageblock_unisolate_and_move_free_pages() to be explicit about the page
>>> isolation operations. Both share the common code in
>>> __move_freepages_block_isolate(), which is renamed from
>>> move_freepages_block_isolate().
>>>
>>> Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types,
>>> so that one should use set_pageblock_isolate() to isolate pageblocks.
>>>
>>> Two consequential changes:
>>> 1. move pageblock migratetype code out of __move_freepages_block().
>>> 2. in online_pages() from mm/memory_hotplug.c, move_pfn_range_to_zone() is
>>> called with MIGRATE_MOVABLE instead of MIGRATE_ISOLATE and all affected
>>> pageblocks are isolated afterwards. Otherwise, all online pageblocks
>>> will have non-determined migratetype.
>>>
>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>> ---
>>> include/linux/mmzone.h | 4 +-
>>> include/linux/page-isolation.h | 5 ++-
>>> mm/memory_hotplug.c | 7 +++-
>>> mm/page_alloc.c | 73 +++++++++++++++++++++++++---------
>>> mm/page_isolation.c | 27 ++++++++-----
>>> 5 files changed, 82 insertions(+), 34 deletions(-)
>>>
>>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>>> index 7ef01fe148ce..f66895456974 100644
>>> --- a/include/linux/mmzone.h
>>> +++ b/include/linux/mmzone.h
>>> @@ -107,8 +107,10 @@ static inline bool migratetype_is_mergeable(int mt)
>>> extern int page_group_by_mobility_disabled;
>>> #ifdef CONFIG_MEMORY_ISOLATION
>>> -#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
>>> +#define MIGRATETYPE_NO_ISO_MASK (BIT(PB_migratetype_bits) - 1)
>>> +#define MIGRATETYPE_MASK (MIGRATETYPE_NO_ISO_MASK | PB_migrate_isolate_bit)
>>> #else
>>> +#define MIGRATETYPE_NO_ISO_MASK MIGRATETYPE_MASK
>>> #define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
>>> #endif
>>> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
>>> index 898bb788243b..b0a2af0a5357 100644
>>> --- a/include/linux/page-isolation.h
>>> +++ b/include/linux/page-isolation.h
>>> @@ -26,9 +26,10 @@ static inline bool is_migrate_isolate(int migratetype)
>>> #define REPORT_FAILURE 0x2
>>> void set_pageblock_migratetype(struct page *page, int migratetype);
>>> +void set_pageblock_isolate(struct page *page);
>>> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>>> - int migratetype);
>>> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
>>> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
>>> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>> int migratetype, int flags);
>>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>>> index b1caedbade5b..c86c47bba019 100644
>>> --- a/mm/memory_hotplug.c
>>> +++ b/mm/memory_hotplug.c
>>> @@ -1178,6 +1178,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>>> const int nid = zone_to_nid(zone);
>>> int ret;
>>> struct memory_notify arg;
>>> + unsigned long isol_pfn;
>>> /*
>>> * {on,off}lining is constrained to full memory sections (or more
>>> @@ -1192,7 +1193,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>>> /* associate pfn range with the zone */
>>> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
>>> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_MOVABLE);
>>> + for (isol_pfn = pfn;
>>> + isol_pfn < pfn + nr_pages;
>>> + isol_pfn += pageblock_nr_pages)
>>> + set_pageblock_isolate(pfn_to_page(isol_pfn));
>>
>> Can we move that all the way into memmap_init_range(), where we do the
>> set_pageblock_migratetype()?
>>
>> The MIGRATE_UNMOVABLE in mhp_init_memmap_on_memory() is likely fine: all
>> pages in that pageblock will be used for the memmap. Everything is unmovable,
>> but no free pages so ... nobody cares? :)
>
> My approach is similar, but adds a new init_pageblock_migratetype() like
> below. Then, I added "bool isolate" instead of replacing the existing
> "int migratetype". The advantage is that it saves a call to
> set_pfnblock_flags_mask() for each pageblock. Like the alternative
> you suggested below.
>
> +void __meminit init_pageblock_migratetype(struct page *page, int migratetype,
> + bool isolate)
> +{
> + if (unlikely(page_group_by_mobility_disabled &&
> + migratetype < MIGRATE_PCPTYPES))
> + migratetype = MIGRATE_UNMOVABLE;
> +
> +#ifdef CONFIG_MEMORY_ISOLATION
> + if (migratetype == MIGRATE_ISOLATE) {
> + VM_WARN(1,
> + "Set isolate=true to isolate pageblock with a migratetype");
> + return;
> + }
> + if (isolate)
> + migratetype |= PB_migrate_isolate_bit;
> +#endif
> + set_pfnblock_flags_mask(page, (unsigned long)migratetype,
> + page_to_pfn(page), MIGRATETYPE_MASK);
> +}
> +
See my other reply on maybe introducing a "struct pageblock_info" where
we embed these things, to decouple the actual migratetype from flags.
--
Cheers,
David / dhildenb
Andrew - please drop this series, it's broken in mm-new.
Zi - (as kernel bot reports actually!) I bisected a kernel splat to this
commit, triggered by the mm/transhuge-stress test (please make sure to run
mm self tests before submitting series :)
You can trigger it manually with:
./transhuge-stress -d 20
(The same invocation run_vmtest.sh uses).
Note that this was reported in [0] (thanks to Harry Yoo for pointing this
out to me off-list! :)
[0]: https://lore.kernel.org/linux-mm/87wmalyktd.fsf@linux.ibm.com/T/#u
The decoded splat (at this commit in mm-new):
[ 55.835700] ------------[ cut here ]------------
[ 55.835705] page type is 0, passed migratetype is 2 (nr=32)
[ 55.835720] WARNING: CPU: 2 PID: 288 at mm/page_alloc.c:727 move_to_free_list (mm/page_alloc.c:727 (discriminator 16))
[ 55.835734] Modules linked in:
[ 55.835739] Tainted: [W]=WARN
[ 55.835740] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
[ 55.835741] RIP: 0010:move_to_free_list (mm/page_alloc.c:727 (discriminator 16))
[ 55.835742] Code: e9 fe ff ff c6 05 f1 9b 7b 01 01 90 48 89 ef e8 11 d7 ff ff 44 89 e1 44 89 ea 48 c7 c7 58 dc 70 82 48 89 c6 e8 1c e3 e0 ff 90 <0f> 0b 90 90 e9 ba fe ff ff 66 90 90 90 90 90 90 90 90 90 90 90 90
All code
========
0: e9 fe ff ff c6 jmp 0xffffffffc7000003
5: 05 f1 9b 7b 01 add $0x17b9bf1,%eax
a: 01 90 48 89 ef e8 add %edx,-0x171076b8(%rax)
10: 11 d7 adc %edx,%edi
12: ff (bad)
13: ff 44 89 e1 incl -0x1f(%rcx,%rcx,4)
17: 44 89 ea mov %r13d,%edx
1a: 48 c7 c7 58 dc 70 82 mov $0xffffffff8270dc58,%rdi
21: 48 89 c6 mov %rax,%rsi
24: e8 1c e3 e0 ff call 0xffffffffffe0e345
29: 90 nop
2a:* 0f 0b ud2 <-- trapping instruction
2c: 90 nop
2d: 90 nop
2e: e9 ba fe ff ff jmp 0xfffffffffffffeed
33: 66 90 xchg %ax,%ax
35: 90 nop
36: 90 nop
37: 90 nop
38: 90 nop
39: 90 nop
3a: 90 nop
3b: 90 nop
3c: 90 nop
3d: 90 nop
3e: 90 nop
3f: 90 nop
Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: 90 nop
3: 90 nop
4: e9 ba fe ff ff jmp 0xfffffffffffffec3
9: 66 90 xchg %ax,%ax
b: 90 nop
c: 90 nop
d: 90 nop
e: 90 nop
f: 90 nop
10: 90 nop
11: 90 nop
12: 90 nop
13: 90 nop
14: 90 nop
15: 90 nop
[ 55.835743] RSP: 0018:ffffc900004eba20 EFLAGS: 00010086
[ 55.835744] RAX: 000000000000002f RBX: ffff88826cccb080 RCX: 0000000000000027
[ 55.835745] RDX: ffff888263d17b08 RSI: 0000000000000001 RDI: ffff888263d17b00
[ 55.835746] RBP: ffffea0005fe0000 R08: 00000000ffffdfff R09: ffffffff82b16528
[ 55.835746] R10: 80000000ffffe000 R11: 00000000ffffe000 R12: 0000000000000020
[ 55.835746] R13: 0000000000000002 R14: 0000000000000001 R15: 0000000000000005
[ 55.835750] FS: 00007fef6a06a740(0000) GS:ffff8882e08a0000(0000) knlGS:0000000000000000
[ 55.835751] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 55.835751] CR2: 00007fee20c00000 CR3: 0000000179321000 CR4: 0000000000750ef0
[ 55.835751] PKRU: 55555554
[ 55.835752] Call Trace:
[ 55.835755] <TASK>
[ 55.835756] __move_freepages_block (mm/page_alloc.c:1849)
[ 55.835758] try_to_claim_block (mm/page_alloc.c:452 (discriminator 3) mm/page_alloc.c:2231 (discriminator 3))
[ 55.835759] __rmqueue_pcplist (mm/page_alloc.c:2287 mm/page_alloc.c:2383 mm/page_alloc.c:2422 mm/page_alloc.c:3140)
[ 55.835760] get_page_from_freelist (./include/linux/spinlock.h:391 mm/page_alloc.c:3183 mm/page_alloc.c:3213 mm/page_alloc.c:3739)
[ 55.835761] __alloc_frozen_pages_noprof (mm/page_alloc.c:5032)
[ 55.835763] ? __blk_flush_plug (block/blk-core.c:1227 (discriminator 2))
[ 55.835766] alloc_pages_mpol (mm/mempolicy.c:2413)
[ 55.835768] vma_alloc_folio_noprof (mm/mempolicy.c:2432 mm/mempolicy.c:2465)
[ 55.835769] ? __pte_alloc (mm/memory.c:444)
[ 55.835771] do_anonymous_page (mm/memory.c:1064 (discriminator 4) mm/memory.c:4982 (discriminator 4) mm/memory.c:5039 (discriminator 4))
[ 55.835772] ? do_huge_pmd_anonymous_page (mm/huge_memory.c:1226 mm/huge_memory.c:1372)
[ 55.835774] __handle_mm_fault (mm/memory.c:4197 mm/memory.c:6038 mm/memory.c:6181)
[ 55.835776] handle_mm_fault (mm/memory.c:6350)
[ 55.835777] do_user_addr_fault (arch/x86/mm/fault.c:1338)
[ 55.835779] exc_page_fault (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:114 arch/x86/mm/fault.c:1488 arch/x86/mm/fault.c:1538)
[ 55.835783] asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
[ 55.835785] RIP: 0033:0x403824
[ 55.835786] Code: e0 0f 85 7c 01 00 00 ba 0e 00 00 00 be 00 00 20 00 48 89 c7 48 89 c3 e8 4a ea ff ff 85 c0 0f 85 51 01 00 00 8b 0d b4 49 00 00 <48> 89 1b 85 c9 0f 84 b1 00 00 00 83 e9 03 48 89 e6 ba 10 00 00 00
All code
========
0: e0 0f loopne 0x11
2: 85 7c 01 00 test %edi,0x0(%rcx,%rax,1)
6: 00 ba 0e 00 00 00 add %bh,0xe(%rdx)
c: be 00 00 20 00 mov $0x200000,%esi
11: 48 89 c7 mov %rax,%rdi
14: 48 89 c3 mov %rax,%rbx
17: e8 4a ea ff ff call 0xffffffffffffea66
1c: 85 c0 test %eax,%eax
1e: 0f 85 51 01 00 00 jne 0x175
24: 8b 0d b4 49 00 00 mov 0x49b4(%rip),%ecx # 0x49de
2a:* 48 89 1b mov %rbx,(%rbx) <-- trapping instruction
2d: 85 c9 test %ecx,%ecx
2f: 0f 84 b1 00 00 00 je 0xe6
35: 83 e9 03 sub $0x3,%ecx
38: 48 89 e6 mov %rsp,%rsi
3b: ba 10 00 00 00 mov $0x10,%edx
Code starting with the faulting instruction
===========================================
0: 48 89 1b mov %rbx,(%rbx)
3: 85 c9 test %ecx,%ecx
5: 0f 84 b1 00 00 00 je 0xbc
b: 83 e9 03 sub $0x3,%ecx
e: 48 89 e6 mov %rsp,%rsi
11: ba 10 00 00 00 mov $0x10,%edx
[ 55.835786] RSP: 002b:00007ffd50b1e550 EFLAGS: 00010246
[ 55.835787] RAX: 0000000000000000 RBX: 00007fee20c00000 RCX: 000000000000000c
[ 55.835787] RDX: 000000000000000e RSI: 0000000000200000 RDI: 00007fee20c00000
[ 55.835788] RBP: 0000000000000003 R08: 00000000ffffffff R09: 0000000000000000
[ 55.835788] R10: 0000000000004032 R11: 0000000000000246 R12: 00007fee20c00000
[ 55.835788] R13: 00007fef6a000000 R14: 00000000323ca6b0 R15: 0000000000000fd2
[ 55.835789] </TASK>
[ 55.835789] ---[ end trace 0000000000000000 ]---
On Fri, May 09, 2025 at 04:01:09PM -0400, Zi Yan wrote:
> Since migratetype is no longer overwritten during pageblock isolation,
> moving pageblocks to and from MIGRATE_ISOLATE no longer needs migratetype.
>
> Add MIGRATETYPE_NO_ISO_MASK to allow read before-isolation migratetype
> when a pageblock is isolated. It is used by move_freepages_block_isolate().
>
> Add pageblock_isolate_and_move_free_pages() and
> pageblock_unisolate_and_move_free_pages() to be explicit about the page
> isolation operations. Both share the common code in
> __move_freepages_block_isolate(), which is renamed from
> move_freepages_block_isolate().
>
> Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types,
> so that one should use set_pageblock_isolate() to isolate pageblocks.
>
> Two consequential changes:
> 1. move pageblock migratetype code out of __move_freepages_block().
> 2. in online_pages() from mm/memory_hotplug.c, move_pfn_range_to_zone() is
> called with MIGRATE_MOVABLE instead of MIGRATE_ISOLATE and all affected
> pageblocks are isolated afterwards. Otherwise, all online pageblocks
> will have non-determined migratetype.
>
> Signed-off-by: Zi Yan <ziy@nvidia.com>
> ---
> include/linux/mmzone.h | 4 +-
> include/linux/page-isolation.h | 5 ++-
> mm/memory_hotplug.c | 7 +++-
> mm/page_alloc.c | 73 +++++++++++++++++++++++++---------
> mm/page_isolation.c | 27 ++++++++-----
> 5 files changed, 82 insertions(+), 34 deletions(-)
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index 7ef01fe148ce..f66895456974 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -107,8 +107,10 @@ static inline bool migratetype_is_mergeable(int mt)
> extern int page_group_by_mobility_disabled;
>
> #ifdef CONFIG_MEMORY_ISOLATION
> -#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
> +#define MIGRATETYPE_NO_ISO_MASK (BIT(PB_migratetype_bits) - 1)
> +#define MIGRATETYPE_MASK (MIGRATETYPE_NO_ISO_MASK | PB_migrate_isolate_bit)
> #else
> +#define MIGRATETYPE_NO_ISO_MASK MIGRATETYPE_MASK
> #define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
> #endif
>
> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> index 898bb788243b..b0a2af0a5357 100644
> --- a/include/linux/page-isolation.h
> +++ b/include/linux/page-isolation.h
> @@ -26,9 +26,10 @@ static inline bool is_migrate_isolate(int migratetype)
> #define REPORT_FAILURE 0x2
>
> void set_pageblock_migratetype(struct page *page, int migratetype);
> +void set_pageblock_isolate(struct page *page);
>
> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
> - int migratetype);
> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
>
> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> int migratetype, int flags);
> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> index b1caedbade5b..c86c47bba019 100644
> --- a/mm/memory_hotplug.c
> +++ b/mm/memory_hotplug.c
> @@ -1178,6 +1178,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
> const int nid = zone_to_nid(zone);
> int ret;
> struct memory_notify arg;
> + unsigned long isol_pfn;
>
> /*
> * {on,off}lining is constrained to full memory sections (or more
> @@ -1192,7 +1193,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>
>
> /* associate pfn range with the zone */
> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_MOVABLE);
> + for (isol_pfn = pfn;
> + isol_pfn < pfn + nr_pages;
> + isol_pfn += pageblock_nr_pages)
> + set_pageblock_isolate(pfn_to_page(isol_pfn));
>
> arg.start_pfn = pfn;
> arg.nr_pages = nr_pages;
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 04e301fb4879..cfd37b2d992e 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -454,11 +454,9 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
> migratetype = MIGRATE_UNMOVABLE;
>
> #ifdef CONFIG_MEMORY_ISOLATION
> - if (migratetype == MIGRATE_ISOLATE) {
> - set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
> - page_to_pfn(page), PB_migrate_isolate_bit);
> - return;
> - }
> + VM_WARN(migratetype == MIGRATE_ISOLATE,
> + "Use set_pageblock_isolate() for pageblock isolation");
> + return;
> #endif
> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
> page_to_pfn(page), MIGRATETYPE_MASK);
> @@ -1819,8 +1817,8 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
> #endif
>
> /*
> - * Change the type of a block and move all its free pages to that
> - * type's freelist.
> + * Move all free pages of a block to new type's freelist. Caller needs to
> + * change the block type.
> */
> static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
> int old_mt, int new_mt)
> @@ -1852,8 +1850,6 @@ static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
> pages_moved += 1 << order;
> }
>
> - set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
> -
> return pages_moved;
> }
>
> @@ -1911,11 +1907,16 @@ static int move_freepages_block(struct zone *zone, struct page *page,
> int old_mt, int new_mt)
> {
> unsigned long start_pfn;
> + int res;
>
> if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
> return -1;
>
> - return __move_freepages_block(zone, start_pfn, old_mt, new_mt);
> + res = __move_freepages_block(zone, start_pfn, old_mt, new_mt);
> + set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
> +
> + return res;
> +
> }
>
> #ifdef CONFIG_MEMORY_ISOLATION
> @@ -1943,11 +1944,17 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
> return start_pfn;
> }
>
> +static inline void toggle_pageblock_isolate(struct page *page, bool isolate)
> +{
> + set_pfnblock_flags_mask(page, (isolate << PB_migrate_isolate),
> + page_to_pfn(page), PB_migrate_isolate_bit);
> +}
> +
> /**
> - * move_freepages_block_isolate - move free pages in block for page isolation
> + * __move_freepages_block_isolate - move free pages in block for page isolation
> * @zone: the zone
> * @page: the pageblock page
> - * @migratetype: migratetype to set on the pageblock
> + * @isolate: to isolate the given pageblock or unisolate it
> *
> * This is similar to move_freepages_block(), but handles the special
> * case encountered in page isolation, where the block of interest
> @@ -1962,10 +1969,15 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
> *
> * Returns %true if pages could be moved, %false otherwise.
> */
> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
> - int migratetype)
> +static bool __move_freepages_block_isolate(struct zone *zone,
> + struct page *page, bool isolate)
> {
> unsigned long start_pfn, pfn;
> + int from_mt;
> + int to_mt;
> +
> + if (isolate == (get_pageblock_migratetype(page) == MIGRATE_ISOLATE))
> + return false;
>
> if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
> return false;
> @@ -1982,7 +1994,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>
> del_page_from_free_list(buddy, zone, order,
> get_pfnblock_migratetype(buddy, pfn));
> - set_pageblock_migratetype(page, migratetype);
> + toggle_pageblock_isolate(page, isolate);
> split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
> return true;
> }
> @@ -1993,16 +2005,38 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>
> del_page_from_free_list(page, zone, order,
> get_pfnblock_migratetype(page, pfn));
> - set_pageblock_migratetype(page, migratetype);
> + toggle_pageblock_isolate(page, isolate);
> split_large_buddy(zone, page, pfn, order, FPI_NONE);
> return true;
> }
> move:
> - __move_freepages_block(zone, start_pfn,
> - get_pfnblock_migratetype(page, start_pfn),
> - migratetype);
> + /* use MIGRATETYPE_NO_ISO_MASK to get the non-isolate migratetype */
> + if (isolate) {
> + from_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
> + MIGRATETYPE_NO_ISO_MASK);
> + to_mt = MIGRATE_ISOLATE;
> + } else {
> + from_mt = MIGRATE_ISOLATE;
> + to_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
> + MIGRATETYPE_NO_ISO_MASK);
> + }
> +
> + __move_freepages_block(zone, start_pfn, from_mt, to_mt);
> + toggle_pageblock_isolate(pfn_to_page(start_pfn), isolate);
> +
> return true;
> }
> +
> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page)
> +{
> + return __move_freepages_block_isolate(zone, page, true);
> +}
> +
> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page)
> +{
> + return __move_freepages_block_isolate(zone, page, false);
> +}
> +
> #endif /* CONFIG_MEMORY_ISOLATION */
>
> static void change_pageblock_range(struct page *pageblock_page,
> @@ -2194,6 +2228,7 @@ try_to_claim_block(struct zone *zone, struct page *page,
> if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
> page_group_by_mobility_disabled) {
> __move_freepages_block(zone, start_pfn, block_type, start_type);
> + set_pageblock_migratetype(pfn_to_page(start_pfn), start_type);
> return __rmqueue_smallest(zone, order, start_type);
> }
>
> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> index 751e21f6d85e..4571940f14db 100644
> --- a/mm/page_isolation.c
> +++ b/mm/page_isolation.c
> @@ -25,6 +25,12 @@ static inline void clear_pageblock_isolate(struct page *page)
> set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
> PB_migrate_isolate_bit);
> }
> +void set_pageblock_isolate(struct page *page)
> +{
> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
> + page_to_pfn(page),
> + PB_migrate_isolate_bit);
> +}
>
> /*
> * This function checks whether the range [start_pfn, end_pfn) includes
> @@ -199,7 +205,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
> unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
> migratetype, isol_flags);
> if (!unmovable) {
> - if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) {
> + if (!pageblock_isolate_and_move_free_pages(zone, page)) {
> spin_unlock_irqrestore(&zone->lock, flags);
> return -EBUSY;
> }
> @@ -220,7 +226,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
> return -EBUSY;
> }
>
> -static void unset_migratetype_isolate(struct page *page, int migratetype)
> +static void unset_migratetype_isolate(struct page *page)
> {
> struct zone *zone;
> unsigned long flags;
> @@ -273,10 +279,10 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
> * Isolating this block already succeeded, so this
> * should not fail on zone boundaries.
> */
> - WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype));
> + WARN_ON_ONCE(!pageblock_unisolate_and_move_free_pages(zone, page));
> } else {
> - set_pageblock_migratetype(page, migratetype);
> - __putback_isolated_page(page, order, migratetype);
> + clear_pageblock_isolate(page);
> + __putback_isolated_page(page, order, get_pageblock_migratetype(page));
> }
> zone->nr_isolate_pageblock--;
> out:
> @@ -394,7 +400,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
> if (PageBuddy(page)) {
> int order = buddy_order(page);
>
> - /* move_freepages_block_isolate() handled this */
> + /* pageblock_isolate_and_move_free_pages() handled this */
> VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn);
>
> pfn += 1UL << order;
> @@ -444,7 +450,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
> failed:
> /* restore the original migratetype */
> if (!skip_isolation)
> - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
> + unset_migratetype_isolate(pfn_to_page(isolate_pageblock));
> return -EBUSY;
> }
>
> @@ -515,7 +521,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> ret = isolate_single_pageblock(isolate_end, flags, true,
> skip_isolation, migratetype);
> if (ret) {
> - unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
> + unset_migratetype_isolate(pfn_to_page(isolate_start));
> return ret;
> }
>
> @@ -528,8 +534,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> start_pfn, end_pfn)) {
> undo_isolate_page_range(isolate_start, pfn, migratetype);
> unset_migratetype_isolate(
> - pfn_to_page(isolate_end - pageblock_nr_pages),
> - migratetype);
> + pfn_to_page(isolate_end - pageblock_nr_pages));
> return -EBUSY;
> }
> }
> @@ -559,7 +564,7 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> page = __first_valid_page(pfn, pageblock_nr_pages);
> if (!page || !is_migrate_isolate_page(page))
> continue;
> - unset_migratetype_isolate(page, migratetype);
> + unset_migratetype_isolate(page);
> }
> }
> /*
> --
> 2.47.2
>
>
>
On 12 May 2025, at 12:10, Lorenzo Stoakes wrote:
> Andrew - please drop this series, it's broken in mm-new.
>
> Zi - (as kernel bot reports actually!) I bisected a kernel splat to this
> commit, triggered by the mm/transhuge-stress test (please make sure to run
> mm self tests before submitting series :)
>
> You can trigger it manually with:
>
> ./transhuge-stress -d 20
>
> (The same invocation run_vmtest.sh uses).
>
> Note that this was reported in [0] (thanks to Harry Yoo for pointing this
> out to me off-list! :)
>
> [0]: https://lore.kernel.org/linux-mm/87wmalyktd.fsf@linux.ibm.com/T/#u
>
The patch below fixed the issue and all mm tests passed. Will send v5 later
and hopefully I can get some feedback on this series before that.
From 81f4ff35a5e6abf4779597861f69e9c3cce16d41 Mon Sep 17 00:00:00 2001
From: Zi Yan <ziy@nvidia.com>
Date: Mon, 12 May 2025 17:57:49 -0400
Subject: [PATCH] fixup: make set_pageblock_migratetype() set migratetype.
Signed-off-by: Zi Yan <ziy@nvidia.com>
---
mm/page_alloc.c | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b3476e0f59ad..4b9c2c3d1b89 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -454,9 +454,11 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
migratetype = MIGRATE_UNMOVABLE;
#ifdef CONFIG_MEMORY_ISOLATION
- VM_WARN(migratetype == MIGRATE_ISOLATE,
+ if (migratetype == MIGRATE_ISOLATE) {
+ VM_WARN(1,
"Use set_pageblock_isolate() for pageblock isolation");
- return;
+ return;
+ }
#endif
set_pfnblock_flags_mask(page, (unsigned long)migratetype,
page_to_pfn(page), MIGRATETYPE_MASK);
--
2.47.2
Best Regards,
Yan, Zi
On Mon, 12 May 2025 17:10:56 +0100 Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote:

> Andrew - please drop this series, it's broken in mm-new.

Gone, thanks.
On 12 May 2025, at 12:10, Lorenzo Stoakes wrote:
> Andrew - please drop this series, it's broken in mm-new.
>
> Zi - (as kernel bot reports actually!) I bisected a kernel splat to this
> commit, triggered by the mm/transhuge-stress test (please make sure to run
> mm self tests before submitting series :)
>
> You can trigger it manually with:
>
> ./transhuge-stress -d 20
Thanks. I will fix the issue and resend.
>
> (The same invocation run_vmtest.sh uses).
>
> Note that this was reported in [0] (thanks to Harry Yoo for pointing this
> out to me off-list! :)
>
> [0]: https://lore.kernel.org/linux-mm/87wmalyktd.fsf@linux.ibm.com/T/#u
>
> The decoded splat (at this commit in mm-new):
>
> [ 55.835700] ------------[ cut here ]------------
> [ 55.835705] page type is 0, passed migratetype is 2 (nr=32)
> [ 55.835720] WARNING: CPU: 2 PID: 288 at mm/page_alloc.c:727 move_to_free_list (mm/page_alloc.c:727 (discriminator 16))
> [ 55.835734] Modules linked in:
> [ 55.835739] Tainted: [W]=WARN
> [ 55.835740] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
> [ 55.835741] RIP: 0010:move_to_free_list (mm/page_alloc.c:727 (discriminator 16))
> [ 55.835742] Code: e9 fe ff ff c6 05 f1 9b 7b 01 01 90 48 89 ef e8 11 d7 ff ff 44 89 e1 44 89 ea 48 c7 c7 58 dc 70 82 48 89 c6 e8 1c e3 e0 ff 90 <0f> 0b 90 90 e9 ba fe ff ff 66 90 90 90 90 90 90 90 90 90 90 90 90
> All code
> ========
> 0: e9 fe ff ff c6 jmp 0xffffffffc7000003
> 5: 05 f1 9b 7b 01 add $0x17b9bf1,%eax
> a: 01 90 48 89 ef e8 add %edx,-0x171076b8(%rax)
> 10: 11 d7 adc %edx,%edi
> 12: ff (bad)
> 13: ff 44 89 e1 incl -0x1f(%rcx,%rcx,4)
> 17: 44 89 ea mov %r13d,%edx
> 1a: 48 c7 c7 58 dc 70 82 mov $0xffffffff8270dc58,%rdi
> 21: 48 89 c6 mov %rax,%rsi
> 24: e8 1c e3 e0 ff call 0xffffffffffe0e345
> 29: 90 nop
> 2a:* 0f 0b ud2 <-- trapping instruction
> 2c: 90 nop
> 2d: 90 nop
> 2e: e9 ba fe ff ff jmp 0xfffffffffffffeed
> 33: 66 90 xchg %ax,%ax
> 35: 90 nop
> 36: 90 nop
> 37: 90 nop
> 38: 90 nop
> 39: 90 nop
> 3a: 90 nop
> 3b: 90 nop
> 3c: 90 nop
> 3d: 90 nop
> 3e: 90 nop
> 3f: 90 nop
>
> Code starting with the faulting instruction
> ===========================================
> 0: 0f 0b ud2
> 2: 90 nop
> 3: 90 nop
> 4: e9 ba fe ff ff jmp 0xfffffffffffffec3
> 9: 66 90 xchg %ax,%ax
> b: 90 nop
> c: 90 nop
> d: 90 nop
> e: 90 nop
> f: 90 nop
> 10: 90 nop
> 11: 90 nop
> 12: 90 nop
> 13: 90 nop
> 14: 90 nop
> 15: 90 nop
> [ 55.835743] RSP: 0018:ffffc900004eba20 EFLAGS: 00010086
> [ 55.835744] RAX: 000000000000002f RBX: ffff88826cccb080 RCX: 0000000000000027
> [ 55.835745] RDX: ffff888263d17b08 RSI: 0000000000000001 RDI: ffff888263d17b00
> [ 55.835746] RBP: ffffea0005fe0000 R08: 00000000ffffdfff R09: ffffffff82b16528
> [ 55.835746] R10: 80000000ffffe000 R11: 00000000ffffe000 R12: 0000000000000020
> [ 55.835746] R13: 0000000000000002 R14: 0000000000000001 R15: 0000000000000005
> [ 55.835750] FS: 00007fef6a06a740(0000) GS:ffff8882e08a0000(0000) knlGS:0000000000000000
> [ 55.835751] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 55.835751] CR2: 00007fee20c00000 CR3: 0000000179321000 CR4: 0000000000750ef0
> [ 55.835751] PKRU: 55555554
> [ 55.835752] Call Trace:
> [ 55.835755] <TASK>
> [ 55.835756] __move_freepages_block (mm/page_alloc.c:1849)
> [ 55.835758] try_to_claim_block (mm/page_alloc.c:452 (discriminator 3) mm/page_alloc.c:2231 (discriminator 3))
> [ 55.835759] __rmqueue_pcplist (mm/page_alloc.c:2287 mm/page_alloc.c:2383 mm/page_alloc.c:2422 mm/page_alloc.c:3140)
> [ 55.835760] get_page_from_freelist (./include/linux/spinlock.h:391 mm/page_alloc.c:3183 mm/page_alloc.c:3213 mm/page_alloc.c:3739)
> [ 55.835761] __alloc_frozen_pages_noprof (mm/page_alloc.c:5032)
> [ 55.835763] ? __blk_flush_plug (block/blk-core.c:1227 (discriminator 2))
> [ 55.835766] alloc_pages_mpol (mm/mempolicy.c:2413)
> [ 55.835768] vma_alloc_folio_noprof (mm/mempolicy.c:2432 mm/mempolicy.c:2465)
> [ 55.835769] ? __pte_alloc (mm/memory.c:444)
> [ 55.835771] do_anonymous_page (mm/memory.c:1064 (discriminator 4) mm/memory.c:4982 (discriminator 4) mm/memory.c:5039 (discriminator 4))
> [ 55.835772] ? do_huge_pmd_anonymous_page (mm/huge_memory.c:1226 mm/huge_memory.c:1372)
> [ 55.835774] __handle_mm_fault (mm/memory.c:4197 mm/memory.c:6038 mm/memory.c:6181)
> [ 55.835776] handle_mm_fault (mm/memory.c:6350)
> [ 55.835777] do_user_addr_fault (arch/x86/mm/fault.c:1338)
> [ 55.835779] exc_page_fault (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:114 arch/x86/mm/fault.c:1488 arch/x86/mm/fault.c:1538)
> [ 55.835783] asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
> [ 55.835785] RIP: 0033:0x403824
> [ 55.835786] Code: e0 0f 85 7c 01 00 00 ba 0e 00 00 00 be 00 00 20 00 48 89 c7 48 89 c3 e8 4a ea ff ff 85 c0 0f 85 51 01 00 00 8b 0d b4 49 00 00 <48> 89 1b 85 c9 0f 84 b1 00 00 00 83 e9 03 48 89 e6 ba 10 00 00 00
> All code
> ========
> 0: e0 0f loopne 0x11
> 2: 85 7c 01 00 test %edi,0x0(%rcx,%rax,1)
> 6: 00 ba 0e 00 00 00 add %bh,0xe(%rdx)
> c: be 00 00 20 00 mov $0x200000,%esi
> 11: 48 89 c7 mov %rax,%rdi
> 14: 48 89 c3 mov %rax,%rbx
> 17: e8 4a ea ff ff call 0xffffffffffffea66
> 1c: 85 c0 test %eax,%eax
> 1e: 0f 85 51 01 00 00 jne 0x175
> 24: 8b 0d b4 49 00 00 mov 0x49b4(%rip),%ecx # 0x49de
> 2a:* 48 89 1b mov %rbx,(%rbx) <-- trapping instruction
> 2d: 85 c9 test %ecx,%ecx
> 2f: 0f 84 b1 00 00 00 je 0xe6
> 35: 83 e9 03 sub $0x3,%ecx
> 38: 48 89 e6 mov %rsp,%rsi
> 3b: ba 10 00 00 00 mov $0x10,%edx
>
> Code starting with the faulting instruction
> ===========================================
> 0: 48 89 1b mov %rbx,(%rbx)
> 3: 85 c9 test %ecx,%ecx
> 5: 0f 84 b1 00 00 00 je 0xbc
> b: 83 e9 03 sub $0x3,%ecx
> e: 48 89 e6 mov %rsp,%rsi
> 11: ba 10 00 00 00 mov $0x10,%edx
> [ 55.835786] RSP: 002b:00007ffd50b1e550 EFLAGS: 00010246
> [ 55.835787] RAX: 0000000000000000 RBX: 00007fee20c00000 RCX: 000000000000000c
> [ 55.835787] RDX: 000000000000000e RSI: 0000000000200000 RDI: 00007fee20c00000
> [ 55.835788] RBP: 0000000000000003 R08: 00000000ffffffff R09: 0000000000000000
> [ 55.835788] R10: 0000000000004032 R11: 0000000000000246 R12: 00007fee20c00000
> [ 55.835788] R13: 00007fef6a000000 R14: 00000000323ca6b0 R15: 0000000000000fd2
> [ 55.835789] </TASK>
> [ 55.835789] ---[ end trace 0000000000000000 ]---
>
>
> On Fri, May 09, 2025 at 04:01:09PM -0400, Zi Yan wrote:
>> Since migratetype is no longer overwritten during pageblock isolation,
>> moving pageblocks to and from MIGRATE_ISOLATE no longer needs migratetype.
>>
>> Add MIGRATETYPE_NO_ISO_MASK to allow read before-isolation migratetype
>> when a pageblock is isolated. It is used by move_freepages_block_isolate().
>>
>> Add pageblock_isolate_and_move_free_pages() and
>> pageblock_unisolate_and_move_free_pages() to be explicit about the page
>> isolation operations. Both share the common code in
>> __move_freepages_block_isolate(), which is renamed from
>> move_freepages_block_isolate().
>>
>> Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types,
>> so that one should use set_pageblock_isolate() to isolate pageblocks.
>>
>> Two consequential changes:
>> 1. move pageblock migratetype code out of __move_freepages_block().
>> 2. in online_pages() from mm/memory_hotplug.c, move_pfn_range_to_zone() is
>> called with MIGRATE_MOVABLE instead of MIGRATE_ISOLATE and all affected
>> pageblocks are isolated afterwards. Otherwise, all online pageblocks
>> will have non-determined migratetype.
>>
>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>> ---
>> include/linux/mmzone.h | 4 +-
>> include/linux/page-isolation.h | 5 ++-
>> mm/memory_hotplug.c | 7 +++-
>> mm/page_alloc.c | 73 +++++++++++++++++++++++++---------
>> mm/page_isolation.c | 27 ++++++++-----
>> 5 files changed, 82 insertions(+), 34 deletions(-)
>>
>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>> index 7ef01fe148ce..f66895456974 100644
>> --- a/include/linux/mmzone.h
>> +++ b/include/linux/mmzone.h
>> @@ -107,8 +107,10 @@ static inline bool migratetype_is_mergeable(int mt)
>> extern int page_group_by_mobility_disabled;
>>
>> #ifdef CONFIG_MEMORY_ISOLATION
>> -#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
>> +#define MIGRATETYPE_NO_ISO_MASK (BIT(PB_migratetype_bits) - 1)
>> +#define MIGRATETYPE_MASK (MIGRATETYPE_NO_ISO_MASK | PB_migrate_isolate_bit)
>> #else
>> +#define MIGRATETYPE_NO_ISO_MASK MIGRATETYPE_MASK
>> #define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
>> #endif
>>
>> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
>> index 898bb788243b..b0a2af0a5357 100644
>> --- a/include/linux/page-isolation.h
>> +++ b/include/linux/page-isolation.h
>> @@ -26,9 +26,10 @@ static inline bool is_migrate_isolate(int migratetype)
>> #define REPORT_FAILURE 0x2
>>
>> void set_pageblock_migratetype(struct page *page, int migratetype);
>> +void set_pageblock_isolate(struct page *page);
>>
>> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>> - int migratetype);
>> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
>> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
>>
>> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> int migratetype, int flags);
>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>> index b1caedbade5b..c86c47bba019 100644
>> --- a/mm/memory_hotplug.c
>> +++ b/mm/memory_hotplug.c
>> @@ -1178,6 +1178,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>> const int nid = zone_to_nid(zone);
>> int ret;
>> struct memory_notify arg;
>> + unsigned long isol_pfn;
>>
>> /*
>> * {on,off}lining is constrained to full memory sections (or more
>> @@ -1192,7 +1193,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>>
>>
>> /* associate pfn range with the zone */
>> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
>> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_MOVABLE);
>> + for (isol_pfn = pfn;
>> + isol_pfn < pfn + nr_pages;
>> + isol_pfn += pageblock_nr_pages)
>> + set_pageblock_isolate(pfn_to_page(isol_pfn));
>>
>> arg.start_pfn = pfn;
>> arg.nr_pages = nr_pages;
>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>> index 04e301fb4879..cfd37b2d992e 100644
>> --- a/mm/page_alloc.c
>> +++ b/mm/page_alloc.c
>> @@ -454,11 +454,9 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
>> migratetype = MIGRATE_UNMOVABLE;
>>
>> #ifdef CONFIG_MEMORY_ISOLATION
>> - if (migratetype == MIGRATE_ISOLATE) {
>> - set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>> - page_to_pfn(page), PB_migrate_isolate_bit);
>> - return;
>> - }
>> + VM_WARN(migratetype == MIGRATE_ISOLATE,
>> + "Use set_pageblock_isolate() for pageblock isolation");
>> + return;
>> #endif
>> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
>> page_to_pfn(page), MIGRATETYPE_MASK);
>> @@ -1819,8 +1817,8 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
>> #endif
>>
>> /*
>> - * Change the type of a block and move all its free pages to that
>> - * type's freelist.
>> + * Move all free pages of a block to new type's freelist. Caller needs to
>> + * change the block type.
>> */
>> static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
>> int old_mt, int new_mt)
>> @@ -1852,8 +1850,6 @@ static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
>> pages_moved += 1 << order;
>> }
>>
>> - set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
>> -
>> return pages_moved;
>> }
>>
>> @@ -1911,11 +1907,16 @@ static int move_freepages_block(struct zone *zone, struct page *page,
>> int old_mt, int new_mt)
>> {
>> unsigned long start_pfn;
>> + int res;
>>
>> if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
>> return -1;
>>
>> - return __move_freepages_block(zone, start_pfn, old_mt, new_mt);
>> + res = __move_freepages_block(zone, start_pfn, old_mt, new_mt);
>> + set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
>> +
>> + return res;
>> +
>> }
>>
>> #ifdef CONFIG_MEMORY_ISOLATION
>> @@ -1943,11 +1944,17 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
>> return start_pfn;
>> }
>>
>> +static inline void toggle_pageblock_isolate(struct page *page, bool isolate)
>> +{
>> + set_pfnblock_flags_mask(page, (isolate << PB_migrate_isolate),
>> + page_to_pfn(page), PB_migrate_isolate_bit);
>> +}
>> +
>> /**
>> - * move_freepages_block_isolate - move free pages in block for page isolation
>> + * __move_freepages_block_isolate - move free pages in block for page isolation
>> * @zone: the zone
>> * @page: the pageblock page
>> - * @migratetype: migratetype to set on the pageblock
>> + * @isolate: to isolate the given pageblock or unisolate it
>> *
>> * This is similar to move_freepages_block(), but handles the special
>> * case encountered in page isolation, where the block of interest
>> @@ -1962,10 +1969,15 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
>> *
>> * Returns %true if pages could be moved, %false otherwise.
>> */
>> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>> - int migratetype)
>> +static bool __move_freepages_block_isolate(struct zone *zone,
>> + struct page *page, bool isolate)
>> {
>> unsigned long start_pfn, pfn;
>> + int from_mt;
>> + int to_mt;
>> +
>> + if (isolate == (get_pageblock_migratetype(page) == MIGRATE_ISOLATE))
>> + return false;
>>
>> if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
>> return false;
>> @@ -1982,7 +1994,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>>
>> del_page_from_free_list(buddy, zone, order,
>> get_pfnblock_migratetype(buddy, pfn));
>> - set_pageblock_migratetype(page, migratetype);
>> + toggle_pageblock_isolate(page, isolate);
>> split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
>> return true;
>> }
>> @@ -1993,16 +2005,38 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>>
>> del_page_from_free_list(page, zone, order,
>> get_pfnblock_migratetype(page, pfn));
>> - set_pageblock_migratetype(page, migratetype);
>> + toggle_pageblock_isolate(page, isolate);
>> split_large_buddy(zone, page, pfn, order, FPI_NONE);
>> return true;
>> }
>> move:
>> - __move_freepages_block(zone, start_pfn,
>> - get_pfnblock_migratetype(page, start_pfn),
>> - migratetype);
>> + /* use MIGRATETYPE_NO_ISO_MASK to get the non-isolate migratetype */
>> + if (isolate) {
>> + from_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
>> + MIGRATETYPE_NO_ISO_MASK);
>> + to_mt = MIGRATE_ISOLATE;
>> + } else {
>> + from_mt = MIGRATE_ISOLATE;
>> + to_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
>> + MIGRATETYPE_NO_ISO_MASK);
>> + }
>> +
>> + __move_freepages_block(zone, start_pfn, from_mt, to_mt);
>> + toggle_pageblock_isolate(pfn_to_page(start_pfn), isolate);
>> +
>> return true;
>> }
>> +
>> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page)
>> +{
>> + return __move_freepages_block_isolate(zone, page, true);
>> +}
>> +
>> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page)
>> +{
>> + return __move_freepages_block_isolate(zone, page, false);
>> +}
>> +
>> #endif /* CONFIG_MEMORY_ISOLATION */
>>
>> static void change_pageblock_range(struct page *pageblock_page,
>> @@ -2194,6 +2228,7 @@ try_to_claim_block(struct zone *zone, struct page *page,
>> if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
>> page_group_by_mobility_disabled) {
>> __move_freepages_block(zone, start_pfn, block_type, start_type);
>> + set_pageblock_migratetype(pfn_to_page(start_pfn), start_type);
>> return __rmqueue_smallest(zone, order, start_type);
>> }
>>
>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>> index 751e21f6d85e..4571940f14db 100644
>> --- a/mm/page_isolation.c
>> +++ b/mm/page_isolation.c
>> @@ -25,6 +25,12 @@ static inline void clear_pageblock_isolate(struct page *page)
>> set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
>> PB_migrate_isolate_bit);
>> }
>> +void set_pageblock_isolate(struct page *page)
>> +{
>> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>> + page_to_pfn(page),
>> + PB_migrate_isolate_bit);
>> +}
>>
>> /*
>> * This function checks whether the range [start_pfn, end_pfn) includes
>> @@ -199,7 +205,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
>> unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
>> migratetype, isol_flags);
>> if (!unmovable) {
>> - if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) {
>> + if (!pageblock_isolate_and_move_free_pages(zone, page)) {
>> spin_unlock_irqrestore(&zone->lock, flags);
>> return -EBUSY;
>> }
>> @@ -220,7 +226,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
>> return -EBUSY;
>> }
>>
>> -static void unset_migratetype_isolate(struct page *page, int migratetype)
>> +static void unset_migratetype_isolate(struct page *page)
>> {
>> struct zone *zone;
>> unsigned long flags;
>> @@ -273,10 +279,10 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
>> * Isolating this block already succeeded, so this
>> * should not fail on zone boundaries.
>> */
>> - WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype));
>> + WARN_ON_ONCE(!pageblock_unisolate_and_move_free_pages(zone, page));
>> } else {
>> - set_pageblock_migratetype(page, migratetype);
>> - __putback_isolated_page(page, order, migratetype);
>> + clear_pageblock_isolate(page);
>> + __putback_isolated_page(page, order, get_pageblock_migratetype(page));
>> }
>> zone->nr_isolate_pageblock--;
>> out:
>> @@ -394,7 +400,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>> if (PageBuddy(page)) {
>> int order = buddy_order(page);
>>
>> - /* move_freepages_block_isolate() handled this */
>> + /* pageblock_isolate_and_move_free_pages() handled this */
>> VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn);
>>
>> pfn += 1UL << order;
>> @@ -444,7 +450,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>> failed:
>> /* restore the original migratetype */
>> if (!skip_isolation)
>> - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
>> + unset_migratetype_isolate(pfn_to_page(isolate_pageblock));
>> return -EBUSY;
>> }
>>
>> @@ -515,7 +521,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> ret = isolate_single_pageblock(isolate_end, flags, true,
>> skip_isolation, migratetype);
>> if (ret) {
>> - unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>> + unset_migratetype_isolate(pfn_to_page(isolate_start));
>> return ret;
>> }
>>
>> @@ -528,8 +534,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> start_pfn, end_pfn)) {
>> undo_isolate_page_range(isolate_start, pfn, migratetype);
>> unset_migratetype_isolate(
>> - pfn_to_page(isolate_end - pageblock_nr_pages),
>> - migratetype);
>> + pfn_to_page(isolate_end - pageblock_nr_pages));
>> return -EBUSY;
>> }
>> }
>> @@ -559,7 +564,7 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>> page = __first_valid_page(pfn, pageblock_nr_pages);
>> if (!page || !is_migrate_isolate_page(page))
>> continue;
>> - unset_migratetype_isolate(page, migratetype);
>> + unset_migratetype_isolate(page);
>> }
>> }
>> /*
>> --
>> 2.47.2
>>
>>
>>
--
Best Regards,
Yan, Zi
On Mon, May 12, 2025 at 12:13:35PM -0400, Zi Yan wrote:
> On 12 May 2025, at 12:10, Lorenzo Stoakes wrote:
>
> > Andrew - please drop this series, it's broken in mm-new.
> >
> > Zi - (as kernel bot reports actually!) I bisected a kernel splat to this
> > commit, triggered by the mm/transhuge-stress test (please make sure to run
> > mm self tests before submitting series :)
> >
> > You can trigger it manually with:
> >
> > ./transhuge-stress -d 20
>
> Thanks. I will fix the issue and resend.
Thanks :)
Sorry, re-reading the 'please make sure to run mm self tests' comment, it sounds
more snarky than I intended, and I've definitely forgotten to do it
sometimes myself, but obviously a useful thing to do :P
I wonder if the issue I mention below is related, actually, unless they're
running your series on top of v6.15-rc5...
I pinged there anyway just in case.
Cheers, Lorenzo
>
> >
> > (The same invocation run_vmtest.sh uses).
> >
> > Note that this was reported in [0] (thanks to Harry Yoo for pointing this
> > out to me off-list! :)
> >
> > [0]: https://lore.kernel.org/linux-mm/87wmalyktd.fsf@linux.ibm.com/T/#u
> >
> > The decoded splat (at this commit in mm-new):
> >
> > [ 55.835700] ------------[ cut here ]------------
> > [ 55.835705] page type is 0, passed migratetype is 2 (nr=32)
> > [ 55.835720] WARNING: CPU: 2 PID: 288 at mm/page_alloc.c:727 move_to_free_list (mm/page_alloc.c:727 (discriminator 16))
> > [ 55.835734] Modules linked in:
> > [ 55.835739] Tainted: [W]=WARN
> > [ 55.835740] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
> > [ 55.835741] RIP: 0010:move_to_free_list (mm/page_alloc.c:727 (discriminator 16))
> > [ 55.835742] Code: e9 fe ff ff c6 05 f1 9b 7b 01 01 90 48 89 ef e8 11 d7 ff ff 44 89 e1 44 89 ea 48 c7 c7 58 dc 70 82 48 89 c6 e8 1c e3 e0 ff 90 <0f> 0b 90 90 e9 ba fe ff ff 66 90 90 90 90 90 90 90 90 90 90 90 90
> > All code
> > ========
> > 0: e9 fe ff ff c6 jmp 0xffffffffc7000003
> > 5: 05 f1 9b 7b 01 add $0x17b9bf1,%eax
> > a: 01 90 48 89 ef e8 add %edx,-0x171076b8(%rax)
> > 10: 11 d7 adc %edx,%edi
> > 12: ff (bad)
> > 13: ff 44 89 e1 incl -0x1f(%rcx,%rcx,4)
> > 17: 44 89 ea mov %r13d,%edx
> > 1a: 48 c7 c7 58 dc 70 82 mov $0xffffffff8270dc58,%rdi
> > 21: 48 89 c6 mov %rax,%rsi
> > 24: e8 1c e3 e0 ff call 0xffffffffffe0e345
> > 29: 90 nop
> > 2a:* 0f 0b ud2 <-- trapping instruction
> > 2c: 90 nop
> > 2d: 90 nop
> > 2e: e9 ba fe ff ff jmp 0xfffffffffffffeed
> > 33: 66 90 xchg %ax,%ax
> > 35: 90 nop
> > 36: 90 nop
> > 37: 90 nop
> > 38: 90 nop
> > 39: 90 nop
> > 3a: 90 nop
> > 3b: 90 nop
> > 3c: 90 nop
> > 3d: 90 nop
> > 3e: 90 nop
> > 3f: 90 nop
> >
> > Code starting with the faulting instruction
> > ===========================================
> > 0: 0f 0b ud2
> > 2: 90 nop
> > 3: 90 nop
> > 4: e9 ba fe ff ff jmp 0xfffffffffffffec3
> > 9: 66 90 xchg %ax,%ax
> > b: 90 nop
> > c: 90 nop
> > d: 90 nop
> > e: 90 nop
> > f: 90 nop
> > 10: 90 nop
> > 11: 90 nop
> > 12: 90 nop
> > 13: 90 nop
> > 14: 90 nop
> > 15: 90 nop
> > [ 55.835743] RSP: 0018:ffffc900004eba20 EFLAGS: 00010086
> > [ 55.835744] RAX: 000000000000002f RBX: ffff88826cccb080 RCX: 0000000000000027
> > [ 55.835745] RDX: ffff888263d17b08 RSI: 0000000000000001 RDI: ffff888263d17b00
> > [ 55.835746] RBP: ffffea0005fe0000 R08: 00000000ffffdfff R09: ffffffff82b16528
> > [ 55.835746] R10: 80000000ffffe000 R11: 00000000ffffe000 R12: 0000000000000020
> > [ 55.835746] R13: 0000000000000002 R14: 0000000000000001 R15: 0000000000000005
> > [ 55.835750] FS: 00007fef6a06a740(0000) GS:ffff8882e08a0000(0000) knlGS:0000000000000000
> > [ 55.835751] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [ 55.835751] CR2: 00007fee20c00000 CR3: 0000000179321000 CR4: 0000000000750ef0
> > [ 55.835751] PKRU: 55555554
> > [ 55.835752] Call Trace:
> > [ 55.835755] <TASK>
> > [ 55.835756] __move_freepages_block (mm/page_alloc.c:1849)
> > [ 55.835758] try_to_claim_block (mm/page_alloc.c:452 (discriminator 3) mm/page_alloc.c:2231 (discriminator 3))
> > [ 55.835759] __rmqueue_pcplist (mm/page_alloc.c:2287 mm/page_alloc.c:2383 mm/page_alloc.c:2422 mm/page_alloc.c:3140)
> > [ 55.835760] get_page_from_freelist (./include/linux/spinlock.h:391 mm/page_alloc.c:3183 mm/page_alloc.c:3213 mm/page_alloc.c:3739)
> > [ 55.835761] __alloc_frozen_pages_noprof (mm/page_alloc.c:5032)
> > [ 55.835763] ? __blk_flush_plug (block/blk-core.c:1227 (discriminator 2))
> > [ 55.835766] alloc_pages_mpol (mm/mempolicy.c:2413)
> > [ 55.835768] vma_alloc_folio_noprof (mm/mempolicy.c:2432 mm/mempolicy.c:2465)
> > [ 55.835769] ? __pte_alloc (mm/memory.c:444)
> > [ 55.835771] do_anonymous_page (mm/memory.c:1064 (discriminator 4) mm/memory.c:4982 (discriminator 4) mm/memory.c:5039 (discriminator 4))
> > [ 55.835772] ? do_huge_pmd_anonymous_page (mm/huge_memory.c:1226 mm/huge_memory.c:1372)
> > [ 55.835774] __handle_mm_fault (mm/memory.c:4197 mm/memory.c:6038 mm/memory.c:6181)
> > [ 55.835776] handle_mm_fault (mm/memory.c:6350)
> > [ 55.835777] do_user_addr_fault (arch/x86/mm/fault.c:1338)
> > [ 55.835779] exc_page_fault (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:114 arch/x86/mm/fault.c:1488 arch/x86/mm/fault.c:1538)
> > [ 55.835783] asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
> > [ 55.835785] RIP: 0033:0x403824
> > [ 55.835786] Code: e0 0f 85 7c 01 00 00 ba 0e 00 00 00 be 00 00 20 00 48 89 c7 48 89 c3 e8 4a ea ff ff 85 c0 0f 85 51 01 00 00 8b 0d b4 49 00 00 <48> 89 1b 85 c9 0f 84 b1 00 00 00 83 e9 03 48 89 e6 ba 10 00 00 00
> > All code
> > ========
> > 0: e0 0f loopne 0x11
> > 2: 85 7c 01 00 test %edi,0x0(%rcx,%rax,1)
> > 6: 00 ba 0e 00 00 00 add %bh,0xe(%rdx)
> > c: be 00 00 20 00 mov $0x200000,%esi
> > 11: 48 89 c7 mov %rax,%rdi
> > 14: 48 89 c3 mov %rax,%rbx
> > 17: e8 4a ea ff ff call 0xffffffffffffea66
> > 1c: 85 c0 test %eax,%eax
> > 1e: 0f 85 51 01 00 00 jne 0x175
> > 24: 8b 0d b4 49 00 00 mov 0x49b4(%rip),%ecx # 0x49de
> > 2a:* 48 89 1b mov %rbx,(%rbx) <-- trapping instruction
> > 2d: 85 c9 test %ecx,%ecx
> > 2f: 0f 84 b1 00 00 00 je 0xe6
> > 35: 83 e9 03 sub $0x3,%ecx
> > 38: 48 89 e6 mov %rsp,%rsi
> > 3b: ba 10 00 00 00 mov $0x10,%edx
> >
> > Code starting with the faulting instruction
> > ===========================================
> > 0: 48 89 1b mov %rbx,(%rbx)
> > 3: 85 c9 test %ecx,%ecx
> > 5: 0f 84 b1 00 00 00 je 0xbc
> > b: 83 e9 03 sub $0x3,%ecx
> > e: 48 89 e6 mov %rsp,%rsi
> > 11: ba 10 00 00 00 mov $0x10,%edx
> > [ 55.835786] RSP: 002b:00007ffd50b1e550 EFLAGS: 00010246
> > [ 55.835787] RAX: 0000000000000000 RBX: 00007fee20c00000 RCX: 000000000000000c
> > [ 55.835787] RDX: 000000000000000e RSI: 0000000000200000 RDI: 00007fee20c00000
> > [ 55.835788] RBP: 0000000000000003 R08: 00000000ffffffff R09: 0000000000000000
> > [ 55.835788] R10: 0000000000004032 R11: 0000000000000246 R12: 00007fee20c00000
> > [ 55.835788] R13: 00007fef6a000000 R14: 00000000323ca6b0 R15: 0000000000000fd2
> > [ 55.835789] </TASK>
> > [ 55.835789] ---[ end trace 0000000000000000 ]---
> >
> >
> > On Fri, May 09, 2025 at 04:01:09PM -0400, Zi Yan wrote:
> >> Since migratetype is no longer overwritten during pageblock isolation,
> >> moving pageblocks to and from MIGRATE_ISOLATE no longer needs migratetype.
> >>
> >> Add MIGRATETYPE_NO_ISO_MASK to allow read before-isolation migratetype
> >> when a pageblock is isolated. It is used by move_freepages_block_isolate().
> >>
> >> Add pageblock_isolate_and_move_free_pages() and
> >> pageblock_unisolate_and_move_free_pages() to be explicit about the page
> >> isolation operations. Both share the common code in
> >> __move_freepages_block_isolate(), which is renamed from
> >> move_freepages_block_isolate().
> >>
> >> Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types,
> >> so that one should use set_pageblock_isolate() to isolate pageblocks.
> >>
> >> Two consequential changes:
> >> 1. move pageblock migratetype code out of __move_freepages_block().
> >> 2. in online_pages() from mm/memory_hotplug.c, move_pfn_range_to_zone() is
> >> called with MIGRATE_MOVABLE instead of MIGRATE_ISOLATE and all affected
> >> pageblocks are isolated afterwards. Otherwise, all online pageblocks
> >> will have non-determined migratetype.
> >>
> >> Signed-off-by: Zi Yan <ziy@nvidia.com>
> >> ---
> >> include/linux/mmzone.h | 4 +-
> >> include/linux/page-isolation.h | 5 ++-
> >> mm/memory_hotplug.c | 7 +++-
> >> mm/page_alloc.c | 73 +++++++++++++++++++++++++---------
> >> mm/page_isolation.c | 27 ++++++++-----
> >> 5 files changed, 82 insertions(+), 34 deletions(-)
> >>
> >> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> >> index 7ef01fe148ce..f66895456974 100644
> >> --- a/include/linux/mmzone.h
> >> +++ b/include/linux/mmzone.h
> >> @@ -107,8 +107,10 @@ static inline bool migratetype_is_mergeable(int mt)
> >> extern int page_group_by_mobility_disabled;
> >>
> >> #ifdef CONFIG_MEMORY_ISOLATION
> >> -#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
> >> +#define MIGRATETYPE_NO_ISO_MASK (BIT(PB_migratetype_bits) - 1)
> >> +#define MIGRATETYPE_MASK (MIGRATETYPE_NO_ISO_MASK | PB_migrate_isolate_bit)
> >> #else
> >> +#define MIGRATETYPE_NO_ISO_MASK MIGRATETYPE_MASK
> >> #define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
> >> #endif
> >>
> >> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
> >> index 898bb788243b..b0a2af0a5357 100644
> >> --- a/include/linux/page-isolation.h
> >> +++ b/include/linux/page-isolation.h
> >> @@ -26,9 +26,10 @@ static inline bool is_migrate_isolate(int migratetype)
> >> #define REPORT_FAILURE 0x2
> >>
> >> void set_pageblock_migratetype(struct page *page, int migratetype);
> >> +void set_pageblock_isolate(struct page *page);
> >>
> >> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
> >> - int migratetype);
> >> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
> >> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
> >>
> >> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> >> int migratetype, int flags);
> >> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
> >> index b1caedbade5b..c86c47bba019 100644
> >> --- a/mm/memory_hotplug.c
> >> +++ b/mm/memory_hotplug.c
> >> @@ -1178,6 +1178,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
> >> const int nid = zone_to_nid(zone);
> >> int ret;
> >> struct memory_notify arg;
> >> + unsigned long isol_pfn;
> >>
> >> /*
> >> * {on,off}lining is constrained to full memory sections (or more
> >> @@ -1192,7 +1193,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
> >>
> >>
> >> /* associate pfn range with the zone */
> >> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
> >> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_MOVABLE);
> >> + for (isol_pfn = pfn;
> >> + isol_pfn < pfn + nr_pages;
> >> + isol_pfn += pageblock_nr_pages)
> >> + set_pageblock_isolate(pfn_to_page(isol_pfn));
> >>
> >> arg.start_pfn = pfn;
> >> arg.nr_pages = nr_pages;
> >> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> >> index 04e301fb4879..cfd37b2d992e 100644
> >> --- a/mm/page_alloc.c
> >> +++ b/mm/page_alloc.c
> >> @@ -454,11 +454,9 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
> >> migratetype = MIGRATE_UNMOVABLE;
> >>
> >> #ifdef CONFIG_MEMORY_ISOLATION
> >> - if (migratetype == MIGRATE_ISOLATE) {
> >> - set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
> >> - page_to_pfn(page), PB_migrate_isolate_bit);
> >> - return;
> >> - }
> >> + VM_WARN(migratetype == MIGRATE_ISOLATE,
> >> + "Use set_pageblock_isolate() for pageblock isolation");
> >> + return;
> >> #endif
> >> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
> >> page_to_pfn(page), MIGRATETYPE_MASK);
> >> @@ -1819,8 +1817,8 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
> >> #endif
> >>
> >> /*
> >> - * Change the type of a block and move all its free pages to that
> >> - * type's freelist.
> >> + * Move all free pages of a block to new type's freelist. Caller needs to
> >> + * change the block type.
> >> */
> >> static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
> >> int old_mt, int new_mt)
> >> @@ -1852,8 +1850,6 @@ static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
> >> pages_moved += 1 << order;
> >> }
> >>
> >> - set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
> >> -
> >> return pages_moved;
> >> }
> >>
> >> @@ -1911,11 +1907,16 @@ static int move_freepages_block(struct zone *zone, struct page *page,
> >> int old_mt, int new_mt)
> >> {
> >> unsigned long start_pfn;
> >> + int res;
> >>
> >> if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
> >> return -1;
> >>
> >> - return __move_freepages_block(zone, start_pfn, old_mt, new_mt);
> >> + res = __move_freepages_block(zone, start_pfn, old_mt, new_mt);
> >> + set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
> >> +
> >> + return res;
> >> +
> >> }
> >>
> >> #ifdef CONFIG_MEMORY_ISOLATION
> >> @@ -1943,11 +1944,17 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
> >> return start_pfn;
> >> }
> >>
> >> +static inline void toggle_pageblock_isolate(struct page *page, bool isolate)
> >> +{
> >> + set_pfnblock_flags_mask(page, (isolate << PB_migrate_isolate),
> >> + page_to_pfn(page), PB_migrate_isolate_bit);
> >> +}
> >> +
> >> /**
> >> - * move_freepages_block_isolate - move free pages in block for page isolation
> >> + * __move_freepages_block_isolate - move free pages in block for page isolation
> >> * @zone: the zone
> >> * @page: the pageblock page
> >> - * @migratetype: migratetype to set on the pageblock
> >> + * @isolate: to isolate the given pageblock or unisolate it
> >> *
> >> * This is similar to move_freepages_block(), but handles the special
> >> * case encountered in page isolation, where the block of interest
> >> @@ -1962,10 +1969,15 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
> >> *
> >> * Returns %true if pages could be moved, %false otherwise.
> >> */
> >> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
> >> - int migratetype)
> >> +static bool __move_freepages_block_isolate(struct zone *zone,
> >> + struct page *page, bool isolate)
> >> {
> >> unsigned long start_pfn, pfn;
> >> + int from_mt;
> >> + int to_mt;
> >> +
> >> + if (isolate == (get_pageblock_migratetype(page) == MIGRATE_ISOLATE))
> >> + return false;
> >>
> >> if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
> >> return false;
> >> @@ -1982,7 +1994,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
> >>
> >> del_page_from_free_list(buddy, zone, order,
> >> get_pfnblock_migratetype(buddy, pfn));
> >> - set_pageblock_migratetype(page, migratetype);
> >> + toggle_pageblock_isolate(page, isolate);
> >> split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
> >> return true;
> >> }
> >> @@ -1993,16 +2005,38 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
> >>
> >> del_page_from_free_list(page, zone, order,
> >> get_pfnblock_migratetype(page, pfn));
> >> - set_pageblock_migratetype(page, migratetype);
> >> + toggle_pageblock_isolate(page, isolate);
> >> split_large_buddy(zone, page, pfn, order, FPI_NONE);
> >> return true;
> >> }
> >> move:
> >> - __move_freepages_block(zone, start_pfn,
> >> - get_pfnblock_migratetype(page, start_pfn),
> >> - migratetype);
> >> + /* use MIGRATETYPE_NO_ISO_MASK to get the non-isolate migratetype */
> >> + if (isolate) {
> >> + from_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
> >> + MIGRATETYPE_NO_ISO_MASK);
> >> + to_mt = MIGRATE_ISOLATE;
> >> + } else {
> >> + from_mt = MIGRATE_ISOLATE;
> >> + to_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
> >> + MIGRATETYPE_NO_ISO_MASK);
> >> + }
> >> +
> >> + __move_freepages_block(zone, start_pfn, from_mt, to_mt);
> >> + toggle_pageblock_isolate(pfn_to_page(start_pfn), isolate);
> >> +
> >> return true;
> >> }
> >> +
> >> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page)
> >> +{
> >> + return __move_freepages_block_isolate(zone, page, true);
> >> +}
> >> +
> >> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page)
> >> +{
> >> + return __move_freepages_block_isolate(zone, page, false);
> >> +}
> >> +
> >> #endif /* CONFIG_MEMORY_ISOLATION */
> >>
> >> static void change_pageblock_range(struct page *pageblock_page,
> >> @@ -2194,6 +2228,7 @@ try_to_claim_block(struct zone *zone, struct page *page,
> >> if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
> >> page_group_by_mobility_disabled) {
> >> __move_freepages_block(zone, start_pfn, block_type, start_type);
> >> + set_pageblock_migratetype(pfn_to_page(start_pfn), start_type);
> >> return __rmqueue_smallest(zone, order, start_type);
> >> }
> >>
> >> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
> >> index 751e21f6d85e..4571940f14db 100644
> >> --- a/mm/page_isolation.c
> >> +++ b/mm/page_isolation.c
> >> @@ -25,6 +25,12 @@ static inline void clear_pageblock_isolate(struct page *page)
> >> set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
> >> PB_migrate_isolate_bit);
> >> }
> >> +void set_pageblock_isolate(struct page *page)
> >> +{
> >> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
> >> + page_to_pfn(page),
> >> + PB_migrate_isolate_bit);
> >> +}
> >>
> >> /*
> >> * This function checks whether the range [start_pfn, end_pfn) includes
> >> @@ -199,7 +205,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
> >> unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
> >> migratetype, isol_flags);
> >> if (!unmovable) {
> >> - if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) {
> >> + if (!pageblock_isolate_and_move_free_pages(zone, page)) {
> >> spin_unlock_irqrestore(&zone->lock, flags);
> >> return -EBUSY;
> >> }
> >> @@ -220,7 +226,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
> >> return -EBUSY;
> >> }
> >>
> >> -static void unset_migratetype_isolate(struct page *page, int migratetype)
> >> +static void unset_migratetype_isolate(struct page *page)
> >> {
> >> struct zone *zone;
> >> unsigned long flags;
> >> @@ -273,10 +279,10 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
> >> * Isolating this block already succeeded, so this
> >> * should not fail on zone boundaries.
> >> */
> >> - WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype));
> >> + WARN_ON_ONCE(!pageblock_unisolate_and_move_free_pages(zone, page));
> >> } else {
> >> - set_pageblock_migratetype(page, migratetype);
> >> - __putback_isolated_page(page, order, migratetype);
> >> + clear_pageblock_isolate(page);
> >> + __putback_isolated_page(page, order, get_pageblock_migratetype(page));
> >> }
> >> zone->nr_isolate_pageblock--;
> >> out:
> >> @@ -394,7 +400,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
> >> if (PageBuddy(page)) {
> >> int order = buddy_order(page);
> >>
> >> - /* move_freepages_block_isolate() handled this */
> >> + /* pageblock_isolate_and_move_free_pages() handled this */
> >> VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn);
> >>
> >> pfn += 1UL << order;
> >> @@ -444,7 +450,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
> >> failed:
> >> /* restore the original migratetype */
> >> if (!skip_isolation)
> >> - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
> >> + unset_migratetype_isolate(pfn_to_page(isolate_pageblock));
> >> return -EBUSY;
> >> }
> >>
> >> @@ -515,7 +521,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> >> ret = isolate_single_pageblock(isolate_end, flags, true,
> >> skip_isolation, migratetype);
> >> if (ret) {
> >> - unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
> >> + unset_migratetype_isolate(pfn_to_page(isolate_start));
> >> return ret;
> >> }
> >>
> >> @@ -528,8 +534,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> >> start_pfn, end_pfn)) {
> >> undo_isolate_page_range(isolate_start, pfn, migratetype);
> >> unset_migratetype_isolate(
> >> - pfn_to_page(isolate_end - pageblock_nr_pages),
> >> - migratetype);
> >> + pfn_to_page(isolate_end - pageblock_nr_pages));
> >> return -EBUSY;
> >> }
> >> }
> >> @@ -559,7 +564,7 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
> >> page = __first_valid_page(pfn, pageblock_nr_pages);
> >> if (!page || !is_migrate_isolate_page(page))
> >> continue;
> >> - unset_migratetype_isolate(page, migratetype);
> >> + unset_migratetype_isolate(page);
> >> }
> >> }
> >> /*
> >> --
> >> 2.47.2
> >>
> >>
> >>
>
>
> --
> Best Regards,
> Yan, Zi
On 12 May 2025, at 12:19, Lorenzo Stoakes wrote:
> On Mon, May 12, 2025 at 12:13:35PM -0400, Zi Yan wrote:
>> On 12 May 2025, at 12:10, Lorenzo Stoakes wrote:
>>
>>> Andrew - please drop this series, it's broken in mm-new.
>>>
>>> Zi - (as kernel bot reports actually!) I bisected a kernel splat to this
>>> commit, triggered by the mm/transhuge-stress test (please make sure to run
>>> mm self tests before submitting series :)
>>>
>>> You can trigger it manually with:
>>>
>>> ./transhuge-stress -d 20
>>
>> Thanks. I will fix the issue and resend.
>
> Thanks :)
>
> Sorry re-reading the 'please make sure to run mm self tests' comment sounds
> more snarky than I intended, and I've definitely forgotten to do it
> sometimes myself, but obviously a useful thing to do :P
You got me. I did not run mm self tests for my series, but will do that
from now on. I was using memory hotplug and hotremove to test my series,
but obviously it is not enough.
>
> I wonder if the issue I mention below is related, actually, unless they're
> running your series on top of v6.15-rc5...
I wonder if something else is causing it. The warning comes from the check
that makes sure the pageblock migratetype matches the free list movement.
Anyway, let me reply there. A bisect would be helpful.
>
> I pinged there anyway just in case.
>
> Cheers, Lorenzo
>
>>
>>>
>>> (The same invocation run_vmtest.sh uses).
>>>
>>> Note that this was reported in [0] (thanks to Harry Yoo for pointing this
>>> out to me off-list! :)
>>>
>>> [0]: https://lore.kernel.org/linux-mm/87wmalyktd.fsf@linux.ibm.com/T/#u
>>>
>>> The decoded splat (at this commit in mm-new):
>>>
>>> [ 55.835700] ------------[ cut here ]------------
>>> [ 55.835705] page type is 0, passed migratetype is 2 (nr=32)
>>> [ 55.835720] WARNING: CPU: 2 PID: 288 at mm/page_alloc.c:727 move_to_free_list (mm/page_alloc.c:727 (discriminator 16))
>>> [ 55.835734] Modules linked in:
>>> [ 55.835739] Tainted: [W]=WARN
>>> [ 55.835740] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
>>> [ 55.835741] RIP: 0010:move_to_free_list (mm/page_alloc.c:727 (discriminator 16))
>>> [ 55.835742] Code: e9 fe ff ff c6 05 f1 9b 7b 01 01 90 48 89 ef e8 11 d7 ff ff 44 89 e1 44 89 ea 48 c7 c7 58 dc 70 82 48 89 c6 e8 1c e3 e0 ff 90 <0f> 0b 90 90 e9 ba fe ff ff 66 90 90 90 90 90 90 90 90 90 90 90 90
>>> All code
>>> ========
>>> 0: e9 fe ff ff c6 jmp 0xffffffffc7000003
>>> 5: 05 f1 9b 7b 01 add $0x17b9bf1,%eax
>>> a: 01 90 48 89 ef e8 add %edx,-0x171076b8(%rax)
>>> 10: 11 d7 adc %edx,%edi
>>> 12: ff (bad)
>>> 13: ff 44 89 e1 incl -0x1f(%rcx,%rcx,4)
>>> 17: 44 89 ea mov %r13d,%edx
>>> 1a: 48 c7 c7 58 dc 70 82 mov $0xffffffff8270dc58,%rdi
>>> 21: 48 89 c6 mov %rax,%rsi
>>> 24: e8 1c e3 e0 ff call 0xffffffffffe0e345
>>> 29: 90 nop
>>> 2a:* 0f 0b ud2 <-- trapping instruction
>>> 2c: 90 nop
>>> 2d: 90 nop
>>> 2e: e9 ba fe ff ff jmp 0xfffffffffffffeed
>>> 33: 66 90 xchg %ax,%ax
>>> 35: 90 nop
>>> 36: 90 nop
>>> 37: 90 nop
>>> 38: 90 nop
>>> 39: 90 nop
>>> 3a: 90 nop
>>> 3b: 90 nop
>>> 3c: 90 nop
>>> 3d: 90 nop
>>> 3e: 90 nop
>>> 3f: 90 nop
>>>
>>> Code starting with the faulting instruction
>>> ===========================================
>>> 0: 0f 0b ud2
>>> 2: 90 nop
>>> 3: 90 nop
>>> 4: e9 ba fe ff ff jmp 0xfffffffffffffec3
>>> 9: 66 90 xchg %ax,%ax
>>> b: 90 nop
>>> c: 90 nop
>>> d: 90 nop
>>> e: 90 nop
>>> f: 90 nop
>>> 10: 90 nop
>>> 11: 90 nop
>>> 12: 90 nop
>>> 13: 90 nop
>>> 14: 90 nop
>>> 15: 90 nop
>>> [ 55.835743] RSP: 0018:ffffc900004eba20 EFLAGS: 00010086
>>> [ 55.835744] RAX: 000000000000002f RBX: ffff88826cccb080 RCX: 0000000000000027
>>> [ 55.835745] RDX: ffff888263d17b08 RSI: 0000000000000001 RDI: ffff888263d17b00
>>> [ 55.835746] RBP: ffffea0005fe0000 R08: 00000000ffffdfff R09: ffffffff82b16528
>>> [ 55.835746] R10: 80000000ffffe000 R11: 00000000ffffe000 R12: 0000000000000020
>>> [ 55.835746] R13: 0000000000000002 R14: 0000000000000001 R15: 0000000000000005
>>> [ 55.835750] FS: 00007fef6a06a740(0000) GS:ffff8882e08a0000(0000) knlGS:0000000000000000
>>> [ 55.835751] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>>> [ 55.835751] CR2: 00007fee20c00000 CR3: 0000000179321000 CR4: 0000000000750ef0
>>> [ 55.835751] PKRU: 55555554
>>> [ 55.835752] Call Trace:
>>> [ 55.835755] <TASK>
>>> [ 55.835756] __move_freepages_block (mm/page_alloc.c:1849)
>>> [ 55.835758] try_to_claim_block (mm/page_alloc.c:452 (discriminator 3) mm/page_alloc.c:2231 (discriminator 3))
>>> [ 55.835759] __rmqueue_pcplist (mm/page_alloc.c:2287 mm/page_alloc.c:2383 mm/page_alloc.c:2422 mm/page_alloc.c:3140)
>>> [ 55.835760] get_page_from_freelist (./include/linux/spinlock.h:391 mm/page_alloc.c:3183 mm/page_alloc.c:3213 mm/page_alloc.c:3739)
>>> [ 55.835761] __alloc_frozen_pages_noprof (mm/page_alloc.c:5032)
>>> [ 55.835763] ? __blk_flush_plug (block/blk-core.c:1227 (discriminator 2))
>>> [ 55.835766] alloc_pages_mpol (mm/mempolicy.c:2413)
>>> [ 55.835768] vma_alloc_folio_noprof (mm/mempolicy.c:2432 mm/mempolicy.c:2465)
>>> [ 55.835769] ? __pte_alloc (mm/memory.c:444)
>>> [ 55.835771] do_anonymous_page (mm/memory.c:1064 (discriminator 4) mm/memory.c:4982 (discriminator 4) mm/memory.c:5039 (discriminator 4))
>>> [ 55.835772] ? do_huge_pmd_anonymous_page (mm/huge_memory.c:1226 mm/huge_memory.c:1372)
>>> [ 55.835774] __handle_mm_fault (mm/memory.c:4197 mm/memory.c:6038 mm/memory.c:6181)
>>> [ 55.835776] handle_mm_fault (mm/memory.c:6350)
>>> [ 55.835777] do_user_addr_fault (arch/x86/mm/fault.c:1338)
>>> [ 55.835779] exc_page_fault (./arch/x86/include/asm/irqflags.h:37 ./arch/x86/include/asm/irqflags.h:114 arch/x86/mm/fault.c:1488 arch/x86/mm/fault.c:1538)
>>> [ 55.835783] asm_exc_page_fault (./arch/x86/include/asm/idtentry.h:623)
>>> [ 55.835785] RIP: 0033:0x403824
>>> [ 55.835786] Code: e0 0f 85 7c 01 00 00 ba 0e 00 00 00 be 00 00 20 00 48 89 c7 48 89 c3 e8 4a ea ff ff 85 c0 0f 85 51 01 00 00 8b 0d b4 49 00 00 <48> 89 1b 85 c9 0f 84 b1 00 00 00 83 e9 03 48 89 e6 ba 10 00 00 00
>>> All code
>>> ========
>>> 0: e0 0f loopne 0x11
>>> 2: 85 7c 01 00 test %edi,0x0(%rcx,%rax,1)
>>> 6: 00 ba 0e 00 00 00 add %bh,0xe(%rdx)
>>> c: be 00 00 20 00 mov $0x200000,%esi
>>> 11: 48 89 c7 mov %rax,%rdi
>>> 14: 48 89 c3 mov %rax,%rbx
>>> 17: e8 4a ea ff ff call 0xffffffffffffea66
>>> 1c: 85 c0 test %eax,%eax
>>> 1e: 0f 85 51 01 00 00 jne 0x175
>>> 24: 8b 0d b4 49 00 00 mov 0x49b4(%rip),%ecx # 0x49de
>>> 2a:* 48 89 1b mov %rbx,(%rbx) <-- trapping instruction
>>> 2d: 85 c9 test %ecx,%ecx
>>> 2f: 0f 84 b1 00 00 00 je 0xe6
>>> 35: 83 e9 03 sub $0x3,%ecx
>>> 38: 48 89 e6 mov %rsp,%rsi
>>> 3b: ba 10 00 00 00 mov $0x10,%edx
>>>
>>> Code starting with the faulting instruction
>>> ===========================================
>>> 0: 48 89 1b mov %rbx,(%rbx)
>>> 3: 85 c9 test %ecx,%ecx
>>> 5: 0f 84 b1 00 00 00 je 0xbc
>>> b: 83 e9 03 sub $0x3,%ecx
>>> e: 48 89 e6 mov %rsp,%rsi
>>> 11: ba 10 00 00 00 mov $0x10,%edx
>>> [ 55.835786] RSP: 002b:00007ffd50b1e550 EFLAGS: 00010246
>>> [ 55.835787] RAX: 0000000000000000 RBX: 00007fee20c00000 RCX: 000000000000000c
>>> [ 55.835787] RDX: 000000000000000e RSI: 0000000000200000 RDI: 00007fee20c00000
>>> [ 55.835788] RBP: 0000000000000003 R08: 00000000ffffffff R09: 0000000000000000
>>> [ 55.835788] R10: 0000000000004032 R11: 0000000000000246 R12: 00007fee20c00000
>>> [ 55.835788] R13: 00007fef6a000000 R14: 00000000323ca6b0 R15: 0000000000000fd2
>>> [ 55.835789] </TASK>
>>> [ 55.835789] ---[ end trace 0000000000000000 ]---
>>>
>>>
>>> On Fri, May 09, 2025 at 04:01:09PM -0400, Zi Yan wrote:
>>>> Since migratetype is no longer overwritten during pageblock isolation,
>>>> moving pageblocks to and from MIGRATE_ISOLATE no longer needs migratetype.
>>>>
>>>> Add MIGRATETYPE_NO_ISO_MASK to allow read before-isolation migratetype
>>>> when a pageblock is isolated. It is used by move_freepages_block_isolate().
>>>>
>>>> Add pageblock_isolate_and_move_free_pages() and
>>>> pageblock_unisolate_and_move_free_pages() to be explicit about the page
>>>> isolation operations. Both share the common code in
>>>> __move_freepages_block_isolate(), which is renamed from
>>>> move_freepages_block_isolate().
>>>>
>>>> Make set_pageblock_migratetype() only accept non MIGRATE_ISOLATE types,
>>>> so that one should use set_pageblock_isolate() to isolate pageblocks.
>>>>
>>>> Two consequential changes:
>>>> 1. move pageblock migratetype code out of __move_freepages_block().
>>>> 2. in online_pages() from mm/memory_hotplug.c, move_pfn_range_to_zone() is
>>>> called with MIGRATE_MOVABLE instead of MIGRATE_ISOLATE and all affected
>>>> pageblocks are isolated afterwards. Otherwise, all online pageblocks
>>>> will have non-determined migratetype.
>>>>
>>>> Signed-off-by: Zi Yan <ziy@nvidia.com>
>>>> ---
>>>> include/linux/mmzone.h | 4 +-
>>>> include/linux/page-isolation.h | 5 ++-
>>>> mm/memory_hotplug.c | 7 +++-
>>>> mm/page_alloc.c | 73 +++++++++++++++++++++++++---------
>>>> mm/page_isolation.c | 27 ++++++++-----
>>>> 5 files changed, 82 insertions(+), 34 deletions(-)
>>>>
>>>> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
>>>> index 7ef01fe148ce..f66895456974 100644
>>>> --- a/include/linux/mmzone.h
>>>> +++ b/include/linux/mmzone.h
>>>> @@ -107,8 +107,10 @@ static inline bool migratetype_is_mergeable(int mt)
>>>> extern int page_group_by_mobility_disabled;
>>>>
>>>> #ifdef CONFIG_MEMORY_ISOLATION
>>>> -#define MIGRATETYPE_MASK ((BIT(PB_migratetype_bits) - 1) | PB_migrate_isolate_bit)
>>>> +#define MIGRATETYPE_NO_ISO_MASK (BIT(PB_migratetype_bits) - 1)
>>>> +#define MIGRATETYPE_MASK (MIGRATETYPE_NO_ISO_MASK | PB_migrate_isolate_bit)
>>>> #else
>>>> +#define MIGRATETYPE_NO_ISO_MASK MIGRATETYPE_MASK
>>>> #define MIGRATETYPE_MASK (BIT(PB_migratetype_bits) - 1)
>>>> #endif
>>>>
>>>> diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
>>>> index 898bb788243b..b0a2af0a5357 100644
>>>> --- a/include/linux/page-isolation.h
>>>> +++ b/include/linux/page-isolation.h
>>>> @@ -26,9 +26,10 @@ static inline bool is_migrate_isolate(int migratetype)
>>>> #define REPORT_FAILURE 0x2
>>>>
>>>> void set_pageblock_migratetype(struct page *page, int migratetype);
>>>> +void set_pageblock_isolate(struct page *page);
>>>>
>>>> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>>>> - int migratetype);
>>>> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page);
>>>> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page);
>>>>
>>>> int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>> int migratetype, int flags);
>>>> diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
>>>> index b1caedbade5b..c86c47bba019 100644
>>>> --- a/mm/memory_hotplug.c
>>>> +++ b/mm/memory_hotplug.c
>>>> @@ -1178,6 +1178,7 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>>>> const int nid = zone_to_nid(zone);
>>>> int ret;
>>>> struct memory_notify arg;
>>>> + unsigned long isol_pfn;
>>>>
>>>> /*
>>>> * {on,off}lining is constrained to full memory sections (or more
>>>> @@ -1192,7 +1193,11 @@ int online_pages(unsigned long pfn, unsigned long nr_pages,
>>>>
>>>>
>>>> /* associate pfn range with the zone */
>>>> - move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
>>>> + move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_MOVABLE);
>>>> + for (isol_pfn = pfn;
>>>> + isol_pfn < pfn + nr_pages;
>>>> + isol_pfn += pageblock_nr_pages)
>>>> + set_pageblock_isolate(pfn_to_page(isol_pfn));
>>>>
>>>> arg.start_pfn = pfn;
>>>> arg.nr_pages = nr_pages;
>>>> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
>>>> index 04e301fb4879..cfd37b2d992e 100644
>>>> --- a/mm/page_alloc.c
>>>> +++ b/mm/page_alloc.c
>>>> @@ -454,11 +454,9 @@ void set_pageblock_migratetype(struct page *page, int migratetype)
>>>> migratetype = MIGRATE_UNMOVABLE;
>>>>
>>>> #ifdef CONFIG_MEMORY_ISOLATION
>>>> - if (migratetype == MIGRATE_ISOLATE) {
>>>> - set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>>>> - page_to_pfn(page), PB_migrate_isolate_bit);
>>>> - return;
>>>> - }
>>>> + VM_WARN(migratetype == MIGRATE_ISOLATE,
>>>> + "Use set_pageblock_isolate() for pageblock isolation");
>>>> + return;
>>>> #endif
>>>> set_pfnblock_flags_mask(page, (unsigned long)migratetype,
>>>> page_to_pfn(page), MIGRATETYPE_MASK);
>>>> @@ -1819,8 +1817,8 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone,
>>>> #endif
>>>>
>>>> /*
>>>> - * Change the type of a block and move all its free pages to that
>>>> - * type's freelist.
>>>> + * Move all free pages of a block to new type's freelist. Caller needs to
>>>> + * change the block type.
>>>> */
>>>> static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
>>>> int old_mt, int new_mt)
>>>> @@ -1852,8 +1850,6 @@ static int __move_freepages_block(struct zone *zone, unsigned long start_pfn,
>>>> pages_moved += 1 << order;
>>>> }
>>>>
>>>> - set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
>>>> -
>>>> return pages_moved;
>>>> }
>>>>
>>>> @@ -1911,11 +1907,16 @@ static int move_freepages_block(struct zone *zone, struct page *page,
>>>> int old_mt, int new_mt)
>>>> {
>>>> unsigned long start_pfn;
>>>> + int res;
>>>>
>>>> if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
>>>> return -1;
>>>>
>>>> - return __move_freepages_block(zone, start_pfn, old_mt, new_mt);
>>>> + res = __move_freepages_block(zone, start_pfn, old_mt, new_mt);
>>>> + set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt);
>>>> +
>>>> + return res;
>>>> +
>>>> }
>>>>
>>>> #ifdef CONFIG_MEMORY_ISOLATION
>>>> @@ -1943,11 +1944,17 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
>>>> return start_pfn;
>>>> }
>>>>
>>>> +static inline void toggle_pageblock_isolate(struct page *page, bool isolate)
>>>> +{
>>>> + set_pfnblock_flags_mask(page, (isolate << PB_migrate_isolate),
>>>> + page_to_pfn(page), PB_migrate_isolate_bit);
>>>> +}
>>>> +
>>>> /**
>>>> - * move_freepages_block_isolate - move free pages in block for page isolation
>>>> + * __move_freepages_block_isolate - move free pages in block for page isolation
>>>> * @zone: the zone
>>>> * @page: the pageblock page
>>>> - * @migratetype: migratetype to set on the pageblock
>>>> + * @isolate: to isolate the given pageblock or unisolate it
>>>> *
>>>> * This is similar to move_freepages_block(), but handles the special
>>>> * case encountered in page isolation, where the block of interest
>>>> @@ -1962,10 +1969,15 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
>>>> *
>>>> * Returns %true if pages could be moved, %false otherwise.
>>>> */
>>>> -bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>>>> - int migratetype)
>>>> +static bool __move_freepages_block_isolate(struct zone *zone,
>>>> + struct page *page, bool isolate)
>>>> {
>>>> unsigned long start_pfn, pfn;
>>>> + int from_mt;
>>>> + int to_mt;
>>>> +
>>>> + if (isolate == (get_pageblock_migratetype(page) == MIGRATE_ISOLATE))
>>>> + return false;
>>>>
>>>> if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL))
>>>> return false;
>>>> @@ -1982,7 +1994,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>>>>
>>>> del_page_from_free_list(buddy, zone, order,
>>>> get_pfnblock_migratetype(buddy, pfn));
>>>> - set_pageblock_migratetype(page, migratetype);
>>>> + toggle_pageblock_isolate(page, isolate);
>>>> split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
>>>> return true;
>>>> }
>>>> @@ -1993,16 +2005,38 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
>>>>
>>>> del_page_from_free_list(page, zone, order,
>>>> get_pfnblock_migratetype(page, pfn));
>>>> - set_pageblock_migratetype(page, migratetype);
>>>> + toggle_pageblock_isolate(page, isolate);
>>>> split_large_buddy(zone, page, pfn, order, FPI_NONE);
>>>> return true;
>>>> }
>>>> move:
>>>> - __move_freepages_block(zone, start_pfn,
>>>> - get_pfnblock_migratetype(page, start_pfn),
>>>> - migratetype);
>>>> + /* use MIGRATETYPE_NO_ISO_MASK to get the non-isolate migratetype */
>>>> + if (isolate) {
>>>> + from_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
>>>> + MIGRATETYPE_NO_ISO_MASK);
>>>> + to_mt = MIGRATE_ISOLATE;
>>>> + } else {
>>>> + from_mt = MIGRATE_ISOLATE;
>>>> + to_mt = get_pfnblock_flags_mask(page, page_to_pfn(page),
>>>> + MIGRATETYPE_NO_ISO_MASK);
>>>> + }
>>>> +
>>>> + __move_freepages_block(zone, start_pfn, from_mt, to_mt);
>>>> + toggle_pageblock_isolate(pfn_to_page(start_pfn), isolate);
>>>> +
>>>> return true;
>>>> }
>>>> +
>>>> +bool pageblock_isolate_and_move_free_pages(struct zone *zone, struct page *page)
>>>> +{
>>>> + return __move_freepages_block_isolate(zone, page, true);
>>>> +}
>>>> +
>>>> +bool pageblock_unisolate_and_move_free_pages(struct zone *zone, struct page *page)
>>>> +{
>>>> + return __move_freepages_block_isolate(zone, page, false);
>>>> +}
>>>> +
>>>> #endif /* CONFIG_MEMORY_ISOLATION */
>>>>
>>>> static void change_pageblock_range(struct page *pageblock_page,
>>>> @@ -2194,6 +2228,7 @@ try_to_claim_block(struct zone *zone, struct page *page,
>>>> if (free_pages + alike_pages >= (1 << (pageblock_order-1)) ||
>>>> page_group_by_mobility_disabled) {
>>>> __move_freepages_block(zone, start_pfn, block_type, start_type);
>>>> + set_pageblock_migratetype(pfn_to_page(start_pfn), start_type);
>>>> return __rmqueue_smallest(zone, order, start_type);
>>>> }
>>>>
>>>> diff --git a/mm/page_isolation.c b/mm/page_isolation.c
>>>> index 751e21f6d85e..4571940f14db 100644
>>>> --- a/mm/page_isolation.c
>>>> +++ b/mm/page_isolation.c
>>>> @@ -25,6 +25,12 @@ static inline void clear_pageblock_isolate(struct page *page)
>>>> set_pfnblock_flags_mask(page, 0, page_to_pfn(page),
>>>> PB_migrate_isolate_bit);
>>>> }
>>>> +void set_pageblock_isolate(struct page *page)
>>>> +{
>>>> + set_pfnblock_flags_mask(page, PB_migrate_isolate_bit,
>>>> + page_to_pfn(page),
>>>> + PB_migrate_isolate_bit);
>>>> +}
>>>>
>>>> /*
>>>> * This function checks whether the range [start_pfn, end_pfn) includes
>>>> @@ -199,7 +205,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
>>>> unmovable = has_unmovable_pages(check_unmovable_start, check_unmovable_end,
>>>> migratetype, isol_flags);
>>>> if (!unmovable) {
>>>> - if (!move_freepages_block_isolate(zone, page, MIGRATE_ISOLATE)) {
>>>> + if (!pageblock_isolate_and_move_free_pages(zone, page)) {
>>>> spin_unlock_irqrestore(&zone->lock, flags);
>>>> return -EBUSY;
>>>> }
>>>> @@ -220,7 +226,7 @@ static int set_migratetype_isolate(struct page *page, int migratetype, int isol_
>>>> return -EBUSY;
>>>> }
>>>>
>>>> -static void unset_migratetype_isolate(struct page *page, int migratetype)
>>>> +static void unset_migratetype_isolate(struct page *page)
>>>> {
>>>> struct zone *zone;
>>>> unsigned long flags;
>>>> @@ -273,10 +279,10 @@ static void unset_migratetype_isolate(struct page *page, int migratetype)
>>>> * Isolating this block already succeeded, so this
>>>> * should not fail on zone boundaries.
>>>> */
>>>> - WARN_ON_ONCE(!move_freepages_block_isolate(zone, page, migratetype));
>>>> + WARN_ON_ONCE(!pageblock_unisolate_and_move_free_pages(zone, page));
>>>> } else {
>>>> - set_pageblock_migratetype(page, migratetype);
>>>> - __putback_isolated_page(page, order, migratetype);
>>>> + clear_pageblock_isolate(page);
>>>> + __putback_isolated_page(page, order, get_pageblock_migratetype(page));
>>>> }
>>>> zone->nr_isolate_pageblock--;
>>>> out:
>>>> @@ -394,7 +400,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>> if (PageBuddy(page)) {
>>>> int order = buddy_order(page);
>>>>
>>>> - /* move_freepages_block_isolate() handled this */
>>>> + /* pageblock_isolate_and_move_free_pages() handled this */
>>>> VM_WARN_ON_ONCE(pfn + (1 << order) > boundary_pfn);
>>>>
>>>> pfn += 1UL << order;
>>>> @@ -444,7 +450,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
>>>> failed:
>>>> /* restore the original migratetype */
>>>> if (!skip_isolation)
>>>> - unset_migratetype_isolate(pfn_to_page(isolate_pageblock), migratetype);
>>>> + unset_migratetype_isolate(pfn_to_page(isolate_pageblock));
>>>> return -EBUSY;
>>>> }
>>>>
>>>> @@ -515,7 +521,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>> ret = isolate_single_pageblock(isolate_end, flags, true,
>>>> skip_isolation, migratetype);
>>>> if (ret) {
>>>> - unset_migratetype_isolate(pfn_to_page(isolate_start), migratetype);
>>>> + unset_migratetype_isolate(pfn_to_page(isolate_start));
>>>> return ret;
>>>> }
>>>>
>>>> @@ -528,8 +534,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>> start_pfn, end_pfn)) {
>>>> undo_isolate_page_range(isolate_start, pfn, migratetype);
>>>> unset_migratetype_isolate(
>>>> - pfn_to_page(isolate_end - pageblock_nr_pages),
>>>> - migratetype);
>>>> + pfn_to_page(isolate_end - pageblock_nr_pages));
>>>> return -EBUSY;
>>>> }
>>>> }
>>>> @@ -559,7 +564,7 @@ void undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
>>>> page = __first_valid_page(pfn, pageblock_nr_pages);
>>>> if (!page || !is_migrate_isolate_page(page))
>>>> continue;
>>>> - unset_migratetype_isolate(page, migratetype);
>>>> + unset_migratetype_isolate(page);
>>>> }
>>>> }
>>>> /*
>>>> --
>>>> 2.47.2
>>>>
>>>>
>>>>
>>
>>
>> --
>> Best Regards,
>> Yan, Zi
--
Best Regards,
Yan, Zi
Hello,
kernel test robot noticed "WARNING:at_mm/page_alloc.c:#__add_to_free_list" on:
commit: c095828c286182c38cfc8837ca4fec8bc4bdb81d ("[PATCH v4 2/4] mm/page_isolation: remove migratetype from move_freepages_block_isolate()")
url: https://github.com/intel-lab-lkp/linux/commits/Zi-Yan/mm-page_isolation-make-page-isolation-a-standalone-bit/20250510-040418
patch link: https://lore.kernel.org/all/20250509200111.3372279-3-ziy@nvidia.com/
patch subject: [PATCH v4 2/4] mm/page_isolation: remove migratetype from move_freepages_block_isolate()
in testcase: boot
config: x86_64-kexec
compiler: clang-20
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G
(please refer to attached dmesg/kmsg for entire log/backtrace)
+-------------------------------------------------------+------------+------------+
| | 8e72f4e133 | c095828c28 |
+-------------------------------------------------------+------------+------------+
| WARNING:at_mm/page_alloc.c:#__add_to_free_list | 0 | 24 |
| RIP:__add_to_free_list | 0 | 24 |
| WARNING:at_mm/page_alloc.c:#__del_page_from_free_list | 0 | 24 |
| RIP:__del_page_from_free_list | 0 | 24 |
+-------------------------------------------------------+------------+------------+
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202505121332.74fc97c-lkp@intel.com
[ 0.337813][ T0] ------------[ cut here ]------------
[ 0.338214][ T0] page type is 0, passed migratetype is 2 (nr=512)
[ 0.338706][ T0] WARNING: CPU: 0 PID: 0 at mm/page_alloc.c:703 __add_to_free_list (kbuild/obj/consumer/x86_64-kexec/mm/page_alloc.c:701)
[ 0.339381][ T0] Modules linked in:
[ 0.339666][ T0] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.15.0-rc5-next-20250509-00002-gc095828c2861 #1 PREEMPT(voluntary)
[ 0.340589][ T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[ 0.341361][ T0] RIP: 0010:__add_to_free_list (kbuild/obj/consumer/x86_64-kexec/mm/page_alloc.c:701)
[ 0.341789][ T0] Code: 48 c1 fe 06 ba 87 00 00 00 e8 53 5f ff ff 84 c0 be 05 00 00 00 48 0f 49 f0 48 c7 c7 b3 9b 7b 82 89 ea 44 89 f1 e8 b7 71 cd ff <0f> 0b e9 35 ff ff ff 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90
All code
========
0: 48 c1 fe 06 sar $0x6,%rsi
4: ba 87 00 00 00 mov $0x87,%edx
9: e8 53 5f ff ff call 0xffffffffffff5f61
e: 84 c0 test %al,%al
10: be 05 00 00 00 mov $0x5,%esi
15: 48 0f 49 f0 cmovns %rax,%rsi
19: 48 c7 c7 b3 9b 7b 82 mov $0xffffffff827b9bb3,%rdi
20: 89 ea mov %ebp,%edx
22: 44 89 f1 mov %r14d,%ecx
25: e8 b7 71 cd ff call 0xffffffffffcd71e1
2a:* 0f 0b ud2 <-- trapping instruction
2c: e9 35 ff ff ff jmp 0xffffffffffffff66
31: 90 nop
32: 90 nop
33: 90 nop
34: 90 nop
35: 90 nop
36: 90 nop
37: 90 nop
38: 90 nop
39: 90 nop
3a: 90 nop
3b: 90 nop
3c: 90 nop
3d: 90 nop
3e: 90 nop
3f: 90 nop
Code starting with the faulting instruction
===========================================
0: 0f 0b ud2
2: e9 35 ff ff ff jmp 0xffffffffffffff3c
7: 90 nop
8: 90 nop
9: 90 nop
a: 90 nop
b: 90 nop
c: 90 nop
d: 90 nop
e: 90 nop
f: 90 nop
10: 90 nop
11: 90 nop
12: 90 nop
13: 90 nop
14: 90 nop
15: 90 nop
[ 0.343261][ T0] RSP: 0000:ffffffff82a038c8 EFLAGS: 00010046
[ 0.343707][ T0] RAX: 0000000000000000 RBX: ffff88843fff1528 RCX: 0000000000000001
[ 0.344296][ T0] RDX: ffffffff82a036b8 RSI: 00000000ffff7fff RDI: 0000000000000001
[ 0.344885][ T0] RBP: 0000000000000002 R08: 0000000000000000 R09: ffffffff82a036b0
[ 0.345473][ T0] R10: 00000000ffff7fff R11: 0000000000000001 R12: ffffea0004328000
[ 0.346062][ T0] R13: 0000000000000002 R14: 0000000000000200 R15: 0000000000000009
[ 0.346650][ T0] FS: 0000000000000000(0000) GS:ffff8884ac41b000(0000) knlGS:0000000000000000
[ 0.347330][ T0] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 0.347813][ T0] CR2: ffff88843ffff000 CR3: 0000000002a30000 CR4: 00000000000000b0
[ 0.348625][ T0] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 0.349523][ T0] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 0.350424][ T0] Call Trace:
[ 0.350791][ T0] <TASK>
[ 0.351122][ T0] try_to_claim_block (kbuild/obj/consumer/x86_64-kexec/mm/page_alloc.c:616)
[ 0.351662][ T0] __rmqueue_pcplist (kbuild/obj/consumer/x86_64-kexec/mm/page_alloc.c:2280)
[ 0.352211][ T0] ? string (kbuild/obj/consumer/x86_64-kexec/lib/vsprintf.c:718)
[ 0.352641][ T0] ? string (kbuild/obj/consumer/x86_64-kexec/lib/vsprintf.c:718)
[ 0.353067][ T0] ? move_right (kbuild/obj/consumer/x86_64-kexec/lib/vsprintf.c:?)
[ 0.353553][ T0] get_page_from_freelist (kbuild/obj/consumer/x86_64-kexec/mm/page_alloc.c:3178)
[ 0.354182][ T0] ? sprintf (kbuild/obj/consumer/x86_64-kexec/lib/vsprintf.c:3039)
[ 0.354632][ T0] ? prb_first_seq (kbuild/obj/consumer/x86_64-kexec/kernel/printk/printk_ringbuffer.c:1963)
[ 0.355163][ T0] __alloc_frozen_pages_noprof (kbuild/obj/consumer/x86_64-kexec/mm/page_alloc.c:5028)
[ 0.355786][ T0] alloc_pages_mpol (kbuild/obj/consumer/x86_64-kexec/mm/mempolicy.c:2411)
[ 0.356329][ T0] new_slab (kbuild/obj/consumer/x86_64-kexec/mm/slub.c:2454)
[ 0.356780][ T0] ___slab_alloc (kbuild/obj/consumer/x86_64-kexec/arch/x86/include/asm/preempt.h:80 kbuild/obj/consumer/x86_64-kexec/mm/slub.c:3859)
[ 0.357310][ T0] ? pcpu_block_refresh_hint (kbuild/obj/consumer/x86_64-kexec/include/linux/find.h:69)
[ 0.357916][ T0] ? radix_tree_node_alloc (kbuild/obj/consumer/x86_64-kexec/include/linux/radix-tree.h:57 kbuild/obj/consumer/x86_64-kexec/lib/radix-tree.c:278)
[ 0.358495][ T0] __slab_alloc (kbuild/obj/consumer/x86_64-kexec/arch/x86/include/asm/preempt.h:95 kbuild/obj/consumer/x86_64-kexec/mm/slub.c:3950)
[ 0.358983][ T0] kmem_cache_alloc_noprof (kbuild/obj/consumer/x86_64-kexec/mm/slub.c:4023)
[ 0.359609][ T0] ? radix_tree_node_alloc (kbuild/obj/consumer/x86_64-kexec/include/linux/radix-tree.h:57 kbuild/obj/consumer/x86_64-kexec/lib/radix-tree.c:278)
[ 0.360207][ T0] radix_tree_node_alloc (kbuild/obj/consumer/x86_64-kexec/include/linux/radix-tree.h:57 kbuild/obj/consumer/x86_64-kexec/lib/radix-tree.c:278)
[ 0.360783][ T0] idr_get_free (kbuild/obj/consumer/x86_64-kexec/lib/radix-tree.c:1508)
[ 0.361266][ T0] idr_alloc_u32 (kbuild/obj/consumer/x86_64-kexec/include/linux/err.h:70 kbuild/obj/consumer/x86_64-kexec/lib/idr.c:47)
[ 0.361765][ T0] idr_alloc (kbuild/obj/consumer/x86_64-kexec/lib/idr.c:88)
[ 0.362220][ T0] init_cpu_worker_pool (kbuild/obj/consumer/x86_64-kexec/kernel/workqueue.c:714 kbuild/obj/consumer/x86_64-kexec/kernel/workqueue.c:7733)
[ 0.362798][ T0] workqueue_init_early (kbuild/obj/consumer/x86_64-kexec/kernel/workqueue.c:7803)
[ 0.363372][ T0] start_kernel (kbuild/obj/consumer/x86_64-kexec/init/main.c:991)
[ 0.363882][ T0] x86_64_start_reservations (??:?)
[ 0.364494][ T0] x86_64_start_kernel (kbuild/obj/consumer/x86_64-kexec/arch/x86/kernel/head64.c:238)
[ 0.365039][ T0] common_startup_64 (kbuild/obj/consumer/x86_64-kexec/arch/x86/kernel/head_64.S:419)
[ 0.365589][ T0] </TASK>
[ 0.365919][ T0] ---[ end trace 0000000000000000 ]---
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20250512/202505121332.74fc97c-lkp@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
© 2016 - 2025 Red Hat, Inc.