Unmapped was added as a parameter to __folio_split() and related
call sites to support splitting of folios already in the midst
of a migration. This special case arose for device private folio
migration since during migration there could be a disconnect between
source and destination on the folio size.
Introduce split_unmapped_folio_to_order() to handle this special case.
This in turn removes the special casing introduced by the unmapped
parameter in __folio_split().
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Suggested-by: Zi Yan <ziy@nvidia.com>
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
---
include/linux/huge_mm.h | 5 +-
mm/huge_memory.c | 135 ++++++++++++++++++++++++++++++++++------
mm/migrate_device.c | 3 +-
3 files changed, 120 insertions(+), 23 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e2e91aa1a042..9155e683c08a 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -371,7 +371,8 @@ enum split_type {
bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
- unsigned int new_order, bool unmapped);
+ unsigned int new_order);
+int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);
int min_order_for_split(struct folio *folio);
int split_folio_to_list(struct folio *folio, struct list_head *list);
bool folio_split_supported(struct folio *folio, unsigned int new_order,
@@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order)
{
- return __split_huge_page_to_list_to_order(page, list, new_order, false);
+ return __split_huge_page_to_list_to_order(page, list, new_order);
}
static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0184cd915f44..942bd8410c54 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3747,7 +3747,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
* @lock_at: a page within @folio to be left locked to caller
* @list: after-split folios will be put on it if non NULL
* @split_type: perform uniform split or not (non-uniform split)
- * @unmapped: The pages are already unmapped, they are migration entries.
*
* It calls __split_unmapped_folio() to perform uniform and non-uniform split.
* It is in charge of checking whether the split is supported or not and
@@ -3763,7 +3762,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
*/
static int __folio_split(struct folio *folio, unsigned int new_order,
struct page *split_at, struct page *lock_at,
- struct list_head *list, enum split_type split_type, bool unmapped)
+ struct list_head *list, enum split_type split_type)
{
struct deferred_split *ds_queue;
XA_STATE(xas, &folio->mapping->i_pages, folio->index);
@@ -3809,14 +3808,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
* is taken to serialise against parallel split or collapse
* operations.
*/
- if (!unmapped) {
- anon_vma = folio_get_anon_vma(folio);
- if (!anon_vma) {
- ret = -EBUSY;
- goto out;
- }
- anon_vma_lock_write(anon_vma);
+ anon_vma = folio_get_anon_vma(folio);
+ if (!anon_vma) {
+ ret = -EBUSY;
+ goto out;
}
+ anon_vma_lock_write(anon_vma);
mapping = NULL;
} else {
unsigned int min_order;
@@ -3882,8 +3879,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
goto out_unlock;
}
- if (!unmapped)
- unmap_folio(folio);
+ unmap_folio(folio);
/* block interrupt reentry in xa_lock and spinlock */
local_irq_disable();
@@ -3976,8 +3972,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
expected_refs = folio_expected_ref_count(new_folio) + 1;
folio_ref_unfreeze(new_folio, expected_refs);
- if (!unmapped)
- lru_add_split_folio(folio, new_folio, lruvec, list);
+ lru_add_split_folio(folio, new_folio, lruvec, list);
/*
* Anonymous folio with swap cache.
@@ -4033,9 +4028,6 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
local_irq_enable();
- if (unmapped)
- return ret;
-
if (nr_shmem_dropped)
shmem_uncharge(mapping->host, nr_shmem_dropped);
@@ -4079,6 +4071,111 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
return ret;
}
+/*
+ * This function is a helper for splitting folios that have already been unmapped.
+ * The use case is that the device or the CPU can refuse to migrate THP pages in
+ * the middle of migration, due to allocation issues on either side
+ *
+ * The high level code is copied from __folio_split, since the pages are anonymous
+ * and are already isolated from the LRU, the code has been simplified to not
+ * burden __folio_split with unmapped sprinkled into the code.
+ *
+ * None of the split folios are unlocked
+ */
+int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order)
+{
+ int extra_pins;
+ int ret = 0;
+ struct folio *new_folio, *next;
+ struct folio *end_folio = folio_next(folio);
+ struct deferred_split *ds_queue;
+ int old_order = folio_order(folio);
+
+ VM_WARN_ON_FOLIO(folio_mapped(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
+
+ if (!can_split_folio(folio, 1, &extra_pins)) {
+ ret = -EAGAIN;
+ goto err;
+ }
+
+ local_irq_disable();
+ /* Prevent deferred_split_scan() touching ->_refcount */
+ ds_queue = folio_split_queue_lock(folio);
+ if (folio_ref_freeze(folio, 1 + extra_pins)) {
+ int expected_refs;
+ struct swap_cluster_info *ci = NULL;
+
+ if (old_order > 1) {
+ if (!list_empty(&folio->_deferred_list)) {
+ ds_queue->split_queue_len--;
+ /*
+ * Reinitialize page_deferred_list after
+ * removing the page from the split_queue,
+ * otherwise a subsequent split will see list
+ * corruption when checking the
+ * page_deferred_list.
+ */
+ list_del_init(&folio->_deferred_list);
+ }
+ if (folio_test_partially_mapped(folio)) {
+ folio_clear_partially_mapped(folio);
+ mod_mthp_stat(old_order,
+ MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
+ }
+ /*
+ * Reinitialize page_deferred_list after removing the
+ * page from the split_queue, otherwise a subsequent
+ * split will see list corruption when checking the
+ * page_deferred_list.
+ */
+ list_del_init(&folio->_deferred_list);
+ }
+ split_queue_unlock(ds_queue);
+
+ if (folio_test_swapcache(folio))
+ ci = swap_cluster_get_and_lock(folio);
+
+ ret = __split_unmapped_folio(folio, new_order, &folio->page,
+ NULL, NULL, SPLIT_TYPE_UNIFORM);
+
+ /*
+ * Unfreeze after-split folios
+ */
+ for (new_folio = folio_next(folio); new_folio != end_folio;
+ new_folio = next) {
+ next = folio_next(new_folio);
+
+ zone_device_private_split_cb(folio, new_folio);
+
+ expected_refs = folio_expected_ref_count(new_folio) + 1;
+ folio_ref_unfreeze(new_folio, expected_refs);
+ if (ci)
+ __swap_cache_replace_folio(ci, folio, new_folio);
+ }
+
+ zone_device_private_split_cb(folio, NULL);
+ /*
+ * Unfreeze @folio only after all page cache entries, which
+ * used to point to it, have been updated with new folios.
+ * Otherwise, a parallel folio_try_get() can grab @folio
+ * and its caller can see stale page cache entries.
+ */
+ expected_refs = folio_expected_ref_count(folio) + 1;
+ folio_ref_unfreeze(folio, expected_refs);
+
+ if (ci)
+ swap_cluster_unlock(ci);
+ } else {
+ split_queue_unlock(ds_queue);
+ ret = -EAGAIN;
+ }
+ local_irq_enable();
+err:
+ return ret;
+}
+
/*
* This function splits a large folio into smaller folios of order @new_order.
* @page can point to any page of the large folio to split. The split operation
@@ -4127,12 +4224,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
* with the folio. Splitting to order 0 is compatible with all folios.
*/
int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
- unsigned int new_order, bool unmapped)
+ unsigned int new_order)
{
struct folio *folio = page_folio(page);
return __folio_split(folio, new_order, &folio->page, page, list,
- SPLIT_TYPE_UNIFORM, unmapped);
+ SPLIT_TYPE_UNIFORM);
}
/**
@@ -4163,7 +4260,7 @@ int folio_split(struct folio *folio, unsigned int new_order,
struct page *split_at, struct list_head *list)
{
return __folio_split(folio, new_order, split_at, &folio->page, list,
- SPLIT_TYPE_NON_UNIFORM, false);
+ SPLIT_TYPE_NON_UNIFORM);
}
int min_order_for_split(struct folio *folio)
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index c50abbd32f21..1abe71b0e77e 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -918,8 +918,7 @@ static int migrate_vma_split_unmapped_folio(struct migrate_vma *migrate,
folio_get(folio);
split_huge_pmd_address(migrate->vma, addr, true);
- ret = __split_huge_page_to_list_to_order(folio_page(folio, 0), NULL,
- 0, true);
+ ret = split_unmapped_folio_to_order(folio, 0);
if (ret)
return ret;
migrate->src[idx] &= ~MIGRATE_PFN_COMPOUND;
--
2.51.1
Hi Balbir,
On Wed, Nov 12, 2025 at 03:46:33PM +1100, Balbir Singh wrote:
> Unmapped was added as a parameter to __folio_split() and related
> call sites to support splitting of folios already in the midst
> of a migration. This special case arose for device private folio
> migration since during migration there could be a disconnect between
> source and destination on the folio size.
>
> Introduce split_unmapped_folio_to_order() to handle this special case.
> This in turn removes the special casing introduced by the unmapped
> parameter in __folio_split().
Such a helper would be needed in drm_pagemap_migrate_to_devmem when
reallocating a device folio to smaller pages.
Could we export it (EXPORT_SYMBOL)?
Thanks,
Francois
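
For illustration, the request amounts to roughly the following. This is only a
sketch: split_unmapped_folio_to_order() is the helper from this patch, while the
export line, the choice of EXPORT_SYMBOL vs EXPORT_SYMBOL_GPL, and the caller
name are hypothetical and not part of the posted series.

	/* mm/huge_memory.c: hypothetical export, not in the posted patch */
	EXPORT_SYMBOL(split_unmapped_folio_to_order);	/* or EXPORT_SYMBOL_GPL() */

	/*
	 * Sketch of a modular caller (e.g. code built as =m in the DRM GPU SVM
	 * path) that wants to re-split an already-unmapped device folio down to
	 * base pages when a THP-sized destination cannot be allocated.
	 */
	#include <linux/huge_mm.h>

	static int demote_unmapped_device_folio(struct folio *folio)
	{
		/* The folio is expected to be locked and already unmapped here. */
		return split_unmapped_folio_to_order(folio, 0);
	}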
On Thu, Nov 13, 2025 at 04:36:01PM +0100, Francois Dugast wrote:
> Hi Balbir,
>
> On Wed, Nov 12, 2025 at 03:46:33PM +1100, Balbir Singh wrote:
> > Unmapped was added as a parameter to __folio_split() and related
> > call sites to support splitting of folios already in the midst
> > of a migration. This special case arose for device private folio
> > migration since during migration there could be a disconnect between
> > source and destination on the folio size.
> >
> > Introduce split_unmapped_folio_to_order() to handle this special case.
> > This in turn removes the special casing introduced by the unmapped
> > parameter in __folio_split().
>
> Such a helper would be needed in drm_pagemap_migrate_to_devmem when
> reallocating a device folio to smaller pages.
>
> Could we export it (EXPORT_SYMBOL)?

As a rule we don't export things from core mm. And certainly not to non-GPL
modules.

Unless David feels very differently or there's some enormously compelling
reason for it I'd really rather we didn't.

Cheers, Lorenzo
On 13 Nov 2025, at 11:02, Lorenzo Stoakes wrote:

> On Thu, Nov 13, 2025 at 04:36:01PM +0100, Francois Dugast wrote:
>> Hi Balbir,
>>
>> On Wed, Nov 12, 2025 at 03:46:33PM +1100, Balbir Singh wrote:
>>> Unmapped was added as a parameter to __folio_split() and related
>>> call sites to support splitting of folios already in the midst
>>> of a migration. This special case arose for device private folio
>>> migration since during migration there could be a disconnect between
>>> source and destination on the folio size.
>>>
>>> Introduce split_unmapped_folio_to_order() to handle this special case.
>>> This in turn removes the special casing introduced by the unmapped
>>> parameter in __folio_split().
>>
>> Such a helper would be needed in drm_pagemap_migrate_to_devmem when
>> reallocating a device folio to smaller pages.
>>
>> Could we export it (EXPORT_SYMBOL)?

drm_pagemap_migrate_to_devmem() is a function defined in tree, you
just need to include huge_mm.h to use split_unmapped_folio_to_order().
Why do you need to export split_unmapped_folio_to_order()?

>
> As a rule we don't export things from core mm. And certainly not to non-GPL
> modules.
>
> Unless David feels very differently or there's some enormously compelling
> reason for it I'd really rather we didn't.

Best Regards,
Yan, Zi
On 13.11.25 17:24, Zi Yan wrote:
> On 13 Nov 2025, at 11:02, Lorenzo Stoakes wrote:
>
>> On Thu, Nov 13, 2025 at 04:36:01PM +0100, Francois Dugast wrote:
>>> Hi Balbir,
>>>
>>> On Wed, Nov 12, 2025 at 03:46:33PM +1100, Balbir Singh wrote:
>>>> Unmapped was added as a parameter to __folio_split() and related
>>>> call sites to support splitting of folios already in the midst
>>>> of a migration. This special case arose for device private folio
>>>> migration since during migration there could be a disconnect between
>>>> source and destination on the folio size.
>>>>
>>>> Introduce split_unmapped_folio_to_order() to handle this special case.
>>>> This in turn removes the special casing introduced by the unmapped
>>>> parameter in __folio_split().
>>>
>>> Such a helper would be needed in drm_pagemap_migrate_to_devmem when
>>> reallocating a device folio to smaller pages.
>>>
>>> Could we export it (EXPORT_SYMBOL)?
>
> drm_pagemap_migrate_to_devmem() is a function defined in tree, you
> just need to include huge_mm.h to use split_unmapped_folio_to_order().
> Why do you need to export split_unmapped_folio_to_order()?

I guess because DRM_GPUSVM is tristate, so can be built as a module.
IIUC, that's where drm_pagemap_migrate_to_devmem() ends up.

>
>>
>> As a rule we don't export things from core mm. And certainly not to non-GPL
>> modules.
>>
>> Unless David feels very differently or there's some enormously compelling
>> reason for it I'd really rather we didn't.

We'd need a pretty good reason to go down that path indeed :)

--
Cheers

David
On 12.11.25 05:46, Balbir Singh wrote:
> Unmapped was added as a parameter to __folio_split() and related
> call sites to support splitting of folios already in the midst
> of a migration. This special case arose for device private folio
> migration since during migration there could be a disconnect between
> source and destination on the folio size.
>
> Introduce split_unmapped_folio_to_order() to handle this special case.
> This in turn removes the special casing introduced by the unmapped
> parameter in __folio_split().
As raised recently, I would hope that we can find a way to make all
these splitting functions look more similar in the long term, ideally
starting with "folio_split" / "folio_try_split".
What about
folio_split_unmapped()
Do we really have to spell out the "to order" part in the function name?
And if it's more a mostly-internal helper, maybe
__folio_split_unmapped()
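
Concretely, the suggested naming would boil down to something along these
lines (a sketch of the proposal only, not code from the series):

	/* Consistent with folio_split(); no "_to_order" until more orders exist. */
	int folio_split_unmapped(struct folio *folio, unsigned int new_order);

	/* Or, if it stays a mostly-internal helper in mm/huge_memory.c: */
	static int __folio_split_unmapped(struct folio *folio, unsigned int new_order);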
subject: "mm/huge_memory: introduce ..."
>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Zi Yan <ziy@nvidia.com>
> Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
> Cc: Rakie Kim <rakie.kim@sk.com>
> Cc: Byungchul Park <byungchul@sk.com>
> Cc: Gregory Price <gourry@gourry.net>
> Cc: Ying Huang <ying.huang@linux.alibaba.com>
> Cc: Alistair Popple <apopple@nvidia.com>
> Cc: Oscar Salvador <osalvador@suse.de>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Nico Pache <npache@redhat.com>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: Dev Jain <dev.jain@arm.com>
> Cc: Barry Song <baohua@kernel.org>
> Cc: Lyude Paul <lyude@redhat.com>
> Cc: Danilo Krummrich <dakr@kernel.org>
> Cc: David Airlie <airlied@gmail.com>
> Cc: Simona Vetter <simona@ffwll.ch>
> Cc: Ralph Campbell <rcampbell@nvidia.com>
> Cc: Mika Penttilä <mpenttil@redhat.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Francois Dugast <francois.dugast@intel.com>
>
> Suggested-by: Zi Yan <ziy@nvidia.com>
> Signed-off-by: Balbir Singh <balbirs@nvidia.com>
> ---
> include/linux/huge_mm.h | 5 +-
> mm/huge_memory.c | 135 ++++++++++++++++++++++++++++++++++------
> mm/migrate_device.c | 3 +-
> 3 files changed, 120 insertions(+), 23 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index e2e91aa1a042..9155e683c08a 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -371,7 +371,8 @@ enum split_type {
>
> bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
> int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> - unsigned int new_order, bool unmapped);
> + unsigned int new_order);
> +int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);
> int min_order_for_split(struct folio *folio);
> int split_folio_to_list(struct folio *folio, struct list_head *list);
> bool folio_split_supported(struct folio *folio, unsigned int new_order,
> @@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
> static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> unsigned int new_order)
> {
> - return __split_huge_page_to_list_to_order(page, list, new_order, false);
> + return __split_huge_page_to_list_to_order(page, list, new_order);
> }
> static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
> {
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 0184cd915f44..942bd8410c54 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3747,7 +3747,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
> * @lock_at: a page within @folio to be left locked to caller
> * @list: after-split folios will be put on it if non NULL
> * @split_type: perform uniform split or not (non-uniform split)
> - * @unmapped: The pages are already unmapped, they are migration entries.
> *
> * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
> * It is in charge of checking whether the split is supported or not and
> @@ -3763,7 +3762,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
> */
> static int __folio_split(struct folio *folio, unsigned int new_order,
> struct page *split_at, struct page *lock_at,
> - struct list_head *list, enum split_type split_type, bool unmapped)
> + struct list_head *list, enum split_type split_type)
Yeah, nice to see that go.
> {
> struct deferred_split *ds_queue;
> XA_STATE(xas, &folio->mapping->i_pages, folio->index);
> @@ -3809,14 +3808,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
> * is taken to serialise against parallel split or collapse
> * operations.
> */
> - if (!unmapped) {
> - anon_vma = folio_get_anon_vma(folio);
> - if (!anon_vma) {
> - ret = -EBUSY;
> - goto out;
> - }
> - anon_vma_lock_write(anon_vma);
> + anon_vma = folio_get_anon_vma(folio);
> + if (!anon_vma) {
> + ret = -EBUSY;
> + goto out;
> }
> + anon_vma_lock_write(anon_vma);
> mapping = NULL;
> } else {
> unsigned int min_order;
> @@ -3882,8 +3879,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
> goto out_unlock;
> }
>
> - if (!unmapped)
> - unmap_folio(folio);
> + unmap_folio(folio);
>
Hm, I would have hoped that we could factor out the core logic and reuse
it for the new helper, instead of duplicating code.
Did you look into that?
--
Cheers
David
On 11/12/25 21:00, David Hildenbrand (Red Hat) wrote:
> On 12.11.25 05:46, Balbir Singh wrote:
>> Unmapped was added as a parameter to __folio_split() and related
>> call sites to support splitting of folios already in the midst
>> of a migration. This special case arose for device private folio
>> migration since during migration there could be a disconnect between
>> source and destination on the folio size.
>>
>> Introduce split_unmapped_folio_to_order() to handle this special case.
>> This in turn removes the special casing introduced by the unmapped
>> parameter in __folio_split().
>
> As raised recently, I would hope that we can find a way to make all these splitting functions look more similar in the long term, ideally starting with "folio_split" / "folio_try_split".
>
> What about
>
> folio_split_unmapped()
>
> Do we really have to spell out the "to order" part in the function name?
>
> And if it's more a mostly-internal helper, maybe
>
> __folio_split_unmapped()
>
> subject: "mm/huge_memory: introduce ..."
>
I can rename it, but currently it conforms to the split_folio variants that have the order in the name.
The order is there in the name because in the future with mTHP we will want to
support splitting to various orders.
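
As a sketch of why the order parameter would matter later (hypothetical caller,
not part of this series): with mTHP support, a migration path could split an
unmapped folio only as far as the destination managed to allocate, instead of
always going to order 0.

	/* Hypothetical: split an unmapped folio down to what the destination allocated. */
	static int demote_unmapped_folio(struct folio *folio, unsigned int dst_order)
	{
		if (dst_order >= folio_order(folio))
			return 0;	/* nothing to split */

		return split_unmapped_folio_to_order(folio, dst_order);
	}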
>>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: David Hildenbrand <david@redhat.com>
>> Cc: Zi Yan <ziy@nvidia.com>
>> Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
>> Cc: Rakie Kim <rakie.kim@sk.com>
>> Cc: Byungchul Park <byungchul@sk.com>
>> Cc: Gregory Price <gourry@gourry.net>
>> Cc: Ying Huang <ying.huang@linux.alibaba.com>
>> Cc: Alistair Popple <apopple@nvidia.com>
>> Cc: Oscar Salvador <osalvador@suse.de>
>> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>> Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
>> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
>> Cc: Nico Pache <npache@redhat.com>
>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>> Cc: Dev Jain <dev.jain@arm.com>
>> Cc: Barry Song <baohua@kernel.org>
>> Cc: Lyude Paul <lyude@redhat.com>
>> Cc: Danilo Krummrich <dakr@kernel.org>
>> Cc: David Airlie <airlied@gmail.com>
>> Cc: Simona Vetter <simona@ffwll.ch>
>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>> Cc: Mika Penttilä <mpenttil@redhat.com>
>> Cc: Matthew Brost <matthew.brost@intel.com>
>> Cc: Francois Dugast <francois.dugast@intel.com>
>>
>> Suggested-by: Zi Yan <ziy@nvidia.com>
>> Signed-off-by: Balbir Singh <balbirs@nvidia.com>
>> ---
>> include/linux/huge_mm.h | 5 +-
>> mm/huge_memory.c | 135 ++++++++++++++++++++++++++++++++++------
>> mm/migrate_device.c | 3 +-
>> 3 files changed, 120 insertions(+), 23 deletions(-)
>>
>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>> index e2e91aa1a042..9155e683c08a 100644
>> --- a/include/linux/huge_mm.h
>> +++ b/include/linux/huge_mm.h
>> @@ -371,7 +371,8 @@ enum split_type {
>> bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
>> int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>> - unsigned int new_order, bool unmapped);
>> + unsigned int new_order);
>> +int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);
>> int min_order_for_split(struct folio *folio);
>> int split_folio_to_list(struct folio *folio, struct list_head *list);
>> bool folio_split_supported(struct folio *folio, unsigned int new_order,
>> @@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
>> static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>> unsigned int new_order)
>> {
>> - return __split_huge_page_to_list_to_order(page, list, new_order, false);
>> + return __split_huge_page_to_list_to_order(page, list, new_order);
>> }
>> static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
>> {
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 0184cd915f44..942bd8410c54 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -3747,7 +3747,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>> * @lock_at: a page within @folio to be left locked to caller
>> * @list: after-split folios will be put on it if non NULL
>> * @split_type: perform uniform split or not (non-uniform split)
>> - * @unmapped: The pages are already unmapped, they are migration entries.
>> *
>> * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
>> * It is in charge of checking whether the split is supported or not and
>> @@ -3763,7 +3762,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>> */
>> static int __folio_split(struct folio *folio, unsigned int new_order,
>> struct page *split_at, struct page *lock_at,
>> - struct list_head *list, enum split_type split_type, bool unmapped)
>> + struct list_head *list, enum split_type split_type)
>
> Yeah, nice to see that go.
>
>> {
>> struct deferred_split *ds_queue;
>> XA_STATE(xas, &folio->mapping->i_pages, folio->index);
>> @@ -3809,14 +3808,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>> * is taken to serialise against parallel split or collapse
>> * operations.
>> */
>> - if (!unmapped) {
>> - anon_vma = folio_get_anon_vma(folio);
>> - if (!anon_vma) {
>> - ret = -EBUSY;
>> - goto out;
>> - }
>> - anon_vma_lock_write(anon_vma);
>> + anon_vma = folio_get_anon_vma(folio);
>> + if (!anon_vma) {
>> + ret = -EBUSY;
>> + goto out;
>> }
>> + anon_vma_lock_write(anon_vma);
>> mapping = NULL;
>> } else {
>> unsigned int min_order;
>> @@ -3882,8 +3879,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>> goto out_unlock;
>> }
>> - if (!unmapped)
>> - unmap_folio(folio);
>> + unmap_folio(folio);
>>
>
> Hm, I would have hoped that we could factor out the core logic and reuse it for the new helper, instead of duplicating code.
>
> Did you look into that?
>
>
I did, and I ended up with larger spaghetti. I was hoping to look at it as a
follow-up after the series, along with the mTHP changes and support (which are
still to be designed and prototyped).
Balbir
On 12.11.25 11:17, Balbir Singh wrote:
> On 11/12/25 21:00, David Hildenbrand (Red Hat) wrote:
>> On 12.11.25 05:46, Balbir Singh wrote:
>>> Unmapped was added as a parameter to __folio_split() and related
>>> call sites to support splitting of folios already in the midst
>>> of a migration. This special case arose for device private folio
>>> migration since during migration there could be a disconnect between
>>> source and destination on the folio size.
>>>
>>> Introduce split_unmapped_folio_to_order() to handle this special case.
>>> This in turn removes the special casing introduced by the unmapped
>>> parameter in __folio_split().
>>
>> As raised recently, I would hope that we can find a way to make all these splitting functions look more similar in the long term, ideally starting with "folio_split" / "folio_try_split".
>>
>> What about
>>
>> folio_split_unmapped()
>>
>> Do we really have to spell out the "to order" part in the function name?
>>
>> And if it's more a mostly-internal helper, maybe
>>
>> __folio_split_unmapped()
>>
>> subject: "mm/huge_memory: introduce ..."
>>
>
> I can rename it, but currently it confirms to the split_folio with order in the name
> The order is there in the name because in the future with mTHP we will want to
> support splitting to various orders.
I think we should start naming them more consistently with respect to
folio_split() immediately and clean up the other ones later.
I don't understand why "_to_order" must be in the name right now. You
can add another variant and start using longer names when really required.
>
>
>>>
>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>> Cc: David Hildenbrand <david@redhat.com>
>>> Cc: Zi Yan <ziy@nvidia.com>
>>> Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
>>> Cc: Rakie Kim <rakie.kim@sk.com>
>>> Cc: Byungchul Park <byungchul@sk.com>
>>> Cc: Gregory Price <gourry@gourry.net>
>>> Cc: Ying Huang <ying.huang@linux.alibaba.com>
>>> Cc: Alistair Popple <apopple@nvidia.com>
>>> Cc: Oscar Salvador <osalvador@suse.de>
>>> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>>> Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
>>> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
>>> Cc: Nico Pache <npache@redhat.com>
>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>> Cc: Dev Jain <dev.jain@arm.com>
>>> Cc: Barry Song <baohua@kernel.org>
>>> Cc: Lyude Paul <lyude@redhat.com>
>>> Cc: Danilo Krummrich <dakr@kernel.org>
>>> Cc: David Airlie <airlied@gmail.com>
>>> Cc: Simona Vetter <simona@ffwll.ch>
>>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>>> Cc: Mika Penttilä <mpenttil@redhat.com>
>>> Cc: Matthew Brost <matthew.brost@intel.com>
>>> Cc: Francois Dugast <francois.dugast@intel.com>
>>>
>>> Suggested-by: Zi Yan <ziy@nvidia.com>
>>> Signed-off-by: Balbir Singh <balbirs@nvidia.com>
>>> ---
>>> include/linux/huge_mm.h | 5 +-
>>> mm/huge_memory.c | 135 ++++++++++++++++++++++++++++++++++------
>>> mm/migrate_device.c | 3 +-
>>> 3 files changed, 120 insertions(+), 23 deletions(-)
>>>
>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>> index e2e91aa1a042..9155e683c08a 100644
>>> --- a/include/linux/huge_mm.h
>>> +++ b/include/linux/huge_mm.h
>>> @@ -371,7 +371,8 @@ enum split_type {
>>> bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
>>> int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>>> - unsigned int new_order, bool unmapped);
>>> + unsigned int new_order);
>>> +int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);
>>> int min_order_for_split(struct folio *folio);
>>> int split_folio_to_list(struct folio *folio, struct list_head *list);
>>> bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>> @@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
>>> static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>>> unsigned int new_order)
>>> {
>>> - return __split_huge_page_to_list_to_order(page, list, new_order, false);
>>> + return __split_huge_page_to_list_to_order(page, list, new_order);
>>> }
>>> static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
>>> {
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 0184cd915f44..942bd8410c54 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -3747,7 +3747,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>> * @lock_at: a page within @folio to be left locked to caller
>>> * @list: after-split folios will be put on it if non NULL
>>> * @split_type: perform uniform split or not (non-uniform split)
>>> - * @unmapped: The pages are already unmapped, they are migration entries.
>>> *
>>> * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
>>> * It is in charge of checking whether the split is supported or not and
>>> @@ -3763,7 +3762,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>> */
>>> static int __folio_split(struct folio *folio, unsigned int new_order,
>>> struct page *split_at, struct page *lock_at,
>>> - struct list_head *list, enum split_type split_type, bool unmapped)
>>> + struct list_head *list, enum split_type split_type)
>>
>> Yeah, nice to see that go.
>>
>>> {
>>> struct deferred_split *ds_queue;
>>> XA_STATE(xas, &folio->mapping->i_pages, folio->index);
>>> @@ -3809,14 +3808,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>>> * is taken to serialise against parallel split or collapse
>>> * operations.
>>> */
>>> - if (!unmapped) {
>>> - anon_vma = folio_get_anon_vma(folio);
>>> - if (!anon_vma) {
>>> - ret = -EBUSY;
>>> - goto out;
>>> - }
>>> - anon_vma_lock_write(anon_vma);
>>> + anon_vma = folio_get_anon_vma(folio);
>>> + if (!anon_vma) {
>>> + ret = -EBUSY;
>>> + goto out;
>>> }
>>> + anon_vma_lock_write(anon_vma);
>>> mapping = NULL;
>>> } else {
>>> unsigned int min_order;
>>> @@ -3882,8 +3879,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>>> goto out_unlock;
>>> }
>>> - if (!unmapped)
>>> - unmap_folio(folio);
>>> + unmap_folio(folio);
>>>
>>
>> Hm, I would have hoped that we could factor out the core logic and reuse it for the new helper, instead of duplicating code.
>>
>> Did you look into that?
>>
>>
>
> I did, I ended up with larger spaghetti, I was hoping to look it as a follow up
> after the series with the mTHP changes and support (that is to be designed and
> prototyped).
Looking at it in more detail, the code duplication is not desired.
We have to find a way to factor the existing code out and reuse it from
any new function.
--
Cheers
David
On 11/12/25 22:34, David Hildenbrand (Red Hat) wrote:
> On 12.11.25 11:17, Balbir Singh wrote:
>> On 11/12/25 21:00, David Hildenbrand (Red Hat) wrote:
>>> On 12.11.25 05:46, Balbir Singh wrote:
>>>> Unmapped was added as a parameter to __folio_split() and related
>>>> call sites to support splitting of folios already in the midst
>>>> of a migration. This special case arose for device private folio
>>>> migration since during migration there could be a disconnect between
>>>> source and destination on the folio size.
>>>>
>>>> Introduce split_unmapped_folio_to_order() to handle this special case.
>>>> This in turn removes the special casing introduced by the unmapped
>>>> parameter in __folio_split().
>>>
>>> As raised recently, I would hope that we can find a way to make all these splitting functions look more similar in the long term, ideally starting with "folio_split" / "folio_try_split".
>>>
>>> What about
>>>
>>> folio_split_unmapped()
>>>
>>> Do we really have to spell out the "to order" part in the function name?
>>>
>>> And if it's more a mostly-internal helper, maybe
>>>
>>> __folio_split_unmapped()
>>>
>>> subject: "mm/huge_memory: introduce ..."
>>>
>>
>> I can rename it, but currently it confirms to the split_folio with order in the name
>> The order is there in the name because in the future with mTHP we will want to
>> support splitting to various orders.
>
> I think we should start naming them more consistently regarding folio_split() immediately and cleanup the other ones later.
>
> I don't understand why "_to_order" must be in the name right now. You can add another variant and start using longer names when really required.
>
Ack
>>
>>
>>>>
>>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>>> Cc: David Hildenbrand <david@redhat.com>
>>>> Cc: Zi Yan <ziy@nvidia.com>
>>>> Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
>>>> Cc: Rakie Kim <rakie.kim@sk.com>
>>>> Cc: Byungchul Park <byungchul@sk.com>
>>>> Cc: Gregory Price <gourry@gourry.net>
>>>> Cc: Ying Huang <ying.huang@linux.alibaba.com>
>>>> Cc: Alistair Popple <apopple@nvidia.com>
>>>> Cc: Oscar Salvador <osalvador@suse.de>
>>>> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>>>> Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
>>>> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
>>>> Cc: Nico Pache <npache@redhat.com>
>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>> Cc: Dev Jain <dev.jain@arm.com>
>>>> Cc: Barry Song <baohua@kernel.org>
>>>> Cc: Lyude Paul <lyude@redhat.com>
>>>> Cc: Danilo Krummrich <dakr@kernel.org>
>>>> Cc: David Airlie <airlied@gmail.com>
>>>> Cc: Simona Vetter <simona@ffwll.ch>
>>>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>>>> Cc: Mika Penttilä <mpenttil@redhat.com>
>>>> Cc: Matthew Brost <matthew.brost@intel.com>
>>>> Cc: Francois Dugast <francois.dugast@intel.com>
>>>>
>>>> Suggested-by: Zi Yan <ziy@nvidia.com>
>>>> Signed-off-by: Balbir Singh <balbirs@nvidia.com>
>>>> ---
>>>> include/linux/huge_mm.h | 5 +-
>>>> mm/huge_memory.c | 135 ++++++++++++++++++++++++++++++++++------
>>>> mm/migrate_device.c | 3 +-
>>>> 3 files changed, 120 insertions(+), 23 deletions(-)
>>>>
>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>> index e2e91aa1a042..9155e683c08a 100644
>>>> --- a/include/linux/huge_mm.h
>>>> +++ b/include/linux/huge_mm.h
>>>> @@ -371,7 +371,8 @@ enum split_type {
>>>> bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
>>>> int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>>>> - unsigned int new_order, bool unmapped);
>>>> + unsigned int new_order);
>>>> +int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);
>>>> int min_order_for_split(struct folio *folio);
>>>> int split_folio_to_list(struct folio *folio, struct list_head *list);
>>>> bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>> @@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
>>>> static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>>>> unsigned int new_order)
>>>> {
>>>> - return __split_huge_page_to_list_to_order(page, list, new_order, false);
>>>> + return __split_huge_page_to_list_to_order(page, list, new_order);
>>>> }
>>>> static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
>>>> {
>>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>>> index 0184cd915f44..942bd8410c54 100644
>>>> --- a/mm/huge_memory.c
>>>> +++ b/mm/huge_memory.c
>>>> @@ -3747,7 +3747,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>> * @lock_at: a page within @folio to be left locked to caller
>>>> * @list: after-split folios will be put on it if non NULL
>>>> * @split_type: perform uniform split or not (non-uniform split)
>>>> - * @unmapped: The pages are already unmapped, they are migration entries.
>>>> *
>>>> * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
>>>> * It is in charge of checking whether the split is supported or not and
>>>> @@ -3763,7 +3762,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>> */
>>>> static int __folio_split(struct folio *folio, unsigned int new_order,
>>>> struct page *split_at, struct page *lock_at,
>>>> - struct list_head *list, enum split_type split_type, bool unmapped)
>>>> + struct list_head *list, enum split_type split_type)
>>>
>>> Yeah, nice to see that go.
>>>
>>>> {
>>>> struct deferred_split *ds_queue;
>>>> XA_STATE(xas, &folio->mapping->i_pages, folio->index);
>>>> @@ -3809,14 +3808,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>>>> * is taken to serialise against parallel split or collapse
>>>> * operations.
>>>> */
>>>> - if (!unmapped) {
>>>> - anon_vma = folio_get_anon_vma(folio);
>>>> - if (!anon_vma) {
>>>> - ret = -EBUSY;
>>>> - goto out;
>>>> - }
>>>> - anon_vma_lock_write(anon_vma);
>>>> + anon_vma = folio_get_anon_vma(folio);
>>>> + if (!anon_vma) {
>>>> + ret = -EBUSY;
>>>> + goto out;
>>>> }
>>>> + anon_vma_lock_write(anon_vma);
>>>> mapping = NULL;
>>>> } else {
>>>> unsigned int min_order;
>>>> @@ -3882,8 +3879,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>>>> goto out_unlock;
>>>> }
>>>> - if (!unmapped)
>>>> - unmap_folio(folio);
>>>> + unmap_folio(folio);
>>>>
>>>
>>> Hm, I would have hoped that we could factor out the core logic and reuse it for the new helper, instead of duplicating code.
>>>
>>> Did you look into that?
>>>
>>>
>>
>> I did, I ended up with larger spaghetti, I was hoping to look it as a follow up
>> after the series with the mTHP changes and support (that is to be designed and
>> prototyped).
>
> Looking at it in more detail, the code duplication is not desired.
>
> We have to find a way to factor the existing code out and reuse it from any new function.
>
I came up with a helper, but it ends up needing another boolean, do_lru.
---
include/linux/huge_mm.h | 5 +-
mm/huge_memory.c | 336 +++++++++++++++++++++++-----------------
mm/migrate_device.c | 3 +-
3 files changed, 195 insertions(+), 149 deletions(-)
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index e2e91aa1a042..44c09755bada 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -371,7 +371,8 @@ enum split_type {
bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
- unsigned int new_order, bool unmapped);
+ unsigned int new_order);
+int split_unmapped_folio(struct folio *folio, unsigned int new_order);
int min_order_for_split(struct folio *folio);
int split_folio_to_list(struct folio *folio, struct list_head *list);
bool folio_split_supported(struct folio *folio, unsigned int new_order,
@@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
unsigned int new_order)
{
- return __split_huge_page_to_list_to_order(page, list, new_order, false);
+ return __split_huge_page_to_list_to_order(page, list, new_order);
}
static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
{
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 0184cd915f44..534befe1b7aa 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3739,6 +3739,152 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
return true;
}
+static int __folio_split_unmapped(struct folio *folio, unsigned int new_order,
+ struct page *split_at, struct xa_state *xas,
+ struct address_space *mapping, bool do_lru,
+ struct list_head *list, enum split_type split_type,
+ int extra_pins)
+{
+ struct folio *end_folio = folio_next(folio);
+ struct folio *new_folio, *next;
+ int old_order = folio_order(folio);
+ int nr_shmem_dropped = 0;
+ int ret = 0;
+ pgoff_t end = 0;
+ struct deferred_split *ds_queue;
+
+ /* Prevent deferred_split_scan() touching ->_refcount */
+ ds_queue = folio_split_queue_lock(folio);
+ if (folio_ref_freeze(folio, 1 + extra_pins)) {
+ struct swap_cluster_info *ci = NULL;
+ struct lruvec *lruvec;
+ int expected_refs;
+
+ if (old_order > 1) {
+ if (!list_empty(&folio->_deferred_list)) {
+ ds_queue->split_queue_len--;
+ /*
+ * Reinitialize page_deferred_list after removing the
+ * page from the split_queue, otherwise a subsequent
+ * split will see list corruption when checking the
+ * page_deferred_list.
+ */
+ list_del_init(&folio->_deferred_list);
+ }
+ if (folio_test_partially_mapped(folio)) {
+ folio_clear_partially_mapped(folio);
+ mod_mthp_stat(old_order,
+ MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
+ }
+ }
+ split_queue_unlock(ds_queue);
+ if (mapping) {
+ int nr = folio_nr_pages(folio);
+
+ if (folio_test_pmd_mappable(folio) &&
+ new_order < HPAGE_PMD_ORDER) {
+ if (folio_test_swapbacked(folio)) {
+ __lruvec_stat_mod_folio(folio,
+ NR_SHMEM_THPS, -nr);
+ } else {
+ __lruvec_stat_mod_folio(folio,
+ NR_FILE_THPS, -nr);
+ filemap_nr_thps_dec(mapping);
+ }
+ }
+ }
+
+ if (folio_test_swapcache(folio)) {
+ if (mapping) {
+ VM_WARN_ON_ONCE_FOLIO(mapping, folio);
+ return -EINVAL;
+ }
+
+ ci = swap_cluster_get_and_lock(folio);
+ }
+
+ /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
+ if (do_lru)
+ lruvec = folio_lruvec_lock(folio);
+
+ ret = __split_unmapped_folio(folio, new_order, split_at, xas,
+ mapping, split_type);
+
+ /*
+ * Unfreeze after-split folios and put them back to the right
+ * list. @folio should be kept frozon until page cache
+ * entries are updated with all the other after-split folios
+ * to prevent others seeing stale page cache entries.
+ * As a result, new_folio starts from the next folio of
+ * @folio.
+ */
+ for (new_folio = folio_next(folio); new_folio != end_folio;
+ new_folio = next) {
+ unsigned long nr_pages = folio_nr_pages(new_folio);
+
+ next = folio_next(new_folio);
+
+ zone_device_private_split_cb(folio, new_folio);
+
+ expected_refs = folio_expected_ref_count(new_folio) + 1;
+ folio_ref_unfreeze(new_folio, expected_refs);
+
+ if (do_lru)
+ lru_add_split_folio(folio, new_folio, lruvec, list);
+
+ /*
+ * Anonymous folio with swap cache.
+ * NOTE: shmem in swap cache is not supported yet.
+ */
+ if (ci) {
+ __swap_cache_replace_folio(ci, folio, new_folio);
+ continue;
+ }
+
+ /* Anonymous folio without swap cache */
+ if (!mapping)
+ continue;
+
+ /* Add the new folio to the page cache. */
+ if (new_folio->index < end) {
+ __xa_store(&mapping->i_pages, new_folio->index,
+ new_folio, 0);
+ continue;
+ }
+
+ /* Drop folio beyond EOF: ->index >= end */
+ if (shmem_mapping(mapping))
+ nr_shmem_dropped += nr_pages;
+ else if (folio_test_clear_dirty(new_folio))
+ folio_account_cleaned(
+ new_folio, inode_to_wb(mapping->host));
+ __filemap_remove_folio(new_folio, NULL);
+ folio_put_refs(new_folio, nr_pages);
+ }
+
+ zone_device_private_split_cb(folio, NULL);
+ /*
+ * Unfreeze @folio only after all page cache entries, which
+ * used to point to it, have been updated with new folios.
+ * Otherwise, a parallel folio_try_get() can grab @folio
+ * and its caller can see stale page cache entries.
+ */
+ expected_refs = folio_expected_ref_count(folio) + 1;
+ folio_ref_unfreeze(folio, expected_refs);
+
+ if (do_lru)
+ unlock_page_lruvec(lruvec);
+
+ if (ci)
+ swap_cluster_unlock(ci);
+ } else {
+ split_queue_unlock(ds_queue);
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
/**
* __folio_split() - split a folio at @split_at to a @new_order folio
* @folio: folio to split
@@ -3747,7 +3893,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
* @lock_at: a page within @folio to be left locked to caller
* @list: after-split folios will be put on it if non NULL
* @split_type: perform uniform split or not (non-uniform split)
- * @unmapped: The pages are already unmapped, they are migration entries.
*
* It calls __split_unmapped_folio() to perform uniform and non-uniform split.
* It is in charge of checking whether the split is supported or not and
@@ -3763,9 +3908,8 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
*/
static int __folio_split(struct folio *folio, unsigned int new_order,
struct page *split_at, struct page *lock_at,
- struct list_head *list, enum split_type split_type, bool unmapped)
+ struct list_head *list, enum split_type split_type)
{
- struct deferred_split *ds_queue;
XA_STATE(xas, &folio->mapping->i_pages, folio->index);
struct folio *end_folio = folio_next(folio);
bool is_anon = folio_test_anon(folio);
@@ -3809,14 +3953,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
* is taken to serialise against parallel split or collapse
* operations.
*/
- if (!unmapped) {
- anon_vma = folio_get_anon_vma(folio);
- if (!anon_vma) {
- ret = -EBUSY;
- goto out;
- }
- anon_vma_lock_write(anon_vma);
+ anon_vma = folio_get_anon_vma(folio);
+ if (!anon_vma) {
+ ret = -EBUSY;
+ goto out;
}
+ anon_vma_lock_write(anon_vma);
mapping = NULL;
} else {
unsigned int min_order;
@@ -3882,8 +4024,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
goto out_unlock;
}
- if (!unmapped)
- unmap_folio(folio);
+ unmap_folio(folio);
/* block interrupt reentry in xa_lock and spinlock */
local_irq_disable();
@@ -3900,142 +4041,14 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
}
}
- /* Prevent deferred_split_scan() touching ->_refcount */
- ds_queue = folio_split_queue_lock(folio);
- if (folio_ref_freeze(folio, 1 + extra_pins)) {
- struct swap_cluster_info *ci = NULL;
- struct lruvec *lruvec;
- int expected_refs;
-
- if (old_order > 1) {
- if (!list_empty(&folio->_deferred_list)) {
- ds_queue->split_queue_len--;
- /*
- * Reinitialize page_deferred_list after removing the
- * page from the split_queue, otherwise a subsequent
- * split will see list corruption when checking the
- * page_deferred_list.
- */
- list_del_init(&folio->_deferred_list);
- }
- if (folio_test_partially_mapped(folio)) {
- folio_clear_partially_mapped(folio);
- mod_mthp_stat(old_order,
- MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
- }
- }
- split_queue_unlock(ds_queue);
- if (mapping) {
- int nr = folio_nr_pages(folio);
-
- if (folio_test_pmd_mappable(folio) &&
- new_order < HPAGE_PMD_ORDER) {
- if (folio_test_swapbacked(folio)) {
- __lruvec_stat_mod_folio(folio,
- NR_SHMEM_THPS, -nr);
- } else {
- __lruvec_stat_mod_folio(folio,
- NR_FILE_THPS, -nr);
- filemap_nr_thps_dec(mapping);
- }
- }
- }
-
- if (folio_test_swapcache(folio)) {
- if (mapping) {
- VM_WARN_ON_ONCE_FOLIO(mapping, folio);
- ret = -EINVAL;
- goto fail;
- }
-
- ci = swap_cluster_get_and_lock(folio);
- }
-
- /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
- lruvec = folio_lruvec_lock(folio);
-
- ret = __split_unmapped_folio(folio, new_order, split_at, &xas,
- mapping, split_type);
-
- /*
- * Unfreeze after-split folios and put them back to the right
- * list. @folio should be kept frozon until page cache
- * entries are updated with all the other after-split folios
- * to prevent others seeing stale page cache entries.
- * As a result, new_folio starts from the next folio of
- * @folio.
- */
- for (new_folio = folio_next(folio); new_folio != end_folio;
- new_folio = next) {
- unsigned long nr_pages = folio_nr_pages(new_folio);
-
- next = folio_next(new_folio);
-
- zone_device_private_split_cb(folio, new_folio);
-
- expected_refs = folio_expected_ref_count(new_folio) + 1;
- folio_ref_unfreeze(new_folio, expected_refs);
-
- if (!unmapped)
- lru_add_split_folio(folio, new_folio, lruvec, list);
-
- /*
- * Anonymous folio with swap cache.
- * NOTE: shmem in swap cache is not supported yet.
- */
- if (ci) {
- __swap_cache_replace_folio(ci, folio, new_folio);
- continue;
- }
-
- /* Anonymous folio without swap cache */
- if (!mapping)
- continue;
-
- /* Add the new folio to the page cache. */
- if (new_folio->index < end) {
- __xa_store(&mapping->i_pages, new_folio->index,
- new_folio, 0);
- continue;
- }
-
- /* Drop folio beyond EOF: ->index >= end */
- if (shmem_mapping(mapping))
- nr_shmem_dropped += nr_pages;
- else if (folio_test_clear_dirty(new_folio))
- folio_account_cleaned(
- new_folio, inode_to_wb(mapping->host));
- __filemap_remove_folio(new_folio, NULL);
- folio_put_refs(new_folio, nr_pages);
- }
-
- zone_device_private_split_cb(folio, NULL);
- /*
- * Unfreeze @folio only after all page cache entries, which
- * used to point to it, have been updated with new folios.
- * Otherwise, a parallel folio_try_get() can grab @folio
- * and its caller can see stale page cache entries.
- */
- expected_refs = folio_expected_ref_count(folio) + 1;
- folio_ref_unfreeze(folio, expected_refs);
-
- unlock_page_lruvec(lruvec);
-
- if (ci)
- swap_cluster_unlock(ci);
- } else {
- split_queue_unlock(ds_queue);
- ret = -EAGAIN;
- }
+ ret = __folio_split_unmapped(folio, new_order, split_at, &xas, mapping,
+ true, list, split_type, extra_pins);
fail:
if (mapping)
xas_unlock(&xas);
local_irq_enable();
- if (unmapped)
- return ret;
-
if (nr_shmem_dropped)
shmem_uncharge(mapping->host, nr_shmem_dropped);
@@ -4079,6 +4092,39 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
return ret;
}
+/*
+ * This function is a helper for splitting folios that have already been unmapped.
+ * The use case is that the device or the CPU can refuse to migrate THP pages in
+ * the middle of migration, due to allocation issues on either side.
+ *
+ * The high level code is copied from __folio_split(). Since the pages are
+ * anonymous and already isolated from the LRU, the code has been simplified
+ * rather than burdening __folio_split() with unmapped special casing.
+ *
+ * None of the split folios are unlocked.
+ */
+int split_unmapped_folio(struct folio *folio, unsigned int new_order)
+{
+ int extra_pins, ret = 0;
+
+ VM_WARN_ON_FOLIO(folio_mapped(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
+ VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
+
+ if (!can_split_folio(folio, 1, &extra_pins)) {
+ ret = -EAGAIN;
+ return ret;
+ }
+
+ local_irq_disable();
+ ret = __folio_split_unmapped(folio, new_order, &folio->page, NULL,
+ NULL, false, NULL, SPLIT_TYPE_UNIFORM,
+ extra_pins);
+ local_irq_enable();
+ return ret;
+}
+
/*
* This function splits a large folio into smaller folios of order @new_order.
* @page can point to any page of the large folio to split. The split operation
@@ -4127,12 +4173,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
* with the folio. Splitting to order 0 is compatible with all folios.
*/
int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
- unsigned int new_order, bool unmapped)
+ unsigned int new_order)
{
struct folio *folio = page_folio(page);
return __folio_split(folio, new_order, &folio->page, page, list,
- SPLIT_TYPE_UNIFORM, unmapped);
+ SPLIT_TYPE_UNIFORM);
}
/**
@@ -4163,7 +4209,7 @@ int folio_split(struct folio *folio, unsigned int new_order,
struct page *split_at, struct list_head *list)
{
return __folio_split(folio, new_order, split_at, &folio->page, list,
- SPLIT_TYPE_NON_UNIFORM, false);
+ SPLIT_TYPE_NON_UNIFORM);
}
int min_order_for_split(struct folio *folio)
diff --git a/mm/migrate_device.c b/mm/migrate_device.c
index c50abbd32f21..23b7bd56177c 100644
--- a/mm/migrate_device.c
+++ b/mm/migrate_device.c
@@ -918,8 +918,7 @@ static int migrate_vma_split_unmapped_folio(struct migrate_vma *migrate,
folio_get(folio);
split_huge_pmd_address(migrate->vma, addr, true);
- ret = __split_huge_page_to_list_to_order(folio_page(folio, 0), NULL,
- 0, true);
+ ret = split_unmapped_folio(folio, 0);
if (ret)
return ret;
migrate->src[idx] &= ~MIGRATE_PFN_COMPOUND;
--
2.51.1
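
To make the expected calling context concrete, here is a minimal illustrative
sketch, not part of the patch, modelled on the mm/migrate_device.c hunk above.
The function name and parameter types are made up; the calls are the ones shown
in that hunk, and the preconditions mirror the VM_WARN_ON checks in
split_unmapped_folio():

/*
 * Illustrative only -- modelled on migrate_vma_split_unmapped_folio() above.
 *
 * Preconditions the caller must satisfy:
 *  - @folio is a locked large (THP) folio,
 *  - it is already unmapped: its page table entries are migration entries,
 *    so folio_mapped() is false,
 *  - it is not on the LRU (device-private, or already isolated by the
 *    migration path), which is why the helper can skip the anon_vma and
 *    lruvec work that __folio_split() performs.
 */
static int example_split_unmapped_during_migration(struct migrate_vma *migrate,
						    unsigned long idx,
						    unsigned long addr,
						    struct folio *folio)
{
	int ret;

	/*
	 * The real caller also takes an extra folio reference at this point;
	 * how that reference is dropped again is outside this sketch.
	 */

	/* Split the PMD-level entry into PTE-level (migration) entries first. */
	split_huge_pmd_address(migrate->vma, addr, true);

	/* The folio refcount is frozen and unfrozen inside the helper. */
	ret = split_unmapped_folio(folio, 0);
	if (ret)
		return ret;

	/* The source entry no longer describes a compound page. */
	migrate->src[idx] &= ~MIGRATE_PFN_COMPOUND;
	return 0;
}
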
On 11/13/25 10:49, Balbir Singh wrote:
> On 11/12/25 22:34, David Hildenbrand (Red Hat) wrote:
>> On 12.11.25 11:17, Balbir Singh wrote:
>>> On 11/12/25 21:00, David Hildenbrand (Red Hat) wrote:
>>>> On 12.11.25 05:46, Balbir Singh wrote:
>>>>> Unmapped was added as a parameter to __folio_split() and related
>>>>> call sites to support splitting of folios already in the midst
>>>>> of a migration. This special case arose for device private folio
>>>>> migration since during migration there could be a disconnect between
>>>>> source and destination on the folio size.
>>>>>
>>>>> Introduce split_unmapped_folio_to_order() to handle this special case.
>>>>> This in turn removes the special casing introduced by the unmapped
>>>>> parameter in __folio_split().
>>>>
>>>> As raised recently, I would hope that we can find a way to make all these splitting functions look more similar in the long term, ideally starting with "folio_split" / "folio_try_split".
>>>>
>>>> What about
>>>>
>>>> folio_split_unmapped()
>>>>
>>>> Do we really have to spell out the "to order" part in the function name?
>>>>
>>>> And if it's more a mostly-internal helper, maybe
>>>>
>>>> __folio_split_unmapped()
>>>>
>>>> subject: "mm/huge_memory: introduce ..."
>>>>
>>>
>>> I can rename it, but currently it conforms to the split_folio naming, with the order in the name.
>>> The order is there in the name because in the future with mTHP we will want to
>>> support splitting to various orders.
>>
>> I think we should start naming them more consistently regarding folio_split() immediately and cleanup the other ones later.
>>
>> I don't understand why "_to_order" must be in the name right now. You can add another variant and start using longer names when really required.
>>
>
> Ack
>
>>>
>>>
>>>>>
>>>>> Cc: [...]
>>>>>
>>>>> Suggested-by: Zi Yan <ziy@nvidia.com>
>>>>> Signed-off-by: Balbir Singh <balbirs@nvidia.com>
>>>>> ---
>>>>> include/linux/huge_mm.h | 5 +-
>>>>> mm/huge_memory.c | 135 ++++++++++++++++++++++++++++++++++------
>>>>> mm/migrate_device.c | 3 +-
>>>>> 3 files changed, 120 insertions(+), 23 deletions(-)
>>>>>
>>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>>> index e2e91aa1a042..9155e683c08a 100644
>>>>> --- a/include/linux/huge_mm.h
>>>>> +++ b/include/linux/huge_mm.h
>>>>> @@ -371,7 +371,8 @@ enum split_type {
>>>>> bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
>>>>> int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>>>>> - unsigned int new_order, bool unmapped);
>>>>> + unsigned int new_order);
>>>>> +int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);
>>>>> int min_order_for_split(struct folio *folio);
>>>>> int split_folio_to_list(struct folio *folio, struct list_head *list);
>>>>> bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>>> @@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
>>>>> static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>>>>> unsigned int new_order)
>>>>> {
>>>>> - return __split_huge_page_to_list_to_order(page, list, new_order, false);
>>>>> + return __split_huge_page_to_list_to_order(page, list, new_order);
>>>>> }
>>>>> static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
>>>>> {
>>>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>>>> index 0184cd915f44..942bd8410c54 100644
>>>>> --- a/mm/huge_memory.c
>>>>> +++ b/mm/huge_memory.c
>>>>> @@ -3747,7 +3747,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>>> * @lock_at: a page within @folio to be left locked to caller
>>>>> * @list: after-split folios will be put on it if non NULL
>>>>> * @split_type: perform uniform split or not (non-uniform split)
>>>>> - * @unmapped: The pages are already unmapped, they are migration entries.
>>>>> *
>>>>> * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
>>>>> * It is in charge of checking whether the split is supported or not and
>>>>> @@ -3763,7 +3762,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>>> */
>>>>> static int __folio_split(struct folio *folio, unsigned int new_order,
>>>>> struct page *split_at, struct page *lock_at,
>>>>> - struct list_head *list, enum split_type split_type, bool unmapped)
>>>>> + struct list_head *list, enum split_type split_type)
>>>>
>>>> Yeah, nice to see that go.
>>>>
>>>>> {
>>>>> struct deferred_split *ds_queue;
>>>>> XA_STATE(xas, &folio->mapping->i_pages, folio->index);
>>>>> @@ -3809,14 +3808,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>>>>> * is taken to serialise against parallel split or collapse
>>>>> * operations.
>>>>> */
>>>>> - if (!unmapped) {
>>>>> - anon_vma = folio_get_anon_vma(folio);
>>>>> - if (!anon_vma) {
>>>>> - ret = -EBUSY;
>>>>> - goto out;
>>>>> - }
>>>>> - anon_vma_lock_write(anon_vma);
>>>>> + anon_vma = folio_get_anon_vma(folio);
>>>>> + if (!anon_vma) {
>>>>> + ret = -EBUSY;
>>>>> + goto out;
>>>>> }
>>>>> + anon_vma_lock_write(anon_vma);
>>>>> mapping = NULL;
>>>>> } else {
>>>>> unsigned int min_order;
>>>>> @@ -3882,8 +3879,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>>>>> goto out_unlock;
>>>>> }
>>>>> - if (!unmapped)
>>>>> - unmap_folio(folio);
>>>>> + unmap_folio(folio);
>>>>>
>>>>
>>>> Hm, I would have hoped that we could factor out the core logic and reuse it for the new helper, instead of duplicating code.
>>>>
>>>> Did you look into that?
>>>>
>>>>
>>>
>>> I did, but I ended up with larger spaghetti. I was hoping to look at it as a
>>> follow-up after the series, with the mTHP changes and support (that is to be
>>> designed and prototyped).
>>
>> Looking at it in more detail, the code duplication is not desired.
>>
>> We have to find a way to factor the existing code out and reuse it from any new function.
>>
>
> I came up with a helper, but that ends up with another boolean do_lru.
>
>
Zi, David, any opinions on the approach below?
> ---
> include/linux/huge_mm.h | 5 +-
> mm/huge_memory.c | 336 +++++++++++++++++++++++-----------------
> mm/migrate_device.c | 3 +-
> 3 files changed, 195 insertions(+), 149 deletions(-)
>
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index e2e91aa1a042..44c09755bada 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -371,7 +371,8 @@ enum split_type {
>
> bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
> int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> - unsigned int new_order, bool unmapped);
> + unsigned int new_order);
> +int split_unmapped_folio(struct folio *folio, unsigned int new_order);
> int min_order_for_split(struct folio *folio);
> int split_folio_to_list(struct folio *folio, struct list_head *list);
> bool folio_split_supported(struct folio *folio, unsigned int new_order,
> @@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
> static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> unsigned int new_order)
> {
> - return __split_huge_page_to_list_to_order(page, list, new_order, false);
> + return __split_huge_page_to_list_to_order(page, list, new_order);
> }
> static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
> {
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 0184cd915f44..534befe1b7aa 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -3739,6 +3739,152 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
> return true;
> }
>
> +static int __folio_split_unmapped(struct folio *folio, unsigned int new_order,
> + struct page *split_at, struct xa_state *xas,
> + struct address_space *mapping, bool do_lru,
> + struct list_head *list, enum split_type split_type,
> + int extra_pins)
> +{
> + struct folio *end_folio = folio_next(folio);
> + struct folio *new_folio, *next;
> + int old_order = folio_order(folio);
> + int nr_shmem_dropped = 0;
> + int ret = 0;
> + pgoff_t end = 0;
> + struct deferred_split *ds_queue;
> +
> + /* Prevent deferred_split_scan() touching ->_refcount */
> + ds_queue = folio_split_queue_lock(folio);
> + if (folio_ref_freeze(folio, 1 + extra_pins)) {
> + struct swap_cluster_info *ci = NULL;
> + struct lruvec *lruvec;
> + int expected_refs;
> +
> + if (old_order > 1) {
> + if (!list_empty(&folio->_deferred_list)) {
> + ds_queue->split_queue_len--;
> + /*
> + * Reinitialize page_deferred_list after removing the
> + * page from the split_queue, otherwise a subsequent
> + * split will see list corruption when checking the
> + * page_deferred_list.
> + */
> + list_del_init(&folio->_deferred_list);
> + }
> + if (folio_test_partially_mapped(folio)) {
> + folio_clear_partially_mapped(folio);
> + mod_mthp_stat(old_order,
> + MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
> + }
> + }
> + split_queue_unlock(ds_queue);
> + if (mapping) {
> + int nr = folio_nr_pages(folio);
> +
> + if (folio_test_pmd_mappable(folio) &&
> + new_order < HPAGE_PMD_ORDER) {
> + if (folio_test_swapbacked(folio)) {
> + __lruvec_stat_mod_folio(folio,
> + NR_SHMEM_THPS, -nr);
> + } else {
> + __lruvec_stat_mod_folio(folio,
> + NR_FILE_THPS, -nr);
> + filemap_nr_thps_dec(mapping);
> + }
> + }
> + }
> +
> + if (folio_test_swapcache(folio)) {
> + if (mapping) {
> + VM_WARN_ON_ONCE_FOLIO(mapping, folio);
> + return -EINVAL;
> + }
> +
> + ci = swap_cluster_get_and_lock(folio);
> + }
> +
> + /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
> + if (do_lru)
> + lruvec = folio_lruvec_lock(folio);
> +
> + ret = __split_unmapped_folio(folio, new_order, split_at, xas,
> + mapping, split_type);
> +
> + /*
> + * Unfreeze after-split folios and put them back to the right
> + * list. @folio should be kept frozon until page cache
> + * entries are updated with all the other after-split folios
> + * to prevent others seeing stale page cache entries.
> + * As a result, new_folio starts from the next folio of
> + * @folio.
> + */
> + for (new_folio = folio_next(folio); new_folio != end_folio;
> + new_folio = next) {
> + unsigned long nr_pages = folio_nr_pages(new_folio);
> +
> + next = folio_next(new_folio);
> +
> + zone_device_private_split_cb(folio, new_folio);
> +
> + expected_refs = folio_expected_ref_count(new_folio) + 1;
> + folio_ref_unfreeze(new_folio, expected_refs);
> +
> + if (do_lru)
> + lru_add_split_folio(folio, new_folio, lruvec, list);
> +
> + /*
> + * Anonymous folio with swap cache.
> + * NOTE: shmem in swap cache is not supported yet.
> + */
> + if (ci) {
> + __swap_cache_replace_folio(ci, folio, new_folio);
> + continue;
> + }
> +
> + /* Anonymous folio without swap cache */
> + if (!mapping)
> + continue;
> +
> + /* Add the new folio to the page cache. */
> + if (new_folio->index < end) {
> + __xa_store(&mapping->i_pages, new_folio->index,
> + new_folio, 0);
> + continue;
> + }
> +
> + /* Drop folio beyond EOF: ->index >= end */
> + if (shmem_mapping(mapping))
> + nr_shmem_dropped += nr_pages;
> + else if (folio_test_clear_dirty(new_folio))
> + folio_account_cleaned(
> + new_folio, inode_to_wb(mapping->host));
> + __filemap_remove_folio(new_folio, NULL);
> + folio_put_refs(new_folio, nr_pages);
> + }
> +
> + zone_device_private_split_cb(folio, NULL);
> + /*
> + * Unfreeze @folio only after all page cache entries, which
> + * used to point to it, have been updated with new folios.
> + * Otherwise, a parallel folio_try_get() can grab @folio
> + * and its caller can see stale page cache entries.
> + */
> + expected_refs = folio_expected_ref_count(folio) + 1;
> + folio_ref_unfreeze(folio, expected_refs);
> +
> + if (do_lru)
> + unlock_page_lruvec(lruvec);
> +
> + if (ci)
> + swap_cluster_unlock(ci);
> + } else {
> + split_queue_unlock(ds_queue);
> + return -EAGAIN;
> + }
> +
> + return 0;
> +}
> +
> /**
> * __folio_split() - split a folio at @split_at to a @new_order folio
> * @folio: folio to split
> @@ -3747,7 +3893,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
> * @lock_at: a page within @folio to be left locked to caller
> * @list: after-split folios will be put on it if non NULL
> * @split_type: perform uniform split or not (non-uniform split)
> - * @unmapped: The pages are already unmapped, they are migration entries.
> *
> * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
> * It is in charge of checking whether the split is supported or not and
> @@ -3763,9 +3908,8 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
> */
> static int __folio_split(struct folio *folio, unsigned int new_order,
> struct page *split_at, struct page *lock_at,
> - struct list_head *list, enum split_type split_type, bool unmapped)
> + struct list_head *list, enum split_type split_type)
> {
> - struct deferred_split *ds_queue;
> XA_STATE(xas, &folio->mapping->i_pages, folio->index);
> struct folio *end_folio = folio_next(folio);
> bool is_anon = folio_test_anon(folio);
> @@ -3809,14 +3953,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
> * is taken to serialise against parallel split or collapse
> * operations.
> */
> - if (!unmapped) {
> - anon_vma = folio_get_anon_vma(folio);
> - if (!anon_vma) {
> - ret = -EBUSY;
> - goto out;
> - }
> - anon_vma_lock_write(anon_vma);
> + anon_vma = folio_get_anon_vma(folio);
> + if (!anon_vma) {
> + ret = -EBUSY;
> + goto out;
> }
> + anon_vma_lock_write(anon_vma);
> mapping = NULL;
> } else {
> unsigned int min_order;
> @@ -3882,8 +4024,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
> goto out_unlock;
> }
>
> - if (!unmapped)
> - unmap_folio(folio);
> + unmap_folio(folio);
>
> /* block interrupt reentry in xa_lock and spinlock */
> local_irq_disable();
> @@ -3900,142 +4041,14 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
> }
> }
>
> - /* Prevent deferred_split_scan() touching ->_refcount */
> - ds_queue = folio_split_queue_lock(folio);
> - if (folio_ref_freeze(folio, 1 + extra_pins)) {
> - struct swap_cluster_info *ci = NULL;
> - struct lruvec *lruvec;
> - int expected_refs;
> -
> - if (old_order > 1) {
> - if (!list_empty(&folio->_deferred_list)) {
> - ds_queue->split_queue_len--;
> - /*
> - * Reinitialize page_deferred_list after removing the
> - * page from the split_queue, otherwise a subsequent
> - * split will see list corruption when checking the
> - * page_deferred_list.
> - */
> - list_del_init(&folio->_deferred_list);
> - }
> - if (folio_test_partially_mapped(folio)) {
> - folio_clear_partially_mapped(folio);
> - mod_mthp_stat(old_order,
> - MTHP_STAT_NR_ANON_PARTIALLY_MAPPED, -1);
> - }
> - }
> - split_queue_unlock(ds_queue);
> - if (mapping) {
> - int nr = folio_nr_pages(folio);
> -
> - if (folio_test_pmd_mappable(folio) &&
> - new_order < HPAGE_PMD_ORDER) {
> - if (folio_test_swapbacked(folio)) {
> - __lruvec_stat_mod_folio(folio,
> - NR_SHMEM_THPS, -nr);
> - } else {
> - __lruvec_stat_mod_folio(folio,
> - NR_FILE_THPS, -nr);
> - filemap_nr_thps_dec(mapping);
> - }
> - }
> - }
> -
> - if (folio_test_swapcache(folio)) {
> - if (mapping) {
> - VM_WARN_ON_ONCE_FOLIO(mapping, folio);
> - ret = -EINVAL;
> - goto fail;
> - }
> -
> - ci = swap_cluster_get_and_lock(folio);
> - }
> -
> - /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
> - lruvec = folio_lruvec_lock(folio);
> -
> - ret = __split_unmapped_folio(folio, new_order, split_at, &xas,
> - mapping, split_type);
> -
> - /*
> - * Unfreeze after-split folios and put them back to the right
> - * list. @folio should be kept frozon until page cache
> - * entries are updated with all the other after-split folios
> - * to prevent others seeing stale page cache entries.
> - * As a result, new_folio starts from the next folio of
> - * @folio.
> - */
> - for (new_folio = folio_next(folio); new_folio != end_folio;
> - new_folio = next) {
> - unsigned long nr_pages = folio_nr_pages(new_folio);
> -
> - next = folio_next(new_folio);
> -
> - zone_device_private_split_cb(folio, new_folio);
> -
> - expected_refs = folio_expected_ref_count(new_folio) + 1;
> - folio_ref_unfreeze(new_folio, expected_refs);
> -
> - if (!unmapped)
> - lru_add_split_folio(folio, new_folio, lruvec, list);
> -
> - /*
> - * Anonymous folio with swap cache.
> - * NOTE: shmem in swap cache is not supported yet.
> - */
> - if (ci) {
> - __swap_cache_replace_folio(ci, folio, new_folio);
> - continue;
> - }
> -
> - /* Anonymous folio without swap cache */
> - if (!mapping)
> - continue;
> -
> - /* Add the new folio to the page cache. */
> - if (new_folio->index < end) {
> - __xa_store(&mapping->i_pages, new_folio->index,
> - new_folio, 0);
> - continue;
> - }
> -
> - /* Drop folio beyond EOF: ->index >= end */
> - if (shmem_mapping(mapping))
> - nr_shmem_dropped += nr_pages;
> - else if (folio_test_clear_dirty(new_folio))
> - folio_account_cleaned(
> - new_folio, inode_to_wb(mapping->host));
> - __filemap_remove_folio(new_folio, NULL);
> - folio_put_refs(new_folio, nr_pages);
> - }
> -
> - zone_device_private_split_cb(folio, NULL);
> - /*
> - * Unfreeze @folio only after all page cache entries, which
> - * used to point to it, have been updated with new folios.
> - * Otherwise, a parallel folio_try_get() can grab @folio
> - * and its caller can see stale page cache entries.
> - */
> - expected_refs = folio_expected_ref_count(folio) + 1;
> - folio_ref_unfreeze(folio, expected_refs);
> -
> - unlock_page_lruvec(lruvec);
> -
> - if (ci)
> - swap_cluster_unlock(ci);
> - } else {
> - split_queue_unlock(ds_queue);
> - ret = -EAGAIN;
> - }
> + ret = __folio_split_unmapped(folio, new_order, split_at, &xas, mapping,
> + true, list, split_type, extra_pins);
> fail:
> if (mapping)
> xas_unlock(&xas);
>
> local_irq_enable();
>
> - if (unmapped)
> - return ret;
> -
> if (nr_shmem_dropped)
> shmem_uncharge(mapping->host, nr_shmem_dropped);
>
> @@ -4079,6 +4092,39 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
> return ret;
> }
>
> +/*
> + * This function is a helper for splitting folios that have already been unmapped.
> + * The use case is that the device or the CPU can refuse to migrate THP pages in
> + * the middle of migration, due to allocation issues on either side.
> + *
> + * The high level code is copied from __folio_split(). Since the pages are
> + * anonymous and already isolated from the LRU, the code has been simplified
> + * rather than burdening __folio_split() with unmapped special casing.
> + *
> + * None of the split folios are unlocked.
> + */
> +int split_unmapped_folio(struct folio *folio, unsigned int new_order)
> +{
> + int extra_pins, ret = 0;
> +
> + VM_WARN_ON_FOLIO(folio_mapped(folio), folio);
> + VM_WARN_ON_ONCE_FOLIO(!folio_test_locked(folio), folio);
> + VM_WARN_ON_ONCE_FOLIO(!folio_test_large(folio), folio);
> +
> + if (!can_split_folio(folio, 1, &extra_pins)) {
> + ret = -EAGAIN;
> + return ret;
> + }
> +
> + local_irq_disable();
> + ret = __folio_split_unmapped(folio, new_order, &folio->page, NULL,
> + NULL, false, NULL, SPLIT_TYPE_UNIFORM,
> + extra_pins);
> + local_irq_enable();
> + return ret;
> +}
> +
> /*
> * This function splits a large folio into smaller folios of order @new_order.
> * @page can point to any page of the large folio to split. The split operation
> @@ -4127,12 +4173,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
> * with the folio. Splitting to order 0 is compatible with all folios.
> */
> int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
> - unsigned int new_order, bool unmapped)
> + unsigned int new_order)
> {
> struct folio *folio = page_folio(page);
>
> return __folio_split(folio, new_order, &folio->page, page, list,
> - SPLIT_TYPE_UNIFORM, unmapped);
> + SPLIT_TYPE_UNIFORM);
> }
>
> /**
> @@ -4163,7 +4209,7 @@ int folio_split(struct folio *folio, unsigned int new_order,
> struct page *split_at, struct list_head *list)
> {
> return __folio_split(folio, new_order, split_at, &folio->page, list,
> - SPLIT_TYPE_NON_UNIFORM, false);
> + SPLIT_TYPE_NON_UNIFORM);
> }
>
> int min_order_for_split(struct folio *folio)
> diff --git a/mm/migrate_device.c b/mm/migrate_device.c
> index c50abbd32f21..23b7bd56177c 100644
> --- a/mm/migrate_device.c
> +++ b/mm/migrate_device.c
> @@ -918,8 +918,7 @@ static int migrate_vma_split_unmapped_folio(struct migrate_vma *migrate,
>
> folio_get(folio);
> split_huge_pmd_address(migrate->vma, addr, true);
> - ret = __split_huge_page_to_list_to_order(folio_page(folio, 0), NULL,
> - 0, true);
> + ret = split_unmapped_folio(folio, 0);
> if (ret)
> return ret;
> migrate->src[idx] &= ~MIGRATE_PFN_COMPOUND;
Thanks,
Balbir
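
For readers skimming the proposal above, the contract of the shared helper can
be summarised roughly as the kernel-doc sketch below. This is an editorial
reading of the diff, not wording from the patch; the prototype matches the diff,
the parameter descriptions are inferred from how the two callers use it:

/**
 * __folio_split_unmapped() - freeze, split and unfreeze an already-unmapped folio
 * @folio:      locked, unmapped large folio; its refcount is frozen and
 *              unfrozen inside this helper
 * @new_order:  order of the after-split folios
 * @split_at:   page within @folio identifying the split target
 * @xas:        page cache state, only used when @mapping is non-NULL
 *              (the migration caller passes NULL)
 * @mapping:    the folio's address_space for pagecache folios, NULL for
 *              anonymous folios
 * @do_lru:     true when @folio sits on the LRU and the after-split folios
 *              must be put back on it (the lruvec lock is only taken then);
 *              false for folios already isolated, e.g. during device migration
 * @list:       passed to lru_add_split_folio() when @do_lru; may be NULL
 * @split_type: uniform or non-uniform split
 * @extra_pins: additional pins as computed by can_split_folio()
 *
 * Both callers hold the folio lock and disable interrupts around the call.
 * Returns 0 on success or a negative errno, -EAGAIN when the refcount
 * cannot be frozen.
 */
static int __folio_split_unmapped(struct folio *folio, unsigned int new_order,
		struct page *split_at, struct xa_state *xas,
		struct address_space *mapping, bool do_lru,
		struct list_head *list, enum split_type split_type,
		int extra_pins);
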
On 13 Nov 2025, at 16:39, Balbir Singh wrote:
> On 11/13/25 10:49, Balbir Singh wrote:
>> [...]
>> I came up with a helper, but that ends up with another boolean do_lru.
>>
>>
>
> Zi, David, any opinions on the approach below?
Looks good to me. We might want a better name instead of
__folio_split_unmapped(). Or __split_unmapped_folio() should
be renamed, since these two function names are too similar.
Maybe __folio_split_unmapped() -> __freeze_and_split_unmapped_folio().
Feel free to come up with a better name. :)
>> [...]
Best Regards,
Yan, Zi
On 11/14/25 08:45, Zi Yan wrote:
> On 13 Nov 2025, at 16:39, Balbir Singh wrote:
>
>> On 11/13/25 10:49, Balbir Singh wrote:
>>> On 11/12/25 22:34, David Hildenbrand (Red Hat) wrote:
>>>> On 12.11.25 11:17, Balbir Singh wrote:
>>>>> On 11/12/25 21:00, David Hildenbrand (Red Hat) wrote:
>>>>>> On 12.11.25 05:46, Balbir Singh wrote:
>>>>>>> Unmapped was added as a parameter to __folio_split() and related
>>>>>>> call sites to support splitting of folios already in the midst
>>>>>>> of a migration. This special case arose for device private folio
>>>>>>> migration since during migration there could be a disconnect between
>>>>>>> source and destination on the folio size.
>>>>>>>
>>>>>>> Introduce split_unmapped_folio_to_order() to handle this special case.
>>>>>>> This in turn removes the special casing introduced by the unmapped
>>>>>>> parameter in __folio_split().
>>>>>>
>>>>>> As raised recently, I would hope that we can find a way to make all these splitting functions look more similar in the long term, ideally starting with "folio_split" / "folio_try_split".
>>>>>>
>>>>>> What about
>>>>>>
>>>>>> folio_split_unmapped()
>>>>>>
>>>>>> Do we really have to spell out the "to order" part in the function name?
>>>>>>
>>>>>> And if it's more a mostly-internal helper, maybe
>>>>>>
>>>>>> __folio_split_unmapped()
>>>>>>
>>>>>> subject: "mm/huge_memory: introduce ..."
>>>>>>
>>>>>
>>>>> I can rename it, but currently it confirms to the split_folio with order in the name
>>>>> The order is there in the name because in the future with mTHP we will want to
>>>>> support splitting to various orders.
>>>>
>>>> I think we should start naming them more consistently regarding folio_split() immediately and cleanup the other ones later.
>>>>
>>>> I don't understand why "_to_order" must be in the name right now. You can add another variant and start using longer names when really required.
>>>>
>>>
>>> Ack
>>>
>>>>>
>>>>>
>>>>>>>
>>>>>>> Cc: Andrew Morton <akpm@linux-foundation.org>
>>>>>>> Cc: David Hildenbrand <david@redhat.com>
>>>>>>> Cc: Zi Yan <ziy@nvidia.com>
>>>>>>> Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
>>>>>>> Cc: Rakie Kim <rakie.kim@sk.com>
>>>>>>> Cc: Byungchul Park <byungchul@sk.com>
>>>>>>> Cc: Gregory Price <gourry@gourry.net>
>>>>>>> Cc: Ying Huang <ying.huang@linux.alibaba.com>
>>>>>>> Cc: Alistair Popple <apopple@nvidia.com>
>>>>>>> Cc: Oscar Salvador <osalvador@suse.de>
>>>>>>> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>>>>>>> Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
>>>>>>> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
>>>>>>> Cc: Nico Pache <npache@redhat.com>
>>>>>>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>>>>>>> Cc: Dev Jain <dev.jain@arm.com>
>>>>>>> Cc: Barry Song <baohua@kernel.org>
>>>>>>> Cc: Lyude Paul <lyude@redhat.com>
>>>>>>> Cc: Danilo Krummrich <dakr@kernel.org>
>>>>>>> Cc: David Airlie <airlied@gmail.com>
>>>>>>> Cc: Simona Vetter <simona@ffwll.ch>
>>>>>>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>>>>>>> Cc: Mika Penttilä <mpenttil@redhat.com>
>>>>>>> Cc: Matthew Brost <matthew.brost@intel.com>
>>>>>>> Cc: Francois Dugast <francois.dugast@intel.com>
>>>>>>>
>>>>>>> Suggested-by: Zi Yan <ziy@nvidia.com>
>>>>>>> Signed-off-by: Balbir Singh <balbirs@nvidia.com>
>>>>>>> ---
>>>>>>> include/linux/huge_mm.h | 5 +-
>>>>>>> mm/huge_memory.c | 135 ++++++++++++++++++++++++++++++++++------
>>>>>>> mm/migrate_device.c | 3 +-
>>>>>>> 3 files changed, 120 insertions(+), 23 deletions(-)
>>>>>>>
>>>>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>>>>> index e2e91aa1a042..9155e683c08a 100644
>>>>>>> --- a/include/linux/huge_mm.h
>>>>>>> +++ b/include/linux/huge_mm.h
>>>>>>> @@ -371,7 +371,8 @@ enum split_type {
>>>>>>> bool can_split_folio(struct folio *folio, int caller_pins, int *pextra_pins);
>>>>>>> int __split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>>>>>>> - unsigned int new_order, bool unmapped);
>>>>>>> + unsigned int new_order);
>>>>>>> +int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);
>>>>>>> int min_order_for_split(struct folio *folio);
>>>>>>> int split_folio_to_list(struct folio *folio, struct list_head *list);
>>>>>>> bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>>>>> @@ -382,7 +383,7 @@ int folio_split(struct folio *folio, unsigned int new_order, struct page *page,
>>>>>>> static inline int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
>>>>>>> unsigned int new_order)
>>>>>>> {
>>>>>>> - return __split_huge_page_to_list_to_order(page, list, new_order, false);
>>>>>>> + return __split_huge_page_to_list_to_order(page, list, new_order);
>>>>>>> }
>>>>>>> static inline int split_huge_page_to_order(struct page *page, unsigned int new_order)
>>>>>>> {
>>>>>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>>>>>> index 0184cd915f44..942bd8410c54 100644
>>>>>>> --- a/mm/huge_memory.c
>>>>>>> +++ b/mm/huge_memory.c
>>>>>>> @@ -3747,7 +3747,6 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>>>>> * @lock_at: a page within @folio to be left locked to caller
>>>>>>> * @list: after-split folios will be put on it if non NULL
>>>>>>> * @split_type: perform uniform split or not (non-uniform split)
>>>>>>> - * @unmapped: The pages are already unmapped, they are migration entries.
>>>>>>> *
>>>>>>> * It calls __split_unmapped_folio() to perform uniform and non-uniform split.
>>>>>>> * It is in charge of checking whether the split is supported or not and
>>>>>>> @@ -3763,7 +3762,7 @@ bool folio_split_supported(struct folio *folio, unsigned int new_order,
>>>>>>> */
>>>>>>> static int __folio_split(struct folio *folio, unsigned int new_order,
>>>>>>> struct page *split_at, struct page *lock_at,
>>>>>>> - struct list_head *list, enum split_type split_type, bool unmapped)
>>>>>>> + struct list_head *list, enum split_type split_type)
>>>>>>
>>>>>> Yeah, nice to see that go.
>>>>>>
>>>>>>> {
>>>>>>> struct deferred_split *ds_queue;
>>>>>>> XA_STATE(xas, &folio->mapping->i_pages, folio->index);
>>>>>>> @@ -3809,14 +3808,12 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>>>>>>> * is taken to serialise against parallel split or collapse
>>>>>>> * operations.
>>>>>>> */
>>>>>>> - if (!unmapped) {
>>>>>>> - anon_vma = folio_get_anon_vma(folio);
>>>>>>> - if (!anon_vma) {
>>>>>>> - ret = -EBUSY;
>>>>>>> - goto out;
>>>>>>> - }
>>>>>>> - anon_vma_lock_write(anon_vma);
>>>>>>> + anon_vma = folio_get_anon_vma(folio);
>>>>>>> + if (!anon_vma) {
>>>>>>> + ret = -EBUSY;
>>>>>>> + goto out;
>>>>>>> }
>>>>>>> + anon_vma_lock_write(anon_vma);
>>>>>>> mapping = NULL;
>>>>>>> } else {
>>>>>>> unsigned int min_order;
>>>>>>> @@ -3882,8 +3879,7 @@ static int __folio_split(struct folio *folio, unsigned int new_order,
>>>>>>> goto out_unlock;
>>>>>>> }
>>>>>>> - if (!unmapped)
>>>>>>> - unmap_folio(folio);
>>>>>>> + unmap_folio(folio);
>>>>>>>
>>>>>>
>>>>>> Hm, I would have hoped that we could factor out the core logic and reuse it for the new helper, instead of duplicating code.
>>>>>>
>>>>>> Did you look into that?
>>>>>>
>>>>>>
>>>>>
>>>>> I did, but I ended up with larger spaghetti; I was hoping to look at it as a follow-up
>>>>> after the series, together with the mTHP changes and support (which are still to be
>>>>> designed and prototyped).
>>>>
>>>> Looking at it in more detail, the code duplication is not desired.
>>>>
>>>> We have to find a way to factor the existing code out and reuse it from any new function.
>>>>
>>>
>>> I came up with a helper, but that ends up with another boolean do_lru.
>>>
>>>
>>
>> Zi, David, any opinions on the approach below?
>
> Looks good to me. We might want a better name instead of
> __folio_split_unmapped(). Or __split_unmapped_folio() should
> be renamed, since these two function names are too similar.
>
> Maybe __folio_split_unmapped() -> __freeze_and_split_unmapped_folio().
>
> Feel free to come up with a better name. :)
>
I had __folio_split_unfreeze() to indicate that we split the folio and unfreeze at the end, but
it does not reflect that we freeze it as well. Looks like we are trending towards folio_split
as the prefix (IIUC Dave correctly). I like your name, but if the folio_split prefix is going to
be required, then maybe __folio_split_unmapped_unfreeze()?
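
For reference, the declaration being renamed is the one from the posted patch; under the
shorter naming suggested earlier in this thread it would read roughly as below (a sketch
only; the parameter list is assumed to stay unchanged under any rename):

	/* name as posted */
	int split_unmapped_folio_to_order(struct folio *folio, unsigned int new_order);

	/*
	 * shorter variant suggested in this thread (or __folio_split_unmapped() if it
	 * stays a mostly-internal helper); parameters assumed unchanged
	 */
	int folio_split_unmapped(struct folio *folio, unsigned int new_order);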
[...]
Balbir
On 13 Nov 2025, at 16:56, Balbir Singh wrote:
> On 11/14/25 08:45, Zi Yan wrote:
[...]
>> Looks good to me. We might want a better name instead of
>> __folio_split_unmapped(). Or __split_unmapped_folio() should
>> be renamed, since these two function names are too similar.
>>
>> Maybe __folio_split_unmapped() -> __freeze_and_split_unmapped_folio().
>>
>> Feel free to come up with a better name. :)
>>
>
>> I had __folio_split_unfreeze() to indicate that we split the folio and unfreeze at the end, but
>> it does not reflect that we freeze it as well. Looks like we are trending towards folio_split
>> as the prefix (IIUC Dave correctly). I like your name, but if the folio_split prefix is going to
>> be required, then maybe __folio_split_unmapped_unfreeze()?
>
OK, if __folio prefix is a convention, how about
__folio_freeze_and_split_unmapped()? __folio_split_unmapped_unfreeze() sounds
like folio is frozen when the function is called. Of course, a more accurate
one would be __folio_freeze_split_unfreeze_unmapped(). It can work if
it is not too long. :)
Best Regards,
Yan, Zi
On 14.11.25 01:23, Zi Yan wrote:
> On 13 Nov 2025, at 16:56, Balbir Singh wrote:
>
[...]
>
> OK, if __folio prefix is a convention,
Yes, let's start cleaning all this up.
> how about
> __folio_freeze_and_split_unmapped()? __folio_split_unmapped_unfreeze() sounds
> like folio is frozen when the function is called. Of course, a more accurate
> one would be __folio_freeze_split_unfreeze_unmapped(). It can work if
> it is not too long. :)
Let me take a look at v2.
--
Cheers
David
commit a6ca2ba46390 ("mm: replace pmd_to_swp_entry() with softleaf_from_pmd()")
does not work with device private THP entries. softleaf_is_migration_young()
asserts that the entry be a migration entry, but in the current code, the
entry might already be replaced by a device private entry by the time the
check is made. The issue exists with commit
7385dbdbf841 ("mm/rmap: extend rmap and migration support device-private entries")
Fix this by processing the migration entries prior to conversion to
device private if the folio is device private.
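
In short, the fix reorders remove_migration_pmd() so that the softleaf entry is inspected
while it is still a migration entry (condensed from the diff below; the local variable
names are simplified for illustration):

	/* entry still refers to the migration entry at this point */
	old = !softleaf_is_migration_young(entry);
	dirty = folio_test_dirty(folio) && softleaf_is_migration_dirty(entry);
	exclusive = !softleaf_is_migration_read(entry);
	if (softleaf_is_migration_write(entry))
		pmde = pmd_mkwrite(pmde, vma);

	if (folio_is_device_private(folio)) {
		/*
		 * entry may be replaced with a device private entry here (based
		 * on pmd_write(pmde), hence pmd_mkwrite() above), so the
		 * softleaf_is_migration_*() checks must already have run.
		 */
	}

	/* later, apply the saved state */
	if (old)
		pmde = pmd_mkold(pmde);
	if (dirty)
		pmde = pmd_mkdirty(pmde);
	if (exclusive)
		rmap_flags |= RMAP_EXCLUSIVE;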
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@kernel.org>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lance Yang <lance.yang@linux.dev>
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
---
mm/huge_memory.c | 25 ++++++++++++++++++-------
1 file changed, 18 insertions(+), 7 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 942bd8410c54..82b019205216 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4939,6 +4939,7 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
unsigned long haddr = address & HPAGE_PMD_MASK;
pmd_t pmde;
softleaf_t entry;
+ bool old = false, dirty = false, migration_read_entry = false;
if (!(pvmw->pmd && !pvmw->pte))
return;
@@ -4947,6 +4948,19 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
folio_get(folio);
pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot));
+ if (!softleaf_is_migration_young(entry))
+ old = true;
+
+ /* NOTE: this may contain setting soft-dirty on some archs */
+ if (folio_test_dirty(folio) && softleaf_is_migration_dirty(entry))
+ dirty = true;
+
+ if (softleaf_is_migration_write(entry))
+ pmde = pmd_mkwrite(pmde, vma);
+
+ if (!softleaf_is_migration_read(entry))
+ migration_read_entry = true;
+
if (folio_is_device_private(folio)) {
if (pmd_write(pmde))
entry = make_writable_device_private_entry(
@@ -4959,20 +4973,17 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
if (pmd_swp_soft_dirty(*pvmw->pmd))
pmde = pmd_mksoft_dirty(pmde);
- if (softleaf_is_migration_write(entry))
- pmde = pmd_mkwrite(pmde, vma);
+ if (old)
+ pmde = pmd_mkold(pmde);
if (pmd_swp_uffd_wp(*pvmw->pmd))
pmde = pmd_mkuffd_wp(pmde);
- if (!softleaf_is_migration_young(entry))
- pmde = pmd_mkold(pmde);
- /* NOTE: this may contain setting soft-dirty on some archs */
- if (folio_test_dirty(folio) && softleaf_is_migration_dirty(entry))
+ if (dirty)
pmde = pmd_mkdirty(pmde);
if (folio_test_anon(folio)) {
rmap_t rmap_flags = RMAP_NONE;
- if (!softleaf_is_migration_read(entry))
+ if (migration_read_entry)
rmap_flags |= RMAP_EXCLUSIVE;
folio_add_anon_rmap_pmd(folio, new, vma, haddr, rmap_flags);
--
2.51.1
On Wed, Nov 12, 2025 at 03:46:34PM +1100, Balbir Singh wrote:
> commit a6ca2ba46390 ("mm: replace pmd_to_swp_entry() with softleaf_from_pmd()")
> does not work with device private THP entries. softleaf_is_migration_young()
> asserts that the entry be a migration entry, but in the current code, the
> entry might already be replaced by a device private entry by the time the
> check is made. The issue exists with commit
> 7385dbdbf841 ("mm/rmap: extend rmap and migration support device-private entries")
OK this is _hugely_ confusing.
Is the bug in my patch or in yours?
Why are you replying to your own series with this patch?
You shouldn't reference non-upstream commit messages in general.
If the bug is in 7385dbdbf841, fix it in your series, then perhaps send a
suggested fix-patch to the appropriate patch in my series to make life easier
for Andrew.
As mine in this case was, I think, purely a mechanical replacement of function
calls, I'm guessing it's a bug in yours? So I think this is probably the best
way.
>
> Fix this by processing the migration entries prior to conversion to
> device private if the folio is device private.
>
[...]
> + if (!softleaf_is_migration_young(entry))
> + old = true;
> +
> + /* NOTE: this may contain setting soft-dirty on some archs */
'This may contain setting soft-dirty' is confusing. 'This may set soft-dirty on some arches' perhaps?
[...]
Thanks, Lorenzo
I noticed I did not respond to this

<snip>
>> + /* NOTE: this may contain setting soft-dirty on some archs */
>
> 'This may contain setting soft-dirty' is confusing. 'This may set soft-dirty on some arches' perhaps?
>

This is the existing comment and it already says some archs. Am I missing something?

Balbir
On 11/13/25 00:43, Lorenzo Stoakes wrote:
> On Wed, Nov 12, 2025 at 03:46:34PM +1100, Balbir Singh wrote:
>> commit a6ca2ba46390 ("mm: replace pmd_to_swp_entry() with softleaf_from_pmd()")
>> does not work with device private THP entries. softleaf_is_migration_young()
>> asserts that the entry be a migration entry, but in the current code, the
>> entry might already be replaced by a device private entry by the time the
>> check is made. The issue exists with commit
>> 7385dbdbf841 ("mm/rmap: extend rmap and migration support device-private entries")
>
> OK this is _hugely_ confusing.
>
> Is the bug in my patch or in yours?
>
The bug exists in my series (as pointed out by "the issue exists with" above),
but it is exposed by the VM_WARN_ON introduced in your changes.
> Why are you replying to your own series with this patch?
>
> You shouldn't reference non-upstream commit messages in general.
>
> If the bug is in 7385dbdbf841, fix it in your series, then perhaps send a
> suggested fix-patch to the appropriate patch in my series to make life easier
> for Andrew.
>
OK, let me split it up then
> As mine I think in this case was purely a mechanical replacement of function
> calls I'm guessing it's a bug in yours? So I think this is probably the best
> way.
>
[...]
Balbir
On 12.11.25 05:46, Balbir Singh wrote:
> commit a6ca2ba46390 ("mm: replace pmd_to_swp_entry() with softleaf_from_pmd()")
So should this be squashed into Lorenzo's patch, or incorporated in his
series in case he has to resend?
> does not work with device private THP entries. softleaf_is_migration_young()
> asserts that the entry be a migration entry, but in the current code, the
> entry might already be replaced by a device private entry by the time the
> check is made. The issue exists with commit
> 7385dbdbf841 ("mm/rmap: extend rmap and migration support device-private entries")
>
Because this confuses me. If it's already a problem in the
commit-to-go-upstream-first, it should be fixed in that commit?
--
Cheers
David
On 11/12/25 22:37, David Hildenbrand (Red Hat) wrote:
> On 12.11.25 05:46, Balbir Singh wrote:
>> commit a6ca2ba46390 ("mm: replace pmd_to_swp_entry() with softleaf_from_pmd()")
>
> So should this be squashed into Lorenzo's patch, or incorporated in his series in case he has to resend?
>
>> does not work with device private THP entries. softleaf_is_migration_young()
>> asserts that the entry be a migration entry, but in the current code, the
>> entry might already be replaced by a device private entry by the time the
>> check is made. The issue exists with commit
>> 7385dbdbf841 ("mm/rmap: extend rmap and migration support device-private entries")
>>
>
> Because this confuses me. If it's already a problem in the commit-to-go-upstream-first, it should be fixed in that commit?
>
Not sure how to handle this, because that would break rebase of mm/mm-new
or I'd have to send a replacement patch for the original patch from Lorenzo
(which does not seem right).
I'll post a simpler patch, but it needs to be on top of the series
Balbir
On 13.11.25 06:03, Balbir Singh wrote:
> On 11/12/25 22:37, David Hildenbrand (Red Hat) wrote:
>> On 12.11.25 05:46, Balbir Singh wrote:
>>> commit a6ca2ba46390 ("mm: replace pmd_to_swp_entry() with softleaf_from_pmd()")
>>
>> So should this be squashed into Lorenzo's patch, or incorporated in his series in case he has to resend?
>>
>>> does not work with device private THP entries. softleaf_is_migration_young()
>>> asserts that the entry be a migration entry, but in the current code, the
>>> entry might already be replaced by a device private entry by the time the
>>> check is made. The issue exists with commit
>>> 7385dbdbf841 ("mm/rmap: extend rmap and migration support device-private entries")
>>>
>>
>> Because this confuses me. If it's already a problem in the commit-to-go-upstream-first, it should be fixed in that commit?
>>
>
> Not sure how to handle this, because that would break rebase of mm/mm-new
> or I'd have to send a replacement patch for the original patch from Lorenzo
> (which does not seem right).
Yes, to be expected. Maybe Andrew can figure out how to address the
rebase, or we can give him a helping hand :)
>
> I'll post a simpler patch, but it needs to be on top of the series
Agreed, thanks.
--
Cheers
David