Follow the pattern used in remove_migration_pte() in
remove_migration_pmd(): process the migration entry and, if the entry
type is device private, override pmde with a device private entry and
set the soft-dirty and uffd_wp bits with pmd_swp_mksoft_dirty() and
pmd_swp_mkuffd_wp().
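
For reference, the device-private path in remove_migration_pte() that
this mirrors looks roughly like the below (paraphrased from
mm/migrate.c, not an exact copy of the mm-unstable code):

	if (unlikely(is_device_private_page(new))) {
		if (pte_write(pte))
			entry = make_writable_device_private_entry(
						page_to_pfn(new));
		else
			entry = make_readable_device_private_entry(
						page_to_pfn(new));
		pte = swp_entry_to_pte(entry);
		if (pte_swp_soft_dirty(old_pte))
			pte = pte_swp_mksoft_dirty(pte);
		if (pte_swp_uffd_wp(old_pte))
			pte = pte_swp_mkuffd_wp(pte);
	}
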
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Zi Yan <ziy@nvidia.com>
Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
Cc: Rakie Kim <rakie.kim@sk.com>
Cc: Byungchul Park <byungchul@sk.com>
Cc: Gregory Price <gourry@gourry.net>
Cc: Ying Huang <ying.huang@linux.alibaba.com>
Cc: Alistair Popple <apopple@nvidia.com>
Cc: Oscar Salvador <osalvador@suse.de>
Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
Cc: Nico Pache <npache@redhat.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Dev Jain <dev.jain@arm.com>
Cc: Barry Song <baohua@kernel.org>
Cc: Lyude Paul <lyude@redhat.com>
Cc: Danilo Krummrich <dakr@kernel.org>
Cc: David Airlie <airlied@gmail.com>
Cc: Simona Vetter <simona@ffwll.ch>
Cc: Ralph Campbell <rcampbell@nvidia.com>
Cc: Mika Penttilä <mpenttil@redhat.com>
Cc: Matthew Brost <matthew.brost@intel.com>
Cc: Francois Dugast <francois.dugast@intel.com>
Signed-off-by: Balbir Singh <balbirs@nvidia.com>
---
This fixup should be squashed into the patch "mm/rmap: extend rmap and
migration support" of mm/mm-unstable
mm/huge_memory.c | 27 +++++++++++++++++----------
1 file changed, 17 insertions(+), 10 deletions(-)
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9dda8c48daca..50ba458efcab 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -4698,16 +4698,6 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	folio_get(folio);
 	pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot));
 
-	if (folio_is_device_private(folio)) {
-		if (pmd_write(pmde))
-			entry = make_writable_device_private_entry(
-					page_to_pfn(new));
-		else
-			entry = make_readable_device_private_entry(
-					page_to_pfn(new));
-		pmde = swp_entry_to_pmd(entry);
-	}
-
 	if (pmd_swp_soft_dirty(*pvmw->pmd))
 		pmde = pmd_mksoft_dirty(pmde);
 	if (is_writable_migration_entry(entry))
@@ -4720,6 +4710,23 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
 	if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
 		pmde = pmd_mkdirty(pmde);
 
+	if (folio_is_device_private(folio)) {
+		swp_entry_t entry;
+
+		if (pmd_write(pmde))
+			entry = make_writable_device_private_entry(
+					page_to_pfn(new));
+		else
+			entry = make_readable_device_private_entry(
+					page_to_pfn(new));
+		pmde = swp_entry_to_pmd(entry);
+
+		if (pmd_swp_soft_dirty(*pvmw->pmd))
+			pmde = pmd_swp_mksoft_dirty(pmde);
+		if (pmd_swp_uffd_wp(*pvmw->pmd))
+			pmde = pmd_swp_mkuffd_wp(pmde);
+	}
+
 	if (folio_test_anon(folio)) {
 		rmap_t rmap_flags = RMAP_NONE;
 
--
2.51.1
On 15.11.25 01:28, Balbir Singh wrote:
> Follow the pattern used in remove_migration_pte() in
> remove_migration_pmd(): process the migration entry and, if the entry
> type is device private, override pmde with a device private entry and
> set the soft-dirty and uffd_wp bits with pmd_swp_mksoft_dirty() and
> pmd_swp_mkuffd_wp().
>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Zi Yan <ziy@nvidia.com>
> Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
> Cc: Rakie Kim <rakie.kim@sk.com>
> Cc: Byungchul Park <byungchul@sk.com>
> Cc: Gregory Price <gourry@gourry.net>
> Cc: Ying Huang <ying.huang@linux.alibaba.com>
> Cc: Alistair Popple <apopple@nvidia.com>
> Cc: Oscar Salvador <osalvador@suse.de>
> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
> Cc: Nico Pache <npache@redhat.com>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: Dev Jain <dev.jain@arm.com>
> Cc: Barry Song <baohua@kernel.org>
> Cc: Lyude Paul <lyude@redhat.com>
> Cc: Danilo Krummrich <dakr@kernel.org>
> Cc: David Airlie <airlied@gmail.com>
> Cc: Simona Vetter <simona@ffwll.ch>
> Cc: Ralph Campbell <rcampbell@nvidia.com>
> Cc: Mika Penttilä <mpenttil@redhat.com>
> Cc: Matthew Brost <matthew.brost@intel.com>
> Cc: Francois Dugast <francois.dugast@intel.com>
>
> Signed-off-by: Balbir Singh <balbirs@nvidia.com>
> ---
> This fixup should be squashed into the patch "mm/rmap: extend rmap and
> migration support" of mm/mm-unstable
>
> mm/huge_memory.c | 27 +++++++++++++++++----------
> 1 file changed, 17 insertions(+), 10 deletions(-)
>
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 9dda8c48daca..50ba458efcab 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -4698,16 +4698,6 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
>  	folio_get(folio);
>  	pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot));
>  
> -	if (folio_is_device_private(folio)) {
> -		if (pmd_write(pmde))
> -			entry = make_writable_device_private_entry(
> -					page_to_pfn(new));
> -		else
> -			entry = make_readable_device_private_entry(
> -					page_to_pfn(new));
> -		pmde = swp_entry_to_pmd(entry);
> -	}
> -
>  	if (pmd_swp_soft_dirty(*pvmw->pmd))
>  		pmde = pmd_mksoft_dirty(pmde);
>  	if (is_writable_migration_entry(entry))
> @@ -4720,6 +4710,23 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
>  	if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
>  		pmde = pmd_mkdirty(pmde);
>  
> +	if (folio_is_device_private(folio)) {
> +		swp_entry_t entry;
It's a bit nasty to have the same variable shadowed here.
We could reuse the existing entry by handling the code more similar to
remove_migration_pte(): determine RMAP_EXCLUSIVE earlier.
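Something along these lines (an untested sketch of what I mean, not
actual mm code):

	/* while "entry" still holds the migration entry ... */
	if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
		rmap_flags |= RMAP_EXCLUSIVE;

	/* ... "entry" is then free to be reused for the device-private entry */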
> +
> +		if (pmd_write(pmde))
> +			entry = make_writable_device_private_entry(
> +					page_to_pfn(new));
> +		else
> +			entry = make_readable_device_private_entry(
> +					page_to_pfn(new));
> +		pmde = swp_entry_to_pmd(entry);
> +
> +		if (pmd_swp_soft_dirty(*pvmw->pmd))
> +			pmde = pmd_swp_mksoft_dirty(pmde);
> +		if (pmd_swp_uffd_wp(*pvmw->pmd))
> +			pmde = pmd_swp_mkuffd_wp(pmde);
> +	}
> +
>  	if (folio_test_anon(folio)) {
>  		rmap_t rmap_flags = RMAP_NONE;
>
I guess at some point we could separate both parts completely (no need
to do all this work on pmde before the folio_is_device_private(folio)
check), so this could be
if (folio_is_device_private(folio)) {
	...
} else {
	entry = pmd_to_swp_entry(*pvmw->pmd);
	folio_get(folio);
	...
}
That is something for another day though, and remove_migration_pte()
should be cleaned up then as well.
--
Cheers
David
On 11/17/25 23:58, David Hildenbrand (Red Hat) wrote:
> On 15.11.25 01:28, Balbir Singh wrote:
>> Follow the pattern used in remove_migration_pte() in
>> remove_migration_pmd(): process the migration entry and, if the entry
>> type is device private, override pmde with a device private entry and
>> set the soft-dirty and uffd_wp bits with pmd_swp_mksoft_dirty() and
>> pmd_swp_mkuffd_wp().
>>
>> Cc: Andrew Morton <akpm@linux-foundation.org>
>> Cc: David Hildenbrand <david@redhat.com>
>> Cc: Zi Yan <ziy@nvidia.com>
>> Cc: Joshua Hahn <joshua.hahnjy@gmail.com>
>> Cc: Rakie Kim <rakie.kim@sk.com>
>> Cc: Byungchul Park <byungchul@sk.com>
>> Cc: Gregory Price <gourry@gourry.net>
>> Cc: Ying Huang <ying.huang@linux.alibaba.com>
>> Cc: Alistair Popple <apopple@nvidia.com>
>> Cc: Oscar Salvador <osalvador@suse.de>
>> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>> Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
>> Cc: "Liam R. Howlett" <Liam.Howlett@oracle.com>
>> Cc: Nico Pache <npache@redhat.com>
>> Cc: Ryan Roberts <ryan.roberts@arm.com>
>> Cc: Dev Jain <dev.jain@arm.com>
>> Cc: Barry Song <baohua@kernel.org>
>> Cc: Lyude Paul <lyude@redhat.com>
>> Cc: Danilo Krummrich <dakr@kernel.org>
>> Cc: David Airlie <airlied@gmail.com>
>> Cc: Simona Vetter <simona@ffwll.ch>
>> Cc: Ralph Campbell <rcampbell@nvidia.com>
>> Cc: Mika Penttilä <mpenttil@redhat.com>
>> Cc: Matthew Brost <matthew.brost@intel.com>
>> Cc: Francois Dugast <francois.dugast@intel.com>
>>
>> Signed-off-by: Balbir Singh <balbirs@nvidia.com>
>> ---
>> This fixup should be squashed into the patch "mm/rmap: extend rmap and
>> migration support" of mm/mm-unstable
>>
>> mm/huge_memory.c | 27 +++++++++++++++++----------
>> 1 file changed, 17 insertions(+), 10 deletions(-)
>>
>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>> index 9dda8c48daca..50ba458efcab 100644
>> --- a/mm/huge_memory.c
>> +++ b/mm/huge_memory.c
>> @@ -4698,16 +4698,6 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
>>  	folio_get(folio);
>>  	pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot));
>>  
>> -	if (folio_is_device_private(folio)) {
>> -		if (pmd_write(pmde))
>> -			entry = make_writable_device_private_entry(
>> -					page_to_pfn(new));
>> -		else
>> -			entry = make_readable_device_private_entry(
>> -					page_to_pfn(new));
>> -		pmde = swp_entry_to_pmd(entry);
>> -	}
>> -
>>  	if (pmd_swp_soft_dirty(*pvmw->pmd))
>>  		pmde = pmd_mksoft_dirty(pmde);
>>  	if (is_writable_migration_entry(entry))
>> @@ -4720,6 +4710,23 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
>>  	if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
>>  		pmde = pmd_mkdirty(pmde);
>>  
>> +	if (folio_is_device_private(folio)) {
>> +		swp_entry_t entry;
>
> It's a bit nasty to have the same variable shadowed here.
>
> We could reuse the existing entry by handling the code more similar to remove_migration_pte(): determine RMAP_EXCLUSIVE earlier.
>
>> +
>> +		if (pmd_write(pmde))
>> +			entry = make_writable_device_private_entry(
>> +					page_to_pfn(new));
>> +		else
>> +			entry = make_readable_device_private_entry(
>> +					page_to_pfn(new));
>> +		pmde = swp_entry_to_pmd(entry);
>> +
>> +		if (pmd_swp_soft_dirty(*pvmw->pmd))
>> +			pmde = pmd_swp_mksoft_dirty(pmde);
>> +		if (pmd_swp_uffd_wp(*pvmw->pmd))
>> +			pmde = pmd_swp_mkuffd_wp(pmde);
>> +	}
>> +
>>  	if (folio_test_anon(folio)) {
>>  		rmap_t rmap_flags = RMAP_NONE;
>>
>
> I guess at some point we could separate both parts completely (no need to do all this work on pmde before the folio_is_device_private(folio) check), so this could be
>
> if (folio_is_device_private(folio)) {
> 	...
> } else {
> 	entry = pmd_to_swp_entry(*pvmw->pmd);
> 	folio_get(folio);
> 	...
> }
>
> That is something for another day though, and remove_migration_pte() should be cleaned up then as well.
>
Agreed, and thanks for the review!
Balbir
On Sat, 15 Nov 2025 11:28:35 +1100 Balbir Singh <balbirs@nvidia.com> wrote:
> Follow the pattern used in remove_migration_pte() in
> remove_migration_pmd(): process the migration entry and, if the entry
> type is device private, override pmde with a device private entry and
> set the soft-dirty and uffd_wp bits with pmd_swp_mksoft_dirty() and
> pmd_swp_mkuffd_wp().
>
> ...
>
> This fixup should be squashed into the patch "mm/rmap: extend rmap and
> migration support" of mm/mm-unstable
>
OK. After fixing up
mm-replace-pmd_to_swp_entry-with-softleaf_from_pmd.patch, mm.git's
mm/huge_memory.c has the below. Please double-check.
void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
{
	struct folio *folio = page_folio(new);
	struct vm_area_struct *vma = pvmw->vma;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long address = pvmw->address;
	unsigned long haddr = address & HPAGE_PMD_MASK;
	pmd_t pmde;
	softleaf_t entry;

	if (!(pvmw->pmd && !pvmw->pte))
		return;

	entry = softleaf_from_pmd(*pvmw->pmd);
	folio_get(folio);
	pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot));

	if (pmd_swp_soft_dirty(*pvmw->pmd))
		pmde = pmd_mksoft_dirty(pmde);
	if (softleaf_is_migration_write(entry))
		pmde = pmd_mkwrite(pmde, vma);
	if (pmd_swp_uffd_wp(*pvmw->pmd))
		pmde = pmd_mkuffd_wp(pmde);
	if (!softleaf_is_migration_young(entry))
		pmde = pmd_mkold(pmde);
	/* NOTE: this may contain setting soft-dirty on some archs */
	if (folio_test_dirty(folio) && softleaf_is_migration_dirty(entry))
		pmde = pmd_mkdirty(pmde);

	if (folio_is_device_private(folio)) {
		swp_entry_t entry;

		if (pmd_write(pmde))
			entry = make_writable_device_private_entry(
					page_to_pfn(new));
		else
			entry = make_readable_device_private_entry(
					page_to_pfn(new));
		pmde = swp_entry_to_pmd(entry);

		if (pmd_swp_soft_dirty(*pvmw->pmd))
			pmde = pmd_swp_mksoft_dirty(pmde);
		if (pmd_swp_uffd_wp(*pvmw->pmd))
			pmde = pmd_swp_mkuffd_wp(pmde);
	}

	if (folio_test_anon(folio)) {
		rmap_t rmap_flags = RMAP_NONE;

		if (!softleaf_is_migration_read(entry))
			rmap_flags |= RMAP_EXCLUSIVE;

		folio_add_anon_rmap_pmd(folio, new, vma, haddr, rmap_flags);
	} else {
		folio_add_file_rmap_pmd(folio, new, vma);
	}
	VM_BUG_ON(pmd_write(pmde) && folio_test_anon(folio) && !PageAnonExclusive(new));
	set_pmd_at(mm, haddr, pvmw->pmd, pmde);

	/* No need to invalidate - it was non-present before */
	update_mmu_cache_pmd(vma, address, pvmw->pmd);
	trace_remove_migration_pmd(address, pmd_val(pmde));
}
On 11/15/25 11:51, Andrew Morton wrote:
> On Sat, 15 Nov 2025 11:28:35 +1100 Balbir Singh <balbirs@nvidia.com> wrote:
>
>> Follow the pattern used in remove_migration_pte() in
>> remove_migration_pmd(): process the migration entry and, if the entry
>> type is device private, override pmde with a device private entry and
>> set the soft-dirty and uffd_wp bits with pmd_swp_mksoft_dirty() and
>> pmd_swp_mkuffd_wp().
>>
>> ...
>>
>> This fixup should be squashed into the patch "mm/rmap: extend rmap and
>> migration support" of mm/mm-unstable
>>
>
> OK. After fixing up
> mm-replace-pmd_to_swp_entry-with-softleaf_from_pmd.patch, mm.git's
> mm/huge_memory.c has the below. Please double-check.
>
>
> void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new)
> {
> 	struct folio *folio = page_folio(new);
> 	struct vm_area_struct *vma = pvmw->vma;
> 	struct mm_struct *mm = vma->vm_mm;
> 	unsigned long address = pvmw->address;
> 	unsigned long haddr = address & HPAGE_PMD_MASK;
> 	pmd_t pmde;
> 	softleaf_t entry;
>
> 	if (!(pvmw->pmd && !pvmw->pte))
> 		return;
>
> 	entry = softleaf_from_pmd(*pvmw->pmd);
> 	folio_get(folio);
> 	pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot));
>
> 	if (pmd_swp_soft_dirty(*pvmw->pmd))
> 		pmde = pmd_mksoft_dirty(pmde);
> 	if (softleaf_is_migration_write(entry))
> 		pmde = pmd_mkwrite(pmde, vma);
> 	if (pmd_swp_uffd_wp(*pvmw->pmd))
> 		pmde = pmd_mkuffd_wp(pmde);
> 	if (!softleaf_is_migration_young(entry))
> 		pmde = pmd_mkold(pmde);
> 	/* NOTE: this may contain setting soft-dirty on some archs */
> 	if (folio_test_dirty(folio) && softleaf_is_migration_dirty(entry))
> 		pmde = pmd_mkdirty(pmde);
>
> 	if (folio_is_device_private(folio)) {
> 		swp_entry_t entry;
>
> 		if (pmd_write(pmde))
> 			entry = make_writable_device_private_entry(
> 					page_to_pfn(new));
> 		else
> 			entry = make_readable_device_private_entry(
> 					page_to_pfn(new));
> 		pmde = swp_entry_to_pmd(entry);
>
> 		if (pmd_swp_soft_dirty(*pvmw->pmd))
> 			pmde = pmd_swp_mksoft_dirty(pmde);
> 		if (pmd_swp_uffd_wp(*pvmw->pmd))
> 			pmde = pmd_swp_mkuffd_wp(pmde);
> 	}
>
> 	if (folio_test_anon(folio)) {
> 		rmap_t rmap_flags = RMAP_NONE;
>
> 		if (!softleaf_is_migration_read(entry))
> 			rmap_flags |= RMAP_EXCLUSIVE;
>
> 		folio_add_anon_rmap_pmd(folio, new, vma, haddr, rmap_flags);
> 	} else {
> 		folio_add_file_rmap_pmd(folio, new, vma);
> 	}
> 	VM_BUG_ON(pmd_write(pmde) && folio_test_anon(folio) && !PageAnonExclusive(new));
> 	set_pmd_at(mm, haddr, pvmw->pmd, pmde);
>
> 	/* No need to invalidate - it was non-present before */
> 	update_mmu_cache_pmd(vma, address, pvmw->pmd);
> 	trace_remove_migration_pmd(address, pmd_val(pmde));
> }
Thanks, Andrew! Looks good!
Balbir