There's an established convention in the kernel that we treat a PTE as
containing a swap entry (or one of the unfortunately named non-swap swap
entries) when it is neither empty (i.e. pte_none() evaluates true) nor
present (i.e. pte_present() evaluates true).
However, there is some inconsistency in how this is applied, as we also
have the is_swap_pte() helper which explicitly performs this check:
/* check whether a pte points to a swap entry */
static inline int is_swap_pte(pte_t pte)
{
return !pte_none(pte) && !pte_present(pte);
}
As this is a predicate, it's logical to assume that, in order to establish
that a PTE can correctly be manipulated as a swap/non-swap entry, this
predicate must first be checked.
Instead, however, we far more often rely on the established convention of
checking pte_none() / pte_present() before operating on entries as if they
were swap/non-swap.
This patch works towards correcting this inconsistency by removing all uses
of is_swap_pte() where we already perform pte_none()/pte_present() checks
anyway, or where it is otherwise clearly logical to do so.
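For instance, the shape most call sites settle on is the three-way dispatch
below (a minimal sketch of the convention, not lifted from any one call
site):

	if (pte_none(ptent)) {
		/* Empty - no page mapped, no entry encoded. */
	} else if (pte_present(ptent)) {
		/* Present - operate on the mapped page. */
	} else {
		/* By convention: a swap or non-swap (softleaf) entry. */
	}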
We also take advantage of the fact that pte_swp_uffd_wp() is only set on
swap entries.
Additionally, update comments that reference is_swap_pte() and
non_swap_entry().
No functional change intended.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
fs/proc/task_mmu.c | 49 ++++++++++++++++++++++++-----------
include/linux/userfaultfd_k.h | 3 +--
mm/hugetlb.c | 6 ++---
mm/internal.h | 6 ++---
mm/khugepaged.c | 29 +++++++++++----------
mm/migrate.c | 2 +-
mm/mprotect.c | 43 ++++++++++++++----------------
mm/mremap.c | 7 +++--
mm/page_table_check.c | 13 ++++++----
mm/page_vma_mapped.c | 31 +++++++++++-----------
10 files changed, 104 insertions(+), 85 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 24d26b49d870..ddbf177ecc45 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1017,7 +1017,9 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
young = pte_young(ptent);
dirty = pte_dirty(ptent);
present = true;
- } else if (is_swap_pte(ptent)) {
+ } else if (pte_none(ptent)) {
+ smaps_pte_hole_lookup(addr, walk);
+ } else {
swp_entry_t swpent = pte_to_swp_entry(ptent);
if (!non_swap_entry(swpent)) {
@@ -1038,9 +1040,6 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
present = true;
page = pfn_swap_entry_to_page(swpent);
}
- } else {
- smaps_pte_hole_lookup(addr, walk);
- return;
}
if (!page)
@@ -1611,6 +1610,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
*/
pte_t ptent = ptep_get(pte);
+ if (pte_none(ptent))
+ return;
+
if (pte_present(ptent)) {
pte_t old_pte;
@@ -1620,7 +1622,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
ptent = pte_wrprotect(old_pte);
ptent = pte_clear_soft_dirty(ptent);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
+ } else {
ptent = pte_swp_clear_soft_dirty(ptent);
set_pte_at(vma->vm_mm, addr, pte, ptent);
}
@@ -1923,6 +1925,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
struct page *page = NULL;
struct folio *folio;
+ if (pte_none(pte))
+ goto out;
+
if (pte_present(pte)) {
if (pm->show_pfn)
frame = pte_pfn(pte);
@@ -1932,8 +1937,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
flags |= PM_SOFT_DIRTY;
if (pte_uffd_wp(pte))
flags |= PM_UFFD_WP;
- } else if (is_swap_pte(pte)) {
+ } else {
swp_entry_t entry;
+
if (pte_swp_soft_dirty(pte))
flags |= PM_SOFT_DIRTY;
if (pte_swp_uffd_wp(pte))
@@ -1941,6 +1947,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
entry = pte_to_swp_entry(pte);
if (pm->show_pfn) {
pgoff_t offset;
+
/*
* For PFN swap offsets, keeping the offset field
* to be PFN only to be compatible with old smaps.
@@ -1969,6 +1976,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
__folio_page_mapped_exclusively(folio, page))
flags |= PM_MMAP_EXCLUSIVE;
}
+
+out:
if (vma->vm_flags & VM_SOFTDIRTY)
flags |= PM_SOFT_DIRTY;
@@ -2310,12 +2319,16 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
struct vm_area_struct *vma,
unsigned long addr, pte_t pte)
{
- unsigned long categories = 0;
+ unsigned long categories;
+
+ if (pte_none(pte))
+ return 0;
if (pte_present(pte)) {
struct page *page;
- categories |= PAGE_IS_PRESENT;
+ categories = PAGE_IS_PRESENT;
+
if (!pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
@@ -2329,10 +2342,11 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
+ } else {
softleaf_t entry;
- categories |= PAGE_IS_SWAPPED;
+ categories = PAGE_IS_SWAPPED;
+
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
@@ -2360,12 +2374,12 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma,
old_pte = ptep_modify_prot_start(vma, addr, pte);
ptent = pte_mkuffd_wp(old_pte);
ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
- } else if (is_swap_pte(ptent)) {
- ptent = pte_swp_mkuffd_wp(ptent);
- set_pte_at(vma->vm_mm, addr, pte, ptent);
- } else {
+ } else if (pte_none(ptent)) {
set_pte_at(vma->vm_mm, addr, pte,
make_pte_marker(PTE_MARKER_UFFD_WP));
+ } else {
+ ptent = pte_swp_mkuffd_wp(ptent);
+ set_pte_at(vma->vm_mm, addr, pte, ptent);
}
}
@@ -2434,6 +2448,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
{
unsigned long categories = PAGE_IS_HUGE;
+ if (pte_none(pte))
+ return categories;
+
/*
* According to pagemap_hugetlb_range(), file-backed HugeTLB
* page cannot be swapped. So PAGE_IS_FILE is not checked for
@@ -2441,6 +2458,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
*/
if (pte_present(pte)) {
categories |= PAGE_IS_PRESENT;
+
if (!huge_pte_uffd_wp(pte))
categories |= PAGE_IS_WRITTEN;
if (!PageAnon(pte_page(pte)))
@@ -2449,8 +2467,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
categories |= PAGE_IS_PFNZERO;
if (pte_soft_dirty(pte))
categories |= PAGE_IS_SOFT_DIRTY;
- } else if (is_swap_pte(pte)) {
+ } else {
categories |= PAGE_IS_SWAPPED;
+
if (!pte_swp_uffd_wp_any(pte))
categories |= PAGE_IS_WRITTEN;
if (pte_swp_soft_dirty(pte))
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 983c860a00f1..96b089dff4ef 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -441,9 +441,8 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
static inline bool pte_swp_uffd_wp_any(pte_t pte)
{
#ifdef CONFIG_PTE_MARKER_UFFD_WP
- if (!is_swap_pte(pte))
+ if (pte_present(pte))
return false;
-
if (pte_swp_uffd_wp(pte))
return true;
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a05edefec1ca..a74cde267c2a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5798,13 +5798,13 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz);
- if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte))
+ if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) {
huge_pte_clear(mm, new_addr, dst_pte, sz);
- else {
+ } else {
if (need_clear_uffd_wp) {
if (pte_present(pte))
pte = huge_pte_clear_uffd_wp(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_clear_uffd_wp(pte);
}
set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
diff --git a/mm/internal.h b/mm/internal.h
index 116a1ba85e66..9465129367a4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -325,8 +325,7 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
/**
* pte_move_swp_offset - Move the swap entry offset field of a swap pte
* forward or backward by delta
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- * non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry
* @delta: The direction and the offset we are moving; forward if delta
* is positive; backward if delta is negative
*
@@ -352,8 +351,7 @@ static inline pte_t pte_move_swp_offset(pte_t pte, long delta)
/**
* pte_next_swp_offset - Increment the swap entry offset field of a swap pte.
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- * non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry.
*
* Increments the swap offset, while maintaining all other fields, including
* swap type, and any swp pte bits. The resulting pte is returned.
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f6ed1072ed6e..a97ff7bcb232 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1019,7 +1019,8 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
}
vmf.orig_pte = ptep_get_lockless(pte);
- if (!is_swap_pte(vmf.orig_pte))
+ if (pte_none(vmf.orig_pte) ||
+ pte_present(vmf.orig_pte))
continue;
vmf.pte = pte;
@@ -1276,7 +1277,19 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
_pte++, addr += PAGE_SIZE) {
pte_t pteval = ptep_get(_pte);
- if (is_swap_pte(pteval)) {
+ if (pte_none_or_zero(pteval)) {
+ ++none_or_zero;
+ if (!userfaultfd_armed(vma) &&
+ (!cc->is_khugepaged ||
+ none_or_zero <= khugepaged_max_ptes_none)) {
+ continue;
+ } else {
+ result = SCAN_EXCEED_NONE_PTE;
+ count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+ goto out_unmap;
+ }
+ }
+ if (!pte_present(pteval)) {
++unmapped;
if (!cc->is_khugepaged ||
unmapped <= khugepaged_max_ptes_swap) {
@@ -1296,18 +1309,6 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
goto out_unmap;
}
}
- if (pte_none_or_zero(pteval)) {
- ++none_or_zero;
- if (!userfaultfd_armed(vma) &&
- (!cc->is_khugepaged ||
- none_or_zero <= khugepaged_max_ptes_none)) {
- continue;
- } else {
- result = SCAN_EXCEED_NONE_PTE;
- count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
- goto out_unmap;
- }
- }
if (pte_uffd_wp(pteval)) {
/*
* Don't collapse the page if any of the small
diff --git a/mm/migrate.c b/mm/migrate.c
index ceee354ef215..862b2e261cf9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -492,7 +492,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
pte = ptep_get(ptep);
pte_unmap(ptep);
- if (!is_swap_pte(pte))
+ if (pte_none(pte) || pte_present(pte))
goto out;
entry = pte_to_swp_entry(pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 0bae241eb7aa..a3e360a8cdec 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -297,7 +297,26 @@ static long change_pte_range(struct mmu_gather *tlb,
prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
pages += nr_ptes;
- } else if (is_swap_pte(oldpte)) {
+ } else if (pte_none(oldpte)) {
+ /*
+ * Nobody plays with any none ptes besides
+ * userfaultfd when applying the protections.
+ */
+ if (likely(!uffd_wp))
+ continue;
+
+ if (userfaultfd_wp_use_markers(vma)) {
+ /*
+ * For file-backed mem, we need to be able to
+ * wr-protect a none pte, because even if the
+ * pte is none, the page/swap cache could
+ * exist. Doing that by install a marker.
+ */
+ set_pte_at(vma->vm_mm, addr, pte,
+ make_pte_marker(PTE_MARKER_UFFD_WP));
+ pages++;
+ }
+ } else {
swp_entry_t entry = pte_to_swp_entry(oldpte);
pte_t newpte;
@@ -358,28 +377,6 @@ static long change_pte_range(struct mmu_gather *tlb,
set_pte_at(vma->vm_mm, addr, pte, newpte);
pages++;
}
- } else {
- /* It must be an none page, or what else?.. */
- WARN_ON_ONCE(!pte_none(oldpte));
-
- /*
- * Nobody plays with any none ptes besides
- * userfaultfd when applying the protections.
- */
- if (likely(!uffd_wp))
- continue;
-
- if (userfaultfd_wp_use_markers(vma)) {
- /*
- * For file-backed mem, we need to be able to
- * wr-protect a none pte, because even if the
- * pte is none, the page/swap cache could
- * exist. Doing that by install a marker.
- */
- set_pte_at(vma->vm_mm, addr, pte,
- make_pte_marker(PTE_MARKER_UFFD_WP));
- pages++;
- }
}
} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
diff --git a/mm/mremap.c b/mm/mremap.c
index 7c21b2ad13f6..62b6827abacf 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -158,6 +158,9 @@ static void drop_rmap_locks(struct vm_area_struct *vma)
static pte_t move_soft_dirty_pte(pte_t pte)
{
+ if (pte_none(pte))
+ return pte;
+
/*
* Set soft dirty bit so we can notice
* in userspace the ptes were moved.
@@ -165,7 +168,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
#ifdef CONFIG_MEM_SOFT_DIRTY
if (pte_present(pte))
pte = pte_mksoft_dirty(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_mksoft_dirty(pte);
#endif
return pte;
@@ -294,7 +297,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
if (need_clear_uffd_wp) {
if (pte_present(pte))
pte = pte_clear_uffd_wp(pte);
- else if (is_swap_pte(pte))
+ else
pte = pte_swp_clear_uffd_wp(pte);
}
set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 4eeca782b888..43f75d2f7c36 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -185,12 +185,15 @@ static inline bool swap_cached_writable(swp_entry_t entry)
is_writable_migration_entry(entry);
}
-static inline void page_table_check_pte_flags(pte_t pte)
+static void page_table_check_pte_flags(pte_t pte)
{
- if (pte_present(pte) && pte_uffd_wp(pte))
- WARN_ON_ONCE(pte_write(pte));
- else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte))
- WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte)));
+ if (pte_present(pte)) {
+ WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte));
+ } else if (pte_swp_uffd_wp(pte)) {
+ const swp_entry_t entry = pte_to_swp_entry(pte);
+
+ WARN_ON_ONCE(swap_cached_writable(entry));
+ }
}
void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index be20468fb5a9..a4e23818f37f 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
spinlock_t **ptlp)
{
+ bool is_migration;
pte_t ptent;
if (pvmw->flags & PVMW_SYNC) {
@@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
return !!pvmw->pte;
}
+ is_migration = pvmw->flags & PVMW_MIGRATION;
again:
/*
* It is important to return the ptl corresponding to pte,
@@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
ptent = ptep_get(pvmw->pte);
- if (pvmw->flags & PVMW_MIGRATION) {
- if (!is_swap_pte(ptent))
+ if (pte_none(ptent)) {
+ return false;
+ } else if (pte_present(ptent)) {
+ if (is_migration)
return false;
- } else if (is_swap_pte(ptent)) {
+ } else if (!is_migration) {
swp_entry_t entry;
+
/*
* Handle un-addressable ZONE_DEVICE memory.
*
@@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
if (!is_device_private_entry(entry) &&
!is_device_exclusive_entry(entry))
return false;
- } else if (!pte_present(ptent)) {
- return false;
}
spin_lock(*ptlp);
if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
@@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
return false;
pfn = softleaf_to_pfn(entry);
- } else if (is_swap_pte(ptent)) {
- swp_entry_t entry;
+ } else if (pte_present(ptent)) {
+ pfn = pte_pfn(ptent);
+ } else {
+ const softleaf_t entry = softleaf_from_pte(ptent);
/* Handle un-addressable ZONE_DEVICE memory */
- entry = pte_to_swp_entry(ptent);
- if (!is_device_private_entry(entry) &&
- !is_device_exclusive_entry(entry))
- return false;
-
- pfn = swp_offset_pfn(entry);
- } else {
- if (!pte_present(ptent))
+ if (!softleaf_is_device_private(entry) &&
+ !softleaf_is_device_exclusive(entry))
return false;
- pfn = pte_pfn(ptent);
+ pfn = softleaf_to_pfn(entry);
}
if ((pfn + pte_nr - 1) < pvmw->pfn)
--
2.51.0
On 11/10/25 23:21, Lorenzo Stoakes wrote:
> <snip>
LGTM (famous last words)
Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:
<snip>
> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index be20468fb5a9..a4e23818f37f 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
> static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> spinlock_t **ptlp)
> {
> + bool is_migration;
> pte_t ptent;
>
> if (pvmw->flags & PVMW_SYNC) {
> @@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> return !!pvmw->pte;
> }
>
> + is_migration = pvmw->flags & PVMW_MIGRATION;
> again:
> /*
> * It is important to return the ptl corresponding to pte,
> @@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>
> ptent = ptep_get(pvmw->pte);
>
> - if (pvmw->flags & PVMW_MIGRATION) {
> - if (!is_swap_pte(ptent))
Here, when is_migration == true, either pte_none() or pte_present()
evaluating true makes us return false, and ...
> + if (pte_none(ptent)) {
> + return false;
> + } else if (pte_present(ptent)) {
> + if (is_migration)
> return false;
> - } else if (is_swap_pte(ptent)) {
> + } else if (!is_migration) {
> swp_entry_t entry;
> +
> /*
> * Handle un-addressable ZONE_DEVICE memory.
> *
> @@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> if (!is_device_private_entry(entry) &&
> !is_device_exclusive_entry(entry))
> return false;
> - } else if (!pte_present(ptent)) {
> - return false;
... when is_migration == false, !pte_present() here actually means
pte_none(), because the is_swap_pte() check above (now the !is_migration
branch) has already filtered out swap entries. So a none pte should return
false regardless of is_migration.
This is a nice cleanup. Thanks.
> }
> spin_lock(*ptlp);
> if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
> @@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
> return false;
>
> pfn = softleaf_to_pfn(entry);
> - } else if (is_swap_pte(ptent)) {
> - swp_entry_t entry;
> + } else if (pte_present(ptent)) {
> + pfn = pte_pfn(ptent);
> + } else {
> + const softleaf_t entry = softleaf_from_pte(ptent);
>
> /* Handle un-addressable ZONE_DEVICE memory */
> - entry = pte_to_swp_entry(ptent);
> - if (!is_device_private_entry(entry) &&
> - !is_device_exclusive_entry(entry))
> - return false;
> -
> - pfn = swp_offset_pfn(entry);
> - } else {
> - if (!pte_present(ptent))
This !pte_present() is pte_none(). It seems that there should be
} else if (pte_none(ptent)) {
return false;
}
before the above "} else {".
> + if (!softleaf_is_device_private(entry) &&
> + !softleaf_is_device_exclusive(entry))
> return false;
>
> - pfn = pte_pfn(ptent);
> + pfn = softleaf_to_pfn(entry);
> }
>
> if ((pfn + pte_nr - 1) < pvmw->pfn)
Otherwise, LGTM. With the above issue addressed, feel free to
add Reviewed-by: Zi Yan <ziy@nvidia.com>
--
Best Regards,
Yan, Zi
On 11 Nov 2025, at 21:58, Zi Yan wrote:
> On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:
>> <snip>
BTW, I wonder if we could use switch + enum and let the compiler prevent
future inconsistencies.
Basically,
enum PTE_State {
PTE_PRESENT,
PTE_NONE,
PTE_SOFTLEAF,
};
enum PTE_State get_pte_state(pte_t pte)
{
if (pte_present(pte))
return PTE_PRESENT;
if (pte_none(pte))
return PTE_NONE;
return PTE_SOFTLEAF;
}
in any code handling pte:
switch (get_pte_state(pte)) {
case PTE_PRESENT:
break;
case PTE_NONE:
break;
case PTE_SOFTLEAF:
break;
}
And the compiler will yell at you if any enum value is missing from the
switch.
Just an idea that came to mind while I was reading the commit message.
Feel free to ignore it. :)
Best Regards,
Yan, Zi
On Tue, Nov 11, 2025 at 09:58:36PM -0500, Zi Yan wrote:
> On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:
> <snip>
>
> > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> > index be20468fb5a9..a4e23818f37f 100644
> > --- a/mm/page_vma_mapped.c
> > +++ b/mm/page_vma_mapped.c
> > @@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
> > static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> > spinlock_t **ptlp)
> > {
> > + bool is_migration;
> > pte_t ptent;
> >
> > if (pvmw->flags & PVMW_SYNC) {
> > @@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> > return !!pvmw->pte;
> > }
> >
> > + is_migration = pvmw->flags & PVMW_MIGRATION;
> > again:
> > /*
> > * It is important to return the ptl corresponding to pte,
> > @@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> >
> > ptent = ptep_get(pvmw->pte);
> >
> > - if (pvmw->flags & PVMW_MIGRATION) {
> > - if (!is_swap_pte(ptent))
>
> Here, when is_migration == true, either pte_none() or pte_present()
> evaluating true makes us return false, and ...
>
> > + if (pte_none(ptent)) {
> > + return false;
> > + } else if (pte_present(ptent)) {
> > + if (is_migration)
> > return false;
> > - } else if (is_swap_pte(ptent)) {
> > + } else if (!is_migration) {
> > swp_entry_t entry;
> > +
> > /*
> > * Handle un-addressable ZONE_DEVICE memory.
> > *
> > @@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> > if (!is_device_private_entry(entry) &&
> > !is_device_exclusive_entry(entry))
> > return false;
> > - } else if (!pte_present(ptent)) {
> > - return false;
>
> ... when is_migration == false, !pte_present() here actually means
> pte_none(), because the is_swap_pte() check above (now the !is_migration
> branch) has already filtered out swap entries. So a none pte should return
> false regardless of is_migration.
I guess you were working this through :) well I decided to as well, just to
double-check I got it right - maybe useful for you also :P -
Previously:
if (is_migration) {
if (!is_swap_pte(ptent))
return false;
} else if (is_swap_pte(ptent)) {
... ZONE_DEVICE blah ...
} else if (!pte_present(ptent)) {
return false;
}
But is_swap_pte() is the same as !pte_none() && !pte_present(), so
!is_swap_pte() is pte_none() || pte_present() by De Morgan's law:
if (is_migration) {
if (pte_none(ptent) || pte_present(ptent))
return false;
} else if (!pte_none(ptent) && !pte_present(ptent)) {
... ZONE_DEVICE blah ...
} else if (!pte_present(ptent)) {
return false;
}
In the last branch, we know (again by De Morgan's law) that either
pte_none(ptent) or pte_present(ptent) holds. But we explicitly check for
!pte_present(ptent), so this becomes:
if (is_migration) {
if (pte_none(ptent) || pte_present(ptent))
return false;
} else if (!pte_none(ptent) && !pte_present(ptent)) {
... ZONE_DEVICE blah ...
} else if (pte_none(ptent)) {
return false;
}
So we can generalise - regardless of is_migration, a none pte returns false:
if (pte_none(ptent)) {
return false;
} else if (is_migration) {
if (pte_none(ptent) || pte_present(ptent))
return false;
} else if (!pte_none(ptent) && !pte_present(ptent)) {
... ZONE_DEVICE blah ...
}
Since we already check for pte_none() ahead of time, we can simplify again:
if (pte_none(ptent)) {
return false;
} else if (is_migration) {
if (pte_present(ptent))
return false;
} else if (!pte_present(ptent)) {
... ZONE_DEVICE blah ...
}
We can then put the pte_present() check in the outer branch:
if (pte_none(ptent)) {
return false;
} else if (pte_present(ptent)) {
if (is_migration)
return false;
} else if (!is_migration) {
... ZONE_DEVICE blah ...
}
Because previously an is_migration && !pte_present() case would result in no
action here.
Which is the code in this patch :)
>
> This is a nice cleanup. Thanks.
>
> > }
> > spin_lock(*ptlp);
> > if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
> > @@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
> > return false;
> >
> > pfn = softleaf_to_pfn(entry);
> > - } else if (is_swap_pte(ptent)) {
> > - swp_entry_t entry;
> > + } else if (pte_present(ptent)) {
> > + pfn = pte_pfn(ptent);
> > + } else {
> > + const softleaf_t entry = softleaf_from_pte(ptent);
> >
> > /* Handle un-addressable ZONE_DEVICE memory */
> > - entry = pte_to_swp_entry(ptent);
> > - if (!is_device_private_entry(entry) &&
> > - !is_device_exclusive_entry(entry))
> > - return false;
> > -
> > - pfn = swp_offset_pfn(entry);
> > - } else {
> > - if (!pte_present(ptent))
>
> This !pte_present() is pte_none(). It seems that there should be
Well this should be fine though as:
const softleaf_t entry = softleaf_from_pte(ptent);
/* Handle un-addressable ZONE_DEVICE memory */
if (!softleaf_is_device_private(entry) &&
!softleaf_is_device_exclusive(entry))
return false;
Still correctly handles none - in the pte_none() case, softleaf_from_pte()
yields a none softleaf entry, which fails both of these tests.
So excluding pte_none() as an explicit test here was part of the rework - we no
longer have to do that.
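To spell that out (a sketch annotating the new code, under the assumption
stated above that softleaf_from_pte() maps a none pte to a none softleaf
entry):

	const softleaf_t entry = softleaf_from_pte(ptent);

	/*
	 * Were ptent none, entry would be a none softleaf entry: it is
	 * neither device-private nor device-exclusive, so we return
	 * false - exactly what the old explicit !pte_present() test
	 * achieved.
	 */
	if (!softleaf_is_device_private(entry) &&
	    !softleaf_is_device_exclusive(entry))
		return false;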
>
> } else if (pte_none(ptent)) {
> return false;
> }
>
> before the above "} else {".
>
>
> Otherwise, LGTM. With the above issue addressed, feel free to
> add Reviewed-by: Zi Yan <ziy@nvidia.com>
Thanks!
>
> --
> Best Regards,
> Yan, Zi
Cheers, Lorenzo
On 12 Nov 2025, at 10:59, Lorenzo Stoakes wrote:
> On Tue, Nov 11, 2025 at 09:58:36PM -0500, Zi Yan wrote:
>> On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:
>> <snip>
>>
>>> <snip>
>
> I guess you were working this through :) well I decided to as well, just to
> double-check I got it right - maybe useful for you also :P -
>
> <snip>
Thanks again for spelling out the whole process.
>
>>
>> This is a nice cleanup. Thanks.
>>
>>> <snip>
>>
>> This !pte_present() is pte_none(). It seems that there should be
>
> Well this should be fine though as:
>
> const softleaf_t entry = softleaf_from_pte(ptent);
>
> /* Handle un-addressable ZONE_DEVICE memory */
> if (!softleaf_is_device_private(entry) &&
> !softleaf_is_device_exclusive(entry))
> return false;
>
> Still correctly handles none - in the pte_none() case, softleaf_from_pte()
> yields a none softleaf entry, which fails both of these tests.
>
> So excluding pte_none() as an explicit test here was part of the rework - we no
> longer have to do that.
Got it. Now my RB is yours. :)
Best Regards,
Yan, Zi