[PATCH v3 03/16] mm: avoid unnecessary uses of is_swap_pte()

Lorenzo Stoakes posted 16 patches 1 month, 1 week ago
[PATCH v3 03/16] mm: avoid unnecessary uses of is_swap_pte()
Posted by Lorenzo Stoakes 1 month, 1 week ago
There's an established convention in the kernel that we treat PTEs as
containing swap entries (and the unfortunately named non-swap swap entries)
should they be neither empty (i.e. pte_none() evaluating true) nor present
(i.e. pte_present() evaluating true).

However, there is some inconsistency in how this is applied, as we also
have the is_swap_pte() helper which explicitly performs this check:

	/* check whether a pte points to a swap entry */
	static inline int is_swap_pte(pte_t pte)
	{
		return !pte_none(pte) && !pte_present(pte);
	}

As this represents a predicate, it is logical to assume that, in order to
establish that a PTE entry can correctly be manipulated as a swap/non-swap
entry, this predicate must first be checked.

But instead, we far more often utilise the established convention of
checking pte_none() / pte_present() before operating on entries as if they
were swap/non-swap.

This patch works towards correcting this inconsistency by removing all uses
of is_swap_pte() where we are already in a position where we perform
pte_none()/pte_present() checks anyway or otherwise it is clearly logical
to do so.

We also take advantage of the fact that pte_swp_uffd_wp() is only set on
swap entries.

Additionally, update comments referencing is_swap_pte() and
non_swap_entry().

No functional change intended.

Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
 fs/proc/task_mmu.c            | 49 ++++++++++++++++++++++++-----------
 include/linux/userfaultfd_k.h |  3 +--
 mm/hugetlb.c                  |  6 ++---
 mm/internal.h                 |  6 ++---
 mm/khugepaged.c               | 29 +++++++++++----------
 mm/migrate.c                  |  2 +-
 mm/mprotect.c                 | 43 ++++++++++++++----------------
 mm/mremap.c                   |  7 +++--
 mm/page_table_check.c         | 13 ++++++----
 mm/page_vma_mapped.c          | 31 +++++++++++-----------
 10 files changed, 104 insertions(+), 85 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 24d26b49d870..ddbf177ecc45 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1017,7 +1017,9 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 		young = pte_young(ptent);
 		dirty = pte_dirty(ptent);
 		present = true;
-	} else if (is_swap_pte(ptent)) {
+	} else if (pte_none(ptent)) {
+		smaps_pte_hole_lookup(addr, walk);
+	} else {
 		swp_entry_t swpent = pte_to_swp_entry(ptent);
 
 		if (!non_swap_entry(swpent)) {
@@ -1038,9 +1040,6 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
 				present = true;
 			page = pfn_swap_entry_to_page(swpent);
 		}
-	} else {
-		smaps_pte_hole_lookup(addr, walk);
-		return;
 	}
 
 	if (!page)
@@ -1611,6 +1610,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 	 */
 	pte_t ptent = ptep_get(pte);
 
+	if (pte_none(ptent))
+		return;
+
 	if (pte_present(ptent)) {
 		pte_t old_pte;
 
@@ -1620,7 +1622,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
 		ptent = pte_wrprotect(old_pte);
 		ptent = pte_clear_soft_dirty(ptent);
 		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
-	} else if (is_swap_pte(ptent)) {
+	} else {
 		ptent = pte_swp_clear_soft_dirty(ptent);
 		set_pte_at(vma->vm_mm, addr, pte, ptent);
 	}
@@ -1923,6 +1925,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 	struct page *page = NULL;
 	struct folio *folio;
 
+	if (pte_none(pte))
+		goto out;
+
 	if (pte_present(pte)) {
 		if (pm->show_pfn)
 			frame = pte_pfn(pte);
@@ -1932,8 +1937,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 			flags |= PM_SOFT_DIRTY;
 		if (pte_uffd_wp(pte))
 			flags |= PM_UFFD_WP;
-	} else if (is_swap_pte(pte)) {
+	} else {
 		swp_entry_t entry;
+
 		if (pte_swp_soft_dirty(pte))
 			flags |= PM_SOFT_DIRTY;
 		if (pte_swp_uffd_wp(pte))
@@ -1941,6 +1947,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 		entry = pte_to_swp_entry(pte);
 		if (pm->show_pfn) {
 			pgoff_t offset;
+
 			/*
 			 * For PFN swap offsets, keeping the offset field
 			 * to be PFN only to be compatible with old smaps.
@@ -1969,6 +1976,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
 		    __folio_page_mapped_exclusively(folio, page))
 			flags |= PM_MMAP_EXCLUSIVE;
 	}
+
+out:
 	if (vma->vm_flags & VM_SOFTDIRTY)
 		flags |= PM_SOFT_DIRTY;
 
@@ -2310,12 +2319,16 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
 					   struct vm_area_struct *vma,
 					   unsigned long addr, pte_t pte)
 {
-	unsigned long categories = 0;
+	unsigned long categories;
+
+	if (pte_none(pte))
+		return 0;
 
 	if (pte_present(pte)) {
 		struct page *page;
 
-		categories |= PAGE_IS_PRESENT;
+		categories = PAGE_IS_PRESENT;
+
 		if (!pte_uffd_wp(pte))
 			categories |= PAGE_IS_WRITTEN;
 
@@ -2329,10 +2342,11 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p,
 			categories |= PAGE_IS_PFNZERO;
 		if (pte_soft_dirty(pte))
 			categories |= PAGE_IS_SOFT_DIRTY;
-	} else if (is_swap_pte(pte)) {
+	} else {
 		softleaf_t entry;
 
-		categories |= PAGE_IS_SWAPPED;
+		categories = PAGE_IS_SWAPPED;
+
 		if (!pte_swp_uffd_wp_any(pte))
 			categories |= PAGE_IS_WRITTEN;
 
@@ -2360,12 +2374,12 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma,
 		old_pte = ptep_modify_prot_start(vma, addr, pte);
 		ptent = pte_mkuffd_wp(old_pte);
 		ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent);
-	} else if (is_swap_pte(ptent)) {
-		ptent = pte_swp_mkuffd_wp(ptent);
-		set_pte_at(vma->vm_mm, addr, pte, ptent);
-	} else {
+	} else if (pte_none(ptent)) {
 		set_pte_at(vma->vm_mm, addr, pte,
 			   make_pte_marker(PTE_MARKER_UFFD_WP));
+	} else {
+		ptent = pte_swp_mkuffd_wp(ptent);
+		set_pte_at(vma->vm_mm, addr, pte, ptent);
 	}
 }
 
@@ -2434,6 +2448,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
 {
 	unsigned long categories = PAGE_IS_HUGE;
 
+	if (pte_none(pte))
+		return categories;
+
 	/*
 	 * According to pagemap_hugetlb_range(), file-backed HugeTLB
 	 * page cannot be swapped. So PAGE_IS_FILE is not checked for
@@ -2441,6 +2458,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
 	 */
 	if (pte_present(pte)) {
 		categories |= PAGE_IS_PRESENT;
+
 		if (!huge_pte_uffd_wp(pte))
 			categories |= PAGE_IS_WRITTEN;
 		if (!PageAnon(pte_page(pte)))
@@ -2449,8 +2467,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte)
 			categories |= PAGE_IS_PFNZERO;
 		if (pte_soft_dirty(pte))
 			categories |= PAGE_IS_SOFT_DIRTY;
-	} else if (is_swap_pte(pte)) {
+	} else {
 		categories |= PAGE_IS_SWAPPED;
+
 		if (!pte_swp_uffd_wp_any(pte))
 			categories |= PAGE_IS_WRITTEN;
 		if (pte_swp_soft_dirty(pte))
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index 983c860a00f1..96b089dff4ef 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -441,9 +441,8 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma)
 static inline bool pte_swp_uffd_wp_any(pte_t pte)
 {
 #ifdef CONFIG_PTE_MARKER_UFFD_WP
-	if (!is_swap_pte(pte))
+	if (pte_present(pte))
 		return false;
-
 	if (pte_swp_uffd_wp(pte))
 		return true;
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index a05edefec1ca..a74cde267c2a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -5798,13 +5798,13 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr,
 
 	pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz);
 
-	if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte))
+	if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) {
 		huge_pte_clear(mm, new_addr, dst_pte, sz);
-	else {
+	} else {
 		if (need_clear_uffd_wp) {
 			if (pte_present(pte))
 				pte = huge_pte_clear_uffd_wp(pte);
-			else if (is_swap_pte(pte))
+			else
 				pte = pte_swp_clear_uffd_wp(pte);
 		}
 		set_huge_pte_at(mm, new_addr, dst_pte, pte, sz);
diff --git a/mm/internal.h b/mm/internal.h
index 116a1ba85e66..9465129367a4 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -325,8 +325,7 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
 /**
  * pte_move_swp_offset - Move the swap entry offset field of a swap pte
  *	 forward or backward by delta
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- *	 non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry
  * @delta: The direction and the offset we are moving; forward if delta
  *	 is positive; backward if delta is negative
  *
@@ -352,8 +351,7 @@ static inline pte_t pte_move_swp_offset(pte_t pte, long delta)
 
 /**
  * pte_next_swp_offset - Increment the swap entry offset field of a swap pte.
- * @pte: The initial pte state; is_swap_pte(pte) must be true and
- *	 non_swap_entry() must be false.
+ * @pte: The initial pte state; must be a swap entry.
  *
  * Increments the swap offset, while maintaining all other fields, including
  * swap type, and any swp pte bits. The resulting pte is returned.
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index f6ed1072ed6e..a97ff7bcb232 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1019,7 +1019,8 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm,
 		}
 
 		vmf.orig_pte = ptep_get_lockless(pte);
-		if (!is_swap_pte(vmf.orig_pte))
+		if (pte_none(vmf.orig_pte) ||
+		    pte_present(vmf.orig_pte))
 			continue;
 
 		vmf.pte = pte;
@@ -1276,7 +1277,19 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 	for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR;
 	     _pte++, addr += PAGE_SIZE) {
 		pte_t pteval = ptep_get(_pte);
-		if (is_swap_pte(pteval)) {
+		if (pte_none_or_zero(pteval)) {
+			++none_or_zero;
+			if (!userfaultfd_armed(vma) &&
+			    (!cc->is_khugepaged ||
+			     none_or_zero <= khugepaged_max_ptes_none)) {
+				continue;
+			} else {
+				result = SCAN_EXCEED_NONE_PTE;
+				count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
+				goto out_unmap;
+			}
+		}
+		if (!pte_present(pteval)) {
 			++unmapped;
 			if (!cc->is_khugepaged ||
 			    unmapped <= khugepaged_max_ptes_swap) {
@@ -1296,18 +1309,6 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm,
 				goto out_unmap;
 			}
 		}
-		if (pte_none_or_zero(pteval)) {
-			++none_or_zero;
-			if (!userfaultfd_armed(vma) &&
-			    (!cc->is_khugepaged ||
-			     none_or_zero <= khugepaged_max_ptes_none)) {
-				continue;
-			} else {
-				result = SCAN_EXCEED_NONE_PTE;
-				count_vm_event(THP_SCAN_EXCEED_NONE_PTE);
-				goto out_unmap;
-			}
-		}
 		if (pte_uffd_wp(pteval)) {
 			/*
 			 * Don't collapse the page if any of the small
diff --git a/mm/migrate.c b/mm/migrate.c
index ceee354ef215..862b2e261cf9 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -492,7 +492,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd,
 	pte = ptep_get(ptep);
 	pte_unmap(ptep);
 
-	if (!is_swap_pte(pte))
+	if (pte_none(pte) || pte_present(pte))
 		goto out;
 
 	entry = pte_to_swp_entry(pte);
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 0bae241eb7aa..a3e360a8cdec 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -297,7 +297,26 @@ static long change_pte_range(struct mmu_gather *tlb,
 				prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent,
 					nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb);
 			pages += nr_ptes;
-		} else if (is_swap_pte(oldpte)) {
+		} else if (pte_none(oldpte)) {
+			/*
+			 * Nobody plays with any none ptes besides
+			 * userfaultfd when applying the protections.
+			 */
+			if (likely(!uffd_wp))
+				continue;
+
+			if (userfaultfd_wp_use_markers(vma)) {
+				/*
+				 * For file-backed mem, we need to be able to
+				 * wr-protect a none pte, because even if the
+				 * pte is none, the page/swap cache could
+				 * exist.  Doing that by install a marker.
+				 */
+				set_pte_at(vma->vm_mm, addr, pte,
+					   make_pte_marker(PTE_MARKER_UFFD_WP));
+				pages++;
+			}
+		} else  {
 			swp_entry_t entry = pte_to_swp_entry(oldpte);
 			pte_t newpte;
 
@@ -358,28 +377,6 @@ static long change_pte_range(struct mmu_gather *tlb,
 				set_pte_at(vma->vm_mm, addr, pte, newpte);
 				pages++;
 			}
-		} else {
-			/* It must be an none page, or what else?.. */
-			WARN_ON_ONCE(!pte_none(oldpte));
-
-			/*
-			 * Nobody plays with any none ptes besides
-			 * userfaultfd when applying the protections.
-			 */
-			if (likely(!uffd_wp))
-				continue;
-
-			if (userfaultfd_wp_use_markers(vma)) {
-				/*
-				 * For file-backed mem, we need to be able to
-				 * wr-protect a none pte, because even if the
-				 * pte is none, the page/swap cache could
-				 * exist.  Doing that by install a marker.
-				 */
-				set_pte_at(vma->vm_mm, addr, pte,
-					   make_pte_marker(PTE_MARKER_UFFD_WP));
-				pages++;
-			}
 		}
 	} while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end);
 	arch_leave_lazy_mmu_mode();
diff --git a/mm/mremap.c b/mm/mremap.c
index 7c21b2ad13f6..62b6827abacf 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -158,6 +158,9 @@ static void drop_rmap_locks(struct vm_area_struct *vma)
 
 static pte_t move_soft_dirty_pte(pte_t pte)
 {
+	if (pte_none(pte))
+		return pte;
+
 	/*
 	 * Set soft dirty bit so we can notice
 	 * in userspace the ptes were moved.
@@ -165,7 +168,7 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 #ifdef CONFIG_MEM_SOFT_DIRTY
 	if (pte_present(pte))
 		pte = pte_mksoft_dirty(pte);
-	else if (is_swap_pte(pte))
+	else
 		pte = pte_swp_mksoft_dirty(pte);
 #endif
 	return pte;
@@ -294,7 +297,7 @@ static int move_ptes(struct pagetable_move_control *pmc,
 			if (need_clear_uffd_wp) {
 				if (pte_present(pte))
 					pte = pte_clear_uffd_wp(pte);
-				else if (is_swap_pte(pte))
+				else
 					pte = pte_swp_clear_uffd_wp(pte);
 			}
 			set_ptes(mm, new_addr, new_ptep, pte, nr_ptes);
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 4eeca782b888..43f75d2f7c36 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -185,12 +185,15 @@ static inline bool swap_cached_writable(swp_entry_t entry)
 	       is_writable_migration_entry(entry);
 }
 
-static inline void page_table_check_pte_flags(pte_t pte)
+static void page_table_check_pte_flags(pte_t pte)
 {
-	if (pte_present(pte) && pte_uffd_wp(pte))
-		WARN_ON_ONCE(pte_write(pte));
-	else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte))
-		WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte)));
+	if (pte_present(pte)) {
+		WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte));
+	} else if (pte_swp_uffd_wp(pte)) {
+		const swp_entry_t entry = pte_to_swp_entry(pte);
+
+		WARN_ON_ONCE(swap_cached_writable(entry));
+	}
 }
 
 void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte,
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index be20468fb5a9..a4e23818f37f 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
 static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
 		    spinlock_t **ptlp)
 {
+	bool is_migration;
 	pte_t ptent;
 
 	if (pvmw->flags & PVMW_SYNC) {
@@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
 		return !!pvmw->pte;
 	}
 
+	is_migration = pvmw->flags & PVMW_MIGRATION;
 again:
 	/*
 	 * It is important to return the ptl corresponding to pte,
@@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
 
 	ptent = ptep_get(pvmw->pte);
 
-	if (pvmw->flags & PVMW_MIGRATION) {
-		if (!is_swap_pte(ptent))
+	if (pte_none(ptent)) {
+		return false;
+	} else if (pte_present(ptent)) {
+		if (is_migration)
 			return false;
-	} else if (is_swap_pte(ptent)) {
+	} else if (!is_migration) {
 		swp_entry_t entry;
+
 		/*
 		 * Handle un-addressable ZONE_DEVICE memory.
 		 *
@@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
 		if (!is_device_private_entry(entry) &&
 		    !is_device_exclusive_entry(entry))
 			return false;
-	} else if (!pte_present(ptent)) {
-		return false;
 	}
 	spin_lock(*ptlp);
 	if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
@@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
 			return false;
 
 		pfn = softleaf_to_pfn(entry);
-	} else if (is_swap_pte(ptent)) {
-		swp_entry_t entry;
+	} else if (pte_present(ptent)) {
+		pfn = pte_pfn(ptent);
+	} else {
+		const softleaf_t entry = softleaf_from_pte(ptent);
 
 		/* Handle un-addressable ZONE_DEVICE memory */
-		entry = pte_to_swp_entry(ptent);
-		if (!is_device_private_entry(entry) &&
-		    !is_device_exclusive_entry(entry))
-			return false;
-
-		pfn = swp_offset_pfn(entry);
-	} else {
-		if (!pte_present(ptent))
+		if (!softleaf_is_device_private(entry) &&
+		    !softleaf_is_device_exclusive(entry))
 			return false;
 
-		pfn = pte_pfn(ptent);
+		pfn = softleaf_to_pfn(entry);
 	}
 
 	if ((pfn + pte_nr - 1) < pvmw->pfn)
-- 
2.51.0
Re: [PATCH v3 03/16] mm: avoid unnecessary uses of is_swap_pte()
Posted by Vlastimil Babka 1 month, 1 week ago
On 11/10/25 23:21, Lorenzo Stoakes wrote:
> There's an established convention in the kernel that we treat PTEs as
> containing swap entries (and the unfortunately named non-swap swap entries)
> should they be neither empty (i.e. pte_none() evaluating true) nor present
> (i.e. pte_present() evaluating true).
> 
> However, there is some inconsistency in how this is applied, as we also
> have the is_swap_pte() helper which explicitly performs this check:
> 
> 	/* check whether a pte points to a swap entry */
> 	static inline int is_swap_pte(pte_t pte)
> 	{
> 		return !pte_none(pte) && !pte_present(pte);
> 	}
> 
> As this represents a predicate, and it's logical to assume that in order to
> establish that a PTE entry can correctly be manipulated as a swap/non-swap
> entry, this predicate seems as if it must first be checked.
> 
> But we instead, we far more often utilise the established convention of
> checking pte_none() / pte_present() before operating on entries as if they
> were swap/non-swap.
> 
> This patch works towards correcting this inconsistency by removing all uses
> of is_swap_pte() where we are already in a position where we perform
> pte_none()/pte_present() checks anyway or otherwise it is clearly logical
> to do so.
> 
> We also take advantage of the fact that pte_swp_uffd_wp() is only set on
> swap entries.
> 
> Additionally, update comments referencing to is_swap_pte() and
> non_swap_entry().
> 
> No functional change intended.
> 
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>

LGTM (famous last words)

Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
Re: [PATCH v3 03/16] mm: avoid unnecessary uses of is_swap_pte()
Posted by Zi Yan 1 month, 1 week ago
On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:

> There's an established convention in the kernel that we treat PTEs as
> containing swap entries (and the unfortunately named non-swap swap entries)
> should they be neither empty (i.e. pte_none() evaluating true) nor present
> (i.e. pte_present() evaluating true).
>
> However, there is some inconsistency in how this is applied, as we also
> have the is_swap_pte() helper which explicitly performs this check:
>
> 	/* check whether a pte points to a swap entry */
> 	static inline int is_swap_pte(pte_t pte)
> 	{
> 		return !pte_none(pte) && !pte_present(pte);
> 	}
>
> As this represents a predicate, and it's logical to assume that in order to
> establish that a PTE entry can correctly be manipulated as a swap/non-swap
> entry, this predicate seems as if it must first be checked.
>
> But we instead, we far more often utilise the established convention of
> checking pte_none() / pte_present() before operating on entries as if they
> were swap/non-swap.
>
> This patch works towards correcting this inconsistency by removing all uses
> of is_swap_pte() where we are already in a position where we perform
> pte_none()/pte_present() checks anyway or otherwise it is clearly logical
> to do so.
>
> We also take advantage of the fact that pte_swp_uffd_wp() is only set on
> swap entries.
>
> Additionally, update comments referencing to is_swap_pte() and
> non_swap_entry().
>
> No functional change intended.
>
> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> ---
>  fs/proc/task_mmu.c            | 49 ++++++++++++++++++++++++-----------
>  include/linux/userfaultfd_k.h |  3 +--
>  mm/hugetlb.c                  |  6 ++---
>  mm/internal.h                 |  6 ++---
>  mm/khugepaged.c               | 29 +++++++++++----------
>  mm/migrate.c                  |  2 +-
>  mm/mprotect.c                 | 43 ++++++++++++++----------------
>  mm/mremap.c                   |  7 +++--
>  mm/page_table_check.c         | 13 ++++++----
>  mm/page_vma_mapped.c          | 31 +++++++++++-----------
>  10 files changed, 104 insertions(+), 85 deletions(-)
>

<snip>

> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> index be20468fb5a9..a4e23818f37f 100644
> --- a/mm/page_vma_mapped.c
> +++ b/mm/page_vma_mapped.c
> @@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
>  static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>  		    spinlock_t **ptlp)
>  {
> +	bool is_migration;
>  	pte_t ptent;
>
>  	if (pvmw->flags & PVMW_SYNC) {
> @@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>  		return !!pvmw->pte;
>  	}
>
> +	is_migration = pvmw->flags & PVMW_MIGRATION;
>  again:
>  	/*
>  	 * It is important to return the ptl corresponding to pte,
> @@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>
>  	ptent = ptep_get(pvmw->pte);
>
> -	if (pvmw->flags & PVMW_MIGRATION) {
> -		if (!is_swap_pte(ptent))

Here, is_migration = true and either pte_none() or pte_present()
would return false, and ...

> +	if (pte_none(ptent)) {
> +		return false;
> +	} else if (pte_present(ptent)) {
> +		if (is_migration)
>  			return false;
> -	} else if (is_swap_pte(ptent)) {
> +	} else if (!is_migration) {
>  		swp_entry_t entry;
> +
>  		/*
>  		 * Handle un-addressable ZONE_DEVICE memory.
>  		 *
> @@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>  		if (!is_device_private_entry(entry) &&
>  		    !is_device_exclusive_entry(entry))
>  			return false;
> -	} else if (!pte_present(ptent)) {
> -		return false;

... is_migration = false and !pte_present() is actually pte_none(),
because of the is_swap_pte() above the added !is_migration check.
So pte_none() should return false regardless of is_migration.

This is a nice cleanup. Thanks.

>  	}
>  	spin_lock(*ptlp);
>  	if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
> @@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
>  			return false;
>
>  		pfn = softleaf_to_pfn(entry);
> -	} else if (is_swap_pte(ptent)) {
> -		swp_entry_t entry;
> +	} else if (pte_present(ptent)) {
> +		pfn = pte_pfn(ptent);
> +	} else {
> +		const softleaf_t entry = softleaf_from_pte(ptent);
>
>  		/* Handle un-addressable ZONE_DEVICE memory */
> -		entry = pte_to_swp_entry(ptent);
> -		if (!is_device_private_entry(entry) &&
> -		    !is_device_exclusive_entry(entry))
> -			return false;
> -
> -		pfn = swp_offset_pfn(entry);
> -	} else {
> -		if (!pte_present(ptent))

This !pte_present() is pte_none(). It seems that there should be

} else if (pte_none(ptent)) {
	return false;
}

before the above "} else {".

> +		if (!softleaf_is_device_private(entry) &&
> +		    !softleaf_is_device_exclusive(entry))
>  			return false;
>
> -		pfn = pte_pfn(ptent);
> +		pfn = softleaf_to_pfn(entry);
>  	}
>
>  	if ((pfn + pte_nr - 1) < pvmw->pfn)
> -- 
> 2.51.0

Otherwise, LGTM. With the above issue addressed, feel free to
add Reviewed-by: Zi Yan <ziy@nvidia.com>

--
Best Regards,
Yan, Zi
Re: [PATCH v3 03/16] mm: avoid unnecessary uses of is_swap_pte()
Posted by Zi Yan 1 month, 1 week ago
On 11 Nov 2025, at 21:58, Zi Yan wrote:

> On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:
>
>> There's an established convention in the kernel that we treat PTEs as
>> containing swap entries (and the unfortunately named non-swap swap entries)
>> should they be neither empty (i.e. pte_none() evaluating true) nor present
>> (i.e. pte_present() evaluating true).
>>
>> However, there is some inconsistency in how this is applied, as we also
>> have the is_swap_pte() helper which explicitly performs this check:
>>
>> 	/* check whether a pte points to a swap entry */
>> 	static inline int is_swap_pte(pte_t pte)
>> 	{
>> 		return !pte_none(pte) && !pte_present(pte);
>> 	}
>>
>> As this represents a predicate, and it's logical to assume that in order to
>> establish that a PTE entry can correctly be manipulated as a swap/non-swap
>> entry, this predicate seems as if it must first be checked.
>>
>> But we instead, we far more often utilise the established convention of
>> checking pte_none() / pte_present() before operating on entries as if they
>> were swap/non-swap.
>>
>> This patch works towards correcting this inconsistency by removing all uses
>> of is_swap_pte() where we are already in a position where we perform
>> pte_none()/pte_present() checks anyway or otherwise it is clearly logical
>> to do so.

BTW, I wonder if we could use switch + enum and compiler to prevent future
inconsistencies.

Basically,

enum PTE_State {
	PTE_PRESENT,
	PTE_NONE,
	PTE_SOFTLEAF,
};

enum PTE_State get_pte_state(pte_t pte)
{
	if (pte_present(pte))
		return PTE_PRESENT;
	if (pte_none(pte))
		return PTE_NONE;
	return PTE_SOFTLEAF;
}

in any code handling pte:

switch (get_pte_state(pte)) {
	case PTE_PRESENT:
		break;
	case PTE_NONE:
		break;
	case PTE_SOFTLEAF:
		break;
}

And the compiler will yell at you if any enum value is missing from the
switch statement.

Just an idea that came to mind while I was reading the commit message.
Feel free to ignore it. :)

Best Regards,
Yan, Zi
Re: [PATCH v3 03/16] mm: avoid unnecessary uses of is_swap_pte()
Posted by Lorenzo Stoakes 1 month, 1 week ago
On Tue, Nov 11, 2025 at 09:58:36PM -0500, Zi Yan wrote:
> On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:
>
> > There's an established convention in the kernel that we treat PTEs as
> > containing swap entries (and the unfortunately named non-swap swap entries)
> > should they be neither empty (i.e. pte_none() evaluating true) nor present
> > (i.e. pte_present() evaluating true).
> >
> > However, there is some inconsistency in how this is applied, as we also
> > have the is_swap_pte() helper which explicitly performs this check:
> >
> > 	/* check whether a pte points to a swap entry */
> > 	static inline int is_swap_pte(pte_t pte)
> > 	{
> > 		return !pte_none(pte) && !pte_present(pte);
> > 	}
> >
> > As this represents a predicate, and it's logical to assume that in order to
> > establish that a PTE entry can correctly be manipulated as a swap/non-swap
> > entry, this predicate seems as if it must first be checked.
> >
> > But we instead, we far more often utilise the established convention of
> > checking pte_none() / pte_present() before operating on entries as if they
> > were swap/non-swap.
> >
> > This patch works towards correcting this inconsistency by removing all uses
> > of is_swap_pte() where we are already in a position where we perform
> > pte_none()/pte_present() checks anyway or otherwise it is clearly logical
> > to do so.
> >
> > We also take advantage of the fact that pte_swp_uffd_wp() is only set on
> > swap entries.
> >
> > Additionally, update comments referencing to is_swap_pte() and
> > non_swap_entry().
> >
> > No functional change intended.
> >
> > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
> > ---
> >  fs/proc/task_mmu.c            | 49 ++++++++++++++++++++++++-----------
> >  include/linux/userfaultfd_k.h |  3 +--
> >  mm/hugetlb.c                  |  6 ++---
> >  mm/internal.h                 |  6 ++---
> >  mm/khugepaged.c               | 29 +++++++++++----------
> >  mm/migrate.c                  |  2 +-
> >  mm/mprotect.c                 | 43 ++++++++++++++----------------
> >  mm/mremap.c                   |  7 +++--
> >  mm/page_table_check.c         | 13 ++++++----
> >  mm/page_vma_mapped.c          | 31 +++++++++++-----------
> >  10 files changed, 104 insertions(+), 85 deletions(-)
> >
>
> <snip>
>
> > diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
> > index be20468fb5a9..a4e23818f37f 100644
> > --- a/mm/page_vma_mapped.c
> > +++ b/mm/page_vma_mapped.c
> > @@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
> >  static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> >  		    spinlock_t **ptlp)
> >  {
> > +	bool is_migration;
> >  	pte_t ptent;
> >
> >  	if (pvmw->flags & PVMW_SYNC) {
> > @@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> >  		return !!pvmw->pte;
> >  	}
> >
> > +	is_migration = pvmw->flags & PVMW_MIGRATION;
> >  again:
> >  	/*
> >  	 * It is important to return the ptl corresponding to pte,
> > @@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> >
> >  	ptent = ptep_get(pvmw->pte);
> >
> > -	if (pvmw->flags & PVMW_MIGRATION) {
> > -		if (!is_swap_pte(ptent))
>
> Here, is_migration = true and either pte_none() or pte_present()
> would return false, and ...
>
> > +	if (pte_none(ptent)) {
> > +		return false;
> > +	} else if (pte_present(ptent)) {
> > +		if (is_migration)
> >  			return false;
> > -	} else if (is_swap_pte(ptent)) {
> > +	} else if (!is_migration) {
> >  		swp_entry_t entry;
> > +
> >  		/*
> >  		 * Handle un-addressable ZONE_DEVICE memory.
> >  		 *
> > @@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
> >  		if (!is_device_private_entry(entry) &&
> >  		    !is_device_exclusive_entry(entry))
> >  			return false;
> > -	} else if (!pte_present(ptent)) {
> > -		return false;
>
> ... is_migration = false and !pte_present() is actually pte_none(),
> because of the is_swap_pte() above the added !is_migration check.
> So pte_none() should return false regardless of is_migration.

I guess you were working this through :) well I decided to do so as well,
just to double-check I got it right - maybe useful for you too :P -

Previously:

	if (is_migration) {
		if (!is_swap_pte(ptent))
			return false;
	} else if (is_swap_pte(ptent)) {
		... ZONE_DEVICE blah ...
	} else if (!pte_present(ptent)) {
		return false;
	}

But is_swap_pte() is the same as !pte_none() && !pte_present(), so
!is_swap_pte() is pte_none() || pte_present() by De Morgan's law:

	if (is_migration) {
		if (pte_none(ptent) || pte_present(ptent))
			return false;
	} else if (!pte_none(ptent) && !pte_present(ptent)) {
		... ZONE_DEVICE blah ...
	} else if (!pte_present(ptent)) {
		return false;
	}

In the last branch, we know (again by De Morgan's law) that either
pte_none(ptent) or pte_present(ptent). But we explicitly check for
!pte_present(ptent) so this becomes:

	if (is_migration) {
		if (pte_none(ptent) || pte_present(ptent))
			return false;
	} else if (!pte_none(ptent) && !pte_present(ptent)) {
		... ZONE_DEVICE blah ...
	} else if (pte_none(ptent)) {
		return false;
	}

So we can generalise - regardless of is_migration, the pte_none() case returns false:

	if (pte_none(ptent)) {
		return false;
	} else if (is_migration) {
		if (pte_none(ptent) || pte_present(ptent))
			return false;
	} else if (!pte_none(ptent) && !pte_present(ptent)) {
		... ZONE_DEVICE blah ...
	}

Since we already check for pte_none() ahead of time, we can simplify again:

	if (pte_none(ptent)) {
		return false;
	} else if (is_migration) {
		if (pte_present(ptent))
			return false;
	} else if (!pte_present(ptent)) {
		... ZONE_DEVICE blah ...
	}

We can then put the pte_present() check in the outer branch:

	if (pte_none(ptent)) {
		return false;
	} else if (pte_present(ptent)) {
		if (is_migration)
			return false;
	} else if (!is_migration) {
		... ZONE_DEVICE blah ...
	}

Because previously an is_migration && !pte_present() case would result in no
action here.

Which is the code in this patch :)

>
> This is a nice cleanup. Thanks.
>
> >  	}
> >  	spin_lock(*ptlp);
> >  	if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
> > @@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
> >  			return false;
> >
> >  		pfn = softleaf_to_pfn(entry);
> > -	} else if (is_swap_pte(ptent)) {
> > -		swp_entry_t entry;
> > +	} else if (pte_present(ptent)) {
> > +		pfn = pte_pfn(ptent);
> > +	} else {
> > +		const softleaf_t entry = softleaf_from_pte(ptent);
> >
> >  		/* Handle un-addressable ZONE_DEVICE memory */
> > -		entry = pte_to_swp_entry(ptent);
> > -		if (!is_device_private_entry(entry) &&
> > -		    !is_device_exclusive_entry(entry))
> > -			return false;
> > -
> > -		pfn = swp_offset_pfn(entry);
> > -	} else {
> > -		if (!pte_present(ptent))
>
> This !pte_present() is pte_none(). It seems that there should be

Well this should be fine though as:

		const softleaf_t entry = softleaf_from_pte(ptent);

		/* Handle un-addressable ZONE_DEVICE memory */
		if (!softleaf_is_device_private(entry) &&
		    !softleaf_is_device_exclusive(entry))
			return false;

Still correctly handles none - as softleaf_from_pte() in case of pte_none() will
be a none softleaf entry which will fail both of these tests.

So excluding pte_none() as an explicit test here was part of the rework - we no
longer have to do that.

>
> } else if (pte_none(ptent)) {
> 	return false;
> }
>
> before the above "} else {".
>
> > +		if (!softleaf_is_device_private(entry) &&
> > +		    !softleaf_is_device_exclusive(entry))
> >  			return false;
> >
> > -		pfn = pte_pfn(ptent);
> > +		pfn = softleaf_to_pfn(entry);
> >  	}
> >
> >  	if ((pfn + pte_nr - 1) < pvmw->pfn)
> > --
> > 2.51.0
>
> Otherwise, LGTM. With the above issue addressed, feel free to
> add Reviewed-by: Zi Yan <ziy@nvidia.com>

Thanks!

>
> --
> Best Regards,
> Yan, Zi

Cheers, Lorenzo
Re: [PATCH v3 03/16] mm: avoid unnecessary uses of is_swap_pte()
Posted by Zi Yan 1 month, 1 week ago
On 12 Nov 2025, at 10:59, Lorenzo Stoakes wrote:

> On Tue, Nov 11, 2025 at 09:58:36PM -0500, Zi Yan wrote:
>> On 10 Nov 2025, at 17:21, Lorenzo Stoakes wrote:
>>
>>> There's an established convention in the kernel that we treat PTEs as
>>> containing swap entries (and the unfortunately named non-swap swap entries)
>>> should they be neither empty (i.e. pte_none() evaluating true) nor present
>>> (i.e. pte_present() evaluating true).
>>>
>>> However, there is some inconsistency in how this is applied, as we also
>>> have the is_swap_pte() helper which explicitly performs this check:
>>>
>>> 	/* check whether a pte points to a swap entry */
>>> 	static inline int is_swap_pte(pte_t pte)
>>> 	{
>>> 		return !pte_none(pte) && !pte_present(pte);
>>> 	}
>>>
>>> As this represents a predicate, and it's logical to assume that in order to
>>> establish that a PTE entry can correctly be manipulated as a swap/non-swap
>>> entry, this predicate seems as if it must first be checked.
>>>
>>> But we instead, we far more often utilise the established convention of
>>> checking pte_none() / pte_present() before operating on entries as if they
>>> were swap/non-swap.
>>>
>>> This patch works towards correcting this inconsistency by removing all uses
>>> of is_swap_pte() where we are already in a position where we perform
>>> pte_none()/pte_present() checks anyway or otherwise it is clearly logical
>>> to do so.
>>>
>>> We also take advantage of the fact that pte_swp_uffd_wp() is only set on
>>> swap entries.
>>>
>>> Additionally, update comments referencing to is_swap_pte() and
>>> non_swap_entry().
>>>
>>> No functional change intended.
>>>
>>> Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
>>> ---
>>>  fs/proc/task_mmu.c            | 49 ++++++++++++++++++++++++-----------
>>>  include/linux/userfaultfd_k.h |  3 +--
>>>  mm/hugetlb.c                  |  6 ++---
>>>  mm/internal.h                 |  6 ++---
>>>  mm/khugepaged.c               | 29 +++++++++++----------
>>>  mm/migrate.c                  |  2 +-
>>>  mm/mprotect.c                 | 43 ++++++++++++++----------------
>>>  mm/mremap.c                   |  7 +++--
>>>  mm/page_table_check.c         | 13 ++++++----
>>>  mm/page_vma_mapped.c          | 31 +++++++++++-----------
>>>  10 files changed, 104 insertions(+), 85 deletions(-)
>>>
>>
>> <snip>
>>
>>> diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
>>> index be20468fb5a9..a4e23818f37f 100644
>>> --- a/mm/page_vma_mapped.c
>>> +++ b/mm/page_vma_mapped.c
>>> @@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw)
>>>  static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>>>  		    spinlock_t **ptlp)
>>>  {
>>> +	bool is_migration;
>>>  	pte_t ptent;
>>>
>>>  	if (pvmw->flags & PVMW_SYNC) {
>>> @@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>>>  		return !!pvmw->pte;
>>>  	}
>>>
>>> +	is_migration = pvmw->flags & PVMW_MIGRATION;
>>>  again:
>>>  	/*
>>>  	 * It is important to return the ptl corresponding to pte,
>>> @@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>>>
>>>  	ptent = ptep_get(pvmw->pte);
>>>
>>> -	if (pvmw->flags & PVMW_MIGRATION) {
>>> -		if (!is_swap_pte(ptent))
>>
>> Here, is_migration = true and either pte_none() or pte_present()
>> would return false, and ...
>>
>>> +	if (pte_none(ptent)) {
>>> +		return false;
>>> +	} else if (pte_present(ptent)) {
>>> +		if (is_migration)
>>>  			return false;
>>> -	} else if (is_swap_pte(ptent)) {
>>> +	} else if (!is_migration) {
>>>  		swp_entry_t entry;
>>> +
>>>  		/*
>>>  		 * Handle un-addressable ZONE_DEVICE memory.
>>>  		 *
>>> @@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp,
>>>  		if (!is_device_private_entry(entry) &&
>>>  		    !is_device_exclusive_entry(entry))
>>>  			return false;
>>> -	} else if (!pte_present(ptent)) {
>>> -		return false;
>>
>> ... is_migration = false and !pte_present() is actually pte_none(),
>> because of the is_swap_pte() above the added !is_migration check.
>> So pte_none() should return false regardless of is_migration.
>
> I guess you were working this through :) well I decided to also just to
> double-check I got it right, maybe useful for you also :P -
>
> Previously:
>
> 	if (is_migration) {
> 		if (!is_swap_pte(ptent))
> 			return false;
> 	} else if (is_swap_pte(ptent)) {
> 		... ZONE_DEVICE blah ...
> 	} else if (!pte_present(ptent)) {
> 		return false;
> 	}
>
> But is_swap_pte() is the same as !pte_none() && !pte_present(), so
> !is_swap_pte() is pte_none() || pte_present() by De Morgan's law:
>
> 	if (is_migration) {
> 		if (pte_none(ptent) || pte_present(ptent))
> 			return false;
> 	} else if (!pte_none(ptent) && !pte_present(ptent)) {
> 		... ZONE_DEVICE blah ...
> 	} else if (!pte_present(ptent)) {
> 		return false;
> 	}
>
> In the last branch, we know (again by De Morgan's law) that either
> pte_none(ptent) or pte_present(ptent).. But we explicitly check for
> !pte_present(ptent) so this becomes:
>
> 	if (is_migration) {
> 		if (pte_none(ptent) || pte_present(ptent))
> 			return false;
> 	} else if (!pte_none(ptent) && !pte_present(ptent)) {
> 		... ZONE_DEVICE blah ...
> 	} else if (pte_none(ptent)) {
> 		return false;
> 	}
>
> So we can generalise - regardless of is_migration, pte_none() returns false:
>
> 	if (pte_none(ptent)) {
> 		return false;
> 	} else if (is_migration) {
> 		if (pte_none(ptent) || pte_present(ptent))
> 			return false;
> 	} else if (!pte_none(ptent) && !pte_present(ptent)) {
> 		... ZONE_DEVICE blah ...
> 	}
>
> Since we already check for pte_none() ahead of time, we can simplify again:
>
> 	if (pte_none(ptent)) {
> 		return false;
> 	} else if (is_migration) {
> 		if (pte_present(ptent))
> 			return false;
> 	} else if (!pte_present(ptent)) {
> 		... ZONE_DEVICE blah ...
> 	}
>
> We can then put the pte_present() check in the outer branch:
>
> 	if (pte_none(ptent)) {
> 		return false;
> 	} else if (pte_present(ptent)) {
> 		if (is_migration)
> 			return false;
> 	} else if (!is_migration) {
> 		... ZONE_DEVICE blah ...
> 	}
>
> Because previously an is_migration && !pte_present() case would result in no
> action here.
>
> Which is the code in this patch :)

Thanks again for spelling out the whole process.

>
>>
>> This is a nice cleanup. Thanks.
>>
>>>  	}
>>>  	spin_lock(*ptlp);
>>>  	if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) {
>>> @@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr)
>>>  			return false;
>>>
>>>  		pfn = softleaf_to_pfn(entry);
>>> -	} else if (is_swap_pte(ptent)) {
>>> -		swp_entry_t entry;
>>> +	} else if (pte_present(ptent)) {
>>> +		pfn = pte_pfn(ptent);
>>> +	} else {
>>> +		const softleaf_t entry = softleaf_from_pte(ptent);
>>>
>>>  		/* Handle un-addressable ZONE_DEVICE memory */
>>> -		entry = pte_to_swp_entry(ptent);
>>> -		if (!is_device_private_entry(entry) &&
>>> -		    !is_device_exclusive_entry(entry))
>>> -			return false;
>>> -
>>> -		pfn = swp_offset_pfn(entry);
>>> -	} else {
>>> -		if (!pte_present(ptent))
>>
>> This !pte_present() is pte_none(). It seems that there should be
>
> Well this should be fine though as:
>
> 		const softleaf_t entry = softleaf_from_pte(ptent);
>
> 		/* Handle un-addressable ZONE_DEVICE memory */
> 		if (!softleaf_is_device_private(entry) &&
> 		    !softleaf_is_device_exclusive(entry))
> 			return false;
>
> Still correctly handles none - as softleaf_from_pte() in case of pte_none() will
> be a none softleaf entry which will fail both of these tests.
>
> So excluding pte_none() as an explicit test here was part of the rework - we no
> longer have to do that.

Got it. Now my RB is yours. :)

>
>>
>> } else if (pte_none(ptent)) {
>> 	return false;
>> }
>>
>> before the above "} else {".
>>
>>> +		if (!softleaf_is_device_private(entry) &&
>>> +		    !softleaf_is_device_exclusive(entry))
>>>  			return false;
>>>
>>> -		pfn = pte_pfn(ptent);
>>> +		pfn = softleaf_to_pfn(entry);
>>>  	}
>>>
>>>  	if ((pfn + pte_nr - 1) < pvmw->pfn)
>>> --
>>> 2.51.0
>>
>> Otherwise, LGTM. With the above issue addressed, feel free to
>> add Reviewed-by: Zi Yan <ziy@nvidia.com>
>
> Thanks!
>
>>
>> --
>> Best Regards,
>> Yan, Zi
>
> Cheers, Lorenzo


Best Regards,
Yan, Zi