[PATCH v11 1/5] mm: softdirty: Add pgtable_soft_dirty_supported()

Chunyan Zhang posted 5 patches 3 weeks ago
There is a newer version of this series
[PATCH v11 1/5] mm: softdirty: Add pgtable_soft_dirty_supported()
Posted by Chunyan Zhang 3 weeks ago
Some platforms can customize the PTE PMD entry soft-dirty bit making it
unavailable even if the architecture provides the resource.

Add an API which architectures can define their specific implementations
to detect if soft-dirty bit is available on which device the kernel is
running.

Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
---
 fs/proc/task_mmu.c      | 17 ++++++++++++++++-
 include/linux/pgtable.h | 12 ++++++++++++
 mm/debug_vm_pgtable.c   | 10 +++++-----
 mm/huge_memory.c        | 13 +++++++------
 mm/internal.h           |  2 +-
 mm/mremap.c             | 13 +++++++------
 mm/userfaultfd.c        | 10 ++++------
 7 files changed, 52 insertions(+), 25 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 29cca0e6d0ff..9e8083b6d4cd 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 	 * -Werror=unterminated-string-initialization warning
 	 *  with GCC 15
 	 */
-	static const char mnemonics[BITS_PER_LONG][3] = {
+	static char mnemonics[BITS_PER_LONG][3] = {
 		/*
 		 * In case if we meet a flag we don't know about.
 		 */
@@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_SEALED)] = "sl",
 #endif
 	};
+/*
+ * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is
+ * unavailable on which the kernel is running, even if the architecture
+ * provides the resource and soft-dirty is compiled in.
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	if (!pgtable_soft_dirty_supported())
+		mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
+#endif
+
 	size_t i;
 
 	seq_puts(m, "VmFlags: ");
@@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
 static inline void clear_soft_dirty(struct vm_area_struct *vma,
 		unsigned long addr, pte_t *pte)
 {
+	if (!pgtable_soft_dirty_supported())
+		return;
 	/*
 	 * The soft-dirty tracker uses #PF-s to catch writes
 	 * to pages, so write-protect the pte as well. See the
@@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
 {
 	pmd_t old, pmd = *pmdp;
 
+	if (!pgtable_soft_dirty_supported())
+		return;
+
 	if (pmd_present(pmd)) {
 		/* See comment in change_huge_pmd() */
 		old = pmdp_invalidate(vma, addr, pmdp);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 4c035637eeb7..2a3578a4ae4c 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 #define arch_start_context_switch(prev)	do {} while (0)
 #endif
 
+/*
+ * Some platforms can customize the PTE soft-dirty bit making it unavailable
+ * even if the architecture provides the resource.
+ * Adding this API allows architectures to add their own checks for the
+ * devices on which the kernel is running.
+ * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY
+ * is part of this macro.
+ */
+#ifndef pgtable_soft_dirty_supported
+#define pgtable_soft_dirty_supported()	IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
+#endif
+
 #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
 #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
 static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 830107b6dd08..b32ce2b0b998 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
 {
 	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
 
-	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+	if (!pgtable_soft_dirty_supported())
 		return;
 
 	pr_debug("Validating PTE soft dirty\n");
@@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
 {
 	pte_t pte;
 
-	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+	if (!pgtable_soft_dirty_supported())
 		return;
 
 	pr_debug("Validating PTE swap soft dirty\n");
@@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
-	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+	if (!pgtable_soft_dirty_supported())
 		return;
 
 	if (!has_transparent_hugepage())
@@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
 {
 	pmd_t pmd;
 
-	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
-		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
+	if (!pgtable_soft_dirty_supported() ||
+	    !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
 		return;
 
 	if (!has_transparent_hugepage())
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9c38a95e9f09..218d430a2ec6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
 
 static pmd_t move_soft_dirty_pmd(pmd_t pmd)
 {
-#ifdef CONFIG_MEM_SOFT_DIRTY
-	if (unlikely(is_pmd_migration_entry(pmd)))
-		pmd = pmd_swp_mksoft_dirty(pmd);
-	else if (pmd_present(pmd))
-		pmd = pmd_mksoft_dirty(pmd);
-#endif
+	if (pgtable_soft_dirty_supported()) {
+		if (unlikely(is_pmd_migration_entry(pmd)))
+			pmd = pmd_swp_mksoft_dirty(pmd);
+		else if (pmd_present(pmd))
+			pmd = pmd_mksoft_dirty(pmd);
+	}
+
 	return pmd;
 }
 
diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc03..c6ca62f8ecf3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
 	 * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
 	 * will be constantly true.
 	 */
-	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+	if (!pgtable_soft_dirty_supported())
 		return false;
 
 	/*
diff --git a/mm/mremap.c b/mm/mremap.c
index e618a706aff5..7beb3114dbf5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -162,12 +162,13 @@ static pte_t move_soft_dirty_pte(pte_t pte)
 	 * Set soft dirty bit so we can notice
 	 * in userspace the ptes were moved.
 	 */
-#ifdef CONFIG_MEM_SOFT_DIRTY
-	if (pte_present(pte))
-		pte = pte_mksoft_dirty(pte);
-	else if (is_swap_pte(pte))
-		pte = pte_swp_mksoft_dirty(pte);
-#endif
+	if (pgtable_soft_dirty_supported()) {
+		if (pte_present(pte))
+			pte = pte_mksoft_dirty(pte);
+		else if (is_swap_pte(pte))
+			pte = pte_swp_mksoft_dirty(pte);
+	}
+
 	return pte;
 }
 
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 45e6290e2e8b..85f43479b67a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1065,9 +1065,8 @@ static int move_present_pte(struct mm_struct *mm,
 
 	orig_dst_pte = folio_mk_pte(src_folio, dst_vma->vm_page_prot);
 	/* Set soft dirty bit so userspace can notice the pte was moved */
-#ifdef CONFIG_MEM_SOFT_DIRTY
-	orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
-#endif
+	if (pgtable_soft_dirty_supported())
+		orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
 	if (pte_dirty(orig_src_pte))
 		orig_dst_pte = pte_mkdirty(orig_dst_pte);
 	orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma);
@@ -1134,9 +1133,8 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
 	}
 
 	orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
-#ifdef CONFIG_MEM_SOFT_DIRTY
-	orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
-#endif
+	if (pgtable_soft_dirty_supported())
+		orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
 	set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
 	double_pt_unlock(dst_ptl, src_ptl);
 
-- 
2.34.1
Re: [PATCH v11 1/5] mm: softdirty: Add pgtable_soft_dirty_supported()
Posted by David Hildenbrand 3 weeks ago
On 11.09.25 11:55, Chunyan Zhang wrote:
> Some platforms can customize the PTE PMD entry soft-dirty bit making it
> unavailable even if the architecture provides the resource.
> 
> Add an API which architectures can define their specific implementations
> to detect if soft-dirty bit is available on which device the kernel is
> running.

Thinking to myself: maybe pgtable_supports_soft_dirty() would read better
Whatever you prefer.

> 
> Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> ---
>   fs/proc/task_mmu.c      | 17 ++++++++++++++++-
>   include/linux/pgtable.h | 12 ++++++++++++
>   mm/debug_vm_pgtable.c   | 10 +++++-----
>   mm/huge_memory.c        | 13 +++++++------
>   mm/internal.h           |  2 +-
>   mm/mremap.c             | 13 +++++++------
>   mm/userfaultfd.c        | 10 ++++------
>   7 files changed, 52 insertions(+), 25 deletions(-)
> 
> diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> index 29cca0e6d0ff..9e8083b6d4cd 100644
> --- a/fs/proc/task_mmu.c
> +++ b/fs/proc/task_mmu.c
> @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
>   	 * -Werror=unterminated-string-initialization warning
>   	 *  with GCC 15
>   	 */
> -	static const char mnemonics[BITS_PER_LONG][3] = {
> +	static char mnemonics[BITS_PER_LONG][3] = {
>   		/*
>   		 * In case if we meet a flag we don't know about.
>   		 */
> @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
>   		[ilog2(VM_SEALED)] = "sl",
>   #endif
>   	};
> +/*
> + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is
> + * unavailable on which the kernel is running, even if the architecture
> + * provides the resource and soft-dirty is compiled in.
> + */
> +#ifdef CONFIG_MEM_SOFT_DIRTY
> +	if (!pgtable_soft_dirty_supported())
> +		mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
> +#endif

You can now drop the ifdef.

But, I wonder if could we instead just stop setting the flag. Then we don't
have to worry about any VM_SOFTDIRTY checks.

Something like the following

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 892fe5dbf9de0..8b8bf63a32ef7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
  static inline void vm_flags_init(struct vm_area_struct *vma,
  				 vm_flags_t flags)
  {
+	VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
  	ACCESS_PRIVATE(vma, __vm_flags) = flags;
  }
  
@@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma,
  static inline void vm_flags_reset_once(struct vm_area_struct *vma,
  				       vm_flags_t flags)
  {
+	VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
  	vma_assert_write_locked(vma);
  	WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
  }
@@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma,
  static inline void vm_flags_set(struct vm_area_struct *vma,
  				vm_flags_t flags)
  {
+	VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
  	vma_start_write(vma);
  	ACCESS_PRIVATE(vma, __vm_flags) |= flags;
  }
diff --git a/mm/mmap.c b/mm/mmap.c
index 5fd3b80fda1d5..40cb3fbf9a247 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping(
  		return ERR_PTR(-ENOMEM);
  
  	vma_set_range(vma, addr, addr + len, 0);
-	vm_flags_init(vma, (vm_flags | mm->def_flags |
-		      VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK);
+	vm_flags |= mm->def_flags | VM_DONTEXPAND;
+	if (pgtable_soft_dirty_supported())
+		vm_flags |= VM_SOFTDIRTY;
+	vm_flags_init(vma, vm_flags & ~VM_LOCKED_MASK);
  	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
  
  	vma->vm_ops = ops;
diff --git a/mm/vma.c b/mm/vma.c
index abe0da33c8446..16a1ed2a6199c 100644
--- a/mm/vma.c
+++ b/mm/vma.c
@@ -2551,7 +2551,8 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
  	 * then new mapped in-place (which must be aimed as
  	 * a completely new data area).
  	 */
-	vm_flags_set(vma, VM_SOFTDIRTY);
+	if (pgtable_soft_dirty_supported())
+		vm_flags_set(vma, VM_SOFTDIRTY);
  
  	vma_set_page_prot(vma);
  }
@@ -2819,7 +2820,8 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
  	mm->data_vm += len >> PAGE_SHIFT;
  	if (vm_flags & VM_LOCKED)
  		mm->locked_vm += (len >> PAGE_SHIFT);
-	vm_flags_set(vma, VM_SOFTDIRTY);
+	if (pgtable_soft_dirty_supported())
+		vm_flags_set(vma, VM_SOFTDIRTY);
  	return 0;
  
  mas_store_fail:
diff --git a/mm/vma_exec.c b/mm/vma_exec.c
index 922ee51747a68..c06732a5a620a 100644
--- a/mm/vma_exec.c
+++ b/mm/vma_exec.c
@@ -107,6 +107,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
  int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
  			  unsigned long *top_mem_p)
  {
+	unsigned long flags  = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
  	int err;
  	struct vm_area_struct *vma = vm_area_alloc(mm);
  
@@ -137,7 +138,9 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
  	BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
  	vma->vm_end = STACK_TOP_MAX;
  	vma->vm_start = vma->vm_end - PAGE_SIZE;
-	vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
+	if (pgtable_soft_dirty_supported())
+		flags |= VM_SOFTDIRTY;
+	vm_flags_init(vma, flags);
  	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
  
  	err = insert_vm_struct(mm, vma);


> +
>   	size_t i;
>   
>   	seq_puts(m, "VmFlags: ");
> @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
>   static inline void clear_soft_dirty(struct vm_area_struct *vma,
>   		unsigned long addr, pte_t *pte)
>   {
> +	if (!pgtable_soft_dirty_supported())
> +		return;
>   	/*
>   	 * The soft-dirty tracker uses #PF-s to catch writes
>   	 * to pages, so write-protect the pte as well. See the
> @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
>   {
>   	pmd_t old, pmd = *pmdp;
>   
> +	if (!pgtable_soft_dirty_supported())
> +		return;
> +
>   	if (pmd_present(pmd)) {
>   		/* See comment in change_huge_pmd() */
>   		old = pmdp_invalidate(vma, addr, pmdp);

That would all be handled with the above never-set-VM_SOFTDIRTY.

> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> index 4c035637eeb7..2a3578a4ae4c 100644
> --- a/include/linux/pgtable.h
> +++ b/include/linux/pgtable.h
> @@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
>   #define arch_start_context_switch(prev)	do {} while (0)
>   #endif
>   
> +/*
> + * Some platforms can customize the PTE soft-dirty bit making it unavailable
> + * even if the architecture provides the resource.
> + * Adding this API allows architectures to add their own checks for the
> + * devices on which the kernel is running.
> + * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY
> + * is part of this macro.
> + */
> +#ifndef pgtable_soft_dirty_supported
> +#define pgtable_soft_dirty_supported()	IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
> +#endif
> +
>   #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
>   #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
>   static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> index 830107b6dd08..b32ce2b0b998 100644
> --- a/mm/debug_vm_pgtable.c
> +++ b/mm/debug_vm_pgtable.c
> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
>   {
>   	pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
>   
> -	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> +	if (!pgtable_soft_dirty_supported())
>   		return;
>   
>   	pr_debug("Validating PTE soft dirty\n");
> @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
>   {
>   	pte_t pte;
>   
> -	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> +	if (!pgtable_soft_dirty_supported())
>   		return;
>   
>   	pr_debug("Validating PTE swap soft dirty\n");
> @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
>   {
>   	pmd_t pmd;
>   
> -	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> +	if (!pgtable_soft_dirty_supported())
>   		return;
>   
>   	if (!has_transparent_hugepage())
> @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
>   {
>   	pmd_t pmd;
>   
> -	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
> -		!IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
> +	if (!pgtable_soft_dirty_supported() ||
> +	    !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
>   		return;
>   
>   	if (!has_transparent_hugepage())
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 9c38a95e9f09..218d430a2ec6 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
>   
>   static pmd_t move_soft_dirty_pmd(pmd_t pmd)
>   {
> -#ifdef CONFIG_MEM_SOFT_DIRTY
> -	if (unlikely(is_pmd_migration_entry(pmd)))
> -		pmd = pmd_swp_mksoft_dirty(pmd);
> -	else if (pmd_present(pmd))
> -		pmd = pmd_mksoft_dirty(pmd);
> -#endif
> +	if (pgtable_soft_dirty_supported()) {
> +		if (unlikely(is_pmd_migration_entry(pmd)))
> +			pmd = pmd_swp_mksoft_dirty(pmd);
> +		else if (pmd_present(pmd))
> +			pmd = pmd_mksoft_dirty(pmd);
> +	}
> +

Wondering, should simply the arch take care of that and we can just clal
pmd_swp_mksoft_dirty / pmd_mksoft_dirty?

>   	return pmd;
>   }
>   
> diff --git a/mm/internal.h b/mm/internal.h
> index 45b725c3dc03..c6ca62f8ecf3 100644
> --- a/mm/internal.h
> +++ b/mm/internal.h
> @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
>   	 * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
>   	 * will be constantly true.
>   	 */
> -	if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> +	if (!pgtable_soft_dirty_supported())
>   		return false;
>   

That should be handled with the above never-set-VM_SOFTDIRTY.

>   	/*
> diff --git a/mm/mremap.c b/mm/mremap.c
> index e618a706aff5..7beb3114dbf5 100644
> --- a/mm/mremap.c
> +++ b/mm/mremap.c
> @@ -162,12 +162,13 @@ static pte_t move_soft_dirty_pte(pte_t pte)
>   	 * Set soft dirty bit so we can notice
>   	 * in userspace the ptes were moved.
>   	 */
> -#ifdef CONFIG_MEM_SOFT_DIRTY
> -	if (pte_present(pte))
> -		pte = pte_mksoft_dirty(pte);
> -	else if (is_swap_pte(pte))
> -		pte = pte_swp_mksoft_dirty(pte);
> -#endif
> +	if (pgtable_soft_dirty_supported()) {
> +		if (pte_present(pte))
> +			pte = pte_mksoft_dirty(pte);
> +		else if (is_swap_pte(pte))
> +			pte = pte_swp_mksoft_dirty(pte);
> +	}
> +
>   	return pte;
>   }
>   
-- 
Cheers

David / dhildenb
Re: [PATCH v11 1/5] mm: softdirty: Add pgtable_soft_dirty_supported()
Posted by Chunyan Zhang 2 weeks, 6 days ago
Hi David,

On Thu, 11 Sept 2025 at 21:09, David Hildenbrand <david@redhat.com> wrote:
>
> On 11.09.25 11:55, Chunyan Zhang wrote:
> > Some platforms can customize the PTE PMD entry soft-dirty bit making it
> > unavailable even if the architecture provides the resource.
> >
> > Add an API which architectures can define their specific implementations
> > to detect if soft-dirty bit is available on which device the kernel is
> > running.
>
> Thinking to myself: maybe pgtable_supports_soft_dirty() would read better
> Whatever you prefer.

I will use pgtable_supports_* in the next version.

> >
> > Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
> > ---
> >   fs/proc/task_mmu.c      | 17 ++++++++++++++++-
> >   include/linux/pgtable.h | 12 ++++++++++++
> >   mm/debug_vm_pgtable.c   | 10 +++++-----
> >   mm/huge_memory.c        | 13 +++++++------
> >   mm/internal.h           |  2 +-
> >   mm/mremap.c             | 13 +++++++------
> >   mm/userfaultfd.c        | 10 ++++------
> >   7 files changed, 52 insertions(+), 25 deletions(-)
> >
> > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
> > index 29cca0e6d0ff..9e8083b6d4cd 100644
> > --- a/fs/proc/task_mmu.c
> > +++ b/fs/proc/task_mmu.c
> > @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> >        * -Werror=unterminated-string-initialization warning
> >        *  with GCC 15
> >        */
> > -     static const char mnemonics[BITS_PER_LONG][3] = {
> > +     static char mnemonics[BITS_PER_LONG][3] = {
> >               /*
> >                * In case if we meet a flag we don't know about.
> >                */
> > @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
> >               [ilog2(VM_SEALED)] = "sl",
> >   #endif
> >       };
> > +/*
> > + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is
> > + * unavailable on which the kernel is running, even if the architecture
> > + * provides the resource and soft-dirty is compiled in.
> > + */
> > +#ifdef CONFIG_MEM_SOFT_DIRTY
> > +     if (!pgtable_soft_dirty_supported())
> > +             mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
> > +#endif
>
> You can now drop the ifdef.

Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if
MEM_SOFT_DIRTY is compiled in, right?

Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here.

>
> But, I wonder if could we instead just stop setting the flag. Then we don't
> have to worry about any VM_SOFTDIRTY checks.
>
> Something like the following
>
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 892fe5dbf9de0..8b8bf63a32ef7 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
>   static inline void vm_flags_init(struct vm_area_struct *vma,
>                                  vm_flags_t flags)
>   {
> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
>         ACCESS_PRIVATE(vma, __vm_flags) = flags;
>   }
>
> @@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma,
>   static inline void vm_flags_reset_once(struct vm_area_struct *vma,
>                                        vm_flags_t flags)
>   {
> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
>         vma_assert_write_locked(vma);
>         WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
>   }
> @@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma,
>   static inline void vm_flags_set(struct vm_area_struct *vma,
>                                 vm_flags_t flags)
>   {
> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
>         vma_start_write(vma);
>         ACCESS_PRIVATE(vma, __vm_flags) |= flags;
>   }
> diff --git a/mm/mmap.c b/mm/mmap.c
> index 5fd3b80fda1d5..40cb3fbf9a247 100644
> --- a/mm/mmap.c
> +++ b/mm/mmap.c
> @@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping(
>                 return ERR_PTR(-ENOMEM);
>
>         vma_set_range(vma, addr, addr + len, 0);
> -       vm_flags_init(vma, (vm_flags | mm->def_flags |
> -                     VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK);
> +       vm_flags |= mm->def_flags | VM_DONTEXPAND;

Why use '|=' rather than not directly setting vm_flags which is an
uninitialized variable?

> +       if (pgtable_soft_dirty_supported())
> +               vm_flags |= VM_SOFTDIRTY;
> +       vm_flags_init(vma, vm_flags & ~VM_LOCKED_MASK);
>         vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
>
>         vma->vm_ops = ops;
> diff --git a/mm/vma.c b/mm/vma.c
> index abe0da33c8446..16a1ed2a6199c 100644
> --- a/mm/vma.c
> +++ b/mm/vma.c
> @@ -2551,7 +2551,8 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma)
>          * then new mapped in-place (which must be aimed as
>          * a completely new data area).
>          */
> -       vm_flags_set(vma, VM_SOFTDIRTY);
> +       if (pgtable_soft_dirty_supported())
> +               vm_flags_set(vma, VM_SOFTDIRTY);
>
>         vma_set_page_prot(vma);
>   }
> @@ -2819,7 +2820,8 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma,
>         mm->data_vm += len >> PAGE_SHIFT;
>         if (vm_flags & VM_LOCKED)
>                 mm->locked_vm += (len >> PAGE_SHIFT);
> -       vm_flags_set(vma, VM_SOFTDIRTY);
> +       if (pgtable_soft_dirty_supported())
> +               vm_flags_set(vma, VM_SOFTDIRTY);
>         return 0;
>
>   mas_store_fail:
> diff --git a/mm/vma_exec.c b/mm/vma_exec.c
> index 922ee51747a68..c06732a5a620a 100644
> --- a/mm/vma_exec.c
> +++ b/mm/vma_exec.c
> @@ -107,6 +107,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift)
>   int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
>                           unsigned long *top_mem_p)
>   {
> +       unsigned long flags  = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
>         int err;
>         struct vm_area_struct *vma = vm_area_alloc(mm);
>
> @@ -137,7 +138,9 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
>         BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
>         vma->vm_end = STACK_TOP_MAX;
>         vma->vm_start = vma->vm_end - PAGE_SIZE;
> -       vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP);
> +       if (pgtable_soft_dirty_supported())
> +               flags |= VM_SOFTDIRTY;
> +       vm_flags_init(vma, flags);
>         vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
>
>         err = insert_vm_struct(mm, vma);
>
>
> > +
> >       size_t i;
> >
> >       seq_puts(m, "VmFlags: ");
> > @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
> >   static inline void clear_soft_dirty(struct vm_area_struct *vma,
> >               unsigned long addr, pte_t *pte)
> >   {
> > +     if (!pgtable_soft_dirty_supported())
> > +             return;
> >       /*
> >        * The soft-dirty tracker uses #PF-s to catch writes
> >        * to pages, so write-protect the pte as well. See the
> > @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
> >   {
> >       pmd_t old, pmd = *pmdp;
> >
> > +     if (!pgtable_soft_dirty_supported())
> > +             return;
> > +
> >       if (pmd_present(pmd)) {
> >               /* See comment in change_huge_pmd() */
> >               old = pmdp_invalidate(vma, addr, pmdp);
>
> That would all be handled with the above never-set-VM_SOFTDIRTY.

Sorry I'm not sure I understand here, you mean no longer need #ifdef
CONFIG_MEM_SOFT_DIRTY for these function definitions, right?

>
> > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> > index 4c035637eeb7..2a3578a4ae4c 100644
> > --- a/include/linux/pgtable.h
> > +++ b/include/linux/pgtable.h
> > @@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
> >   #define arch_start_context_switch(prev)     do {} while (0)
> >   #endif
> >
> > +/*
> > + * Some platforms can customize the PTE soft-dirty bit making it unavailable
> > + * even if the architecture provides the resource.
> > + * Adding this API allows architectures to add their own checks for the
> > + * devices on which the kernel is running.
> > + * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY
> > + * is part of this macro.
> > + */
> > +#ifndef pgtable_soft_dirty_supported
> > +#define pgtable_soft_dirty_supported()       IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
> > +#endif
> > +
> >   #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> >   #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
> >   static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> > diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> > index 830107b6dd08..b32ce2b0b998 100644
> > --- a/mm/debug_vm_pgtable.c
> > +++ b/mm/debug_vm_pgtable.c
> > @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
> >   {
> >       pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
> >
> > -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> > +     if (!pgtable_soft_dirty_supported())
> >               return;
> >
> >       pr_debug("Validating PTE soft dirty\n");
> > @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
> >   {
> >       pte_t pte;
> >
> > -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> > +     if (!pgtable_soft_dirty_supported())
> >               return;
> >
> >       pr_debug("Validating PTE swap soft dirty\n");
> > @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
> >   {
> >       pmd_t pmd;
> >
> > -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> > +     if (!pgtable_soft_dirty_supported())
> >               return;
> >
> >       if (!has_transparent_hugepage())
> > @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
> >   {
> >       pmd_t pmd;
> >
> > -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
> > -             !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
> > +     if (!pgtable_soft_dirty_supported() ||
> > +         !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
> >               return;
> >
> >       if (!has_transparent_hugepage())
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index 9c38a95e9f09..218d430a2ec6 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
> >
> >   static pmd_t move_soft_dirty_pmd(pmd_t pmd)
> >   {
> > -#ifdef CONFIG_MEM_SOFT_DIRTY
> > -     if (unlikely(is_pmd_migration_entry(pmd)))
> > -             pmd = pmd_swp_mksoft_dirty(pmd);
> > -     else if (pmd_present(pmd))
> > -             pmd = pmd_mksoft_dirty(pmd);
> > -#endif
> > +     if (pgtable_soft_dirty_supported()) {
> > +             if (unlikely(is_pmd_migration_entry(pmd)))
> > +                     pmd = pmd_swp_mksoft_dirty(pmd);
> > +             else if (pmd_present(pmd))
> > +                     pmd = pmd_mksoft_dirty(pmd);
> > +     }
> > +
>
> Wondering, should simply the arch take care of that and we can just clal
> pmd_swp_mksoft_dirty / pmd_mksoft_dirty?

Ok, I think I can do that in another patchset.

>
> >       return pmd;
> >   }
> >
> > diff --git a/mm/internal.h b/mm/internal.h
> > index 45b725c3dc03..c6ca62f8ecf3 100644
> > --- a/mm/internal.h
> > +++ b/mm/internal.h
> > @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
> >        * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
> >        * will be constantly true.
> >        */
> > -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> > +     if (!pgtable_soft_dirty_supported())
> >               return false;
> >
>
> That should be handled with the above never-set-VM_SOFTDIRTY.

We don't need to check if (!pgtable_soft_dirty_supported()) if I
understand correctly.

Thanks for the review,
Chunyan

>
> >       /*
> > diff --git a/mm/mremap.c b/mm/mremap.c
> > index e618a706aff5..7beb3114dbf5 100644
> > --- a/mm/mremap.c
> > +++ b/mm/mremap.c
> > @@ -162,12 +162,13 @@ static pte_t move_soft_dirty_pte(pte_t pte)
> >        * Set soft dirty bit so we can notice
> >        * in userspace the ptes were moved.
> >        */
> > -#ifdef CONFIG_MEM_SOFT_DIRTY
> > -     if (pte_present(pte))
> > -             pte = pte_mksoft_dirty(pte);
> > -     else if (is_swap_pte(pte))
> > -             pte = pte_swp_mksoft_dirty(pte);
> > -#endif
> > +     if (pgtable_soft_dirty_supported()) {
> > +             if (pte_present(pte))
> > +                     pte = pte_mksoft_dirty(pte);
> > +             else if (is_swap_pte(pte))
> > +                     pte = pte_swp_mksoft_dirty(pte);
> > +     }
> > +
> >       return pte;
> >   }
> >
> --
> Cheers
>
> David / dhildenb
>
Re: [PATCH v11 1/5] mm: softdirty: Add pgtable_soft_dirty_supported()
Posted by David Hildenbrand 2 weeks, 6 days ago
[...]

>>> +/*
>>> + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is
>>> + * unavailable on which the kernel is running, even if the architecture
>>> + * provides the resource and soft-dirty is compiled in.
>>> + */
>>> +#ifdef CONFIG_MEM_SOFT_DIRTY
>>> +     if (!pgtable_soft_dirty_supported())
>>> +             mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
>>> +#endif
>>
>> You can now drop the ifdef.
> 
> Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if
> MEM_SOFT_DIRTY is compiled in, right?
> 
> Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here.

The whole hunk will not be required when we make sure VM_SOFTDIRTY never 
gets set, correct?

> 
>>
>> But, I wonder if could we instead just stop setting the flag. Then we don't
>> have to worry about any VM_SOFTDIRTY checks.
>>
>> Something like the following
>>
>> diff --git a/include/linux/mm.h b/include/linux/mm.h
>> index 892fe5dbf9de0..8b8bf63a32ef7 100644
>> --- a/include/linux/mm.h
>> +++ b/include/linux/mm.h
>> @@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
>>    static inline void vm_flags_init(struct vm_area_struct *vma,
>>                                   vm_flags_t flags)
>>    {
>> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
>>          ACCESS_PRIVATE(vma, __vm_flags) = flags;
>>    }
>>
>> @@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma,
>>    static inline void vm_flags_reset_once(struct vm_area_struct *vma,
>>                                         vm_flags_t flags)
>>    {
>> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
>>          vma_assert_write_locked(vma);
>>          WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
>>    }
>> @@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma,
>>    static inline void vm_flags_set(struct vm_area_struct *vma,
>>                                  vm_flags_t flags)
>>    {
>> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
>>          vma_start_write(vma);
>>          ACCESS_PRIVATE(vma, __vm_flags) |= flags;
>>    }
>> diff --git a/mm/mmap.c b/mm/mmap.c
>> index 5fd3b80fda1d5..40cb3fbf9a247 100644
>> --- a/mm/mmap.c
>> +++ b/mm/mmap.c
>> @@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping(
>>                  return ERR_PTR(-ENOMEM);
>>
>>          vma_set_range(vma, addr, addr + len, 0);
>> -       vm_flags_init(vma, (vm_flags | mm->def_flags |
>> -                     VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK);
>> +       vm_flags |= mm->def_flags | VM_DONTEXPAND;
> 
> Why use '|=' rather than not directly setting vm_flags which is an
> uninitialized variable?

vm_flags is passed in by the caller?

But just to clarify: this code was just a quick hack, adjust it as you need.

[...]

>>>
>>> +     if (!pgtable_soft_dirty_supported())
>>> +             return;
>>> +
>>>        if (pmd_present(pmd)) {
>>>                /* See comment in change_huge_pmd() */
>>>                old = pmdp_invalidate(vma, addr, pmdp);
>>
>> That would all be handled with the above never-set-VM_SOFTDIRTY.

I meant that there is no need to add the pgtable_soft_dirty_supported() 
check.

> 
> Sorry I'm not sure I understand here, you mean no longer need #ifdef
> CONFIG_MEM_SOFT_DIRTY for these function definitions, right?

Likely we could drop them. VM_SOFTDIRTY will never be set so the code 
will not be invoked.

And for architectures where VM_SOFTDIRTY is never even possible 
(!CONFIG_MEM_SOFT_DIRTY) we keep it as 0.

That way, the compiler can even optimize out all of that code because

"vma->vm_flags & VM_SOFTDIRTY" -> "vma->vm_flags & 0"

will never be true.

> 
>>
>>> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
>>> index 4c035637eeb7..2a3578a4ae4c 100644
>>> --- a/include/linux/pgtable.h
>>> +++ b/include/linux/pgtable.h
>>> @@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
>>>    #define arch_start_context_switch(prev)     do {} while (0)
>>>    #endif
>>>
>>> +/*
>>> + * Some platforms can customize the PTE soft-dirty bit making it unavailable
>>> + * even if the architecture provides the resource.
>>> + * Adding this API allows architectures to add their own checks for the
>>> + * devices on which the kernel is running.
>>> + * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY
>>> + * is part of this macro.
>>> + */
>>> +#ifndef pgtable_soft_dirty_supported
>>> +#define pgtable_soft_dirty_supported()       IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
>>> +#endif
>>> +
>>>    #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
>>>    #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
>>>    static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
>>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
>>> index 830107b6dd08..b32ce2b0b998 100644
>>> --- a/mm/debug_vm_pgtable.c
>>> +++ b/mm/debug_vm_pgtable.c
>>> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
>>>    {
>>>        pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
>>>
>>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
>>> +     if (!pgtable_soft_dirty_supported())
>>>                return;
>>>
>>>        pr_debug("Validating PTE soft dirty\n");
>>> @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
>>>    {
>>>        pte_t pte;
>>>
>>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
>>> +     if (!pgtable_soft_dirty_supported())
>>>                return;
>>>
>>>        pr_debug("Validating PTE swap soft dirty\n");
>>> @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
>>>    {
>>>        pmd_t pmd;
>>>
>>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
>>> +     if (!pgtable_soft_dirty_supported())
>>>                return;
>>>
>>>        if (!has_transparent_hugepage())
>>> @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
>>>    {
>>>        pmd_t pmd;
>>>
>>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
>>> -             !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
>>> +     if (!pgtable_soft_dirty_supported() ||
>>> +         !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
>>>                return;
>>>
>>>        if (!has_transparent_hugepage())
>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>> index 9c38a95e9f09..218d430a2ec6 100644
>>> --- a/mm/huge_memory.c
>>> +++ b/mm/huge_memory.c
>>> @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
>>>
>>>    static pmd_t move_soft_dirty_pmd(pmd_t pmd)
>>>    {
>>> -#ifdef CONFIG_MEM_SOFT_DIRTY
>>> -     if (unlikely(is_pmd_migration_entry(pmd)))
>>> -             pmd = pmd_swp_mksoft_dirty(pmd);
>>> -     else if (pmd_present(pmd))
>>> -             pmd = pmd_mksoft_dirty(pmd);
>>> -#endif
>>> +     if (pgtable_soft_dirty_supported()) {
>>> +             if (unlikely(is_pmd_migration_entry(pmd)))
>>> +                     pmd = pmd_swp_mksoft_dirty(pmd);
>>> +             else if (pmd_present(pmd))
>>> +                     pmd = pmd_mksoft_dirty(pmd);
>>> +     }
>>> +
>>
>> Wondering, should simply the arch take care of that and we can just clal
>> pmd_swp_mksoft_dirty / pmd_mksoft_dirty?
> 

I think we have that already in include/linux/pgtable.h:

We have stubs that just don't do anything.

For riscv support you would handle runtime-enablement in these helpers.

> 
>>
>>>        return pmd;
>>>    }
>>>
>>> diff --git a/mm/internal.h b/mm/internal.h
>>> index 45b725c3dc03..c6ca62f8ecf3 100644
>>> --- a/mm/internal.h
>>> +++ b/mm/internal.h
>>> @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
>>>         * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
>>>         * will be constantly true.
>>>         */
>>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
>>> +     if (!pgtable_soft_dirty_supported())
>>>                return false;
>>>
>>
>> That should be handled with the above never-set-VM_SOFTDIRTY.
> 
> We don't need to check if (!pgtable_soft_dirty_supported()) if I
> understand correctly.
Hm, let me think about that. No, I think this has to stay as the comment
says, so this case here is special.

-- 
Cheers

David / dhildenb
Re: [PATCH v11 1/5] mm: softdirty: Add pgtable_soft_dirty_supported()
Posted by Chunyan Zhang 2 weeks, 6 days ago
On Fri, 12 Sept 2025 at 16:41, David Hildenbrand <david@redhat.com> wrote:
>
> [...]
>
> >>> +/*
> >>> + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is
> >>> + * unavailable on which the kernel is running, even if the architecture
> >>> + * provides the resource and soft-dirty is compiled in.
> >>> + */
> >>> +#ifdef CONFIG_MEM_SOFT_DIRTY
> >>> +     if (!pgtable_soft_dirty_supported())
> >>> +             mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
> >>> +#endif
> >>
> >> You can now drop the ifdef.
> >
> > Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if
> > MEM_SOFT_DIRTY is compiled in, right?
> >
> > Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here.
>
> The whole hunk will not be required when we make sure VM_SOFTDIRTY never
> gets set, correct?

Oh no, this hunk code does not set vmflag.
The mnemonics[ilog2(VM_SOFTDIRTY)] is for show_smap_vma_flags(),
something like below:
# cat /proc/1/smaps
5555605c7000-555560680000 r-xp 00000000 fe:00 19
  /bin/busybox
...
VmFlags: rd ex mr mw me sd

'sd' is for soft-dirty

I think this is still needed, right?

>
> >
> >>
> >> But, I wonder if could we instead just stop setting the flag. Then we don't
> >> have to worry about any VM_SOFTDIRTY checks.
> >>
> >> Something like the following
> >>
> >> diff --git a/include/linux/mm.h b/include/linux/mm.h
> >> index 892fe5dbf9de0..8b8bf63a32ef7 100644
> >> --- a/include/linux/mm.h
> >> +++ b/include/linux/mm.h
> >> @@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm)
> >>    static inline void vm_flags_init(struct vm_area_struct *vma,
> >>                                   vm_flags_t flags)
> >>    {
> >> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
> >>          ACCESS_PRIVATE(vma, __vm_flags) = flags;
> >>    }
> >>
> >> @@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma,
> >>    static inline void vm_flags_reset_once(struct vm_area_struct *vma,
> >>                                         vm_flags_t flags)
> >>    {
> >> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
> >>          vma_assert_write_locked(vma);
> >>          WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags);
> >>    }
> >> @@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma,
> >>    static inline void vm_flags_set(struct vm_area_struct *vma,
> >>                                  vm_flags_t flags)
> >>    {
> >> +       VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY));
> >>          vma_start_write(vma);
> >>          ACCESS_PRIVATE(vma, __vm_flags) |= flags;
> >>    }
> >> diff --git a/mm/mmap.c b/mm/mmap.c
> >> index 5fd3b80fda1d5..40cb3fbf9a247 100644
> >> --- a/mm/mmap.c
> >> +++ b/mm/mmap.c
> >> @@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping(
> >>                  return ERR_PTR(-ENOMEM);
> >>
> >>          vma_set_range(vma, addr, addr + len, 0);
> >> -       vm_flags_init(vma, (vm_flags | mm->def_flags |
> >> -                     VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK);
> >> +       vm_flags |= mm->def_flags | VM_DONTEXPAND;
> >
> > Why use '|=' rather than not directly setting vm_flags which is an
> > uninitialized variable?
>
> vm_flags is passed in by the caller?
>

Then the original code seems wrong.

> But just to clarify: this code was just a quick hack, adjust it as you need.

Got it.

>
> [...]
>
> >>>
> >>> +     if (!pgtable_soft_dirty_supported())
> >>> +             return;
> >>> +
> >>>        if (pmd_present(pmd)) {
> >>>                /* See comment in change_huge_pmd() */
> >>>                old = pmdp_invalidate(vma, addr, pmdp);
> >>
> >> That would all be handled with the above never-set-VM_SOFTDIRTY.
>
> I meant that there is no need to add the pgtable_soft_dirty_supported()
> check.

Ok I will take a look.

>
> >
> > Sorry I'm not sure I understand here, you mean no longer need #ifdef
> > CONFIG_MEM_SOFT_DIRTY for these function definitions, right?
>
> Likely we could drop them. VM_SOFTDIRTY will never be set so the code
> will not be invoked.

The relationship of VM_SOFTDIRTY and clear_soft_dirty_pmd() is not
very direct from the first sight, let me take a further look.

>
> And for architectures where VM_SOFTDIRTY is never even possible
> (!CONFIG_MEM_SOFT_DIRTY) we keep it as 0.

Ok.

>
> That way, the compiler can even optimize out all of that code because
>
> "vma->vm_flags & VM_SOFTDIRTY" -> "vma->vm_flags & 0"
>
> will never be true.
>
> >
> >>
> >>> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
> >>> index 4c035637eeb7..2a3578a4ae4c 100644
> >>> --- a/include/linux/pgtable.h
> >>> +++ b/include/linux/pgtable.h
> >>> @@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
> >>>    #define arch_start_context_switch(prev)     do {} while (0)
> >>>    #endif
> >>>
> >>> +/*
> >>> + * Some platforms can customize the PTE soft-dirty bit making it unavailable
> >>> + * even if the architecture provides the resource.
> >>> + * Adding this API allows architectures to add their own checks for the
> >>> + * devices on which the kernel is running.
> >>> + * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY
> >>> + * is part of this macro.
> >>> + */
> >>> +#ifndef pgtable_soft_dirty_supported
> >>> +#define pgtable_soft_dirty_supported()       IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
> >>> +#endif
> >>> +
> >>>    #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
> >>>    #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
> >>>    static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
> >>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
> >>> index 830107b6dd08..b32ce2b0b998 100644
> >>> --- a/mm/debug_vm_pgtable.c
> >>> +++ b/mm/debug_vm_pgtable.c
> >>> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
> >>>    {
> >>>        pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
> >>>
> >>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> >>> +     if (!pgtable_soft_dirty_supported())
> >>>                return;
> >>>
> >>>        pr_debug("Validating PTE soft dirty\n");
> >>> @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
> >>>    {
> >>>        pte_t pte;
> >>>
> >>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> >>> +     if (!pgtable_soft_dirty_supported())
> >>>                return;
> >>>
> >>>        pr_debug("Validating PTE swap soft dirty\n");
> >>> @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
> >>>    {
> >>>        pmd_t pmd;
> >>>
> >>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> >>> +     if (!pgtable_soft_dirty_supported())
> >>>                return;
> >>>
> >>>        if (!has_transparent_hugepage())
> >>> @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
> >>>    {
> >>>        pmd_t pmd;
> >>>
> >>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
> >>> -             !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
> >>> +     if (!pgtable_soft_dirty_supported() ||
> >>> +         !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
> >>>                return;
> >>>
> >>>        if (!has_transparent_hugepage())
> >>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> >>> index 9c38a95e9f09..218d430a2ec6 100644
> >>> --- a/mm/huge_memory.c
> >>> +++ b/mm/huge_memory.c
> >>> @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
> >>>
> >>>    static pmd_t move_soft_dirty_pmd(pmd_t pmd)
> >>>    {
> >>> -#ifdef CONFIG_MEM_SOFT_DIRTY
> >>> -     if (unlikely(is_pmd_migration_entry(pmd)))
> >>> -             pmd = pmd_swp_mksoft_dirty(pmd);
> >>> -     else if (pmd_present(pmd))
> >>> -             pmd = pmd_mksoft_dirty(pmd);
> >>> -#endif
> >>> +     if (pgtable_soft_dirty_supported()) {
> >>> +             if (unlikely(is_pmd_migration_entry(pmd)))
> >>> +                     pmd = pmd_swp_mksoft_dirty(pmd);
> >>> +             else if (pmd_present(pmd))
> >>> +                     pmd = pmd_mksoft_dirty(pmd);
> >>> +     }
> >>> +
> >>
> >> Wondering, should simply the arch take care of that and we can just clal
> >> pmd_swp_mksoft_dirty / pmd_mksoft_dirty?
> >
>
> I think we have that already in include/linux/pgtable.h:
>
> We have stubs that just don't do anything.
>
> For riscv support you would handle runtime-enablement in these helpers.
>
> >
> >>
> >>>        return pmd;
> >>>    }
> >>>
> >>> diff --git a/mm/internal.h b/mm/internal.h
> >>> index 45b725c3dc03..c6ca62f8ecf3 100644
> >>> --- a/mm/internal.h
> >>> +++ b/mm/internal.h
> >>> @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
> >>>         * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
> >>>         * will be constantly true.
> >>>         */
> >>> -     if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
> >>> +     if (!pgtable_soft_dirty_supported())
> >>>                return false;
> >>>
> >>
> >> That should be handled with the above never-set-VM_SOFTDIRTY.
> >
> > We don't need to check if (!pgtable_soft_dirty_supported()) if I
> > understand correctly.
> Hm, let me think about that. No, I think this has to stay as the comment
> says, so this case here is special.

I will cook a new version and then we can discuss further based on the
new patch.

Thanks for your review,
Chunyan
Re: [PATCH v11 1/5] mm: softdirty: Add pgtable_soft_dirty_supported()
Posted by David Hildenbrand 2 weeks, 6 days ago
On 12.09.25 11:21, Chunyan Zhang wrote:
> On Fri, 12 Sept 2025 at 16:41, David Hildenbrand <david@redhat.com> wrote:
>>
>> [...]
>>
>>>>> +/*
>>>>> + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is
>>>>> + * unavailable on which the kernel is running, even if the architecture
>>>>> + * provides the resource and soft-dirty is compiled in.
>>>>> + */
>>>>> +#ifdef CONFIG_MEM_SOFT_DIRTY
>>>>> +     if (!pgtable_soft_dirty_supported())
>>>>> +             mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
>>>>> +#endif
>>>>
>>>> You can now drop the ifdef.
>>>
>>> Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if
>>> MEM_SOFT_DIRTY is compiled in, right?
>>>
>>> Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here.
>>
>> The whole hunk will not be required when we make sure VM_SOFTDIRTY never
>> gets set, correct?
> 
> Oh no, this hunk code does not set vmflag.
> The mnemonics[ilog2(VM_SOFTDIRTY)] is for show_smap_vma_flags(),
> something like below:
> # cat /proc/1/smaps
> 5555605c7000-555560680000 r-xp 00000000 fe:00 19
>    /bin/busybox
> ...
> VmFlags: rd ex mr mw me sd
> 
> 'sd' is for soft-dirty
> 
> I think this is still needed, right?

If nobody sets VM_SOFTDIRTY in vma->vm_flags, then we will never print it.

So you can just leave the "#ifdef CONFIG_MEM_SOFT_DIRTY" as is to handle 
the VM_SOFTDIRTY=0 case.

So you should not have to change anything in show_smap_vma_flags().

[...]

>>>> That should be handled with the above never-set-VM_SOFTDIRTY.
>>>
>>> We don't need to check if (!pgtable_soft_dirty_supported()) if I
>>> understand correctly.
>> Hm, let me think about that. No, I think this has to stay as the comment
>> says, so this case here is special.
> 
> I will cook a new version and then we can discuss further based on the
> new patch.


Sounds good!


-- 
Cheers

David / dhildenb