Some platforms can customize the PTE PMD entry soft-dirty bit making it
unavailable even if the architecture provides the resource.
Add an API which architectures can define their specific implementations
to detect if soft-dirty bit is available on which device the kernel is
running.
Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
---
fs/proc/task_mmu.c | 17 ++++++++++++++++-
include/linux/pgtable.h | 12 ++++++++++++
mm/debug_vm_pgtable.c | 10 +++++-----
mm/huge_memory.c | 13 +++++++------
mm/internal.h | 2 +-
mm/mremap.c | 13 +++++++------
mm/userfaultfd.c | 10 ++++------
7 files changed, 52 insertions(+), 25 deletions(-)
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 29cca0e6d0ff..9e8083b6d4cd 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
* -Werror=unterminated-string-initialization warning
* with GCC 15
*/
- static const char mnemonics[BITS_PER_LONG][3] = {
+ static char mnemonics[BITS_PER_LONG][3] = {
/*
* In case if we meet a flag we don't know about.
*/
@@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_SEALED)] = "sl",
#endif
};
+/*
+ * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is
+ * unavailable on which the kernel is running, even if the architecture
+ * provides the resource and soft-dirty is compiled in.
+ */
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ if (!pgtable_soft_dirty_supported())
+ mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0;
+#endif
+
size_t i;
seq_puts(m, "VmFlags: ");
@@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr,
static inline void clear_soft_dirty(struct vm_area_struct *vma,
unsigned long addr, pte_t *pte)
{
+ if (!pgtable_soft_dirty_supported())
+ return;
/*
* The soft-dirty tracker uses #PF-s to catch writes
* to pages, so write-protect the pte as well. See the
@@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma,
{
pmd_t old, pmd = *pmdp;
+ if (!pgtable_soft_dirty_supported())
+ return;
+
if (pmd_present(pmd)) {
/* See comment in change_huge_pmd() */
old = pmdp_invalidate(vma, addr, pmdp);
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index 4c035637eeb7..2a3578a4ae4c 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
#define arch_start_context_switch(prev) do {} while (0)
#endif
+/*
+ * Some platforms can customize the PTE soft-dirty bit making it unavailable
+ * even if the architecture provides the resource.
+ * Adding this API allows architectures to add their own checks for the
+ * devices on which the kernel is running.
+ * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY
+ * is part of this macro.
+ */
+#ifndef pgtable_soft_dirty_supported
+#define pgtable_soft_dirty_supported() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)
+#endif
+
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
#ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION
static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd)
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index 830107b6dd08..b32ce2b0b998 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args)
{
pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot);
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ if (!pgtable_soft_dirty_supported())
return;
pr_debug("Validating PTE soft dirty\n");
@@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args)
{
pte_t pte;
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ if (!pgtable_soft_dirty_supported())
return;
pr_debug("Validating PTE swap soft dirty\n");
@@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args)
{
pmd_t pmd;
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ if (!pgtable_soft_dirty_supported())
return;
if (!has_transparent_hugepage())
@@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args)
{
pmd_t pmd;
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) ||
- !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
+ if (!pgtable_soft_dirty_supported() ||
+ !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION))
return;
if (!has_transparent_hugepage())
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 9c38a95e9f09..218d430a2ec6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl,
static pmd_t move_soft_dirty_pmd(pmd_t pmd)
{
-#ifdef CONFIG_MEM_SOFT_DIRTY
- if (unlikely(is_pmd_migration_entry(pmd)))
- pmd = pmd_swp_mksoft_dirty(pmd);
- else if (pmd_present(pmd))
- pmd = pmd_mksoft_dirty(pmd);
-#endif
+ if (pgtable_soft_dirty_supported()) {
+ if (unlikely(is_pmd_migration_entry(pmd)))
+ pmd = pmd_swp_mksoft_dirty(pmd);
+ else if (pmd_present(pmd))
+ pmd = pmd_mksoft_dirty(pmd);
+ }
+
return pmd;
}
diff --git a/mm/internal.h b/mm/internal.h
index 45b725c3dc03..c6ca62f8ecf3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma)
* VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY)
* will be constantly true.
*/
- if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY))
+ if (!pgtable_soft_dirty_supported())
return false;
/*
diff --git a/mm/mremap.c b/mm/mremap.c
index e618a706aff5..7beb3114dbf5 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -162,12 +162,13 @@ static pte_t move_soft_dirty_pte(pte_t pte)
* Set soft dirty bit so we can notice
* in userspace the ptes were moved.
*/
-#ifdef CONFIG_MEM_SOFT_DIRTY
- if (pte_present(pte))
- pte = pte_mksoft_dirty(pte);
- else if (is_swap_pte(pte))
- pte = pte_swp_mksoft_dirty(pte);
-#endif
+ if (pgtable_soft_dirty_supported()) {
+ if (pte_present(pte))
+ pte = pte_mksoft_dirty(pte);
+ else if (is_swap_pte(pte))
+ pte = pte_swp_mksoft_dirty(pte);
+ }
+
return pte;
}
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 45e6290e2e8b..85f43479b67a 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1065,9 +1065,8 @@ static int move_present_pte(struct mm_struct *mm,
orig_dst_pte = folio_mk_pte(src_folio, dst_vma->vm_page_prot);
/* Set soft dirty bit so userspace can notice the pte was moved */
-#ifdef CONFIG_MEM_SOFT_DIRTY
- orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
-#endif
+ if (pgtable_soft_dirty_supported())
+ orig_dst_pte = pte_mksoft_dirty(orig_dst_pte);
if (pte_dirty(orig_src_pte))
orig_dst_pte = pte_mkdirty(orig_dst_pte);
orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma);
@@ -1134,9 +1133,8 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma,
}
orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte);
-#ifdef CONFIG_MEM_SOFT_DIRTY
- orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
-#endif
+ if (pgtable_soft_dirty_supported())
+ orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte);
set_pte_at(mm, dst_addr, dst_pte, orig_src_pte);
double_pt_unlock(dst_ptl, src_ptl);
--
2.34.1
On 11.09.25 11:55, Chunyan Zhang wrote: > Some platforms can customize the PTE PMD entry soft-dirty bit making it > unavailable even if the architecture provides the resource. > > Add an API which architectures can define their specific implementations > to detect if soft-dirty bit is available on which device the kernel is > running. Thinking to myself: maybe pgtable_supports_soft_dirty() would read better Whatever you prefer. > > Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn> > --- > fs/proc/task_mmu.c | 17 ++++++++++++++++- > include/linux/pgtable.h | 12 ++++++++++++ > mm/debug_vm_pgtable.c | 10 +++++----- > mm/huge_memory.c | 13 +++++++------ > mm/internal.h | 2 +- > mm/mremap.c | 13 +++++++------ > mm/userfaultfd.c | 10 ++++------ > 7 files changed, 52 insertions(+), 25 deletions(-) > > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > index 29cca0e6d0ff..9e8083b6d4cd 100644 > --- a/fs/proc/task_mmu.c > +++ b/fs/proc/task_mmu.c > @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) > * -Werror=unterminated-string-initialization warning > * with GCC 15 > */ > - static const char mnemonics[BITS_PER_LONG][3] = { > + static char mnemonics[BITS_PER_LONG][3] = { > /* > * In case if we meet a flag we don't know about. > */ > @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) > [ilog2(VM_SEALED)] = "sl", > #endif > }; > +/* > + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is > + * unavailable on which the kernel is running, even if the architecture > + * provides the resource and soft-dirty is compiled in. > + */ > +#ifdef CONFIG_MEM_SOFT_DIRTY > + if (!pgtable_soft_dirty_supported()) > + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0; > +#endif You can now drop the ifdef. But, I wonder if could we instead just stop setting the flag. Then we don't have to worry about any VM_SOFTDIRTY checks. Something like the following diff --git a/include/linux/mm.h b/include/linux/mm.h index 892fe5dbf9de0..8b8bf63a32ef7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) static inline void vm_flags_init(struct vm_area_struct *vma, vm_flags_t flags) { + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); ACCESS_PRIVATE(vma, __vm_flags) = flags; } @@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma, static inline void vm_flags_reset_once(struct vm_area_struct *vma, vm_flags_t flags) { + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); vma_assert_write_locked(vma); WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); } @@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma, static inline void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags) { + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); vma_start_write(vma); ACCESS_PRIVATE(vma, __vm_flags) |= flags; } diff --git a/mm/mmap.c b/mm/mmap.c index 5fd3b80fda1d5..40cb3fbf9a247 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping( return ERR_PTR(-ENOMEM); vma_set_range(vma, addr, addr + len, 0); - vm_flags_init(vma, (vm_flags | mm->def_flags | - VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK); + vm_flags |= mm->def_flags | VM_DONTEXPAND; + if (pgtable_soft_dirty_supported()) + vm_flags |= VM_SOFTDIRTY; + vm_flags_init(vma, vm_flags & ~VM_LOCKED_MASK); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); vma->vm_ops = ops; diff --git a/mm/vma.c b/mm/vma.c index abe0da33c8446..16a1ed2a6199c 100644 --- a/mm/vma.c +++ b/mm/vma.c @@ -2551,7 +2551,8 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma) * then new mapped in-place (which must be aimed as * a completely new data area). */ - vm_flags_set(vma, VM_SOFTDIRTY); + if (pgtable_soft_dirty_supported()) + vm_flags_set(vma, VM_SOFTDIRTY); vma_set_page_prot(vma); } @@ -2819,7 +2820,8 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, mm->data_vm += len >> PAGE_SHIFT; if (vm_flags & VM_LOCKED) mm->locked_vm += (len >> PAGE_SHIFT); - vm_flags_set(vma, VM_SOFTDIRTY); + if (pgtable_soft_dirty_supported()) + vm_flags_set(vma, VM_SOFTDIRTY); return 0; mas_store_fail: diff --git a/mm/vma_exec.c b/mm/vma_exec.c index 922ee51747a68..c06732a5a620a 100644 --- a/mm/vma_exec.c +++ b/mm/vma_exec.c @@ -107,6 +107,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, unsigned long *top_mem_p) { + unsigned long flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; int err; struct vm_area_struct *vma = vm_area_alloc(mm); @@ -137,7 +138,9 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; - vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP); + if (pgtable_soft_dirty_supported()) + flags |= VM_SOFTDIRTY; + vm_flags_init(vma, flags); vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); err = insert_vm_struct(mm, vma); > + > size_t i; > > seq_puts(m, "VmFlags: "); > @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, > static inline void clear_soft_dirty(struct vm_area_struct *vma, > unsigned long addr, pte_t *pte) > { > + if (!pgtable_soft_dirty_supported()) > + return; > /* > * The soft-dirty tracker uses #PF-s to catch writes > * to pages, so write-protect the pte as well. See the > @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, > { > pmd_t old, pmd = *pmdp; > > + if (!pgtable_soft_dirty_supported()) > + return; > + > if (pmd_present(pmd)) { > /* See comment in change_huge_pmd() */ > old = pmdp_invalidate(vma, addr, pmdp); That would all be handled with the above never-set-VM_SOFTDIRTY. > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > index 4c035637eeb7..2a3578a4ae4c 100644 > --- a/include/linux/pgtable.h > +++ b/include/linux/pgtable.h > @@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) > #define arch_start_context_switch(prev) do {} while (0) > #endif > > +/* > + * Some platforms can customize the PTE soft-dirty bit making it unavailable > + * even if the architecture provides the resource. > + * Adding this API allows architectures to add their own checks for the > + * devices on which the kernel is running. > + * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY > + * is part of this macro. > + */ > +#ifndef pgtable_soft_dirty_supported > +#define pgtable_soft_dirty_supported() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) > +#endif > + > #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY > #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION > static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) > diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c > index 830107b6dd08..b32ce2b0b998 100644 > --- a/mm/debug_vm_pgtable.c > +++ b/mm/debug_vm_pgtable.c > @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) > { > pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > + if (!pgtable_soft_dirty_supported()) > return; > > pr_debug("Validating PTE soft dirty\n"); > @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) > { > pte_t pte; > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > + if (!pgtable_soft_dirty_supported()) > return; > > pr_debug("Validating PTE swap soft dirty\n"); > @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) > { > pmd_t pmd; > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > + if (!pgtable_soft_dirty_supported()) > return; > > if (!has_transparent_hugepage()) > @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) > { > pmd_t pmd; > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || > - !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) > + if (!pgtable_soft_dirty_supported() || > + !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) > return; > > if (!has_transparent_hugepage()) > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > index 9c38a95e9f09..218d430a2ec6 100644 > --- a/mm/huge_memory.c > +++ b/mm/huge_memory.c > @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, > > static pmd_t move_soft_dirty_pmd(pmd_t pmd) > { > -#ifdef CONFIG_MEM_SOFT_DIRTY > - if (unlikely(is_pmd_migration_entry(pmd))) > - pmd = pmd_swp_mksoft_dirty(pmd); > - else if (pmd_present(pmd)) > - pmd = pmd_mksoft_dirty(pmd); > -#endif > + if (pgtable_soft_dirty_supported()) { > + if (unlikely(is_pmd_migration_entry(pmd))) > + pmd = pmd_swp_mksoft_dirty(pmd); > + else if (pmd_present(pmd)) > + pmd = pmd_mksoft_dirty(pmd); > + } > + Wondering, should simply the arch take care of that and we can just clal pmd_swp_mksoft_dirty / pmd_mksoft_dirty? > return pmd; > } > > diff --git a/mm/internal.h b/mm/internal.h > index 45b725c3dc03..c6ca62f8ecf3 100644 > --- a/mm/internal.h > +++ b/mm/internal.h > @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) > * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) > * will be constantly true. > */ > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > + if (!pgtable_soft_dirty_supported()) > return false; > That should be handled with the above never-set-VM_SOFTDIRTY. > /* > diff --git a/mm/mremap.c b/mm/mremap.c > index e618a706aff5..7beb3114dbf5 100644 > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -162,12 +162,13 @@ static pte_t move_soft_dirty_pte(pte_t pte) > * Set soft dirty bit so we can notice > * in userspace the ptes were moved. > */ > -#ifdef CONFIG_MEM_SOFT_DIRTY > - if (pte_present(pte)) > - pte = pte_mksoft_dirty(pte); > - else if (is_swap_pte(pte)) > - pte = pte_swp_mksoft_dirty(pte); > -#endif > + if (pgtable_soft_dirty_supported()) { > + if (pte_present(pte)) > + pte = pte_mksoft_dirty(pte); > + else if (is_swap_pte(pte)) > + pte = pte_swp_mksoft_dirty(pte); > + } > + > return pte; > } > -- Cheers David / dhildenb
Hi David, On Thu, 11 Sept 2025 at 21:09, David Hildenbrand <david@redhat.com> wrote: > > On 11.09.25 11:55, Chunyan Zhang wrote: > > Some platforms can customize the PTE PMD entry soft-dirty bit making it > > unavailable even if the architecture provides the resource. > > > > Add an API which architectures can define their specific implementations > > to detect if soft-dirty bit is available on which device the kernel is > > running. > > Thinking to myself: maybe pgtable_supports_soft_dirty() would read better > Whatever you prefer. I will use pgtable_supports_* in the next version. > > > > Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn> > > --- > > fs/proc/task_mmu.c | 17 ++++++++++++++++- > > include/linux/pgtable.h | 12 ++++++++++++ > > mm/debug_vm_pgtable.c | 10 +++++----- > > mm/huge_memory.c | 13 +++++++------ > > mm/internal.h | 2 +- > > mm/mremap.c | 13 +++++++------ > > mm/userfaultfd.c | 10 ++++------ > > 7 files changed, 52 insertions(+), 25 deletions(-) > > > > diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c > > index 29cca0e6d0ff..9e8083b6d4cd 100644 > > --- a/fs/proc/task_mmu.c > > +++ b/fs/proc/task_mmu.c > > @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) > > * -Werror=unterminated-string-initialization warning > > * with GCC 15 > > */ > > - static const char mnemonics[BITS_PER_LONG][3] = { > > + static char mnemonics[BITS_PER_LONG][3] = { > > /* > > * In case if we meet a flag we don't know about. > > */ > > @@ -1129,6 +1129,16 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) > > [ilog2(VM_SEALED)] = "sl", > > #endif > > }; > > +/* > > + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is > > + * unavailable on which the kernel is running, even if the architecture > > + * provides the resource and soft-dirty is compiled in. > > + */ > > +#ifdef CONFIG_MEM_SOFT_DIRTY > > + if (!pgtable_soft_dirty_supported()) > > + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0; > > +#endif > > You can now drop the ifdef. Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if MEM_SOFT_DIRTY is compiled in, right? Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here. > > But, I wonder if could we instead just stop setting the flag. Then we don't > have to worry about any VM_SOFTDIRTY checks. > > Something like the following > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 892fe5dbf9de0..8b8bf63a32ef7 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) > static inline void vm_flags_init(struct vm_area_struct *vma, > vm_flags_t flags) > { > + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); > ACCESS_PRIVATE(vma, __vm_flags) = flags; > } > > @@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma, > static inline void vm_flags_reset_once(struct vm_area_struct *vma, > vm_flags_t flags) > { > + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); > vma_assert_write_locked(vma); > WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); > } > @@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma, > static inline void vm_flags_set(struct vm_area_struct *vma, > vm_flags_t flags) > { > + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); > vma_start_write(vma); > ACCESS_PRIVATE(vma, __vm_flags) |= flags; > } > diff --git a/mm/mmap.c b/mm/mmap.c > index 5fd3b80fda1d5..40cb3fbf9a247 100644 > --- a/mm/mmap.c > +++ b/mm/mmap.c > @@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping( > return ERR_PTR(-ENOMEM); > > vma_set_range(vma, addr, addr + len, 0); > - vm_flags_init(vma, (vm_flags | mm->def_flags | > - VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK); > + vm_flags |= mm->def_flags | VM_DONTEXPAND; Why use '|=' rather than not directly setting vm_flags which is an uninitialized variable? > + if (pgtable_soft_dirty_supported()) > + vm_flags |= VM_SOFTDIRTY; > + vm_flags_init(vma, vm_flags & ~VM_LOCKED_MASK); > vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); > > vma->vm_ops = ops; > diff --git a/mm/vma.c b/mm/vma.c > index abe0da33c8446..16a1ed2a6199c 100644 > --- a/mm/vma.c > +++ b/mm/vma.c > @@ -2551,7 +2551,8 @@ static void __mmap_complete(struct mmap_state *map, struct vm_area_struct *vma) > * then new mapped in-place (which must be aimed as > * a completely new data area). > */ > - vm_flags_set(vma, VM_SOFTDIRTY); > + if (pgtable_soft_dirty_supported()) > + vm_flags_set(vma, VM_SOFTDIRTY); > > vma_set_page_prot(vma); > } > @@ -2819,7 +2820,8 @@ int do_brk_flags(struct vma_iterator *vmi, struct vm_area_struct *vma, > mm->data_vm += len >> PAGE_SHIFT; > if (vm_flags & VM_LOCKED) > mm->locked_vm += (len >> PAGE_SHIFT); > - vm_flags_set(vma, VM_SOFTDIRTY); > + if (pgtable_soft_dirty_supported()) > + vm_flags_set(vma, VM_SOFTDIRTY); > return 0; > > mas_store_fail: > diff --git a/mm/vma_exec.c b/mm/vma_exec.c > index 922ee51747a68..c06732a5a620a 100644 > --- a/mm/vma_exec.c > +++ b/mm/vma_exec.c > @@ -107,6 +107,7 @@ int relocate_vma_down(struct vm_area_struct *vma, unsigned long shift) > int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, > unsigned long *top_mem_p) > { > + unsigned long flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; > int err; > struct vm_area_struct *vma = vm_area_alloc(mm); > > @@ -137,7 +138,9 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap, > BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); > vma->vm_end = STACK_TOP_MAX; > vma->vm_start = vma->vm_end - PAGE_SIZE; > - vm_flags_init(vma, VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP); > + if (pgtable_soft_dirty_supported()) > + flags |= VM_SOFTDIRTY; > + vm_flags_init(vma, flags); > vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); > > err = insert_vm_struct(mm, vma); > > > > + > > size_t i; > > > > seq_puts(m, "VmFlags: "); > > @@ -1531,6 +1541,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, > > static inline void clear_soft_dirty(struct vm_area_struct *vma, > > unsigned long addr, pte_t *pte) > > { > > + if (!pgtable_soft_dirty_supported()) > > + return; > > /* > > * The soft-dirty tracker uses #PF-s to catch writes > > * to pages, so write-protect the pte as well. See the > > @@ -1566,6 +1578,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, > > { > > pmd_t old, pmd = *pmdp; > > > > + if (!pgtable_soft_dirty_supported()) > > + return; > > + > > if (pmd_present(pmd)) { > > /* See comment in change_huge_pmd() */ > > old = pmdp_invalidate(vma, addr, pmdp); > > That would all be handled with the above never-set-VM_SOFTDIRTY. Sorry I'm not sure I understand here, you mean no longer need #ifdef CONFIG_MEM_SOFT_DIRTY for these function definitions, right? > > > diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > > index 4c035637eeb7..2a3578a4ae4c 100644 > > --- a/include/linux/pgtable.h > > +++ b/include/linux/pgtable.h > > @@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) > > #define arch_start_context_switch(prev) do {} while (0) > > #endif > > > > +/* > > + * Some platforms can customize the PTE soft-dirty bit making it unavailable > > + * even if the architecture provides the resource. > > + * Adding this API allows architectures to add their own checks for the > > + * devices on which the kernel is running. > > + * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY > > + * is part of this macro. > > + */ > > +#ifndef pgtable_soft_dirty_supported > > +#define pgtable_soft_dirty_supported() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) > > +#endif > > + > > #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY > > #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION > > static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) > > diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c > > index 830107b6dd08..b32ce2b0b998 100644 > > --- a/mm/debug_vm_pgtable.c > > +++ b/mm/debug_vm_pgtable.c > > @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) > > { > > pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); > > > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > > + if (!pgtable_soft_dirty_supported()) > > return; > > > > pr_debug("Validating PTE soft dirty\n"); > > @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) > > { > > pte_t pte; > > > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > > + if (!pgtable_soft_dirty_supported()) > > return; > > > > pr_debug("Validating PTE swap soft dirty\n"); > > @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) > > { > > pmd_t pmd; > > > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > > + if (!pgtable_soft_dirty_supported()) > > return; > > > > if (!has_transparent_hugepage()) > > @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) > > { > > pmd_t pmd; > > > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || > > - !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) > > + if (!pgtable_soft_dirty_supported() || > > + !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) > > return; > > > > if (!has_transparent_hugepage()) > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > > index 9c38a95e9f09..218d430a2ec6 100644 > > --- a/mm/huge_memory.c > > +++ b/mm/huge_memory.c > > @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, > > > > static pmd_t move_soft_dirty_pmd(pmd_t pmd) > > { > > -#ifdef CONFIG_MEM_SOFT_DIRTY > > - if (unlikely(is_pmd_migration_entry(pmd))) > > - pmd = pmd_swp_mksoft_dirty(pmd); > > - else if (pmd_present(pmd)) > > - pmd = pmd_mksoft_dirty(pmd); > > -#endif > > + if (pgtable_soft_dirty_supported()) { > > + if (unlikely(is_pmd_migration_entry(pmd))) > > + pmd = pmd_swp_mksoft_dirty(pmd); > > + else if (pmd_present(pmd)) > > + pmd = pmd_mksoft_dirty(pmd); > > + } > > + > > Wondering, should simply the arch take care of that and we can just clal > pmd_swp_mksoft_dirty / pmd_mksoft_dirty? Ok, I think I can do that in another patchset. > > > return pmd; > > } > > > > diff --git a/mm/internal.h b/mm/internal.h > > index 45b725c3dc03..c6ca62f8ecf3 100644 > > --- a/mm/internal.h > > +++ b/mm/internal.h > > @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) > > * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) > > * will be constantly true. > > */ > > - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > > + if (!pgtable_soft_dirty_supported()) > > return false; > > > > That should be handled with the above never-set-VM_SOFTDIRTY. We don't need to check if (!pgtable_soft_dirty_supported()) if I understand correctly. Thanks for the review, Chunyan > > > /* > > diff --git a/mm/mremap.c b/mm/mremap.c > > index e618a706aff5..7beb3114dbf5 100644 > > --- a/mm/mremap.c > > +++ b/mm/mremap.c > > @@ -162,12 +162,13 @@ static pte_t move_soft_dirty_pte(pte_t pte) > > * Set soft dirty bit so we can notice > > * in userspace the ptes were moved. > > */ > > -#ifdef CONFIG_MEM_SOFT_DIRTY > > - if (pte_present(pte)) > > - pte = pte_mksoft_dirty(pte); > > - else if (is_swap_pte(pte)) > > - pte = pte_swp_mksoft_dirty(pte); > > -#endif > > + if (pgtable_soft_dirty_supported()) { > > + if (pte_present(pte)) > > + pte = pte_mksoft_dirty(pte); > > + else if (is_swap_pte(pte)) > > + pte = pte_swp_mksoft_dirty(pte); > > + } > > + > > return pte; > > } > > > -- > Cheers > > David / dhildenb >
[...] >>> +/* >>> + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is >>> + * unavailable on which the kernel is running, even if the architecture >>> + * provides the resource and soft-dirty is compiled in. >>> + */ >>> +#ifdef CONFIG_MEM_SOFT_DIRTY >>> + if (!pgtable_soft_dirty_supported()) >>> + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0; >>> +#endif >> >> You can now drop the ifdef. > > Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if > MEM_SOFT_DIRTY is compiled in, right? > > Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here. The whole hunk will not be required when we make sure VM_SOFTDIRTY never gets set, correct? > >> >> But, I wonder if could we instead just stop setting the flag. Then we don't >> have to worry about any VM_SOFTDIRTY checks. >> >> Something like the following >> >> diff --git a/include/linux/mm.h b/include/linux/mm.h >> index 892fe5dbf9de0..8b8bf63a32ef7 100644 >> --- a/include/linux/mm.h >> +++ b/include/linux/mm.h >> @@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) >> static inline void vm_flags_init(struct vm_area_struct *vma, >> vm_flags_t flags) >> { >> + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); >> ACCESS_PRIVATE(vma, __vm_flags) = flags; >> } >> >> @@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma, >> static inline void vm_flags_reset_once(struct vm_area_struct *vma, >> vm_flags_t flags) >> { >> + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); >> vma_assert_write_locked(vma); >> WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); >> } >> @@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma, >> static inline void vm_flags_set(struct vm_area_struct *vma, >> vm_flags_t flags) >> { >> + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); >> vma_start_write(vma); >> ACCESS_PRIVATE(vma, __vm_flags) |= flags; >> } >> diff --git a/mm/mmap.c b/mm/mmap.c >> index 5fd3b80fda1d5..40cb3fbf9a247 100644 >> --- a/mm/mmap.c >> +++ b/mm/mmap.c >> @@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping( >> return ERR_PTR(-ENOMEM); >> >> vma_set_range(vma, addr, addr + len, 0); >> - vm_flags_init(vma, (vm_flags | mm->def_flags | >> - VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK); >> + vm_flags |= mm->def_flags | VM_DONTEXPAND; > > Why use '|=' rather than not directly setting vm_flags which is an > uninitialized variable? vm_flags is passed in by the caller? But just to clarify: this code was just a quick hack, adjust it as you need. [...] >>> >>> + if (!pgtable_soft_dirty_supported()) >>> + return; >>> + >>> if (pmd_present(pmd)) { >>> /* See comment in change_huge_pmd() */ >>> old = pmdp_invalidate(vma, addr, pmdp); >> >> That would all be handled with the above never-set-VM_SOFTDIRTY. I meant that there is no need to add the pgtable_soft_dirty_supported() check. > > Sorry I'm not sure I understand here, you mean no longer need #ifdef > CONFIG_MEM_SOFT_DIRTY for these function definitions, right? Likely we could drop them. VM_SOFTDIRTY will never be set so the code will not be invoked. And for architectures where VM_SOFTDIRTY is never even possible (!CONFIG_MEM_SOFT_DIRTY) we keep it as 0. That way, the compiler can even optimize out all of that code because "vma->vm_flags & VM_SOFTDIRTY" -> "vma->vm_flags & 0" will never be true. > >> >>> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h >>> index 4c035637eeb7..2a3578a4ae4c 100644 >>> --- a/include/linux/pgtable.h >>> +++ b/include/linux/pgtable.h >>> @@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) >>> #define arch_start_context_switch(prev) do {} while (0) >>> #endif >>> >>> +/* >>> + * Some platforms can customize the PTE soft-dirty bit making it unavailable >>> + * even if the architecture provides the resource. >>> + * Adding this API allows architectures to add their own checks for the >>> + * devices on which the kernel is running. >>> + * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY >>> + * is part of this macro. >>> + */ >>> +#ifndef pgtable_soft_dirty_supported >>> +#define pgtable_soft_dirty_supported() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) >>> +#endif >>> + >>> #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY >>> #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION >>> static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) >>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c >>> index 830107b6dd08..b32ce2b0b998 100644 >>> --- a/mm/debug_vm_pgtable.c >>> +++ b/mm/debug_vm_pgtable.c >>> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) >>> { >>> pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); >>> >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) >>> + if (!pgtable_soft_dirty_supported()) >>> return; >>> >>> pr_debug("Validating PTE soft dirty\n"); >>> @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) >>> { >>> pte_t pte; >>> >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) >>> + if (!pgtable_soft_dirty_supported()) >>> return; >>> >>> pr_debug("Validating PTE swap soft dirty\n"); >>> @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) >>> { >>> pmd_t pmd; >>> >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) >>> + if (!pgtable_soft_dirty_supported()) >>> return; >>> >>> if (!has_transparent_hugepage()) >>> @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) >>> { >>> pmd_t pmd; >>> >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || >>> - !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) >>> + if (!pgtable_soft_dirty_supported() || >>> + !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) >>> return; >>> >>> if (!has_transparent_hugepage()) >>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c >>> index 9c38a95e9f09..218d430a2ec6 100644 >>> --- a/mm/huge_memory.c >>> +++ b/mm/huge_memory.c >>> @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, >>> >>> static pmd_t move_soft_dirty_pmd(pmd_t pmd) >>> { >>> -#ifdef CONFIG_MEM_SOFT_DIRTY >>> - if (unlikely(is_pmd_migration_entry(pmd))) >>> - pmd = pmd_swp_mksoft_dirty(pmd); >>> - else if (pmd_present(pmd)) >>> - pmd = pmd_mksoft_dirty(pmd); >>> -#endif >>> + if (pgtable_soft_dirty_supported()) { >>> + if (unlikely(is_pmd_migration_entry(pmd))) >>> + pmd = pmd_swp_mksoft_dirty(pmd); >>> + else if (pmd_present(pmd)) >>> + pmd = pmd_mksoft_dirty(pmd); >>> + } >>> + >> >> Wondering, should simply the arch take care of that and we can just clal >> pmd_swp_mksoft_dirty / pmd_mksoft_dirty? > I think we have that already in include/linux/pgtable.h: We have stubs that just don't do anything. For riscv support you would handle runtime-enablement in these helpers. > >> >>> return pmd; >>> } >>> >>> diff --git a/mm/internal.h b/mm/internal.h >>> index 45b725c3dc03..c6ca62f8ecf3 100644 >>> --- a/mm/internal.h >>> +++ b/mm/internal.h >>> @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) >>> * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) >>> * will be constantly true. >>> */ >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) >>> + if (!pgtable_soft_dirty_supported()) >>> return false; >>> >> >> That should be handled with the above never-set-VM_SOFTDIRTY. > > We don't need to check if (!pgtable_soft_dirty_supported()) if I > understand correctly. Hm, let me think about that. No, I think this has to stay as the comment says, so this case here is special. -- Cheers David / dhildenb
On Fri, 12 Sept 2025 at 16:41, David Hildenbrand <david@redhat.com> wrote: > > [...] > > >>> +/* > >>> + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is > >>> + * unavailable on which the kernel is running, even if the architecture > >>> + * provides the resource and soft-dirty is compiled in. > >>> + */ > >>> +#ifdef CONFIG_MEM_SOFT_DIRTY > >>> + if (!pgtable_soft_dirty_supported()) > >>> + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0; > >>> +#endif > >> > >> You can now drop the ifdef. > > > > Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if > > MEM_SOFT_DIRTY is compiled in, right? > > > > Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here. > > The whole hunk will not be required when we make sure VM_SOFTDIRTY never > gets set, correct? Oh no, this hunk code does not set vmflag. The mnemonics[ilog2(VM_SOFTDIRTY)] is for show_smap_vma_flags(), something like below: # cat /proc/1/smaps 5555605c7000-555560680000 r-xp 00000000 fe:00 19 /bin/busybox ... VmFlags: rd ex mr mw me sd 'sd' is for soft-dirty I think this is still needed, right? > > > > >> > >> But, I wonder if could we instead just stop setting the flag. Then we don't > >> have to worry about any VM_SOFTDIRTY checks. > >> > >> Something like the following > >> > >> diff --git a/include/linux/mm.h b/include/linux/mm.h > >> index 892fe5dbf9de0..8b8bf63a32ef7 100644 > >> --- a/include/linux/mm.h > >> +++ b/include/linux/mm.h > >> @@ -783,6 +783,7 @@ static inline void vma_init(struct vm_area_struct *vma, struct mm_struct *mm) > >> static inline void vm_flags_init(struct vm_area_struct *vma, > >> vm_flags_t flags) > >> { > >> + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); > >> ACCESS_PRIVATE(vma, __vm_flags) = flags; > >> } > >> > >> @@ -801,6 +802,7 @@ static inline void vm_flags_reset(struct vm_area_struct *vma, > >> static inline void vm_flags_reset_once(struct vm_area_struct *vma, > >> vm_flags_t flags) > >> { > >> + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); > >> vma_assert_write_locked(vma); > >> WRITE_ONCE(ACCESS_PRIVATE(vma, __vm_flags), flags); > >> } > >> @@ -808,6 +810,7 @@ static inline void vm_flags_reset_once(struct vm_area_struct *vma, > >> static inline void vm_flags_set(struct vm_area_struct *vma, > >> vm_flags_t flags) > >> { > >> + VM_WARN_ON_ONCE(!pgtable_soft_dirty_supported() && (flags & VM_SOFTDIRTY)); > >> vma_start_write(vma); > >> ACCESS_PRIVATE(vma, __vm_flags) |= flags; > >> } > >> diff --git a/mm/mmap.c b/mm/mmap.c > >> index 5fd3b80fda1d5..40cb3fbf9a247 100644 > >> --- a/mm/mmap.c > >> +++ b/mm/mmap.c > >> @@ -1451,8 +1451,10 @@ static struct vm_area_struct *__install_special_mapping( > >> return ERR_PTR(-ENOMEM); > >> > >> vma_set_range(vma, addr, addr + len, 0); > >> - vm_flags_init(vma, (vm_flags | mm->def_flags | > >> - VM_DONTEXPAND | VM_SOFTDIRTY) & ~VM_LOCKED_MASK); > >> + vm_flags |= mm->def_flags | VM_DONTEXPAND; > > > > Why use '|=' rather than not directly setting vm_flags which is an > > uninitialized variable? > > vm_flags is passed in by the caller? > Then the original code seems wrong. > But just to clarify: this code was just a quick hack, adjust it as you need. Got it. > > [...] > > >>> > >>> + if (!pgtable_soft_dirty_supported()) > >>> + return; > >>> + > >>> if (pmd_present(pmd)) { > >>> /* See comment in change_huge_pmd() */ > >>> old = pmdp_invalidate(vma, addr, pmdp); > >> > >> That would all be handled with the above never-set-VM_SOFTDIRTY. > > I meant that there is no need to add the pgtable_soft_dirty_supported() > check. Ok I will take a look. > > > > > Sorry I'm not sure I understand here, you mean no longer need #ifdef > > CONFIG_MEM_SOFT_DIRTY for these function definitions, right? > > Likely we could drop them. VM_SOFTDIRTY will never be set so the code > will not be invoked. The relationship of VM_SOFTDIRTY and clear_soft_dirty_pmd() is not very direct from the first sight, let me take a further look. > > And for architectures where VM_SOFTDIRTY is never even possible > (!CONFIG_MEM_SOFT_DIRTY) we keep it as 0. Ok. > > That way, the compiler can even optimize out all of that code because > > "vma->vm_flags & VM_SOFTDIRTY" -> "vma->vm_flags & 0" > > will never be true. > > > > >> > >>> diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h > >>> index 4c035637eeb7..2a3578a4ae4c 100644 > >>> --- a/include/linux/pgtable.h > >>> +++ b/include/linux/pgtable.h > >>> @@ -1537,6 +1537,18 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) > >>> #define arch_start_context_switch(prev) do {} while (0) > >>> #endif > >>> > >>> +/* > >>> + * Some platforms can customize the PTE soft-dirty bit making it unavailable > >>> + * even if the architecture provides the resource. > >>> + * Adding this API allows architectures to add their own checks for the > >>> + * devices on which the kernel is running. > >>> + * Note: When overiding it, please make sure the CONFIG_MEM_SOFT_DIRTY > >>> + * is part of this macro. > >>> + */ > >>> +#ifndef pgtable_soft_dirty_supported > >>> +#define pgtable_soft_dirty_supported() IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) > >>> +#endif > >>> + > >>> #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY > >>> #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION > >>> static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) > >>> diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c > >>> index 830107b6dd08..b32ce2b0b998 100644 > >>> --- a/mm/debug_vm_pgtable.c > >>> +++ b/mm/debug_vm_pgtable.c > >>> @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) > >>> { > >>> pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); > >>> > >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > >>> + if (!pgtable_soft_dirty_supported()) > >>> return; > >>> > >>> pr_debug("Validating PTE soft dirty\n"); > >>> @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) > >>> { > >>> pte_t pte; > >>> > >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > >>> + if (!pgtable_soft_dirty_supported()) > >>> return; > >>> > >>> pr_debug("Validating PTE swap soft dirty\n"); > >>> @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) > >>> { > >>> pmd_t pmd; > >>> > >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > >>> + if (!pgtable_soft_dirty_supported()) > >>> return; > >>> > >>> if (!has_transparent_hugepage()) > >>> @@ -734,8 +734,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) > >>> { > >>> pmd_t pmd; > >>> > >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || > >>> - !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) > >>> + if (!pgtable_soft_dirty_supported() || > >>> + !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) > >>> return; > >>> > >>> if (!has_transparent_hugepage()) > >>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c > >>> index 9c38a95e9f09..218d430a2ec6 100644 > >>> --- a/mm/huge_memory.c > >>> +++ b/mm/huge_memory.c > >>> @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, > >>> > >>> static pmd_t move_soft_dirty_pmd(pmd_t pmd) > >>> { > >>> -#ifdef CONFIG_MEM_SOFT_DIRTY > >>> - if (unlikely(is_pmd_migration_entry(pmd))) > >>> - pmd = pmd_swp_mksoft_dirty(pmd); > >>> - else if (pmd_present(pmd)) > >>> - pmd = pmd_mksoft_dirty(pmd); > >>> -#endif > >>> + if (pgtable_soft_dirty_supported()) { > >>> + if (unlikely(is_pmd_migration_entry(pmd))) > >>> + pmd = pmd_swp_mksoft_dirty(pmd); > >>> + else if (pmd_present(pmd)) > >>> + pmd = pmd_mksoft_dirty(pmd); > >>> + } > >>> + > >> > >> Wondering, should simply the arch take care of that and we can just clal > >> pmd_swp_mksoft_dirty / pmd_mksoft_dirty? > > > > I think we have that already in include/linux/pgtable.h: > > We have stubs that just don't do anything. > > For riscv support you would handle runtime-enablement in these helpers. > > > > >> > >>> return pmd; > >>> } > >>> > >>> diff --git a/mm/internal.h b/mm/internal.h > >>> index 45b725c3dc03..c6ca62f8ecf3 100644 > >>> --- a/mm/internal.h > >>> +++ b/mm/internal.h > >>> @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) > >>> * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) > >>> * will be constantly true. > >>> */ > >>> - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) > >>> + if (!pgtable_soft_dirty_supported()) > >>> return false; > >>> > >> > >> That should be handled with the above never-set-VM_SOFTDIRTY. > > > > We don't need to check if (!pgtable_soft_dirty_supported()) if I > > understand correctly. > Hm, let me think about that. No, I think this has to stay as the comment > says, so this case here is special. I will cook a new version and then we can discuss further based on the new patch. Thanks for your review, Chunyan
On 12.09.25 11:21, Chunyan Zhang wrote: > On Fri, 12 Sept 2025 at 16:41, David Hildenbrand <david@redhat.com> wrote: >> >> [...] >> >>>>> +/* >>>>> + * We should remove the VM_SOFTDIRTY flag if the soft-dirty bit is >>>>> + * unavailable on which the kernel is running, even if the architecture >>>>> + * provides the resource and soft-dirty is compiled in. >>>>> + */ >>>>> +#ifdef CONFIG_MEM_SOFT_DIRTY >>>>> + if (!pgtable_soft_dirty_supported()) >>>>> + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0; >>>>> +#endif >>>> >>>> You can now drop the ifdef. >>> >>> Ok, you mean define VM_SOFTDIRTY 0x08000000 no matter if >>> MEM_SOFT_DIRTY is compiled in, right? >>> >>> Then I need memcpy() to set mnemonics[ilog2(VM_SOFTDIRTY)] here. >> >> The whole hunk will not be required when we make sure VM_SOFTDIRTY never >> gets set, correct? > > Oh no, this hunk code does not set vmflag. > The mnemonics[ilog2(VM_SOFTDIRTY)] is for show_smap_vma_flags(), > something like below: > # cat /proc/1/smaps > 5555605c7000-555560680000 r-xp 00000000 fe:00 19 > /bin/busybox > ... > VmFlags: rd ex mr mw me sd > > 'sd' is for soft-dirty > > I think this is still needed, right? If nobody sets VM_SOFTDIRTY in vma->vm_flags, then we will never print it. So you can just leave the "#ifdef CONFIG_MEM_SOFT_DIRTY" as is to handle the VM_SOFTDIRTY=0 case. So you should not have to change anything in show_smap_vma_flags(). [...] >>>> That should be handled with the above never-set-VM_SOFTDIRTY. >>> >>> We don't need to check if (!pgtable_soft_dirty_supported()) if I >>> understand correctly. >> Hm, let me think about that. No, I think this has to stay as the comment >> says, so this case here is special. > > I will cook a new version and then we can discuss further based on the > new patch. Sounds good! -- Cheers David / dhildenb
© 2016 - 2025 Red Hat, Inc.