This patch completely moves the old userfaultfd core to use the new
vm_uffd_ops API. After this change, existing file systems will start to
use the new API for userfault operations.
While at it, move vma_can_userfault() into mm/userfaultfd.c, because
it's getting too big. It's only used in slow paths, so this shouldn't
be an issue.
This also removes quite a few hard-coded checks for either shmem or
hugetlbfs. All the old checks should still work, but now via vm_uffd_ops.
Note that anonymous memory will still need to be processed separately
because it doesn't have vm_ops at all.
Signed-off-by: Peter Xu <peterx@redhat.com>
---
include/linux/shmem_fs.h | 14 -----
include/linux/userfaultfd_k.h | 46 ++++----------
mm/shmem.c | 2 +-
mm/userfaultfd.c | 115 +++++++++++++++++++++++++---------
4 files changed, 101 insertions(+), 76 deletions(-)
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index 6d0f9c599ff7..2f5b7b295cf6 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -195,20 +195,6 @@ static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof)
extern bool shmem_charge(struct inode *inode, long pages);
extern void shmem_uncharge(struct inode *inode, long pages);
-#ifdef CONFIG_USERFAULTFD
-#ifdef CONFIG_SHMEM
-extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
- struct vm_area_struct *dst_vma,
- unsigned long dst_addr,
- unsigned long src_addr,
- uffd_flags_t flags,
- struct folio **foliop);
-#else /* !CONFIG_SHMEM */
-#define shmem_mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, \
- src_addr, flags, foliop) ({ BUG(); 0; })
-#endif /* CONFIG_SHMEM */
-#endif /* CONFIG_USERFAULTFD */
-
/*
* Used space is stored as unsigned 64-bit value in bytes but
* quota core supports only signed 64-bit values so use that
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index e79c724b3b95..4e56ad423a4a 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -85,9 +85,14 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
#define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr))
#define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1))
+static inline enum mfill_atomic_mode uffd_flags_get_mode(uffd_flags_t flags)
+{
+ return (enum mfill_atomic_mode)(flags & MFILL_ATOMIC_MODE_MASK);
+}
+
static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected)
{
- return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected);
+ return uffd_flags_get_mode(flags) == expected;
}
static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode)
@@ -196,41 +201,16 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
return vma->vm_flags & __VM_UFFD_FLAGS;
}
-static inline bool vma_can_userfault(struct vm_area_struct *vma,
- unsigned long vm_flags,
- bool wp_async)
+static inline const vm_uffd_ops *vma_get_uffd_ops(struct vm_area_struct *vma)
{
- vm_flags &= __VM_UFFD_FLAGS;
-
- if (vma->vm_flags & VM_DROPPABLE)
- return false;
-
- if ((vm_flags & VM_UFFD_MINOR) &&
- (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
- return false;
-
- /*
- * If wp async enabled, and WP is the only mode enabled, allow any
- * memory type.
- */
- if (wp_async && (vm_flags == VM_UFFD_WP))
- return true;
-
-#ifndef CONFIG_PTE_MARKER_UFFD_WP
- /*
- * If user requested uffd-wp but not enabled pte markers for
- * uffd-wp, then shmem & hugetlbfs are not supported but only
- * anonymous.
- */
- if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma))
- return false;
-#endif
-
- /* By default, allow any of anon|shmem|hugetlb */
- return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
- vma_is_shmem(vma);
+ if (vma->vm_ops && vma->vm_ops->userfaultfd_ops)
+ return vma->vm_ops->userfaultfd_ops;
+ return NULL;
}
+bool vma_can_userfault(struct vm_area_struct *vma,
+ unsigned long vm_flags, bool wp_async);
+
static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma)
{
struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx;
diff --git a/mm/shmem.c b/mm/shmem.c
index bd0a29000318..4d71fc7be358 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -3158,7 +3158,7 @@ static int shmem_uffd_get_folio(struct inode *inode, pgoff_t pgoff,
return shmem_get_folio(inode, pgoff, 0, folio, SGP_NOALLOC);
}
-int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
+static int shmem_mfill_atomic_pte(pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index 879505c6996f..61783ff2d335 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -14,12 +14,48 @@
#include <linux/userfaultfd_k.h>
#include <linux/mmu_notifier.h>
#include <linux/hugetlb.h>
-#include <linux/shmem_fs.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include "internal.h"
#include "swap.h"
+bool vma_can_userfault(struct vm_area_struct *vma,
+ unsigned long vm_flags, bool wp_async)
+{
+ unsigned long supported;
+
+ if (vma->vm_flags & VM_DROPPABLE)
+ return false;
+
+ vm_flags &= __VM_UFFD_FLAGS;
+
+#ifndef CONFIG_PTE_MARKER_UFFD_WP
+ /*
+	 * If the user requested uffd-wp but did not enable pte markers
+	 * for uffd-wp, then no file system (e.g. shmem or hugetlbfs) is
+	 * supported; only anonymous memory is.
+ */
+ if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma))
+ return false;
+#endif
+ /*
+ * If wp async enabled, and WP is the only mode enabled, allow any
+ * memory type.
+ */
+ if (wp_async && (vm_flags == VM_UFFD_WP))
+ return true;
+
+ if (vma_is_anonymous(vma))
+ /* Anonymous has no page cache, MINOR not supported */
+ supported = VM_UFFD_MISSING | VM_UFFD_WP;
+ else if (vma_get_uffd_ops(vma))
+ supported = vma_get_uffd_ops(vma)->uffd_features;
+ else
+ return false;
+
+ return !(vm_flags & (~supported));
+}
+
static __always_inline
bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end)
{
@@ -384,11 +420,15 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd,
{
struct inode *inode = file_inode(dst_vma->vm_file);
pgoff_t pgoff = linear_page_index(dst_vma, dst_addr);
+ const vm_uffd_ops *uffd_ops = vma_get_uffd_ops(dst_vma);
struct folio *folio;
struct page *page;
int ret;
- ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC);
+ if (WARN_ON_ONCE(!uffd_ops || !uffd_ops->uffd_get_folio))
+ return -EINVAL;
+
+ ret = uffd_ops->uffd_get_folio(inode, pgoff, &folio);
/* Our caller expects us to return -EFAULT if we failed to find folio */
if (ret == -ENOENT)
ret = -EFAULT;
@@ -504,18 +544,6 @@ static __always_inline ssize_t mfill_atomic_hugetlb(
u32 hash;
struct address_space *mapping;
- /*
- * There is no default zero huge page for all huge page sizes as
- * supported by hugetlb. A PMD_SIZE huge pages may exist as used
- * by THP. Since we can not reliably insert a zero page, this
- * feature is not supported.
- */
- if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) {
- up_read(&ctx->map_changing_lock);
- uffd_mfill_unlock(dst_vma);
- return -EINVAL;
- }
-
src_addr = src_start;
dst_addr = dst_start;
copied = 0;
@@ -686,14 +714,55 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd,
err = mfill_atomic_pte_zeropage(dst_pmd,
dst_vma, dst_addr);
} else {
- err = shmem_mfill_atomic_pte(dst_pmd, dst_vma,
- dst_addr, src_addr,
- flags, foliop);
+ const vm_uffd_ops *uffd_ops = vma_get_uffd_ops(dst_vma);
+
+ if (WARN_ON_ONCE(!uffd_ops || !uffd_ops->uffd_copy)) {
+ err = -EINVAL;
+ } else {
+ err = uffd_ops->uffd_copy(dst_pmd, dst_vma,
+ dst_addr, src_addr,
+ flags, foliop);
+ }
}
return err;
}
+static inline bool
+vma_uffd_ops_supported(struct vm_area_struct *vma, uffd_flags_t flags)
+{
+ enum mfill_atomic_mode mode = uffd_flags_get_mode(flags);
+ const vm_uffd_ops *uffd_ops;
+ unsigned long uffd_ioctls;
+
+ if ((flags & MFILL_ATOMIC_WP) && !(vma->vm_flags & VM_UFFD_WP))
+ return false;
+
+ /* Anonymous supports everything except CONTINUE */
+ if (vma_is_anonymous(vma))
+ return mode != MFILL_ATOMIC_CONTINUE;
+
+ uffd_ops = vma_get_uffd_ops(vma);
+ if (!uffd_ops)
+ return false;
+
+ uffd_ioctls = uffd_ops->uffd_ioctls;
+ switch (mode) {
+ case MFILL_ATOMIC_COPY:
+ return uffd_ioctls & BIT(_UFFDIO_COPY);
+ case MFILL_ATOMIC_ZEROPAGE:
+ return uffd_ioctls & BIT(_UFFDIO_ZEROPAGE);
+ case MFILL_ATOMIC_CONTINUE:
+ if (!(vma->vm_flags & VM_SHARED))
+ return false;
+ return uffd_ioctls & BIT(_UFFDIO_CONTINUE);
+ case MFILL_ATOMIC_POISON:
+ return uffd_ioctls & BIT(_UFFDIO_POISON);
+ default:
+ return false;
+ }
+}
+
static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
unsigned long dst_start,
unsigned long src_start,
@@ -752,11 +821,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
dst_vma->vm_flags & VM_SHARED))
goto out_unlock;
- /*
- * validate 'mode' now that we know the dst_vma: don't allow
- * a wrprotect copy if the userfaultfd didn't register as WP.
- */
- if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP))
+ if (!vma_uffd_ops_supported(dst_vma, flags))
goto out_unlock;
/*
@@ -766,12 +831,6 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx,
return mfill_atomic_hugetlb(ctx, dst_vma, dst_start,
src_start, len, flags);
- if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
- goto out_unlock;
- if (!vma_is_shmem(dst_vma) &&
- uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE))
- goto out_unlock;
-
while (src_addr < src_start + len) {
pmd_t dst_pmdval;
--
2.49.0
On Fri, Jun 20, 2025 at 12:04 PM Peter Xu <peterx@redhat.com> wrote: > > This patch completely moves the old userfaultfd core to use the new > vm_uffd_ops API. After this change, existing file systems will start to > use the new API for userfault operations. > > When at it, moving vma_can_userfault() into mm/userfaultfd.c instead, > because it's getting too big. It's only used in slow paths so it shouldn't > be an issue. > > This will also remove quite some hard-coded checks for either shmem or > hugetlbfs. Now all the old checks should still work but with vm_uffd_ops. > > Note that anonymous memory will still need to be processed separately > because it doesn't have vm_ops at all. > > Signed-off-by: Peter Xu <peterx@redhat.com> > --- > include/linux/shmem_fs.h | 14 ----- > include/linux/userfaultfd_k.h | 46 ++++---------- > mm/shmem.c | 2 +- > mm/userfaultfd.c | 115 +++++++++++++++++++++++++--------- > 4 files changed, 101 insertions(+), 76 deletions(-) > > diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h > index 6d0f9c599ff7..2f5b7b295cf6 100644 > --- a/include/linux/shmem_fs.h > +++ b/include/linux/shmem_fs.h > @@ -195,20 +195,6 @@ static inline pgoff_t shmem_fallocend(struct inode *inode, pgoff_t eof) > extern bool shmem_charge(struct inode *inode, long pages); > extern void shmem_uncharge(struct inode *inode, long pages); > > -#ifdef CONFIG_USERFAULTFD > -#ifdef CONFIG_SHMEM > -extern int shmem_mfill_atomic_pte(pmd_t *dst_pmd, > - struct vm_area_struct *dst_vma, > - unsigned long dst_addr, > - unsigned long src_addr, > - uffd_flags_t flags, > - struct folio **foliop); > -#else /* !CONFIG_SHMEM */ > -#define shmem_mfill_atomic_pte(dst_pmd, dst_vma, dst_addr, \ > - src_addr, flags, foliop) ({ BUG(); 0; }) > -#endif /* CONFIG_SHMEM */ > -#endif /* CONFIG_USERFAULTFD */ > - > /* > * Used space is stored as unsigned 64-bit value in bytes but > * quota core supports only signed 64-bit values so use that > diff --git a/include/linux/userfaultfd_k.h 
b/include/linux/userfaultfd_k.h > index e79c724b3b95..4e56ad423a4a 100644 > --- a/include/linux/userfaultfd_k.h > +++ b/include/linux/userfaultfd_k.h > @@ -85,9 +85,14 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); > #define MFILL_ATOMIC_FLAG(nr) ((__force uffd_flags_t) MFILL_ATOMIC_BIT(nr)) > #define MFILL_ATOMIC_MODE_MASK ((__force uffd_flags_t) (MFILL_ATOMIC_BIT(0) - 1)) > > +static inline enum mfill_atomic_mode uffd_flags_get_mode(uffd_flags_t flags) > +{ > + return (enum mfill_atomic_mode)(flags & MFILL_ATOMIC_MODE_MASK); > +} > + > static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected) > { > - return (flags & MFILL_ATOMIC_MODE_MASK) == ((__force uffd_flags_t) expected); > + return uffd_flags_get_mode(flags) == expected; > } > > static inline uffd_flags_t uffd_flags_set_mode(uffd_flags_t flags, enum mfill_atomic_mode mode) > @@ -196,41 +201,16 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma) > return vma->vm_flags & __VM_UFFD_FLAGS; > } > > -static inline bool vma_can_userfault(struct vm_area_struct *vma, > - unsigned long vm_flags, > - bool wp_async) > +static inline const vm_uffd_ops *vma_get_uffd_ops(struct vm_area_struct *vma) > { > - vm_flags &= __VM_UFFD_FLAGS; > - > - if (vma->vm_flags & VM_DROPPABLE) > - return false; > - > - if ((vm_flags & VM_UFFD_MINOR) && > - (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) > - return false; > - > - /* > - * If wp async enabled, and WP is the only mode enabled, allow any > - * memory type. > - */ > - if (wp_async && (vm_flags == VM_UFFD_WP)) > - return true; > - > -#ifndef CONFIG_PTE_MARKER_UFFD_WP > - /* > - * If user requested uffd-wp but not enabled pte markers for > - * uffd-wp, then shmem & hugetlbfs are not supported but only > - * anonymous. > - */ > - if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) > - return false; > -#endif Hi Peter, Thanks for this cleanup! 
It looks like the above two checks, the wp-async one and the PTE marker check, have been reordered in this patch. Does this result in a functional difference? The rest of this series looks fine to me. :) > - > - /* By default, allow any of anon|shmem|hugetlb */ > - return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || > - vma_is_shmem(vma); > + if (vma->vm_ops && vma->vm_ops->userfaultfd_ops) > + return vma->vm_ops->userfaultfd_ops; > + return NULL; > } > > +bool vma_can_userfault(struct vm_area_struct *vma, > + unsigned long vm_flags, bool wp_async); > + > static inline bool vma_has_uffd_without_event_remap(struct vm_area_struct *vma) > { > struct userfaultfd_ctx *uffd_ctx = vma->vm_userfaultfd_ctx.ctx; > diff --git a/mm/shmem.c b/mm/shmem.c > index bd0a29000318..4d71fc7be358 100644 > --- a/mm/shmem.c > +++ b/mm/shmem.c > @@ -3158,7 +3158,7 @@ static int shmem_uffd_get_folio(struct inode *inode, pgoff_t pgoff, > return shmem_get_folio(inode, pgoff, 0, folio, SGP_NOALLOC); > } > > -int shmem_mfill_atomic_pte(pmd_t *dst_pmd, > +static int shmem_mfill_atomic_pte(pmd_t *dst_pmd, > struct vm_area_struct *dst_vma, > unsigned long dst_addr, > unsigned long src_addr, > diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c > index 879505c6996f..61783ff2d335 100644 > --- a/mm/userfaultfd.c > +++ b/mm/userfaultfd.c > @@ -14,12 +14,48 @@ > #include <linux/userfaultfd_k.h> > #include <linux/mmu_notifier.h> > #include <linux/hugetlb.h> > -#include <linux/shmem_fs.h> > #include <asm/tlbflush.h> > #include <asm/tlb.h> > #include "internal.h" > #include "swap.h" > > +bool vma_can_userfault(struct vm_area_struct *vma, > + unsigned long vm_flags, bool wp_async) > +{ > + unsigned long supported; > + > + if (vma->vm_flags & VM_DROPPABLE) > + return false; > + > + vm_flags &= __VM_UFFD_FLAGS; > + > +#ifndef CONFIG_PTE_MARKER_UFFD_WP > + /* > + * If user requested uffd-wp but not enabled pte markers for > + * uffd-wp, then any file system (like shmem or hugetlbfs) are not > + * 
supported but only anonymous. > + */ > + if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) > + return false; > +#endif > + /* > + * If wp async enabled, and WP is the only mode enabled, allow any > + * memory type. > + */ > + if (wp_async && (vm_flags == VM_UFFD_WP)) > + return true; > + > + if (vma_is_anonymous(vma)) > + /* Anonymous has no page cache, MINOR not supported */ > + supported = VM_UFFD_MISSING | VM_UFFD_WP; > + else if (vma_get_uffd_ops(vma)) > + supported = vma_get_uffd_ops(vma)->uffd_features; > + else > + return false; > + > + return !(vm_flags & (~supported)); > +} > + > static __always_inline > bool validate_dst_vma(struct vm_area_struct *dst_vma, unsigned long dst_end) > { > @@ -384,11 +420,15 @@ static int mfill_atomic_pte_continue(pmd_t *dst_pmd, > { > struct inode *inode = file_inode(dst_vma->vm_file); > pgoff_t pgoff = linear_page_index(dst_vma, dst_addr); > + const vm_uffd_ops *uffd_ops = vma_get_uffd_ops(dst_vma); > struct folio *folio; > struct page *page; > int ret; > > - ret = shmem_get_folio(inode, pgoff, 0, &folio, SGP_NOALLOC); > + if (WARN_ON_ONCE(!uffd_ops || !uffd_ops->uffd_get_folio)) > + return -EINVAL; > + > + ret = uffd_ops->uffd_get_folio(inode, pgoff, &folio); > /* Our caller expects us to return -EFAULT if we failed to find folio */ > if (ret == -ENOENT) > ret = -EFAULT; > @@ -504,18 +544,6 @@ static __always_inline ssize_t mfill_atomic_hugetlb( > u32 hash; > struct address_space *mapping; > > - /* > - * There is no default zero huge page for all huge page sizes as > - * supported by hugetlb. A PMD_SIZE huge pages may exist as used > - * by THP. Since we can not reliably insert a zero page, this > - * feature is not supported. 
> - */ > - if (uffd_flags_mode_is(flags, MFILL_ATOMIC_ZEROPAGE)) { > - up_read(&ctx->map_changing_lock); > - uffd_mfill_unlock(dst_vma); > - return -EINVAL; > - } > - > src_addr = src_start; > dst_addr = dst_start; > copied = 0; > @@ -686,14 +714,55 @@ static __always_inline ssize_t mfill_atomic_pte(pmd_t *dst_pmd, > err = mfill_atomic_pte_zeropage(dst_pmd, > dst_vma, dst_addr); > } else { > - err = shmem_mfill_atomic_pte(dst_pmd, dst_vma, > - dst_addr, src_addr, > - flags, foliop); > + const vm_uffd_ops *uffd_ops = vma_get_uffd_ops(dst_vma); > + > + if (WARN_ON_ONCE(!uffd_ops || !uffd_ops->uffd_copy)) { > + err = -EINVAL; > + } else { > + err = uffd_ops->uffd_copy(dst_pmd, dst_vma, > + dst_addr, src_addr, > + flags, foliop); > + } > } > > return err; > } > > +static inline bool > +vma_uffd_ops_supported(struct vm_area_struct *vma, uffd_flags_t flags) > +{ > + enum mfill_atomic_mode mode = uffd_flags_get_mode(flags); > + const vm_uffd_ops *uffd_ops; > + unsigned long uffd_ioctls; > + > + if ((flags & MFILL_ATOMIC_WP) && !(vma->vm_flags & VM_UFFD_WP)) > + return false; > + > + /* Anonymous supports everything except CONTINUE */ > + if (vma_is_anonymous(vma)) > + return mode != MFILL_ATOMIC_CONTINUE; > + > + uffd_ops = vma_get_uffd_ops(vma); > + if (!uffd_ops) > + return false; > + > + uffd_ioctls = uffd_ops->uffd_ioctls; > + switch (mode) { > + case MFILL_ATOMIC_COPY: > + return uffd_ioctls & BIT(_UFFDIO_COPY); > + case MFILL_ATOMIC_ZEROPAGE: > + return uffd_ioctls & BIT(_UFFDIO_ZEROPAGE); > + case MFILL_ATOMIC_CONTINUE: > + if (!(vma->vm_flags & VM_SHARED)) > + return false; > + return uffd_ioctls & BIT(_UFFDIO_CONTINUE); > + case MFILL_ATOMIC_POISON: > + return uffd_ioctls & BIT(_UFFDIO_POISON); > + default: > + return false; > + } > +} > + > static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, > unsigned long dst_start, > unsigned long src_start, > @@ -752,11 +821,7 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, > 
dst_vma->vm_flags & VM_SHARED)) > goto out_unlock; > > - /* > - * validate 'mode' now that we know the dst_vma: don't allow > - * a wrprotect copy if the userfaultfd didn't register as WP. > - */ > - if ((flags & MFILL_ATOMIC_WP) && !(dst_vma->vm_flags & VM_UFFD_WP)) > + if (!vma_uffd_ops_supported(dst_vma, flags)) > goto out_unlock; > > /* > @@ -766,12 +831,6 @@ static __always_inline ssize_t mfill_atomic(struct userfaultfd_ctx *ctx, > return mfill_atomic_hugetlb(ctx, dst_vma, dst_start, > src_start, len, flags); > > - if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma)) > - goto out_unlock; > - if (!vma_is_shmem(dst_vma) && > - uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) > - goto out_unlock; > - > while (src_addr < src_start + len) { > pmd_t dst_pmdval; > > -- > 2.49.0 >
On Wed, Jun 25, 2025 at 01:31:49PM -0700, James Houghton wrote: > > -static inline bool vma_can_userfault(struct vm_area_struct *vma, > > - unsigned long vm_flags, > > - bool wp_async) > > +static inline const vm_uffd_ops *vma_get_uffd_ops(struct vm_area_struct *vma) > > { > > - vm_flags &= __VM_UFFD_FLAGS; > > - > > - if (vma->vm_flags & VM_DROPPABLE) > > - return false; > > - > > - if ((vm_flags & VM_UFFD_MINOR) && > > - (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) > > - return false; > > - > > - /* > > - * If wp async enabled, and WP is the only mode enabled, allow any > > - * memory type. > > - */ > > - if (wp_async && (vm_flags == VM_UFFD_WP)) > > - return true; > > - > > -#ifndef CONFIG_PTE_MARKER_UFFD_WP > > - /* > > - * If user requested uffd-wp but not enabled pte markers for > > - * uffd-wp, then shmem & hugetlbfs are not supported but only > > - * anonymous. > > - */ > > - if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) > > - return false; > > -#endif > > Hi Peter, > > Thanks for this cleanup! > > It looks like the above two checks, the wp-async one and the PTE > marker check, have been reordered in this patch. Does this result in a > functional difference? > > The rest of this series looks fine to me. :) Thanks for the very careful review, James! Yes that's a small tweak I did when moving. I don't expect to have any functional change. Maybe I should at least mention that in the commit log. Here I did the movement because fundamentally wp_async depends on the pte markers, so it may be slightly more intuitive to check pte markers first, rejecting any form of file wr-protect traps. Otherwise it may look like we could return true for wp_async==true too early. In reality IIUC it
For example, currently userfaultfd_api() has: #ifndef CONFIG_PTE_MARKER_UFFD_WP uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; #endif So when wp_async can be true above, pte markers must be compiled. IOW, the above code clip should work identically to the below lines: #ifdef CONFIG_PTE_MARKER_UFFD_WP if (wp_async && (vm_flags == VM_UFFD_WP)) return true; #endif #ifndef CONFIG_PTE_MARKER_UFFD_WP if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) return false; #endif Then it means both chunks of code cannot be compiled together. The order shouldn't matter. But maybe I should just move it back as before, to save the explanation and confusion. Let me know if you have any preference. Thanks, -- Peter Xu
On Wed, Jun 25, 2025 at 2:21 PM Peter Xu <peterx@redhat.com> wrote: > > On Wed, Jun 25, 2025 at 01:31:49PM -0700, James Houghton wrote: > > > -static inline bool vma_can_userfault(struct vm_area_struct *vma, > > > - unsigned long vm_flags, > > > - bool wp_async) > > > +static inline const vm_uffd_ops *vma_get_uffd_ops(struct vm_area_struct *vma) > > > { > > > - vm_flags &= __VM_UFFD_FLAGS; > > > - > > > - if (vma->vm_flags & VM_DROPPABLE) > > > - return false; > > > - > > > - if ((vm_flags & VM_UFFD_MINOR) && > > > - (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma))) > > > - return false; > > > - > > > - /* > > > - * If wp async enabled, and WP is the only mode enabled, allow any > > > - * memory type. > > > - */ > > > - if (wp_async && (vm_flags == VM_UFFD_WP)) > > > - return true; > > > - > > > -#ifndef CONFIG_PTE_MARKER_UFFD_WP > > > - /* > > > - * If user requested uffd-wp but not enabled pte markers for > > > - * uffd-wp, then shmem & hugetlbfs are not supported but only > > > - * anonymous. > > > - */ > > > - if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) > > > - return false; > > > -#endif > > > > Hi Peter, > > > > Thanks for this cleanup! > > > > It looks like the above two checks, the wp-async one and the PTE > > marker check, have been reordered in this patch. Does this result in a > > functional difference? > > > > The rest of this series looks fine to me. :) > > Thanks for the very careful review, James! > > Yes that's a small tweak I did when moving. I don't expect to have any > functional change. Maybe I should at least mention that in the commit log. Yeah if you could leave a small explanation, like what you have below, in the commit log, that would be good. :) > > Here I did the movement because fundamentally wp_async depends on the pte > markers, so it may be slightly more intuitive to check pte markers first, > rejecting any form of file wr-protect traps. Otherwise it may looks like > we could return the true for wp_async==true too early. 
In reality IIUC it > can't happen. > > For example, currently userfaultfd_api() has: > > #ifndef CONFIG_PTE_MARKER_UFFD_WP > uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; > uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; > uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; > #endif Ah, I see, thanks! > So when wp_async can be true above, pte markers must be compiled.. IOW, > above code clip should work identically with below lines: > > #ifdef CONFIG_PTE_MARKER_UFFD_WP > if (wp_async && (vm_flags == VM_UFFD_WP)) > return true; > #endif > > #ifndef CONFIG_PTE_MARKER_UFFD_WP > if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) > return false; > #endif > > Then it means both chunks of code cannot be compiled together. The order > shouldn't matter. > > But maybe I should just move it back as before, to save the explain and > confusions. Let me know if you have any preference. Feel free to leave this patch as it is, that's fine. Thanks for the explanation. :) If you'd like, feel free to add: Reviewed-by: James Houghton <jthoughton@google.com>
Hi Peter, kernel test robot noticed the following build warnings: [auto build test WARNING on next-20250620] [cannot apply to akpm-mm/mm-everything linus/master v6.16-rc2 v6.16-rc1 v6.15 v6.16-rc2] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/Peter-Xu/mm-Introduce-vm_uffd_ops-API/20250621-030557 base: next-20250620 patch link: https://lore.kernel.org/r/20250620190342.1780170-5-peterx%40redhat.com patch subject: [PATCH 4/4] mm: Apply vm_uffd_ops API to core mm config: i386-randconfig-061-20250622 (https://download.01.org/0day-ci/archive/20250623/202506230216.JVgQj2Si-lkp@intel.com/config) compiler: gcc-11 (Debian 11.3.0-12) 11.3.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250623/202506230216.JVgQj2Si-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202506230216.JVgQj2Si-lkp@intel.com/ sparse warnings: (new ones prefixed by >>) mm/shmem.c: note: in included file (through include/linux/shmem_fs.h): >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t -- mm/hugetlb.c:669:12: sparse: sparse: context imbalance in 'allocate_file_region_entries' - wrong count at exit mm/hugetlb.c:740:13: sparse: sparse: context imbalance in 'region_add' - wrong count at exit mm/hugetlb.c:807:13: sparse: sparse: context imbalance in 'region_chg' - wrong count at exit mm/hugetlb.c:5798:20: sparse: sparse: context imbalance in 'move_huge_pte' - different lock contexts for basic block mm/hugetlb.c: note: in included file: include/linux/mm.h:1391:22: sparse: sparse: context imbalance in 'hugetlb_wp' - unexpected unlock mm/hugetlb.c: note: in included file (through include/linux/hugetlb.h, include/linux/migrate.h): >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t -- mm/userfaultfd.c:270:9: sparse: sparse: context imbalance in 'mfill_atomic_install_pte' - different lock contexts for basic block mm/userfaultfd.c:412:9: sparse: sparse: context imbalance in 'mfill_atomic_pte_zeropage' - different lock contexts for basic block mm/userfaultfd.c:498:9: sparse: sparse: context imbalance in 'mfill_atomic_pte_poison' - different lock contexts for basic block mm/userfaultfd.c: note: in included file: >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: 
sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t mm/userfaultfd.c: note: in included file (through include/linux/rbtree.h, include/linux/mm_types.h, include/linux/mmzone.h, ...): include/linux/rcupdate.h:871:25: sparse: sparse: context imbalance in 'move_pages_pte' - unexpected unlock vim +90 include/linux/userfaultfd_k.h 87 88 static inline enum mfill_atomic_mode uffd_flags_get_mode(uffd_flags_t flags) 89 { > 90 return (enum mfill_atomic_mode)(flags & MFILL_ATOMIC_MODE_MASK); 91 } 92 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Mon, Jun 23, 2025 at 03:09:13AM +0800, kernel test robot wrote: > Hi Peter, > > kernel test robot noticed the following build warnings: > > [auto build test WARNING on next-20250620] > [cannot apply to akpm-mm/mm-everything linus/master v6.16-rc2 v6.16-rc1 v6.15 v6.16-rc2] > [If your patch is applied to the wrong git tree, kindly drop us a note. > And when submitting patch, we suggest to use '--base' as documented in > https://git-scm.com/docs/git-format-patch#_base_tree_information] > > url: https://github.com/intel-lab-lkp/linux/commits/Peter-Xu/mm-Introduce-vm_uffd_ops-API/20250621-030557 > base: next-20250620 > patch link: https://lore.kernel.org/r/20250620190342.1780170-5-peterx%40redhat.com > patch subject: [PATCH 4/4] mm: Apply vm_uffd_ops API to core mm > config: i386-randconfig-061-20250622 (https://download.01.org/0day-ci/archive/20250623/202506230216.JVgQj2Si-lkp@intel.com/config) > compiler: gcc-11 (Debian 11.3.0-12) 11.3.0 > reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20250623/202506230216.JVgQj2Si-lkp@intel.com/reproduce) > > If you fix the issue in a separate patch/commit (i.e. 
not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <lkp@intel.com> > | Closes: https://lore.kernel.org/oe-kbuild-all/202506230216.JVgQj2Si-lkp@intel.com/ > > sparse warnings: (new ones prefixed by >>) > mm/shmem.c: note: in included file (through include/linux/shmem_fs.h): > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > -- > mm/hugetlb.c:669:12: sparse: sparse: context imbalance in 'allocate_file_region_entries' - wrong count at exit > mm/hugetlb.c:740:13: sparse: sparse: context imbalance in 'region_add' - wrong count at exit > mm/hugetlb.c:807:13: sparse: sparse: context imbalance in 'region_chg' - wrong count at exit > mm/hugetlb.c:5798:20: sparse: sparse: context imbalance in 'move_huge_pte' - different lock contexts for basic block > mm/hugetlb.c: note: in included file: > include/linux/mm.h:1391:22: sparse: sparse: context imbalance in 'hugetlb_wp' - unexpected unlock > mm/hugetlb.c: note: in included file (through include/linux/hugetlb.h, include/linux/migrate.h): > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > -- > mm/userfaultfd.c:270:9: sparse: sparse: context imbalance in 'mfill_atomic_install_pte' - different lock contexts for basic block > mm/userfaultfd.c:412:9: sparse: sparse: context imbalance in 'mfill_atomic_pte_zeropage' - different lock contexts for basic block > mm/userfaultfd.c:498:9: sparse: sparse: context imbalance in 'mfill_atomic_pte_poison' - different lock contexts for basic block > mm/userfaultfd.c: note: in included file: > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t 
> >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > >> include/linux/userfaultfd_k.h:90:17: sparse: sparse: cast from restricted uffd_flags_t > mm/userfaultfd.c: note: in included file (through include/linux/rbtree.h, include/linux/mm_types.h, include/linux/mmzone.h, ...): > include/linux/rcupdate.h:871:25: sparse: sparse: context imbalance in 'move_pages_pte' - unexpected unlock > > vim +90 include/linux/userfaultfd_k.h > > 87 > 88 static inline enum mfill_atomic_mode uffd_flags_get_mode(uffd_flags_t flags) > 89 { > > 90 return (enum mfill_atomic_mode)(flags & 
MFILL_ATOMIC_MODE_MASK); > 91 } > 92 Sigh.. I thought the cast would help to tell sparse on this. Looks like I need __force.. I'll squash the below change when I repost: diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 6064f9542d5b..6c5ca68204dd 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -87,7 +87,7 @@ extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason); static inline enum mfill_atomic_mode uffd_flags_get_mode(uffd_flags_t flags) { - return (enum mfill_atomic_mode)(flags & MFILL_ATOMIC_MODE_MASK); + return (__force enum mfill_atomic_mode)(flags & MFILL_ATOMIC_MODE_MASK); } static inline bool uffd_flags_mode_is(uffd_flags_t flags, enum mfill_atomic_mode expected) -- Peter Xu
© 2016 - 2025 Red Hat, Inc.