From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
When a VMA is registered with userfaultfd in minor mode, its ->fault()
method should check if a folio exists in the page cache and, if it does,
->fault() should call handle_userfault(VM_UFFD_MINOR).
Instead of calling handle_userfault() directly from a specific ->fault()
implementation, introduce a new fault reason VM_FAULT_UFFD_MINOR that will
notify the core page fault handler that it should call
handle_userfault(VM_UFFD_MINOR) to complete a page fault.
Replace the call to handle_userfault(VM_UFFD_MINOR) in shmem and use the
new VM_FAULT_UFFD_MINOR there instead.
Suggested-by: David Hildenbrand (Red Hat) <david@kernel.org>
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
include/linux/mm_types.h | 3 +++
mm/memory.c | 2 ++
mm/shmem.c | 2 +-
3 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 90e5790c318f..eb135369940f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -1523,6 +1523,8 @@ typedef __bitwise unsigned int vm_fault_t;
* fsync() to complete (for synchronous page faults
* in DAX)
* @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released
+ * @VM_FAULT_UFFD_MINOR: ->fault did not modify page tables and needs
+ * handle_userfault(VM_UFFD_MINOR) to complete
* @VM_FAULT_HINDEX_MASK: mask HINDEX value
*
*/
@@ -1540,6 +1542,7 @@ enum vm_fault_reason {
VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000,
VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000,
VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000,
+ VM_FAULT_UFFD_MINOR = (__force vm_fault_t)0x008000,
VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000,
};
diff --git a/mm/memory.c b/mm/memory.c
index b59ae7ce42eb..94acbac8cefb 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -5279,6 +5279,8 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
}
ret = vma->vm_ops->fault(vmf);
+ if (unlikely(ret & VM_FAULT_UFFD_MINOR))
+ return handle_userfault(vmf, VM_UFFD_MINOR);
if (unlikely(ret & (VM_FAULT_ERROR | VM_FAULT_NOPAGE | VM_FAULT_RETRY |
VM_FAULT_DONE_COW)))
return ret;
diff --git a/mm/shmem.c b/mm/shmem.c
index aaa21bb60f51..6dcb73b52bcc 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2461,7 +2461,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
if (folio && vma && userfaultfd_minor(vma)) {
if (!xa_is_value(folio))
folio_put(folio);
- *fault_type = handle_userfault(vmf, VM_UFFD_MINOR);
+ *fault_type = VM_FAULT_UFFD_MINOR;
return 0;
}
--
2.50.1
On 11/23/25 11:27, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
> When a VMA is registered with userfaulfd in minor mode, its ->fault()
> method should check if a folio exists in the page cache and if yes
> ->fault() should call handle_userfault(VM_UFFD_MISSING).
>
> Instead of calling handle_userfault() directly from a specific ->fault()
> implementation introduce new fault reason VM_FAULT_UFFD_MINOR that will
> notify the core page fault handler that it should call
> handle_userfaultfd(VM_UFFD_MISSING) to complete a page fault.
>
> Replace a call to handle_userfault(VM_UFFD_MISSING) in shmem and use the
> new VM_FAULT_UFFD_MINOR there instead.
>
> Suggested-by: David Hildenbrand (Red Hat) <david@kernel.org>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
> include/linux/mm_types.h | 3 +++
> mm/memory.c | 2 ++
> mm/shmem.c | 2 +-
> 3 files changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index 90e5790c318f..eb135369940f 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -1523,6 +1523,8 @@ typedef __bitwise unsigned int vm_fault_t;
> * fsync() to complete (for synchronous page faults
> * in DAX)
> * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released
> + * @VM_FAULT_UFFD_MINOR: ->fault did not modify page tables and needs
> + * handle_userfault(VM_UFFD_MINOR) to complete
> * @VM_FAULT_HINDEX_MASK: mask HINDEX value
> *
> */
> @@ -1540,6 +1542,7 @@ enum vm_fault_reason {
> VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000,
> VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000,
> VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000,
> + VM_FAULT_UFFD_MINOR = (__force vm_fault_t)0x008000,
> VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000,
> };
>
> diff --git a/mm/memory.c b/mm/memory.c
> index b59ae7ce42eb..94acbac8cefb 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -5279,6 +5279,8 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
> }
>
> ret = vma->vm_ops->fault(vmf);
> + if (unlikely(ret & VM_FAULT_UFFD_MINOR))
> + return handle_userfault(vmf, VM_UFFD_MINOR);
If we could define VM_FAULT_UFFD_MINOR to be 0 without USERFAULTFD, we
could optimize that check out completely on such configs.
--
Cheers
David
On Mon, Nov 24, 2025 at 12:05:18PM +0100, David Hildenbrand (Red Hat) wrote:
> On 11/23/25 11:27, Mike Rapoport wrote:
> > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> >
> > When a VMA is registered with userfaulfd in minor mode, its ->fault()
> > method should check if a folio exists in the page cache and if yes
> > ->fault() should call handle_userfault(VM_UFFD_MISSING).
> >
> > Instead of calling handle_userfault() directly from a specific ->fault()
> > implementation introduce new fault reason VM_FAULT_UFFD_MINOR that will
> > notify the core page fault handler that it should call
> > handle_userfaultfd(VM_UFFD_MISSING) to complete a page fault.
> >
> > Replace a call to handle_userfault(VM_UFFD_MISSING) in shmem and use the
> > new VM_FAULT_UFFD_MINOR there instead.
> >
> > Suggested-by: David Hildenbrand (Red Hat) <david@kernel.org>
> > Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> > ---
> > include/linux/mm_types.h | 3 +++
> > mm/memory.c | 2 ++
> > mm/shmem.c | 2 +-
> > 3 files changed, 6 insertions(+), 1 deletion(-)
> >
> > diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> > index 90e5790c318f..eb135369940f 100644
> > --- a/include/linux/mm_types.h
> > +++ b/include/linux/mm_types.h
> > @@ -1523,6 +1523,8 @@ typedef __bitwise unsigned int vm_fault_t;
> > * fsync() to complete (for synchronous page faults
> > * in DAX)
> > * @VM_FAULT_COMPLETED: ->fault completed, meanwhile mmap lock released
> > + * @VM_FAULT_UFFD_MINOR: ->fault did not modify page tables and needs
> > + * handle_userfault(VM_UFFD_MINOR) to complete
> > * @VM_FAULT_HINDEX_MASK: mask HINDEX value
> > *
> > */
> > @@ -1540,6 +1542,7 @@ enum vm_fault_reason {
> > VM_FAULT_DONE_COW = (__force vm_fault_t)0x001000,
> > VM_FAULT_NEEDDSYNC = (__force vm_fault_t)0x002000,
> > VM_FAULT_COMPLETED = (__force vm_fault_t)0x004000,
> > + VM_FAULT_UFFD_MINOR = (__force vm_fault_t)0x008000,
> > VM_FAULT_HINDEX_MASK = (__force vm_fault_t)0x0f0000,
> > };
> > diff --git a/mm/memory.c b/mm/memory.c
> > index b59ae7ce42eb..94acbac8cefb 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -5279,6 +5279,8 @@ static vm_fault_t __do_fault(struct vm_fault *vmf)
> > }
> > ret = vma->vm_ops->fault(vmf);
> > + if (unlikely(ret & VM_FAULT_UFFD_MINOR))
> > + return handle_userfault(vmf, VM_UFFD_MINOR);
>
> If we could define VM_FAULT_UFFD_MINOR to be 0 without USERFAULTFD, we could
> optimize that check out completely on such configs.
It will be a bit ugly, but we can :)
> --
> Cheers
>
> David
--
Sincerely yours,
Mike.
© 2016 - 2025 Red Hat, Inc.