From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
userfaultfd notifications about minor page faults are used for live
migration and snapshotting of VMs with memory backed by shared hugetlbfs or
tmpfs mappings, as described in detail in commit 7677f7fd8be7 ("userfaultfd:
add minor fault registration mode").
To use the same mechanism for VMs that use guest_memfd to map their memory,
guest_memfd should support userfaultfd minor mode.
Extend the ->fault() method of guest_memfd with the ability to notify the
core page fault handler that a page fault requires
handle_userfault(VM_UFFD_MINOR) to complete, and add an implementation of
->get_folio() to guest_memfd vm_ops.
Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
virt/kvm/guest_memfd.c | 28 ++++++++++++++++++++++++++++
1 file changed, 28 insertions(+)
diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index ffadc5ee8e04..2a2b076293f9 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -4,6 +4,7 @@
#include <linux/kvm_host.h>
#include <linux/pagemap.h>
#include <linux/anon_inodes.h>
+#include <linux/userfaultfd_k.h>
#include "kvm_mm.h"
@@ -369,6 +370,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
return vmf_error(err);
}
+ if (userfaultfd_minor(vmf->vma)) {
+ folio_unlock(folio);
+ folio_put(folio);
+ return VM_FAULT_UFFD_MINOR;
+ }
+
if (WARN_ON_ONCE(folio_test_large(folio))) {
ret = VM_FAULT_SIGBUS;
goto out_folio;
@@ -390,8 +397,29 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
return ret;
}
+#ifdef CONFIG_USERFAULTFD
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
+{
+ struct folio *folio;
+
+ folio = kvm_gmem_get_folio(inode, pgoff);
+ if (IS_ERR_OR_NULL(folio))
+ return folio;
+
+ if (!folio_test_uptodate(folio)) {
+ clear_highpage(folio_page(folio, 0));
+ kvm_gmem_mark_prepared(folio);
+ }
+
+ return folio;
+}
+#endif
+
static const struct vm_operations_struct kvm_gmem_vm_ops = {
.fault = kvm_gmem_fault_user_mapping,
+#ifdef CONFIG_USERFAULTFD
+ .get_folio = kvm_gmem_get_folio,
+#endif
};
static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
--
2.50.1
On 25/11/2025 18:38, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
> userfaultfd notifications about minor page faults used for live migration
> and snapshotting of VMs with memory backed by shared hugetlbfs or tmpfs
> mappings as described in detail in commit 7677f7fd8be7 ("userfaultfd: add
> minor fault registration mode").
>
> To use the same mechanism for VMs that use guest_memfd to map their memory,
> guest_memfd should support userfaultfd minor mode.
>
> Extend ->fault() method of guest_memfd with ability to notify core page
> fault handler that a page fault requires handle_userfault(VM_UFFD_MINOR) to
> complete and add implementation of ->get_shared_folio() to guest_memfd
> vm_ops.
>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
> virt/kvm/guest_memfd.c | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index ffadc5ee8e04..2a2b076293f9 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,6 +4,7 @@
> #include <linux/kvm_host.h>
> #include <linux/pagemap.h>
> #include <linux/anon_inodes.h>
> +#include <linux/userfaultfd_k.h>
>
> #include "kvm_mm.h"
>
> @@ -369,6 +370,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
> return vmf_error(err);
> }
>
> + if (userfaultfd_minor(vmf->vma)) {
> + folio_unlock(folio);
> + folio_put(folio);
> + return VM_FAULT_UFFD_MINOR;
> + }
> +
> if (WARN_ON_ONCE(folio_test_large(folio))) {
> ret = VM_FAULT_SIGBUS;
> goto out_folio;
> @@ -390,8 +397,29 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
> return ret;
> }
>
> +#ifdef CONFIG_USERFAULTFD
> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
We have to name it differently, otherwise it clashes with the existing
one in this file.
> +{
> + struct folio *folio;
> +
> + folio = kvm_gmem_get_folio(inode, pgoff);
^^
> + if (IS_ERR_OR_NULL(folio))
> + return folio;
> +
> + if (!folio_test_uptodate(folio)) {
> + clear_highpage(folio_page(folio, 0));
> + kvm_gmem_mark_prepared(folio);
> + }
> +
> + return folio;
> +}
> +#endif
> +
> static const struct vm_operations_struct kvm_gmem_vm_ops = {
> .fault = kvm_gmem_fault_user_mapping,
> +#ifdef CONFIG_USERFAULTFD
> + .get_folio = kvm_gmem_get_folio,
> +#endif
> };
>
> static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> --
> 2.50.1
>
On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
> On 25/11/2025 18:38, Mike Rapoport wrote:
> > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> >
> > +#ifdef CONFIG_USERFAULTFD
> > +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
>
> We have to name it differently, otherwise it clashes with the existing one
> in this file.

It's all David's fault! ;-P

How about kvm_gmem_get_prepared_folio() ?

--
Sincerely yours,
Mike.
On 11/27/25 11:36, Mike Rapoport wrote:
> On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
>> On 25/11/2025 18:38, Mike Rapoport wrote:
>>> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>>>
>>> +#ifdef CONFIG_USERFAULTFD
>>> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
>>
>> We have to name it differently, otherwise it clashes with the existing one
>> in this file.
>
> It's all David's fault! ;-P

As usual :)

> How about kvm_gmem_get_prepared_folio() ?

Or maybe just spell out that it is for vm_ops

kvm_gmem_vm_ops_get_folio()

--
Cheers

David
On 27/11/2025 10:36, Mike Rapoport wrote:
> On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
>> On 25/11/2025 18:38, Mike Rapoport wrote:
>>> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>>>
>>> +#ifdef CONFIG_USERFAULTFD
>>> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
>>
>> We have to name it differently, otherwise it clashes with the existing one
>> in this file.
>
> It's all David's fault! ;-P
> How about kvm_gmem_get_prepared_folio() ?

I'm afraid it may not be ideal due to preparedness tracking being removed
from guest_memfd at some point [1]. Would it be too bad to add an
indication to userfaultfd in the name somehow given that it's already
guarded by the config?

[1] https://lore.kernel.org/linux-coco/20251113230759.1562024-1-michael.roth@amd.com

>
> --
> Sincerely yours,
> Mike.
On Thu, Nov 27, 2025 at 11:19:35AM +0000, Nikita Kalyazin wrote:
>
>
> On 27/11/2025 10:36, Mike Rapoport wrote:
> > On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
> > > On 25/11/2025 18:38, Mike Rapoport wrote:
> > > > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> > > >
> > > > +#ifdef CONFIG_USERFAULTFD
> > > > +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
> > >
> > > We have to name it differently, otherwise it clashes with the existing one
> > > in this file.
> >
> > It's all David's fault! ;-P
> > How about kvm_gmem_get_prepared_folio() ?
>
> I'm afraid it may not be ideal due to preparedness tracking being removed
> from guest_memfd at some point [1]. Would it be too bad to add an
> indication to userfaultfd in the name somehow given that it's already
> guarded by the config?

Hmm, shmem also has this clash. There I picked shmem_get_folio_noalloc()
because that describes well what it does: lookup folio in the page cache,
grab it if it's there or return -ENOENT if it's missing.
That's also what hugetlb does for uffd minor fault.

The guest_memfd implementation I copied from one of the older postings
allocates the folio if it's not in the page cache and it seems to me that
it also should only look up existing folios to keep uffd minor semantics
uniform.

Then it makes sense also to name the vm_ops method get_folio_noalloc().

--
Sincerely yours,
Mike.
On 27/11/2025 19:04, Mike Rapoport wrote:
> On Thu, Nov 27, 2025 at 11:19:35AM +0000, Nikita Kalyazin wrote:
>>
>>
>> On 27/11/2025 10:36, Mike Rapoport wrote:
>>> On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
>>>> On 25/11/2025 18:38, Mike Rapoport wrote:
>>>>> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>>>>>
>>>>> +#ifdef CONFIG_USERFAULTFD
>>>>> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
>>>>
>>>> We have to name it differently, otherwise it clashes with the existing one
>>>> in this file.
>>>
>>> It's all David's fault! ;-P
>>> How about kvm_gmem_get_prepared_folio() ?
>>
>> I'm afraid it may not be ideal due to preparedness tracking being removed
>> from guest_memfd at some point [1]. Would it be too bad to add an
>> indication to userfaultfd in the name somehow given that it's already
>> guarded by the config?
>
> Hmm, shmem also has this clash. There I picked shmem_get_folio_noalloc()
> because that describes well what it does: lookup folio in the page cache,
> grab it if it's there or return -ENOENT if it's missing.
> That's also what hugetlb does for uffd minor fault.
>
> The guest_memfd implementation I copied from one of the older postings
> allocates the folio if it's not in the page cache and it seems to me that
> it also should only look up existing folios to keep uffd minor semantics
> uniform.

I can't see a reason for guest_memfd to deviate from shmem and hugetlb here
so makes sense to me.

>
> Then it makes sense also to name the vm_ops method get_folio_noalloc().
>
> --
> Sincerely yours,
> Mike.
* Mike Rapoport <rppt@kernel.org> [251125 13:39]:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
> userfaultfd notifications about minor page faults used for live migration
> and snapshotting of VMs with memory backed by shared hugetlbfs or tmpfs
> mappings as described in detail in commit 7677f7fd8be7 ("userfaultfd: add
> minor fault registration mode").
>
> To use the same mechanism for VMs that use guest_memfd to map their memory,
> guest_memfd should support userfaultfd minor mode.
>
> Extend ->fault() method of guest_memfd with ability to notify core page
> fault handler that a page fault requires handle_userfault(VM_UFFD_MINOR) to
> complete and add implementation of ->get_shared_folio() to guest_memfd
> vm_ops.
>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>
> ---
> virt/kvm/guest_memfd.c | 28 ++++++++++++++++++++++++++++
> 1 file changed, 28 insertions(+)
>
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index ffadc5ee8e04..2a2b076293f9 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,6 +4,7 @@
> #include <linux/kvm_host.h>
> #include <linux/pagemap.h>
> #include <linux/anon_inodes.h>
> +#include <linux/userfaultfd_k.h>
>
> #include "kvm_mm.h"
>
> @@ -369,6 +370,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
> return vmf_error(err);
> }
>
> + if (userfaultfd_minor(vmf->vma)) {
> + folio_unlock(folio);
> + folio_put(folio);
> + return VM_FAULT_UFFD_MINOR;
> + }
> +
> if (WARN_ON_ONCE(folio_test_large(folio))) {
> ret = VM_FAULT_SIGBUS;
> goto out_folio;
> @@ -390,8 +397,29 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
> return ret;
> }
>
> +#ifdef CONFIG_USERFAULTFD
> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
> +{
> + struct folio *folio;
> +
> + folio = kvm_gmem_get_folio(inode, pgoff);
> + if (IS_ERR_OR_NULL(folio))
> + return folio;
> +
> + if (!folio_test_uptodate(folio)) {
> + clear_highpage(folio_page(folio, 0));
> + kvm_gmem_mark_prepared(folio);
> + }
> +
> + return folio;
> +}
> +#endif
> +
> static const struct vm_operations_struct kvm_gmem_vm_ops = {
> .fault = kvm_gmem_fault_user_mapping,
> +#ifdef CONFIG_USERFAULTFD
> + .get_folio = kvm_gmem_get_folio,
> +#endif
> };
>
> static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> --
> 2.50.1
>
On 11/25/25 19:38, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>
> userfaultfd notifications about minor page faults used for live migration
> and snapshotting of VMs with memory backed by shared hugetlbfs or tmpfs
> mappings as described in detail in commit 7677f7fd8be7 ("userfaultfd: add
> minor fault registration mode").
>
> To use the same mechanism for VMs that use guest_memfd to map their memory,
> guest_memfd should support userfaultfd minor mode.
>
> Extend ->fault() method of guest_memfd with ability to notify core page
> fault handler that a page fault requires handle_userfault(VM_UFFD_MINOR) to
> complete and add implementation of ->get_shared_folio() to guest_memfd
> vm_ops.
>
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
No exports and still looks clean to me, nice. :)
--
Cheers
David
© 2016 - 2025 Red Hat, Inc.