[PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode

Mike Rapoport posted 5 patches 6 days, 5 hours ago
There is a newer version of this series
[PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by Mike Rapoport 6 days, 5 hours ago
From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>

userfaultfd notifications about minor page faults are used for live
migration and snapshotting of VMs with memory backed by shared hugetlbfs
or tmpfs mappings, as described in detail in commit 7677f7fd8be7
("userfaultfd: add minor fault registration mode").

To use the same mechanism for VMs that use guest_memfd to map their memory,
guest_memfd should support userfaultfd minor mode.

Extend the ->fault() method of guest_memfd with the ability to notify the
core page fault handler that a page fault requires
handle_userfault(VM_UFFD_MINOR) to complete, and add an implementation of
->get_folio() to the guest_memfd vm_ops.

Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
---
 virt/kvm/guest_memfd.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
index ffadc5ee8e04..2a2b076293f9 100644
--- a/virt/kvm/guest_memfd.c
+++ b/virt/kvm/guest_memfd.c
@@ -4,6 +4,7 @@
 #include <linux/kvm_host.h>
 #include <linux/pagemap.h>
 #include <linux/anon_inodes.h>
+#include <linux/userfaultfd_k.h>
 
 #include "kvm_mm.h"
 
@@ -369,6 +370,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
 		return vmf_error(err);
 	}
 
+	if (userfaultfd_minor(vmf->vma)) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return VM_FAULT_UFFD_MINOR;
+	}
+
 	if (WARN_ON_ONCE(folio_test_large(folio))) {
 		ret = VM_FAULT_SIGBUS;
 		goto out_folio;
@@ -390,8 +397,29 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
 	return ret;
 }
 
+#ifdef CONFIG_USERFAULTFD
+static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
+{
+	struct folio *folio;
+
+	folio = kvm_gmem_get_folio(inode, pgoff);
+	if (IS_ERR_OR_NULL(folio))
+		return folio;
+
+	if (!folio_test_uptodate(folio)) {
+		clear_highpage(folio_page(folio, 0));
+		kvm_gmem_mark_prepared(folio);
+	}
+
+	return folio;
+}
+#endif
+
 static const struct vm_operations_struct kvm_gmem_vm_ops = {
 	.fault = kvm_gmem_fault_user_mapping,
+#ifdef CONFIG_USERFAULTFD
+	.get_folio	= kvm_gmem_get_folio,
+#endif
 };
 
 static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
-- 
2.50.1
Re: [PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by Nikita Kalyazin 5 days, 7 hours ago

On 25/11/2025 18:38, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> 
> userfaultfd notifications about minor page faults used for live migration
> and snapshotting of VMs with memory backed by shared hugetlbfs or tmpfs
> mappings as described in detail in commit 7677f7fd8be7 ("userfaultfd: add
> minor fault registration mode").
> 
> To use the same mechanism for VMs that use guest_memfd to map their memory,
> guest_memfd should support userfaultfd minor mode.
> 
> Extend ->fault() method of guest_memfd with ability to notify core page
> fault handler that a page fault requires handle_userfault(VM_UFFD_MINOR) to
> complete and add implementation of ->get_shared_folio() to guest_memfd
> vm_ops.
> 
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---
>   virt/kvm/guest_memfd.c | 28 ++++++++++++++++++++++++++++
>   1 file changed, 28 insertions(+)
> 
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index ffadc5ee8e04..2a2b076293f9 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,6 +4,7 @@
>   #include <linux/kvm_host.h>
>   #include <linux/pagemap.h>
>   #include <linux/anon_inodes.h>
> +#include <linux/userfaultfd_k.h>
> 
>   #include "kvm_mm.h"
> 
> @@ -369,6 +370,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>                  return vmf_error(err);
>          }
> 
> +       if (userfaultfd_minor(vmf->vma)) {
> +               folio_unlock(folio);
> +               folio_put(folio);
> +               return VM_FAULT_UFFD_MINOR;
> +       }
> +
>          if (WARN_ON_ONCE(folio_test_large(folio))) {
>                  ret = VM_FAULT_SIGBUS;
>                  goto out_folio;
> @@ -390,8 +397,29 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>          return ret;
>   }
> 
> +#ifdef CONFIG_USERFAULTFD
> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)

We have to name it differently, otherwise it clashes with the existing 
one in this file.

> +{
> +       struct folio *folio;
> +
> +       folio = kvm_gmem_get_folio(inode, pgoff);

                   ^^

> +       if (IS_ERR_OR_NULL(folio))
> +               return folio;
> +
> +       if (!folio_test_uptodate(folio)) {
> +               clear_highpage(folio_page(folio, 0));
> +               kvm_gmem_mark_prepared(folio);
> +       }
> +
> +       return folio;
> +}
> +#endif
> +
>   static const struct vm_operations_struct kvm_gmem_vm_ops = {
>          .fault = kvm_gmem_fault_user_mapping,
> +#ifdef CONFIG_USERFAULTFD
> +       .get_folio      = kvm_gmem_get_folio,
> +#endif
>   };
> 
>   static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> --
> 2.50.1
>
Re: [PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by Mike Rapoport 4 days, 13 hours ago
On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
> On 25/11/2025 18:38, Mike Rapoport wrote:
> > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> > 
> > +#ifdef CONFIG_USERFAULTFD
> > +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
> 
> We have to name it differently, otherwise it clashes with the existing one
> in this file.

It's all David's fault! ;-P
How about kvm_gmem_get_prepared_folio() ?

-- 
Sincerely yours,
Mike.
Re: [PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by David Hildenbrand (Red Hat) 4 days, 12 hours ago
On 11/27/25 11:36, Mike Rapoport wrote:
> On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
>> On 25/11/2025 18:38, Mike Rapoport wrote:
>>> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>>>
>>> +#ifdef CONFIG_USERFAULTFD
>>> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
>>
>> We have to name it differently, otherwise it clashes with the existing one
>> in this file.
> 
> It's all David's fault! ;-P

As usual :)

> How about kvm_gmem_get_prepared_folio() ?

Or maybe just spell out that it is for vm_ops

kvm_gmem_vm_ops_get_folio()

-- 
Cheers

David
Re: [PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by Nikita Kalyazin 4 days, 12 hours ago

On 27/11/2025 10:36, Mike Rapoport wrote:
> On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
>> On 25/11/2025 18:38, Mike Rapoport wrote:
>>> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>>>
>>> +#ifdef CONFIG_USERFAULTFD
>>> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
>>
>> We have to name it differently, otherwise it clashes with the existing one
>> in this file.
> 
> It's all David's fault! ;-P
> How about kvm_gmem_get_prepared_folio() ?

I'm afraid it may not be ideal due to preparedness tracking being 
removed from guest_memfd at some point [1].  Would it be too bad to add 
an indication to userfaultfd in the name somehow given that it's already 
guarded by the config?

[1] 
https://lore.kernel.org/linux-coco/20251113230759.1562024-1-michael.roth@amd.com

> 
> --
> Sincerely yours,
> Mike.
Re: [PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by Mike Rapoport 4 days, 4 hours ago
On Thu, Nov 27, 2025 at 11:19:35AM +0000, Nikita Kalyazin wrote:
> 
> 
> On 27/11/2025 10:36, Mike Rapoport wrote:
> > On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
> > > On 25/11/2025 18:38, Mike Rapoport wrote:
> > > > From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> > > > 
> > > > +#ifdef CONFIG_USERFAULTFD
> > > > +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
> > > 
> > > We have to name it differently, otherwise it clashes with the existing one
> > > in this file.
> > 
> > It's all David's fault! ;-P
> > How about kvm_gmem_get_prepared_folio() ?
> 
> I'm afraid it may not be ideal due to preparedness tracking being removed
> from guest_memfd at some point [1].  Would it be too bad to add an
> indication to userfaultfd in the name somehow given that it's already
> guarded by the config?

Hmm, shmem also has this clash. There I picked shmem_get_folio_noalloc()
because that describes well what it does: lookup folio in the page cache,
grab it if it's there or return -ENOENT if it's missing.
That's also what hugetlb does for uffd minor fault.

The guest_memfd implementation, which I copied from one of the older
postings, allocates the folio if it's not in the page cache. It seems to me
that it should also only look up existing folios, to keep uffd minor
semantics uniform.
 
Then it makes sense also to name the vm_ops method get_folio_noalloc().

-- 
Sincerely yours,
Mike.
Re: [PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by Nikita Kalyazin 3 days, 11 hours ago

On 27/11/2025 19:04, Mike Rapoport wrote:
> On Thu, Nov 27, 2025 at 11:19:35AM +0000, Nikita Kalyazin wrote:
>>
>>
>> On 27/11/2025 10:36, Mike Rapoport wrote:
>>> On Wed, Nov 26, 2025 at 04:49:31PM +0000, Nikita Kalyazin wrote:
>>>> On 25/11/2025 18:38, Mike Rapoport wrote:
>>>>> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
>>>>>
>>>>> +#ifdef CONFIG_USERFAULTFD
>>>>> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
>>>>
>>>> We have to name it differently, otherwise it clashes with the existing one
>>>> in this file.
>>>
>>> It's all David's fault! ;-P
>>> How about kvm_gmem_get_prepared_folio() ?
>>
>> I'm afraid it may not be ideal due to preparedness tracking being removed
>> from guest_memfd at some point [1].  Would it be too bad to add an
>> indication to userfaultfd in the name somehow given that it's already
>> guarded by the config?
> 
> Hmm, shmem also has this clash. There I picked shmem_get_folio_noalloc()
> because that describes well what it does: lookup folio in the page cache,
> grab it if it's there or return -ENOENT if it's missing.
> That's also what hugetlb does for uffd minor fault.
> 
> The guest_memfd implementation I copied from one of the older postings
> allocates the folio if it's not in the page cache and it seems to me that
> it also should only look up existing folios to keep uffd minor semantics
> uniform.

I can't see a reason for guest_memfd to deviate from shmem and hugetlb 
here so makes sense to me.

> 
> Then it makes sense also to name the vm_ops method get_folio_noalloc().
> 
> --
> Sincerely yours,
> Mike.
Re: [PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by Liam R. Howlett 5 days, 8 hours ago
* Mike Rapoport <rppt@kernel.org> [251125 13:39]:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> 
> userfaultfd notifications about minor page faults used for live migration
> and snapshotting of VMs with memory backed by shared hugetlbfs or tmpfs
> mappings as described in detail in commit 7677f7fd8be7 ("userfaultfd: add
> minor fault registration mode").
> 
> To use the same mechanism for VMs that use guest_memfd to map their memory,
> guest_memfd should support userfaultfd minor mode.
> 
> Extend ->fault() method of guest_memfd with ability to notify core page
> fault handler that a page fault requires handle_userfault(VM_UFFD_MINOR) to
> complete and add implementation of ->get_shared_folio() to guest_memfd
> vm_ops.
> 
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>

Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com>

> ---
>  virt/kvm/guest_memfd.c | 28 ++++++++++++++++++++++++++++
>  1 file changed, 28 insertions(+)
> 
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index ffadc5ee8e04..2a2b076293f9 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -4,6 +4,7 @@
>  #include <linux/kvm_host.h>
>  #include <linux/pagemap.h>
>  #include <linux/anon_inodes.h>
> +#include <linux/userfaultfd_k.h>
>  
>  #include "kvm_mm.h"
>  
> @@ -369,6 +370,12 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>  		return vmf_error(err);
>  	}
>  
> +	if (userfaultfd_minor(vmf->vma)) {
> +		folio_unlock(folio);
> +		folio_put(folio);
> +		return VM_FAULT_UFFD_MINOR;
> +	}
> +
>  	if (WARN_ON_ONCE(folio_test_large(folio))) {
>  		ret = VM_FAULT_SIGBUS;
>  		goto out_folio;
> @@ -390,8 +397,29 @@ static vm_fault_t kvm_gmem_fault_user_mapping(struct vm_fault *vmf)
>  	return ret;
>  }
>  
> +#ifdef CONFIG_USERFAULTFD
> +static struct folio *kvm_gmem_get_folio(struct inode *inode, pgoff_t pgoff)
> +{
> +	struct folio *folio;
> +
> +	folio = kvm_gmem_get_folio(inode, pgoff);
> +	if (IS_ERR_OR_NULL(folio))
> +		return folio;
> +
> +	if (!folio_test_uptodate(folio)) {
> +		clear_highpage(folio_page(folio, 0));
> +		kvm_gmem_mark_prepared(folio);
> +	}
> +
> +	return folio;
> +}
> +#endif
> +
>  static const struct vm_operations_struct kvm_gmem_vm_ops = {
>  	.fault = kvm_gmem_fault_user_mapping,
> +#ifdef CONFIG_USERFAULTFD
> +	.get_folio	= kvm_gmem_get_folio,
> +#endif
>  };
>  
>  static int kvm_gmem_mmap(struct file *file, struct vm_area_struct *vma)
> -- 
> 2.50.1
>
Re: [PATCH v2 4/5] guest_memfd: add support for userfaultfd minor mode
Posted by David Hildenbrand (Red Hat) 5 days, 13 hours ago
On 11/25/25 19:38, Mike Rapoport wrote:
> From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
> 
> userfaultfd notifications about minor page faults used for live migration
> and snapshotting of VMs with memory backed by shared hugetlbfs or tmpfs
> mappings as described in detail in commit 7677f7fd8be7 ("userfaultfd: add
> minor fault registration mode").
> 
> To use the same mechanism for VMs that use guest_memfd to map their memory,
> guest_memfd should support userfaultfd minor mode.
> 
> Extend ->fault() method of guest_memfd with ability to notify core page
> fault handler that a page fault requires handle_userfault(VM_UFFD_MINOR) to
> complete and add implementation of ->get_shared_folio() to guest_memfd
> vm_ops.
> 
> Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
> ---

No exports and still looks clean to me, nice. :)

-- 
Cheers

David