KVM/hostmem: Support in-place guest-memfd as VM backends

[PATCH 8/8] hostmem: Support in-place guest memfd to back a VM

Posted by Peter Xu 3 months, 2 weeks ago

Host backends supports guest-memfd now by detecting whether it's a
confidential VM.  There's no way to choose it yet from the memory level to
use it in-place.  If we use guest-memfd, it so far always implies we need
two layers of memory backends, while the guest-memfd only provides the
private set of pages.

This patch introduces a way so that QEMU can consume guest memfd as the
only source of memory to back the object (aka, in place), rather than
having another backend supporting the pages converted to shared.

To use the in-place guest-memfd, one can add a memfd object with:

  -object memory-backend-memfd,guest-memfd=on,share=on

Note that share=on is required with in-place guest_memfd.

Signed-off-by: Peter Xu <peterx@redhat.com>
---
 qapi/qom.json            |  6 +++-
 backends/hostmem-memfd.c | 66 +++++++++++++++++++++++++++++++++++++---
 2 files changed, 67 insertions(+), 5 deletions(-)

diff --git a/qapi/qom.json b/qapi/qom.json
index 830cb2ffe7..6b090fe9a0 100644
--- a/qapi/qom.json
+++ b/qapi/qom.json
@@ -764,13 +764,17 @@
 # @seal: if true, create a sealed-file, which will block further
 #     resizing of the memory (default: true)
 #
+# @guest-memfd: if true, use guest-memfd to back the memory region.
+#     (default: false, since: 10.2)
+#
 # Since: 2.12
 ##
 { 'struct': 'MemoryBackendMemfdProperties',
   'base': 'MemoryBackendProperties',
   'data': { '*hugetlb': 'bool',
             '*hugetlbsize': 'size',
-            '*seal': 'bool' },
+            '*seal': 'bool',
+            '*guest-memfd': 'bool' },
   'if': 'CONFIG_LINUX' }
 
 ##
diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
index ea93f034e4..1fa16c1e1d 100644
--- a/backends/hostmem-memfd.c
+++ b/backends/hostmem-memfd.c
@@ -18,6 +18,8 @@
 #include "qapi/error.h"
 #include "qom/object.h"
 #include "migration/cpr.h"
+#include "system/kvm.h"
+#include <linux/kvm.h>
 
 OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendMemfd, MEMORY_BACKEND_MEMFD)
 
@@ -28,6 +30,13 @@ struct HostMemoryBackendMemfd {
     bool hugetlb;
     uint64_t hugetlbsize;
     bool seal;
+    /*
+     * NOTE: this differs from HostMemoryBackend's guest_memfd_private,
+     * which represents a internally private guest-memfd that only backs
+     * private pages.  Instead, this flag marks the memory backend will
+     * 100% use the guest-memfd pages in-place.
+     */
+    bool guest_memfd;
 };
 
 static bool
@@ -47,10 +56,40 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
         goto have_fd;
     }
 
-    fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
-                           m->hugetlb, m->hugetlbsize, m->seal ?
-                           F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
-                           errp);
+    if (m->guest_memfd) {
+        /* User choose to use in-place guest-memfd to back the VM.. */
+        if (!backend->share) {
+            error_setg(errp, "In-place guest-memfd must be used with share=on");
+            return false;
+        }
+
+        /*
+         * This is the request to have a guest-memfd to back private pages.
+         * In-place guest-memfd doesn't work like that.  Disable it for now
+         * to make it simple, so that each memory backend can only have
+         * guest-memfd either as private, or fully shared.
+         */
+        if (backend->guest_memfd_private) {
+            error_setg(errp, "In-place guest-memfd cannot be used with another "
+                       "private guest-memfd");
+            return false;
+        }
+
+        /* TODO: add huge page support */
+        fd = kvm_create_guest_memfd(backend->size,
+                                    GUEST_MEMFD_FLAG_MMAP |
+                                    GUEST_MEMFD_FLAG_INIT_SHARED,
+                                    errp);
+        if (fd < 0) {
+            return false;
+        }
+    } else {
+        fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
+                               m->hugetlb, m->hugetlbsize, m->seal ?
+                               F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
+                               errp);
+    }
+
     if (fd == -1) {
         return false;
     }
@@ -65,6 +104,18 @@ have_fd:
                                           backend->size, ram_flags, fd, 0, errp);
 }
 
+static bool
+memfd_backend_get_guest_memfd(Object *o, Error **errp)
+{
+    return MEMORY_BACKEND_MEMFD(o)->guest_memfd;
+}
+
+static void
+memfd_backend_set_guest_memfd(Object *o, bool value, Error **errp)
+{
+    MEMORY_BACKEND_MEMFD(o)->guest_memfd = value;
+}
+
 static bool
 memfd_backend_get_hugetlb(Object *o, Error **errp)
 {
@@ -152,6 +203,13 @@ memfd_backend_class_init(ObjectClass *oc, const void *data)
         object_class_property_set_description(oc, "hugetlbsize",
                                               "Huge pages size (ex: 2M, 1G)");
     }
+
+    object_class_property_add_bool(oc, "guest-memfd",
+                                   memfd_backend_get_guest_memfd,
+                                   memfd_backend_set_guest_memfd);
+    object_class_property_set_description(oc, "guest-memfd",
+                                          "Use guest memfd");
+
     object_class_property_add_bool(oc, "seal",
                                    memfd_backend_get_seal,
                                    memfd_backend_set_seal);
-- 
2.50.1

Re: [PATCH 8/8] hostmem: Support in-place guest memfd to back a VM

Posted by Xiaoyao Li 3 months, 2 weeks ago

On 10/24/2025 2:59 AM, Peter Xu wrote:
> Host backends supports guest-memfd now by detecting whether it's a
> confidential VM.  There's no way to choose it yet from the memory level to
> use it in-place.  If we use guest-memfd, it so far always implies we need
> two layers of memory backends, while the guest-memfd only provides the
> private set of pages.
> 
> This patch introduces a way so that QEMU can consume guest memfd as the
> only source of memory to back the object (aka, in place), rather than
> having another backend supporting the pages converted to shared.
> 
> To use the in-place guest-memfd, one can add a memfd object with:
> 
>    -object memory-backend-memfd,guest-memfd=on,share=on
> 
> Note that share=on is required with in-place guest_memfd.

First, I'm not sure "in-place" is the proper wording here. At first 
glance on the series, I thought it's something related to "in-place" 
page conversion. After reading a bit, I really that it is enabling guest 
memfd with mmap support to serve as normal memory backend.

Second, my POC implementation chose to implement a separate and specific 
memory-backend type "memory-backend-guest-memfd". Your approach to add 
an option of "guest-memfd" to memory-backend-memfd looks OK to me and it 
requires less code. But I think we need to explicitly error out to users 
when they set "guest_memfd" to on with unsupported properties 
configured, e.g., "hugetlb", "hugetlbsize", and "seal".

Third, the intended usage of gmem with mmap from KVM/kernel's 
perspective is userspace configures the meomry slot by passing the gmem 
fd to @guest_memfd and @guest_memfd of struct 
kvm_userspace_memory_region2 instead of passing the user address 
returned by mmap of the fd to @userspace_addr return mmap() as this 
patch does. Surely the usage of this path works. But when QEMU is going 
to support in-place conversion of gmem, we has to pass the @guest_memfd.
Well, this is no issue now and we can handle it in the future when needed.

> Signed-off-by: Peter Xu <peterx@redhat.com>
> ---
>   qapi/qom.json            |  6 +++-
>   backends/hostmem-memfd.c | 66 +++++++++++++++++++++++++++++++++++++---
>   2 files changed, 67 insertions(+), 5 deletions(-)
> 
> diff --git a/qapi/qom.json b/qapi/qom.json
> index 830cb2ffe7..6b090fe9a0 100644
> --- a/qapi/qom.json
> +++ b/qapi/qom.json
> @@ -764,13 +764,17 @@
>   # @seal: if true, create a sealed-file, which will block further
>   #     resizing of the memory (default: true)
>   #
> +# @guest-memfd: if true, use guest-memfd to back the memory region.
> +#     (default: false, since: 10.2)
> +#
>   # Since: 2.12
>   ##
>   { 'struct': 'MemoryBackendMemfdProperties',
>     'base': 'MemoryBackendProperties',
>     'data': { '*hugetlb': 'bool',
>               '*hugetlbsize': 'size',
> -            '*seal': 'bool' },
> +            '*seal': 'bool',
> +            '*guest-memfd': 'bool' },
>     'if': 'CONFIG_LINUX' }
>   
>   ##
> diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
> index ea93f034e4..1fa16c1e1d 100644
> --- a/backends/hostmem-memfd.c
> +++ b/backends/hostmem-memfd.c
> @@ -18,6 +18,8 @@
>   #include "qapi/error.h"
>   #include "qom/object.h"
>   #include "migration/cpr.h"
> +#include "system/kvm.h"
> +#include <linux/kvm.h>
>   
>   OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendMemfd, MEMORY_BACKEND_MEMFD)
>   
> @@ -28,6 +30,13 @@ struct HostMemoryBackendMemfd {
>       bool hugetlb;
>       uint64_t hugetlbsize;
>       bool seal;
> +    /*
> +     * NOTE: this differs from HostMemoryBackend's guest_memfd_private,
> +     * which represents a internally private guest-memfd that only backs
> +     * private pages.  Instead, this flag marks the memory backend will
> +     * 100% use the guest-memfd pages in-place.
> +     */
> +    bool guest_memfd;
>   };
>   
>   static bool
> @@ -47,10 +56,40 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
>           goto have_fd;
>       }
>   
> -    fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
> -                           m->hugetlb, m->hugetlbsize, m->seal ?
> -                           F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
> -                           errp);
> +    if (m->guest_memfd) {
> +        /* User choose to use in-place guest-memfd to back the VM.. */
> +        if (!backend->share) {
> +            error_setg(errp, "In-place guest-memfd must be used with share=on");
> +            return false;
> +        }
> +
> +        /*
> +         * This is the request to have a guest-memfd to back private pages.
> +         * In-place guest-memfd doesn't work like that.  Disable it for now
> +         * to make it simple, so that each memory backend can only have
> +         * guest-memfd either as private, or fully shared.
> +         */
> +        if (backend->guest_memfd_private) {
> +            error_setg(errp, "In-place guest-memfd cannot be used with another "
> +                       "private guest-memfd");
> +            return false;
> +        }

Add kvm_enabled() here, otherwise the following calling of 
kvm_create_guest_memfd() emits confusing information when accelerator is 
not configured as KVM, e.g., -machine q35,accel=tcg

qemu-system-x86: KVM does not support guest_memfd


> +        /* TODO: add huge page support */
> +        fd = kvm_create_guest_memfd(backend->size,
> +                                    GUEST_MEMFD_FLAG_MMAP |
> +                                    GUEST_MEMFD_FLAG_INIT_SHARED,
> +                                    errp);
> +        if (fd < 0) {
> +            return false;
> +        }
> +    } else {
> +        fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
> +                               m->hugetlb, m->hugetlbsize, m->seal ?
> +                               F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
> +                               errp);
> +    }
> +
>       if (fd == -1) {
>           return false;
>       }
> @@ -65,6 +104,18 @@ have_fd:
>                                             backend->size, ram_flags, fd, 0, errp);
>   }
>   
> +static bool
> +memfd_backend_get_guest_memfd(Object *o, Error **errp)
> +{
> +    return MEMORY_BACKEND_MEMFD(o)->guest_memfd;
> +}
> +
> +static void
> +memfd_backend_set_guest_memfd(Object *o, bool value, Error **errp)
> +{
> +    MEMORY_BACKEND_MEMFD(o)->guest_memfd = value;
> +}
> +
>   static bool
>   memfd_backend_get_hugetlb(Object *o, Error **errp)
>   {
> @@ -152,6 +203,13 @@ memfd_backend_class_init(ObjectClass *oc, const void *data)
>           object_class_property_set_description(oc, "hugetlbsize",
>                                                 "Huge pages size (ex: 2M, 1G)");
>       }
> +
> +    object_class_property_add_bool(oc, "guest-memfd",
> +                                   memfd_backend_get_guest_memfd,
> +                                   memfd_backend_set_guest_memfd);
> +    object_class_property_set_description(oc, "guest-memfd",
> +                                          "Use guest memfd");
> +
>       object_class_property_add_bool(oc, "seal",
>                                      memfd_backend_get_seal,
>                                      memfd_backend_set_seal);

Re: [PATCH 8/8] hostmem: Support in-place guest memfd to back a VM

Posted by Peter Xu 3 months, 2 weeks ago

On Fri, Oct 24, 2025 at 05:01:44PM +0800, Xiaoyao Li wrote:
> On 10/24/2025 2:59 AM, Peter Xu wrote:
> > Host backends supports guest-memfd now by detecting whether it's a
> > confidential VM.  There's no way to choose it yet from the memory level to
> > use it in-place.  If we use guest-memfd, it so far always implies we need
> > two layers of memory backends, while the guest-memfd only provides the
> > private set of pages.
> > 
> > This patch introduces a way so that QEMU can consume guest memfd as the
> > only source of memory to back the object (aka, in place), rather than
> > having another backend supporting the pages converted to shared.
> > 
> > To use the in-place guest-memfd, one can add a memfd object with:
> > 
> >    -object memory-backend-memfd,guest-memfd=on,share=on
> > 
> > Note that share=on is required with in-place guest_memfd.
> 
> First, I'm not sure "in-place" is the proper wording here. At first glance
> on the series, I thought it's something related to "in-place" page
> conversion. After reading a bit, I really that it is enabling guest memfd
> with mmap support to serve as normal memory backend.

It'll be only proper in current context of qemu, but yes I'm aware CoCo
also has such idea, so at least I should have come up with something
better. My bad.  When I wrote the patches a while ago it wasn't as clear,
and I didn't pay attention when I prepare them upstream.

> 
> Second, my POC implementation chose to implement a separate and specific
> memory-backend type "memory-backend-guest-memfd". Your approach to add an
> option of "guest-memfd" to memory-backend-memfd looks OK to me and it
> requires less code. But I think we need to explicitly error out to users
> when they set "guest_memfd" to on with unsupported properties configured,
> e.g., "hugetlb", "hugetlbsize", and "seal".

In my local tree I actually reused hugetlb* parameters, that needs
Ackerley's 1G kernel patches, and some mine on top.

Before I go and reply your other series..  I was definitely not aware that
anyone has been working on it!  Could you share a pointer?  Or is it still
in a private branch?

I'm more than happy to drop this series if you have an older / better
version.  Then I can rebase whatever I work on top.

> 
> Third, the intended usage of gmem with mmap from KVM/kernel's perspective is
> userspace configures the meomry slot by passing the gmem fd to @guest_memfd
> and @guest_memfd of struct kvm_userspace_memory_region2 instead of passing
> the user address returned by mmap of the fd to @userspace_addr return mmap()
> as this patch does. Surely the usage of this path works. But when QEMU is
> going to support in-place conversion of gmem, we has to pass the
> @guest_memfd.
> Well, this is no issue now and we can handle it in the future when needed.

Yes, that's something the private guest-memfd would need.  For completely
shared guest-memfd, IIUC we will use a lot of different code paths, the
goal is to make old APIs work not only for KVM_SET_USER_MEMORY_REGION, but
for all the rest modules like vhost-kernel, vhost-user, and so on.

> 
> > Signed-off-by: Peter Xu <peterx@redhat.com>
> > ---
> >   qapi/qom.json            |  6 +++-
> >   backends/hostmem-memfd.c | 66 +++++++++++++++++++++++++++++++++++++---
> >   2 files changed, 67 insertions(+), 5 deletions(-)
> > 
> > diff --git a/qapi/qom.json b/qapi/qom.json
> > index 830cb2ffe7..6b090fe9a0 100644
> > --- a/qapi/qom.json
> > +++ b/qapi/qom.json
> > @@ -764,13 +764,17 @@
> >   # @seal: if true, create a sealed-file, which will block further
> >   #     resizing of the memory (default: true)
> >   #
> > +# @guest-memfd: if true, use guest-memfd to back the memory region.
> > +#     (default: false, since: 10.2)
> > +#
> >   # Since: 2.12
> >   ##
> >   { 'struct': 'MemoryBackendMemfdProperties',
> >     'base': 'MemoryBackendProperties',
> >     'data': { '*hugetlb': 'bool',
> >               '*hugetlbsize': 'size',
> > -            '*seal': 'bool' },
> > +            '*seal': 'bool',
> > +            '*guest-memfd': 'bool' },
> >     'if': 'CONFIG_LINUX' }
> >   ##
> > diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
> > index ea93f034e4..1fa16c1e1d 100644
> > --- a/backends/hostmem-memfd.c
> > +++ b/backends/hostmem-memfd.c
> > @@ -18,6 +18,8 @@
> >   #include "qapi/error.h"
> >   #include "qom/object.h"
> >   #include "migration/cpr.h"
> > +#include "system/kvm.h"
> > +#include <linux/kvm.h>
> >   OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendMemfd, MEMORY_BACKEND_MEMFD)
> > @@ -28,6 +30,13 @@ struct HostMemoryBackendMemfd {
> >       bool hugetlb;
> >       uint64_t hugetlbsize;
> >       bool seal;
> > +    /*
> > +     * NOTE: this differs from HostMemoryBackend's guest_memfd_private,
> > +     * which represents a internally private guest-memfd that only backs
> > +     * private pages.  Instead, this flag marks the memory backend will
> > +     * 100% use the guest-memfd pages in-place.
> > +     */
> > +    bool guest_memfd;
> >   };
> >   static bool
> > @@ -47,10 +56,40 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
> >           goto have_fd;
> >       }
> > -    fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
> > -                           m->hugetlb, m->hugetlbsize, m->seal ?
> > -                           F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
> > -                           errp);
> > +    if (m->guest_memfd) {
> > +        /* User choose to use in-place guest-memfd to back the VM.. */
> > +        if (!backend->share) {
> > +            error_setg(errp, "In-place guest-memfd must be used with share=on");
> > +            return false;
> > +        }
> > +
> > +        /*
> > +         * This is the request to have a guest-memfd to back private pages.
> > +         * In-place guest-memfd doesn't work like that.  Disable it for now
> > +         * to make it simple, so that each memory backend can only have
> > +         * guest-memfd either as private, or fully shared.
> > +         */
> > +        if (backend->guest_memfd_private) {
> > +            error_setg(errp, "In-place guest-memfd cannot be used with another "
> > +                       "private guest-memfd");
> > +            return false;
> > +        }
> 
> Add kvm_enabled() here, otherwise the following calling of
> kvm_create_guest_memfd() emits confusing information when accelerator is not
> configured as KVM, e.g., -machine q35,accel=tcg
> 
> qemu-system-x86: KVM does not support guest_memfd
> 
> 
> > +        /* TODO: add huge page support */
> > +        fd = kvm_create_guest_memfd(backend->size,
> > +                                    GUEST_MEMFD_FLAG_MMAP |
> > +                                    GUEST_MEMFD_FLAG_INIT_SHARED,
> > +                                    errp);
> > +        if (fd < 0) {
> > +            return false;
> > +        }
> > +    } else {
> > +        fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
> > +                               m->hugetlb, m->hugetlbsize, m->seal ?
> > +                               F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
> > +                               errp);
> > +    }
> > +
> >       if (fd == -1) {
> >           return false;
> >       }
> > @@ -65,6 +104,18 @@ have_fd:
> >                                             backend->size, ram_flags, fd, 0, errp);
> >   }
> > +static bool
> > +memfd_backend_get_guest_memfd(Object *o, Error **errp)
> > +{
> > +    return MEMORY_BACKEND_MEMFD(o)->guest_memfd;
> > +}
> > +
> > +static void
> > +memfd_backend_set_guest_memfd(Object *o, bool value, Error **errp)
> > +{
> > +    MEMORY_BACKEND_MEMFD(o)->guest_memfd = value;
> > +}
> > +
> >   static bool
> >   memfd_backend_get_hugetlb(Object *o, Error **errp)
> >   {
> > @@ -152,6 +203,13 @@ memfd_backend_class_init(ObjectClass *oc, const void *data)
> >           object_class_property_set_description(oc, "hugetlbsize",
> >                                                 "Huge pages size (ex: 2M, 1G)");
> >       }
> > +
> > +    object_class_property_add_bool(oc, "guest-memfd",
> > +                                   memfd_backend_get_guest_memfd,
> > +                                   memfd_backend_set_guest_memfd);
> > +    object_class_property_set_description(oc, "guest-memfd",
> > +                                          "Use guest memfd");
> > +
> >       object_class_property_add_bool(oc, "seal",
> >                                      memfd_backend_get_seal,
> >                                      memfd_backend_set_seal);
> 

-- 
Peter Xu

Re: [PATCH 8/8] hostmem: Support in-place guest memfd to back a VM

Posted by Xiaoyao Li 3 months, 2 weeks ago

On 10/24/2025 11:22 PM, Peter Xu wrote:
> On Fri, Oct 24, 2025 at 05:01:44PM +0800, Xiaoyao Li wrote:
>> On 10/24/2025 2:59 AM, Peter Xu wrote:
>>> Host backends supports guest-memfd now by detecting whether it's a
>>> confidential VM.  There's no way to choose it yet from the memory level to
>>> use it in-place.  If we use guest-memfd, it so far always implies we need
>>> two layers of memory backends, while the guest-memfd only provides the
>>> private set of pages.
>>>
>>> This patch introduces a way so that QEMU can consume guest memfd as the
>>> only source of memory to back the object (aka, in place), rather than
>>> having another backend supporting the pages converted to shared.
>>>
>>> To use the in-place guest-memfd, one can add a memfd object with:
>>>
>>>     -object memory-backend-memfd,guest-memfd=on,share=on
>>>
>>> Note that share=on is required with in-place guest_memfd.
>>
>> First, I'm not sure "in-place" is the proper wording here. At first glance
>> on the series, I thought it's something related to "in-place" page
>> conversion. After reading a bit, I really that it is enabling guest memfd
>> with mmap support to serve as normal memory backend.
> 
> It'll be only proper in current context of qemu, but yes I'm aware CoCo
> also has such idea, so at least I should have come up with something
> better. My bad.  When I wrote the patches a while ago it wasn't as clear,
> and I didn't pay attention when I prepare them upstream.
> 
>>
>> Second, my POC implementation chose to implement a separate and specific
>> memory-backend type "memory-backend-guest-memfd". Your approach to add an
>> option of "guest-memfd" to memory-backend-memfd looks OK to me and it
>> requires less code. But I think we need to explicitly error out to users
>> when they set "guest_memfd" to on with unsupported properties configured,
>> e.g., "hugetlb", "hugetlbsize", and "seal".
> 
> In my local tree I actually reused hugetlb* parameters, that needs
> Ackerley's 1G kernel patches, and some mine on top.
> 
> Before I go and reply your other series..  I was definitely not aware that
> anyone has been working on it!  Could you share a pointer?  Or is it still
> in a private branch?

I shared it publicly when reviwed and tested KVM series: 
https://lore.kernel.org/all/13654746-3edc-4e4a-ac4f-fa281b83b2ae@intel.com/

The poc branch:

   https://github.com/intel-staging/qemu-tdx.git lxy/gmem-mmap-poc

It was based on the old QEMU and based on old kernel API of v6.18-rc1 
(the API changes on -rc2).

> I'm more than happy to drop this series if you have an older / better
> version.  Then I can rebase whatever I work on top.

I was not authorized to do the QEMU upstream of gmem mmap support inside 
the company. So please keep your series and I'm happy to help review it 
and make it upstreamed.

>>
>> Third, the intended usage of gmem with mmap from KVM/kernel's perspective is
>> userspace configures the meomry slot by passing the gmem fd to @guest_memfd
>> and @guest_memfd of struct kvm_userspace_memory_region2 instead of passing
>> the user address returned by mmap of the fd to @userspace_addr return mmap()
>> as this patch does. Surely the usage of this path works. But when QEMU is
>> going to support in-place conversion of gmem, we has to pass the
>> @guest_memfd.
>> Well, this is no issue now and we can handle it in the future when needed.
> 
> Yes, that's something the private guest-memfd would need.  For completely
> shared guest-memfd, IIUC we will use a lot of different code paths, the
> goal is to make old APIs work not only for KVM_SET_USER_MEMORY_REGION, but
> for all the rest modules like vhost-kernel, vhost-user, and so on.

And if pass the @guest_memfd, we will need to handle the issue of 
aliased: https://lore.kernel.org/all/aH-0MdNJbH19Mhm3@google.com/

>>
>>> Signed-off-by: Peter Xu <peterx@redhat.com>
>>> ---
>>>    qapi/qom.json            |  6 +++-
>>>    backends/hostmem-memfd.c | 66 +++++++++++++++++++++++++++++++++++++---
>>>    2 files changed, 67 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/qapi/qom.json b/qapi/qom.json
>>> index 830cb2ffe7..6b090fe9a0 100644
>>> --- a/qapi/qom.json
>>> +++ b/qapi/qom.json
>>> @@ -764,13 +764,17 @@
>>>    # @seal: if true, create a sealed-file, which will block further
>>>    #     resizing of the memory (default: true)
>>>    #
>>> +# @guest-memfd: if true, use guest-memfd to back the memory region.
>>> +#     (default: false, since: 10.2)
>>> +#
>>>    # Since: 2.12
>>>    ##
>>>    { 'struct': 'MemoryBackendMemfdProperties',
>>>      'base': 'MemoryBackendProperties',
>>>      'data': { '*hugetlb': 'bool',
>>>                '*hugetlbsize': 'size',
>>> -            '*seal': 'bool' },
>>> +            '*seal': 'bool',
>>> +            '*guest-memfd': 'bool' },
>>>      'if': 'CONFIG_LINUX' }
>>>    ##
>>> diff --git a/backends/hostmem-memfd.c b/backends/hostmem-memfd.c
>>> index ea93f034e4..1fa16c1e1d 100644
>>> --- a/backends/hostmem-memfd.c
>>> +++ b/backends/hostmem-memfd.c
>>> @@ -18,6 +18,8 @@
>>>    #include "qapi/error.h"
>>>    #include "qom/object.h"
>>>    #include "migration/cpr.h"
>>> +#include "system/kvm.h"
>>> +#include <linux/kvm.h>
>>>    OBJECT_DECLARE_SIMPLE_TYPE(HostMemoryBackendMemfd, MEMORY_BACKEND_MEMFD)
>>> @@ -28,6 +30,13 @@ struct HostMemoryBackendMemfd {
>>>        bool hugetlb;
>>>        uint64_t hugetlbsize;
>>>        bool seal;
>>> +    /*
>>> +     * NOTE: this differs from HostMemoryBackend's guest_memfd_private,
>>> +     * which represents a internally private guest-memfd that only backs
>>> +     * private pages.  Instead, this flag marks the memory backend will
>>> +     * 100% use the guest-memfd pages in-place.
>>> +     */
>>> +    bool guest_memfd;
>>>    };
>>>    static bool
>>> @@ -47,10 +56,40 @@ memfd_backend_memory_alloc(HostMemoryBackend *backend, Error **errp)
>>>            goto have_fd;
>>>        }
>>> -    fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
>>> -                           m->hugetlb, m->hugetlbsize, m->seal ?
>>> -                           F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
>>> -                           errp);
>>> +    if (m->guest_memfd) {
>>> +        /* User choose to use in-place guest-memfd to back the VM.. */
>>> +        if (!backend->share) {
>>> +            error_setg(errp, "In-place guest-memfd must be used with share=on");
>>> +            return false;
>>> +        }
>>> +
>>> +        /*
>>> +         * This is the request to have a guest-memfd to back private pages.
>>> +         * In-place guest-memfd doesn't work like that.  Disable it for now
>>> +         * to make it simple, so that each memory backend can only have
>>> +         * guest-memfd either as private, or fully shared.
>>> +         */
>>> +        if (backend->guest_memfd_private) {
>>> +            error_setg(errp, "In-place guest-memfd cannot be used with another "
>>> +                       "private guest-memfd");
>>> +            return false;
>>> +        }
>>
>> Add kvm_enabled() here, otherwise the following calling of
>> kvm_create_guest_memfd() emits confusing information when accelerator is not
>> configured as KVM, e.g., -machine q35,accel=tcg
>>
>> qemu-system-x86: KVM does not support guest_memfd
>>
>>
>>> +        /* TODO: add huge page support */
>>> +        fd = kvm_create_guest_memfd(backend->size,
>>> +                                    GUEST_MEMFD_FLAG_MMAP |
>>> +                                    GUEST_MEMFD_FLAG_INIT_SHARED,
>>> +                                    errp);
>>> +        if (fd < 0) {
>>> +            return false;
>>> +        }
>>> +    } else {
>>> +        fd = qemu_memfd_create(TYPE_MEMORY_BACKEND_MEMFD, backend->size,
>>> +                               m->hugetlb, m->hugetlbsize, m->seal ?
>>> +                               F_SEAL_GROW | F_SEAL_SHRINK | F_SEAL_SEAL : 0,
>>> +                               errp);
>>> +    }
>>> +
>>>        if (fd == -1) {
>>>            return false;
>>>        }
>>> @@ -65,6 +104,18 @@ have_fd:
>>>                                              backend->size, ram_flags, fd, 0, errp);
>>>    }
>>> +static bool
>>> +memfd_backend_get_guest_memfd(Object *o, Error **errp)
>>> +{
>>> +    return MEMORY_BACKEND_MEMFD(o)->guest_memfd;
>>> +}
>>> +
>>> +static void
>>> +memfd_backend_set_guest_memfd(Object *o, bool value, Error **errp)
>>> +{
>>> +    MEMORY_BACKEND_MEMFD(o)->guest_memfd = value;
>>> +}
>>> +
>>>    static bool
>>>    memfd_backend_get_hugetlb(Object *o, Error **errp)
>>>    {
>>> @@ -152,6 +203,13 @@ memfd_backend_class_init(ObjectClass *oc, const void *data)
>>>            object_class_property_set_description(oc, "hugetlbsize",
>>>                                                  "Huge pages size (ex: 2M, 1G)");
>>>        }
>>> +
>>> +    object_class_property_add_bool(oc, "guest-memfd",
>>> +                                   memfd_backend_get_guest_memfd,
>>> +                                   memfd_backend_set_guest_memfd);
>>> +    object_class_property_set_description(oc, "guest-memfd",
>>> +                                          "Use guest memfd");
>>> +
>>>        object_class_property_add_bool(oc, "seal",
>>>                                       memfd_backend_get_seal,
>>>                                       memfd_backend_set_seal);
>>
>