[PATCH 5/7] memory: Register the RamDiscardManager instance upon guest_memfd creation

Chenyi Qiang posted 7 patches 1 year, 1 month ago
There is a newer version of this series
[PATCH 5/7] memory: Register the RamDiscardManager instance upon guest_memfd creation
Posted by Chenyi Qiang 1 year, 1 month ago
Introduce the realize()/unrealize() callbacks to initialize/uninitialize
the new guest_memfd_manager object and register/unregister it in the
target MemoryRegion.

Guest_memfd was initially set to shared until the commit bd3bcf6962
("kvm/memory: Make memory type private by default if it has guest memfd
backend"). To align with this change, the default state in
guest_memfd_manager is set to private. (The bitmap is cleared to 0).
Additionally, setting the default to private can also reduce the
overhead of mapping shared pages into IOMMU by VFIO during the bootup stage.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
 include/sysemu/guest-memfd-manager.h | 27 +++++++++++++++++++++++++++
 system/guest-memfd-manager.c         | 28 +++++++++++++++++++++++++++-
 system/physmem.c                     |  7 +++++++
 3 files changed, 61 insertions(+), 1 deletion(-)

diff --git a/include/sysemu/guest-memfd-manager.h b/include/sysemu/guest-memfd-manager.h
index 9dc4e0346d..d1e7f698e8 100644
--- a/include/sysemu/guest-memfd-manager.h
+++ b/include/sysemu/guest-memfd-manager.h
@@ -42,6 +42,8 @@ struct GuestMemfdManager {
 struct GuestMemfdManagerClass {
     ObjectClass parent_class;
 
+    void (*realize)(GuestMemfdManager *gmm, MemoryRegion *mr, uint64_t region_size);
+    void (*unrealize)(GuestMemfdManager *gmm);
     int (*state_change)(GuestMemfdManager *gmm, uint64_t offset, uint64_t size,
                         bool shared_to_private);
 };
@@ -61,4 +63,29 @@ static inline int guest_memfd_manager_state_change(GuestMemfdManager *gmm, uint6
     return 0;
 }
 
+static inline void guest_memfd_manager_realize(GuestMemfdManager *gmm,
+                                              MemoryRegion *mr, uint64_t region_size)
+{
+    GuestMemfdManagerClass *klass;
+
+    g_assert(gmm);
+    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
+
+    if (klass->realize) {
+        klass->realize(gmm, mr, region_size);
+    }
+}
+
+static inline void guest_memfd_manager_unrealize(GuestMemfdManager *gmm)
+{
+    GuestMemfdManagerClass *klass;
+
+    g_assert(gmm);
+    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
+
+    if (klass->unrealize) {
+        klass->unrealize(gmm);
+    }
+}
+
 #endif
diff --git a/system/guest-memfd-manager.c b/system/guest-memfd-manager.c
index 6601df5f3f..b6a32f0bfb 100644
--- a/system/guest-memfd-manager.c
+++ b/system/guest-memfd-manager.c
@@ -366,6 +366,31 @@ static int guest_memfd_state_change(GuestMemfdManager *gmm, uint64_t offset,
     return ret;
 }
 
+static void guest_memfd_manager_realizefn(GuestMemfdManager *gmm, MemoryRegion *mr,
+                                          uint64_t region_size)
+{
+    uint64_t bitmap_size;
+
+    gmm->block_size = qemu_real_host_page_size();
+    bitmap_size = ROUND_UP(region_size, gmm->block_size) / gmm->block_size;
+
+    gmm->mr = mr;
+    gmm->bitmap_size = bitmap_size;
+    gmm->bitmap = bitmap_new(bitmap_size);
+
+    memory_region_set_ram_discard_manager(gmm->mr, RAM_DISCARD_MANAGER(gmm));
+}
+
+static void guest_memfd_manager_unrealizefn(GuestMemfdManager *gmm)
+{
+    memory_region_set_ram_discard_manager(gmm->mr, NULL);
+
+    g_free(gmm->bitmap);
+    gmm->bitmap = NULL;
+    gmm->bitmap_size = 0;
+    gmm->mr = NULL;
+}
+
 static void guest_memfd_manager_init(Object *obj)
 {
     GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(obj);
@@ -375,7 +400,6 @@ static void guest_memfd_manager_init(Object *obj)
 
 static void guest_memfd_manager_finalize(Object *obj)
 {
-    g_free(GUEST_MEMFD_MANAGER(obj)->bitmap);
 }
 
 static void guest_memfd_manager_class_init(ObjectClass *oc, void *data)
@@ -384,6 +408,8 @@ static void guest_memfd_manager_class_init(ObjectClass *oc, void *data)
     RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(oc);
 
     gmmc->state_change = guest_memfd_state_change;
+    gmmc->realize = guest_memfd_manager_realizefn;
+    gmmc->unrealize = guest_memfd_manager_unrealizefn;
 
     rdmc->get_min_granularity = guest_memfd_rdm_get_min_granularity;
     rdmc->register_listener = guest_memfd_rdm_register_listener;
diff --git a/system/physmem.c b/system/physmem.c
index dc1db3a384..532182a6dd 100644
--- a/system/physmem.c
+++ b/system/physmem.c
@@ -53,6 +53,7 @@
 #include "sysemu/hostmem.h"
 #include "sysemu/hw_accel.h"
 #include "sysemu/xen-mapcache.h"
+#include "sysemu/guest-memfd-manager.h"
 #include "trace.h"
 
 #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
@@ -1885,6 +1886,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
             qemu_mutex_unlock_ramlist();
             goto out_free;
         }
+
+        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(object_new(TYPE_GUEST_MEMFD_MANAGER));
+        guest_memfd_manager_realize(gmm, new_block->mr, new_block->mr->size);
     }
 
     ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
@@ -2139,6 +2143,9 @@ static void reclaim_ramblock(RAMBlock *block)
 
     if (block->guest_memfd >= 0) {
         close(block->guest_memfd);
+        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(block->mr->rdm);
+        guest_memfd_manager_unrealize(gmm);
+        object_unref(OBJECT(gmm));
         ram_block_discard_require(false);
     }
 
-- 
2.43.5
Re: [PATCH 5/7] memory: Register the RamDiscardManager instance upon guest_memfd creation
Posted by Zhao Liu 1 year ago
>  #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> @@ -1885,6 +1886,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>              qemu_mutex_unlock_ramlist();
>              goto out_free;
>          }
> +
> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(object_new(TYPE_GUEST_MEMFD_MANAGER));
> +        guest_memfd_manager_realize(gmm, new_block->mr, new_block->mr->size);

realize & unrealize are usually used for QDev. I think it's not good to use
*realize and *unrealize here.

Why about "guest_memfd_manager_attach_ram"?

In addition, it seems the third parameter is unnecessary and we can access
MemoryRegion.size directly in guest_memfd_manager_realize().

>      }
>  
>      ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
> @@ -2139,6 +2143,9 @@ static void reclaim_ramblock(RAMBlock *block)
>  
>      if (block->guest_memfd >= 0) {
>          close(block->guest_memfd);
> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(block->mr->rdm);
> +        guest_memfd_manager_unrealize(gmm);

Similiarly, what about "guest_memfd_manager_unattach_ram"?

> +        object_unref(OBJECT(gmm));
>          ram_block_discard_require(false);
>      }
>  

Regards,
Zhao
Re: [PATCH 5/7] memory: Register the RamDiscardManager instance upon guest_memfd creation
Posted by Chenyi Qiang 1 year ago
Thanks Zhao for your review!

On 1/9/2025 4:14 PM, Zhao Liu wrote:
>>  #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>> @@ -1885,6 +1886,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>>              qemu_mutex_unlock_ramlist();
>>              goto out_free;
>>          }
>> +
>> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(object_new(TYPE_GUEST_MEMFD_MANAGER));
>> +        guest_memfd_manager_realize(gmm, new_block->mr, new_block->mr->size);
> 
> realize & unrealize are usually used for QDev. I think it's not good to use
> *realize and *unrealize here.
> 
> Why about "guest_memfd_manager_attach_ram"?
> 
> In addition, it seems the third parameter is unnecessary and we can access
> MemoryRegion.size directly in guest_memfd_manager_realize().

LGTM. Will follow your suggestion if we still wrap the operations in one
function. (We may change to the HostMemoryBackend RDM then unpack the
operations).

> 
>>      }
>>  
>>      ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
>> @@ -2139,6 +2143,9 @@ static void reclaim_ramblock(RAMBlock *block)
>>  
>>      if (block->guest_memfd >= 0) {
>>          close(block->guest_memfd);
>> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(block->mr->rdm);
>> +        guest_memfd_manager_unrealize(gmm);
> 
> Similiarly, what about "guest_memfd_manager_unattach_ram"?

Ditto. thanks.

> 
>> +        object_unref(OBJECT(gmm));
>>          ram_block_discard_require(false);
>>      }
>>  
> 
> Regards,
> Zhao
>
Re: [PATCH 5/7] memory: Register the RamDiscardManager instance upon guest_memfd creation
Posted by Alexey Kardashevskiy 1 year, 1 month ago
On 13/12/24 18:08, Chenyi Qiang wrote:
> Introduce the realize()/unrealize() callbacks to initialize/uninitialize
> the new guest_memfd_manager object and register/unregister it in the
> target MemoryRegion.
> 
> Guest_memfd was initially set to shared until the commit bd3bcf6962
> ("kvm/memory: Make memory type private by default if it has guest memfd
> backend"). To align with this change, the default state in
> guest_memfd_manager is set to private. (The bitmap is cleared to 0).
> Additionally, setting the default to private can also reduce the
> overhead of mapping shared pages into IOMMU by VFIO during the bootup stage.
> 
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
>   include/sysemu/guest-memfd-manager.h | 27 +++++++++++++++++++++++++++
>   system/guest-memfd-manager.c         | 28 +++++++++++++++++++++++++++-
>   system/physmem.c                     |  7 +++++++
>   3 files changed, 61 insertions(+), 1 deletion(-)
> 
> diff --git a/include/sysemu/guest-memfd-manager.h b/include/sysemu/guest-memfd-manager.h
> index 9dc4e0346d..d1e7f698e8 100644
> --- a/include/sysemu/guest-memfd-manager.h
> +++ b/include/sysemu/guest-memfd-manager.h
> @@ -42,6 +42,8 @@ struct GuestMemfdManager {
>   struct GuestMemfdManagerClass {
>       ObjectClass parent_class;
>   
> +    void (*realize)(GuestMemfdManager *gmm, MemoryRegion *mr, uint64_t region_size);
> +    void (*unrealize)(GuestMemfdManager *gmm);
>       int (*state_change)(GuestMemfdManager *gmm, uint64_t offset, uint64_t size,
>                           bool shared_to_private);
>   };
> @@ -61,4 +63,29 @@ static inline int guest_memfd_manager_state_change(GuestMemfdManager *gmm, uint6
>       return 0;
>   }
>   
> +static inline void guest_memfd_manager_realize(GuestMemfdManager *gmm,
> +                                              MemoryRegion *mr, uint64_t region_size)
> +{
> +    GuestMemfdManagerClass *klass;
> +
> +    g_assert(gmm);
> +    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
> +
> +    if (klass->realize) {
> +        klass->realize(gmm, mr, region_size);

Ditch realize() hook and call guest_memfd_manager_realizefn() directly?
Not clear why these new hooks are needed.

> +    }
> +}
> +
> +static inline void guest_memfd_manager_unrealize(GuestMemfdManager *gmm)
> +{
> +    GuestMemfdManagerClass *klass;
> +
> +    g_assert(gmm);
> +    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
> +
> +    if (klass->unrealize) {
> +        klass->unrealize(gmm);
> +    }
> +}

guest_memfd_manager_unrealizefn()?


> +
>   #endif
> diff --git a/system/guest-memfd-manager.c b/system/guest-memfd-manager.c
> index 6601df5f3f..b6a32f0bfb 100644
> --- a/system/guest-memfd-manager.c
> +++ b/system/guest-memfd-manager.c
> @@ -366,6 +366,31 @@ static int guest_memfd_state_change(GuestMemfdManager *gmm, uint64_t offset,
>       return ret;
>   }
>   
> +static void guest_memfd_manager_realizefn(GuestMemfdManager *gmm, MemoryRegion *mr,
> +                                          uint64_t region_size)
> +{
> +    uint64_t bitmap_size;
> +
> +    gmm->block_size = qemu_real_host_page_size();
> +    bitmap_size = ROUND_UP(region_size, gmm->block_size) / gmm->block_size;

imho unaligned region_size should be an assert.

> +
> +    gmm->mr = mr;
> +    gmm->bitmap_size = bitmap_size;
> +    gmm->bitmap = bitmap_new(bitmap_size);
> +
> +    memory_region_set_ram_discard_manager(gmm->mr, RAM_DISCARD_MANAGER(gmm));
> +}

This belongs to 2/7.

> +
> +static void guest_memfd_manager_unrealizefn(GuestMemfdManager *gmm)
> +{
> +    memory_region_set_ram_discard_manager(gmm->mr, NULL);
> +
> +    g_free(gmm->bitmap);
> +    gmm->bitmap = NULL;
> +    gmm->bitmap_size = 0;
> +    gmm->mr = NULL;

@gmm is being destroyed here, why bother zeroing?

> +}
> +

This function belongs to 2/7.

>   static void guest_memfd_manager_init(Object *obj)
>   {
>       GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(obj);
> @@ -375,7 +400,6 @@ static void guest_memfd_manager_init(Object *obj)
>   
>   static void guest_memfd_manager_finalize(Object *obj)
>   {
> -    g_free(GUEST_MEMFD_MANAGER(obj)->bitmap);
>   }
>   
>   static void guest_memfd_manager_class_init(ObjectClass *oc, void *data)
> @@ -384,6 +408,8 @@ static void guest_memfd_manager_class_init(ObjectClass *oc, void *data)
>       RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(oc);
>   
>       gmmc->state_change = guest_memfd_state_change;
> +    gmmc->realize = guest_memfd_manager_realizefn;
> +    gmmc->unrealize = guest_memfd_manager_unrealizefn;
>   
>       rdmc->get_min_granularity = guest_memfd_rdm_get_min_granularity;
>       rdmc->register_listener = guest_memfd_rdm_register_listener;
> diff --git a/system/physmem.c b/system/physmem.c
> index dc1db3a384..532182a6dd 100644
> --- a/system/physmem.c
> +++ b/system/physmem.c
> @@ -53,6 +53,7 @@
>   #include "sysemu/hostmem.h"
>   #include "sysemu/hw_accel.h"
>   #include "sysemu/xen-mapcache.h"
> +#include "sysemu/guest-memfd-manager.h"
>   #include "trace.h"
>   
>   #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
> @@ -1885,6 +1886,9 @@ static void ram_block_add(RAMBlock *new_block, Error **errp)
>               qemu_mutex_unlock_ramlist();
>               goto out_free;
>           }
> +
> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(object_new(TYPE_GUEST_MEMFD_MANAGER));
> +        guest_memfd_manager_realize(gmm, new_block->mr, new_block->mr->size);

Wow. Quite invasive.

>       }
>   
>       ram_size = (new_block->offset + new_block->max_length) >> TARGET_PAGE_BITS;
> @@ -2139,6 +2143,9 @@ static void reclaim_ramblock(RAMBlock *block)
>   
>       if (block->guest_memfd >= 0) {
>           close(block->guest_memfd);
> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(block->mr->rdm);
> +        guest_memfd_manager_unrealize(gmm);
> +        object_unref(OBJECT(gmm));

Likely don't matter but I'd do the cleanup before close() or do 
block->guest_memfd=-1 before the cleanup. Thanks,


>           ram_block_discard_require(false);
>       }
>   

-- 
Alexey
Re: [PATCH 5/7] memory: Register the RamDiscardManager instance upon guest_memfd creation
Posted by Chenyi Qiang 1 year ago

On 1/8/2025 12:47 PM, Alexey Kardashevskiy wrote:
> On 13/12/24 18:08, Chenyi Qiang wrote:
>> Introduce the realize()/unrealize() callbacks to initialize/uninitialize
>> the new guest_memfd_manager object and register/unregister it in the
>> target MemoryRegion.
>>
>> Guest_memfd was initially set to shared until the commit bd3bcf6962
>> ("kvm/memory: Make memory type private by default if it has guest memfd
>> backend"). To align with this change, the default state in
>> guest_memfd_manager is set to private. (The bitmap is cleared to 0).
>> Additionally, setting the default to private can also reduce the
>> overhead of mapping shared pages into IOMMU by VFIO during the bootup
>> stage.
>>
>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>> ---
>>   include/sysemu/guest-memfd-manager.h | 27 +++++++++++++++++++++++++++
>>   system/guest-memfd-manager.c         | 28 +++++++++++++++++++++++++++-
>>   system/physmem.c                     |  7 +++++++
>>   3 files changed, 61 insertions(+), 1 deletion(-)
>>
>> diff --git a/include/sysemu/guest-memfd-manager.h b/include/sysemu/
>> guest-memfd-manager.h
>> index 9dc4e0346d..d1e7f698e8 100644
>> --- a/include/sysemu/guest-memfd-manager.h
>> +++ b/include/sysemu/guest-memfd-manager.h
>> @@ -42,6 +42,8 @@ struct GuestMemfdManager {
>>   struct GuestMemfdManagerClass {
>>       ObjectClass parent_class;
>>   +    void (*realize)(GuestMemfdManager *gmm, MemoryRegion *mr,
>> uint64_t region_size);
>> +    void (*unrealize)(GuestMemfdManager *gmm);
>>       int (*state_change)(GuestMemfdManager *gmm, uint64_t offset,
>> uint64_t size,
>>                           bool shared_to_private);
>>   };
>> @@ -61,4 +63,29 @@ static inline int
>> guest_memfd_manager_state_change(GuestMemfdManager *gmm, uint6
>>       return 0;
>>   }
>>   +static inline void guest_memfd_manager_realize(GuestMemfdManager *gmm,
>> +                                              MemoryRegion *mr,
>> uint64_t region_size)
>> +{
>> +    GuestMemfdManagerClass *klass;
>> +
>> +    g_assert(gmm);
>> +    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
>> +
>> +    if (klass->realize) {
>> +        klass->realize(gmm, mr, region_size);
> 
> Ditch realize() hook and call guest_memfd_manager_realizefn() directly?
> Not clear why these new hooks are needed.

> 
>> +    }
>> +}
>> +
>> +static inline void guest_memfd_manager_unrealize(GuestMemfdManager *gmm)
>> +{
>> +    GuestMemfdManagerClass *klass;
>> +
>> +    g_assert(gmm);
>> +    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
>> +
>> +    if (klass->unrealize) {
>> +        klass->unrealize(gmm);
>> +    }
>> +}
> 
> guest_memfd_manager_unrealizefn()?

Agree. Adding these wrappers seem unnecessary.

> 
> 
>> +
>>   #endif
>> diff --git a/system/guest-memfd-manager.c b/system/guest-memfd-manager.c
>> index 6601df5f3f..b6a32f0bfb 100644
>> --- a/system/guest-memfd-manager.c
>> +++ b/system/guest-memfd-manager.c
>> @@ -366,6 +366,31 @@ static int
>> guest_memfd_state_change(GuestMemfdManager *gmm, uint64_t offset,
>>       return ret;
>>   }
>>   +static void guest_memfd_manager_realizefn(GuestMemfdManager *gmm,
>> MemoryRegion *mr,
>> +                                          uint64_t region_size)
>> +{
>> +    uint64_t bitmap_size;
>> +
>> +    gmm->block_size = qemu_real_host_page_size();
>> +    bitmap_size = ROUND_UP(region_size, gmm->block_size) / gmm-
>> >block_size;
> 
> imho unaligned region_size should be an assert.

There's no guarantee the region_size of the MemoryRegion is PAGE_SIZE
aligned. So the ROUND_UP() is more appropriate.

> 
>> +
>> +    gmm->mr = mr;
>> +    gmm->bitmap_size = bitmap_size;
>> +    gmm->bitmap = bitmap_new(bitmap_size);
>> +
>> +    memory_region_set_ram_discard_manager(gmm->mr,
>> RAM_DISCARD_MANAGER(gmm));
>> +}
> 
> This belongs to 2/7.
> 
>> +
>> +static void guest_memfd_manager_unrealizefn(GuestMemfdManager *gmm)
>> +{
>> +    memory_region_set_ram_discard_manager(gmm->mr, NULL);
>> +
>> +    g_free(gmm->bitmap);
>> +    gmm->bitmap = NULL;
>> +    gmm->bitmap_size = 0;
>> +    gmm->mr = NULL;
> 
> @gmm is being destroyed here, why bother zeroing?

OK, will remove it.

> 
>> +}
>> +
> 
> This function belongs to 2/7.

Will move both realizefn() and unrealizefn().

> 
>>   static void guest_memfd_manager_init(Object *obj)
>>   {
>>       GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(obj);
>> @@ -375,7 +400,6 @@ static void guest_memfd_manager_init(Object *obj)
>>     static void guest_memfd_manager_finalize(Object *obj)
>>   {
>> -    g_free(GUEST_MEMFD_MANAGER(obj)->bitmap);
>>   }
>>     static void guest_memfd_manager_class_init(ObjectClass *oc, void
>> *data)
>> @@ -384,6 +408,8 @@ static void
>> guest_memfd_manager_class_init(ObjectClass *oc, void *data)
>>       RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(oc);
>>         gmmc->state_change = guest_memfd_state_change;
>> +    gmmc->realize = guest_memfd_manager_realizefn;
>> +    gmmc->unrealize = guest_memfd_manager_unrealizefn;
>>         rdmc->get_min_granularity = guest_memfd_rdm_get_min_granularity;
>>       rdmc->register_listener = guest_memfd_rdm_register_listener;
>> diff --git a/system/physmem.c b/system/physmem.c
>> index dc1db3a384..532182a6dd 100644
>> --- a/system/physmem.c
>> +++ b/system/physmem.c
>> @@ -53,6 +53,7 @@
>>   #include "sysemu/hostmem.h"
>>   #include "sysemu/hw_accel.h"
>>   #include "sysemu/xen-mapcache.h"
>> +#include "sysemu/guest-memfd-manager.h"
>>   #include "trace.h"
>>     #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>> @@ -1885,6 +1886,9 @@ static void ram_block_add(RAMBlock *new_block,
>> Error **errp)
>>               qemu_mutex_unlock_ramlist();
>>               goto out_free;
>>           }
>> +
>> +        GuestMemfdManager *gmm =
>> GUEST_MEMFD_MANAGER(object_new(TYPE_GUEST_MEMFD_MANAGER));
>> +        guest_memfd_manager_realize(gmm, new_block->mr, new_block-
>> >mr->size);
> 
> Wow. Quite invasive.

Yeah... It creates a manager object no matter whether the user wants to
use shared passthru or not. We assume some fields like private/shared
bitmap may also be helpful in other scenario for future usage, and if no
passthru device, the listener would just return, so it is acceptable.

> 
>>       }
>>         ram_size = (new_block->offset + new_block->max_length) >>
>> TARGET_PAGE_BITS;
>> @@ -2139,6 +2143,9 @@ static void reclaim_ramblock(RAMBlock *block)
>>         if (block->guest_memfd >= 0) {
>>           close(block->guest_memfd);
>> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(block->mr->rdm);
>> +        guest_memfd_manager_unrealize(gmm);
>> +        object_unref(OBJECT(gmm));
> 
> Likely don't matter but I'd do the cleanup before close() or do block-
>>guest_memfd=-1 before the cleanup. Thanks,
> 
> 
>>           ram_block_discard_require(false);
>>       }
>>   
> 


Re: [PATCH 5/7] memory: Register the RamDiscardManager instance upon guest_memfd creation
Posted by Alexey Kardashevskiy 1 year ago

On 9/1/25 16:34, Chenyi Qiang wrote:
> 
> 
> On 1/8/2025 12:47 PM, Alexey Kardashevskiy wrote:
>> On 13/12/24 18:08, Chenyi Qiang wrote:
>>> Introduce the realize()/unrealize() callbacks to initialize/uninitialize
>>> the new guest_memfd_manager object and register/unregister it in the
>>> target MemoryRegion.
>>>
>>> Guest_memfd was initially set to shared until the commit bd3bcf6962
>>> ("kvm/memory: Make memory type private by default if it has guest memfd
>>> backend"). To align with this change, the default state in
>>> guest_memfd_manager is set to private. (The bitmap is cleared to 0).
>>> Additionally, setting the default to private can also reduce the
>>> overhead of mapping shared pages into IOMMU by VFIO during the bootup
>>> stage.
>>>
>>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>> ---
>>>    include/sysemu/guest-memfd-manager.h | 27 +++++++++++++++++++++++++++
>>>    system/guest-memfd-manager.c         | 28 +++++++++++++++++++++++++++-
>>>    system/physmem.c                     |  7 +++++++
>>>    3 files changed, 61 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/include/sysemu/guest-memfd-manager.h b/include/sysemu/
>>> guest-memfd-manager.h
>>> index 9dc4e0346d..d1e7f698e8 100644
>>> --- a/include/sysemu/guest-memfd-manager.h
>>> +++ b/include/sysemu/guest-memfd-manager.h
>>> @@ -42,6 +42,8 @@ struct GuestMemfdManager {
>>>    struct GuestMemfdManagerClass {
>>>        ObjectClass parent_class;
>>>    +    void (*realize)(GuestMemfdManager *gmm, MemoryRegion *mr,
>>> uint64_t region_size);
>>> +    void (*unrealize)(GuestMemfdManager *gmm);
>>>        int (*state_change)(GuestMemfdManager *gmm, uint64_t offset,
>>> uint64_t size,
>>>                            bool shared_to_private);
>>>    };
>>> @@ -61,4 +63,29 @@ static inline int
>>> guest_memfd_manager_state_change(GuestMemfdManager *gmm, uint6
>>>        return 0;
>>>    }
>>>    +static inline void guest_memfd_manager_realize(GuestMemfdManager *gmm,
>>> +                                              MemoryRegion *mr,
>>> uint64_t region_size)
>>> +{
>>> +    GuestMemfdManagerClass *klass;
>>> +
>>> +    g_assert(gmm);
>>> +    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
>>> +
>>> +    if (klass->realize) {
>>> +        klass->realize(gmm, mr, region_size);
>>
>> Ditch realize() hook and call guest_memfd_manager_realizefn() directly?
>> Not clear why these new hooks are needed.
> 
>>
>>> +    }
>>> +}
>>> +
>>> +static inline void guest_memfd_manager_unrealize(GuestMemfdManager *gmm)
>>> +{
>>> +    GuestMemfdManagerClass *klass;
>>> +
>>> +    g_assert(gmm);
>>> +    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
>>> +
>>> +    if (klass->unrealize) {
>>> +        klass->unrealize(gmm);
>>> +    }
>>> +}
>>
>> guest_memfd_manager_unrealizefn()?
> 
> Agree. Adding these wrappers seem unnecessary.
> 
>>
>>
>>> +
>>>    #endif
>>> diff --git a/system/guest-memfd-manager.c b/system/guest-memfd-manager.c
>>> index 6601df5f3f..b6a32f0bfb 100644
>>> --- a/system/guest-memfd-manager.c
>>> +++ b/system/guest-memfd-manager.c
>>> @@ -366,6 +366,31 @@ static int
>>> guest_memfd_state_change(GuestMemfdManager *gmm, uint64_t offset,
>>>        return ret;
>>>    }
>>>    +static void guest_memfd_manager_realizefn(GuestMemfdManager *gmm,
>>> MemoryRegion *mr,
>>> +                                          uint64_t region_size)
>>> +{
>>> +    uint64_t bitmap_size;
>>> +
>>> +    gmm->block_size = qemu_real_host_page_size();
>>> +    bitmap_size = ROUND_UP(region_size, gmm->block_size) / gmm-
>>>> block_size;
>>
>> imho unaligned region_size should be an assert.
> 
> There's no guarantee the region_size of the MemoryRegion is PAGE_SIZE
> aligned. So the ROUND_UP() is more appropriate.

It is all about DMA so the smallest you can map is PAGE_SIZE so even if 
you round up here, it is likely going to fail to DMA-map later anyway 
(or not?).


>>> +
>>> +    gmm->mr = mr;
>>> +    gmm->bitmap_size = bitmap_size;
>>> +    gmm->bitmap = bitmap_new(bitmap_size);
>>> +
>>> +    memory_region_set_ram_discard_manager(gmm->mr,
>>> RAM_DISCARD_MANAGER(gmm));
>>> +}
>>
>> This belongs to 2/7.
>>
>>> +
>>> +static void guest_memfd_manager_unrealizefn(GuestMemfdManager *gmm)
>>> +{
>>> +    memory_region_set_ram_discard_manager(gmm->mr, NULL);
>>> +
>>> +    g_free(gmm->bitmap);
>>> +    gmm->bitmap = NULL;
>>> +    gmm->bitmap_size = 0;
>>> +    gmm->mr = NULL;
>>
>> @gmm is being destroyed here, why bother zeroing?
> 
> OK, will remove it.
> 
>>
>>> +}
>>> +
>>
>> This function belongs to 2/7.
> 
> Will move both realizefn() and unrealizefn().

Yes.


>>
>>>    static void guest_memfd_manager_init(Object *obj)
>>>    {
>>>        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(obj);
>>> @@ -375,7 +400,6 @@ static void guest_memfd_manager_init(Object *obj)
>>>      static void guest_memfd_manager_finalize(Object *obj)
>>>    {
>>> -    g_free(GUEST_MEMFD_MANAGER(obj)->bitmap);
>>>    }
>>>      static void guest_memfd_manager_class_init(ObjectClass *oc, void
>>> *data)
>>> @@ -384,6 +408,8 @@ static void
>>> guest_memfd_manager_class_init(ObjectClass *oc, void *data)
>>>        RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(oc);
>>>          gmmc->state_change = guest_memfd_state_change;
>>> +    gmmc->realize = guest_memfd_manager_realizefn;
>>> +    gmmc->unrealize = guest_memfd_manager_unrealizefn;
>>>          rdmc->get_min_granularity = guest_memfd_rdm_get_min_granularity;
>>>        rdmc->register_listener = guest_memfd_rdm_register_listener;
>>> diff --git a/system/physmem.c b/system/physmem.c
>>> index dc1db3a384..532182a6dd 100644
>>> --- a/system/physmem.c
>>> +++ b/system/physmem.c
>>> @@ -53,6 +53,7 @@
>>>    #include "sysemu/hostmem.h"
>>>    #include "sysemu/hw_accel.h"
>>>    #include "sysemu/xen-mapcache.h"
>>> +#include "sysemu/guest-memfd-manager.h"
>>>    #include "trace.h"
>>>      #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>>> @@ -1885,6 +1886,9 @@ static void ram_block_add(RAMBlock *new_block,
>>> Error **errp)
>>>                qemu_mutex_unlock_ramlist();
>>>                goto out_free;
>>>            }
>>> +
>>> +        GuestMemfdManager *gmm =
>>> GUEST_MEMFD_MANAGER(object_new(TYPE_GUEST_MEMFD_MANAGER));
>>> +        guest_memfd_manager_realize(gmm, new_block->mr, new_block-
>>>> mr->size);
>>
>> Wow. Quite invasive.
> 
> Yeah... It creates a manager object no matter whether the user wants to
> us	e shared passthru or not. We assume some fields like private/shared
> bitmap may also be helpful in other scenario for future usage, and if no
> passthru device, the listener would just return, so it is acceptable.

Explain these other scenarios in the commit log please as otherwise 
making this an interface of HostMemoryBackendMemfd looks way cleaner. 
Thanks,

>>
>>>        }
>>>          ram_size = (new_block->offset + new_block->max_length) >>
>>> TARGET_PAGE_BITS;
>>> @@ -2139,6 +2143,9 @@ static void reclaim_ramblock(RAMBlock *block)
>>>          if (block->guest_memfd >= 0) {
>>>            close(block->guest_memfd);
>>> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(block->mr->rdm);
>>> +        guest_memfd_manager_unrealize(gmm);
>>> +        object_unref(OBJECT(gmm));
>>
>> Likely don't matter but I'd do the cleanup before close() or do block-
>>> guest_memfd=-1 before the cleanup. Thanks,
>>
>>
>>>            ram_block_discard_require(false);
>>>        }
>>>    
>>
> 

-- 
Alexey


Re: [PATCH 5/7] memory: Register the RamDiscardManager instance upon guest_memfd creation
Posted by Chenyi Qiang 1 year ago

On 1/9/2025 5:32 PM, Alexey Kardashevskiy wrote:
> 
> 
> On 9/1/25 16:34, Chenyi Qiang wrote:
>>
>>
>> On 1/8/2025 12:47 PM, Alexey Kardashevskiy wrote:
>>> On 13/12/24 18:08, Chenyi Qiang wrote:
>>>> Introduce the realize()/unrealize() callbacks to initialize/
>>>> uninitialize
>>>> the new guest_memfd_manager object and register/unregister it in the
>>>> target MemoryRegion.
>>>>
>>>> Guest_memfd was initially set to shared until the commit bd3bcf6962
>>>> ("kvm/memory: Make memory type private by default if it has guest memfd
>>>> backend"). To align with this change, the default state in
>>>> guest_memfd_manager is set to private. (The bitmap is cleared to 0).
>>>> Additionally, setting the default to private can also reduce the
>>>> overhead of mapping shared pages into IOMMU by VFIO during the bootup
>>>> stage.
>>>>
>>>> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
>>>> ---
>>>>    include/sysemu/guest-memfd-manager.h | 27 +++++++++++++++++++++++
>>>> ++++
>>>>    system/guest-memfd-manager.c         | 28 +++++++++++++++++++++++
>>>> ++++-
>>>>    system/physmem.c                     |  7 +++++++
>>>>    3 files changed, 61 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/include/sysemu/guest-memfd-manager.h b/include/sysemu/
>>>> guest-memfd-manager.h
>>>> index 9dc4e0346d..d1e7f698e8 100644
>>>> --- a/include/sysemu/guest-memfd-manager.h
>>>> +++ b/include/sysemu/guest-memfd-manager.h
>>>> @@ -42,6 +42,8 @@ struct GuestMemfdManager {
>>>>    struct GuestMemfdManagerClass {
>>>>        ObjectClass parent_class;
>>>>    +    void (*realize)(GuestMemfdManager *gmm, MemoryRegion *mr,
>>>> uint64_t region_size);
>>>> +    void (*unrealize)(GuestMemfdManager *gmm);
>>>>        int (*state_change)(GuestMemfdManager *gmm, uint64_t offset,
>>>> uint64_t size,
>>>>                            bool shared_to_private);
>>>>    };
>>>> @@ -61,4 +63,29 @@ static inline int
>>>> guest_memfd_manager_state_change(GuestMemfdManager *gmm, uint6
>>>>        return 0;
>>>>    }
>>>>    +static inline void guest_memfd_manager_realize(GuestMemfdManager
>>>> *gmm,
>>>> +                                              MemoryRegion *mr,
>>>> uint64_t region_size)
>>>> +{
>>>> +    GuestMemfdManagerClass *klass;
>>>> +
>>>> +    g_assert(gmm);
>>>> +    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
>>>> +
>>>> +    if (klass->realize) {
>>>> +        klass->realize(gmm, mr, region_size);
>>>
>>> Ditch realize() hook and call guest_memfd_manager_realizefn() directly?
>>> Not clear why these new hooks are needed.
>>
>>>
>>>> +    }
>>>> +}
>>>> +
>>>> +static inline void guest_memfd_manager_unrealize(GuestMemfdManager
>>>> *gmm)
>>>> +{
>>>> +    GuestMemfdManagerClass *klass;
>>>> +
>>>> +    g_assert(gmm);
>>>> +    klass = GUEST_MEMFD_MANAGER_GET_CLASS(gmm);
>>>> +
>>>> +    if (klass->unrealize) {
>>>> +        klass->unrealize(gmm);
>>>> +    }
>>>> +}
>>>
>>> guest_memfd_manager_unrealizefn()?
>>
>> Agree. Adding these wrappers seem unnecessary.
>>
>>>
>>>
>>>> +
>>>>    #endif
>>>> diff --git a/system/guest-memfd-manager.c b/system/guest-memfd-
>>>> manager.c
>>>> index 6601df5f3f..b6a32f0bfb 100644
>>>> --- a/system/guest-memfd-manager.c
>>>> +++ b/system/guest-memfd-manager.c
>>>> @@ -366,6 +366,31 @@ static int
>>>> guest_memfd_state_change(GuestMemfdManager *gmm, uint64_t offset,
>>>>        return ret;
>>>>    }
>>>>    +static void guest_memfd_manager_realizefn(GuestMemfdManager *gmm,
>>>> MemoryRegion *mr,
>>>> +                                          uint64_t region_size)
>>>> +{
>>>> +    uint64_t bitmap_size;
>>>> +
>>>> +    gmm->block_size = qemu_real_host_page_size();
>>>> +    bitmap_size = ROUND_UP(region_size, gmm->block_size) / gmm-
>>>>> block_size;
>>>
>>> imho unaligned region_size should be an assert.
>>
>> There's no guarantee the region_size of the MemoryRegion is PAGE_SIZE
>> aligned. So the ROUND_UP() is more appropriate.
> 
> It is all about DMA so the smallest you can map is PAGE_SIZE so even if
> you round up here, it is likely going to fail to DMA-map later anyway
> (or not?).

Checked the handling of VFIO, if the size is less than PAGE_SIZE, it
will just return and won't do DMA-map.

Here is a different thing. It tries to calculate the bitmap_size. The
bitmap is used to track the private/shared status of the page. So if the
size is less than PAGE_SIZE, we still use the one bit to track this
small-size range.

> 
> 
>>>> +
>>>> +    gmm->mr = mr;
>>>> +    gmm->bitmap_size = bitmap_size;
>>>> +    gmm->bitmap = bitmap_new(bitmap_size);
>>>> +
>>>> +    memory_region_set_ram_discard_manager(gmm->mr,
>>>> RAM_DISCARD_MANAGER(gmm));
>>>> +}
>>>
>>> This belongs to 2/7.
>>>
>>>> +
>>>> +static void guest_memfd_manager_unrealizefn(GuestMemfdManager *gmm)
>>>> +{
>>>> +    memory_region_set_ram_discard_manager(gmm->mr, NULL);
>>>> +
>>>> +    g_free(gmm->bitmap);
>>>> +    gmm->bitmap = NULL;
>>>> +    gmm->bitmap_size = 0;
>>>> +    gmm->mr = NULL;
>>>
>>> @gmm is being destroyed here, why bother zeroing?
>>
>> OK, will remove it.
>>
>>>
>>>> +}
>>>> +
>>>
>>> This function belongs to 2/7.
>>
>> Will move both realizefn() and unrealizefn().
> 
> Yes.
> 
> 
>>>
>>>>    static void guest_memfd_manager_init(Object *obj)
>>>>    {
>>>>        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(obj);
>>>> @@ -375,7 +400,6 @@ static void guest_memfd_manager_init(Object *obj)
>>>>      static void guest_memfd_manager_finalize(Object *obj)
>>>>    {
>>>> -    g_free(GUEST_MEMFD_MANAGER(obj)->bitmap);
>>>>    }
>>>>      static void guest_memfd_manager_class_init(ObjectClass *oc, void
>>>> *data)
>>>> @@ -384,6 +408,8 @@ static void
>>>> guest_memfd_manager_class_init(ObjectClass *oc, void *data)
>>>>        RamDiscardManagerClass *rdmc = RAM_DISCARD_MANAGER_CLASS(oc);
>>>>          gmmc->state_change = guest_memfd_state_change;
>>>> +    gmmc->realize = guest_memfd_manager_realizefn;
>>>> +    gmmc->unrealize = guest_memfd_manager_unrealizefn;
>>>>          rdmc->get_min_granularity =
>>>> guest_memfd_rdm_get_min_granularity;
>>>>        rdmc->register_listener = guest_memfd_rdm_register_listener;
>>>> diff --git a/system/physmem.c b/system/physmem.c
>>>> index dc1db3a384..532182a6dd 100644
>>>> --- a/system/physmem.c
>>>> +++ b/system/physmem.c
>>>> @@ -53,6 +53,7 @@
>>>>    #include "sysemu/hostmem.h"
>>>>    #include "sysemu/hw_accel.h"
>>>>    #include "sysemu/xen-mapcache.h"
>>>> +#include "sysemu/guest-memfd-manager.h"
>>>>    #include "trace.h"
>>>>      #ifdef CONFIG_FALLOCATE_PUNCH_HOLE
>>>> @@ -1885,6 +1886,9 @@ static void ram_block_add(RAMBlock *new_block,
>>>> Error **errp)
>>>>                qemu_mutex_unlock_ramlist();
>>>>                goto out_free;
>>>>            }
>>>> +
>>>> +        GuestMemfdManager *gmm =
>>>> GUEST_MEMFD_MANAGER(object_new(TYPE_GUEST_MEMFD_MANAGER));
>>>> +        guest_memfd_manager_realize(gmm, new_block->mr, new_block-
>>>>> mr->size);
>>>
>>> Wow. Quite invasive.
>>
>> Yeah... It creates a manager object no matter whether the user wants to
>> us    e shared passthru or not. We assume some fields like private/shared
>> bitmap may also be helpful in other scenario for future usage, and if no
>> passthru device, the listener would just return, so it is acceptable.
> 
> Explain these other scenarios in the commit log please as otherwise
> making this an interface of HostMemoryBackendMemfd looks way cleaner.
> Thanks,

Thanks for the suggestion. Until now, I think making this an interface
of HostMemoryBackend is cleaner. The potential future usage for
non-HostMemoryBackend guest_memfd-backed memory region I can think of is
the the TEE I/O for iommufd P2P support? when it tries to initialize RAM
device memory region with the attribute of shared/private. But I think
it would be a long term story and we are not sure what it will be like
in future.

> 
>>>
>>>>        }
>>>>          ram_size = (new_block->offset + new_block->max_length) >>
>>>> TARGET_PAGE_BITS;
>>>> @@ -2139,6 +2143,9 @@ static void reclaim_ramblock(RAMBlock *block)
>>>>          if (block->guest_memfd >= 0) {
>>>>            close(block->guest_memfd);
>>>> +        GuestMemfdManager *gmm = GUEST_MEMFD_MANAGER(block->mr->rdm);
>>>> +        guest_memfd_manager_unrealize(gmm);
>>>> +        object_unref(OBJECT(gmm));
>>>
>>> Likely don't matter but I'd do the cleanup before close() or do block-
>>>> guest_memfd=-1 before the cleanup. Thanks,
>>>
>>>
>>>>            ram_block_discard_require(false);
>>>>        }
>>>>    
>>>
>>
>