[PATCH v5 09/10] KVM: Introduce RamDiscardListener for attribute changes during memory conversions

Chenyi Qiang posted 10 patches 5 months, 4 weeks ago
There is a newer version of this series
[PATCH v5 09/10] KVM: Introduce RamDiscardListener for attribute changes during memory conversions
Posted by Chenyi Qiang 5 months, 4 weeks ago
With the introduction of the RamBlockAttribute object to manage
RAMBlocks with guest_memfd, it is more elegant to move the KVM memory
attribute updates into a RamDiscardListener.

A RamDiscardListener for KVM attribute changes is registered/unregistered
for each memory region section during kvm_region_add/del(). The listener
handler performs the attribute change upon receiving notifications from
ram_block_attribute_state_change() calls. After this change, the explicit
kvm_set_memory_attributes_private/shared() calls in kvm_convert_memory()
can be removed.

Note that errors can now be returned from
ram_block_attribute_notify_to_discard() when a KVM attribute change
fails, although this is currently unlikely to happen. With in-place
conversion of guest_memfd in the future, errors will be more likely and
will require proper error handling. For now, simply return the result;
kvm_convert_memory() will cause QEMU to quit if any issue arises.

Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
Changes in v5:
    - Revert to use RamDiscardListener

Changes in v4:
    - Newly added.
---
 accel/kvm/kvm-all.c                         | 72 ++++++++++++++++++---
 include/system/confidential-guest-support.h |  9 +++
 system/ram-block-attribute.c                | 16 +++--
 target/i386/kvm/tdx.c                       |  1 +
 target/i386/sev.c                           |  1 +
 5 files changed, 85 insertions(+), 14 deletions(-)

diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2d7ecaeb6a..ca4ef8062b 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -49,6 +49,7 @@
 #include "kvm-cpus.h"
 #include "system/dirtylimit.h"
 #include "qemu/range.h"
+#include "system/confidential-guest-support.h"
 
 #include "hw/boards.h"
 #include "system/stats.h"
@@ -1689,28 +1690,90 @@ static int kvm_dirty_ring_init(KVMState *s)
     return 0;
 }
 
+static int kvm_private_shared_notify(RamDiscardListener *rdl,
+                                     MemoryRegionSection *section,
+                                     bool to_private)
+{
+    hwaddr start = section->offset_within_address_space;
+    hwaddr size = section->size;
+
+    if (to_private) {
+        return kvm_set_memory_attributes_private(start, size);
+    } else {
+        return kvm_set_memory_attributes_shared(start, size);
+    }
+}
+
+static int kvm_ram_discard_notify_to_shared(RamDiscardListener *rdl,
+                                            MemoryRegionSection *section)
+{
+    return kvm_private_shared_notify(rdl, section, false);
+}
+
+static int kvm_ram_discard_notify_to_private(RamDiscardListener *rdl,
+                                             MemoryRegionSection *section)
+{
+    return kvm_private_shared_notify(rdl, section, true);
+}
+
 static void kvm_region_add(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
     KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+    ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
     KVMMemoryUpdate *update;
+    CGSRamDiscardListener *crdl;
+    RamDiscardListener *rdl;
+
 
     update = g_new0(KVMMemoryUpdate, 1);
     update->section = *section;
 
     QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next);
+
+    if (!memory_region_has_guest_memfd(section->mr) || !rdm) {
+        return;
+    }
+
+    crdl = g_new0(CGSRamDiscardListener, 1);
+    crdl->mr = section->mr;
+    crdl->offset_within_address_space = section->offset_within_address_space;
+    rdl = &crdl->listener;
+    QLIST_INSERT_HEAD(&cgs->cgs_rdl_list, crdl, next);
+    ram_discard_listener_init(rdl, kvm_ram_discard_notify_to_shared,
+                              kvm_ram_discard_notify_to_private, true);
+    ram_discard_manager_register_listener(rdm, rdl, section);
 }
 
 static void kvm_region_del(MemoryListener *listener,
                            MemoryRegionSection *section)
 {
     KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+    ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
     KVMMemoryUpdate *update;
+    CGSRamDiscardListener *crdl;
+    RamDiscardListener *rdl;
 
     update = g_new0(KVMMemoryUpdate, 1);
     update->section = *section;
 
     QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next);
+    if (!memory_region_has_guest_memfd(section->mr) || !rdm) {
+        return;
+    }
+
+    QLIST_FOREACH(crdl, &cgs->cgs_rdl_list, next) {
+        if (crdl->mr == section->mr &&
+            crdl->offset_within_address_space == section->offset_within_address_space) {
+            rdl = &crdl->listener;
+            ram_discard_manager_unregister_listener(rdm, rdl);
+            QLIST_REMOVE(crdl, next);
+            g_free(crdl);
+            break;
+        }
+    }
 }
 
 static void kvm_region_commit(MemoryListener *listener)
@@ -3077,15 +3140,6 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
         goto out_unref;
     }
 
-    if (to_private) {
-        ret = kvm_set_memory_attributes_private(start, size);
-    } else {
-        ret = kvm_set_memory_attributes_shared(start, size);
-    }
-    if (ret) {
-        goto out_unref;
-    }
-
     addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
     rb = qemu_ram_block_from_host(addr, false, &offset);
 
diff --git a/include/system/confidential-guest-support.h b/include/system/confidential-guest-support.h
index ea46b50c56..974abdbf6b 100644
--- a/include/system/confidential-guest-support.h
+++ b/include/system/confidential-guest-support.h
@@ -19,12 +19,19 @@
 #define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H
 
 #include "qom/object.h"
+#include "system/memory.h"
 
 #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
 OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
                     ConfidentialGuestSupportClass,
                     CONFIDENTIAL_GUEST_SUPPORT)
 
+typedef struct CGSRamDiscardListener {
+    MemoryRegion *mr;
+    hwaddr offset_within_address_space;
+    RamDiscardListener listener;
+    QLIST_ENTRY(CGSRamDiscardListener) next;
+} CGSRamDiscardListener;
 
 struct ConfidentialGuestSupport {
     Object parent;
@@ -34,6 +41,8 @@ struct ConfidentialGuestSupport {
      */
     bool require_guest_memfd;
 
+    QLIST_HEAD(, CGSRamDiscardListener) cgs_rdl_list;
+
     /*
      * ready: flag set by CGS initialization code once it's ready to
      *        start executing instructions in a potentially-secure
diff --git a/system/ram-block-attribute.c b/system/ram-block-attribute.c
index 896c3d7543..387501b569 100644
--- a/system/ram-block-attribute.c
+++ b/system/ram-block-attribute.c
@@ -274,11 +274,12 @@ static bool ram_block_attribute_is_valid_range(RamBlockAttribute *attr,
     return true;
 }
 
-static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
-                                                  uint64_t offset,
-                                                  uint64_t size)
+static int ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
+                                                 uint64_t offset,
+                                                 uint64_t size)
 {
     RamDiscardListener *rdl;
+    int ret = 0;
 
     QLIST_FOREACH(rdl, &attr->rdl_list, next) {
         MemoryRegionSection tmp = *rdl->section;
@@ -286,8 +287,13 @@ static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
         if (!memory_region_section_intersect_range(&tmp, offset, size)) {
             continue;
         }
-        rdl->notify_discard(rdl, &tmp);
+        ret = rdl->notify_discard(rdl, &tmp);
+        if (ret) {
+            break;
+        }
     }
+
+    return ret;
 }
 
 static int
@@ -377,7 +383,7 @@ int ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t offset,
 
     if (to_private) {
         bitmap_clear(attr->bitmap, first_bit, nbits);
-        ram_block_attribute_notify_to_discard(attr, offset, size);
+        ret = ram_block_attribute_notify_to_discard(attr, offset, size);
     } else {
         bitmap_set(attr->bitmap, first_bit, nbits);
         ret = ram_block_attribute_notify_to_populated(attr, offset, size);
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 7ef49690bd..17b360059c 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -1492,6 +1492,7 @@ static void tdx_guest_init(Object *obj)
     qemu_mutex_init(&tdx->lock);
 
     cgs->require_guest_memfd = true;
+    QLIST_INIT(&cgs->cgs_rdl_list);
     tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
 
     object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
diff --git a/target/i386/sev.c b/target/i386/sev.c
index adf787797e..f1b9c35fc3 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -2430,6 +2430,7 @@ sev_snp_guest_instance_init(Object *obj)
     SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
 
     cgs->require_guest_memfd = true;
+    QLIST_INIT(&cgs->cgs_rdl_list);
 
     /* default init/start/finish params for kvm */
     sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
-- 
2.43.5
Re: [PATCH v5 09/10] KVM: Introduce RamDiscardListener for attribute changes during memory conversions
Posted by Alexey Kardashevskiy 5 months, 3 weeks ago

On 20/5/25 20:28, Chenyi Qiang wrote:
> With the introduction of the RamBlockAttribute object to manage
> RAMBlocks with guest_memfd, it is more elegant to move KVM set attribute
> into a RamDiscardListener.
> 
> The KVM attribute change RamDiscardListener is registered/unregistered
> for each memory region section during kvm_region_add/del(). The listener
> handler performs attribute change upon receiving notifications from
> ram_block_attribute_state_change() calls. After this change, the
> operations in kvm_convert_memory() can be removed.
> 
> Note that, errors can be returned in
> ram_block_attribute_notify_to_discard() by KVM attribute changes,
> although it is currently unlikely to happen. With in-place conversion
> guest_memfd in the future, it would be more likely to encounter errors
> and require error handling. For now, simply return the result, and
> kvm_convert_memory() will cause QEMU to quit if any issue arises.
> 
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
> Changes in v5:
>      - Revert to use RamDiscardListener
> 
> Changes in v4:
>      - Newly added.
> ---
>   accel/kvm/kvm-all.c                         | 72 ++++++++++++++++++---
>   include/system/confidential-guest-support.h |  9 +++
>   system/ram-block-attribute.c                | 16 +++--
>   target/i386/kvm/tdx.c                       |  1 +
>   target/i386/sev.c                           |  1 +

imho this diffstat disagrees with the "more elegant" :)
+1 for ditching it from this patchset. Thanks,


>   5 files changed, 85 insertions(+), 14 deletions(-)
> 
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 2d7ecaeb6a..ca4ef8062b 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -49,6 +49,7 @@
>   #include "kvm-cpus.h"
>   #include "system/dirtylimit.h"
>   #include "qemu/range.h"
> +#include "system/confidential-guest-support.h"
>   
>   #include "hw/boards.h"
>   #include "system/stats.h"
> @@ -1689,28 +1690,90 @@ static int kvm_dirty_ring_init(KVMState *s)
>       return 0;
>   }
>   
> +static int kvm_private_shared_notify(RamDiscardListener *rdl,
> +                                     MemoryRegionSection *section,
> +                                     bool to_private)
> +{
> +    hwaddr start = section->offset_within_address_space;
> +    hwaddr size = section->size;
> +
> +    if (to_private) {
> +        return kvm_set_memory_attributes_private(start, size);
> +    } else {
> +        return kvm_set_memory_attributes_shared(start, size);
> +    }
> +}
> +
> +static int kvm_ram_discard_notify_to_shared(RamDiscardListener *rdl,
> +                                            MemoryRegionSection *section)
> +{
> +    return kvm_private_shared_notify(rdl, section, false);
> +}
> +
> +static int kvm_ram_discard_notify_to_private(RamDiscardListener *rdl,
> +                                             MemoryRegionSection *section)
> +{
> +    return kvm_private_shared_notify(rdl, section, true);
> +}
> +
>   static void kvm_region_add(MemoryListener *listener,
>                              MemoryRegionSection *section)
>   {
>       KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
> +    ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
> +    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
>       KVMMemoryUpdate *update;
> +    CGSRamDiscardListener *crdl;
> +    RamDiscardListener *rdl;
> +
>   
>       update = g_new0(KVMMemoryUpdate, 1);
>       update->section = *section;
>   
>       QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next);
> +
> +    if (!memory_region_has_guest_memfd(section->mr) || !rdm) {
> +        return;
> +    }
> +
> +    crdl = g_new0(CGSRamDiscardListener, 1);
> +    crdl->mr = section->mr;
> +    crdl->offset_within_address_space = section->offset_within_address_space;
> +    rdl = &crdl->listener;
> +    QLIST_INSERT_HEAD(&cgs->cgs_rdl_list, crdl, next);
> +    ram_discard_listener_init(rdl, kvm_ram_discard_notify_to_shared,
> +                              kvm_ram_discard_notify_to_private, true);
> +    ram_discard_manager_register_listener(rdm, rdl, section);
>   }
>   
>   static void kvm_region_del(MemoryListener *listener,
>                              MemoryRegionSection *section)
>   {
>       KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
> +    ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
> +    RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
>       KVMMemoryUpdate *update;
> +    CGSRamDiscardListener *crdl;
> +    RamDiscardListener *rdl;
>   
>       update = g_new0(KVMMemoryUpdate, 1);
>       update->section = *section;
>   
>       QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next);
> +    if (!memory_region_has_guest_memfd(section->mr) || !rdm) {
> +        return;
> +    }
> +
> +    QLIST_FOREACH(crdl, &cgs->cgs_rdl_list, next) {
> +        if (crdl->mr == section->mr &&
> +            crdl->offset_within_address_space == section->offset_within_address_space) {
> +            rdl = &crdl->listener;
> +            ram_discard_manager_unregister_listener(rdm, rdl);
> +            QLIST_REMOVE(crdl, next);
> +            g_free(crdl);
> +            break;
> +        }
> +    }
>   }
>   
>   static void kvm_region_commit(MemoryListener *listener)
> @@ -3077,15 +3140,6 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
>           goto out_unref;
>       }
>   
> -    if (to_private) {
> -        ret = kvm_set_memory_attributes_private(start, size);
> -    } else {
> -        ret = kvm_set_memory_attributes_shared(start, size);
> -    }
> -    if (ret) {
> -        goto out_unref;
> -    }
> -
>       addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
>       rb = qemu_ram_block_from_host(addr, false, &offset);
>   
> diff --git a/include/system/confidential-guest-support.h b/include/system/confidential-guest-support.h
> index ea46b50c56..974abdbf6b 100644
> --- a/include/system/confidential-guest-support.h
> +++ b/include/system/confidential-guest-support.h
> @@ -19,12 +19,19 @@
>   #define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H
>   
>   #include "qom/object.h"
> +#include "system/memory.h"
>   
>   #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
>   OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
>                       ConfidentialGuestSupportClass,
>                       CONFIDENTIAL_GUEST_SUPPORT)
>   
> +typedef struct CGSRamDiscardListener {
> +    MemoryRegion *mr;
> +    hwaddr offset_within_address_space;
> +    RamDiscardListener listener;
> +    QLIST_ENTRY(CGSRamDiscardListener) next;
> +} CGSRamDiscardListener;
>   
>   struct ConfidentialGuestSupport {
>       Object parent;
> @@ -34,6 +41,8 @@ struct ConfidentialGuestSupport {
>        */
>       bool require_guest_memfd;
>   
> +    QLIST_HEAD(, CGSRamDiscardListener) cgs_rdl_list;
> +
>       /*
>        * ready: flag set by CGS initialization code once it's ready to
>        *        start executing instructions in a potentially-secure
> diff --git a/system/ram-block-attribute.c b/system/ram-block-attribute.c
> index 896c3d7543..387501b569 100644
> --- a/system/ram-block-attribute.c
> +++ b/system/ram-block-attribute.c
> @@ -274,11 +274,12 @@ static bool ram_block_attribute_is_valid_range(RamBlockAttribute *attr,
>       return true;
>   }
>   
> -static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
> -                                                  uint64_t offset,
> -                                                  uint64_t size)
> +static int ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
> +                                                 uint64_t offset,
> +                                                 uint64_t size)
>   {
>       RamDiscardListener *rdl;
> +    int ret = 0;
>   
>       QLIST_FOREACH(rdl, &attr->rdl_list, next) {
>           MemoryRegionSection tmp = *rdl->section;
> @@ -286,8 +287,13 @@ static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
>           if (!memory_region_section_intersect_range(&tmp, offset, size)) {
>               continue;
>           }
> -        rdl->notify_discard(rdl, &tmp);
> +        ret = rdl->notify_discard(rdl, &tmp);
> +        if (ret) {
> +            break;
> +        }
>       }
> +
> +    return ret;
>   }
>   
>   static int
> @@ -377,7 +383,7 @@ int ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t offset,
>   
>       if (to_private) {
>           bitmap_clear(attr->bitmap, first_bit, nbits);
> -        ram_block_attribute_notify_to_discard(attr, offset, size);
> +        ret = ram_block_attribute_notify_to_discard(attr, offset, size);
>       } else {
>           bitmap_set(attr->bitmap, first_bit, nbits);
>           ret = ram_block_attribute_notify_to_populated(attr, offset, size);
> diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
> index 7ef49690bd..17b360059c 100644
> --- a/target/i386/kvm/tdx.c
> +++ b/target/i386/kvm/tdx.c
> @@ -1492,6 +1492,7 @@ static void tdx_guest_init(Object *obj)
>       qemu_mutex_init(&tdx->lock);
>   
>       cgs->require_guest_memfd = true;
> +    QLIST_INIT(&cgs->cgs_rdl_list);
>       tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
>   
>       object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
> diff --git a/target/i386/sev.c b/target/i386/sev.c
> index adf787797e..f1b9c35fc3 100644
> --- a/target/i386/sev.c
> +++ b/target/i386/sev.c
> @@ -2430,6 +2430,7 @@ sev_snp_guest_instance_init(Object *obj)
>       SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
>   
>       cgs->require_guest_memfd = true;
> +    QLIST_INIT(&cgs->cgs_rdl_list);
>   
>       /* default init/start/finish params for kvm */
>       sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;

-- 
Alexey
Re: [PATCH v5 09/10] KVM: Introduce RamDiscardListener for attribute changes during memory conversions
Posted by David Hildenbrand 5 months, 3 weeks ago
On 20.05.25 12:28, Chenyi Qiang wrote:
> With the introduction of the RamBlockAttribute object to manage
> RAMBlocks with guest_memfd, it is more elegant to move KVM set attribute
> into a RamDiscardListener.
> 
> The KVM attribute change RamDiscardListener is registered/unregistered
> for each memory region section during kvm_region_add/del(). The listener
> handler performs attribute change upon receiving notifications from
> ram_block_attribute_state_change() calls. After this change, the
> operations in kvm_convert_memory() can be removed.
> 
> Note that, errors can be returned in
> ram_block_attribute_notify_to_discard() by KVM attribute changes,
> although it is currently unlikely to happen. With in-place conversion
> guest_memfd in the future, it would be more likely to encounter errors
> and require error handling. For now, simply return the result, and
> kvm_convert_memory() will cause QEMU to quit if any issue arises.
> 
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---

[...]

>   static void kvm_region_commit(MemoryListener *listener)
> @@ -3077,15 +3140,6 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
>           goto out_unref;
>       }
>   
> -    if (to_private) {
> -        ret = kvm_set_memory_attributes_private(start, size);
> -    } else {
> -        ret = kvm_set_memory_attributes_shared(start, size);
> -    }
> -    if (ret) {
> -        goto out_unref;
> -    }
> -

I wonder if it's best to leave that out for now. With in-place 
conversion it will all get a bit more tricky, because we'd need to call 
in different orders ...

e.g., do private -> shared before mapping to vfio, but do
shared -> private after unmapping from vfio.

That can be handled more easily when doing the calls from KVM code directly.

-- 
Cheers,

David / dhildenb