With the introduction of the RamBlockAttribute object to manage
RAMBlocks with guest_memfd, it is more elegant to move the KVM
set-attribute operation into a RamDiscardListener.
The KVM attribute change RamDiscardListener is registered/unregistered
for each memory region section during kvm_region_add/del(). The listener
handler performs the attribute change upon receiving notifications from
ram_block_attribute_state_change() calls. After this change, the
corresponding operations in kvm_convert_memory() can be removed.
Note that errors can be returned from
ram_block_attribute_notify_to_discard() as a result of KVM attribute
changes, although this is currently unlikely to happen. With in-place
conversion of guest_memfd in the future, errors will be more likely and
will require error handling. For now, simply return the result, and
kvm_convert_memory() will cause QEMU to quit if any issue arises.
Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
---
Changes in v5:
- Revert to using RamDiscardListener
Changes in v4:
- Newly added.
---
accel/kvm/kvm-all.c | 72 ++++++++++++++++++---
include/system/confidential-guest-support.h | 9 +++
system/ram-block-attribute.c | 16 +++--
target/i386/kvm/tdx.c | 1 +
target/i386/sev.c | 1 +
5 files changed, 85 insertions(+), 14 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 2d7ecaeb6a..ca4ef8062b 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -49,6 +49,7 @@
#include "kvm-cpus.h"
#include "system/dirtylimit.h"
#include "qemu/range.h"
+#include "system/confidential-guest-support.h"
#include "hw/boards.h"
#include "system/stats.h"
@@ -1689,28 +1690,90 @@ static int kvm_dirty_ring_init(KVMState *s)
return 0;
}
+static int kvm_private_shared_notify(RamDiscardListener *rdl,
+ MemoryRegionSection *section,
+ bool to_private)
+{
+ hwaddr start = section->offset_within_address_space;
+ hwaddr size = section->size;
+
+ if (to_private) {
+ return kvm_set_memory_attributes_private(start, size);
+ } else {
+ return kvm_set_memory_attributes_shared(start, size);
+ }
+}
+
+static int kvm_ram_discard_notify_to_shared(RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ return kvm_private_shared_notify(rdl, section, false);
+}
+
+static int kvm_ram_discard_notify_to_private(RamDiscardListener *rdl,
+ MemoryRegionSection *section)
+{
+ return kvm_private_shared_notify(rdl, section, true);
+}
+
static void kvm_region_add(MemoryListener *listener,
MemoryRegionSection *section)
{
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
KVMMemoryUpdate *update;
+ CGSRamDiscardListener *crdl;
+ RamDiscardListener *rdl;
+
update = g_new0(KVMMemoryUpdate, 1);
update->section = *section;
QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next);
+
+ if (!memory_region_has_guest_memfd(section->mr) || !rdm) {
+ return;
+ }
+
+ crdl = g_new0(CGSRamDiscardListener, 1);
+ crdl->mr = section->mr;
+ crdl->offset_within_address_space = section->offset_within_address_space;
+ rdl = &crdl->listener;
+ QLIST_INSERT_HEAD(&cgs->cgs_rdl_list, crdl, next);
+ ram_discard_listener_init(rdl, kvm_ram_discard_notify_to_shared,
+ kvm_ram_discard_notify_to_private, true);
+ ram_discard_manager_register_listener(rdm, rdl, section);
}
static void kvm_region_del(MemoryListener *listener,
MemoryRegionSection *section)
{
KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
+ ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
+ RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
KVMMemoryUpdate *update;
+ CGSRamDiscardListener *crdl;
+ RamDiscardListener *rdl;
update = g_new0(KVMMemoryUpdate, 1);
update->section = *section;
QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next);
+ if (!memory_region_has_guest_memfd(section->mr) || !rdm) {
+ return;
+ }
+
+ QLIST_FOREACH(crdl, &cgs->cgs_rdl_list, next) {
+ if (crdl->mr == section->mr &&
+ crdl->offset_within_address_space == section->offset_within_address_space) {
+ rdl = &crdl->listener;
+ ram_discard_manager_unregister_listener(rdm, rdl);
+ QLIST_REMOVE(crdl, next);
+ g_free(crdl);
+ break;
+ }
+ }
}
static void kvm_region_commit(MemoryListener *listener)
@@ -3077,15 +3140,6 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
goto out_unref;
}
- if (to_private) {
- ret = kvm_set_memory_attributes_private(start, size);
- } else {
- ret = kvm_set_memory_attributes_shared(start, size);
- }
- if (ret) {
- goto out_unref;
- }
-
addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
rb = qemu_ram_block_from_host(addr, false, &offset);
diff --git a/include/system/confidential-guest-support.h b/include/system/confidential-guest-support.h
index ea46b50c56..974abdbf6b 100644
--- a/include/system/confidential-guest-support.h
+++ b/include/system/confidential-guest-support.h
@@ -19,12 +19,19 @@
#define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H
#include "qom/object.h"
+#include "system/memory.h"
#define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
ConfidentialGuestSupportClass,
CONFIDENTIAL_GUEST_SUPPORT)
+typedef struct CGSRamDiscardListener {
+ MemoryRegion *mr;
+ hwaddr offset_within_address_space;
+ RamDiscardListener listener;
+ QLIST_ENTRY(CGSRamDiscardListener) next;
+} CGSRamDiscardListener;
struct ConfidentialGuestSupport {
Object parent;
@@ -34,6 +41,8 @@ struct ConfidentialGuestSupport {
*/
bool require_guest_memfd;
+ QLIST_HEAD(, CGSRamDiscardListener) cgs_rdl_list;
+
/*
* ready: flag set by CGS initialization code once it's ready to
* start executing instructions in a potentially-secure
diff --git a/system/ram-block-attribute.c b/system/ram-block-attribute.c
index 896c3d7543..387501b569 100644
--- a/system/ram-block-attribute.c
+++ b/system/ram-block-attribute.c
@@ -274,11 +274,12 @@ static bool ram_block_attribute_is_valid_range(RamBlockAttribute *attr,
return true;
}
-static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
- uint64_t offset,
- uint64_t size)
+static int ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
+ uint64_t offset,
+ uint64_t size)
{
RamDiscardListener *rdl;
+ int ret = 0;
QLIST_FOREACH(rdl, &attr->rdl_list, next) {
MemoryRegionSection tmp = *rdl->section;
@@ -286,8 +287,13 @@ static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
if (!memory_region_section_intersect_range(&tmp, offset, size)) {
continue;
}
- rdl->notify_discard(rdl, &tmp);
+ ret = rdl->notify_discard(rdl, &tmp);
+ if (ret) {
+ break;
+ }
}
+
+ return ret;
}
static int
@@ -377,7 +383,7 @@ int ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t offset,
if (to_private) {
bitmap_clear(attr->bitmap, first_bit, nbits);
- ram_block_attribute_notify_to_discard(attr, offset, size);
+ ret = ram_block_attribute_notify_to_discard(attr, offset, size);
} else {
bitmap_set(attr->bitmap, first_bit, nbits);
ret = ram_block_attribute_notify_to_populated(attr, offset, size);
diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
index 7ef49690bd..17b360059c 100644
--- a/target/i386/kvm/tdx.c
+++ b/target/i386/kvm/tdx.c
@@ -1492,6 +1492,7 @@ static void tdx_guest_init(Object *obj)
qemu_mutex_init(&tdx->lock);
cgs->require_guest_memfd = true;
+ QLIST_INIT(&cgs->cgs_rdl_list);
tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
diff --git a/target/i386/sev.c b/target/i386/sev.c
index adf787797e..f1b9c35fc3 100644
--- a/target/i386/sev.c
+++ b/target/i386/sev.c
@@ -2430,6 +2430,7 @@ sev_snp_guest_instance_init(Object *obj)
SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
cgs->require_guest_memfd = true;
+ QLIST_INIT(&cgs->cgs_rdl_list);
/* default init/start/finish params for kvm */
sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
--
2.43.5
On 20/5/25 20:28, Chenyi Qiang wrote:
> With the introduction of the RamBlockAttribute object to manage
> RAMBlocks with guest_memfd, it is more elegant to move KVM set attribute
> into a RamDiscardListener.
>
> The KVM attribute change RamDiscardListener is registered/unregistered
> for each memory region section during kvm_region_add/del(). The listener
> handler performs attribute change upon receiving notifications from
> ram_block_attribute_state_change() calls. After this change, the
> operations in kvm_convert_memory() can be removed.
>
> Note that, errors can be returned in
> ram_block_attribute_notify_to_discard() by KVM attribute changes,
> although it is currently unlikely to happen. With in-place conversion
> guest_memfd in the future, it would be more likely to encounter errors
> and require error handling. For now, simply return the result, and
> kvm_convert_memory() will cause QEMU to quit if any issue arises.
>
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
> Changes in v5:
> - Revert to use RamDiscardListener
>
> Changes in v4:
> - Newly added.
> ---
> accel/kvm/kvm-all.c | 72 ++++++++++++++++++---
> include/system/confidential-guest-support.h | 9 +++
> system/ram-block-attribute.c | 16 +++--
> target/i386/kvm/tdx.c | 1 +
> target/i386/sev.c | 1 +
imho this diffstat disagrees with the "more elegant" :)
+1 for ditching it from this patchset. Thanks,
> 5 files changed, 85 insertions(+), 14 deletions(-)
>
> diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
> index 2d7ecaeb6a..ca4ef8062b 100644
> --- a/accel/kvm/kvm-all.c
> +++ b/accel/kvm/kvm-all.c
> @@ -49,6 +49,7 @@
> #include "kvm-cpus.h"
> #include "system/dirtylimit.h"
> #include "qemu/range.h"
> +#include "system/confidential-guest-support.h"
>
> #include "hw/boards.h"
> #include "system/stats.h"
> @@ -1689,28 +1690,90 @@ static int kvm_dirty_ring_init(KVMState *s)
> return 0;
> }
>
> +static int kvm_private_shared_notify(RamDiscardListener *rdl,
> + MemoryRegionSection *section,
> + bool to_private)
> +{
> + hwaddr start = section->offset_within_address_space;
> + hwaddr size = section->size;
> +
> + if (to_private) {
> + return kvm_set_memory_attributes_private(start, size);
> + } else {
> + return kvm_set_memory_attributes_shared(start, size);
> + }
> +}
> +
> +static int kvm_ram_discard_notify_to_shared(RamDiscardListener *rdl,
> + MemoryRegionSection *section)
> +{
> + return kvm_private_shared_notify(rdl, section, false);
> +}
> +
> +static int kvm_ram_discard_notify_to_private(RamDiscardListener *rdl,
> + MemoryRegionSection *section)
> +{
> + return kvm_private_shared_notify(rdl, section, true);
> +}
> +
> static void kvm_region_add(MemoryListener *listener,
> MemoryRegionSection *section)
> {
> KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
> + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
> + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
> KVMMemoryUpdate *update;
> + CGSRamDiscardListener *crdl;
> + RamDiscardListener *rdl;
> +
>
> update = g_new0(KVMMemoryUpdate, 1);
> update->section = *section;
>
> QSIMPLEQ_INSERT_TAIL(&kml->transaction_add, update, next);
> +
> + if (!memory_region_has_guest_memfd(section->mr) || !rdm) {
> + return;
> + }
> +
> + crdl = g_new0(CGSRamDiscardListener, 1);
> + crdl->mr = section->mr;
> + crdl->offset_within_address_space = section->offset_within_address_space;
> + rdl = &crdl->listener;
> + QLIST_INSERT_HEAD(&cgs->cgs_rdl_list, crdl, next);
> + ram_discard_listener_init(rdl, kvm_ram_discard_notify_to_shared,
> + kvm_ram_discard_notify_to_private, true);
> + ram_discard_manager_register_listener(rdm, rdl, section);
> }
>
> static void kvm_region_del(MemoryListener *listener,
> MemoryRegionSection *section)
> {
> KVMMemoryListener *kml = container_of(listener, KVMMemoryListener, listener);
> + ConfidentialGuestSupport *cgs = MACHINE(qdev_get_machine())->cgs;
> + RamDiscardManager *rdm = memory_region_get_ram_discard_manager(section->mr);
> KVMMemoryUpdate *update;
> + CGSRamDiscardListener *crdl;
> + RamDiscardListener *rdl;
>
> update = g_new0(KVMMemoryUpdate, 1);
> update->section = *section;
>
> QSIMPLEQ_INSERT_TAIL(&kml->transaction_del, update, next);
> + if (!memory_region_has_guest_memfd(section->mr) || !rdm) {
> + return;
> + }
> +
> + QLIST_FOREACH(crdl, &cgs->cgs_rdl_list, next) {
> + if (crdl->mr == section->mr &&
> + crdl->offset_within_address_space == section->offset_within_address_space) {
> + rdl = &crdl->listener;
> + ram_discard_manager_unregister_listener(rdm, rdl);
> + QLIST_REMOVE(crdl, next);
> + g_free(crdl);
> + break;
> + }
> + }
> }
>
> static void kvm_region_commit(MemoryListener *listener)
> @@ -3077,15 +3140,6 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
> goto out_unref;
> }
>
> - if (to_private) {
> - ret = kvm_set_memory_attributes_private(start, size);
> - } else {
> - ret = kvm_set_memory_attributes_shared(start, size);
> - }
> - if (ret) {
> - goto out_unref;
> - }
> -
> addr = memory_region_get_ram_ptr(mr) + section.offset_within_region;
> rb = qemu_ram_block_from_host(addr, false, &offset);
>
> diff --git a/include/system/confidential-guest-support.h b/include/system/confidential-guest-support.h
> index ea46b50c56..974abdbf6b 100644
> --- a/include/system/confidential-guest-support.h
> +++ b/include/system/confidential-guest-support.h
> @@ -19,12 +19,19 @@
> #define QEMU_CONFIDENTIAL_GUEST_SUPPORT_H
>
> #include "qom/object.h"
> +#include "system/memory.h"
>
> #define TYPE_CONFIDENTIAL_GUEST_SUPPORT "confidential-guest-support"
> OBJECT_DECLARE_TYPE(ConfidentialGuestSupport,
> ConfidentialGuestSupportClass,
> CONFIDENTIAL_GUEST_SUPPORT)
>
> +typedef struct CGSRamDiscardListener {
> + MemoryRegion *mr;
> + hwaddr offset_within_address_space;
> + RamDiscardListener listener;
> + QLIST_ENTRY(CGSRamDiscardListener) next;
> +} CGSRamDiscardListener;
>
> struct ConfidentialGuestSupport {
> Object parent;
> @@ -34,6 +41,8 @@ struct ConfidentialGuestSupport {
> */
> bool require_guest_memfd;
>
> + QLIST_HEAD(, CGSRamDiscardListener) cgs_rdl_list;
> +
> /*
> * ready: flag set by CGS initialization code once it's ready to
> * start executing instructions in a potentially-secure
> diff --git a/system/ram-block-attribute.c b/system/ram-block-attribute.c
> index 896c3d7543..387501b569 100644
> --- a/system/ram-block-attribute.c
> +++ b/system/ram-block-attribute.c
> @@ -274,11 +274,12 @@ static bool ram_block_attribute_is_valid_range(RamBlockAttribute *attr,
> return true;
> }
>
> -static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
> - uint64_t offset,
> - uint64_t size)
> +static int ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
> + uint64_t offset,
> + uint64_t size)
> {
> RamDiscardListener *rdl;
> + int ret = 0;
>
> QLIST_FOREACH(rdl, &attr->rdl_list, next) {
> MemoryRegionSection tmp = *rdl->section;
> @@ -286,8 +287,13 @@ static void ram_block_attribute_notify_to_discard(RamBlockAttribute *attr,
> if (!memory_region_section_intersect_range(&tmp, offset, size)) {
> continue;
> }
> - rdl->notify_discard(rdl, &tmp);
> + ret = rdl->notify_discard(rdl, &tmp);
> + if (ret) {
> + break;
> + }
> }
> +
> + return ret;
> }
>
> static int
> @@ -377,7 +383,7 @@ int ram_block_attribute_state_change(RamBlockAttribute *attr, uint64_t offset,
>
> if (to_private) {
> bitmap_clear(attr->bitmap, first_bit, nbits);
> - ram_block_attribute_notify_to_discard(attr, offset, size);
> + ret = ram_block_attribute_notify_to_discard(attr, offset, size);
> } else {
> bitmap_set(attr->bitmap, first_bit, nbits);
> ret = ram_block_attribute_notify_to_populated(attr, offset, size);
> diff --git a/target/i386/kvm/tdx.c b/target/i386/kvm/tdx.c
> index 7ef49690bd..17b360059c 100644
> --- a/target/i386/kvm/tdx.c
> +++ b/target/i386/kvm/tdx.c
> @@ -1492,6 +1492,7 @@ static void tdx_guest_init(Object *obj)
> qemu_mutex_init(&tdx->lock);
>
> cgs->require_guest_memfd = true;
> + QLIST_INIT(&cgs->cgs_rdl_list);
> tdx->attributes = TDX_TD_ATTRIBUTES_SEPT_VE_DISABLE;
>
> object_property_add_uint64_ptr(obj, "attributes", &tdx->attributes,
> diff --git a/target/i386/sev.c b/target/i386/sev.c
> index adf787797e..f1b9c35fc3 100644
> --- a/target/i386/sev.c
> +++ b/target/i386/sev.c
> @@ -2430,6 +2430,7 @@ sev_snp_guest_instance_init(Object *obj)
> SevSnpGuestState *sev_snp_guest = SEV_SNP_GUEST(obj);
>
> cgs->require_guest_memfd = true;
> + QLIST_INIT(&cgs->cgs_rdl_list);
>
> /* default init/start/finish params for kvm */
> sev_snp_guest->kvm_start_conf.policy = DEFAULT_SEV_SNP_POLICY;
--
Alexey
On 20.05.25 12:28, Chenyi Qiang wrote:
> With the introduction of the RamBlockAttribute object to manage
> RAMBlocks with guest_memfd, it is more elegant to move KVM set attribute
> into a RamDiscardListener.
>
> The KVM attribute change RamDiscardListener is registered/unregistered
> for each memory region section during kvm_region_add/del(). The listener
> handler performs attribute change upon receiving notifications from
> ram_block_attribute_state_change() calls. After this change, the
> operations in kvm_convert_memory() can be removed.
>
> Note that, errors can be returned in
> ram_block_attribute_notify_to_discard() by KVM attribute changes,
> although it is currently unlikely to happen. With in-place conversion
> guest_memfd in the future, it would be more likely to encounter errors
> and require error handling. For now, simply return the result, and
> kvm_convert_memory() will cause QEMU to quit if any issue arises.
>
> Signed-off-by: Chenyi Qiang <chenyi.qiang@intel.com>
> ---
[...]
> static void kvm_region_commit(MemoryListener *listener)
> @@ -3077,15 +3140,6 @@ int kvm_convert_memory(hwaddr start, hwaddr size, bool to_private)
> goto out_unref;
> }
>
> - if (to_private) {
> - ret = kvm_set_memory_attributes_private(start, size);
> - } else {
> - ret = kvm_set_memory_attributes_shared(start, size);
> - }
> - if (ret) {
> - goto out_unref;
> - }
> -
I wonder if it's best to leave that out for now. With in-place
conversion it will all get a bit more tricky, because we'd need to make
the calls in different orders ...
e.g., do private -> shared before mapping to vfio, but shared ->
private after unmapping from vfio.
That can be handled more easily when doing the calls from KVM code directly.
--
Cheers,
David / dhildenb
© 2016 - 2025 Red Hat, Inc.