From: Ani Sinha <anisinha@redhat.com>
This change adds common kvm specific support to handle KVM VM file descriptor
change. KVM VM file descriptor can change as a part of confidential guest reset
mechanism. A new function api kvm_arch_on_vmfd_change() per
architecture platform is added in order to implement architecture specific
changes required to support it. A subsequent patch will add x86 specific
implementation for kvm_arch_on_vmfd_change() as currently only x86 supports
confidential guest reset.
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Link: https://lore.kernel.org/r/20260225035000.385950-6-anisinha@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
MAINTAINERS | 6 +++
include/system/kvm.h | 3 ++
accel/kvm/kvm-all.c | 88 ++++++++++++++++++++++++++++++++++++++++--
stubs/kvm.c | 22 +++++++++++
target/i386/kvm/kvm.c | 10 +++++
accel/kvm/trace-events | 1 +
stubs/meson.build | 1 +
7 files changed, 128 insertions(+), 3 deletions(-)
create mode 100644 stubs/kvm.c
diff --git a/MAINTAINERS b/MAINTAINERS
index 72153288c53..a8e1546de1e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -152,6 +152,12 @@ F: tools/i386/
F: tests/functional/i386/
F: tests/functional/x86_64/
+X86 VM file descriptor change on reset test
+M: Ani Sinha <anisinha@redhat.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+S: Maintained
+F: stubs/kvm.c
+
Guest CPU cores (TCG)
---------------------
Overall TCG CPUs
diff --git a/include/system/kvm.h b/include/system/kvm.h
index 8f9eecf044c..5fc7251fd94 100644
--- a/include/system/kvm.h
+++ b/include/system/kvm.h
@@ -456,6 +456,9 @@ int kvm_physical_memory_addr_from_host(KVMState *s, void *ram_addr,
#endif /* COMPILING_PER_TARGET */
+bool kvm_arch_supports_vmfd_change(void);
+int kvm_arch_on_vmfd_change(MachineState *ms, KVMState *s);
+
void kvm_cpu_synchronize_state(CPUState *cpu);
void kvm_init_cpu_signals(CPUState *cpu);
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index 0d8b0c43470..cc5c42ce4de 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -2415,11 +2415,9 @@ void kvm_irqchip_set_qemuirq_gsi(KVMState *s, qemu_irq irq, int gsi)
g_hash_table_insert(s->gsimap, irq, GINT_TO_POINTER(gsi));
}
-static void kvm_irqchip_create(KVMState *s)
+static void do_kvm_irqchip_create(KVMState *s)
{
int ret;
-
- assert(s->kernel_irqchip_split != ON_OFF_AUTO_AUTO);
if (kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
;
} else if (kvm_check_extension(s, KVM_CAP_S390_IRQCHIP)) {
@@ -2452,7 +2450,13 @@ static void kvm_irqchip_create(KVMState *s)
fprintf(stderr, "Create kernel irqchip failed: %s\n", strerror(-ret));
exit(1);
}
+}
+static void kvm_irqchip_create(KVMState *s)
+{
+ assert(s->kernel_irqchip_split != ON_OFF_AUTO_AUTO);
+
+ do_kvm_irqchip_create(s);
kvm_kernel_irqchip = true;
/* If we have an in-kernel IRQ chip then we must have asynchronous
* interrupt delivery (though the reverse is not necessarily true)
@@ -2607,6 +2611,83 @@ static int kvm_setup_dirty_ring(KVMState *s)
return 0;
}
+static int kvm_reset_vmfd(MachineState *ms)
+{
+ KVMState *s;
+ KVMMemoryListener *kml;
+ int ret = 0, type;
+ Error *err = NULL;
+
+ /*
+ * bail if the current architecture does not support VM file
+ * descriptor change.
+ */
+ if (!kvm_arch_supports_vmfd_change()) {
+ error_report("This target architecture does not support KVM VM "
+ "file descriptor change.");
+ return -EOPNOTSUPP;
+ }
+
+ s = KVM_STATE(ms->accelerator);
+ kml = &s->memory_listener;
+
+ memory_listener_unregister(&kml->listener);
+ memory_listener_unregister(&kvm_io_listener);
+
+ if (s->vmfd >= 0) {
+ close(s->vmfd);
+ }
+
+ type = find_kvm_machine_type(ms);
+ if (type < 0) {
+ return -EINVAL;
+ }
+
+ ret = do_kvm_create_vm(s, type);
+ if (ret < 0) {
+ return ret;
+ }
+
+ s->vmfd = ret;
+
+ kvm_setup_dirty_ring(s);
+
+ /* rebind memory to new vm fd */
+ ret = ram_block_rebind(&err);
+ if (ret < 0) {
+ return ret;
+ }
+ assert(!err);
+
+ ret = kvm_arch_on_vmfd_change(ms, s);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (s->kernel_irqchip_allowed) {
+ do_kvm_irqchip_create(s);
+ }
+
+ /* these can be only called after ram_block_rebind() */
+ memory_listener_register(&kml->listener, &address_space_memory);
+ memory_listener_register(&kvm_io_listener, &address_space_io);
+
+ /*
+ * kvm fd has changed. Commit the irq routes to KVM once more.
+ */
+ kvm_irqchip_commit_routes(s);
+ /*
+ * for confidential guest, this is the last possible place where we
+ * can call synchronize_all_post_init() to sync all vcpu states to
+ * kvm.
+ */
+ if (ms->cgs) {
+ cpu_synchronize_all_post_init();
+ }
+ trace_kvm_reset_vmfd();
+ return ret;
+}
+
static int kvm_init(AccelState *as, MachineState *ms)
{
MachineClass *mc = MACHINE_GET_CLASS(ms);
@@ -4015,6 +4096,7 @@ static void kvm_accel_class_init(ObjectClass *oc, const void *data)
AccelClass *ac = ACCEL_CLASS(oc);
ac->name = "KVM";
ac->init_machine = kvm_init;
+ ac->rebuild_guest = kvm_reset_vmfd;
ac->has_memory = kvm_accel_has_memory;
ac->allowed = &kvm_allowed;
ac->gdbstub_supported_sstep_flags = kvm_gdbstub_sstep_flags;
diff --git a/stubs/kvm.c b/stubs/kvm.c
new file mode 100644
index 00000000000..2db61d89a73
--- /dev/null
+++ b/stubs/kvm.c
@@ -0,0 +1,22 @@
+/*
+ * kvm target arch specific stubs
+ *
+ * Copyright (c) 2026 Red Hat, Inc.
+ *
+ * Author:
+ * Ani Sinha <anisinha@redhat.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+#include "qemu/osdep.h"
+#include "system/kvm.h"
+
+int kvm_arch_on_vmfd_change(MachineState *ms, KVMState *s)
+{
+ abort();
+}
+
+bool kvm_arch_supports_vmfd_change(void)
+{
+ return false;
+}
diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
index c0be2e932da..524b5276a68 100644
--- a/target/i386/kvm/kvm.c
+++ b/target/i386/kvm/kvm.c
@@ -3389,6 +3389,16 @@ static int kvm_vm_enable_energy_msrs(KVMState *s)
return 0;
}
+int kvm_arch_on_vmfd_change(MachineState *ms, KVMState *s)
+{
+ abort();
+}
+
+bool kvm_arch_supports_vmfd_change(void)
+{
+ return false;
+}
+
int kvm_arch_init(MachineState *ms, KVMState *s)
{
int ret;
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index e43d18a8692..e4beda01488 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -14,6 +14,7 @@ kvm_destroy_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
kvm_unpark_vcpu(unsigned long arch_cpu_id, const char *msg) "id: %lu %s"
kvm_irqchip_commit_routes(void) ""
+kvm_reset_vmfd(void) ""
kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
kvm_irqchip_release_virq(int virq) "virq %d"
diff --git a/stubs/meson.build b/stubs/meson.build
index 8a07059500d..6ae478bacc1 100644
--- a/stubs/meson.build
+++ b/stubs/meson.build
@@ -74,6 +74,7 @@ if have_system
if igvm.found()
stub_ss.add(files('igvm.c'))
endif
+ stub_ss.add(files('kvm.c'))
stub_ss.add(files('target-get-monitor-def.c'))
stub_ss.add(files('target-monitor-defs.c'))
stub_ss.add(files('win32-kbd-hook.c'))
--
2.53.0