From: Ani Sinha <anisinha@redhat.com>
Confidential guests need to generate a new KVM file descriptor upon virtual
machine reset. Existing VCPUs need to be reattached to this new
KVM VM file descriptor. As a part of this, new VCPU file descriptors against
this new KVM VM file descriptor need to be created and re-initialized.
Resources allocated against the old VCPU fds need to be released. This change
makes this happen.
Signed-off-by: Ani Sinha <anisinha@redhat.com>
Link: https://lore.kernel.org/r/20260225035000.385950-16-anisinha@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
accel/kvm/kvm-all.c | 215 +++++++++++++++++++++++++++++++++--------
accel/kvm/trace-events | 1 +
2 files changed, 174 insertions(+), 42 deletions(-)
diff --git a/accel/kvm/kvm-all.c b/accel/kvm/kvm-all.c
index d244156f6f4..a347a71a2ee 100644
--- a/accel/kvm/kvm-all.c
+++ b/accel/kvm/kvm-all.c
@@ -127,6 +127,10 @@ static NotifierList kvm_irqchip_change_notifiers =
static NotifierWithReturnList register_vmfd_changed_notifiers =
NOTIFIER_WITH_RETURN_LIST_INITIALIZER(register_vmfd_changed_notifiers);
+static int map_kvm_run(KVMState *s, CPUState *cpu, Error **errp);
+static int map_kvm_dirty_gfns(KVMState *s, CPUState *cpu, Error **errp);
+static int vcpu_unmap_regions(KVMState *s, CPUState *cpu);
+
struct KVMResampleFd {
int gsi;
EventNotifier *resample_event;
@@ -420,6 +424,90 @@ err:
return ret;
}
+static void kvm_create_vcpu_internal(CPUState *cpu, KVMState *s, int kvm_fd)
+{
+ cpu->kvm_fd = kvm_fd;
+ cpu->kvm_state = s;
+ if (!s->guest_state_protected) {
+ cpu->vcpu_dirty = true;
+ }
+ cpu->dirty_pages = 0;
+ cpu->throttle_us_per_full = 0;
+
+ return;
+}
+
+static int kvm_rebind_vcpus(Error **errp)
+{
+ CPUState *cpu;
+ unsigned long vcpu_id;
+ KVMState *s = kvm_state;
+ int kvm_fd, ret = 0;
+
+ CPU_FOREACH(cpu) {
+ vcpu_id = kvm_arch_vcpu_id(cpu);
+
+ if (cpu->kvm_fd) {
+ close(cpu->kvm_fd);
+ }
+
+ ret = kvm_arch_destroy_vcpu(cpu);
+ if (ret < 0) {
+ goto err;
+ }
+
+ if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+ s->coalesced_mmio_ring = NULL;
+ }
+
+ ret = vcpu_unmap_regions(s, cpu);
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
+ kvm_fd = kvm_vm_ioctl(s, KVM_CREATE_VCPU, vcpu_id);
+ if (kvm_fd < 0) {
+ error_report("KVM_CREATE_VCPU IOCTL failed for vCPU %lu (%s)",
+ vcpu_id, strerror(-kvm_fd));
+ return kvm_fd;
+ }
+
+ kvm_create_vcpu_internal(cpu, s, kvm_fd);
+
+ ret = map_kvm_run(s, cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
+ if (s->kvm_dirty_ring_size) {
+ ret = map_kvm_dirty_gfns(s, cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+ }
+
+ ret = kvm_arch_init_vcpu(cpu);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "kvm_init_vcpu: kvm_arch_init_vcpu failed (%lu)",
+ vcpu_id);
+ }
+
+ close(cpu->kvm_vcpu_stats_fd);
+ cpu->kvm_vcpu_stats_fd = kvm_vcpu_ioctl(cpu, KVM_GET_STATS_FD, NULL);
+ kvm_init_cpu_signals(cpu);
+ }
+ trace_kvm_rebind_vcpus();
+
+ err:
+ return ret;
+}
+
static void kvm_park_vcpu(CPUState *cpu)
{
struct KVMParkedVcpu *vcpu;
@@ -483,13 +571,7 @@ static int kvm_create_vcpu(CPUState *cpu)
}
}
- cpu->kvm_fd = kvm_fd;
- cpu->kvm_state = s;
- if (!s->guest_state_protected) {
- cpu->vcpu_dirty = true;
- }
- cpu->dirty_pages = 0;
- cpu->throttle_us_per_full = 0;
+ kvm_create_vcpu_internal(cpu, s, kvm_fd);
trace_kvm_create_vcpu(cpu->cpu_index, vcpu_id, kvm_fd);
@@ -508,19 +590,11 @@ int kvm_create_and_park_vcpu(CPUState *cpu)
return ret;
}
-static int do_kvm_destroy_vcpu(CPUState *cpu)
+static int vcpu_unmap_regions(KVMState *s, CPUState *cpu)
{
- KVMState *s = kvm_state;
int mmap_size;
int ret = 0;
- trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
-
- ret = kvm_arch_destroy_vcpu(cpu);
- if (ret < 0) {
- goto err;
- }
-
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
ret = mmap_size;
@@ -548,6 +622,31 @@ static int do_kvm_destroy_vcpu(CPUState *cpu)
cpu->kvm_dirty_gfns = NULL;
}
+ err:
+ return ret;
+}
+
+static int do_kvm_destroy_vcpu(CPUState *cpu)
+{
+ KVMState *s = kvm_state;
+ int ret = 0;
+
+ trace_kvm_destroy_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+ ret = kvm_arch_destroy_vcpu(cpu);
+ if (ret < 0) {
+ goto err;
+ }
+
+ /* If I am the CPU that created coalesced_mmio_ring, then discard it */
+ if (s->coalesced_mmio_ring == (void *)cpu->kvm_run + PAGE_SIZE) {
+ s->coalesced_mmio_ring = NULL;
+ }
+
+ ret = vcpu_unmap_regions(s, cpu);
+ if (ret < 0) {
+ goto err;
+ }
kvm_park_vcpu(cpu);
err:
return ret;
@@ -561,26 +660,9 @@ void kvm_destroy_vcpu(CPUState *cpu)
}
}
-int kvm_init_vcpu(CPUState *cpu, Error **errp)
+static int map_kvm_run(KVMState *s, CPUState *cpu, Error **errp)
{
- KVMState *s = kvm_state;
- int mmap_size;
- int ret;
-
- trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
-
- ret = kvm_arch_pre_create_vcpu(cpu, errp);
- if (ret < 0) {
- goto err;
- }
-
- ret = kvm_create_vcpu(cpu);
- if (ret < 0) {
- error_setg_errno(errp, -ret,
- "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
- kvm_arch_vcpu_id(cpu));
- goto err;
- }
+ int mmap_size, ret = 0;
mmap_size = kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0);
if (mmap_size < 0) {
@@ -605,14 +687,53 @@ int kvm_init_vcpu(CPUState *cpu, Error **errp)
(void *)cpu->kvm_run + s->coalesced_mmio * PAGE_SIZE;
}
+ err:
+ return ret;
+}
+
+static int map_kvm_dirty_gfns(KVMState *s, CPUState *cpu, Error **errp)
+{
+ int ret = 0;
+ /* Use MAP_SHARED to share pages with the kernel */
+ cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ cpu->kvm_fd,
+ PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
+ if (cpu->kvm_dirty_gfns == MAP_FAILED) {
+ ret = -errno;
+ }
+
+ return ret;
+}
+
+int kvm_init_vcpu(CPUState *cpu, Error **errp)
+{
+ KVMState *s = kvm_state;
+ int ret;
+
+ trace_kvm_init_vcpu(cpu->cpu_index, kvm_arch_vcpu_id(cpu));
+
+ ret = kvm_arch_pre_create_vcpu(cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
+ ret = kvm_create_vcpu(cpu);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret,
+ "kvm_init_vcpu: kvm_create_vcpu failed (%lu)",
+ kvm_arch_vcpu_id(cpu));
+ goto err;
+ }
+
+ ret = map_kvm_run(s, cpu, errp);
+ if (ret < 0) {
+ goto err;
+ }
+
if (s->kvm_dirty_ring_size) {
- /* Use MAP_SHARED to share pages with the kernel */
- cpu->kvm_dirty_gfns = mmap(NULL, s->kvm_dirty_ring_bytes,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- cpu->kvm_fd,
- PAGE_SIZE * KVM_DIRTY_LOG_PAGE_OFFSET);
- if (cpu->kvm_dirty_gfns == MAP_FAILED) {
- ret = -errno;
+ ret = map_kvm_dirty_gfns(s, cpu, errp);
+ if (ret < 0) {
goto err;
}
}
@@ -2710,6 +2831,16 @@ static int kvm_reset_vmfd(MachineState *ms)
}
assert(!err);
+ /*
+ * rebind new vcpu fds with the new kvm fds
+ * These can only be called after kvm_arch_on_vmfd_change()
+ */
+ ret = kvm_rebind_vcpus(&err);
+ if (ret < 0) {
+ return ret;
+ }
+ assert(!err);
+
/* these can be only called after ram_block_rebind() */
memory_listener_register(&kml->listener, &address_space_memory);
memory_listener_register(&kvm_io_listener, &address_space_io);
diff --git a/accel/kvm/trace-events b/accel/kvm/trace-events
index e4beda01488..4a8921c632b 100644
--- a/accel/kvm/trace-events
+++ b/accel/kvm/trace-events
@@ -15,6 +15,7 @@ kvm_park_vcpu(int cpu_index, unsigned long arch_cpu_id) "index: %d id: %lu"
kvm_unpark_vcpu(unsigned long arch_cpu_id, const char *msg) "id: %lu %s"
kvm_irqchip_commit_routes(void) ""
kvm_reset_vmfd(void) ""
+kvm_rebind_vcpus(void) ""
kvm_irqchip_add_msi_route(char *name, int vector, int virq) "dev %s vector %d virq %d"
kvm_irqchip_update_msi_route(int virq) "Updating MSI route virq=%d"
kvm_irqchip_release_virq(int virq) "virq %d"
--
2.53.0