[PATCH v3 27/33] kvm/xen_evtchn: add support for confidential guest reset

Ani Sinha posted 33 patches 1 week, 6 days ago
Maintainers: Paolo Bonzini <pbonzini@redhat.com>, Eduardo Habkost <eduardo@habkost.net>, Marcel Apfelbaum <marcel.apfelbaum@gmail.com>, "Philippe Mathieu-Daudé" <philmd@linaro.org>, Yanan Wang <wangyanan55@huawei.com>, Zhao Liu <zhao1.liu@intel.com>, "Maciej S. Szmigiero" <maciej.szmigiero@oracle.com>, Richard Henderson <richard.henderson@linaro.org>, "Michael S. Tsirkin" <mst@redhat.com>, David Woodhouse <dwmw2@infradead.org>, Paul Durrant <paul@xen.org>, Bernhard Beschow <shentey@gmail.com>, Alex Williamson <alex@shazbot.org>, "Cédric Le Goater" <clg@redhat.com>, Peter Xu <peterx@redhat.com>, Peter Maydell <peter.maydell@linaro.org>, Marcelo Tosatti <mtosatti@redhat.com>, Song Gao <gaosong@loongson.cn>, Huacai Chen <chenhuacai@kernel.org>, Aurelien Jarno <aurelien@aurel32.net>, Jiaxun Yang <jiaxun.yang@flygoat.com>, Aleksandar Rikalo <arikalo@gmail.com>, Nicholas Piggin <npiggin@gmail.com>, Harsh Prateek Bora <harshpb@linux.ibm.com>, Chinmay Rath <rathc@linux.ibm.com>, Palmer Dabbelt <palmer@dabbelt.com>, Alistair Francis <alistair.francis@wdc.com>, Weiwei Li <liwei1518@gmail.com>, Daniel Henrique Barboza <dbarboza@ventanamicro.com>, Liu Zhiwei <zhiwei_liu@linux.alibaba.com>, Halil Pasic <pasic@linux.ibm.com>, Christian Borntraeger <borntraeger@linux.ibm.com>, Eric Farman <farman@linux.ibm.com>, Matthew Rosato <mjrosato@linux.ibm.com>, Ilya Leoshkevich <iii@linux.ibm.com>, David Hildenbrand <david@kernel.org>, Thomas Huth <thuth@redhat.com>, Ani Sinha <anisinha@redhat.com>
[PATCH v3 27/33] kvm/xen_evtchn: add support for confidential guest reset
Posted by Ani Sinha 1 week, 6 days ago
As a part of the confidential guest reset, when the KVM VM file handle is
changed, Xen event ports and kernel ports that were associated with the
previous KVM file handle need to be reassociated with the new handle. This is
performed with the help of a callback handler that gets invoked during the
confidential guest reset process when the KVM VM fd changes.

Signed-off-by: Ani Sinha <anisinha@redhat.com>
---
 hw/i386/kvm/xen_evtchn.c | 113 +++++++++++++++++++++++++++++++++++++--
 1 file changed, 110 insertions(+), 3 deletions(-)

diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 8b243984e4..7802fa68ae 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -133,6 +133,26 @@ struct pirq_info {
     bool is_translated;
 };
 
+struct eventfds {
+    uint16_t type;
+    evtchn_port_t port;
+    int fd;
+    QLIST_ENTRY(eventfds) node;
+};
+
+struct kernel_ports {
+    uint16_t type;
+    evtchn_port_t port;
+    uint32_t vcpu_id;
+    QLIST_ENTRY(kernel_ports) node;
+};
+
+static QLIST_HEAD(, eventfds) eventfd_list =
+    QLIST_HEAD_INITIALIZER(eventfd_list);
+
+static QLIST_HEAD(, kernel_ports) kernel_port_list =
+    QLIST_HEAD_INITIALIZER(kernel_port_list);
+
 struct XenEvtchnState {
     /*< private >*/
     SysBusDevice busdev;
@@ -178,6 +198,7 @@ struct XenEvtchnState {
 #define pirq_inuse(s, pirq) (pirq_inuse_word(s, pirq) & pirq_inuse_bit(pirq))
 
 struct XenEvtchnState *xen_evtchn_singleton;
+static NotifierWithReturn xen_eventchn_notifier;
 
 /* Top bits of callback_param are the type (HVM_PARAM_CALLBACK_TYPE_xxx) */
 #define CALLBACK_VIA_TYPE_SHIFT 56
@@ -304,6 +325,57 @@ static void gsi_assert_bh(void *opaque)
     }
 }
 
+static int xen_eventchn_handle_vmfd_change(NotifierWithReturn *notifier,
+                                           void *data, Error **errp)
+{
+    struct eventfds *ef;
+    struct kernel_ports *kp;
+    struct kvm_xen_hvm_attr ha;
+    CPUState *cpu;
+    int ret;
+
+    /* we are not interested in pre vmfd change notification */
+    if (((VmfdChangeNotifier *)data)->pre) {
+        return 0;
+    }
+
+    QLIST_FOREACH(ef, &eventfd_list, node) {
+        ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+        ha.u.evtchn.send_port = ef->port;
+        ha.u.evtchn.type = ef->type;
+        ha.u.evtchn.flags = 0;
+        ha.u.evtchn.deliver.eventfd.port = 0;
+        ha.u.evtchn.deliver.eventfd.fd = ef->fd;
+
+        ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
+        if (ret < 0) {
+            error_setg(errp, "KVM_XEN_HVM_SET_ATTR failed with %d", ret);
+            return ret;
+        }
+    }
+
+    memset(&ha, 0, sizeof(ha));
+
+    QLIST_FOREACH(kp, &kernel_port_list, node) {
+        cpu = qemu_get_cpu(kp->vcpu_id);
+        ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+        ha.u.evtchn.send_port = kp->port;
+        ha.u.evtchn.type = kp->type;
+        ha.u.evtchn.flags = 0;
+        ha.u.evtchn.deliver.port.port = kp->port;
+        ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
+        ha.u.evtchn.deliver.port.priority =
+            KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+        ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
+        if (ret < 0) {
+            error_setg(errp, "KVM_XEN_HVM_SET_ATTR failed with %d", ret);
+            return ret;
+        }
+    }
+    return 0;
+}
+
 void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
 {
     XenEvtchnState *s = XEN_EVTCHN(sysbus_create_simple(TYPE_XEN_EVTCHN,
@@ -350,6 +422,9 @@ void xen_evtchn_create(unsigned int nr_gsis, qemu_irq *system_gsis)
 
     /* Set event channel functions for backend drivers to use */
     xen_evtchn_ops = &emu_evtchn_backend_ops;
+
+    xen_eventchn_notifier.notify = xen_eventchn_handle_vmfd_change;
+    kvm_vmfd_add_change_notifier(&xen_eventchn_notifier);
 }
 
 static void xen_evtchn_register_types(void)
@@ -547,6 +622,8 @@ static void inject_callback(XenEvtchnState *s, uint32_t vcpu)
 static void deassign_kernel_port(evtchn_port_t port)
 {
     struct kvm_xen_hvm_attr ha;
+    struct kernel_ports *kp;
+    struct eventfds *ef;
     int ret;
 
     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
@@ -557,6 +634,19 @@ static void deassign_kernel_port(evtchn_port_t port)
     if (ret) {
         qemu_log_mask(LOG_GUEST_ERROR, "Failed to unbind kernel port %d: %s\n",
                       port, strerror(ret));
+    } else {
+        QLIST_FOREACH(kp, &kernel_port_list, node) {
+            if (kp->port == port) {
+                QLIST_REMOVE(kp, node);
+                g_free(kp);
+            }
+        }
+        QLIST_FOREACH(ef, &eventfd_list, node) {
+            if (ef->port == port) {
+                QLIST_REMOVE(ef, node);
+                g_free(ef);
+            }
+        }
     }
 }
 
@@ -565,6 +655,8 @@ static int assign_kernel_port(uint16_t type, evtchn_port_t port,
 {
     CPUState *cpu = qemu_get_cpu(vcpu_id);
     struct kvm_xen_hvm_attr ha;
+    struct kernel_ports *kp = g_malloc0(sizeof(*kp));
+    int ret;
 
     if (!cpu) {
         return -ENOENT;
@@ -578,12 +670,21 @@ static int assign_kernel_port(uint16_t type, evtchn_port_t port,
     ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
     ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
 
-    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
+    ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
+    if (ret == 0) {
+        kp->type = type;
+        kp->port = port;
+        kp->vcpu_id = vcpu_id;
+        QLIST_INSERT_HEAD(&kernel_port_list, kp, node);
+    }
+    return ret;
 }
 
 static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
 {
     struct kvm_xen_hvm_attr ha;
+    struct eventfds *ef = g_malloc0(sizeof(*ef));
+    int ret;
 
     ha.type = KVM_XEN_ATTR_TYPE_EVTCHN;
     ha.u.evtchn.send_port = port;
@@ -592,7 +693,14 @@ static int assign_kernel_eventfd(uint16_t type, evtchn_port_t port, int fd)
     ha.u.evtchn.deliver.eventfd.port = 0;
     ha.u.evtchn.deliver.eventfd.fd = fd;
 
-    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
+    ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
+    if (ret == 0) {
+        ef->type = type;
+        ef->port = port;
+        ef->fd = fd;
+        QLIST_INSERT_HEAD(&eventfd_list, ef, node);
+    }
+    return ret;
 }
 
 static bool valid_port(evtchn_port_t port)
@@ -2391,4 +2499,3 @@ void hmp_xen_event_inject(Monitor *mon, const QDict *qdict)
         monitor_printf(mon, "Delivered port %d\n", port);
     }
 }
-
-- 
2.42.0
Re: [PATCH v3 27/33] kvm/xen_evtchn: add support for confidential guest reset
Posted by David Woodhouse 1 week, 5 days ago
On Tue, 2026-01-27 at 10:45 +0530, Ani Sinha wrote:
> +    } else {
> +        QLIST_FOREACH(kp, &kernel_port_list, node) {
> +            if (kp->port == port) {
> +                QLIST_REMOVE(kp, node);
> +                g_free(kp);
> +            }
> +        }
> +        QLIST_FOREACH(ef, &eventfd_list, node) {
> +            if (ef->port == port) {
> +                QLIST_REMOVE(ef, node);
> +                g_free(ef);
> +            }
> +        }
>      }
>  }
>  

Do those not need to be QLIST_FOREACH_SAFE ? 

> @@ -565,6 +655,8 @@ static int assign_kernel_port(uint16_t type, evtchn_port_t port,
>  {
>      CPUState *cpu = qemu_get_cpu(vcpu_id);
>      struct kvm_xen_hvm_attr ha;
> +    struct kernel_ports *kp = g_malloc0(sizeof(*kp));
> +    int ret;
>  
>      if (!cpu) {
>          return -ENOENT;
> @@ -578,12 +670,21 @@ static int assign_kernel_port(uint16_t type, evtchn_port_t port,
>      ha.u.evtchn.deliver.port.vcpu = kvm_arch_vcpu_id(cpu);
>      ha.u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
>  
> -    return kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
> +    ret = kvm_vm_ioctl(kvm_state, KVM_XEN_HVM_SET_ATTR, &ha);
> +    if (ret == 0) {
> +        kp->type = type;
> +        kp->port = port;
> +        kp->vcpu_id = vcpu_id;
> +        QLIST_INSERT_HEAD(&kernel_port_list, kp, node);
> +    }
> +    return ret;
>  }
> 

I think 'kp' leaks in the error case there? And the next one in
assign_kernel_eventfd().

I don't much like duplicating the code which performs the ioctls.
Couldn't you have a helper function which does that when given a
'struct kernel_ports', then invoke *that* from both places?

Or even better, make use of the fact that we *already* have the ability
to replay these for serialisation/deserialisation?

But backing up a little more... if this is for rebooting guests,
shouldn't we be wiping the event channel setup completely, like a soft
reset? Do we need to preserve *any* of this? Are you sure you don't see a
soft reset wiping it all away, either before or after this code runs?
Re: [PATCH v3 27/33] kvm/xen_evtchn: add support for confidential guest reset
Posted by Ani Sinha 3 days, 1 hour ago
On Wed, Jan 28, 2026 at 12:22 AM David Woodhouse <dwmw2@infradead.org> wrote:
>

> >
>
> I think 'kp' leaks in the error case there? And the next one in
> assign_kernel_eventfd().
>
> I don't much like duplicating the code which performs the ioctls.
> Couldn't you have a helper function which does that when given a
> 'struct kernel_ports', then invoke *that* from both places?
>
> Or even better, make use of the fact that we *already* have the ability
> to replay these for serialisation/deserialisation?
>
> But backing up a little more... if this is for rebooting guests,
> shouldn't we be wiping the event channel setup completely, like a soft
> reset? Do we need to preserve *any* of this? Are you sure you don't see a
> soft reset wiping it all away, either before or after this code runs?

Ah yes, good catch! I added the following assertions and they never
failed as I walked through the code with gdb:

diff --git a/hw/i386/kvm/xen_evtchn.c b/hw/i386/kvm/xen_evtchn.c
index 7802fa68ae..3c882d85bb 100644
--- a/hw/i386/kvm/xen_evtchn.c
+++ b/hw/i386/kvm/xen_evtchn.c
@@ -1241,6 +1241,9 @@ int xen_evtchn_soft_reset(void)
         kvm_update_msi_routes_all(NULL, true, 0, 0);
     }

+    assert(QLIST_EMPTY(&eventfd_list));
+    assert(QLIST_EMPTY(&kernel_port_list));
+
     return 0;
 }

I reverted this patch and it does not break my Xen functional test. So
it seems the soft reset calls
xen_evtchn_soft_reset() -> close_port() -> deassign_kernel_port() and
all ports are cleaned up, meaning they do not need preserving through
the reset.

I will drop this patch in the next spin-up.