Implement initial interrupt handling logic in the MSHV backend. This
includes management of MSI and un/registering of irqfd mechanisms.
Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
accel/mshv/irq.c | 370 ++++++++++++++++++++++++++++++++++++++++
accel/mshv/meson.build | 1 +
accel/mshv/mshv-all.c | 2 +
accel/mshv/trace-events | 9 +
hw/intc/apic.c | 9 +
include/system/mshv.h | 14 ++
6 files changed, 405 insertions(+)
create mode 100644 accel/mshv/irq.c
diff --git a/accel/mshv/irq.c b/accel/mshv/irq.c
new file mode 100644
index 0000000000..74f0bb62db
--- /dev/null
+++ b/accel/mshv/irq.c
@@ -0,0 +1,370 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors:
+ * Ziqiao Zhou <ziqiaozhou@microsoft.com>
+ * Magnus Kulke <magnuskulke@microsoft.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "hw/hyperv/linux-mshv.h"
+#include "hw/hyperv/hvhdk_mini.h"
+#include "hw/pci/msi.h"
+#include "system/mshv.h"
+#include "trace.h"
+#include <stdint.h>
+#include <sys/ioctl.h>
+
+/*
+ * Flag masks passed in the flags field of struct mshv_user_irqfd, derived
+ * from the MSHV_IRQFD_BIT_* bit positions. Unsigned constants are used
+ * because the masks are stored in uint32_t flag words.
+ */
+#define MSHV_IRQFD_RESAMPLE_FLAG (1U << MSHV_IRQFD_BIT_RESAMPLE)
+#define MSHV_IRQFD_BIT_DEASSIGN_FLAG (1U << MSHV_IRQFD_BIT_DEASSIGN)
+
+/* MSI routing state (GSI -> route entry), set up by mshv_init_msicontrol() */
+static MshvMsiControl *msi_control;
+/* Taken around every access to msi_control's table and "updated" flag */
+static QemuMutex msi_control_mutex;
+
+/*
+ * Set up the global MSI routing state: initialise the mutex that guards it
+ * and allocate an empty GSI -> route-entry table with "updated" cleared.
+ */
+void mshv_init_msicontrol(void)
+{
+    MshvMsiControl *ctl = g_new0(MshvMsiControl, 1);
+
+    /* keys are GSI numbers stored directly in the pointer value */
+    ctl->gsi_routes = g_hash_table_new(g_direct_hash, g_direct_equal);
+    ctl->updated = false;
+
+    qemu_mutex_init(&msi_control_mutex);
+    msi_control = ctl;
+}
+
+/*
+ * Create or replace the MSI route stored for @gsi.
+ *
+ * @gsi:  GSI to (re)program; must be below MSHV_MAX_MSI_ROUTES
+ * @addr: 64-bit MSI address, stored as separate high and low 32-bit halves
+ * @data: MSI data payload
+ *
+ * The routing table is only flagged as updated when the stored route
+ * actually changes, so an identical re-programming does not trigger a
+ * new commit.
+ *
+ * Returns 0 on success (including "nothing to update"), -1 if @gsi is out
+ * of range.
+ */
+static int set_msi_routing(uint32_t gsi, uint64_t addr, uint32_t data)
+{
+    struct mshv_user_irq_entry *entry;
+    uint32_t high_addr = addr >> 32;
+    uint32_t low_addr = addr & 0xFFFFFFFF;
+    GHashTable *gsi_routes;
+
+    trace_mshv_set_msi_routing(gsi, addr, data);
+
+    if (gsi >= MSHV_MAX_MSI_ROUTES) {
+        error_report("gsi >= MSHV_MAX_MSI_ROUTES");
+        return -1;
+    }
+
+    assert(msi_control);
+
+    WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
+        gsi_routes = msi_control->gsi_routes;
+        entry = g_hash_table_lookup(gsi_routes, GINT_TO_POINTER(gsi));
+
+        if (entry
+            && entry->address_hi == high_addr
+            && entry->address_lo == low_addr
+            && entry->data == data)
+        {
+            /* nothing to update */
+            return 0;
+        }
+
+        /*
+         * The table was created without a value destructor, so the old
+         * entry (if any) has to be released by hand before it is replaced.
+         */
+        g_free(entry);
+
+        /* create new entry; spelled with the struct tag like everywhere else */
+        entry = g_new0(struct mshv_user_irq_entry, 1);
+        entry->gsi = gsi;
+        entry->address_hi = high_addr;
+        entry->address_lo = low_addr;
+        entry->data = data;
+
+        g_hash_table_insert(gsi_routes, GINT_TO_POINTER(gsi), entry);
+        msi_control->updated = true;
+    }
+
+    return 0;
+}
+
+/*
+ * Allocate the lowest unused GSI and store an MSI route for it.
+ *
+ * @addr: 64-bit MSI address
+ * @data: MSI data payload
+ *
+ * Returns the allocated GSI, or -1 when all MSHV_MAX_MSI_ROUTES slots are
+ * already in use.
+ */
+static int add_msi_routing(uint64_t addr, uint32_t data)
+{
+    struct mshv_user_irq_entry *new_route;
+    GHashTable *routes;
+    int slot;
+
+    trace_mshv_add_msi_routing(addr, data);
+
+    assert(msi_control);
+
+    WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
+        routes = msi_control->gsi_routes;
+
+        /* probe upwards from 0 for the first GSI without a route */
+        for (slot = 0; slot < MSHV_MAX_MSI_ROUTES; slot++) {
+            if (!g_hash_table_lookup(routes, GINT_TO_POINTER(slot))) {
+                break;
+            }
+        }
+        if (slot >= MSHV_MAX_MSI_ROUTES) {
+            error_report("No empty gsi slot available");
+            return -1;
+        }
+
+        /* populate the route for the slot we just claimed */
+        new_route = g_new0(struct mshv_user_irq_entry, 1);
+        new_route->gsi = slot;
+        new_route->address_hi = addr >> 32;
+        new_route->address_lo = addr & 0xFFFFFFFF;
+        new_route->data = data;
+
+        g_hash_table_insert(routes, GINT_TO_POINTER(slot), new_route);
+        msi_control->updated = true;
+    }
+
+    return slot;
+}
+
+/*
+ * Push the current MSI routing table to the hypervisor.
+ *
+ * All routes held in msi_control->gsi_routes are flattened into a
+ * struct mshv_user_irq_table and handed over via the MSHV_SET_MSI_ROUTING
+ * ioctl on the VM fd. Nothing is sent when no route changed since the last
+ * successful commit.
+ *
+ * Returns 0 on success or when there was nothing to do, -1 if the ioctl
+ * failed (the table then stays marked as updated, so a later commit
+ * retries).
+ */
+static int commit_msi_routing_table(void)
+{
+    guint len;
+    int i, ret;
+    size_t table_size;
+    struct mshv_user_irq_table *table;
+    GHashTableIter iter;
+    gpointer key, value;
+    int vm_fd = mshv_state->vm;
+
+    assert(msi_control);
+
+    WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
+        if (!msi_control->updated) {
+            /* nothing to update */
+            return 0;
+        }
+
+        /* Calculate the size of the table */
+        len = g_hash_table_size(msi_control->gsi_routes);
+        table_size = sizeof(struct mshv_user_irq_table)
+                     + len * sizeof(struct mshv_user_irq_entry);
+        table = g_malloc0(table_size);
+
+        g_hash_table_iter_init(&iter, msi_control->gsi_routes);
+        i = 0;
+        while (g_hash_table_iter_next(&iter, &key, &value)) {
+            struct mshv_user_irq_entry *entry = value;
+            table->entries[i] = *entry;
+            i++;
+        }
+        /*
+         * Tell the kernel how many entries follow the header; the zeroed
+         * allocation would otherwise describe an empty table.
+         */
+        table->nr = i;
+
+        trace_mshv_commit_msi_routing_table(vm_fd, len);
+
+        ret = ioctl(vm_fd, MSHV_SET_MSI_ROUTING, table);
+        g_free(table);
+        if (ret < 0) {
+            error_report("Failed to commit msi routing table");
+            return -1;
+        }
+        msi_control->updated = false;
+    }
+    return 0;
+}
+
+/*
+ * Drop the MSI route stored for @gsi, if there is one.
+ *
+ * Returns 0 on success (also when no route existed), -1 if @gsi is not
+ * below MSHV_MAX_MSI_ROUTES.
+ */
+static int remove_msi_routing(uint32_t gsi)
+{
+    struct mshv_user_irq_entry *stale;
+    GHashTable *routes;
+
+    trace_mshv_remove_msi_routing(gsi);
+
+    if (gsi >= MSHV_MAX_MSI_ROUTES) {
+        error_report("Invalid GSI: %u", gsi);
+        return -1;
+    }
+
+    assert(msi_control);
+
+    WITH_QEMU_LOCK_GUARD(&msi_control_mutex) {
+        routes = msi_control->gsi_routes;
+        stale = g_hash_table_lookup(routes, GINT_TO_POINTER(gsi));
+        if (!stale) {
+            /* no route for this GSI, table unchanged */
+            return 0;
+        }
+
+        /* unlink first, then release the entry the table pointed at */
+        g_hash_table_remove(routes, GINT_TO_POINTER(gsi));
+        g_free(stale);
+        msi_control->updated = true;
+    }
+
+    return 0;
+}
+
+/*
+ * Pass an eventfd which is to be used for injecting interrupts from userland.
+ *
+ * @vm_fd:       VM file descriptor the MSHV_IRQFD ioctl is issued on
+ * @fd:          eventfd that triggers the interrupt
+ * @resample_fd: resample eventfd, forwarded as-is to the kernel
+ * @gsi:         GSI the eventfd is bound to
+ * @flags:       MSHV irqfd flags, forwarded as-is to the kernel
+ *
+ * Returns the non-negative ioctl result on success, -1 on failure. On
+ * failure errno still holds the ioctl's error code, so callers can turn it
+ * into a negative errno value.
+ */
+static int irqfd(int vm_fd, int fd, int resample_fd, uint32_t gsi,
+                 uint32_t flags)
+{
+    int ret;
+    struct mshv_user_irqfd arg = {
+        .fd = fd,
+        .resamplefd = resample_fd,
+        .gsi = gsi,
+        .flags = flags,
+    };
+
+    ret = ioctl(vm_fd, MSHV_IRQFD, &arg);
+    if (ret < 0) {
+        /* error_report() may clobber errno; keep the ioctl's value alive */
+        int saved_errno = errno;
+
+        error_report("Failed to set irqfd: gsi=%u, fd=%d", gsi, fd);
+        errno = saved_errno;
+        return -1;
+    }
+    return ret;
+}
+
+/*
+ * Bind @event_fd to @gsi on the VM without a resample eventfd and without
+ * any irqfd flags.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int register_irqfd(int vm_fd, int event_fd, uint32_t gsi)
+{
+    trace_mshv_register_irqfd(vm_fd, event_fd, gsi);
+
+    if (irqfd(vm_fd, event_fd, 0, gsi, 0) < 0) {
+        error_report("Failed to register irqfd: gsi=%u", gsi);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Bind @event_fd to @gsi on the VM and additionally register @resample_fd,
+ * setting the resample flag in the irqfd request.
+ *
+ * Returns 0 on success, a negative errno value on failure.
+ */
+static int register_irqfd_with_resample(int vm_fd, int event_fd,
+                                        int resample_fd, uint32_t gsi)
+{
+    int ret;
+    uint32_t flags = MSHV_IRQFD_RESAMPLE_FLAG;
+
+    ret = irqfd(vm_fd, event_fd, resample_fd, gsi, flags);
+    if (ret < 0) {
+        /* capture errno right away: error_report() below may overwrite it */
+        int err = errno;
+
+        error_report("Failed to register irqfd with resample: gsi=%u", gsi);
+        return -err;
+    }
+    return 0;
+}
+
+/*
+ * Detach @event_fd from @gsi on the VM by issuing an irqfd request with the
+ * deassign flag set.
+ *
+ * Returns 0 on success, a negative errno value on failure.
+ */
+static int unregister_irqfd(int vm_fd, int event_fd, uint32_t gsi)
+{
+    int ret;
+    uint32_t flags = MSHV_IRQFD_BIT_DEASSIGN_FLAG;
+
+    ret = irqfd(vm_fd, event_fd, 0, gsi, flags);
+    if (ret < 0) {
+        /* capture errno right away: error_report() below may overwrite it */
+        int err = errno;
+
+        error_report("Failed to unregister irqfd: gsi=%u", gsi);
+        return -err;
+    }
+    return 0;
+}
+
+/*
+ * Attach or detach an irqfd for @virq on the VM.
+ *
+ * @event:    notifier whose fd triggers the interrupt
+ * @resample: optional resample notifier, may be NULL
+ * @virq:     GSI the notifier is bound to
+ * @add:      true to register, false to unregister
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+static int irqchip_update_irqfd_notifier_gsi(const EventNotifier *event,
+                                             const EventNotifier *resample,
+                                             int virq, bool add)
+{
+    int fd = event_notifier_get_fd(event);
+    /* -1 marks "no resample notifier" */
+    int rfd = resample ? event_notifier_get_fd(resample) : -1;
+    int vm_fd = mshv_state->vm;
+
+    trace_mshv_irqchip_update_irqfd_notifier_gsi(fd, rfd, virq, add);
+
+    if (!add) {
+        return unregister_irqfd(vm_fd, fd, virq);
+    }
+
+    /* 0 is a valid descriptor; only the negative sentinel means "absent" */
+    if (rfd >= 0) {
+        return register_irqfd_with_resample(vm_fd, fd, rfd, virq);
+    }
+
+    return register_irqfd(vm_fd, fd, virq);
+}
+
+
+/*
+ * Allocate an MSI route for @vector of @dev.
+ *
+ * Returns the GSI picked by add_msi_routing() (or its -1 on failure). When
+ * PCI is not available or @dev is NULL no route is added and 0 is returned.
+ */
+int mshv_irqchip_add_msi_route(int vector, PCIDevice *dev)
+{
+    MSIMessage msg;
+
+    if (!pci_available || !dev) {
+        return 0;
+    }
+
+    msg = pci_get_msi_message(dev, vector);
+    return add_msi_routing(msg.address, le32_to_cpu(msg.data));
+}
+
+/*
+ * Release the MSI route associated with @virq. The result of the removal is
+ * not reported to the caller.
+ */
+void mshv_irqchip_release_virq(int virq)
+{
+    uint32_t gsi = virq;
+
+    (void)remove_msi_routing(gsi);
+}
+
+/*
+ * Re-program the MSI route of @virq with the address and data from @msg.
+ * @dev is not used by this implementation.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+int mshv_irqchip_update_msi_route(int virq, MSIMessage msg, PCIDevice *dev)
+{
+    uint32_t data = le32_to_cpu(msg.data);
+
+    if (set_msi_routing(virq, msg.address, data) < 0) {
+        error_report("Failed to set msi routing");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Ask the hypervisor to assert a virtual interrupt via the
+ * HVCALL_ASSERT_VIRTUAL_INTERRUPT hypercall.
+ *
+ * @vm_fd:             VM file descriptor the hypercall is issued on
+ * @interrupt_type:    placed in the interrupt_type field of the control word
+ * @vector:            interrupt vector; requests for vector 0 are ignored
+ * @vp_index:          passed to the hypervisor as destination address
+ * @logical_dest_mode: selects logical destination mode in the control word
+ * @level_triggered:   selects level-triggered mode in the control word
+ *
+ * Returns 0 on success or when the request was ignored, a negative errno
+ * value if the hypercall failed.
+ */
+int mshv_request_interrupt(int vm_fd, uint32_t interrupt_type, uint32_t vector,
+                           uint32_t vp_index, bool logical_dest_mode,
+                           bool level_triggered)
+{
+    int ret;
+    union hv_interrupt_control control = {
+        .interrupt_type = interrupt_type,
+        .level_triggered = level_triggered,
+        .logical_dest_mode = logical_dest_mode,
+        .rsvd = 0,
+    };
+    struct hv_input_assert_virtual_interrupt arg = {0};
+    struct mshv_root_hvcall args = {0};
+
+    if (vector == 0) {
+        /*
+         * TODO: why do we receive this? Not observed in practice so far;
+         * make it visible instead of dropping the request silently.
+         */
+        warn_report("Ignoring request for interrupt vector 0 (type=%u)",
+                    interrupt_type);
+        return 0;
+    }
+
+    arg.control = control;
+    arg.dest_addr = (uint64_t)vp_index;
+    arg.vector = vector;
+
+    args.code = HVCALL_ASSERT_VIRTUAL_INTERRUPT;
+    args.in_sz = sizeof(arg);
+    /* go through uintptr_t so the pointer -> integer conversion is exact */
+    args.in_ptr = (uint64_t)(uintptr_t)&arg;
+
+    ret = mshv_hvcall(vm_fd, &args);
+    if (ret < 0) {
+        /*
+         * Assumes mshv_hvcall() leaves the failure reason in errno (TODO
+         * confirm); read it before error_report() can overwrite it.
+         */
+        int err = errno;
+
+        error_report("Failed to request interrupt");
+        return -err;
+    }
+    return 0;
+}
+
+/*
+ * Commit pending MSI routing changes to the hypervisor. A failure to commit
+ * is treated as fatal: it is reported and the process aborts.
+ */
+void mshv_irqchip_commit_routes(void)
+{
+    if (commit_msi_routing_table() < 0) {
+        error_report("Failed to commit msi routing table");
+        abort();
+    }
+}
+
+/*
+ * Register @event (and the optional @resample notifier) as irqfd for @virq.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+int mshv_irqchip_add_irqfd_notifier_gsi(const EventNotifier *event,
+                                        const EventNotifier *resample,
+                                        int virq)
+{
+    const bool add = true;
+
+    return irqchip_update_irqfd_notifier_gsi(event, resample, virq, add);
+}
+
+/*
+ * Unregister the irqfd backed by @event from @virq.
+ *
+ * Returns 0 on success, a negative value on failure.
+ */
+int mshv_irqchip_remove_irqfd_notifier_gsi(const EventNotifier *event,
+                                           int virq)
+{
+    const bool add = false;
+
+    return irqchip_update_irqfd_notifier_gsi(event, NULL, virq, add);
+}
diff --git a/accel/mshv/meson.build b/accel/mshv/meson.build
index 8a6beb3fb1..f88fc8678c 100644
--- a/accel/mshv/meson.build
+++ b/accel/mshv/meson.build
@@ -1,5 +1,6 @@
mshv_ss = ss.source_set()
mshv_ss.add(if_true: files(
+ 'irq.c',
'mem.c',
'mshv-all.c'
))
diff --git a/accel/mshv/mshv-all.c b/accel/mshv/mshv-all.c
index e4085b216d..a29e356ba0 100644
--- a/accel/mshv/mshv-all.c
+++ b/accel/mshv/mshv-all.c
@@ -417,6 +417,8 @@ static int mshv_init(MachineState *ms)
return -1;
}
+ mshv_init_msicontrol();
+
do {
int vm_fd = create_vm(mshv_fd);
s->vm = vm_fd;
diff --git a/accel/mshv/trace-events b/accel/mshv/trace-events
index 5929cb45a5..beb5be7b73 100644
--- a/accel/mshv/trace-events
+++ b/accel/mshv/trace-events
@@ -1,7 +1,16 @@
# See docs/devel/tracing.rst for syntax documentation.
+mshv_handle_interrupt(uint32_t cpu, int mask) "cpu_index %u mask %x"
mshv_set_memory(bool add, uint64_t gpa, uint64_t size, uint64_t user_addr, bool readonly, int ret) "[add = %d] gpa = %lx size = %lx user = %lx readonly = %d result = %d"
mshv_mem_ioeventfd_add(uint64_t addr, uint32_t size, uint32_t data) "addr %lx size %d data %x"
mshv_mem_ioeventfd_del(uint64_t addr, uint32_t size, uint32_t data) "addr %lx size %d data %x"
mshv_hvcall_args(const char* hvcall, uint16_t code, uint16_t in_sz) "built args for '%s' code: %d in_sz: %d"
+
+# irq.c: uint64_t arguments use PRIx64 so the format is correct on all hosts
+mshv_set_msi_routing(uint32_t gsi, uint64_t addr, uint32_t data) "gsi %u addr %"PRIx64" data %x"
+mshv_remove_msi_routing(uint32_t gsi) "gsi %u"
+mshv_add_msi_routing(uint64_t addr, uint32_t data) "addr %"PRIx64" data %x"
+mshv_commit_msi_routing_table(int vm_fd, int len) "vm_fd %d table_size %d"
+mshv_register_irqfd(int vm_fd, int event_fd, uint32_t gsi) "vm_fd %d event_fd %d gsi %u"
+mshv_irqchip_update_irqfd_notifier_gsi(int event_fd, int resample_fd, int virq, bool add) "event_fd %d resample_fd %d virq %d add %d"
+
diff --git a/hw/intc/apic.c b/hw/intc/apic.c
index bcb103560c..4d1fe7cdd1 100644
--- a/hw/intc/apic.c
+++ b/hw/intc/apic.c
@@ -27,6 +27,7 @@
#include "hw/pci/msi.h"
#include "qemu/host-utils.h"
#include "system/kvm.h"
+#include "system/mshv.h"
#include "trace.h"
#include "hw/i386/apic-msidef.h"
#include "qapi/error.h"
@@ -932,6 +933,14 @@ static void apic_send_msi(MSIMessage *msi)
uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1;
uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7;
/* XXX: Ignore redirection hint. */
+#ifdef CONFIG_MSHV
+ if (mshv_enabled()) {
+ /* TODO: error handling? */
+ mshv_request_interrupt(mshv_state->vm, delivery, vector, dest,
+ dest_mode, trigger_mode);
+ return;
+ }
+#endif
apic_deliver_irq(dest, dest_mode, delivery, vector, trigger_mode);
}
diff --git a/include/system/mshv.h b/include/system/mshv.h
index c7ee4f0cc1..4c1e901835 100644
--- a/include/system/mshv.h
+++ b/include/system/mshv.h
@@ -40,6 +40,10 @@
*/
#define MSHV_USE_IOEVENTFD 1
+#define MSHV_USE_KERNEL_GSI_IRQFD 1
+
+#define MSHV_MAX_MSI_ROUTES 4096
+
#define MSHV_PAGE_SHIFT 12
@@ -72,6 +76,11 @@ struct AccelCPUState {
bool dirty;
};
+typedef struct MshvMsiControl {
+ bool updated;
+ GHashTable *gsi_routes;
+} MshvMsiControl;
+
#else /* CONFIG_MSHV_IS_POSSIBLE */
#define mshv_enabled() false
#endif
@@ -106,6 +115,11 @@ int mshv_remove_mem(int vm_fd, const MshvMemoryRegion *mr);
void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
bool add);
/* interrupt */
+void mshv_init_msicontrol(void);
+int mshv_request_interrupt(int vm_fd, uint32_t interrupt_type, uint32_t vector,
+ uint32_t vp_index, bool logical_destination_mode,
+ bool level_triggered);
+
int mshv_irqchip_add_msi_route(int vector, PCIDevice *dev);
int mshv_irqchip_update_msi_route(int virq, MSIMessage msg, PCIDevice *dev);
void mshv_irqchip_commit_routes(void);
--
2.34.1
On Tue, May 20, 2025 at 01:30:04PM +0200, Magnus Kulke wrote:
> Implement initial interrupt handling logic in the MSHV backend. This
> includes management of MSI and un/registering of irqfd mechanisms.
>
> Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
> ---
[...]
> +int mshv_request_interrupt(int vm_fd, uint32_t interrupt_type, uint32_t vector,
> + uint32_t vp_index, bool logical_dest_mode,
> + bool level_triggered)
> +{
> + int ret;
> +
> + if (vector == 0) {
> + /* TODO: why do we receive this? */
You must have seen this in real life, right? We need to convince
ourselves why this is okay.
Thanks,
Wei.
> + return 0;
> + }
> +
> + union hv_interrupt_control control = {
> + .interrupt_type = interrupt_type,
> + .level_triggered = level_triggered,
> + .logical_dest_mode = logical_dest_mode,
> + .rsvd = 0,
> + };
> +
> + struct hv_input_assert_virtual_interrupt arg = {0};
> + arg.control = control;
> + arg.dest_addr = (uint64_t)vp_index;
> + arg.vector = vector;
> +
> + struct mshv_root_hvcall args = {0};
> + args.code = HVCALL_ASSERT_VIRTUAL_INTERRUPT;
> + args.in_sz = sizeof(arg);
> + args.in_ptr = (uint64_t)&arg;
> +
> + ret = mshv_hvcall(vm_fd, &args);
> + if (ret < 0) {
> + error_report("Failed to request interrupt");
> + return -errno;
> + }
> + return 0;
> +}
> +
On Tue, May 20, 2025 at 08:15:20PM +0000, Wei Liu wrote:
> On Tue, May 20, 2025 at 01:30:04PM +0200, Magnus Kulke wrote:
> > Implement initial interrupt handling logic in the MSHV backend. This
> > includes management of MSI and un/registering of irqfd mechanisms.
> >
> > Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
> > ---
> [...]
> > +int mshv_request_interrupt(int vm_fd, uint32_t interrupt_type, uint32_t vector,
> > + uint32_t vp_index, bool logical_dest_mode,
> > + bool level_triggered)
> > +{
> > + int ret;
> > +
> > + if (vector == 0) {
> > + /* TODO: why do we receive this? */
>
> You must have seen this in real life, right? We need to convince
> ourselves why this is okay.
>
> Thanks,
> Wei.
>
I haven't seen this in real use, I spotted it in the mshvc library and
wondered why we have this clause at this point. We can log a warning if
that occurs.
On 5/20/25 13:30, Magnus Kulke wrote:
> diff --git a/include/system/mshv.h b/include/system/mshv.h
> index c7ee4f0cc1..4c1e901835 100644
> --- a/include/system/mshv.h
> +++ b/include/system/mshv.h
> @@ -40,6 +40,10 @@
> */
> #define MSHV_USE_IOEVENTFD 1
>
> +#define MSHV_USE_KERNEL_GSI_IRQFD 1

Please make this code unconditional - same for MSHV_USE_IOEVENTFD.

Paolo
© 2016 - 2025 Red Hat, Inc.