Implement ioeventfd registration in the MSHV accelerator backend to
handle guest-triggered events. This enables integration with QEMU's
eventfd-based I/O mechanism.
Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
accel/mshv/mshv-all.c | 116 ++++++++++++++++++++++++++++++++++++++++
accel/mshv/trace-events | 3 ++
include/system/mshv.h | 8 +++
3 files changed, 127 insertions(+)
diff --git a/accel/mshv/mshv-all.c b/accel/mshv/mshv-all.c
index 712e651627..2ae9d1cffa 100644
--- a/accel/mshv/mshv-all.c
+++ b/accel/mshv/mshv-all.c
@@ -7,6 +7,7 @@
* Ziqiao Zhou <ziqiaozhou@microsoft.com>
* Magnus Kulke <magnuskulke@microsoft.com>
* Jinank Jain <jinankjain@microsoft.com>
+ * Wei Liu <liuwe@microsoft.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
@@ -236,11 +237,126 @@ static void mem_region_del(MemoryListener *listener,
memory_region_unref(section->mr);
}
+/* Says which member of Datamatch.value, if any, carries the match value. */
+typedef enum {
+    DATAMATCH_NONE = 0,     /* no datamatch value */
+    DATAMATCH_U32 = 1,      /* value.u32 is valid */
+    DATAMATCH_U64 = 2,      /* value.u64 is valid */
+} DatamatchTag;
+
+/*
+ * Optional datamatch value. 'tag' tells which member of 'value', if any,
+ * is meaningful.
+ */
+typedef struct {
+ DatamatchTag tag;
+ union {
+ uint32_t u32; /* used when tag == DATAMATCH_U32 */
+ uint64_t u64; /* used when tag == DATAMATCH_U64 */
+ } value;
+} Datamatch;
+
+/*
+ * Issue the MSHV_IOEVENTFD ioctl on vm_fd for event_fd at guest address addr.
+ *
+ * flags: caller-supplied flag bits; they determine whether to de/assign.
+ * dm:    optional datamatch. For DATAMATCH_U32/DATAMATCH_U64 the DATAMATCH
+ *        flag bit is added and len/datamatch are filled in; for
+ *        DATAMATCH_NONE both stay 0.
+ *
+ * Returns the non-negative ioctl result on success and -errno on failure,
+ * so that callers can hand -ret to strerror().
+ */
+static int ioeventfd(int vm_fd, int event_fd, uint64_t addr, Datamatch dm,
+                     uint32_t flags)
+{
+    mshv_user_ioeventfd args = {0};
+    int ret;
+
+    args.fd = event_fd;
+    args.addr = addr;
+
+    if (dm.tag != DATAMATCH_NONE) {
+        flags |= BIT(MSHV_IOEVENTFD_BIT_DATAMATCH);
+        if (dm.tag == DATAMATCH_U64) {
+            args.len = sizeof(uint64_t);
+            args.datamatch = dm.value.u64;
+        } else {
+            args.len = sizeof(uint32_t);
+            args.datamatch = dm.value.u32;
+        }
+    }
+    args.flags = flags;
+
+    ret = ioctl(vm_fd, MSHV_IOEVENTFD, &args);
+    if (ret < 0) {
+        /* ioctl() signals failure with -1; the actual cause is in errno. */
+        return -errno;
+    }
+
+    return ret;
+}
+
+/*
+ * Deassign the ioeventfd event_fd at mmio_addr on vm_fd.
+ * Returns whatever ioeventfd() returns.
+ *
+ * NOTE(review): the deassign request never carries a datamatch or length,
+ * regardless of how the eventfd was registered - confirm the kernel does
+ * not need them to find the registration.
+ */
+static int unregister_ioevent(int vm_fd, int event_fd, uint64_t mmio_addr)
+{
+    Datamatch no_match = { .tag = DATAMATCH_NONE };
+    uint32_t deassign = BIT(MSHV_IOEVENTFD_BIT_DEASSIGN);
+
+    return ioeventfd(vm_fd, event_fd, mmio_addr, no_match, deassign);
+}
+
+/*
+ * Assign event_fd as an ioeventfd at mmio_addr on vm_fd.
+ *
+ * With is_datamatch set, val is the match value: taken as 64 bits when
+ * is_64bit is set, otherwise converted to 32 bits. Without is_datamatch,
+ * val and is_64bit are ignored.
+ * Returns whatever ioeventfd() returns.
+ */
+static int register_ioevent(int vm_fd, int event_fd, uint64_t mmio_addr,
+                            uint64_t val, bool is_64bit, bool is_datamatch)
+{
+    Datamatch match = { .tag = DATAMATCH_NONE };
+
+    if (is_datamatch && is_64bit) {
+        match.tag = DATAMATCH_U64;
+        match.value.u64 = val;
+    } else if (is_datamatch) {
+        match.tag = DATAMATCH_U32;
+        match.value.u32 = val;
+    }
+
+    /* No flag bits are passed in, i.e. no DEASSIGN bit. */
+    return ioeventfd(vm_fd, event_fd, mmio_addr, match, 0);
+}
+
+/*
+ * eventfd_add hook of mshv_memory_listener: registers the notifier's fd as
+ * an ioeventfd at the section's address-space offset. Aborts when the
+ * registration fails.
+ */
+static void mem_ioeventfd_add(MemoryListener *listener,
+                              MemoryRegionSection *section,
+                              bool match_data, uint64_t data,
+                              EventNotifier *e)
+{
+    int fd = event_notifier_get_fd(e);
+    uint64_t size = int128_get64(section->size);
+    uint64_t addr = section->offset_within_address_space;
+    int ret;
+
+    trace_mshv_mem_ioeventfd_add(addr, size, data);
+
+    /* is_64bit is true only for an 8-byte section. */
+    ret = register_ioevent(mshv_state->vm, fd, addr, data, size == 8,
+                           match_data);
+    if (ret >= 0) {
+        return;
+    }
+
+    error_report("Failed to register ioeventfd: %s (%d)", strerror(-ret),
+                 -ret);
+    abort();
+}
+
+/*
+ * eventfd_del hook of mshv_memory_listener: deassigns the notifier's fd as
+ * an ioeventfd at the section's address-space offset. Aborts when that
+ * fails. match_data is unused here and data is only traced.
+ */
+static void mem_ioeventfd_del(MemoryListener *listener,
+                              MemoryRegionSection *section,
+                              bool match_data, uint64_t data,
+                              EventNotifier *e)
+{
+    uint64_t addr = section->offset_within_address_space;
+    int fd = event_notifier_get_fd(e);
+    int ret;
+
+    trace_mshv_mem_ioeventfd_del(addr, int128_get64(section->size), data);
+
+    ret = unregister_ioevent(mshv_state->vm, fd, addr);
+    if (ret >= 0) {
+        return;
+    }
+
+    error_report("Failed to unregister ioeventfd: %s (%d)", strerror(-ret),
+                 -ret);
+    abort();
+}
+
static MemoryListener mshv_memory_listener = {
.name = "mshv",
.priority = MEMORY_LISTENER_PRIORITY_ACCEL,
.region_add = mem_region_add,
.region_del = mem_region_del,
+#ifdef MSHV_USE_IOEVENTFD
+ .eventfd_add = mem_ioeventfd_add,
+ .eventfd_del = mem_ioeventfd_del,
+#endif
};
static MemoryListener mshv_io_listener = {
diff --git a/accel/mshv/trace-events b/accel/mshv/trace-events
index 9a3af6b8be..b49a5b1702 100644
--- a/accel/mshv/trace-events
+++ b/accel/mshv/trace-events
@@ -1,6 +1,9 @@
# See docs/devel/tracing.rst for syntax documentation.
mshv_set_memory(bool add, uint64_t gpa, uint64_t size, uint64_t user_addr, bool readonly, int ret) "[add = %d] gpa = %lx size = %lx user = %lx readonly = %d result = %d"
+mshv_mem_ioeventfd_add(uint64_t addr, uint32_t size, uint64_t data) "addr %" PRIx64 " size %d data %" PRIx64
+mshv_mem_ioeventfd_del(uint64_t addr, uint32_t size, uint64_t data) "addr %" PRIx64 " size %d data %" PRIx64
+
mshv_hvcall_args(const char* hvcall, uint16_t code, uint16_t in_sz) "built args for '%s' code: %d in_sz: %d"
mshv_set_msi_routing(uint32_t gsi, uint64_t addr, uint32_t data) "gsi=%d addr=%lx data=%x"
diff --git a/include/system/mshv.h b/include/system/mshv.h
index 3624d9477f..c2b0414c85 100644
--- a/include/system/mshv.h
+++ b/include/system/mshv.h
@@ -30,6 +30,14 @@
#define CONFIG_MSHV_IS_POSSIBLE
#endif
+/*
+ * Set to 0 if we do not want to use eventfd to optimize MMIO events.
+ * Set to 1 so that the mshv kernel driver receives a doorbell when the VM
+ * accesses MMIO memory and then signals the eventfd to notify the QEMU
+ * device, without an extra switch to QEMU to emulate the MMIO access.
+ */
+#define MSHV_USE_IOEVENTFD 1
+
#define MSHV_PAGE_SHIFT 12
#ifdef CONFIG_MSHV_IS_POSSIBLE
--
2.34.1