Create the MSHV virtual machine by opening a partition and issuing
the necessary ioctl to initialize it. This sets up the basic VM
structure and initial configuration used by MSHV to manage guest state.
Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
accel/mshv/mshv-all.c | 210 ++++++++++++++++++++++++++++++++++-
accel/mshv/trace-events | 3 +
accel/mshv/trace.h | 1 +
include/system/mshv.h | 20 +++-
meson.build | 1 +
target/i386/mshv/meson.build | 1 +
target/i386/mshv/mshv-cpu.c | 71 ++++++++++++
7 files changed, 300 insertions(+), 7 deletions(-)
create mode 100644 accel/mshv/trace-events
create mode 100644 accel/mshv/trace.h
create mode 100644 target/i386/mshv/mshv-cpu.c
diff --git a/accel/mshv/mshv-all.c b/accel/mshv/mshv-all.c
index 9e0590c4f9..712e651627 100644
--- a/accel/mshv/mshv-all.c
+++ b/accel/mshv/mshv-all.c
@@ -46,8 +46,177 @@ DECLARE_INSTANCE_CHECKER(MshvState, MSHV_STATE, TYPE_MSHV_ACCEL)
bool mshv_allowed;
-MshvState *mshv_state;
+MshvState *mshv_state = NULL;
+/*
+ * Open the MSHV control device.
+ *
+ * On success the descriptor of /dev/mshv is stored in *mshv_fd and 0 is
+ * returned. On failure an error is reported, *mshv_fd is left untouched
+ * and -1 is returned.
+ */
+static int init_mshv(int *mshv_fd)
+{
+    int dev_fd;
+
+    dev_fd = open("/dev/mshv", O_RDWR | O_CLOEXEC);
+    if (dev_fd >= 0) {
+        *mshv_fd = dev_fd;
+        return 0;
+    }
+
+    error_report("Failed to open /dev/mshv: %s", strerror(errno));
+    return -1;
+}
+
+/*
+ * Set the HV_PARTITION_PROPERTY_TIME_FREEZE property of a partition.
+ *
+ * vm_fd:  partition file descriptor the hypercall is issued on
+ * freeze: 1 to pause, 0 to resume; any other value is rejected
+ *
+ * Returns 0 on success, -1 on an invalid argument or a failed hypercall
+ * (an error is reported in both cases).
+ */
+static int set_time_freeze(int vm_fd, int freeze)
+{
+    int ret;
+    struct hv_input_set_partition_property in = {0};
+    struct mshv_root_hvcall args = {0};
+
+    if (freeze != 0 && freeze != 1) {
+        error_report("Invalid time freeze value");
+        return -1;
+    }
+
+    in.property_code = HV_PARTITION_PROPERTY_TIME_FREEZE;
+    in.property_value = freeze;
+
+    /* The hypercall input is passed to the driver by address. */
+    args.code = HVCALL_SET_PARTITION_PROPERTY;
+    args.in_sz = sizeof(in);
+    args.in_ptr = (uint64_t)&in;
+
+    ret = mshv_hvcall(vm_fd, &args);
+    if (ret < 0) {
+        error_report("Failed to set time freeze");
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Pause the partition by requesting a time freeze.
+ *
+ * Returns 0 on success; on failure an error is reported and -1 returned.
+ */
+static int pause_vm(int vm_fd)
+{
+    if (set_time_freeze(vm_fd, 1) < 0) {
+        error_report("Failed to pause partition: %s", strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Resume the partition by lifting the time freeze.
+ *
+ * Returns 0 on success; on failure an error is reported and -1 returned.
+ */
+static int resume_vm(int vm_fd)
+{
+    if (set_time_freeze(vm_fd, 0) < 0) {
+        error_report("Failed to resume partition: %s", strerror(errno));
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Ask the MSHV device to create a new partition.
+ *
+ * The request sets the LAPIC, X2APIC and GPA_SUPER_PAGES flag bits and
+ * uses MSHV_PT_ISOLATION_NONE. On success the value returned by the
+ * MSHV_CREATE_PARTITION ioctl is stored in *vm_fd and 0 is returned;
+ * otherwise an error is reported and -1 is returned.
+ */
+static int create_partition(int mshv_fd, int *vm_fd)
+{
+    int partition_fd;
+    struct mshv_create_partition args = {
+        /* desired partition features */
+        .pt_flags = (1ULL << MSHV_PT_BIT_LAPIC) |
+                    (1ULL << MSHV_PT_BIT_X2APIC) |
+                    (1ULL << MSHV_PT_BIT_GPA_SUPER_PAGES),
+        /* default isolation type */
+        .pt_isolation = MSHV_PT_ISOLATION_NONE,
+    };
+
+    partition_fd = ioctl(mshv_fd, MSHV_CREATE_PARTITION, &args);
+    if (partition_fd < 0) {
+        error_report("Failed to create partition: %s", strerror(errno));
+        return -1;
+    }
+
+    *vm_fd = partition_fd;
+    return 0;
+}
+
+/*
+ * Set the HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES property of a
+ * partition.
+ *
+ * A fixed set of feature bits is enabled here and then passed to
+ * mshv_arch_amend_proc_features() so the target can adjust it before the
+ * hypercall is issued.
+ *
+ * Returns 0 on success, -1 if the hypercall fails (an error is reported).
+ */
+static int set_synthetic_proc_features(int vm_fd)
+{
+    int ret;
+    struct hv_input_set_partition_property in = {0};
+    struct mshv_root_hvcall args = {0};
+    union hv_partition_synthetic_processor_features features = {0};
+
+    /* Access the bitfield and set the desired features */
+    features.hypervisor_present = 1;
+    features.hv1 = 1;
+    features.access_partition_reference_counter = 1;
+    features.access_synic_regs = 1;
+    features.access_synthetic_timer_regs = 1;
+    features.access_partition_reference_tsc = 1;
+    features.access_frequency_regs = 1;
+    features.access_intr_ctrl_regs = 1;
+    features.access_vp_index = 1;
+    features.access_hypercall_regs = 1;
+    features.tb_flush_hypercalls = 1;
+    features.synthetic_cluster_ipi = 1;
+    features.direct_synthetic_timers = 1;
+
+    mshv_arch_amend_proc_features(&features);
+
+    in.property_code = HV_PARTITION_PROPERTY_SYNTHETIC_PROC_FEATURES;
+    in.property_value = features.as_uint64[0];
+
+    /* The hypercall input is passed to the driver by address. */
+    args.code = HVCALL_SET_PARTITION_PROPERTY;
+    args.in_sz = sizeof(in);
+    args.in_ptr = (uint64_t)&in;
+
+    trace_mshv_hvcall_args("synthetic_proc_features", args.code, args.in_sz);
+
+    ret = mshv_hvcall(vm_fd, &args);
+    if (ret < 0) {
+        error_report("Failed to set synthetic proc features");
+        /*
+         * errno is not reliable here: mshv_hvcall() and error_report() have
+         * run since the failing ioctl, so "-errno" could even evaluate to 0.
+         */
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Issue the MSHV_INITIALIZE_PARTITION ioctl on the partition descriptor.
+ *
+ * Returns 0 on success; on failure an error is reported and -1 returned.
+ */
+static int initialize_vm(int vm_fd)
+{
+    if (ioctl(vm_fd, MSHV_INITIALIZE_PARTITION) >= 0) {
+        return 0;
+    }
+
+    error_report("Failed to initialize partition: %s", strerror(errno));
+    return -1;
+}
+
+/*
+ * Create, configure and initialize a partition, leaving it frozen.
+ *
+ * mshv_fd: descriptor of the MSHV device the partition is created on
+ * vm_fd:   receives the partition descriptor on success
+ *
+ * Steps: create the partition, set its synthetic processor features,
+ * initialize it, run the arch-specific post-init hook and finally pause it.
+ *
+ * Returns 0 on success and -1 on failure. If a step fails after the
+ * partition has been created, the partition descriptor is closed here and
+ * *vm_fd is set to -1, so the caller has nothing to clean up for it.
+ */
+static int create_vm(int mshv_fd, int *vm_fd)
+{
+    int ret;
+
+    ret = create_partition(mshv_fd, vm_fd);
+    if (ret < 0) {
+        return -1;
+    }
+
+    ret = set_synthetic_proc_features(*vm_fd);
+    if (ret < 0) {
+        goto err_close_vm;
+    }
+
+    ret = initialize_vm(*vm_fd);
+    if (ret < 0) {
+        goto err_close_vm;
+    }
+
+    ret = mshv_arch_post_init_vm(*vm_fd);
+    if (ret < 0) {
+        goto err_close_vm;
+    }
+
+    /* Always create a frozen partition */
+    ret = pause_vm(*vm_fd);
+    if (ret < 0) {
+        goto err_close_vm;
+    }
+
+    return 0;
+
+err_close_vm:
+    /* Do not leak the partition descriptor on a partially set up VM. */
+    close(*vm_fd);
+    *vm_fd = -1;
+    return -1;
+}
static void mem_region_add(MemoryListener *listener,
MemoryRegionSection *section)
@@ -97,16 +266,55 @@ static void register_mshv_memory_listener(MshvState *s, MshvMemoryListener *mml,
}
}
+/*
+ * Issue a hypercall through the MSHV_ROOT_HVCALL ioctl on vm_fd.
+ *
+ * Returns the (non-negative) ioctl result on success; on failure an error
+ * is reported and -1 is returned.
+ */
+int mshv_hvcall(int vm_fd, const struct mshv_root_hvcall *args)
+{
+    int rc = ioctl(vm_fd, MSHV_ROOT_HVCALL, args);
+
+    if (rc >= 0) {
+        return rc;
+    }
+
+    error_report("Failed to perform hvcall: %s", strerror(errno));
+    return -1;
+}
+
static int mshv_init(MachineState *ms)
{
MshvState *s;
+ int mshv_fd, vm_fd, ret;
+
+ if (mshv_state) {
+ warn_report("MSHV accelerator already initialized");
+ return 0;
+ }
+
s = MSHV_STATE(ms->accelerator);
accel_blocker_init();
s->vm = 0;
+ ret = init_mshv(&mshv_fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = create_vm(mshv_fd, &vm_fd);
+ if (ret < 0) {
+ close(mshv_fd);
+ return -1;
+ }
+
+ ret = resume_vm(vm_fd);
+ if (ret < 0) {
+ close(mshv_fd);
+ close(vm_fd);
+ return -1;
+ }
+
+ s->vm = vm_fd;
+ s->fd = mshv_fd;
s->nr_as = 1;
s->as = g_new0(MshvAddressSpace, s->nr_as);
diff --git a/accel/mshv/trace-events b/accel/mshv/trace-events
new file mode 100644
index 0000000000..f99e8c5a41
--- /dev/null
+++ b/accel/mshv/trace-events
@@ -0,0 +1,3 @@
+# See docs/devel/tracing.rst for syntax documentation.
+
+mshv_hvcall_args(const char* hvcall, uint16_t code, uint16_t in_sz) "built args for '%s' code: %d in_sz: %d"
diff --git a/accel/mshv/trace.h b/accel/mshv/trace.h
new file mode 100644
index 0000000000..da5b40cd24
--- /dev/null
+++ b/accel/mshv/trace.h
@@ -0,0 +1 @@
+#include "trace/trace-accel_mshv.h"
diff --git a/include/system/mshv.h b/include/system/mshv.h
index 43a22e0f48..2ac594d0aa 100644
--- a/include/system/mshv.h
+++ b/include/system/mshv.h
@@ -45,12 +45,13 @@ typedef struct MshvAddressSpace {
} MshvAddressSpace;
typedef struct MshvState {
- AccelState parent_obj;
- int vm;
- MshvMemoryListener memory_listener;
- /* number of listeners */
- int nr_as;
- MshvAddressSpace *as;
+ AccelState parent_obj;
+ int vm; /* partition descriptor, set by mshv_init() from create_vm() */
+ MshvMemoryListener memory_listener;
+ /* number of listeners */
+ int nr_as;
+ MshvAddressSpace *as; /* array of nr_as entries, allocated in mshv_init() */
+ int fd; /* descriptor of the opened /dev/mshv device */
} MshvState;
extern MshvState *mshv_state;
@@ -68,6 +69,13 @@ struct AccelCPUState {
#define mshv_msi_via_irqfd_enabled() false
#endif
+/* cpu */
+void mshv_arch_amend_proc_features(
+    union hv_partition_synthetic_processor_features *features);
+int mshv_arch_post_init_vm(int vm_fd);
+
+/*
+ * Issue a hypercall via the MSHV_ROOT_HVCALL ioctl on the partition
+ * descriptor vm_fd. Returns a non-negative value on success, -1 on failure.
+ */
+int mshv_hvcall(int vm_fd, const struct mshv_root_hvcall *args);
+
/* memory */
void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
bool add);
diff --git a/meson.build b/meson.build
index 927f3474ea..b19772d27f 100644
--- a/meson.build
+++ b/meson.build
@@ -3640,6 +3640,7 @@ endif
if have_system
trace_events_subdirs += [
'accel/kvm',
+ 'accel/mshv',
'audio',
'backends',
'backends/tpm',
diff --git a/target/i386/mshv/meson.build b/target/i386/mshv/meson.build
index 8ddaa7c11d..647e5dafb7 100644
--- a/target/i386/mshv/meson.build
+++ b/target/i386/mshv/meson.build
@@ -1,6 +1,7 @@
i386_mshv_ss = ss.source_set()
i386_mshv_ss.add(files(
+ 'mshv-cpu.c',
'x86.c',
))
diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c
new file mode 100644
index 0000000000..c00e98dfba
--- /dev/null
+++ b/target/i386/mshv/mshv-cpu.c
@@ -0,0 +1,71 @@
+/*
+ * QEMU MSHV support
+ *
+ * Copyright Microsoft, Corp. 2025
+ *
+ * Authors: Ziqiao Zhou <ziqiaozhou@microsoft.com>
+ * Magnus Kulke <magnuskulke@microsoft.com>
+ * Jinank Jain <jinankjain@microsoft.com>
+ *
+ * SPDX-License-Identifier: GPL-2.0-or-later
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/error-report.h"
+#include "qemu/typedefs.h"
+
+#include "system/mshv.h"
+#include "system/address-spaces.h"
+#include "linux/mshv.h"
+#include "hw/hyperv/hvhdk_mini.h"
+#include "hw/hyperv/hvgdk.h"
+
+
+#include "trace-accel_mshv.h"
+#include "trace.h"
+
+/*
+ * Target hook for adjusting the synthetic processor features before they
+ * are applied to the partition: additionally enables
+ * access_guest_idle_reg.
+ */
+void mshv_arch_amend_proc_features(
+    union hv_partition_synthetic_processor_features *features)
+{
+    features->access_guest_idle_reg = 1;
+}
+
+/*
+ * The default Microsoft Hypervisor behavior for an unimplemented MSR is to
+ * send a fault to the guest when it tries to access it. This can be
+ * overridden with a more suitable option: ignore writes from the guest and
+ * return zero on attempts to read an unimplemented MSR.
+ *
+ * Returns 0 on success, -1 if the hypercall fails (an error is reported).
+ */
+static int set_unimplemented_msr_action(int vm_fd)
+{
+    int ret;
+    struct hv_input_set_partition_property in = {0};
+    struct mshv_root_hvcall args = {0};
+
+    in.property_code = HV_PARTITION_PROPERTY_UNIMPLEMENTED_MSR_ACTION;
+    in.property_value = HV_UNIMPLEMENTED_MSR_ACTION_IGNORE_WRITE_READ_ZERO;
+
+    /* The hypercall input is passed to the driver by address. */
+    args.code = HVCALL_SET_PARTITION_PROPERTY;
+    args.in_sz = sizeof(in);
+    args.in_ptr = (uint64_t)&in;
+
+    trace_mshv_hvcall_args("unimplemented_msr_action", args.code, args.in_sz);
+
+    ret = mshv_hvcall(vm_fd, &args);
+    if (ret < 0) {
+        error_report("Failed to set unimplemented MSR action");
+        return -1;
+    }
+    return 0;
+}
+
+/*
+ * Target-specific configuration applied to a partition after it has been
+ * initialized: sets the unimplemented MSR action.
+ *
+ * Returns the result of that step (negative on failure, after reporting
+ * an error).
+ */
+int mshv_arch_post_init_vm(int vm_fd)
+{
+    int rc = set_unimplemented_msr_action(vm_fd);
+
+    if (rc < 0) {
+        error_report("Failed to set unimplemented MSR action");
+    }
+
+    return rc;
+}
--
2.34.1