Add the main vCPU execution loop for MSHV using the MSHV_RUN_VP ioctl.
A translate_gva() hypercall is implemented. The execution loop handles
guest entry and VM exits. There are handlers for memory r/w, PIO and
MMIO to which the exit events are dispatched.
In case of MMIO the i386 instruction decoder/emulator is invoked to
perform the operation in user space.
Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
target/i386/mshv/mshv-cpu.c | 554 ++++++++++++++++++++++++++++++++++--
1 file changed, 524 insertions(+), 30 deletions(-)
diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c
index fdc7e5e019..27c6cd6138 100644
--- a/target/i386/mshv/mshv-cpu.c
+++ b/target/i386/mshv/mshv-cpu.c
@@ -21,6 +21,7 @@
#include "qemu/typedefs.h"
#include "system/mshv.h"
+#include "system/address-spaces.h"
#include "hw/hyperv/linux-mshv.h"
#include "hw/hyperv/hvhdk_mini.h"
#include "hw/hyperv/hvgdk.h"
@@ -145,6 +146,34 @@ static void remove_cpu_guard(int cpu_fd)
}
}
+static int translate_gva(int cpu_fd, uint64_t gva, uint64_t *gpa,
+ uint64_t flags)
+{
+ int ret;
+ union hv_translate_gva_result result = { 0 };
+
+ *gpa = 0;
+ mshv_translate_gva args = {
+ .gva = gva,
+ .flags = flags,
+ .gpa = (__u64 *)gpa,
+ .result = &result,
+ };
+
+ ret = ioctl(cpu_fd, MSHV_TRANSLATE_GVA, &args);
+ if (ret < 0) {
+ error_report("failed to invoke gpa->gva translation");
+ return -errno;
+ }
+ if (result.result_code != HV_TRANSLATE_GVA_SUCCESS) {
+ error_report("failed to translate gva (" TARGET_FMT_lx ") to gpa", gva);
+ return -1;
+
+ }
+
+ return 0;
+}
+
int mshv_set_generic_regs(int cpu_fd, hv_register_assoc *assocs, size_t n_regs)
{
struct mshv_vp_registers input = {
@@ -1027,10 +1056,503 @@ void mshv_arch_amend_proc_features(
features->access_guest_idle_reg = 1;
}
+static int set_memory_info(const struct hyperv_message *msg,
+ struct hv_x64_memory_intercept_message *info)
+{
+ if (msg->header.message_type != HVMSG_GPA_INTERCEPT
+ && msg->header.message_type != HVMSG_UNMAPPED_GPA
+ && msg->header.message_type != HVMSG_UNACCEPTED_GPA) {
+ error_report("invalid message type");
+ return -1;
+ }
+ memcpy(info, msg->payload, sizeof(*info));
+
+ return 0;
+}
+
+static int emulate_instruction(CPUState *cpu,
+ const uint8_t *insn_bytes, size_t insn_len,
+ uint64_t gva, uint64_t gpa)
+{
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86_cpu->env;
+ struct x86_decode decode = { 0 };
+ int ret;
+ int cpu_fd = mshv_vcpufd(cpu);
+ QemuMutex *guard;
+ x86_insn_stream stream = { .bytes = insn_bytes, .len = insn_len };
+
+ guard = g_hash_table_lookup(cpu_guards, GUINT_TO_POINTER(cpu_fd));
+ if (!guard) {
+ error_report("failed to get cpu guard");
+ return -1;
+ }
+
+ WITH_QEMU_LOCK_GUARD(guard) {
+ ret = mshv_load_regs(cpu);
+ if (ret < 0) {
+ error_report("failed to load registers");
+ return -1;
+ }
+
+ decode_instruction_stream(env, &decode, &stream);
+ exec_instruction(env, &decode);
+
+ ret = mshv_store_regs(cpu);
+ if (ret < 0) {
+ error_report("failed to store registers");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int handle_mmio(CPUState *cpu, const struct hyperv_message *msg,
+ MshvVmExit *exit_reason)
+{
+ struct hv_x64_memory_intercept_message info = { 0 };
+ size_t insn_len;
+ uint8_t access_type;
+ uint8_t *instruction_bytes;
+ int ret;
+
+ ret = set_memory_info(msg, &info);
+ if (ret < 0) {
+ error_report("failed to convert message to memory info");
+ return -1;
+ }
+ insn_len = info.instruction_byte_count;
+ access_type = info.header.intercept_access_type;
+
+ if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE) {
+ error_report("invalid intercept access type: execute");
+ return -1;
+ }
+
+ if (insn_len > 16) {
+ error_report("invalid mmio instruction length: %zu", insn_len);
+ return -1;
+ }
+
+ if (insn_len == 0) {
+ warn_report("mmio instruction buffer empty");
+ }
+
+ instruction_bytes = info.instruction_bytes;
+
+ ret = emulate_instruction(cpu, instruction_bytes, insn_len,
+ info.guest_virtual_address,
+ info.guest_physical_address);
+ if (ret < 0) {
+ error_report("failed to emulate mmio");
+ return -1;
+ }
+
+ *exit_reason = MshvVmExitIgnore;
+
+ return 0;
+}
+
+static int handle_unmapped_mem(int vm_fd, CPUState *cpu,
+ const struct hyperv_message *msg,
+ MshvVmExit *exit_reason)
+{
+ struct hv_x64_memory_intercept_message info = { 0 };
+ int ret;
+
+ ret = set_memory_info(msg, &info);
+ if (ret < 0) {
+ error_report("failed to convert message to memory info");
+ return -1;
+ }
+
+ return handle_mmio(cpu, msg, exit_reason);
+}
+
+static int set_ioport_info(const struct hyperv_message *msg,
+ hv_x64_io_port_intercept_message *info)
+{
+ if (msg->header.message_type != HVMSG_X64_IO_PORT_INTERCEPT) {
+ error_report("Invalid message type");
+ return -1;
+ }
+ memcpy(info, msg->payload, sizeof(*info));
+
+ return 0;
+}
+
+typedef struct X64Registers {
+ const uint32_t *names;
+ const uint64_t *values;
+ uintptr_t count;
+} X64Registers;
+
+static int set_x64_registers(int cpu_fd, const X64Registers *regs)
+{
+ size_t n_regs = regs->count;
+ struct hv_register_assoc *assocs;
+ int ret;
+
+ assocs = g_new0(hv_register_assoc, n_regs);
+ for (size_t i = 0; i < n_regs; i++) {
+ assocs[i].name = regs->names[i];
+ assocs[i].value.reg64 = regs->values[i];
+ }
+
+ ret = mshv_set_generic_regs(cpu_fd, assocs, n_regs);
+ g_free(assocs);
+ if (ret < 0) {
+ error_report("failed to set x64 registers");
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline MemTxAttrs get_mem_attrs(bool is_secure_mode)
+{
+ MemTxAttrs memattr = {0};
+ memattr.secure = is_secure_mode;
+ return memattr;
+}
+
+static void pio_read(uint64_t port, uint8_t *data, uintptr_t size,
+ bool is_secure_mode)
+{
+ int ret = 0;
+ MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
+ ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size,
+ false);
+ if (ret != MEMTX_OK) {
+ error_report("Failed to read from port %lx: %d", port, ret);
+ abort();
+ }
+}
+
+static int pio_write(uint64_t port, const uint8_t *data, uintptr_t size,
+ bool is_secure_mode)
+{
+ int ret = 0;
+ MemTxAttrs memattr = get_mem_attrs(is_secure_mode);
+ ret = address_space_rw(&address_space_io, port, memattr, (void *)data, size,
+ true);
+ /* callers check for ret < 0, so map MemTxResult errors to -1 */
+ return ret == MEMTX_OK ? 0 : -1;
+}
+
+static int handle_pio_non_str(const CPUState *cpu,
+ hv_x64_io_port_intercept_message *info)
+{
+ size_t len = info->access_info.access_size;
+ uint8_t access_type = info->header.intercept_access_type;
+ int ret;
+ uint32_t val, eax;
+ const uint32_t eax_mask = 0xffffffffu >> (32 - len * 8);
+ size_t insn_len;
+ uint64_t rip, rax;
+ uint32_t reg_names[2];
+ uint64_t reg_values[2];
+ struct X64Registers x64_regs = { 0 };
+ uint16_t port = info->port_number;
+ int cpu_fd = mshv_vcpufd(cpu);
+
+ if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
+ union {
+ uint32_t u32;
+ uint8_t bytes[4];
+ } conv;
+
+ /* convert the first 4 bytes of rax to bytes */
+ conv.u32 = (uint32_t)info->rax;
+ /* secure mode is set to false */
+ ret = pio_write(port, conv.bytes, len, false);
+ if (ret < 0) {
+ error_report("Failed to write to io port");
+ return -1;
+ }
+ } else {
+ uint8_t data[4] = { 0 };
+ /* secure mode is set to false */
+ pio_read(info->port_number, data, len, false);
+
+ /* Preserve high bits in EAX, but clear out high bits in RAX */
+ val = *(uint32_t *)data;
+ eax = (((uint32_t)info->rax) & ~eax_mask) | (val & eax_mask);
+ info->rax = (uint64_t)eax;
+ }
+
+ insn_len = info->header.instruction_length;
+
+ /* Advance RIP and update RAX */
+ rip = info->header.rip + insn_len;
+ rax = info->rax;
+
+ reg_names[0] = HV_X64_REGISTER_RIP;
+ reg_values[0] = rip;
+ reg_names[1] = HV_X64_REGISTER_RAX;
+ reg_values[1] = rax;
+
+ x64_regs.names = reg_names;
+ x64_regs.values = reg_values;
+ x64_regs.count = 2;
+
+ ret = set_x64_registers(cpu_fd, &x64_regs);
+ if (ret < 0) {
+ error_report("Failed to set x64 registers");
+ return -1;
+ }
+
+ cpu->accel->dirty = false;
+
+ return 0;
+}
+
+static int fetch_guest_state(CPUState *cpu)
+{
+ int ret;
+
+ ret = mshv_get_standard_regs(cpu);
+ if (ret < 0) {
+ error_report("Failed to get standard registers");
+ return -1;
+ }
+
+ ret = mshv_get_special_regs(cpu);
+ if (ret < 0) {
+ error_report("Failed to get special registers");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int read_memory(int cpu_fd, uint64_t initial_gva, uint64_t initial_gpa,
+ uint64_t gva, uint8_t *data, size_t len)
+{
+ int ret;
+ uint64_t gpa, flags;
+
+ if (gva == initial_gva) {
+ gpa = initial_gpa;
+ } else {
+ flags = HV_TRANSLATE_GVA_VALIDATE_READ;
+ ret = translate_gva(cpu_fd, gva, &gpa, flags);
+ if (ret < 0) {
+ error_report("failed to translate gva to gpa");
+ return -1;
+ }
+ }
+
+ ret = mshv_guest_mem_read(gpa, data, len, false, false);
+ if (ret < 0) {
+ error_report("failed to read guest mem");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int write_memory(int cpu_fd, uint64_t initial_gva, uint64_t initial_gpa,
+ uint64_t gva, const uint8_t *data, size_t len)
+{
+ int ret;
+ uint64_t gpa, flags;
+
+ if (gva == initial_gva) {
+ gpa = initial_gpa;
+ } else {
+ flags = HV_TRANSLATE_GVA_VALIDATE_WRITE;
+ ret = translate_gva(cpu_fd, gva, &gpa, flags);
+ if (ret < 0) {
+ error_report("failed to translate gva to gpa");
+ return -1;
+ }
+ }
+ ret = mshv_guest_mem_write(gpa, data, len, false);
+ if (ret != MEMTX_OK) {
+ error_report("failed to write to mmio");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int handle_pio_str_write(CPUState *cpu,
+ hv_x64_io_port_intercept_message *info,
+ size_t repeat, uint16_t port,
+ bool direction_flag)
+{
+ int ret;
+ uint64_t src;
+ uint8_t data[4] = { 0 };
+ size_t len = info->access_info.access_size;
+ int cpu_fd = mshv_vcpufd(cpu);
+
+ src = linear_addr(cpu, info->rsi, R_DS);
+
+ for (size_t i = 0; i < repeat; i++) {
+ ret = read_memory(cpu_fd, 0, 0, src, data, len);
+ if (ret < 0) {
+ error_report("Failed to read memory");
+ return -1;
+ }
+ ret = pio_write(port, data, len, false);
+ if (ret < 0) {
+ error_report("Failed to write to io port");
+ return -1;
+ }
+ src += direction_flag ? -len : len;
+ info->rsi += direction_flag ? -len : len;
+ }
+
+ return 0;
+}
+
+static int handle_pio_str_read(CPUState *cpu,
+ hv_x64_io_port_intercept_message *info,
+ size_t repeat, uint16_t port,
+ bool direction_flag)
+{
+ int ret;
+ uint64_t dst;
+ size_t len = info->access_info.access_size;
+ uint8_t data[4] = { 0 };
+ int cpu_fd = mshv_vcpufd(cpu);
+
+ dst = linear_addr(cpu, info->rdi, R_ES);
+
+ for (size_t i = 0; i < repeat; i++) {
+ pio_read(port, data, len, false);
+
+ ret = write_memory(cpu_fd, 0, 0, dst, data, len);
+ if (ret < 0) {
+ error_report("Failed to write memory");
+ return -1;
+ }
+ dst += direction_flag ? -len : len;
+ info->rdi += direction_flag ? -len : len;
+ }
+
+ return 0;
+}
+
+static int handle_pio_str(CPUState *cpu,
+ hv_x64_io_port_intercept_message *info)
+{
+ uint8_t access_type = info->header.intercept_access_type;
+ uint16_t port = info->port_number;
+ bool repop = info->access_info.rep_prefix == 1;
+ size_t repeat = repop ? info->rcx : 1;
+ size_t insn_len = info->header.instruction_length;
+ bool direction_flag;
+ uint32_t reg_names[3];
+ uint64_t reg_values[3];
+ int ret;
+ struct X64Registers x64_regs = { 0 };
+ X86CPU *x86_cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86_cpu->env;
+ int cpu_fd = mshv_vcpufd(cpu);
+
+ ret = fetch_guest_state(cpu);
+ if (ret < 0) {
+ error_report("Failed to fetch guest state");
+ return -1;
+ }
+
+ direction_flag = (env->eflags & DF) != 0;
+
+ if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_WRITE) {
+ ret = handle_pio_str_write(cpu, info, repeat, port, direction_flag);
+ if (ret < 0) {
+ error_report("Failed to handle pio str write");
+ return -1;
+ }
+ reg_names[0] = HV_X64_REGISTER_RSI;
+ reg_values[0] = info->rsi;
+ } else {
+ ret = handle_pio_str_read(cpu, info, repeat, port, direction_flag);
+ if (ret < 0) {
+ error_report("Failed to handle pio str read");
+ return -1;
+ }
+ reg_names[0] = HV_X64_REGISTER_RDI;
+ reg_values[0] = info->rdi;
+ }
+
+ reg_names[1] = HV_X64_REGISTER_RIP;
+ reg_values[1] = info->header.rip + insn_len;
+ reg_names[2] = HV_X64_REGISTER_RAX;
+ reg_values[2] = info->rax;
+
+ x64_regs.names = reg_names;
+ x64_regs.values = reg_values;
+ x64_regs.count = 3;
+
+ ret = set_x64_registers(cpu_fd, &x64_regs);
+ if (ret < 0) {
+ error_report("Failed to set x64 registers");
+ return -1;
+ }
+
+ cpu->accel->dirty = false;
+
+ return 0;
+}
+
+static int handle_pio(CPUState *cpu, const struct hyperv_message *msg)
+{
+ struct hv_x64_io_port_intercept_message info = { 0 };
+ int ret;
+
+ ret = set_ioport_info(msg, &info);
+ if (ret < 0) {
+ error_report("Failed to convert message to ioport info");
+ return -1;
+ }
+
+ if (info.access_info.string_op) {
+ return handle_pio_str(cpu, &info);
+ }
+
+ return handle_pio_non_str(cpu, &info);
+}
+
int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit)
{
- error_report("unimplemented");
- abort();
+ int ret;
+ hv_message exit_msg = { 0 };
+ enum MshvVmExit exit_reason;
+ int cpu_fd = mshv_vcpufd(cpu);
+
+ ret = ioctl(cpu_fd, MSHV_RUN_VP, &exit_msg);
+ if (ret < 0) {
+ return MshvVmExitShutdown;
+ }
+
+ switch (exit_msg.header.message_type) {
+ case HVMSG_UNRECOVERABLE_EXCEPTION:
+ *msg = exit_msg;
+ return MshvVmExitShutdown;
+ case HVMSG_UNMAPPED_GPA:
+ ret = handle_unmapped_mem(vm_fd, cpu, &exit_msg, &exit_reason);
+ if (ret < 0) {
+ error_report("failed to handle unmapped memory");
+ return -1;
+ }
+ return exit_reason;
+ case HVMSG_GPA_INTERCEPT:
+ ret = handle_mmio(cpu, &exit_msg, &exit_reason);
+ if (ret < 0) {
+ error_report("failed to handle mmio");
+ return -1;
+ }
+ return exit_reason;
+ case HVMSG_X64_IO_PORT_INTERCEPT:
+ ret = handle_pio(cpu, &exit_msg);
+ if (ret < 0) {
+ return MshvVmExitSpecial;
+ }
+ return MshvVmExitIgnore;
+ default:
+ msg = &exit_msg;
+ }
+
+ *exit = MshvVmExitIgnore;
+ return 0;
}
void mshv_remove_vcpu(int vm_fd, int cpu_fd)
@@ -1061,34 +1583,6 @@ int mshv_create_vcpu(int vm_fd, uint8_t vp_index, int *cpu_fd)
return 0;
}
-static int translate_gva(int cpu_fd, uint64_t gva, uint64_t *gpa,
- uint64_t flags)
-{
- int ret;
- union hv_translate_gva_result result = { 0 };
-
- *gpa = 0;
- mshv_translate_gva args = {
- .gva = gva,
- .flags = flags,
- .gpa = (__u64 *)gpa,
- .result = &result,
- };
-
- ret = ioctl(cpu_fd, MSHV_TRANSLATE_GVA, &args);
- if (ret < 0) {
- error_report("failed to invoke gpa->gva translation");
- return -errno;
- }
- if (result.result_code != HV_TRANSLATE_GVA_SUCCESS) {
- error_report("failed to translate gva (" TARGET_FMT_lx ") to gpa", gva);
- return -1;
-
- }
-
- return 0;
-}
-
static int guest_mem_read_with_gva(const CPUState *cpu, uint64_t gva,
uint8_t *data, uintptr_t size,
bool fetch_instruction)
--
2.34.1
On Tue, May 20, 2025 at 01:30:17PM +0200, Magnus Kulke wrote:
> Add the main vCPU execution loop for MSHV using the MSHV_RUN_VP ioctl.
>
> A translate_gva() hypercall is implemented. The execution loop handles
> guest entry and VM exits. There are handlers for memory r/w, PIO and
> MMIO to which the exit events are dispatched.
>
> In case of MMIO the i386 instruction decoder/emulator is invoked to
> perform the operation in user space.
>
> Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
> ---
[...]
> +
> +static int handle_mmio(CPUState *cpu, const struct hyperv_message *msg,
> + MshvVmExit *exit_reason)
> +{
> + struct hv_x64_memory_intercept_message info = { 0 };
> + size_t insn_len;
> + uint8_t access_type;
> + uint8_t *instruction_bytes;
> + int ret;
> +
> + ret = set_memory_info(msg, &info);
> + if (ret < 0) {
> + error_report("failed to convert message to memory info");
> + return -1;
> + }
> + insn_len = info.instruction_byte_count;
> + access_type = info.header.intercept_access_type;
> +
> + if (access_type == HV_X64_INTERCEPT_ACCESS_TYPE_EXECUTE) {
> + error_report("invalid intercept access type: execute");
> + return -1;
> + }
> +
You can assert(insn_len <= 16) here to simplify the code.
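
i.e. something like this (untested; the instruction_bytes buffer in
hv_x64_memory_intercept_message is 16 bytes, so longer values cannot
occur):

    assert(insn_len <= 16);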
> + if (insn_len > 16) {
> + error_report("invalid mmio instruction length: %zu", insn_len);
> + return -1;
> + }
> +
> + if (insn_len == 0) {
> + warn_report("mmio instruction buffer empty");
This is a valid state so there is no need to warn.
[...]
> + switch (exit_msg.header.message_type) {
> + case HVMSG_UNRECOVERABLE_EXCEPTION:
> + *msg = exit_msg;
> + return MshvVmExitShutdown;
> + case HVMSG_UNMAPPED_GPA:
> + ret = handle_unmapped_mem(vm_fd, cpu, &exit_msg, &exit_reason);
> + if (ret < 0) {
> + error_report("failed to handle unmapped memory");
> + return -1;
> + }
> + return exit_reason;
> + case HVMSG_GPA_INTERCEPT:
I'm not sure why you want to handle UNMAPPED_GPA and GPA_INTERCEPT
separately. In Cloud Hypervisor there is one code path for both.
Is this due to how the memory address space is set up in QEMU?
> + ret = handle_mmio(cpu, &exit_msg, &exit_reason);
> + if (ret < 0) {
> + error_report("failed to handle mmio");
> + return -1;
> + }
> + return exit_reason;
> + case HVMSG_X64_IO_PORT_INTERCEPT:
> + ret = handle_pio(cpu, &exit_msg);
> + if (ret < 0) {
> + return MshvVmExitSpecial;
> + }
> + return MshvVmExitIgnore;
> + default:
> + msg = &exit_msg;
Do you not get any HALT exit? How are you going to shut down the VM?
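
Something along these lines, perhaps (untested; MshvVmExitHlt is a
made-up name for a new exit value):

    case HVMSG_X64_HALT:
        return MshvVmExitHlt;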
[...]
> -static int translate_gva(int cpu_fd, uint64_t gva, uint64_t *gpa,
> - uint64_t flags)
[...]
Why not put this function in the correct location in the previous patch
to begin with?
Thanks,
Wei.
On Tue, May 20, 2025 at 10:52:39PM +0000, Wei Liu wrote:
> On Tue, May 20, 2025 at 01:30:17PM +0200, Magnus Kulke wrote:
> > + default:
> > + msg = &exit_msg;
>
> Do you not get any HALT exit? How are you going to shut down the VM?
>

In the WHPX accelerator there is this comment:

    case WHvRunVpExitReasonX64Halt:
        /*
         * WARNING: as of build 19043.1526 (21H1), this exit reason is no
         * longer used.
         */
        ret = whpx_handle_halt(cpu);
        break;

I wonder if this also applies to HVMSG_X64_HALT from the MSHV driver?
On Tue, Jul 01, 2025 at 10:35:34AM +0200, Magnus Kulke wrote:
> On Tue, May 20, 2025 at 10:52:39PM +0000, Wei Liu wrote:
> > On Tue, May 20, 2025 at 01:30:17PM +0200, Magnus Kulke wrote:
> > > + default:
> > > + msg = &exit_msg;
> >
> > Do you not get any HALT exit? How are you going to shut down the VM?
> >
>
> In the WHPX accelerator there is this comment:
>
> case WHvRunVpExitReasonX64Halt:
> /*
> * WARNING: as of build 19043.1526 (21H1), this exit reason is no
> * longer used.
> */
> ret = whpx_handle_halt(cpu);
> break;
>
> I wonder if this also applies to HVMSG_X64_HALT from the MSHV driver?

IIRC that's still used in our driver.

You can try shutting down the VM with `poweroff` or `halt` and see if
you get the exit.

Wei
On Tue, Jul 01, 2025 at 03:11:39PM +0000, Wei Liu wrote:
> On Tue, Jul 01, 2025 at 10:35:34AM +0200, Magnus Kulke wrote:
> > On Tue, May 20, 2025 at 10:52:39PM +0000, Wei Liu wrote:
> > > On Tue, May 20, 2025 at 01:30:17PM +0200, Magnus Kulke wrote:
> > > > + default:
> > > > + msg = &exit_msg;
> > >
> > > Do you not get any HALT exit? How are you going to shut down the VM?
> > >
> >
> > In the WHPX accelerator there is this comment:
> >
> > case WHvRunVpExitReasonX64Halt:
> > /*
> > * WARNING: as of build 19043.1526 (21H1), this exit reason is no
> > * longer used.
> > */
> > ret = whpx_handle_halt(cpu);
> > break;
> >
> > I wonder if this also applies to HVMSG_X64_HALT from the MSHV driver?
>
> IIRC that's still used in our driver.
>
> You can try shutting down the VM with `poweroff` or `halt` and see if
> you get the exit.
>
> Wei
I wasn't able to trigger the exit with `poweroff` or `halt -p`. Or a
kernel module that performs:
```
local_irq_disable();
__asm__("hlt");
```
(it will just hang the guest).
I have added the handler, but it looks like it's dead code currently.
On Tue, Jul 01, 2025 at 05:45:07PM +0200, Magnus Kulke wrote:
> On Tue, Jul 01, 2025 at 03:11:39PM +0000, Wei Liu wrote:
> > On Tue, Jul 01, 2025 at 10:35:34AM +0200, Magnus Kulke wrote:
> > > On Tue, May 20, 2025 at 10:52:39PM +0000, Wei Liu wrote:
> > > > On Tue, May 20, 2025 at 01:30:17PM +0200, Magnus Kulke wrote:
> > > > > + default:
> > > > > + msg = &exit_msg;
> > > >
> > > > Do you not get any HALT exit? How are you going to shut down the VM?
> > > >
> > >
> > > In the WHPX accelerator there is this comment:
> > >
> > > case WHvRunVpExitReasonX64Halt:
> > > /*
> > > * WARNING: as of build 19043.1526 (21H1), this exit reason is no
> > > * longer used.
> > > */
> > > ret = whpx_handle_halt(cpu);
> > > break;
> > >
> > > I wonder if this also applies to HVMSG_X64_HALT from the MSHV driver?
> >
> > IIRC that's still used in our driver.
> >
> > You can try shutting down the VM with `poweroff` or `halt` and see if
> > you get the exit.
> >
> > Wei
>
> I wasn't able to trigger the exit with `poweroff` or `halt -p`. Or a
> kernel module that performs:
>
> ```
> local_irq_disable();
> __asm__("hlt");
> ```
>
> (it will just hang the guest).
>
> I have added the handler, but it looks like it's dead code currently.
We can leave that out for now as long as the guest shutdown works.
Wei.
On Tue, Jul 01, 2025 at 03:47:40PM +0000, Wei Liu wrote:
>
> We can leave that out for now as long as the guest shutdown works.
>
> Wei.

Yup, shutdown works fine, so I will drop the commit from the next patch
set, thanks!
On Tue, May 20, 2025 at 10:52:39PM +0000, Wei Liu wrote:
> On Tue, May 20, 2025 at 01:30:17PM +0200, Magnus Kulke wrote:
> > + case HVMSG_GPA_INTERCEPT:
>
> I'm not sure why you want to handle UNMAPPED_GPA and GPA_INTERCEPT
> separately. In Cloud Hypervisor there is one code path for both.
>
> Is this due to how the memory address space is set up in QEMU?
>

Yes, indeed. This is a provision for the dynamic re-mapping of
overlapping userspace addresses. We can handle both together in this
commit, though.
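
E.g. by collapsing the two cases (untested sketch, reusing the
handle_mmio() handler from the patch):

    case HVMSG_UNMAPPED_GPA:
    case HVMSG_GPA_INTERCEPT:
        ret = handle_mmio(cpu, &exit_msg, &exit_reason);
        if (ret < 0) {
            error_report("failed to handle mmio");
            return -1;
        }
        return exit_reason;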
On 5/20/25 13:30, Magnus Kulke wrote:
> +static int emulate_instruction(CPUState *cpu,
> + const uint8_t *insn_bytes, size_t insn_len,
> + uint64_t gva, uint64_t gpa)
> +{
> + X86CPU *x86_cpu = X86_CPU(cpu);
> + CPUX86State *env = &x86_cpu->env;
> + struct x86_decode decode = { 0 };
> + int ret;
> + int cpu_fd = mshv_vcpufd(cpu);
> + QemuMutex *guard;
> + x86_insn_stream stream = { .bytes = insn_bytes, .len = insn_len };
> +
> + guard = g_hash_table_lookup(cpu_guards, GUINT_TO_POINTER(cpu_fd));
mshv_cpu_exec() will always run in the vCPU thread, so you don't need a
mutex. All of patch 14 can go, in fact.
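
Without the lock the function body reduces to something like (untested
sketch):

    ret = mshv_load_regs(cpu);
    if (ret < 0) {
        error_report("failed to load registers");
        return -1;
    }

    decode_instruction_stream(env, &decode, &stream);
    exec_instruction(env, &decode);

    ret = mshv_store_regs(cpu);
    if (ret < 0) {
        error_report("failed to store registers");
        return -1;
    }

    return 0;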
Paolo
> + if (!guard) {
> + error_report("failed to get cpu guard");
> + return -1;
> + }
> +
> + WITH_QEMU_LOCK_GUARD(guard) {
> + ret = mshv_load_regs(cpu);
> + if (ret < 0) {
> + error_report("failed to load registers");
> + return -1;
> + }
> +
> + decode_instruction_stream(env, &decode, &stream);
> + exec_instruction(env, &decode);
> +
> + ret = mshv_store_regs(cpu);
> + if (ret < 0) {
> + error_report("failed to store registers");
> + return -1;
> + }
> + }
> +
> + return 0;
> +}
[...]