Write CPU register state to MSHV vCPUs. Various mapping functions to
prepare the payload for the HV call have been implemented.
Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
include/system/mshv.h | 41 ++++++
target/i386/mshv/mshv-cpu.c | 249 ++++++++++++++++++++++++++++++++++++
2 files changed, 290 insertions(+)
diff --git a/include/system/mshv.h b/include/system/mshv.h
index 055489a6f3..76a3b0010e 100644
--- a/include/system/mshv.h
+++ b/include/system/mshv.h
@@ -99,6 +99,46 @@ typedef struct MshvMsiControl {
#define EFER_LMA ((uint64_t)0x400)
#define EFER_LME ((uint64_t)0x100)
+/* CR0 bits */
+#define CR0_PE ((uint64_t)0x1)
+#define CR0_PG ((uint64_t)0x80000000)
+
+/* CR4 bits */
+#define CR4_PAE ((uint64_t)0x20)
+#define CR4_LA57 ((uint64_t)0x1000)
+
+/* rflags bits (shift values) */
+#define CF_SHIFT 0
+#define PF_SHIFT 2
+#define AF_SHIFT 4
+#define ZF_SHIFT 6
+#define SF_SHIFT 7
+#define DF_SHIFT 10
+#define OF_SHIFT 11
+
+/* rflags bits (bit masks) */
+#define CF ((uint64_t)1 << CF_SHIFT)
+#define PF ((uint64_t)1 << PF_SHIFT)
+#define AF ((uint64_t)1 << AF_SHIFT)
+#define ZF ((uint64_t)1 << ZF_SHIFT)
+#define SF ((uint64_t)1 << SF_SHIFT)
+#define DF ((uint64_t)1 << DF_SHIFT)
+#define OF ((uint64_t)1 << OF_SHIFT)
+
+typedef struct MshvFPU {
+ uint8_t fpr[8][16];
+ uint16_t fcw;
+ uint16_t fsw;
+ uint8_t ftwx;
+ uint8_t pad1;
+ uint16_t last_opcode;
+ uint64_t last_ip;
+ uint64_t last_dp;
+ uint8_t xmm[16][16];
+ uint32_t mxcsr;
+ uint32_t pad2;
+} MshvFPU;
+
typedef enum MshvVmExit {
MshvVmExitIgnore = 0,
MshvVmExitShutdown = 1,
@@ -108,6 +148,7 @@ typedef enum MshvVmExit {
void mshv_init_cpu_logic(void);
int mshv_create_vcpu(int vm_fd, uint8_t vp_index, int *cpu_fd);
void mshv_remove_vcpu(int vm_fd, int cpu_fd);
+int mshv_configure_vcpu(const CPUState *cpu, const MshvFPU *fpu, uint64_t xcr0);
int mshv_get_standard_regs(CPUState *cpu);
int mshv_get_special_regs(CPUState *cpu);
int mshv_run_vcpu(int vm_fd, CPUState *cpu, hv_message *msg, MshvVmExit *exit);
diff --git a/target/i386/mshv/mshv-cpu.c b/target/i386/mshv/mshv-cpu.c
index 979ee5b8c3..ad42a09b99 100644
--- a/target/i386/mshv/mshv-cpu.c
+++ b/target/i386/mshv/mshv-cpu.c
@@ -79,6 +79,35 @@ static enum hv_register_name SPECIAL_REGISTER_NAMES[18] = {
HV_REGISTER_PENDING_INTERRUPTION,
};
+static enum hv_register_name FPU_REGISTER_NAMES[26] = {
+ HV_X64_REGISTER_XMM0,
+ HV_X64_REGISTER_XMM1,
+ HV_X64_REGISTER_XMM2,
+ HV_X64_REGISTER_XMM3,
+ HV_X64_REGISTER_XMM4,
+ HV_X64_REGISTER_XMM5,
+ HV_X64_REGISTER_XMM6,
+ HV_X64_REGISTER_XMM7,
+ HV_X64_REGISTER_XMM8,
+ HV_X64_REGISTER_XMM9,
+ HV_X64_REGISTER_XMM10,
+ HV_X64_REGISTER_XMM11,
+ HV_X64_REGISTER_XMM12,
+ HV_X64_REGISTER_XMM13,
+ HV_X64_REGISTER_XMM14,
+ HV_X64_REGISTER_XMM15,
+ HV_X64_REGISTER_FP_MMX0,
+ HV_X64_REGISTER_FP_MMX1,
+ HV_X64_REGISTER_FP_MMX2,
+ HV_X64_REGISTER_FP_MMX3,
+ HV_X64_REGISTER_FP_MMX4,
+ HV_X64_REGISTER_FP_MMX5,
+ HV_X64_REGISTER_FP_MMX6,
+ HV_X64_REGISTER_FP_MMX7,
+ HV_X64_REGISTER_FP_CONTROL_STATUS,
+ HV_X64_REGISTER_XMM_CONTROL_STATUS,
+};
+
static void add_cpu_guard(int cpu_fd)
{
QemuMutex *guard;
@@ -343,8 +372,228 @@ int mshv_load_regs(CPUState *cpu)
return 0;
}
+static void populate_hv_segment_reg(SegmentCache *seg,
+ hv_x64_segment_register *hv_reg)
+{
+ uint32_t flags = seg->flags;
+
+ hv_reg->base = seg->base;
+ hv_reg->limit = seg->limit;
+ hv_reg->selector = seg->selector;
+ hv_reg->segment_type = (flags >> DESC_TYPE_SHIFT) & 0xF;
+ hv_reg->non_system_segment = (flags & DESC_S_MASK) != 0;
+ hv_reg->descriptor_privilege_level = (flags >> DESC_DPL_SHIFT) & 0x3;
+ hv_reg->present = (flags & DESC_P_MASK) != 0;
+ hv_reg->reserved = 0;
+ hv_reg->available = (flags & DESC_AVL_MASK) != 0;
+ hv_reg->_long = (flags >> DESC_L_SHIFT) & 0x1;
+ hv_reg->_default = (flags >> DESC_B_SHIFT) & 0x1;
+ hv_reg->granularity = (flags & DESC_G_MASK) != 0;
+}
+
+static void populate_hv_table_reg(const struct SegmentCache *seg,
+ hv_x64_table_register *hv_reg)
+{
+ hv_reg->base = seg->base;
+ hv_reg->limit = seg->limit;
+ memset(hv_reg->pad, 0, sizeof(hv_reg->pad));
+}
+
+static int set_special_regs(const CPUState *cpu)
+{
+ X86CPU *x86cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86cpu->env;
+ int cpu_fd = mshv_vcpufd(cpu);
+ struct hv_register_assoc *assocs;
+ size_t n_regs = sizeof(SPECIAL_REGISTER_NAMES) / sizeof(hv_register_name);
+ int ret;
+
+ assocs = g_new0(struct hv_register_assoc, n_regs);
+
+ /* set names */
+ for (size_t i = 0; i < n_regs; i++) {
+ assocs[i].name = SPECIAL_REGISTER_NAMES[i];
+ }
+ populate_hv_segment_reg(&env->segs[R_CS], &assocs[0].value.segment);
+ populate_hv_segment_reg(&env->segs[R_DS], &assocs[1].value.segment);
+ populate_hv_segment_reg(&env->segs[R_ES], &assocs[2].value.segment);
+ populate_hv_segment_reg(&env->segs[R_FS], &assocs[3].value.segment);
+ populate_hv_segment_reg(&env->segs[R_GS], &assocs[4].value.segment);
+ populate_hv_segment_reg(&env->segs[R_SS], &assocs[5].value.segment);
+ populate_hv_segment_reg(&env->tr, &assocs[6].value.segment);
+ populate_hv_segment_reg(&env->ldt, &assocs[7].value.segment);
+
+ populate_hv_table_reg(&env->gdt, &assocs[8].value.table);
+ populate_hv_table_reg(&env->idt, &assocs[9].value.table);
+
+ assocs[10].value.reg64 = env->cr[0];
+ assocs[11].value.reg64 = env->cr[2];
+ assocs[12].value.reg64 = env->cr[3];
+ assocs[13].value.reg64 = env->cr[4];
+ assocs[14].value.reg64 = cpu_get_apic_tpr(x86cpu->apic_state);
+ assocs[15].value.reg64 = env->efer;
+ assocs[16].value.reg64 = cpu_get_apic_base(x86cpu->apic_state);
+
+ /*
+ * TODO: support asserting an interrupt using interrup_bitmap
+ * it should be possible if we use the vm_fd
+ */
+
+ ret = mshv_set_generic_regs(cpu_fd, assocs, n_regs);
+ g_free(assocs);
+ if (ret < 0) {
+ error_report("failed to set special registers");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int set_fpu_regs(int cpu_fd, const struct MshvFPU *regs)
+{
+ struct hv_register_assoc *assocs;
+ union hv_register_value *value;
+ size_t n_regs = sizeof(FPU_REGISTER_NAMES) / sizeof(enum hv_register_name);
+ size_t fp_i;
+ union hv_x64_fp_control_status_register *ctrl_status;
+ union hv_x64_xmm_control_status_register *xmm_ctrl_status;
+ int ret;
+
+ assocs = g_new0(struct hv_register_assoc, n_regs);
+
+ /* first 16 registers are xmm0-xmm15 */
+ for (size_t i = 0; i < 16; i++) {
+ assocs[i].name = FPU_REGISTER_NAMES[i];
+ value = &assocs[i].value;
+ memcpy(&value->reg128, ®s->xmm[i], 16);
+ }
+
+ /* next 8 registers are fp_mmx0-fp_mmx7 */
+ for (size_t i = 16; i < 24; i++) {
+ assocs[i].name = FPU_REGISTER_NAMES[i];
+ fp_i = (i - 16);
+ value = &assocs[i].value;
+ memcpy(&value->reg128, ®s->fpr[fp_i], 16);
+ }
+
+ /* last two registers are fp_control_status and xmm_control_status */
+ assocs[24].name = FPU_REGISTER_NAMES[24];
+ value = &assocs[24].value;
+ ctrl_status = &value->fp_control_status;
+ ctrl_status->fp_control = regs->fcw;
+ ctrl_status->fp_status = regs->fsw;
+ ctrl_status->fp_tag = regs->ftwx;
+ ctrl_status->reserved = 0;
+ ctrl_status->last_fp_op = regs->last_opcode;
+ ctrl_status->last_fp_rip = regs->last_ip;
+
+ assocs[25].name = FPU_REGISTER_NAMES[25];
+ value = &assocs[25].value;
+ xmm_ctrl_status = &value->xmm_control_status;
+ xmm_ctrl_status->xmm_status_control = regs->mxcsr;
+ xmm_ctrl_status->xmm_status_control_mask = 0;
+ xmm_ctrl_status->last_fp_rdp = regs->last_dp;
+
+ ret = mshv_set_generic_regs(cpu_fd, assocs, n_regs);
+ g_free(assocs);
+ if (ret < 0) {
+ error_report("failed to set fpu registers");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int set_xc_reg(int cpu_fd, uint64_t xcr0)
+{
+ int ret;
+ struct hv_register_assoc assoc = {
+ .name = HV_X64_REGISTER_XFEM,
+ .value.reg64 = xcr0,
+ };
+
+ ret = mshv_set_generic_regs(cpu_fd, &assoc, 1);
+ if (ret < 0) {
+ error_report("failed to set xcr0");
+ return -errno;
+ }
+ return 0;
+}
+
+static int set_cpu_state(const CPUState *cpu, const MshvFPU *fpu_regs,
+ uint64_t xcr0)
+{
+ int ret;
+ int cpu_fd = mshv_vcpufd(cpu);
+
+ ret = set_standard_regs(cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = set_special_regs(cpu);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = set_fpu_regs(cpu_fd, fpu_regs);
+ if (ret < 0) {
+ return ret;
+ }
+ ret = set_xc_reg(cpu_fd, xcr0);
+ if (ret < 0) {
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * TODO: populate topology info:
+ *
+ * X86CPU *x86cpu = X86_CPU(cpu);
+ * CPUX86State *env = &x86cpu->env;
+ * X86CPUTopoInfo *topo_info = &env->topo_info;
+ */
+int mshv_configure_vcpu(const CPUState *cpu, const struct MshvFPU *fpu,
+ uint64_t xcr0)
+{
+ int ret;
+
+ ret = set_cpu_state(cpu, fpu, xcr0);
+ if (ret < 0) {
+ error_report("failed to set cpu state");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int put_regs(const CPUState *cpu)
+{
+ X86CPU *x86cpu = X86_CPU(cpu);
+ CPUX86State *env = &x86cpu->env;
+ MshvFPU fpu = {0};
+ int ret;
+
+ memset(&fpu, 0, sizeof(fpu));
+
+ ret = mshv_configure_vcpu(cpu, &fpu, env->xcr0);
+ if (ret < 0) {
+ error_report("failed to configure vcpu");
+ return ret;
+ }
+
+ return 0;
+}
+
int mshv_arch_put_registers(const CPUState *cpu)
{
+ int ret;
+
+ ret = put_regs(cpu);
+ if (ret < 0) {
+ error_report("Failed to put registers");
+ return -1;
+ }
+
error_report("unimplemented");
abort();
}
--
2.34.1
On Tue, May 20, 2025 at 01:30:11PM +0200, Magnus Kulke wrote:
> Write CPU register state to MSHV vCPUs. Various mapping functions to
> prepare the payload for the HV call have been implemented.
>
> Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
> ---
[...]
> +
> +static void populate_hv_table_reg(const struct SegmentCache *seg,
> + hv_x64_table_register *hv_reg)
> +{
> + hv_reg->base = seg->base;
> + hv_reg->limit = seg->limit;
> + memset(hv_reg->pad, 0, sizeof(hv_reg->pad));
I'm not sure if the compiler will optimize this function call out.
It is straightforward to write
*hv_reg = { .base = seg->base, .limit = seg->limit };
> +}
> +
> +static int set_special_regs(const CPUState *cpu)
> +{
> + X86CPU *x86cpu = X86_CPU(cpu);
> + CPUX86State *env = &x86cpu->env;
> + int cpu_fd = mshv_vcpufd(cpu);
> + struct hv_register_assoc *assocs;
> + size_t n_regs = sizeof(SPECIAL_REGISTER_NAMES) / sizeof(hv_register_name);
> + int ret;
> +
> + assocs = g_new0(struct hv_register_assoc, n_regs);
The allocation here can be removed, since we know for sure how many
elements are in `SPECIAL_REGISTER_NAMES`. It should be fine to use an
on-stack array.
There are probably other places you can optimize.
Thanks,
Wei.
On Tue, May 20, 2025 at 01:30:11PM +0200, Magnus Kulke wrote:
> Write CPU register state to MSHV vCPUs. Various mapping functions to
> prepare the payload for the HV call have been implemented.
>
> Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
> ---
[...]
> +static int set_special_regs(const CPUState *cpu)
> +{
> + X86CPU *x86cpu = X86_CPU(cpu);
> + CPUX86State *env = &x86cpu->env;
> + int cpu_fd = mshv_vcpufd(cpu);
> + struct hv_register_assoc *assocs;
> + size_t n_regs = sizeof(SPECIAL_REGISTER_NAMES) / sizeof(hv_register_name);
> + int ret;
> +
> + assocs = g_new0(struct hv_register_assoc, n_regs);
> +
> + /* set names */
> + for (size_t i = 0; i < n_regs; i++) {
> + assocs[i].name = SPECIAL_REGISTER_NAMES[i];
> + }
> + populate_hv_segment_reg(&env->segs[R_CS], &assocs[0].value.segment);
> + populate_hv_segment_reg(&env->segs[R_DS], &assocs[1].value.segment);
> + populate_hv_segment_reg(&env->segs[R_ES], &assocs[2].value.segment);
> + populate_hv_segment_reg(&env->segs[R_FS], &assocs[3].value.segment);
> + populate_hv_segment_reg(&env->segs[R_GS], &assocs[4].value.segment);
> + populate_hv_segment_reg(&env->segs[R_SS], &assocs[5].value.segment);
> + populate_hv_segment_reg(&env->tr, &assocs[6].value.segment);
> + populate_hv_segment_reg(&env->ldt, &assocs[7].value.segment);
> +
> + populate_hv_table_reg(&env->gdt, &assocs[8].value.table);
> + populate_hv_table_reg(&env->idt, &assocs[9].value.table);
> +
> + assocs[10].value.reg64 = env->cr[0];
> + assocs[11].value.reg64 = env->cr[2];
> + assocs[12].value.reg64 = env->cr[3];
> + assocs[13].value.reg64 = env->cr[4];
> + assocs[14].value.reg64 = cpu_get_apic_tpr(x86cpu->apic_state);
> + assocs[15].value.reg64 = env->efer;
> + assocs[16].value.reg64 = cpu_get_apic_base(x86cpu->apic_state);
> +
> + /*
> + * TODO: support asserting an interrupt using interrup_bitmap
> + * it should be possible if we use the vm_fd
> + */
> +
Why is there a need to assert an interrupt here?
> + ret = mshv_set_generic_regs(cpu_fd, assocs, n_regs);
> + g_free(assocs);
> + if (ret < 0) {
> + error_report("failed to set special registers");
> + return -1;
> + }
> +
> + return 0;
> +}
> +
> +static int set_fpu_regs(int cpu_fd, const struct MshvFPU *regs)
Please change regs to fpu.
Thanks,
Wei.
> +{
> + struct hv_register_assoc *assocs;
> + union hv_register_value *value;
> + size_t n_regs = sizeof(FPU_REGISTER_NAMES) / sizeof(enum hv_register_name);
> + size_t fp_i;
> + union hv_x64_fp_control_status_register *ctrl_status;
> + union hv_x64_xmm_control_status_register *xmm_ctrl_status;
> + int ret;
> +
On Tue, May 20, 2025 at 10:22:27PM +0000, Wei Liu wrote: > On Tue, May 20, 2025 at 01:30:11PM +0200, Magnus Kulke wrote: > > + /* > > + * TODO: support asserting an interrupt using interrup_bitmap > > + * it should be possible if we use the vm_fd > > + */ > > + > > Why is there a need to assert an interrupt here? > The comment has been carried over from the mshv-ioctl crate: https://github.com/rust-vmm/mshv/blob/main/mshv-ioctls/src/ioctls/vcpu.rs#L778 I was wondering whether we can/want to set the bitmap here, since we do have access to the vm_fd, but I didn't follow up on that yet.
On Wed, May 28, 2025 at 04:30:55PM +0200, Magnus Kulke wrote: > On Tue, May 20, 2025 at 10:22:27PM +0000, Wei Liu wrote: > > On Tue, May 20, 2025 at 01:30:11PM +0200, Magnus Kulke wrote: > > > + /* > > > + * TODO: support asserting an interrupt using interrup_bitmap > > > + * it should be possible if we use the vm_fd > > > + */ > > > + > > > > Why is there a need to assert an interrupt here? > > > > The comment has been carried over from the mshv-ioctl crate: > > https://github.com/rust-vmm/mshv/blob/main/mshv-ioctls/src/ioctls/vcpu.rs#L778 > > I was wondering whether we can/want to set the bitmap here, since we do > have access to the vm_fd, but I didn't follow up on that yet. In the code snippet you quoted, an error is returned if the bitmap is not empty. Please at least print a warning if the bitmap is not empty to catch any issues. Debugging lost interrupts is hard enough as it is. CC the Rust-VMM code co-owners for awareness. Thanks, Wei.
On 5/20/25 13:30, Magnus Kulke wrote:
> Write CPU register state to MSHV vCPUs. Various mapping functions to
> prepare the payload for the HV call have been implemented.
>
> Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
> ---
> include/system/mshv.h | 41 ++++++
> target/i386/mshv/mshv-cpu.c | 249 ++++++++++++++++++++++++++++++++++++
> 2 files changed, 290 insertions(+)
>
> diff --git a/include/system/mshv.h b/include/system/mshv.h
> index 055489a6f3..76a3b0010e 100644
> --- a/include/system/mshv.h
> +++ b/include/system/mshv.h
> @@ -99,6 +99,46 @@ typedef struct MshvMsiControl {
> #define EFER_LMA ((uint64_t)0x400)
> #define EFER_LME ((uint64_t)0x100)
>
> +/* CR0 bits */
> +#define CR0_PE ((uint64_t)0x1)
> +#define CR0_PG ((uint64_t)0x80000000)
> +
> +/* CR4 bits */
> +#define CR4_PAE ((uint64_t)0x20)
> +#define CR4_LA57 ((uint64_t)0x1000)
> +
> +/* rflags bits (shift values) */
> +#define CF_SHIFT 0
> +#define PF_SHIFT 2
> +#define AF_SHIFT 4
> +#define ZF_SHIFT 6
> +#define SF_SHIFT 7
> +#define DF_SHIFT 10
> +#define OF_SHIFT 11
> +
> +/* rflags bits (bit masks) */
> +#define CF ((uint64_t)1 << CF_SHIFT)
> +#define PF ((uint64_t)1 << PF_SHIFT)
> +#define AF ((uint64_t)1 << AF_SHIFT)
> +#define ZF ((uint64_t)1 << ZF_SHIFT)
> +#define SF ((uint64_t)1 << SF_SHIFT)
> +#define DF ((uint64_t)1 << DF_SHIFT)
> +#define OF ((uint64_t)1 << OF_SHIFT)
All of these are either duplicate or unused.
Paolo
© 2016 - 2026 Red Hat, Inc.