From: Xin Li <xin3.li@intel.com>
Add FRED VMCS fields to nested VMX context management.
Signed-off-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
Tested-by: Shan Kang <shan.kang@intel.com>
---
Change since v2:
* Add and use nested_cpu_has_fred(vmcs12) because vmcs02 should be set
from vmcs12 if and only if the field is enabled in L1's VMX config
(Sean Christopherson).
* Fix coding style (Sean Christopherson).
Change since v1:
* Remove hyperv TLFS related changes (Jeremi Piotrowski).
* Use kvm_cpu_cap_has() instead of cpu_feature_enabled() (Chao Gao).
---
Documentation/virt/kvm/x86/nested-vmx.rst | 18 +++++
arch/x86/kvm/vmx/nested.c | 88 ++++++++++++++++++-----
arch/x86/kvm/vmx/nested.h | 8 +++
arch/x86/kvm/vmx/nested_vmcs_fields.h | 12 ++++
arch/x86/kvm/vmx/vmcs12.c | 18 +++++
arch/x86/kvm/vmx/vmcs12.h | 36 ++++++++++
arch/x86/kvm/vmx/vmcs_shadow_fields.h | 4 ++
7 files changed, 168 insertions(+), 16 deletions(-)
diff --git a/Documentation/virt/kvm/x86/nested-vmx.rst b/Documentation/virt/kvm/x86/nested-vmx.rst
index e64ef231f310..87fa9f3877ab 100644
--- a/Documentation/virt/kvm/x86/nested-vmx.rst
+++ b/Documentation/virt/kvm/x86/nested-vmx.rst
@@ -218,6 +218,24 @@ struct shadow_vmcs is ever changed.
u16 host_gs_selector;
u16 host_tr_selector;
u64 secondary_vm_exit_controls;
+ u64 guest_ia32_fred_config;
+ u64 guest_ia32_fred_rsp1;
+ u64 guest_ia32_fred_rsp2;
+ u64 guest_ia32_fred_rsp3;
+ u64 guest_ia32_fred_stklvls;
+ u64 guest_ia32_fred_ssp1;
+ u64 guest_ia32_fred_ssp2;
+ u64 guest_ia32_fred_ssp3;
+ u64 host_ia32_fred_config;
+ u64 host_ia32_fred_rsp1;
+ u64 host_ia32_fred_rsp2;
+ u64 host_ia32_fred_rsp3;
+ u64 host_ia32_fred_stklvls;
+ u64 host_ia32_fred_ssp1;
+ u64 host_ia32_fred_ssp2;
+ u64 host_ia32_fred_ssp3;
+ u64 injected_event_data;
+ u64 original_event_data;
};
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 4529fd635385..45a5ffa51e60 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -719,6 +719,12 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_FRED_RSP0, MSR_TYPE_RW);
+
+ nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+ MSR_IA32_FRED_SSP0, MSR_TYPE_RW);
#endif
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
@@ -1268,9 +1274,11 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
const u64 feature_bits = VMX_BASIC_DUAL_MONITOR_TREATMENT |
VMX_BASIC_INOUT |
- VMX_BASIC_TRUE_CTLS;
+ VMX_BASIC_TRUE_CTLS |
+ VMX_BASIC_NESTED_EXCEPTION;
- const u64 reserved_bits = GENMASK_ULL(63, 56) |
+ const u64 reserved_bits = GENMASK_ULL(63, 59) |
+ GENMASK_ULL(57, 56) |
GENMASK_ULL(47, 45) |
BIT_ULL(31);
@@ -2536,6 +2544,8 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs0
vmcs12->vm_entry_instruction_len);
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
vmcs12->guest_interruptibility_info);
+ if (nested_cpu_has_fred(vmcs12))
+ vmcs_write64(INJECTED_EVENT_DATA, vmcs12->injected_event_data);
vmx->loaded_vmcs->nmi_known_unmasked =
!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
} else {
@@ -2588,6 +2598,17 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
vmx_segment_cache_clear(vmx);
+
+ if (nested_cpu_has_fred(vmcs12)) {
+ vmcs_write64(GUEST_IA32_FRED_CONFIG, vmcs12->guest_ia32_fred_config);
+ vmcs_write64(GUEST_IA32_FRED_RSP1, vmcs12->guest_ia32_fred_rsp1);
+ vmcs_write64(GUEST_IA32_FRED_RSP2, vmcs12->guest_ia32_fred_rsp2);
+ vmcs_write64(GUEST_IA32_FRED_RSP3, vmcs12->guest_ia32_fred_rsp3);
+ vmcs_write64(GUEST_IA32_FRED_STKLVLS, vmcs12->guest_ia32_fred_stklvls);
+ vmcs_write64(GUEST_IA32_FRED_SSP1, vmcs12->guest_ia32_fred_ssp1);
+ vmcs_write64(GUEST_IA32_FRED_SSP2, vmcs12->guest_ia32_fred_ssp2);
+ vmcs_write64(GUEST_IA32_FRED_SSP3, vmcs12->guest_ia32_fred_ssp3);
+ }
}
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
@@ -3881,6 +3902,8 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
u32 idt_vectoring;
unsigned int nr;
+ vmcs12->original_event_data = 0;
+
/*
* Per the SDM, VM-Exits due to double and triple faults are never
* considered to occur during event delivery, even if the double/triple
@@ -3919,6 +3942,11 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
vcpu->arch.exception.error_code;
}
+ if (vcpu->arch.exception.nested)
+ idt_vectoring |= INTR_INFO_NESTED_EXCEPTION_MASK;
+
+ vmcs12->original_event_data = vcpu->arch.exception.event_data;
+
vmcs12->idt_vectoring_info_field = idt_vectoring;
} else if (vcpu->arch.nmi_injected) {
vmcs12->idt_vectoring_info_field =
@@ -4009,19 +4037,10 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
u32 intr_info = ex->vector | INTR_INFO_VALID_MASK;
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
- unsigned long exit_qual;
+ unsigned long exit_qual = 0;
- if (ex->has_payload) {
- exit_qual = ex->payload;
- } else if (ex->vector == PF_VECTOR) {
- exit_qual = vcpu->arch.cr2;
- } else if (ex->vector == DB_VECTOR) {
- exit_qual = vcpu->arch.dr6;
- exit_qual &= ~DR6_BT;
- exit_qual ^= DR6_ACTIVE_LOW;
- } else {
- exit_qual = 0;
- }
+ if (ex->vector != NM_VECTOR)
+ exit_qual = ex->event_data;
/*
* Unlike AMD's Paged Real Mode, which reports an error code on #PF
@@ -4042,10 +4061,13 @@ static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu)
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
}
- if (kvm_exception_is_soft(ex->vector))
+ if (kvm_exception_is_soft(ex->vector)) {
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
- else
+ } else {
intr_info |= INTR_TYPE_HARD_EXCEPTION;
+ if (ex->nested)
+ intr_info |= INTR_INFO_NESTED_EXCEPTION_MASK;
+ }
if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
vmx_get_nmi_mask(vcpu))
@@ -4468,6 +4490,14 @@ static bool is_vmcs12_ext_field(unsigned long field)
case GUEST_TR_BASE:
case GUEST_GDTR_BASE:
case GUEST_IDTR_BASE:
+ case GUEST_IA32_FRED_CONFIG:
+ case GUEST_IA32_FRED_RSP1:
+ case GUEST_IA32_FRED_RSP2:
+ case GUEST_IA32_FRED_RSP3:
+ case GUEST_IA32_FRED_STKLVLS:
+ case GUEST_IA32_FRED_SSP1:
+ case GUEST_IA32_FRED_SSP2:
+ case GUEST_IA32_FRED_SSP3:
case GUEST_PENDING_DBG_EXCEPTIONS:
case GUEST_BNDCFGS:
return true;
@@ -4517,6 +4547,18 @@ static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
+
+ if (nested_cpu_has_fred(vmcs12)) {
+ vmcs12->guest_ia32_fred_config = vmcs_read64(GUEST_IA32_FRED_CONFIG);
+ vmcs12->guest_ia32_fred_rsp1 = vmcs_read64(GUEST_IA32_FRED_RSP1);
+ vmcs12->guest_ia32_fred_rsp2 = vmcs_read64(GUEST_IA32_FRED_RSP2);
+ vmcs12->guest_ia32_fred_rsp3 = vmcs_read64(GUEST_IA32_FRED_RSP3);
+ vmcs12->guest_ia32_fred_stklvls = vmcs_read64(GUEST_IA32_FRED_STKLVLS);
+ vmcs12->guest_ia32_fred_ssp1 = vmcs_read64(GUEST_IA32_FRED_SSP1);
+ vmcs12->guest_ia32_fred_ssp2 = vmcs_read64(GUEST_IA32_FRED_SSP2);
+ vmcs12->guest_ia32_fred_ssp3 = vmcs_read64(GUEST_IA32_FRED_SSP3);
+ }
+
vmcs12->guest_pending_dbg_exceptions =
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
@@ -4741,6 +4783,17 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
+ if (nested_cpu_has_fred(vmcs12)) {
+ vmcs_write64(GUEST_IA32_FRED_CONFIG, vmcs12->host_ia32_fred_config);
+ vmcs_write64(GUEST_IA32_FRED_RSP1, vmcs12->host_ia32_fred_rsp1);
+ vmcs_write64(GUEST_IA32_FRED_RSP2, vmcs12->host_ia32_fred_rsp2);
+ vmcs_write64(GUEST_IA32_FRED_RSP3, vmcs12->host_ia32_fred_rsp3);
+ vmcs_write64(GUEST_IA32_FRED_STKLVLS, vmcs12->host_ia32_fred_stklvls);
+ vmcs_write64(GUEST_IA32_FRED_SSP1, vmcs12->host_ia32_fred_ssp1);
+ vmcs_write64(GUEST_IA32_FRED_SSP2, vmcs12->host_ia32_fred_ssp2);
+ vmcs_write64(GUEST_IA32_FRED_SSP3, vmcs12->host_ia32_fred_ssp3);
+ }
+
/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
vmcs_write64(GUEST_BNDCFGS, 0);
@@ -7197,6 +7250,9 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs)
msrs->basic |= VMX_BASIC_TRUE_CTLS;
if (cpu_has_vmx_basic_inout())
msrs->basic |= VMX_BASIC_INOUT;
+
+ if (cpu_has_vmx_fred())
+ msrs->basic |= VMX_BASIC_NESTED_EXCEPTION;
}
static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs)
diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
index 2c296b6abb8c..5272f617fcef 100644
--- a/arch/x86/kvm/vmx/nested.h
+++ b/arch/x86/kvm/vmx/nested.h
@@ -251,6 +251,14 @@ static inline bool nested_cpu_has_encls_exit(struct vmcs12 *vmcs12)
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING);
}
+static inline bool nested_cpu_has_fred(struct vmcs12 *vmcs12)
+{
+ return vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_FRED &&
+ vmcs12->vm_exit_controls & VM_EXIT_ACTIVATE_SECONDARY_CONTROLS &&
+ vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_SAVE_IA32_FRED &&
+ vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_LOAD_IA32_FRED;
+}
+
/*
* if fixed0[i] == 1: val[i] must be 1
* if fixed1[i] == 0: val[i] must be 0
diff --git a/arch/x86/kvm/vmx/nested_vmcs_fields.h b/arch/x86/kvm/vmx/nested_vmcs_fields.h
index fcd6c32dce31..dea22279d008 100644
--- a/arch/x86/kvm/vmx/nested_vmcs_fields.h
+++ b/arch/x86/kvm/vmx/nested_vmcs_fields.h
@@ -9,5 +9,17 @@ BUILD_BUG_ON(1)
#define HAS_VMCS_FIELD_RANGE(x, y, c)
#endif
+HAS_VMCS_FIELD(SECONDARY_VM_EXIT_CONTROLS, guest_can_use(vcpu, X86_FEATURE_FRED))
+HAS_VMCS_FIELD(SECONDARY_VM_EXIT_CONTROLS_HIGH, guest_can_use(vcpu, X86_FEATURE_FRED))
+
+HAS_VMCS_FIELD_RANGE(GUEST_IA32_FRED_CONFIG, GUEST_IA32_FRED_SSP3, guest_can_use(vcpu, X86_FEATURE_FRED))
+HAS_VMCS_FIELD_RANGE(HOST_IA32_FRED_CONFIG, HOST_IA32_FRED_SSP3, guest_can_use(vcpu, X86_FEATURE_FRED))
+
+HAS_VMCS_FIELD(INJECTED_EVENT_DATA, guest_can_use(vcpu, X86_FEATURE_FRED))
+HAS_VMCS_FIELD(INJECTED_EVENT_DATA_HIGH, guest_can_use(vcpu, X86_FEATURE_FRED))
+
+HAS_VMCS_FIELD(ORIGINAL_EVENT_DATA, guest_can_use(vcpu, X86_FEATURE_FRED))
+HAS_VMCS_FIELD(ORIGINAL_EVENT_DATA_HIGH, guest_can_use(vcpu, X86_FEATURE_FRED))
+
#undef HAS_VMCS_FIELD
#undef HAS_VMCS_FIELD_RANGE
diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c
index 98457d7b2b23..59f17fdfad11 100644
--- a/arch/x86/kvm/vmx/vmcs12.c
+++ b/arch/x86/kvm/vmx/vmcs12.c
@@ -80,6 +80,7 @@ const unsigned short vmcs12_field_offsets[] = {
FIELD(VM_ENTRY_MSR_LOAD_COUNT, vm_entry_msr_load_count),
FIELD(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field),
FIELD(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code),
+ FIELD(INJECTED_EVENT_DATA, injected_event_data),
FIELD(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len),
FIELD(TPR_THRESHOLD, tpr_threshold),
FIELD(SECONDARY_VM_EXEC_CONTROL, secondary_vm_exec_control),
@@ -89,6 +90,7 @@ const unsigned short vmcs12_field_offsets[] = {
FIELD(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code),
FIELD(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field),
FIELD(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code),
+ FIELD(ORIGINAL_EVENT_DATA, original_event_data),
FIELD(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len),
FIELD(VMX_INSTRUCTION_INFO, vmx_instruction_info),
FIELD(GUEST_ES_LIMIT, guest_es_limit),
@@ -152,5 +154,21 @@ const unsigned short vmcs12_field_offsets[] = {
FIELD(HOST_IA32_SYSENTER_EIP, host_ia32_sysenter_eip),
FIELD(HOST_RSP, host_rsp),
FIELD(HOST_RIP, host_rip),
+ FIELD(GUEST_IA32_FRED_CONFIG, guest_ia32_fred_config),
+ FIELD(GUEST_IA32_FRED_RSP1, guest_ia32_fred_rsp1),
+ FIELD(GUEST_IA32_FRED_RSP2, guest_ia32_fred_rsp2),
+ FIELD(GUEST_IA32_FRED_RSP3, guest_ia32_fred_rsp3),
+ FIELD(GUEST_IA32_FRED_STKLVLS, guest_ia32_fred_stklvls),
+ FIELD(GUEST_IA32_FRED_SSP1, guest_ia32_fred_ssp1),
+ FIELD(GUEST_IA32_FRED_SSP2, guest_ia32_fred_ssp2),
+ FIELD(GUEST_IA32_FRED_SSP3, guest_ia32_fred_ssp3),
+ FIELD(HOST_IA32_FRED_CONFIG, host_ia32_fred_config),
+ FIELD(HOST_IA32_FRED_RSP1, host_ia32_fred_rsp1),
+ FIELD(HOST_IA32_FRED_RSP2, host_ia32_fred_rsp2),
+ FIELD(HOST_IA32_FRED_RSP3, host_ia32_fred_rsp3),
+ FIELD(HOST_IA32_FRED_STKLVLS, host_ia32_fred_stklvls),
+ FIELD(HOST_IA32_FRED_SSP1, host_ia32_fred_ssp1),
+ FIELD(HOST_IA32_FRED_SSP2, host_ia32_fred_ssp2),
+ FIELD(HOST_IA32_FRED_SSP3, host_ia32_fred_ssp3),
};
const unsigned int nr_vmcs12_fields = ARRAY_SIZE(vmcs12_field_offsets);
diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h
index 1fe3ed9108aa..f2a33d7007c9 100644
--- a/arch/x86/kvm/vmx/vmcs12.h
+++ b/arch/x86/kvm/vmx/vmcs12.h
@@ -186,6 +186,24 @@ struct __packed vmcs12 {
u16 host_tr_selector;
u16 guest_pml_index;
u64 secondary_vm_exit_controls;
+ u64 guest_ia32_fred_config;
+ u64 guest_ia32_fred_rsp1;
+ u64 guest_ia32_fred_rsp2;
+ u64 guest_ia32_fred_rsp3;
+ u64 guest_ia32_fred_stklvls;
+ u64 guest_ia32_fred_ssp1;
+ u64 guest_ia32_fred_ssp2;
+ u64 guest_ia32_fred_ssp3;
+ u64 host_ia32_fred_config;
+ u64 host_ia32_fred_rsp1;
+ u64 host_ia32_fred_rsp2;
+ u64 host_ia32_fred_rsp3;
+ u64 host_ia32_fred_stklvls;
+ u64 host_ia32_fred_ssp1;
+ u64 host_ia32_fred_ssp2;
+ u64 host_ia32_fred_ssp3;
+ u64 injected_event_data;
+ u64 original_event_data;
};
/*
@@ -362,6 +380,24 @@ static inline void vmx_check_vmcs12_offsets(void)
CHECK_OFFSET(host_tr_selector, 994);
CHECK_OFFSET(guest_pml_index, 996);
CHECK_OFFSET(secondary_vm_exit_controls, 998);
+ CHECK_OFFSET(guest_ia32_fred_config, 1006);
+ CHECK_OFFSET(guest_ia32_fred_rsp1, 1014);
+ CHECK_OFFSET(guest_ia32_fred_rsp2, 1022);
+ CHECK_OFFSET(guest_ia32_fred_rsp3, 1030);
+ CHECK_OFFSET(guest_ia32_fred_stklvls, 1038);
+ CHECK_OFFSET(guest_ia32_fred_ssp1, 1046);
+ CHECK_OFFSET(guest_ia32_fred_ssp2, 1054);
+ CHECK_OFFSET(guest_ia32_fred_ssp3, 1062);
+ CHECK_OFFSET(host_ia32_fred_config, 1070);
+ CHECK_OFFSET(host_ia32_fred_rsp1, 1078);
+ CHECK_OFFSET(host_ia32_fred_rsp2, 1086);
+ CHECK_OFFSET(host_ia32_fred_rsp3, 1094);
+ CHECK_OFFSET(host_ia32_fred_stklvls, 1102);
+ CHECK_OFFSET(host_ia32_fred_ssp1, 1110);
+ CHECK_OFFSET(host_ia32_fred_ssp2, 1118);
+ CHECK_OFFSET(host_ia32_fred_ssp3, 1126);
+ CHECK_OFFSET(injected_event_data, 1134);
+ CHECK_OFFSET(original_event_data, 1142);
}
extern const unsigned short vmcs12_field_offsets[];
diff --git a/arch/x86/kvm/vmx/vmcs_shadow_fields.h b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
index 53b64dce1309..607945ada35f 100644
--- a/arch/x86/kvm/vmx/vmcs_shadow_fields.h
+++ b/arch/x86/kvm/vmx/vmcs_shadow_fields.h
@@ -86,6 +86,10 @@ SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base)
/* 64-bit */
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address)
+__SHADOW_FIELD_RO(ORIGINAL_EVENT_DATA, original_event_data, cpu_has_vmx_fred())
+__SHADOW_FIELD_RO(ORIGINAL_EVENT_DATA_HIGH, original_event_data, cpu_has_vmx_fred())
+__SHADOW_FIELD_RW(INJECTED_EVENT_DATA, injected_event_data, cpu_has_vmx_fred())
+__SHADOW_FIELD_RW(INJECTED_EVENT_DATA_HIGH, injected_event_data, cpu_has_vmx_fred())
#undef SHADOW_FIELD_RO
#undef SHADOW_FIELD_RW
--
2.46.2
>@@ -7197,6 +7250,9 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) > msrs->basic |= VMX_BASIC_TRUE_CTLS; > if (cpu_has_vmx_basic_inout()) > msrs->basic |= VMX_BASIC_INOUT; >+ >+ if (cpu_has_vmx_fred()) >+ msrs->basic |= VMX_BASIC_NESTED_EXCEPTION; why not advertising VMX_BASIC_NESTED_EXCEPTION if the CPU supports it? just like VMX_BASIC_INOUT right above. > } > > static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs) >diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h >index 2c296b6abb8c..5272f617fcef 100644 >--- a/arch/x86/kvm/vmx/nested.h >+++ b/arch/x86/kvm/vmx/nested.h >@@ -251,6 +251,14 @@ static inline bool nested_cpu_has_encls_exit(struct vmcs12 *vmcs12) > return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING); > } > >+static inline bool nested_cpu_has_fred(struct vmcs12 *vmcs12) >+{ >+ return vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_FRED && >+ vmcs12->vm_exit_controls & VM_EXIT_ACTIVATE_SECONDARY_CONTROLS && >+ vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_SAVE_IA32_FRED && >+ vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_LOAD_IA32_FRED; Is it a requirement in the SDM that the VMM should enable all FRED controls or none? If not, the VMM is allowed to enable only one or two of them. This means KVM would need to emulate FRED controls for the L1 VMM as three separate features.
On 10/24/2024 12:42 AM, Chao Gao wrote: >> @@ -7197,6 +7250,9 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) >> msrs->basic |= VMX_BASIC_TRUE_CTLS; >> if (cpu_has_vmx_basic_inout()) >> msrs->basic |= VMX_BASIC_INOUT; >> + >> + if (cpu_has_vmx_fred()) >> + msrs->basic |= VMX_BASIC_NESTED_EXCEPTION; > > why not advertising VMX_BASIC_NESTED_EXCEPTION if the CPU supports it? just like > VMX_BASIC_INOUT right above. Because VMX nested-exception support only works with FRED. We could pass host MSR_IA32_VMX_BASIC.VMX_BASIC_NESTED_EXCEPTION to nested, but it's meaningless w/o VMX FRED. > > >> } >> >> static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs) >> diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h >> index 2c296b6abb8c..5272f617fcef 100644 >> --- a/arch/x86/kvm/vmx/nested.h >> +++ b/arch/x86/kvm/vmx/nested.h >> @@ -251,6 +251,14 @@ static inline bool nested_cpu_has_encls_exit(struct vmcs12 *vmcs12) >> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING); >> } >> >> +static inline bool nested_cpu_has_fred(struct vmcs12 *vmcs12) >> +{ >> + return vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_FRED && >> + vmcs12->vm_exit_controls & VM_EXIT_ACTIVATE_SECONDARY_CONTROLS && >> + vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_SAVE_IA32_FRED && >> + vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_LOAD_IA32_FRED; > > Is it a requirement in the SDM that the VMM should enable all FRED controls or > none? If not, the VMM is allowed to enable only one or two of them. This means > KVM would need to emulate FRED controls for the L1 VMM as three separate > features. The SDM doesn't say that. But FRED states are used during and immediately after VM entry and exit, I don't see a good reason for a VMM to enable only one or two of the 3 save/load configs. Say if VM_ENTRY_LOAD_IA32_FRED is not set, it means a VMM needs to switch to guest FRED states before it does a VM entry, which is absolutely a big mess. TBH I'm not sure this is the question you have in mind.
On Fri, Oct 25, 2024 at 12:25:45AM -0700, Xin Li wrote: >On 10/24/2024 12:42 AM, Chao Gao wrote: >> > @@ -7197,6 +7250,9 @@ static void nested_vmx_setup_basic(struct nested_vmx_msrs *msrs) >> > msrs->basic |= VMX_BASIC_TRUE_CTLS; >> > if (cpu_has_vmx_basic_inout()) >> > msrs->basic |= VMX_BASIC_INOUT; >> > + >> > + if (cpu_has_vmx_fred()) >> > + msrs->basic |= VMX_BASIC_NESTED_EXCEPTION; >> >> why not advertising VMX_BASIC_NESTED_EXCEPTION if the CPU supports it? just like >> VMX_BASIC_INOUT right above. > >Because VMX nested-exception support only works with FRED. > >We could pass host MSR_IA32_VMX_BASIC.VMX_BASIC_NESTED_EXCEPTION to >nested, but it's meaningless w/o VMX FRED. But it seems KVM cannot benefit from this attempt to avoid meaningless configurations because on FRED-capable system, the userspace VMM can choose to hide FRED and expose VMX nested exceptions alone. KVM needs to handle this case anyway. I suggest not bothering with it. > >> >> >> > } >> > >> > static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs) >> > diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h >> > index 2c296b6abb8c..5272f617fcef 100644 >> > --- a/arch/x86/kvm/vmx/nested.h >> > +++ b/arch/x86/kvm/vmx/nested.h >> > @@ -251,6 +251,14 @@ static inline bool nested_cpu_has_encls_exit(struct vmcs12 *vmcs12) >> > return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING); >> > } >> > >> > +static inline bool nested_cpu_has_fred(struct vmcs12 *vmcs12) >> > +{ >> > + return vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_FRED && >> > + vmcs12->vm_exit_controls & VM_EXIT_ACTIVATE_SECONDARY_CONTROLS && >> > + vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_SAVE_IA32_FRED && >> > + vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_LOAD_IA32_FRED; >> >> Is it a requirement in the SDM that the VMM should enable all FRED controls or >> none? If not, the VMM is allowed to enable only one or two of them. This means >> KVM would need to emulate FRED controls for the L1 VMM as three separate >> features. > >The SDM doesn't say that. But FRED states are used during and >immediately after VM entry and exit, I don't see a good reason for a VMM >to enable only one or two of the 3 save/load configs. > >Say if VM_ENTRY_LOAD_IA32_FRED is not set, it means a VMM needs to >switch to guest FRED states before it does a VM entry, which is >absolutely a big mess. If the VMM doesn't enable FRED, it's fine to load guest FRED states before VM entry, right? The key is to emulate hardware behavior accurately without making assumptions about guests. If some combinations of controls cannot be emulated properly, KVM should report internal errors at some point. > >TBH I'm not sure this is the question you have in mind.
On Mon, Oct 28, 2024, Chao Gao wrote: > On Fri, Oct 25, 2024 at 12:25:45AM -0700, Xin Li wrote: > >> > static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs) > >> > diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h > >> > index 2c296b6abb8c..5272f617fcef 100644 > >> > --- a/arch/x86/kvm/vmx/nested.h > >> > +++ b/arch/x86/kvm/vmx/nested.h > >> > @@ -251,6 +251,14 @@ static inline bool nested_cpu_has_encls_exit(struct vmcs12 *vmcs12) > >> > return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING); > >> > } > >> > > >> > +static inline bool nested_cpu_has_fred(struct vmcs12 *vmcs12) > >> > +{ > >> > + return vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_FRED && > >> > + vmcs12->vm_exit_controls & VM_EXIT_ACTIVATE_SECONDARY_CONTROLS && > >> > + vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_SAVE_IA32_FRED && > >> > + vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_LOAD_IA32_FRED; > >> > >> Is it a requirement in the SDM that the VMM should enable all FRED controls or > >> none? If not, the VMM is allowed to enable only one or two of them. This means > >> KVM would need to emulate FRED controls for the L1 VMM as three separate > >> features. > > > >The SDM doesn't say that. But FRED states are used during and > >immediately after VM entry and exit, I don't see a good reason for a VMM > >to enable only one or two of the 3 save/load configs. Not KVM's concern. > >Say if VM_ENTRY_LOAD_IA32_FRED is not set, it means a VMM needs to > >switch to guest FRED states before it does a VM entry, which is > >absolutely a big mess. Again, not KVM's concern. > If the VMM doesn't enable FRED, it's fine to load guest FRED states before VM > entry, right? Yep. Or if L1 is simply broken and elects to manually load FRED state before VM-Enter instead of using VM_ENTRY_LOAD_IA32_FRED, then any badness that happens is 100% L1's problem to deal with. KVM's responsiblity is to emulate the architectural behavior, what L1 may or may not do is irrelevant. > The key is to emulate hardware behavior accurately without making assumptions > about guests. +1000 > If some combinations of controls cannot be emulated properly, KVM > should report internal errors at some point.
On 10/28/2024 11:27 AM, Sean Christopherson wrote: > On Mon, Oct 28, 2024, Chao Gao wrote: >> On Fri, Oct 25, 2024 at 12:25:45AM -0700, Xin Li wrote: >>>>> static void nested_vmx_setup_cr_fixed(struct nested_vmx_msrs *msrs) >>>>> diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h >>>>> index 2c296b6abb8c..5272f617fcef 100644 >>>>> --- a/arch/x86/kvm/vmx/nested.h >>>>> +++ b/arch/x86/kvm/vmx/nested.h >>>>> @@ -251,6 +251,14 @@ static inline bool nested_cpu_has_encls_exit(struct vmcs12 *vmcs12) >>>>> return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING); >>>>> } >>>>> >>>>> +static inline bool nested_cpu_has_fred(struct vmcs12 *vmcs12) >>>>> +{ >>>>> + return vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_FRED && >>>>> + vmcs12->vm_exit_controls & VM_EXIT_ACTIVATE_SECONDARY_CONTROLS && >>>>> + vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_SAVE_IA32_FRED && >>>>> + vmcs12->secondary_vm_exit_controls & SECONDARY_VM_EXIT_LOAD_IA32_FRED; >>>> >>>> Is it a requirement in the SDM that the VMM should enable all FRED controls or >>>> none? If not, the VMM is allowed to enable only one or two of them. This means >>>> KVM would need to emulate FRED controls for the L1 VMM as three separate >>>> features. >>> >>> The SDM doesn't say that. But FRED states are used during and >>> immediately after VM entry and exit, I don't see a good reason for a VMM >>> to enable only one or two of the 3 save/load configs. > > Not KVM's concern. > >>> Say if VM_ENTRY_LOAD_IA32_FRED is not set, it means a VMM needs to >>> switch to guest FRED states before it does a VM entry, which is >>> absolutely a big mess. > > Again, not KVM's concern. > >> If the VMM doesn't enable FRED, it's fine to load guest FRED states before VM >> entry, right? > > Yep. Or if L1 is simply broken and elects to manually load FRED state before > VM-Enter instead of using VM_ENTRY_LOAD_IA32_FRED, then any badness that happens > is 100% L1's problem to deal with. KVM's responsiblity is to emulate the > architectural behavior, what L1 may or may not do is irrelevant. Damn, obviously I COMPLETELY missed this point. Let me think how should KVM as L0 handle it. > >> The key is to emulate hardware behavior accurately without making assumptions >> about guests. > > +1000 > >> If some combinations of controls cannot be emulated properly, KVM >> should report internal errors at some point. Yeah, only if CANNOT. Otherwise a broken VMM will behave differently on real hardware and KVM, even if it crashes in a way which it never knows about, right?
© 2016 - 2024 Red Hat, Inc.