[v4] Enable CET Virtualization

[PATCH v4 10/20] KVM:x86: Make guest supervisor states as non-XSAVE managed

Posted by Yang Weijiang 2 years, 6 months ago

Save and reload guest CET supervisor states, i.e., PL{0,1,2}_SSP,
when the vCPU context is swapped before and after userspace <-> kernel
entry, and do the same when the vCPU is scheduled in or scheduled out.

Manage CET supervisor states in KVM, rather than through XSAVE, to
avoid:
 - Introducing unnecessary XSAVE operations when switching to non-vCPU
   userspace within the current FPU framework.
 - Forcing allocation of additional space for CET supervisor states in
   each thread context, regardless of whether it is a vCPU thread or not.

Add a new helper, kvm_arch_sched_out(), for that purpose. Adding the
support in kvm_arch_vcpu_put()/load() without the new helper looks
possible, but the put/load functions are also called from
vcpu_put()/load(), which are heavily used in KVM, so adding a new
helper makes the implementation clearer.

Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
---
 arch/arm64/include/asm/kvm_host.h   |  1 +
 arch/mips/include/asm/kvm_host.h    |  1 +
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/riscv/include/asm/kvm_host.h   |  1 +
 arch/s390/include/asm/kvm_host.h    |  1 +
 arch/x86/kvm/x86.c                  | 37 +++++++++++++++++++++++++++++
 include/linux/kvm_host.h            |  1 +
 virt/kvm/kvm_main.c                 |  1 +
 8 files changed, 44 insertions(+)

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 7e7e19ef6993..98235cb3d258 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -1023,6 +1023,7 @@ void kvm_arm_vcpu_ptrauth_trap(struct kvm_vcpu *vcpu);
 
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu) {}
 
 void kvm_arm_init_debug(void);
 void kvm_arm_vcpu_init_debug(struct kvm_vcpu *vcpu);
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 957121a495f0..56c5e85ba5a3 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -893,6 +893,7 @@ static inline void kvm_arch_free_memslot(struct kvm *kvm,
 					 struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 14ee0dece853..11587d953bf6 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -880,6 +880,7 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h
index ee0acccb1d3b..6ff4a04fe0f2 100644
--- a/arch/riscv/include/asm/kvm_host.h
+++ b/arch/riscv/include/asm/kvm_host.h
@@ -244,6 +244,7 @@ struct kvm_vcpu_arch {
 
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu) {}
 
 #define KVM_ARCH_WANT_MMU_NOTIFIER
 
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 2bbc3d54959d..d1750a6a86cf 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -1033,6 +1033,7 @@ extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
 
 static inline void kvm_arch_sync_events(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_free_memslot(struct kvm *kvm,
 					 struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) {}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7a3753c05c09..f7558f0f6fc0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -11212,6 +11212,33 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 	trace_kvm_fpu(0);
 }
 
+static void kvm_save_cet_supervisor_ssp(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	if (unlikely(guest_can_use(vcpu, X86_FEATURE_SHSTK))) {
+		rdmsrl(MSR_IA32_PL0_SSP, vcpu->arch.cet_s_ssp[0]);
+		rdmsrl(MSR_IA32_PL1_SSP, vcpu->arch.cet_s_ssp[1]);
+		rdmsrl(MSR_IA32_PL2_SSP, vcpu->arch.cet_s_ssp[2]);
+		/*
+		 * Omit reset to host PL{1,2}_SSP because Linux will never use
+		 * these MSRs.
+		 */
+		wrmsrl(MSR_IA32_PL0_SSP, 0);
+	}
+	preempt_enable();
+}
+
+static void kvm_reload_cet_supervisor_ssp(struct kvm_vcpu *vcpu)
+{
+	preempt_disable();
+	if (unlikely(guest_can_use(vcpu, X86_FEATURE_SHSTK))) {
+		wrmsrl(MSR_IA32_PL0_SSP, vcpu->arch.cet_s_ssp[0]);
+		wrmsrl(MSR_IA32_PL1_SSP, vcpu->arch.cet_s_ssp[1]);
+		wrmsrl(MSR_IA32_PL2_SSP, vcpu->arch.cet_s_ssp[2]);
+	}
+	preempt_enable();
+}
+
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 {
 	struct kvm_queued_exception *ex = &vcpu->arch.exception;
@@ -11222,6 +11249,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	kvm_sigset_activate(vcpu);
 	kvm_run->flags = 0;
 	kvm_load_guest_fpu(vcpu);
+	kvm_reload_cet_supervisor_ssp(vcpu);
 
 	kvm_vcpu_srcu_read_lock(vcpu);
 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
@@ -11310,6 +11338,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
 	r = vcpu_run(vcpu);
 
 out:
+	kvm_save_cet_supervisor_ssp(vcpu);
 	kvm_put_guest_fpu(vcpu);
 	if (kvm_run->kvm_valid_regs)
 		store_regs(vcpu);
@@ -12398,9 +12427,17 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
 		pmu->need_cleanup = true;
 		kvm_make_request(KVM_REQ_PMU, vcpu);
 	}
+
+	kvm_reload_cet_supervisor_ssp(vcpu);
+
 	static_call(kvm_x86_sched_in)(vcpu, cpu);
 }
 
+void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu)
+{
+	kvm_save_cet_supervisor_ssp(vcpu);
+}
+
 void kvm_arch_free_vm(struct kvm *kvm)
 {
 	kfree(to_kvm_hv(kvm)->hv_pa_pg);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index d90331f16db1..b3032a5f0641 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1423,6 +1423,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
 int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
 
 void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
+void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu);
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 66c1447d3c7f..42f28e8905e1 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -5885,6 +5885,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
 
+	kvm_arch_sched_out(vcpu, 0);
 	if (current->on_rq) {
 		WRITE_ONCE(vcpu->preempted, true);
 		WRITE_ONCE(vcpu->ready, true);
-- 
2.27.0

Re: [PATCH v4 10/20] KVM:x86: Make guest supervisor states as non-XSAVE managed

Posted by Chao Gao 2 years, 6 months ago

On Thu, Jul 20, 2023 at 11:03:42PM -0400, Yang Weijiang wrote:
>+static void kvm_save_cet_supervisor_ssp(struct kvm_vcpu *vcpu)
>+{
>+	preempt_disable();

what's the purpose of disabling preemption?

>+	if (unlikely(guest_can_use(vcpu, X86_FEATURE_SHSTK))) {
>+		rdmsrl(MSR_IA32_PL0_SSP, vcpu->arch.cet_s_ssp[0]);
>+		rdmsrl(MSR_IA32_PL1_SSP, vcpu->arch.cet_s_ssp[1]);
>+		rdmsrl(MSR_IA32_PL2_SSP, vcpu->arch.cet_s_ssp[2]);
>+		/*
>+		 * Omit reset to host PL{1,2}_SSP because Linux will never use
>+		 * these MSRs.
>+		 */
>+		wrmsrl(MSR_IA32_PL0_SSP, 0);

You don't need to reset the MSR because current host doesn't enable SSS
and leaving guest value in the MSR won't affect host behavior.

>+	}
>+	preempt_enable();
>+}
>+
>+static void kvm_reload_cet_supervisor_ssp(struct kvm_vcpu *vcpu)
>+{
>+	preempt_disable();
>+	if (unlikely(guest_can_use(vcpu, X86_FEATURE_SHSTK))) {
>+		wrmsrl(MSR_IA32_PL0_SSP, vcpu->arch.cet_s_ssp[0]);
>+		wrmsrl(MSR_IA32_PL1_SSP, vcpu->arch.cet_s_ssp[1]);
>+		wrmsrl(MSR_IA32_PL2_SSP, vcpu->arch.cet_s_ssp[2]);
>+	}
>+	preempt_enable();
>+}

save/load PLx_SSP in kvm_sched_in/out() and in VCPU_RUN ioctl is sub-optimal.

How about:
1. expose kvm_save/reload_cet_supervisor_ssp()
2. reload guest PLx_SSP in {vmx,svm}_prepare_switch_to_guest()
3. save guest PLx_SSP in vmx_prepare_switch_to_host() and
   svm_prepare_host_switch()?

this way existing svm/vmx->guest_state_loaded can help to reduce a lot of
unnecessary PLx_SSP MSR accesses.

>+
> int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
> {
> 	struct kvm_queued_exception *ex = &vcpu->arch.exception;
>@@ -11222,6 +11249,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
> 	kvm_sigset_activate(vcpu);
> 	kvm_run->flags = 0;
> 	kvm_load_guest_fpu(vcpu);
>+	kvm_reload_cet_supervisor_ssp(vcpu);
> 
> 	kvm_vcpu_srcu_read_lock(vcpu);
> 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
>@@ -11310,6 +11338,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
> 	r = vcpu_run(vcpu);
> 
> out:
>+	kvm_save_cet_supervisor_ssp(vcpu);
> 	kvm_put_guest_fpu(vcpu);
> 	if (kvm_run->kvm_valid_regs)
> 		store_regs(vcpu);
>@@ -12398,9 +12427,17 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
> 		pmu->need_cleanup = true;
> 		kvm_make_request(KVM_REQ_PMU, vcpu);
> 	}
>+
>+	kvm_reload_cet_supervisor_ssp(vcpu);
>+
> 	static_call(kvm_x86_sched_in)(vcpu, cpu);
> }
> 
>+void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu)
>+{

@cpu its meaning isn't clear and isn't used and ...

>+	kvm_save_cet_supervisor_ssp(vcpu);
>+}
>+
> void kvm_arch_free_vm(struct kvm *kvm)
> {
> 	kfree(to_kvm_hv(kvm)->hv_pa_pg);
>diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>index d90331f16db1..b3032a5f0641 100644
>--- a/include/linux/kvm_host.h
>+++ b/include/linux/kvm_host.h
>@@ -1423,6 +1423,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
> 
> void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
>+void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu);
> 
> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
> void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
>diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>index 66c1447d3c7f..42f28e8905e1 100644
>--- a/virt/kvm/kvm_main.c
>+++ b/virt/kvm/kvm_main.c
>@@ -5885,6 +5885,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
> {
> 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
> 
>+	kvm_arch_sched_out(vcpu, 0);

passing 0 always looks problematic.

> 	if (current->on_rq) {
> 		WRITE_ONCE(vcpu->preempted, true);
> 		WRITE_ONCE(vcpu->ready, true);
>-- 
>2.27.0
>

Re: [PATCH v4 10/20] KVM:x86: Make guest supervisor states as non-XSAVE managed

Posted by Yang, Weijiang 2 years, 6 months ago

On 7/24/2023 4:26 PM, Chao Gao wrote:
> On Thu, Jul 20, 2023 at 11:03:42PM -0400, Yang Weijiang wrote:
>> +static void kvm_save_cet_supervisor_ssp(struct kvm_vcpu *vcpu)
>> +{
>> +	preempt_disable();
> what's the purpose of disabling preemption?

Thanks!

These preempt_disable()/preempt_enable() calls become unnecessary due to
the PLx_SSP handling in sched_in/out(). Will remove them.

>
>> +	if (unlikely(guest_can_use(vcpu, X86_FEATURE_SHSTK))) {
>> +		rdmsrl(MSR_IA32_PL0_SSP, vcpu->arch.cet_s_ssp[0]);
>> +		rdmsrl(MSR_IA32_PL1_SSP, vcpu->arch.cet_s_ssp[1]);
>> +		rdmsrl(MSR_IA32_PL2_SSP, vcpu->arch.cet_s_ssp[2]);
>> +		/*
>> +		 * Omit reset to host PL{1,2}_SSP because Linux will never use
>> +		 * these MSRs.
>> +		 */
>> +		wrmsrl(MSR_IA32_PL0_SSP, 0);
> You don't need to reset the MSR because current host doesn't enable SSS
> and leaving guest value in the MSR won't affect host behavior.

Yes, I just want to keep the host PLx_SSP MSRs as clean as possible.

>
>> +	}
>> +	preempt_enable();
>> +}
>> +
>> +static void kvm_reload_cet_supervisor_ssp(struct kvm_vcpu *vcpu)
>> +{
>> +	preempt_disable();
>> +	if (unlikely(guest_can_use(vcpu, X86_FEATURE_SHSTK))) {
>> +		wrmsrl(MSR_IA32_PL0_SSP, vcpu->arch.cet_s_ssp[0]);
>> +		wrmsrl(MSR_IA32_PL1_SSP, vcpu->arch.cet_s_ssp[1]);
>> +		wrmsrl(MSR_IA32_PL2_SSP, vcpu->arch.cet_s_ssp[2]);
>> +	}
>> +	preempt_enable();
>> +}
> save/load PLx_SSP in kvm_sched_in/out() and in VCPU_RUN ioctl is sub-optimal.
>
> How about:
> 1. expose kvm_save/reload_cet_supervisor_ssp()
> 2. reload guest PLx_SSP in {vmx,svm}_prepare_switch_to_guest()
> 3. save guest PLx_SSP in vmx_prepare_switch_to_host() and
>     svm_prepare_host_switch()?
>
> this way existing svm/vmx->guest_state_loaded can help to reduce a lot of
> unnecessary PLx_SSP MSR accesses.

Nice suggestion! It looks workable. I'll try this,  thanks!

>
>> +
>> int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>> {
>> 	struct kvm_queued_exception *ex = &vcpu->arch.exception;
>> @@ -11222,6 +11249,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>> 	kvm_sigset_activate(vcpu);
>> 	kvm_run->flags = 0;
>> 	kvm_load_guest_fpu(vcpu);
>> +	kvm_reload_cet_supervisor_ssp(vcpu);
>>
>> 	kvm_vcpu_srcu_read_lock(vcpu);
>> 	if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
>> @@ -11310,6 +11338,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu)
>> 	r = vcpu_run(vcpu);
>>
>> out:
>> +	kvm_save_cet_supervisor_ssp(vcpu);
>> 	kvm_put_guest_fpu(vcpu);
>> 	if (kvm_run->kvm_valid_regs)
>> 		store_regs(vcpu);
>> @@ -12398,9 +12427,17 @@ void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
>> 		pmu->need_cleanup = true;
>> 		kvm_make_request(KVM_REQ_PMU, vcpu);
>> 	}
>> +
>> +	kvm_reload_cet_supervisor_ssp(vcpu);
>> +
>> 	static_call(kvm_x86_sched_in)(vcpu, cpu);
>> }
>>
>> +void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu)
>> +{
> @cpu its meaning isn't clear and isn't used and ...
Yes, I should have removed it.
>
>> +	kvm_save_cet_supervisor_ssp(vcpu);
>> +}
>> +
>> void kvm_arch_free_vm(struct kvm *kvm)
>> {
>> 	kfree(to_kvm_hv(kvm)->hv_pa_pg);
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index d90331f16db1..b3032a5f0641 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -1423,6 +1423,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>> int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu);
>>
>> void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu);
>> +void kvm_arch_sched_out(struct kvm_vcpu *vcpu, int cpu);
>>
>> void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
>> void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
>> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
>> index 66c1447d3c7f..42f28e8905e1 100644
>> --- a/virt/kvm/kvm_main.c
>> +++ b/virt/kvm/kvm_main.c
>> @@ -5885,6 +5885,7 @@ static void kvm_sched_out(struct preempt_notifier *pn,
>> {
>> 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);
>>
>> +	kvm_arch_sched_out(vcpu, 0);
> passing 0 always looks problematic.
Can you elaborate? I have no intent to use @cpu now.
>> 	if (current->on_rq) {
>> 		WRITE_ONCE(vcpu->preempted, true);
>> 		WRITE_ONCE(vcpu->ready, true);
>> -- 
>> 2.27.0
>>