Add Bus Lock Detect support in AMD SVM. Bus Lock Detect is enabled through
MSR_IA32_DEBUGCTLMSR and MSR_IA32_DEBUGCTLMSR is virtualized only if LBR
Virtualization is enabled. Add this dependency in the SVM.
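For illustration, a minimal guest-side sketch of arming the feature (this
snippet is not part of the patch and assumes the usual <asm/msr.h> helpers;
DEBUGCTLMSR_BUS_LOCK_DETECT is bit 2 of MSR_IA32_DEBUGCTLMSR):

	#include <asm/msr.h>

	static void guest_enable_bus_lock_detect(void)
	{
		u64 debugctl;

		/* Set DEBUGCTL[2]; the CPU then raises #DB when it detects a bus lock. */
		rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
		debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
		wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
	}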
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
---
arch/x86/kvm/svm/nested.c | 3 ++-
arch/x86/kvm/svm/svm.c | 17 ++++++++++++++---
2 files changed, 16 insertions(+), 4 deletions(-)
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index 6f704c1037e5..1df9158c72c1 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -586,7 +586,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
/* These bits will be set properly on the first execution when new_vmc12 is true */
if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
- svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
+ /* DR6_RTM is a reserved bit on AMD and as such must be set to 1 */
+ svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_FIXED_1 | DR6_RTM;
vmcb_mark_dirty(vmcb02, VMCB_DR);
}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index e1b6a16e97c0..9f3d31a5d231 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -1047,7 +1047,8 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
- bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
+ u64 dbgctl_buslock_lbr = DEBUGCTLMSR_BUS_LOCK_DETECT | DEBUGCTLMSR_LBR;
+ bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & dbgctl_buslock_lbr) ||
(is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
@@ -3158,6 +3159,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
if (data & DEBUGCTL_RESERVED_BITS)
return 1;
+ if ((data & DEBUGCTLMSR_BUS_LOCK_DETECT) &&
+ !guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+ return 1;
+
svm_get_lbr_vmcb(svm)->save.dbgctl = data;
svm_update_lbrv(vcpu);
break;
@@ -5225,8 +5230,14 @@ static __init void svm_set_cpu_caps(void)
/* CPUID 0x8000001F (SME/SEV features) */
sev_set_cpu_caps();
- /* Don't advertise Bus Lock Detect to guest if SVM support is absent */
- kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
+ /*
+ * LBR Virtualization must be enabled to support BusLockTrap inside the
+ * guest, since BusLockTrap is enabled through MSR_IA32_DEBUGCTLMSR and
+ * MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
+ * enabled.
+ */
+ if (!lbrv)
+ kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
}
static __init int svm_hardware_setup(void)
--
2.34.1
On Thu, Aug 08, 2024, Ravi Bangoria wrote:
> Add Bus Lock Detect support in AMD SVM. Bus Lock Detect is enabled through
> MSR_IA32_DEBUGCTLMSR and MSR_IA32_DEBUGCTLMSR is virtualized only if LBR
> Virtualization is enabled. Add this dependency in the SVM.
This doesn't depend on the x86 patches that have gone into tip-tree, correct?
In the future, unless there's an actual dependency in code or functionality,
please send separate series for patches that obviously need to be routed through
different trees.
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
> ---
> arch/x86/kvm/svm/nested.c | 3 ++-
> arch/x86/kvm/svm/svm.c | 17 ++++++++++++++---
> 2 files changed, 16 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
> index 6f704c1037e5..1df9158c72c1 100644
> --- a/arch/x86/kvm/svm/nested.c
> +++ b/arch/x86/kvm/svm/nested.c
> @@ -586,7 +586,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
> /* These bits will be set properly on the first execution when new_vmc12 is true */
> if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
> vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
> - svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
> + /* DR6_RTM is a reserved bit on AMD and as such must be set to 1 */
> + svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_FIXED_1 | DR6_RTM;
> vmcb_mark_dirty(vmcb02, VMCB_DR);
> }
>
> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> index e1b6a16e97c0..9f3d31a5d231 100644
> --- a/arch/x86/kvm/svm/svm.c
> +++ b/arch/x86/kvm/svm/svm.c
> @@ -1047,7 +1047,8 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
> - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
> + u64 dbgctl_buslock_lbr = DEBUGCTLMSR_BUS_LOCK_DETECT | DEBUGCTLMSR_LBR;
> + bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & dbgctl_buslock_lbr) ||
> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
Out of sight, but this leads to calling svm_enable_lbrv() even when the guest
just wants to enable BUS_LOCK_DETECT. Ignoring SEV-ES guests, KVM will intercept
writes to DEBUGCTL, so can't KVM defer mucking with the intercepts and
svm_copy_lbrs() until the guest actually wants to use LBRs?
Hmm, and I think the existing code is broken. If L1 passes DEBUGCTL through to
L2, then KVM will handle writes to L1's effective value. And if L1 also passes
through the LBRs, then KVM will fail to update the MSR bitmaps for vmcb02.
Ah, it's just a performance issue though, because KVM will still emulate RDMSR.
Ugh, this code is silly. The LBR MSRs are read-only, yet KVM passes them through
for write.
Anyways, I'm thinking something like this? Note, using msr_write_intercepted()
is wrong, because that'll check L2's bitmap if is_guest_mode(), and the idea is
to use L1's bitmap as the canary.
static void svm_update_passthrough_lbrs(struct kvm_vcpu *vcpu, bool passthrough)
{
struct vcpu_svm *svm = to_svm(vcpu);
KVM_BUG_ON(!passthrough && sev_es_guest(vcpu->kvm), vcpu->kvm);
if (!msr_write_intercepted(vcpu, MSR_IA32_LASTBRANCHFROMIP) == passthrough)
return;
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, passthrough, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, passthrough, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, passthrough, 0);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, passthrough, 0);
/*
* When enabling, move the LBR msrs to vmcb02 so that L2 can see them,
* and then move them back to vmcb01 when disabling to avoid copying
* them on nested guest entries.
*/
if (is_guest_mode(vcpu)) {
if (passthrough)
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
else
svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}
}
void svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (WARN_ON_ONCE(!sev_es_guest(vcpu->kvm)))
return;
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
svm_update_passthrough_lbrs(vcpu, true);
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
}
static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
{
/*
* If LBR virtualization is disabled, the LBR MSRs are always kept in
* vmcb01. If LBR virtualization is enabled and L1 is running VMs of
* its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
*/
return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
svm->vmcb01.ptr;
}
void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
u64 guest_debugctl = svm_get_lbr_vmcb(svm)->save.dbgctl;
bool enable_lbrv = (guest_debugctl & DEBUGCTLMSR_LBR) ||
(is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
if (enable_lbrv || (guest_debugctl & DEBUGCTLMSR_BUS_LOCK_DETECT))
svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
else
svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
svm_update_passthrough_lbrs(vcpu, enable_lbrv);
}
> @@ -3158,6 +3159,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> if (data & DEBUGCTL_RESERVED_BITS)
Not your code, but why does DEBUGCTL_RESERVED_BITS = ~0x3f!?!? That means the
introduction of the below check, which is architecturally correct, has the
potential to break guests. *sigh*
I doubt it will cause a problem, but it's something to look out for.
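To make that concrete, a minimal sketch with illustrative mask names (not
KVM's actual definitions): under the historical ~0x3f mask, a write that sets
the no-op PB bits 5:2 is accepted, while the architectural mask would reject it:

	/* Illustrative only: bits 5:0 writable historically, vs. only
	 * LBR (bit 0) and BTF (bit 1) being architecturally defined. */
	#define DEBUGCTL_RESERVED_BITS_LEGACY	(~0x3fULL)
	#define DEBUGCTL_RESERVED_BITS_ARCH	(~0x03ULL)

	static bool debugctl_write_ok(u64 data, u64 reserved_bits)
	{
		return !(data & reserved_bits);
	}

	/* data = 0x3c (PB bits 5:2 set, harmless no-ops on Zen):
	 *   debugctl_write_ok(0x3c, DEBUGCTL_RESERVED_BITS_LEGACY) -> true
	 *   debugctl_write_ok(0x3c, DEBUGCTL_RESERVED_BITS_ARCH)   -> false
	 */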
> return 1;
>
> + if ((data & DEBUGCTLMSR_BUS_LOCK_DETECT) &&
> + !guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
> + return 1;
> +
> svm_get_lbr_vmcb(svm)->save.dbgctl = data;
> svm_update_lbrv(vcpu);
> break;
> @@ -5225,8 +5230,14 @@ static __init void svm_set_cpu_caps(void)
> /* CPUID 0x8000001F (SME/SEV features) */
> sev_set_cpu_caps();
>
> - /* Don't advertise Bus Lock Detect to guest if SVM support is absent */
> - kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
> + /*
> + * LBR Virtualization must be enabled to support BusLockTrap inside the
> + * guest, since BusLockTrap is enabled through MSR_IA32_DEBUGCTLMSR and
> + * MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
> + * enabled.
> + */
> + if (!lbrv)
> + kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
> }
>
> static __init int svm_hardware_setup(void)
> --
> 2.34.1
>
Hi,
On 17-08-2024 05:43, Sean Christopherson wrote:
> On Thu, Aug 08, 2024, Ravi Bangoria wrote:
>> Add Bus Lock Detect support in AMD SVM. Bus Lock Detect is enabled through
>> MSR_IA32_DEBUGCTLMSR and MSR_IA32_DEBUGCTLMSR is virtualized only if LBR
>> Virtualization is enabled. Add this dependency in the SVM.
>
> This doesn't depend on the x86 patches that have gone into tip-tree, correct?
>
> In the future, unless there's an actual dependency in code or functionality,
> please send separate series for patches that obviously need to be routed through
> different trees.
>
>> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
>> Reviewed-by: Tom Lendacky <thomas.lendacky@amd.com>
>> ---
>> arch/x86/kvm/svm/nested.c | 3 ++-
>> arch/x86/kvm/svm/svm.c | 17 ++++++++++++++---
>> 2 files changed, 16 insertions(+), 4 deletions(-)
>>
>> diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
>> index 6f704c1037e5..1df9158c72c1 100644
>> --- a/arch/x86/kvm/svm/nested.c
>> +++ b/arch/x86/kvm/svm/nested.c
>> @@ -586,7 +586,8 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12
>> /* These bits will be set properly on the first execution when new_vmc12 is true */
>> if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
>> vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
>> - svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
>> + /* DR6_RTM is a reserved bit on AMD and as such must be set to 1 */
>> + svm->vcpu.arch.dr6 = svm->nested.save.dr6 | DR6_FIXED_1 | DR6_RTM;
>> vmcb_mark_dirty(vmcb02, VMCB_DR);
>> }
>>
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index e1b6a16e97c0..9f3d31a5d231 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -1047,7 +1047,8 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
>> {
>> struct vcpu_svm *svm = to_svm(vcpu);
>> bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
>> - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
>> + u64 dbgctl_buslock_lbr = DEBUGCTLMSR_BUS_LOCK_DETECT | DEBUGCTLMSR_LBR;
>> + bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & dbgctl_buslock_lbr) ||
>> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
>> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
>
> Out of sight, but this leads to calling svm_enable_lbrv() even when the guest
> just wants to enable BUS_LOCK_DETECT. Ignoring SEV-ES guests, KVM will intercept
> writes to DEBUGCTL, so can't KVM defer mucking with the intercepts and
> svm_copy_lbrs() until the guest actually wants to use LBRs?
>
> Hmm, and I think the existing code is broken. If L1 passes DEBUGCTL through to
> L2, then KVM will handle writes to L1's effective value. And if L1 also passes
> through the LBRs, then KVM will fail to update the MSR bitmaps for vmcb02.
>
> Ah, it's just a performance issue though, because KVM will still emulate RDMSR.
>
> Ugh, this code is silly. The LBR MSRs are read-only, yet KVM passes them through
> for write.
>
> Anyways, I'm thinking something like this? Note, using msr_write_intercepted()
> is wrong, because that'll check L2's bitmap if is_guest_mode(), and the idea is
> to use L1's bitmap as the canary.
>
> static void svm_update_passthrough_lbrs(struct kvm_vcpu *vcpu, bool passthrough)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> KVM_BUG_ON(!passthrough && sev_es_guest(vcpu->kvm), vcpu->kvm);
>
> if (!msr_write_intercepted(vcpu, MSR_IA32_LASTBRANCHFROMIP) == passthrough)
> return;
>
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, passthrough, 0);
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, passthrough, 0);
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, passthrough, 0);
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, passthrough, 0);
>
> /*
> * When enabling, move the LBR msrs to vmcb02 so that L2 can see them,
> * and then move them back to vmcb01 when disabling to avoid copying
> * them on nested guest entries.
> */
> if (is_guest_mode(vcpu)) {
> if (passthrough)
> svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
> else
> svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
> }
> }
>
> void svm_enable_lbrv(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> if (WARN_ON_ONCE(!sev_es_guest(vcpu->kvm)))
> return;
>
> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
> svm_update_passthrough_lbrs(vcpu, true);
>
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
> }
>
> static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
> {
> /*
> * If LBR virtualization is disabled, the LBR MSRs are always kept in
> * vmcb01. If LBR virtualization is enabled and L1 is running VMs of
> * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
> */
> return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
> svm->vmcb01.ptr;
> }
>
> void svm_update_lbrv(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> u64 guest_debugctl = svm_get_lbr_vmcb(svm)->save.dbgctl;
> bool enable_lbrv = (guest_debugctl & DEBUGCTLMSR_LBR) ||
> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
>
> if (enable_lbrv || (guest_debugctl & DEBUGCTLMSR_BUS_LOCK_DETECT))
> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
> else
> svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
>
> svm_update_passthrough_lbrs(vcpu, enable_lbrv);
> }
>
>
>> @@ -3158,6 +3159,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
>> if (data & DEBUGCTL_RESERVED_BITS)
>
> Not your code, but why does DEBUGCTL_RESERVED_BITS = ~0x3f!?!? That means the
> introduction of the below check, which is architecturally correct, has the
> potential to break guests. *sigh*
>
> I doubt it will cause a problem, but it's something to look out for.
>
>> return 1;
>>
>> + if ((data & DEBUGCTLMSR_BUS_LOCK_DETECT) &&
>> + !guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
>> + return 1;
>> +
>> svm_get_lbr_vmcb(svm)->save.dbgctl = data;
>> svm_update_lbrv(vcpu);
>> break;
>> @@ -5225,8 +5230,14 @@ static __init void svm_set_cpu_caps(void)
>> /* CPUID 0x8000001F (SME/SEV features) */
>> sev_set_cpu_caps();
>>
>> - /* Don't advertise Bus Lock Detect to guest if SVM support is absent */
>> - kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
>> + /*
>> + * LBR Virtualization must be enabled to support BusLockTrap inside the
>> + * guest, since BusLockTrap is enabled through MSR_IA32_DEBUGCTLMSR and
>> + * MSR_IA32_DEBUGCTLMSR is virtualized only if LBR Virtualization is
>> + * enabled.
>> + */
>> + if (!lbrv)
>> + kvm_cpu_cap_clear(X86_FEATURE_BUS_LOCK_DETECT);
>> }
>>
>> static __init int svm_hardware_setup(void)
>> --
>> 2.34.1
>>
Thanks, Sean, for the refactored code. I ported your implementation to the 6.16
kernel and did some testing with KVM unit tests. After instrumentation, I found
a couple of issues and potential fixes.
===========================================================
Issue 1: Interception still enabled after enabling LBRV
===========================================================
Using the 6.16 upstream kernel (unpatched), I ran the KUT tests and they passed
when run both from bare metal and from inside an L1 guest. However, for the L2
guest, the logs show that RDMSR interception of the LBR MSRs is still enabled
even though LBRV is enabled for the L2 guest. Effectively, the reads are
emulated instead of virtualized, which is not the intended behaviour. KUT
cannot distinguish between emulated and virtualized RDMSR, so the test passes
regardless.
===========================================================
Issue 2: Basic LBR KUT fails with Sean's implementation
===========================================================
With your implementation, all KUTs pass on bare metal, and with LBRV enabled
for L2, RDMSR interception of the LBR MSRs is disabled as intended. However,
when the KUT tests run inside an L1 guest, they fail.

Digging deeper, I found that when L2 attempts to enable LBR via DBGCTL, L1's
LBRs are copied over L2's LBR MSRs from svm_update_passthrough_lbrs().
/*
* When enabling, move the LBR msrs to vmcb02 so that L2 can see them,
* and then move them back to vmcb01 when disabling to avoid copying
* them on nested guest entries.
*/
if (is_guest_mode(vcpu)) {
if (passthrough)
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr); <---- copy happens here
else
svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}
This results in L2's DBGCTL and the other LBR MSRs being overwritten by L1's
values. So if L1 has DBGCTL.LBR disabled, L2 is unable to turn on LBR, and the
KUT test fails.
L2> wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
L2> DO_BRANCH(host_branch0);
L2> dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
L2> wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
L2> TEST_EXPECT_EQ(dbgctl, DEBUGCTLMSR_LBR);  <---- fails: dbgctl is still 0
Line-by-line code analysis
--------------------------
KVM> # start L1 guest
<L1> # start KUT test svm_lbrv_test0
<L2> wrmsr(MSR_IA32_DEBUGCTLMSR, DEBUGCTLMSR_LBR);
KVM> # inject wrmsr into L1
<L1> vmcb01->control.virt_ext = 1; <---- L1's vmcb01 is vmcb12 wrt KVM
<L1> # turn off intercepts
<L1> vmrun
KVM> vmcb02->control.virt_ext = 1;
KVM> svm_copy_lbrs(vmcb02, vmcb12); <---- Correct value loaded here
KVM> nested.ctl.virt_ext = 1;
KVM> # turn off intercepts
KVM> svm_copy_lbrs(vmcb02, vmcb01); <---- This overwrites the value
KVM> # start L2 guest
<L2> DO_BRANCH(host_branch0);
<L2> dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
...
<L2> wrmsr(MSR_IA32_DEBUGCTLMSR, 0);
...
<L2> TEST_EXPECT_EQ(dbgctl, DEBUGCTLMSR_LBR); <---- This check fails
===========================================================
Potential Solution
===========================================================
A potential fix is to copy the LBRs only when L1 has DBGCTL.LBR enabled, which
prevents the overwrite. I successfully tested it by running KUT at all levels,
viz. bare metal, L1, and L2. Please share your thoughts on this.
if (is_guest_mode(vcpu) && (svm->vmcb01.ptr->save.dbgctl & DEBUGCTLMSR_LBR)) {
if (passthrough)
svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
else
svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
}
If the patch below looks good, I'll split it and send it as a series.
Additionally, I added an MSR read interception API similar to the one
implemented by Ravi.
---
arch/x86/kvm/svm/svm.c | 66 ++++++++++++++++++++++++++++--------------
1 file changed, 44 insertions(+), 22 deletions(-)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index d9931c6c4bc6..f0f77199ec12 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -663,6 +663,11 @@ static void clr_dr_intercepts(struct vcpu_svm *svm)
recalc_intercepts(svm);
}
+static bool msr_read_intercepted_msrpm(void *msrpm, u32 msr)
+{
+ return svm_test_msr_bitmap_read(msrpm, msr);
+}
+
static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
{
/*
@@ -864,32 +869,49 @@ void svm_copy_lbrs(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
vmcb_mark_dirty(to_vmcb, VMCB_LBR);
}
-void svm_enable_lbrv(struct kvm_vcpu *vcpu)
+static void svm_update_passthrough_lbrs(struct kvm_vcpu *vcpu, bool passthrough)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ bool to_intercept = !passthrough;
- svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
- svm_recalc_lbr_msr_intercepts(vcpu);
+ KVM_BUG_ON(to_intercept && sev_es_guest(vcpu->kvm), vcpu->kvm);
- /* Move the LBR msrs to the vmcb02 so that the guest can see them. */
- if (is_guest_mode(vcpu))
- svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
+ if (msr_read_intercepted_msrpm(svm->msrpm, MSR_IA32_LASTBRANCHFROMIP) == to_intercept)
+ return;
+
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_R, to_intercept);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_R, to_intercept);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTINTFROMIP, MSR_TYPE_R, to_intercept);
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTINTTOIP, MSR_TYPE_R, to_intercept);
+
+ /*
+ * When enabling, move the LBR msrs to vmcb02 so that L2 can see them,
+ * and then move them back to vmcb01 when disabling to avoid copying
+ * them on nested guest entries.
+ *
+ * Perform this only when L1 has enabled LBR to prevent the overwrite.
+ */
+ if (is_guest_mode(vcpu) && (svm->vmcb01.ptr->save.dbgctl & DEBUGCTLMSR_LBR)) {
+ if (passthrough)
+ svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
+ else
+ svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
+ }
}
-static void svm_disable_lbrv(struct kvm_vcpu *vcpu)
+void svm_enable_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- KVM_BUG_ON(sev_es_guest(vcpu->kvm), vcpu->kvm);
- svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
- svm_recalc_lbr_msr_intercepts(vcpu);
+ /* Allow the function call from SEV-ES guests only */
+ if (WARN_ON_ONCE(!sev_es_guest(vcpu->kvm)))
+ return;
- /*
- * Move the LBR msrs back to the vmcb01 to avoid copying them
- * on nested guest entries.
- */
- if (is_guest_mode(vcpu))
- svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
+ svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
+
+ svm_update_passthrough_lbrs(vcpu, true);
+
+ svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, 0);
}
static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
@@ -906,18 +928,18 @@ static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
void svm_update_lbrv(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
- bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
- bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
+ u64 guest_debugctl = svm_get_lbr_vmcb(svm)->save.dbgctl;
+ bool enable_lbrv = (guest_debugctl & DEBUGCTLMSR_LBR) ||
(is_guest_mode(vcpu) && guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
(svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
- if (enable_lbrv == current_enable_lbrv)
- return;
if (enable_lbrv)
- svm_enable_lbrv(vcpu);
+ svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
else
- svm_disable_lbrv(vcpu);
+ svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
+
+ svm_update_passthrough_lbrs(vcpu, enable_lbrv);
}
void disable_nmi_singlestep(struct vcpu_svm *svm)
--
2.43.0
On Tue, Nov 11, 2025, Shivansh Dhiman wrote:
> On 17-08-2024 05:43, Sean Christopherson wrote:
> > On Thu, Aug 08, 2024, Ravi Bangoria wrote:
> >> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
> >> index e1b6a16e97c0..9f3d31a5d231 100644
> >> --- a/arch/x86/kvm/svm/svm.c
> >> +++ b/arch/x86/kvm/svm/svm.c
> >> @@ -1047,7 +1047,8 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
> >> {
> >> struct vcpu_svm *svm = to_svm(vcpu);
> >> bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
> >> - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
> >> + u64 dbgctl_buslock_lbr = DEBUGCTLMSR_BUS_LOCK_DETECT | DEBUGCTLMSR_LBR;
> >> + bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & dbgctl_buslock_lbr) ||
> >> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
> >> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
> >
> > Out of sight, but this leads to calling svm_enable_lbrv() even when the guest
> > just wants to enable BUS_LOCK_DETECT. Ignoring SEV-ES guests, KVM will intercept
> > writes to DEBUGCTL, so can't KVM defer mucking with the intercepts and
> > svm_copy_lbrs() until the guest actually wants to use LBRs?
> >
> > Hmm, and I think the existing code is broken. If L1 passes DEBUGCTL through to
> > L2, then KVM will handle writes to L1's effective value. And if L1 also passes
> > through the LBRs, then KVM will fail to update the MSR bitmaps for vmcb02.
> >
> > Ah, it's just a performance issue though, because KVM will still emulate RDMSR.
> >
> > Ugh, this code is silly. The LBR MSRs are read-only, yet KVM passes them through
> > for write.
> >
> > Anyways, I'm thinking something like this? Note, using msr_write_intercepted()
> > is wrong, because that'll check L2's bitmap if is_guest_mode(), and the idea is
> > to use L1's bitmap as the canary.
...
> ===========================================================
> Issue 1: Interception still enabled after enabling LBRV
> ===========================================================
> Using the 6.16 upstream kernel (unpatched), I ran the KUT tests and they passed
> when run both from bare metal and from inside an L1 guest. However, for the L2
> guest, the logs show that RDMSR interception of the LBR MSRs is still enabled
> even though LBRV is enabled for the L2 guest. Effectively, the reads are
> emulated instead of virtualized, which is not the intended behaviour. KUT
> cannot distinguish between emulated and virtualized RDMSR, so the test passes
> regardless.
I haven't looked closely at your patch or at Yosry's patches, but I suspect this
was _just_ fixed:
https://lore.kernel.org/all/20251108004524.1600006-1-yosry.ahmed@linux.dev
> ===========================================================
> Issue 2: Basic LBR KUT fails with Sean's implementation
> ===========================================================
> With your implementation, all KUTs pass on bare metal, and with LBRV enabled
> for L2, RDMSR interception of the LBR MSRs is disabled as intended. However,
> when the KUT tests run inside an L1 guest, they fail.
Same story here: I haven't had cycles to actually look at code, but Yosry also
posted a pile of changes for KUT:
https://lore.kernel.org/all/20251110232642.633672-1-yosry.ahmed@linux.dev
On 11-11-2025 20:45, Sean Christopherson wrote:
> On Tue, Nov 11, 2025, Shivansh Dhiman wrote:
>> On 17-08-2024 05:43, Sean Christopherson wrote:
>>> On Thu, Aug 08, 2024, Ravi Bangoria wrote:
...
>
>> ===========================================================
>> Issue 1: Interception still enabled after enabling LBRV
>> ===========================================================
>> Using the 6.16 upstream kernel (unpatched), I ran the KUT tests and they passed
>> when run both from bare metal and from inside an L1 guest. However, for the L2
>> guest, the logs show that RDMSR interception of the LBR MSRs is still enabled
>> even though LBRV is enabled for the L2 guest. Effectively, the reads are
>> emulated instead of virtualized, which is not the intended behaviour. KUT
>> cannot distinguish between emulated and virtualized RDMSR, so the test passes
>> regardless.
>
> I haven't looked closely at your patch or at Yosry's patches, but I suspect this
> was _just_ fixed:
>
> https://lore.kernel.org/all/20251108004524.1600006-1-yosry.ahmed@linux.dev

Thanks Sean. I tested Yosry's patches and they indeed have solved this issue.

>
>> ===========================================================
>> Issue 2: Basic LBR KUT fails with Sean's implementation
>> ===========================================================
>> With your implementation, all KUTs pass on bare metal, and with LBRV enabled
>> for L2, RDMSR interception of the LBR MSRs is disabled as intended. However,
>> when the KUT tests run inside an L1 guest, they fail.
>
> Same story here: I haven't had cycles to actually look at code, but Yosry also
> posted a pile of changes for KUT:
>
> https://lore.kernel.org/all/20251110232642.633672-1-yosry.ahmed@linux.dev

This issue was also related to buggy LBRV in the kernel. Yosry's patches have
fixed this issue as well, and I've verified it. There was a slight flakiness in
the KUT which was later fixed by another patch from Yosry. [1]

Thanks,
Shivansh

1. https://lore.kernel.org/all/20251113224639.2916783-1-yosry.ahmed@linux.dev/
>> @@ -3158,6 +3159,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
>> if (data & DEBUGCTL_RESERVED_BITS)
>
> Not your code, but why does DEBUGCTL_RESERVED_BITS = ~0x3f!?!? That means the
> introduction of the below check, which is architecturally correct, has the
> potential to break guests. *sigh*
>
> I doubt it will cause a problem, but it's something to look out for.
This dates back to 2008: https://git.kernel.org/torvalds/c/24e09cbf480a7
The legacy definition[1] of DEBUGCTL MSR is:
  5:2  PB: performance monitor pin control. Read-write. Reset: 0h.
       This field does not control any hardware.
  1    BTF. Read-write. Reset: 0. 1=Enable branch single step.
  0    LBR. Read-write. Reset: 0. 1=Enable last branch record.
[1]: https://bugzilla.kernel.org/attachment.cgi?id=287389
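Expressed as mask macros (illustrative names; these do not exist in
msr-index.h):

	#include <linux/bits.h>

	#define LEGACY_DEBUGCTL_LBR	BIT_ULL(0)		/* last branch record */
	#define LEGACY_DEBUGCTL_BTF	BIT_ULL(1)		/* branch single step */
	#define LEGACY_DEBUGCTL_PB	GENMASK_ULL(5, 2)	/* pin control, no hw effect */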
Thanks,
Ravi
On 21-Aug-24 11:06 AM, Ravi Bangoria wrote:
>>> @@ -3158,6 +3159,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
>>> 	if (data & DEBUGCTL_RESERVED_BITS)
>>
>> Not your code, but why does DEBUGCTL_RESERVED_BITS = ~0x3f!?!? That means the
>> introduction of the below check, which is architecturally correct, has the
>> potential to break guests. *sigh*
>>
>> I doubt it will cause a problem, but it's something to look out for.
> This dates back to 2008: https://git.kernel.org/torvalds/c/24e09cbf480a7
>
> The legacy definition[1] of DEBUGCTL MSR is:
>
>   5:2  PB: performance monitor pin control. Read-write. Reset: 0h.
>        This field does not control any hardware.
>   1    BTF. Read-write. Reset: 0. 1=Enable branch single step.
>   0    LBR. Read-write. Reset: 0. 1=Enable last branch record.
>
> [1]: https://bugzilla.kernel.org/attachment.cgi?id=287389

How about adding a cpu_feature_enabled() check:

	if (data & DEBUGCTL_RESERVED_BITS)
		return 1;

	if (cpu_feature_enabled(X86_FEATURE_BUS_LOCK_DETECT) &&
	    (data & DEBUGCTLMSR_BUS_LOCK_DETECT) &&
	    !guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
		return 1;

Thanks,
Ravi
On Wed, Aug 21, 2024, Ravi Bangoria wrote:
> On 21-Aug-24 11:06 AM, Ravi Bangoria wrote:
> >>> @@ -3158,6 +3159,10 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
> >>> 	if (data & DEBUGCTL_RESERVED_BITS)
> >>
> >> Not your code, but why does DEBUGCTL_RESERVED_BITS = ~0x3f!?!? That means the
> >> introduction of the below check, which is architecturally correct, has the
> >> potential to break guests. *sigh*
> >>
> >> I doubt it will cause a problem, but it's something to look out for.
> > This dates back to 2008: https://git.kernel.org/torvalds/c/24e09cbf480a7
> >
> > The legacy definition[1] of DEBUGCTL MSR is:
> >
> >   5:2  PB: performance monitor pin control. Read-write. Reset: 0h.
> >        This field does not control any hardware.

Uh, what? So the CPU provided 4 bits of scratch space? Or is that saying that
5:2 controlled some perfmon stuff on older CPUs, but that Zen deprecated those
bits?

> >   1    BTF. Read-write. Reset: 0. 1=Enable branch single step.
> >   0    LBR. Read-write. Reset: 0. 1=Enable last branch record.
> >
> > [1]: https://bugzilla.kernel.org/attachment.cgi?id=287389
>
> How about adding a cpu_feature_enabled() check:

That doesn't fix anything, KVM will still break, just on a smaller set of CPUs.
The only way to avoid breaking guests is to ignore bits 5:2, though we could
quirk that so that userspace can effectively enable what is now the
architectural behavior.

Though I'm very tempted to just add a prep patch to disallow setting bits 5:2
and see if anyone complains. If they do, then we can add a quirk. And if no one
complains, yay.
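A minimal sketch of the "ignore bits 5:2" option floated above (hypothetical
helper name; an assumption about one possible shape, not necessarily what was
merged):

	#include <linux/bits.h>

	/* Silently drop the legacy no-op PB bits so old guests keep working,
	 * while the rest of DEBUGCTL is still checked against reserved bits. */
	static u64 svm_sanitize_guest_debugctl(u64 data)
	{
		return data & ~GENMASK_ULL(5, 2);
	}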
Sean,
>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>> index e1b6a16e97c0..9f3d31a5d231 100644
>> --- a/arch/x86/kvm/svm/svm.c
>> +++ b/arch/x86/kvm/svm/svm.c
>> @@ -1047,7 +1047,8 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
>> {
>> struct vcpu_svm *svm = to_svm(vcpu);
>> bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
>> - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
>> + u64 dbgctl_buslock_lbr = DEBUGCTLMSR_BUS_LOCK_DETECT | DEBUGCTLMSR_LBR;
>> + bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & dbgctl_buslock_lbr) ||
>> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
>> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
>
> Out of sight, but this leads to calling svm_enable_lbrv() even when the guest
> just wants to enable BUS_LOCK_DETECT. Ignoring SEV-ES guests, KVM will intercept
> writes to DEBUGCTL, so can't KVM defer mucking with the intercepts and
> svm_copy_lbrs() until the guest actually wants to use LBRs?
>
> Hmm, and I think the existing code is broken. If L1 passes DEBUGCTL through to
> L2, then KVM will handle writes to L1's effective value. And if L1 also passes
> through the LBRs, then KVM will fail to update the MSR bitmaps for vmcb02.
>
> Ah, it's just a performance issue though, because KVM will still emulate RDMSR.
>
> Ugh, this code is silly. The LBR MSRs are read-only, yet KVM passes them through
> for write.
>
> Anyways, I'm thinking something like this? Note, using msr_write_intercepted()
> is wrong, because that'll check L2's bitmap if is_guest_mode(), and the idea is
> to use L1's bitmap as the canary.
>
> static void svm_update_passthrough_lbrs(struct kvm_vcpu *vcpu, bool passthrough)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> KVM_BUG_ON(!passthrough && sev_es_guest(vcpu->kvm), vcpu->kvm);
>
> if (!msr_write_intercepted(vcpu, MSR_IA32_LASTBRANCHFROMIP) == passthrough)
> return;
>
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, passthrough, 0);
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, passthrough, 0);
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, passthrough, 0);
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, passthrough, 0);
>
> /*
> * When enabling, move the LBR msrs to vmcb02 so that L2 can see them,
> * and then move them back to vmcb01 when disabling to avoid copying
> * them on nested guest entries.
> */
> if (is_guest_mode(vcpu)) {
> if (passthrough)
> svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
> else
> svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
> }
> }
>
> void svm_enable_lbrv(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
>
> if (WARN_ON_ONCE(!sev_es_guest(vcpu->kvm)))
> return;
>
> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
> svm_update_passthrough_lbrs(vcpu, true);
>
> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
> }
>
> static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
> {
> /*
> * If LBR virtualization is disabled, the LBR MSRs are always kept in
> * vmcb01. If LBR virtualization is enabled and L1 is running VMs of
> * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
> */
> return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
> svm->vmcb01.ptr;
> }
>
> void svm_update_lbrv(struct kvm_vcpu *vcpu)
> {
> struct vcpu_svm *svm = to_svm(vcpu);
> u64 guest_debugctl = svm_get_lbr_vmcb(svm)->save.dbgctl;
> bool enable_lbrv = (guest_debugctl & DEBUGCTLMSR_LBR) ||
> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
>
> if (enable_lbrv || (guest_debugctl & DEBUGCTLMSR_BUS_LOCK_DETECT))
> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
> else
> svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
>
> svm_update_passthrough_lbrs(vcpu, enable_lbrv);
> }
This refactored code looks fine. I did some sanity testing with SVM/SEV/SEV-ES
guests and am not seeing any issues. I'll respin with the above change included.
Thanks for the feedback,
Ravi
On 20-Aug-24 10:08 PM, Ravi Bangoria wrote:
> Sean,
>
>>> diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
>>> index e1b6a16e97c0..9f3d31a5d231 100644
>>> --- a/arch/x86/kvm/svm/svm.c
>>> +++ b/arch/x86/kvm/svm/svm.c
>>> @@ -1047,7 +1047,8 @@ void svm_update_lbrv(struct kvm_vcpu *vcpu)
>>> {
>>> struct vcpu_svm *svm = to_svm(vcpu);
>>> bool current_enable_lbrv = svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK;
>>> - bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & DEBUGCTLMSR_LBR) ||
>>> + u64 dbgctl_buslock_lbr = DEBUGCTLMSR_BUS_LOCK_DETECT | DEBUGCTLMSR_LBR;
>>> + bool enable_lbrv = (svm_get_lbr_vmcb(svm)->save.dbgctl & dbgctl_buslock_lbr) ||
>>> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
>>> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
>>
>> Out of sight, but this leads to calling svm_enable_lbrv() even when the guest
>> just wants to enable BUS_LOCK_DETECT. Ignoring SEV-ES guests, KVM will intercept
>> writes to DEBUGCTL, so can't KVM defer mucking with the intercepts and
>> svm_copy_lbrs() until the guest actually wants to use LBRs?
>>
>> Hmm, and I think the existing code is broken. If L1 passes DEBUGCTL through to
>> L2, then KVM will handle writes to L1's effective value. And if L1 also passes
>> through the LBRs, then KVM will fail to update the MSR bitmaps for vmcb02.
>>
>> Ah, it's just a performance issue though, because KVM will still emulate RDMSR.
>>
>> Ugh, this code is silly. The LBR MSRs are read-only, yet KVM passes them through
>> for write.
>>
>> Anyways, I'm thinking something like this? Note, using msr_write_intercepted()
>> is wrong, because that'll check L2's bitmap if is_guest_mode(), and the idea is
>> to use L1's bitmap as the canary.
>>
>> static void svm_update_passthrough_lbrs(struct kvm_vcpu *vcpu, bool passthrough)
>> {
>> struct vcpu_svm *svm = to_svm(vcpu);
>>
>> KVM_BUG_ON(!passthrough && sev_es_guest(vcpu->kvm), vcpu->kvm);
>>
>> if (!msr_write_intercepted(vcpu, MSR_IA32_LASTBRANCHFROMIP) == passthrough)
>> return;
>>
>> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, passthrough, 0);
>> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, passthrough, 0);
>> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, passthrough, 0);
>> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, passthrough, 0);
>>
>> /*
>> * When enabling, move the LBR msrs to vmcb02 so that L2 can see them,
>> * and then move them back to vmcb01 when disabling to avoid copying
>> * them on nested guest entries.
>> */
>> if (is_guest_mode(vcpu)) {
>> if (passthrough)
>> svm_copy_lbrs(svm->vmcb, svm->vmcb01.ptr);
>> else
>> svm_copy_lbrs(svm->vmcb01.ptr, svm->vmcb);
>> }
>> }
>>
>> void svm_enable_lbrv(struct kvm_vcpu *vcpu)
>> {
>> struct vcpu_svm *svm = to_svm(vcpu);
>>
>> if (WARN_ON_ONCE(!sev_es_guest(vcpu->kvm)))
>> return;
>>
>> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
>> svm_update_passthrough_lbrs(vcpu, true);
>>
>> set_msr_interception(vcpu, svm->msrpm, MSR_IA32_DEBUGCTLMSR, 1, 1);
>> }
>>
>> static struct vmcb *svm_get_lbr_vmcb(struct vcpu_svm *svm)
>> {
>> /*
>> * If LBR virtualization is disabled, the LBR MSRs are always kept in
>> * vmcb01. If LBR virtualization is enabled and L1 is running VMs of
>> * its own, the MSRs are moved between vmcb01 and vmcb02 as needed.
>> */
>> return svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK ? svm->vmcb :
>> svm->vmcb01.ptr;
>> }
>>
>> void svm_update_lbrv(struct kvm_vcpu *vcpu)
>> {
>> struct vcpu_svm *svm = to_svm(vcpu);
>> u64 guest_debugctl = svm_get_lbr_vmcb(svm)->save.dbgctl;
>> bool enable_lbrv = (guest_debugctl & DEBUGCTLMSR_LBR) ||
>> (is_guest_mode(vcpu) && guest_can_use(vcpu, X86_FEATURE_LBRV) &&
>> (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK));
>>
>> if (enable_lbrv || (guest_debugctl & DEBUGCTLMSR_BUS_LOCK_DETECT))
>> svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
>> else
>> svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
>>
>> svm_update_passthrough_lbrs(vcpu, enable_lbrv);
>> }
>
> This refactored code looks fine. I did some sanity testing with SVM/SEV/SEV-ES
> guests and am not seeing any issues. I'll respin with the above change included.
I realised that the KUT LBR tests were failing with this change and had
to do the following to fix them:
---
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 0b807099cb19..3dd737db85ef 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -795,6 +795,21 @@ static bool valid_msr_intercept(u32 index)
return direct_access_msr_slot(index) != -ENOENT;
}
+static bool msr_read_intercepted_msrpm(u32 *msrpm, u32 msr)
+{
+ unsigned long tmp;
+ u8 bit_read;
+ u32 offset;
+
+ offset = svm_msrpm_offset(msr);
+ BUG_ON(offset == MSR_INVALID); /* validate offset before indexing msrpm */
+
+ bit_read = 2 * (msr & 0x0f);
+ tmp = msrpm[offset];
+
+ return test_bit(bit_read, &tmp);
+}
+
static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr)
{
u8 bit_write;
@@ -1000,7 +1015,7 @@ static void svm_update_passthrough_lbrs(struct kvm_vcpu *vcpu, bool passthrough)
KVM_BUG_ON(!passthrough && sev_es_guest(vcpu->kvm), vcpu->kvm);
- if (!msr_write_intercepted(vcpu, MSR_IA32_LASTBRANCHFROMIP) == passthrough)
+ if (!msr_read_intercepted_msrpm(svm->msrpm, MSR_IA32_LASTBRANCHFROMIP) == passthrough)
return;
set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, passthrough, 0);
---
I've added a new API for read interception, since LBR register writes are
always intercepted.
Does this look good?
Thanks,
Ravi