Arch LBR MSRs are xsave-supported, but they're operated as an "independent"
xsave feature by the PMU code, i.e., during thread/process context switch,
the MSRs are saved/restored with perf_event_task_sched_{in|out} instead
of the generic kernel fpu switch code, i.e., save_fpregs_to_fpstate() and
restore_fpregs_from_fpstate(). When a vcpu guest/host fpu state swap happens,
the Arch LBR MSRs are retained so they can be accessed directly.
Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
---
arch/x86/kvm/vmx/pmu_intel.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index b57944d5e7d8..241128972776 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -410,6 +410,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
}
return 0;
+ case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
+ case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
+ case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
+ rdmsrl(msr_info->index, msr_info->data);
+ return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
@@ -528,6 +533,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
(data & ARCH_LBR_CTL_LBREN))
intel_pmu_create_guest_lbr_event(vcpu);
return 0;
+ case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
+ case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
+ case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
+ wrmsrl(msr_info->index, msr_info->data);
+ return 0;
default:
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
--
2.27.0
On Thu, Nov 24, 2022, Yang Weijiang wrote:
> Arch LBR MSRs are xsave-supported, but they're operated as "independent"
> xsave feature by PMU code, i.e., during thread/process context switch,
> the MSRs are saved/restored with perf_event_task_sched_{in|out} instead
> of generic kernel fpu switch code, i.e.,save_fpregs_to_fpstate() and
> restore_fpregs_from_fpstate(). When vcpu guest/host fpu state swap happens,
> Arch LBR MSRs are retained so they can be accessed directly.
>
> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
> ---
> arch/x86/kvm/vmx/pmu_intel.c | 10 ++++++++++
> 1 file changed, 10 insertions(+)
>
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index b57944d5e7d8..241128972776 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -410,6 +410,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
> }
> return 0;
> + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
> + case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
> + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
> + rdmsrl(msr_info->index, msr_info->data);
I don't see how this is correct. As called out in patch 5:
: If for some magical reason it's safe to access arch LBR MSRs without disabling
: IRQs and confirming perf event ownership, I want to see a very detailed changelog
: explaining exactly how that magic works.
> + return 0;
> default:
> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> @@ -528,6 +533,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> (data & ARCH_LBR_CTL_LBREN))
> intel_pmu_create_guest_lbr_event(vcpu);
> return 0;
> + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
> + case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
> + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
> + wrmsrl(msr_info->index, msr_info->data);
> + return 0;
> default:
> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
> --
> 2.27.0
>
On 1/28/2023 6:13 AM, Sean Christopherson wrote:
> On Thu, Nov 24, 2022, Yang Weijiang wrote:
>> Arch LBR MSRs are xsave-supported, but they're operated as "independent"
>> xsave feature by PMU code, i.e., during thread/process context switch,
>> the MSRs are saved/restored with perf_event_task_sched_{in|out} instead
>> of generic kernel fpu switch code, i.e.,save_fpregs_to_fpstate() and
>> restore_fpregs_from_fpstate(). When vcpu guest/host fpu state swap happens,
>> Arch LBR MSRs are retained so they can be accessed directly.
>>
>> Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
>> Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
>> ---
>> arch/x86/kvm/vmx/pmu_intel.c | 10 ++++++++++
>> 1 file changed, 10 insertions(+)
>>
>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>> index b57944d5e7d8..241128972776 100644
>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>> @@ -410,6 +410,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
>> }
>> return 0;
>> + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
>> + case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
>> + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
>> + rdmsrl(msr_info->index, msr_info->data);
> I don't see how this is correct. As called out in patch 5:
>
> : If for some magical reason it's safe to access arch LBR MSRs without disabling
> : IRQs and confirming perf event ownership, I want to see a very detailed changelog
> : explaining exactly how that magic works.
The MSR list here is just for live migration. When arch-lbr is active,
these MSRs are passed through to the guest.
>
>> + return 0;
>> default:
>> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>> @@ -528,6 +533,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> (data & ARCH_LBR_CTL_LBREN))
>> intel_pmu_create_guest_lbr_event(vcpu);
>> return 0;
>> + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
>> + case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
>> + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
>> + wrmsrl(msr_info->index, msr_info->data);
>> + return 0;
>> default:
>> if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
>> (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
>> --
>> 2.27.0
>>
On Mon, Jan 30, 2023, Yang, Weijiang wrote:
>
> On 1/28/2023 6:13 AM, Sean Christopherson wrote:
> > On Thu, Nov 24, 2022, Yang Weijiang wrote:
> > > Arch LBR MSRs are xsave-supported, but they're operated as "independent"
> > > xsave feature by PMU code, i.e., during thread/process context switch,
> > > the MSRs are saved/restored with perf_event_task_sched_{in|out} instead
> > > of generic kernel fpu switch code, i.e.,save_fpregs_to_fpstate() and
> > > restore_fpregs_from_fpstate(). When vcpu guest/host fpu state swap happens,
> > > Arch LBR MSRs are retained so they can be accessed directly.
> > >
> > > Signed-off-by: Yang Weijiang <weijiang.yang@intel.com>
> > > Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
> > > ---
> > > arch/x86/kvm/vmx/pmu_intel.c | 10 ++++++++++
> > > 1 file changed, 10 insertions(+)
> > >
> > > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> > > index b57944d5e7d8..241128972776 100644
> > > --- a/arch/x86/kvm/vmx/pmu_intel.c
> > > +++ b/arch/x86/kvm/vmx/pmu_intel.c
> > > @@ -410,6 +410,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > > msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
> > > }
> > > return 0;
> > > + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
> > > + case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
> > > + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
> > > + rdmsrl(msr_info->index, msr_info->data);
> > I don't see how this is correct. As called out in patch 5:
> >
> > : If for some magical reason it's safe to access arch LBR MSRs without disabling
> > : IRQs and confirming perf event ownership, I want to see a very detailed changelog
> > : explaining exactly how that magic works.
>
> The MSR lists here are just for live migration. When arch-lbr is active,
> these MSRs are passed through to guest.
None of that explains how the guest's MSR values are guaranteed to be resident
in hardware.
On 1/31/2023 1:30 AM, Sean Christopherson wrote:
> On Mon, Jan 30, 2023, Yang, Weijiang wrote:
>> On 1/28/2023 6:13 AM, Sean Christopherson wrote:
>>> On Thu, Nov 24, 2022, Yang Weijiang wrote:
>>>> Arch LBR MSRs are xsave-supported, but they're operated as "independent"
>>>> xsave feature by PMU code, i.e., during thread/process context switch,
>>>> the MSRs are saved/restored with perf_event_task_sched_{in|out} instead
>>>> of generic kernel fpu switch code, i.e.,save_fpregs_to_fpstate() and
>>>> restore_fpregs_from_fpstate(). When vcpu guest/host fpu state swap happens,
>>>> Arch LBR MSRs are retained so they can be accessed directly.
>>>>
>>>> Signed-off-by: Yang Weijiang<weijiang.yang@intel.com>
>>>> Reviewed-by: Kan Liang<kan.liang@linux.intel.com>
>>>> ---
>>>> arch/x86/kvm/vmx/pmu_intel.c | 10 ++++++++++
>>>> 1 file changed, 10 insertions(+)
>>>>
>>>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>>>> index b57944d5e7d8..241128972776 100644
>>>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>>>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>>>> @@ -410,6 +410,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>>> msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
>>>> }
>>>> return 0;
>>>> + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
>>>> + case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
>>>> + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
>>>> + rdmsrl(msr_info->index, msr_info->data);
>>> I don't see how this is correct. As called out in patch 5:
>>>
>>> : If for some magical reason it's safe to access arch LBR MSRs without disabling
>>> : IRQs and confirming perf event ownership, I want to see a very detailed changelog
>>> : explaining exactly how that magic works.
>> The MSR lists here are just for live migration. When arch-lbr is active,
>> these MSRs are passed through to guest.
> None of that explains how the guest's MSR values are guaranteed to be resident
> in hardware.
I ignored the host *event* scheduling case in the commit log.
My understanding is that a host LBR *event* could break in at any point while
the vCPU is running; in that case, disabling IRQs before reading/writing the
MSRs is pointless because the HW context could already have been swapped.
I need to investigate the issue further.
On Tue, Jan 31, 2023, Yang, Weijiang wrote:
>
> On 1/31/2023 1:30 AM, Sean Christopherson wrote:
> > On Mon, Jan 30, 2023, Yang, Weijiang wrote:
> > > On 1/28/2023 6:13 AM, Sean Christopherson wrote:
> > > > On Thu, Nov 24, 2022, Yang Weijiang wrote:
> > > > > Arch LBR MSRs are xsave-supported, but they're operated as "independent"
> > > > > xsave feature by PMU code, i.e., during thread/process context switch,
> > > > > the MSRs are saved/restored with perf_event_task_sched_{in|out} instead
> > > > > of generic kernel fpu switch code, i.e.,save_fpregs_to_fpstate() and
> > > > > restore_fpregs_from_fpstate(). When vcpu guest/host fpu state swap happens,
> > > > > Arch LBR MSRs are retained so they can be accessed directly.
> > > > >
> > > > > Signed-off-by: Yang Weijiang<weijiang.yang@intel.com>
> > > > > Reviewed-by: Kan Liang<kan.liang@linux.intel.com>
> > > > > ---
> > > > > arch/x86/kvm/vmx/pmu_intel.c | 10 ++++++++++
> > > > > 1 file changed, 10 insertions(+)
> > > > >
> > > > > diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> > > > > index b57944d5e7d8..241128972776 100644
> > > > > --- a/arch/x86/kvm/vmx/pmu_intel.c
> > > > > +++ b/arch/x86/kvm/vmx/pmu_intel.c
> > > > > @@ -410,6 +410,11 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
> > > > > msr_info->data = vmcs_read64(GUEST_IA32_LBR_CTL);
> > > > > }
> > > > > return 0;
> > > > > + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31:
> > > > > + case MSR_ARCH_LBR_TO_0 ... MSR_ARCH_LBR_TO_0 + 31:
> > > > > + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31:
> > > > > + rdmsrl(msr_info->index, msr_info->data);
> > > > I don't see how this is correct. As called out in patch 5:
> > > >
> > > > : If for some magical reason it's safe to access arch LBR MSRs without disabling
> > > > : IRQs and confirming perf event ownership, I want to see a very detailed changelog
> > > > : explaining exactly how that magic works.
> > > The MSR lists here are just for live migration. When arch-lbr is active,
> > > these MSRs are passed through to guest.
> > None of that explains how the guest's MSR values are guaranteed to be resident
> > in hardware.
>
> I ignored host *event* scheduling case in commit log.
>
> My understanding is, host LBR *event* could break in at any point when the
> vCPU is running,
>
> in this case disabling IRQs before read/write the MSRs is pointless because
> the HW context could have been swapped. I need to do more investigation for
> the issue.
Which is presumably why intel_pmu_handle_lbr_msrs_access() checks that the LBR
perf event is active prior to accessing the MSRs, with IRQs disabled...
/*
* Disable irq to ensure the LBR feature doesn't get reclaimed by the
* host at the time the value is read from the msr, and this avoids the
* host LBR value to be leaked to the guest. If LBR has been reclaimed,
* return 0 on guest reads.
*/
local_irq_disable();
if (lbr_desc->event->state == PERF_EVENT_STATE_ACTIVE) {
if (read)
rdmsrl(index, msr_info->data);
else
wrmsrl(index, msr_info->data);
__set_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
local_irq_enable();
return true;
}
clear_bit(INTEL_PMC_IDX_FIXED_VLBR, vcpu_to_pmu(vcpu)->pmc_in_use);
local_irq_enable();
© 2016 - 2026 Red Hat, Inc.