[PATCH v13 021/113] KVM: TDX: Make pmu_intel.c ignore guest TD case

isaku.yamahata@intel.com posted 113 patches 2 years, 11 months ago
There is a newer version of this series
[PATCH v13 021/113] KVM: TDX: Make pmu_intel.c ignore guest TD case
Posted by isaku.yamahata@intel.com 2 years, 11 months ago
From: Isaku Yamahata <isaku.yamahata@intel.com>

Because TDX KVM doesn't support PMU yet (it's future work of TDX KVM
support as another patch series) and pmu_intel.c touches vmx specific
structure in vcpu initialization, as workaround add dummy structure to
struct vcpu_tdx and pmu_intel.c can ignore TDX case.

Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/vmx/pmu_intel.c | 46 +++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/vmx/pmu_intel.h | 28 ++++++++++++++++++++++
 arch/x86/kvm/vmx/tdx.h       |  8 ++++++-
 arch/x86/kvm/vmx/vmx.c       |  2 +-
 arch/x86/kvm/vmx/vmx.h       | 32 +------------------------
 5 files changed, 82 insertions(+), 34 deletions(-)
 create mode 100644 arch/x86/kvm/vmx/pmu_intel.h

diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index e8a3be0b9df9..df1f4ddfa72d 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -19,6 +19,7 @@
 #include "lapic.h"
 #include "nested.h"
 #include "pmu.h"
+#include "tdx.h"
 
 #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
 
@@ -40,6 +41,26 @@ static struct {
 /* mapping between fixed pmc index and intel_arch_events array */
 static int fixed_pmc_events[] = {1, 0, 7};
 
+struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_INTEL_TDX_HOST
+	if (is_td_vcpu(vcpu))
+		return &to_tdx(vcpu)->lbr_desc;
+#endif
+
+	return &to_vmx(vcpu)->lbr_desc;
+}
+
+struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
+{
+#ifdef CONFIG_INTEL_TDX_HOST
+	if (is_td_vcpu(vcpu))
+		return &to_tdx(vcpu)->lbr_desc.records;
+#endif
+
+	return &to_vmx(vcpu)->lbr_desc.records;
+}
+
 static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
 {
 	struct kvm_pmc *pmc;
@@ -172,6 +193,23 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
 	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
 }
 
+bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
+{
+	if (is_td_vcpu(vcpu))
+		return false;
+	return cpuid_model_is_consistent(vcpu);
+}
+
+bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
+{
+	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
+
+	if (is_td_vcpu(vcpu))
+		return false;
+
+	return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT);
+}
+
 static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
 {
 	struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
@@ -282,6 +320,9 @@ int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
 					PERF_SAMPLE_BRANCH_USER,
 	};
 
+	if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
+		return 0;
+
 	if (unlikely(lbr_desc->event)) {
 		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
 		return 0;
@@ -591,7 +632,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
 		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
 
 	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
-	if (cpuid_model_is_consistent(vcpu) &&
+	if (intel_pmu_lbr_is_compatible(vcpu) &&
 	    (perf_capabilities & PMU_CAP_LBR_FMT))
 		x86_perf_get_lbr(&lbr_desc->records);
 	else
@@ -647,6 +688,9 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 	struct kvm_pmc *pmc = NULL;
 	int i;
 
+	if (is_td_vcpu(vcpu))
+		return;
+
 	for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
 		pmc = &pmu->gp_counters[i];
 
diff --git a/arch/x86/kvm/vmx/pmu_intel.h b/arch/x86/kvm/vmx/pmu_intel.h
new file mode 100644
index 000000000000..66bba47c1269
--- /dev/null
+++ b/arch/x86/kvm/vmx/pmu_intel.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_PMU_INTEL_H
+#define  __KVM_X86_VMX_PMU_INTEL_H
+
+struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu);
+struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu);
+
+bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu);
+bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);
+int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
+
+struct lbr_desc {
+	/* Basic info about guest LBR records. */
+	struct x86_pmu_lbr records;
+
+	/*
+	 * Emulate LBR feature via passthrough LBR registers when the
+	 * per-vcpu guest LBR event is scheduled on the current pcpu.
+	 *
+	 * The records may be inaccurate if the host reclaims the LBR.
+	 */
+	struct perf_event *event;
+
+	/* True if LBRs are marked as not intercepted in the MSR bitmap */
+	bool msr_passthrough;
+};
+
+#endif /* __KVM_X86_VMX_PMU_INTEL_H */
diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
index 1e00e75b1c5e..5728820fed5e 100644
--- a/arch/x86/kvm/vmx/tdx.h
+++ b/arch/x86/kvm/vmx/tdx.h
@@ -4,6 +4,7 @@
 
 #ifdef CONFIG_INTEL_TDX_HOST
 
+#include "pmu_intel.h"
 #include "tdx_ops.h"
 
 struct kvm_tdx {
@@ -21,7 +22,12 @@ struct kvm_tdx {
 
 struct vcpu_tdx {
 	struct kvm_vcpu	vcpu;
-	/* TDX specific members follow. */
+
+	/*
+	 * Dummy to make pmu_intel not corrupt memory.
+	 * TODO: Support PMU for TDX.  Future work.
+	 */
+	struct lbr_desc lbr_desc;
 };
 
 static inline bool is_td(struct kvm *kvm)
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index d23830d92f61..f9e9fd7fde2c 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2432,7 +2432,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			if ((data & PMU_CAP_LBR_FMT) !=
 			    (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT))
 				return 1;
-			if (!cpuid_model_is_consistent(vcpu))
+			if (!intel_pmu_lbr_is_compatible(vcpu))
 				return 1;
 		}
 		if (data & PERF_CAP_PEBS_FORMAT) {
diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
index 2acdc54bc34b..1d15c3c2751b 100644
--- a/arch/x86/kvm/vmx/vmx.h
+++ b/arch/x86/kvm/vmx/vmx.h
@@ -11,6 +11,7 @@
 #include "capabilities.h"
 #include "../kvm_cache_regs.h"
 #include "posted_intr.h"
+#include "pmu_intel.h"
 #include "vmcs.h"
 #include "vmx_ops.h"
 #include "../cpuid.h"
@@ -105,22 +106,6 @@ static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
 	return pmu->version > 1;
 }
 
-struct lbr_desc {
-	/* Basic info about guest LBR records. */
-	struct x86_pmu_lbr records;
-
-	/*
-	 * Emulate LBR feature via passthrough LBR registers when the
-	 * per-vcpu guest LBR event is scheduled on the current pcpu.
-	 *
-	 * The records may be inaccurate if the host reclaims the LBR.
-	 */
-	struct perf_event *event;
-
-	/* True if LBRs are marked as not intercepted in the MSR bitmap */
-	bool msr_passthrough;
-};
-
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -650,21 +635,6 @@ static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
 	return container_of(vcpu, struct vcpu_vmx, vcpu);
 }
 
-static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
-{
-	return &to_vmx(vcpu)->lbr_desc;
-}
-
-static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
-{
-	return &vcpu_to_lbr_desc(vcpu)->records;
-}
-
-static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
-{
-	return !!vcpu_to_lbr_records(vcpu)->nr;
-}
-
 void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
 int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
 void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
-- 
2.25.1
Re: [PATCH v13 021/113] KVM: TDX: Make pmu_intel.c ignore guest TD case
Posted by Zhi Wang 2 years, 10 months ago
Hi Like:

Would you mind to take a look on this patch? It would be nice to have
a r-b also from you. :)

On Sun, 12 Mar 2023 10:55:45 -0700
isaku.yamahata@intel.com wrote:

> From: Isaku Yamahata <isaku.yamahata@intel.com>
> 
> Because TDX KVM doesn't support PMU yet (it's future work of TDX KVM
> support as another patch series) and pmu_intel.c touches vmx specific
> structure in vcpu initialization, as workaround add dummy structure to
> struct vcpu_tdx and pmu_intel.c can ignore TDX case.
> 
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
>  arch/x86/kvm/vmx/pmu_intel.c | 46 +++++++++++++++++++++++++++++++++++-
>  arch/x86/kvm/vmx/pmu_intel.h | 28 ++++++++++++++++++++++
>  arch/x86/kvm/vmx/tdx.h       |  8 ++++++-
>  arch/x86/kvm/vmx/vmx.c       |  2 +-
>  arch/x86/kvm/vmx/vmx.h       | 32 +------------------------
>  5 files changed, 82 insertions(+), 34 deletions(-)
>  create mode 100644 arch/x86/kvm/vmx/pmu_intel.h
> 
> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
> index e8a3be0b9df9..df1f4ddfa72d 100644
> --- a/arch/x86/kvm/vmx/pmu_intel.c
> +++ b/arch/x86/kvm/vmx/pmu_intel.c
> @@ -19,6 +19,7 @@
>  #include "lapic.h"
>  #include "nested.h"
>  #include "pmu.h"
> +#include "tdx.h"
>  
>  #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
>  
> @@ -40,6 +41,26 @@ static struct {
>  /* mapping between fixed pmc index and intel_arch_events array */
>  static int fixed_pmc_events[] = {1, 0, 7};
>  
> +struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
> +{
> +#ifdef CONFIG_INTEL_TDX_HOST
> +	if (is_td_vcpu(vcpu))
> +		return &to_tdx(vcpu)->lbr_desc;
> +#endif
> +
> +	return &to_vmx(vcpu)->lbr_desc;
> +}
> +
> +struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
> +{
> +#ifdef CONFIG_INTEL_TDX_HOST
> +	if (is_td_vcpu(vcpu))
> +		return &to_tdx(vcpu)->lbr_desc.records;
> +#endif
> +
> +	return &to_vmx(vcpu)->lbr_desc.records;
> +}
> +
>  static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
>  {
>  	struct kvm_pmc *pmc;
> @@ -172,6 +193,23 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
>  	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
>  }
>  
> +bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
> +{
> +	if (is_td_vcpu(vcpu))
> +		return false;
> +	return cpuid_model_is_consistent(vcpu);
> +}
> +
> +bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
> +{
> +	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
> +
> +	if (is_td_vcpu(vcpu))
> +		return false;
> +
> +	return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT);
> +}
> +
>  static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
>  {
>  	struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
> @@ -282,6 +320,9 @@ int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
>  					PERF_SAMPLE_BRANCH_USER,
>  	};
>  
> +	if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
> +		return 0;
> +
>  	if (unlikely(lbr_desc->event)) {
>  		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
>  		return 0;
> @@ -591,7 +632,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
>  		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
>  
>  	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
> -	if (cpuid_model_is_consistent(vcpu) &&
> +	if (intel_pmu_lbr_is_compatible(vcpu) &&
>  	    (perf_capabilities & PMU_CAP_LBR_FMT))
>  		x86_perf_get_lbr(&lbr_desc->records);
>  	else
> @@ -647,6 +688,9 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
>  	struct kvm_pmc *pmc = NULL;
>  	int i;
>  
> +	if (is_td_vcpu(vcpu))
> +		return;
> +
>  	for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
>  		pmc = &pmu->gp_counters[i];
>  
> diff --git a/arch/x86/kvm/vmx/pmu_intel.h b/arch/x86/kvm/vmx/pmu_intel.h
> new file mode 100644
> index 000000000000..66bba47c1269
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/pmu_intel.h
> @@ -0,0 +1,28 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef __KVM_X86_VMX_PMU_INTEL_H
> +#define  __KVM_X86_VMX_PMU_INTEL_H
> +
> +struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu);
> +struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu);
> +
> +bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu);
> +bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);
> +int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
> +
> +struct lbr_desc {
> +	/* Basic info about guest LBR records. */
> +	struct x86_pmu_lbr records;
> +
> +	/*
> +	 * Emulate LBR feature via passthrough LBR registers when the
> +	 * per-vcpu guest LBR event is scheduled on the current pcpu.
> +	 *
> +	 * The records may be inaccurate if the host reclaims the LBR.
> +	 */
> +	struct perf_event *event;
> +
> +	/* True if LBRs are marked as not intercepted in the MSR bitmap */
> +	bool msr_passthrough;
> +};
> +
> +#endif /* __KVM_X86_VMX_PMU_INTEL_H */
> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
> index 1e00e75b1c5e..5728820fed5e 100644
> --- a/arch/x86/kvm/vmx/tdx.h
> +++ b/arch/x86/kvm/vmx/tdx.h
> @@ -4,6 +4,7 @@
>  
>  #ifdef CONFIG_INTEL_TDX_HOST
>  
> +#include "pmu_intel.h"
>  #include "tdx_ops.h"
>  
>  struct kvm_tdx {
> @@ -21,7 +22,12 @@ struct kvm_tdx {
>  
>  struct vcpu_tdx {
>  	struct kvm_vcpu	vcpu;
> -	/* TDX specific members follow. */
> +
> +	/*
> +	 * Dummy to make pmu_intel not corrupt memory.
> +	 * TODO: Support PMU for TDX.  Future work.
> +	 */
> +	struct lbr_desc lbr_desc;
>  };
>  
>  static inline bool is_td(struct kvm *kvm)
> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> index d23830d92f61..f9e9fd7fde2c 100644
> --- a/arch/x86/kvm/vmx/vmx.c
> +++ b/arch/x86/kvm/vmx/vmx.c
> @@ -2432,7 +2432,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>  			if ((data & PMU_CAP_LBR_FMT) !=
>  			    (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT))
>  				return 1;
> -			if (!cpuid_model_is_consistent(vcpu))
> +			if (!intel_pmu_lbr_is_compatible(vcpu))
>  				return 1;
>  		}
>  		if (data & PERF_CAP_PEBS_FORMAT) {
> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
> index 2acdc54bc34b..1d15c3c2751b 100644
> --- a/arch/x86/kvm/vmx/vmx.h
> +++ b/arch/x86/kvm/vmx/vmx.h
> @@ -11,6 +11,7 @@
>  #include "capabilities.h"
>  #include "../kvm_cache_regs.h"
>  #include "posted_intr.h"
> +#include "pmu_intel.h"
>  #include "vmcs.h"
>  #include "vmx_ops.h"
>  #include "../cpuid.h"
> @@ -105,22 +106,6 @@ static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
>  	return pmu->version > 1;
>  }
>  
> -struct lbr_desc {
> -	/* Basic info about guest LBR records. */
> -	struct x86_pmu_lbr records;
> -
> -	/*
> -	 * Emulate LBR feature via passthrough LBR registers when the
> -	 * per-vcpu guest LBR event is scheduled on the current pcpu.
> -	 *
> -	 * The records may be inaccurate if the host reclaims the LBR.
> -	 */
> -	struct perf_event *event;
> -
> -	/* True if LBRs are marked as not intercepted in the MSR bitmap */
> -	bool msr_passthrough;
> -};
> -
>  /*
>   * The nested_vmx structure is part of vcpu_vmx, and holds information we need
>   * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
> @@ -650,21 +635,6 @@ static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
>  	return container_of(vcpu, struct vcpu_vmx, vcpu);
>  }
>  
> -static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
> -{
> -	return &to_vmx(vcpu)->lbr_desc;
> -}
> -
> -static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
> -{
> -	return &vcpu_to_lbr_desc(vcpu)->records;
> -}
> -
> -static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
> -{
> -	return !!vcpu_to_lbr_records(vcpu)->nr;
> -}
> -
>  void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
>  int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
>  void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
Re: [PATCH v13 021/113] KVM: TDX: Make pmu_intel.c ignore guest TD case
Posted by Like Xu 2 years, 9 months ago
On 2/4/2023 4:50 pm, Zhi Wang wrote:
> Hi Like:
> 
> Would you mind to take a look on this patch? It would be nice to have
> a r-b also from you. :)
> 
> On Sun, 12 Mar 2023 10:55:45 -0700
> isaku.yamahata@intel.com wrote:
> 
>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>
>> Because TDX KVM doesn't support PMU yet (it's future work of TDX KVM
>> support as another patch series) and pmu_intel.c touches vmx specific

It would be nice to have pmu support for tdx-guest from the very beginning.
If you guys are more open on the tdx development model, I'd like to help on 
those features.

>> structure in vcpu initialization, as workaround add dummy structure to
>> struct vcpu_tdx and pmu_intel.c can ignore TDX case.

If the target is not to provide a workaround, how about other variants:
	- struct lbr_desc lbr_desc;
	- pebs ds_buffer;
?

We also need tdx selftest to verify the unavailability of these features.
Also, it would be great to have TDX's "System Profiling Mode" featue back in the 
specification.

>>
>> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
>> ---
>>   arch/x86/kvm/vmx/pmu_intel.c | 46 +++++++++++++++++++++++++++++++++++-
>>   arch/x86/kvm/vmx/pmu_intel.h | 28 ++++++++++++++++++++++
>>   arch/x86/kvm/vmx/tdx.h       |  8 ++++++-
>>   arch/x86/kvm/vmx/vmx.c       |  2 +-
>>   arch/x86/kvm/vmx/vmx.h       | 32 +------------------------
>>   5 files changed, 82 insertions(+), 34 deletions(-)
>>   create mode 100644 arch/x86/kvm/vmx/pmu_intel.h
>>
>> diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
>> index e8a3be0b9df9..df1f4ddfa72d 100644
>> --- a/arch/x86/kvm/vmx/pmu_intel.c
>> +++ b/arch/x86/kvm/vmx/pmu_intel.c
>> @@ -19,6 +19,7 @@
>>   #include "lapic.h"
>>   #include "nested.h"
>>   #include "pmu.h"
>> +#include "tdx.h"
>>   
>>   #define MSR_PMC_FULL_WIDTH_BIT      (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
>>   
>> @@ -40,6 +41,26 @@ static struct {
>>   /* mapping between fixed pmc index and intel_arch_events array */
>>   static int fixed_pmc_events[] = {1, 0, 7};
>>   
>> +struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
>> +{
>> +#ifdef CONFIG_INTEL_TDX_HOST
>> +	if (is_td_vcpu(vcpu))
>> +		return &to_tdx(vcpu)->lbr_desc;
>> +#endif
>> +
>> +	return &to_vmx(vcpu)->lbr_desc;
>> +}
>> +
>> +struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
>> +{
>> +#ifdef CONFIG_INTEL_TDX_HOST
>> +	if (is_td_vcpu(vcpu))
>> +		return &to_tdx(vcpu)->lbr_desc.records;
>> +#endif
>> +
>> +	return &to_vmx(vcpu)->lbr_desc.records;
>> +}
>> +
>>   static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data)
>>   {
>>   	struct kvm_pmc *pmc;
>> @@ -172,6 +193,23 @@ static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
>>   	return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
>>   }
>>   
>> +bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu)
>> +{
>> +	if (is_td_vcpu(vcpu))
>> +		return false;
>> +	return cpuid_model_is_consistent(vcpu);
>> +}
>> +
>> +bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
>> +{
>> +	struct x86_pmu_lbr *lbr = vcpu_to_lbr_records(vcpu);
>> +
>> +	if (is_td_vcpu(vcpu))
>> +		return false;
>> +
>> +	return lbr->nr && (vcpu_get_perf_capabilities(vcpu) & PMU_CAP_LBR_FMT);
>> +}
>> +
>>   static bool intel_pmu_is_valid_lbr_msr(struct kvm_vcpu *vcpu, u32 index)
>>   {
>>   	struct x86_pmu_lbr *records = vcpu_to_lbr_records(vcpu);
>> @@ -282,6 +320,9 @@ int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
>>   					PERF_SAMPLE_BRANCH_USER,
>>   	};
>>   
>> +	if (WARN_ON_ONCE(is_td_vcpu(vcpu)))
>> +		return 0;
>> +
>>   	if (unlikely(lbr_desc->event)) {
>>   		__set_bit(INTEL_PMC_IDX_FIXED_VLBR, pmu->pmc_in_use);
>>   		return 0;
>> @@ -591,7 +632,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
>>   		INTEL_PMC_MAX_GENERIC, pmu->nr_arch_fixed_counters);
>>   
>>   	perf_capabilities = vcpu_get_perf_capabilities(vcpu);
>> -	if (cpuid_model_is_consistent(vcpu) &&
>> +	if (intel_pmu_lbr_is_compatible(vcpu) &&
>>   	    (perf_capabilities & PMU_CAP_LBR_FMT))
>>   		x86_perf_get_lbr(&lbr_desc->records);
>>   	else
>> @@ -647,6 +688,9 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu)
>>   	struct kvm_pmc *pmc = NULL;
>>   	int i;
>>   
>> +	if (is_td_vcpu(vcpu))
>> +		return;
>> +
>>   	for (i = 0; i < KVM_INTEL_PMC_MAX_GENERIC; i++) {
>>   		pmc = &pmu->gp_counters[i];
>>   
>> diff --git a/arch/x86/kvm/vmx/pmu_intel.h b/arch/x86/kvm/vmx/pmu_intel.h
>> new file mode 100644
>> index 000000000000..66bba47c1269
>> --- /dev/null
>> +++ b/arch/x86/kvm/vmx/pmu_intel.h
>> @@ -0,0 +1,28 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +#ifndef __KVM_X86_VMX_PMU_INTEL_H
>> +#define  __KVM_X86_VMX_PMU_INTEL_H
>> +
>> +struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu);
>> +struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu);
>> +
>> +bool intel_pmu_lbr_is_compatible(struct kvm_vcpu *vcpu);
>> +bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu);
>> +int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
>> +
>> +struct lbr_desc {
>> +	/* Basic info about guest LBR records. */
>> +	struct x86_pmu_lbr records;
>> +
>> +	/*
>> +	 * Emulate LBR feature via passthrough LBR registers when the
>> +	 * per-vcpu guest LBR event is scheduled on the current pcpu.
>> +	 *
>> +	 * The records may be inaccurate if the host reclaims the LBR.
>> +	 */
>> +	struct perf_event *event;
>> +
>> +	/* True if LBRs are marked as not intercepted in the MSR bitmap */
>> +	bool msr_passthrough;
>> +};
>> +
>> +#endif /* __KVM_X86_VMX_PMU_INTEL_H */
>> diff --git a/arch/x86/kvm/vmx/tdx.h b/arch/x86/kvm/vmx/tdx.h
>> index 1e00e75b1c5e..5728820fed5e 100644
>> --- a/arch/x86/kvm/vmx/tdx.h
>> +++ b/arch/x86/kvm/vmx/tdx.h
>> @@ -4,6 +4,7 @@
>>   
>>   #ifdef CONFIG_INTEL_TDX_HOST
>>   
>> +#include "pmu_intel.h"
>>   #include "tdx_ops.h"
>>   
>>   struct kvm_tdx {
>> @@ -21,7 +22,12 @@ struct kvm_tdx {
>>   
>>   struct vcpu_tdx {
>>   	struct kvm_vcpu	vcpu;
>> -	/* TDX specific members follow. */
>> +
>> +	/*
>> +	 * Dummy to make pmu_intel not corrupt memory.
>> +	 * TODO: Support PMU for TDX.  Future work.
>> +	 */
>> +	struct lbr_desc lbr_desc;
>>   };
>>   
>>   static inline bool is_td(struct kvm *kvm)
>> diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
>> index d23830d92f61..f9e9fd7fde2c 100644
>> --- a/arch/x86/kvm/vmx/vmx.c
>> +++ b/arch/x86/kvm/vmx/vmx.c
>> @@ -2432,7 +2432,7 @@ int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>>   			if ((data & PMU_CAP_LBR_FMT) !=
>>   			    (kvm_caps.supported_perf_cap & PMU_CAP_LBR_FMT))
>>   				return 1;
>> -			if (!cpuid_model_is_consistent(vcpu))
>> +			if (!intel_pmu_lbr_is_compatible(vcpu))
>>   				return 1;
>>   		}
>>   		if (data & PERF_CAP_PEBS_FORMAT) {
>> diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
>> index 2acdc54bc34b..1d15c3c2751b 100644
>> --- a/arch/x86/kvm/vmx/vmx.h
>> +++ b/arch/x86/kvm/vmx/vmx.h
>> @@ -11,6 +11,7 @@
>>   #include "capabilities.h"
>>   #include "../kvm_cache_regs.h"
>>   #include "posted_intr.h"
>> +#include "pmu_intel.h"
>>   #include "vmcs.h"
>>   #include "vmx_ops.h"
>>   #include "../cpuid.h"
>> @@ -105,22 +106,6 @@ static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
>>   	return pmu->version > 1;
>>   }
>>   
>> -struct lbr_desc {
>> -	/* Basic info about guest LBR records. */
>> -	struct x86_pmu_lbr records;
>> -
>> -	/*
>> -	 * Emulate LBR feature via passthrough LBR registers when the
>> -	 * per-vcpu guest LBR event is scheduled on the current pcpu.
>> -	 *
>> -	 * The records may be inaccurate if the host reclaims the LBR.
>> -	 */
>> -	struct perf_event *event;
>> -
>> -	/* True if LBRs are marked as not intercepted in the MSR bitmap */
>> -	bool msr_passthrough;
>> -};
>> -
>>   /*
>>    * The nested_vmx structure is part of vcpu_vmx, and holds information we need
>>    * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
>> @@ -650,21 +635,6 @@ static __always_inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
>>   	return container_of(vcpu, struct vcpu_vmx, vcpu);
>>   }
>>   
>> -static inline struct lbr_desc *vcpu_to_lbr_desc(struct kvm_vcpu *vcpu)
>> -{
>> -	return &to_vmx(vcpu)->lbr_desc;
>> -}
>> -
>> -static inline struct x86_pmu_lbr *vcpu_to_lbr_records(struct kvm_vcpu *vcpu)
>> -{
>> -	return &vcpu_to_lbr_desc(vcpu)->records;
>> -}
>> -
>> -static inline bool intel_pmu_lbr_is_enabled(struct kvm_vcpu *vcpu)
>> -{
>> -	return !!vcpu_to_lbr_records(vcpu)->nr;
>> -}
>> -
>>   void intel_pmu_cross_mapped_check(struct kvm_pmu *pmu);
>>   int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu);
>>   void vmx_passthrough_lbr_msrs(struct kvm_vcpu *vcpu);
> 
>
Re: [PATCH v13 021/113] KVM: TDX: Make pmu_intel.c ignore guest TD case
Posted by Isaku Yamahata 2 years, 8 months ago
On Wed, Apr 19, 2023 at 04:21:21PM +0800,
Like Xu <like.xu.linux@gmail.com> wrote:

> On 2/4/2023 4:50 pm, Zhi Wang wrote:
> > Hi Like:
> > 
> > Would you mind to take a look on this patch? It would be nice to have
> > a r-b also from you. :)
> > 
> > On Sun, 12 Mar 2023 10:55:45 -0700
> > isaku.yamahata@intel.com wrote:
> > 
> > > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > > 
> > > Because TDX KVM doesn't support PMU yet (it's future work of TDX KVM
> > > support as another patch series) and pmu_intel.c touches vmx specific
> 
> It would be nice to have pmu support for tdx-guest from the very beginning.

It's supported in the public github repo.
https://github.com/intel/tdx/tree/kvm-upstream-workaround
As this patch series has 100+ patches, I don't want to bloat this patch more.


> If you guys are more open on the tdx development model, I'd like to help on
> those features.

This mainling list is the place.  


> > > structure in vcpu initialization, as workaround add dummy structure to
> > > struct vcpu_tdx and pmu_intel.c can ignore TDX case.
> 
> If the target is not to provide a workaround, how about other variants:
> 	- struct lbr_desc lbr_desc;
> 	- pebs ds_buffer;
> ?
> 
> We also need tdx selftest to verify the unavailability of these features.
> Also, it would be great to have TDX's "System Profiling Mode" featue back in
> the specification.

I don't think it's productive. Once merging this patch series, we can move on
to TDX PMU support (or whatever still missing feature) as second (or later)
step.
-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 021/113] KVM: TDX: Make pmu_intel.c ignore guest TD case
Posted by Like Xu 2 years, 8 months ago
On 28/5/2023 4:26 pm, Isaku Yamahata wrote:
> On Wed, Apr 19, 2023 at 04:21:21PM +0800,
> Like Xu <like.xu.linux@gmail.com> wrote:
> 
>> On 2/4/2023 4:50 pm, Zhi Wang wrote:
>>> Hi Like:
>>>
>>> Would you mind to take a look on this patch? It would be nice to have
>>> a r-b also from you. :)
>>>
>>> On Sun, 12 Mar 2023 10:55:45 -0700
>>> isaku.yamahata@intel.com wrote:
>>>
>>>> From: Isaku Yamahata <isaku.yamahata@intel.com>
>>>>
>>>> Because TDX KVM doesn't support PMU yet (it's future work of TDX KVM
>>>> support as another patch series) and pmu_intel.c touches vmx specific
>>
>> It would be nice to have pmu support for tdx-guest from the very beginning.
> 
> It's supported in the public github repo.
> https://github.com/intel/tdx/tree/kvm-upstream-workaround
> As this patch series has 100+ patches, I don't want to bloat this patch more.

I presume we are talking about 873e2391e729...63761adbf5aa for TD pmu:

A quick glance brought me at least these comments:

(1) how does intel_pmu_save/restore() handle the enabled host LBR/PEBS ?
(2) guest PMI injection may be malicious and could the current guest pmu driver 
handle it ?
(3) how do we handle the case when host counters can be enabled before TDENTER
for debuggable TD and support the case like "perf-kvm for both guest and host" ?

My point is actually, changes to perf/core should be CC to the perf reviewers
as early as possible to prevent key player from killing the direction.

> 
> 
>> If you guys are more open on the tdx development model, I'd like to help on
>> those features.
> 
> This mainling list is the place.

Yeah, plus the PUCK and LPC KVM micro-conference.

> 
> 
>>>> structure in vcpu initialization, as workaround add dummy structure to
>>>> struct vcpu_tdx and pmu_intel.c can ignore TDX case.
>>
>> If the target is not to provide a workaround, how about other variants:
>> 	- struct lbr_desc lbr_desc;
>> 	- pebs ds_buffer;
>> ?
>>
>> We also need tdx selftest to verify the unavailability of these features.
>> Also, it would be great to have TDX's "System Profiling Mode" featue back in
>> the specification.

Detailed TD (plus debuggable) PMU selftest would clearly speed up the review 
process.

> 
> I don't think it's productive. Once merging this patch series, we can move on
> to TDX PMU support (or whatever still missing feature) as second (or later)
> step.

I totally agree that TD PMU (plus debuggable) support should not be the first 
part of the
code to be merged in, but the related discussion should be kicked off on day 0.
As a scenario for users using Intel TDX, it's expected to support "System 
Profiling Mode",
which was first introduced in 344425-001US but disappeared since version 003.
Re: [PATCH v13 021/113] KVM: TDX: Make pmu_intel.c ignore guest TD case
Posted by Isaku Yamahata 2 years, 8 months ago
On Mon, May 29, 2023 at 10:19:16PM +0800,
Like Xu <like.xu.linux@gmail.com> wrote:

> On 28/5/2023 4:26 pm, Isaku Yamahata wrote:
> > On Wed, Apr 19, 2023 at 04:21:21PM +0800,
> > Like Xu <like.xu.linux@gmail.com> wrote:
> > 
> > > On 2/4/2023 4:50 pm, Zhi Wang wrote:
> > > > Hi Like:
> > > > 
> > > > Would you mind to take a look on this patch? It would be nice to have
> > > > a r-b also from you. :)
> > > > 
> > > > On Sun, 12 Mar 2023 10:55:45 -0700
> > > > isaku.yamahata@intel.com wrote:
> > > > 
> > > > > From: Isaku Yamahata <isaku.yamahata@intel.com>
> > > > > 
> > > > > Because TDX KVM doesn't support PMU yet (it's future work of TDX KVM
> > > > > support as another patch series) and pmu_intel.c touches vmx specific
> > > 
> > > It would be nice to have pmu support for tdx-guest from the very beginning.
> > 
> > It's supported in the public github repo.
> > https://github.com/intel/tdx/tree/kvm-upstream-workaround
> > As this patch series has 100+ patches, I don't want to bloat this patch more.
> 
> I presume we are talking about 873e2391e729...63761adbf5aa for TD pmu:
> 
> A quick glance brought me at least these comments:
> 
> (1) how does intel_pmu_save/restore() handle the enabled host LBR/PEBS ?

It's not handled yet. We need to save/restore those MSRs.


> (2) guest PMI injection may be malicious and could the current guest pmu
> driver handle it ?

This isn't specific to PMI.  Malicious VMM can inject any interrupt to the
guest at any time.  Guest should be prepared for it.


> (3) how do we handle the case when host counters can be enabled before TDENTER
> for debuggable TD and support the case like "perf-kvm for both guest and host" ?

On TDEXIT, those are disabled. VMM has to restore MSRs and enable it again.
There is a window where events can be missed.


> My point is actually, changes to perf/core should be CC to the perf reviewers
> as early as possible to prevent key player from killing the direction.

Sure, agreed.


> > > > > structure in vcpu initialization, as workaround add dummy structure to
> > > > > struct vcpu_tdx and pmu_intel.c can ignore TDX case.
> > > 
> > > If the target is not to provide a workaround, how about other variants:
> > > 	- struct lbr_desc lbr_desc;
> > > 	- pebs ds_buffer;
> > > ?
> > > 
> > > We also need tdx selftest to verify the unavailability of these features.
> > > Also, it would be great to have TDX's "System Profiling Mode" featue back in
> > > the specification.
> 
> Detailed TD (plus debuggable) PMU selftest would clearly speed up the review
> process.

The existing KVM PMU selftest can be utilized. Or do you have something else in
mind?

-- 
Isaku Yamahata <isaku.yamahata@gmail.com>