Move the existing scattered performance monitoring related feature bits
from CPUID leaf 0x80000022[EAX] into a dedicated word since additional
bits will be defined from the same leaf in the future. This includes
X86_FEATURE_PERFMON_V2 and X86_FEATURE_AMD_LBR_V2.
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
---
arch/x86/include/asm/cpufeature.h | 7 +++++--
arch/x86/include/asm/cpufeatures.h | 10 +++++++---
arch/x86/include/asm/disabled-features.h | 3 ++-
arch/x86/include/asm/required-features.h | 3 ++-
arch/x86/kernel/cpu/common.c | 3 +++
arch/x86/kernel/cpu/scattered.c | 2 --
arch/x86/kvm/cpuid.c | 5 +----
arch/x86/kvm/reverse_cpuid.h | 1 -
8 files changed, 20 insertions(+), 14 deletions(-)
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index a1273698fc43..68dd27d60748 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -33,6 +33,7 @@ enum cpuid_leafs
CPUID_7_EDX,
CPUID_8000_001F_EAX,
CPUID_8000_0021_EAX,
+ CPUID_8000_0022_EAX,
};
#define X86_CAP_FMT_NUM "%d:%d"
@@ -91,8 +92,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \
REQUIRED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define DISABLED_MASK_BIT_SET(feature_bit) \
( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \
@@ -116,8 +118,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \
CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \
+ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \
DISABLED_MASK_CHECK || \
- BUILD_BUG_ON_ZERO(NCAPINTS != 21))
+ BUILD_BUG_ON_ZERO(NCAPINTS != 22))
#define cpu_has(c, bit) \
(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f0337f7bcf16..028f333dc530 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -13,7 +13,7 @@
/*
* Defines x86 CPU feature bits
*/
-#define NCAPINTS 21 /* N 32-bit words worth of info */
+#define NCAPINTS 22 /* N 32-bit words worth of info */
#define NBUGINTS 2 /* N 32-bit bug flags */
/*
@@ -94,7 +94,7 @@
#define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
#define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
#define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
+/* FREE! ( 3*32+17) */
#define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
#define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
#define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
@@ -209,7 +209,7 @@
#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
+/* FREE! ( 7*32+20) */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
@@ -459,6 +459,10 @@
#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
+/* AMD-defined performance monitoring features, CPUID level 0x80000022 (EAX), word 21 */
+#define X86_FEATURE_PERFMON_V2 (21*32+ 0) /* AMD Performance Monitoring Version 2 */
+#define X86_FEATURE_AMD_LBR_V2 (21*32+ 1) /* AMD Last Branch Record Extension Version 2 */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h
index da4054fbf533..c492bdc97b05 100644
--- a/arch/x86/include/asm/disabled-features.h
+++ b/arch/x86/include/asm/disabled-features.h
@@ -155,6 +155,7 @@
#define DISABLED_MASK18 (DISABLE_IBT)
#define DISABLED_MASK19 (DISABLE_SEV_SNP)
#define DISABLED_MASK20 0
-#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define DISABLED_MASK21 0
+#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_DISABLED_FEATURES_H */
diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h
index 7ba1726b71c7..e9187ddd3d1f 100644
--- a/arch/x86/include/asm/required-features.h
+++ b/arch/x86/include/asm/required-features.h
@@ -99,6 +99,7 @@
#define REQUIRED_MASK18 0
#define REQUIRED_MASK19 0
#define REQUIRED_MASK20 0
-#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21)
+#define REQUIRED_MASK21 0
+#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22)
#endif /* _ASM_X86_REQUIRED_FEATURES_H */
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index ba8cf5e9ce56..771e32a517f5 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1039,6 +1039,9 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
if (c->extended_cpuid_level >= 0x80000021)
c->x86_capability[CPUID_8000_0021_EAX] = cpuid_eax(0x80000021);
+ if (c->extended_cpuid_level >= 0x80000022)
+ c->x86_capability[CPUID_8000_0022_EAX] = cpuid_eax(0x80000022);
+
init_scattered_cpuid_features(c);
init_speculation_control(c);
diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c
index 0dad49a09b7a..b5b1ce0bd493 100644
--- a/arch/x86/kernel/cpu/scattered.c
+++ b/arch/x86/kernel/cpu/scattered.c
@@ -47,8 +47,6 @@ static const struct cpuid_bit cpuid_bits[] = {
{ X86_FEATURE_MBA, CPUID_EBX, 6, 0x80000008, 0 },
{ X86_FEATURE_SMBA, CPUID_EBX, 2, 0x80000020, 0 },
{ X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 },
- { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 },
- { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 },
{ 0, 0, 0, 0, 0 }
};
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index adba49afb5fe..f9bcbb0626bd 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -773,10 +773,7 @@ void kvm_set_cpu_caps(void)
kvm_cpu_cap_check_and_set(X86_FEATURE_SBPB);
kvm_cpu_cap_check_and_set(X86_FEATURE_IBPB_BRTYPE);
kvm_cpu_cap_check_and_set(X86_FEATURE_SRSO_NO);
-
- kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX,
- F(PERFMON_V2)
- );
+ kvm_cpu_cap_check_and_set(X86_FEATURE_PERFMON_V2);
/*
* Synthesize "LFENCE is serializing" into the AMD-defined entry in
diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h
index aadefcaa9561..c69d05cedf42 100644
--- a/arch/x86/kvm/reverse_cpuid.h
+++ b/arch/x86/kvm/reverse_cpuid.h
@@ -15,7 +15,6 @@ enum kvm_only_cpuid_leafs {
CPUID_12_EAX = NCAPINTS,
CPUID_7_1_EDX,
CPUID_8000_0007_EDX,
- CPUID_8000_0022_EAX,
CPUID_7_2_EDX,
NR_KVM_CPU_CAPS,
--
2.34.1
* Sandipan Das <sandipan.das@amd.com> wrote: > Move the existing scattered performance monitoring related feature bits > from CPUID leaf 0x80000022[EAX] into a dedicated word since additional > bits will be defined from the same leaf in the future. This includes > X86_FEATURE_PERFMON_V2 and X86_FEATURE_AMD_LBR_V2. > > Signed-off-by: Sandipan Das <sandipan.das@amd.com> > --- > arch/x86/include/asm/cpufeature.h | 7 +++++-- > arch/x86/include/asm/cpufeatures.h | 10 +++++++--- > arch/x86/include/asm/disabled-features.h | 3 ++- > arch/x86/include/asm/required-features.h | 3 ++- > arch/x86/kernel/cpu/common.c | 3 +++ > arch/x86/kernel/cpu/scattered.c | 2 -- > arch/x86/kvm/cpuid.c | 5 +---- > arch/x86/kvm/reverse_cpuid.h | 1 - > 8 files changed, 20 insertions(+), 14 deletions(-) > > diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h > index a1273698fc43..68dd27d60748 100644 > --- a/arch/x86/include/asm/cpufeature.h > +++ b/arch/x86/include/asm/cpufeature.h > @@ -33,6 +33,7 @@ enum cpuid_leafs > CPUID_7_EDX, > CPUID_8000_001F_EAX, > CPUID_8000_0021_EAX, > + CPUID_8000_0022_EAX, > #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */ > #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */ > #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */ > -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */ > +/* FREE! 
( 3*32+17) */ > #define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */ > #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */ > #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */ > @@ -209,7 +209,7 @@ > #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ > #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ > #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ > -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */ > +/* FREE! ( 7*32+20) */ > #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ > #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ > #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ > @@ -459,6 +459,10 @@ > #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ > #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ > > +/* AMD-defined performance monitoring features, CPUID level 0x80000022 (EAX), word 21 */ > +#define X86_FEATURE_PERFMON_V2 (21*32+ 0) /* AMD Performance Monitoring Version 2 */ > +#define X86_FEATURE_AMD_LBR_V2 (21*32+ 1) /* AMD Last Branch Record Extension Version 2 */ Thank you! I presume you tested both patches on the relevant system with the X86_FEATURE_AMD_LBR_PMC_FREEZE bug? Thanks, Ingo
* Ingo Molnar <mingo@kernel.org> wrote: > > +/* AMD-defined performance monitoring features, CPUID level 0x80000022 (EAX), word 21 */ > > +#define X86_FEATURE_PERFMON_V2 (21*32+ 0) /* AMD Performance Monitoring Version 2 */ > > +#define X86_FEATURE_AMD_LBR_V2 (21*32+ 1) /* AMD Last Branch Record Extension Version 2 */ > > Thank you! I presume you tested both patches on the relevant system > with the X86_FEATURE_AMD_LBR_PMC_FREEZE bug? Also, Boris reminds me that we really want to do what I suggested originally and use a new synthetic word here, not a new vendor word, to keep all the bits better compressed in cpuinfo.x86_capability[] - so one more iteration will be needed. Thanks, Ingo
On 3/19/2024 3:47 PM, Ingo Molnar wrote:
>
> * Sandipan Das <sandipan.das@amd.com> wrote:
>
>> Move the existing scattered performance monitoring related feature bits
>> from CPUID leaf 0x80000022[EAX] into a dedicated word since additional
>> bits will be defined from the same leaf in the future. This includes
>> X86_FEATURE_PERFMON_V2 and X86_FEATURE_AMD_LBR_V2.
>>
>> Signed-off-by: Sandipan Das <sandipan.das@amd.com>
>> ---
>> arch/x86/include/asm/cpufeature.h | 7 +++++--
>> arch/x86/include/asm/cpufeatures.h | 10 +++++++---
>> arch/x86/include/asm/disabled-features.h | 3 ++-
>> arch/x86/include/asm/required-features.h | 3 ++-
>> arch/x86/kernel/cpu/common.c | 3 +++
>> arch/x86/kernel/cpu/scattered.c | 2 --
>> arch/x86/kvm/cpuid.c | 5 +----
>> arch/x86/kvm/reverse_cpuid.h | 1 -
>> 8 files changed, 20 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
>> index a1273698fc43..68dd27d60748 100644
>> --- a/arch/x86/include/asm/cpufeature.h
>> +++ b/arch/x86/include/asm/cpufeature.h
>> @@ -33,6 +33,7 @@ enum cpuid_leafs
>> CPUID_7_EDX,
>> CPUID_8000_001F_EAX,
>> CPUID_8000_0021_EAX,
>> + CPUID_8000_0022_EAX,
>
>> #define X86_FEATURE_SYSCALL32 ( 3*32+14) /* "" syscall in IA32 userspace */
>> #define X86_FEATURE_SYSENTER32 ( 3*32+15) /* "" sysenter in IA32 userspace */
>> #define X86_FEATURE_REP_GOOD ( 3*32+16) /* REP microcode works well */
>> -#define X86_FEATURE_AMD_LBR_V2 ( 3*32+17) /* AMD Last Branch Record Extension Version 2 */
>> +/* FREE! ( 3*32+17) */
>> #define X86_FEATURE_CLEAR_CPU_BUF ( 3*32+18) /* "" Clear CPU buffers using VERW */
>> #define X86_FEATURE_ACC_POWER ( 3*32+19) /* AMD Accumulated Power Mechanism */
>> #define X86_FEATURE_NOPL ( 3*32+20) /* The NOPL (0F 1F) instructions */
>> @@ -209,7 +209,7 @@
>> #define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
>> #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
>> #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
>> -#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
>> +/* FREE! ( 7*32+20) */
>> #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
>> #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
>> #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
>> @@ -459,6 +459,10 @@
>> #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
>> #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */
>>
>> +/* AMD-defined performance monitoring features, CPUID level 0x80000022 (EAX), word 21 */
>> +#define X86_FEATURE_PERFMON_V2 (21*32+ 0) /* AMD Performance Monitoring Version 2 */
>> +#define X86_FEATURE_AMD_LBR_V2 (21*32+ 1) /* AMD Last Branch Record Extension Version 2 */
>
> Thank you! I presume you tested both patches on the relevant system
> with the X86_FEATURE_AMD_LBR_PMC_FREEZE bug?
>
Yes, I tested them on systems which don't support freeze.
When kernel branches are captured on such systems, the records mostly
point to amd_pmu_lbr_read() and native_read_msr() which are called to
read the branch record MSRs. This is the expected result since LBR
does not stop recording branches after a PMC overflow.
E.g.
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 190K of event 'ex_ret_brn_tkn'
# Event count (approx.): 190144
#
# Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles
# ........ ....... .................... ........................... ........................... ..................
#
24.98% branchy [kernel.kallsyms] [k] amd_pmu_lbr_read [k] amd_pmu_lbr_read -
12.49% branchy [kernel.kallsyms] [k] amd_pmu_lbr_read [k] native_read_msr -
12.49% branchy [kernel.kallsyms] [k] native_read_msr [k] native_read_msr -
12.49% branchy [kernel.kallsyms] [k] srso_alias_safe_ret [k] amd_pmu_lbr_read -
12.49% branchy [kernel.kallsyms] [k] srso_alias_safe_ret [k] srso_alias_safe_ret -
12.49% branchy [kernel.kallsyms] [k] srso_alias_return_thunk [k] srso_alias_return_thunk -
6.25% branchy [kernel.kallsyms] [k] native_read_msr [k] srso_alias_return_thunk -
6.25% branchy [kernel.kallsyms] [k] srso_alias_return_thunk [k] srso_alias_safe_ret -
0.02% perf-ex [kernel.kallsyms] [k] amd_pmu_lbr_read [k] amd_pmu_lbr_read -
0.01% perf-ex [kernel.kallsyms] [k] amd_pmu_lbr_read [k] native_read_msr -
0.01% perf-ex [kernel.kallsyms] [k] native_read_msr [k] native_read_msr -
0.01% perf-ex [kernel.kallsyms] [k] srso_alias_safe_ret [k] amd_pmu_lbr_read -
0.01% perf-ex [kernel.kallsyms] [k] srso_alias_safe_ret [k] srso_alias_safe_ret -
0.01% perf-ex [kernel.kallsyms] [k] srso_alias_return_thunk [k] srso_alias_return_thunk -
0.00% perf-ex [kernel.kallsyms] [k] native_read_msr [k] srso_alias_return_thunk -
0.00% perf-ex [kernel.kallsyms] [k] srso_alias_return_thunk [k] srso_alias_safe_ret -
© 2016 - 2026 Red Hat, Inc.