Linux remembers cpu_cacheinfo::num_leaves per CPU, but x86 initializes all
CPUs from the same global "num_cache_leaves".
This is erroneous on systems such as Meteor Lake, where each CPU has a
distinct num_leaves value. Delete the global "num_cache_leaves" and
initialize num_leaves on each CPU.
Reviewed-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Cc: Andreas Herrmann <aherrmann@suse.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Chen Yu <yu.c.chen@intel.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Len Brown <len.brown@intel.com>
Cc: Nikolay Borisov <nik.borisov@suse.com>
Cc: Radu Rendec <rrendec@redhat.com>
Cc: Pierre Gondois <Pierre.Gondois@arm.com>
Cc: Pu Wen <puwen@hygon.cn>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Sudeep Holla <sudeep.holla@arm.com>
Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Cc: Will Deacon <will@kernel.org>
Cc: Zhang Rui <rui.zhang@intel.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: stable@vger.kernel.org # 6.3+
---
After this change, all CPUs will traverse CPUID leaf 0x4 when booted for
the first time. On systems with symmetric cache topologies this is
useless work.
Creating a list of processor models that have asymmetric cache topologies
was considered. The burden of maintaining such a list would outweigh the
performance benefit of skipping this extra step.
---
Changes since v4:
* None
Changes since v3:
* Rebased on v6.7-rc5.
Changes since v2:
* None
Changes since v1:
* Do not make num_cache_leaves a per-CPU variable. Instead, reuse the
existing per-CPU ci_cpu_cacheinfo variable. (Dave Hansen)
---
arch/x86/kernel/cpu/cacheinfo.c | 44 +++++++++++++++++++--------------
1 file changed, 26 insertions(+), 18 deletions(-)
diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 392d09c936d6..b5e216677a46 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -178,7 +178,16 @@ struct _cpuid4_info_regs {
struct amd_northbridge *nb;
};
-static unsigned short num_cache_leaves;
+static inline unsigned int get_num_cache_leaves(unsigned int cpu)
+{
+ return get_cpu_cacheinfo(cpu)->num_leaves;
+}
+
+static inline void
+set_num_cache_leaves(unsigned int nr_leaves, unsigned int cpu)
+{
+ get_cpu_cacheinfo(cpu)->num_leaves = nr_leaves;
+}
/* AMD doesn't have CPUID4. Emulate it here to report the same
information to the user. This makes some assumptions about the machine:
@@ -718,19 +727,21 @@ void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
+ unsigned int cpu = c->cpu_index;
+
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
- num_cache_leaves = find_num_cache_leaves(c);
+ set_num_cache_leaves(find_num_cache_leaves(c), cpu);
} else if (c->extended_cpuid_level >= 0x80000006) {
if (cpuid_edx(0x80000006) & 0xf000)
- num_cache_leaves = 4;
+ set_num_cache_leaves(4, cpu);
else
- num_cache_leaves = 3;
+ set_num_cache_leaves(3, cpu);
}
}
void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
{
- num_cache_leaves = find_num_cache_leaves(c);
+ set_num_cache_leaves(find_num_cache_leaves(c), c->cpu_index);
}
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
@@ -742,19 +753,19 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
if (c->cpuid_level > 3) {
- static int is_initialized;
-
- if (is_initialized == 0) {
- /* Init num_cache_leaves from boot CPU */
- num_cache_leaves = find_num_cache_leaves(c);
- is_initialized++;
- }
+ /*
+ * There should be at least one leaf. A non-zero value means
+ * that the number of leaves has been initialized.
+ */
+ if (!get_num_cache_leaves(c->cpu_index))
+ set_num_cache_leaves(find_num_cache_leaves(c),
+ c->cpu_index);
/*
* Whenever possible use cpuid(4), deterministic cache
* parameters cpuid leaf to find the cache details
*/
- for (i = 0; i < num_cache_leaves; i++) {
+ for (i = 0; i < get_num_cache_leaves(c->cpu_index); i++) {
struct _cpuid4_info_regs this_leaf = {};
int retval;
@@ -790,14 +801,14 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
* Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
* trace cache
*/
- if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
+ if ((!get_num_cache_leaves(c->cpu_index) || c->x86 == 15) && c->cpuid_level > 1) {
/* supports eax=2 call */
int j, n;
unsigned int regs[4];
unsigned char *dp = (unsigned char *)regs;
int only_trace = 0;
- if (num_cache_leaves != 0 && c->x86 == 15)
+ if (get_num_cache_leaves(c->cpu_index) && c->x86 == 15)
only_trace = 1;
/* Number of times to iterate */
@@ -993,12 +1004,9 @@ int init_cache_level(unsigned int cpu)
{
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
- if (!num_cache_leaves)
- return -ENOENT;
if (!this_cpu_ci)
return -EINVAL;
this_cpu_ci->num_levels = 3;
- this_cpu_ci->num_leaves = num_cache_leaves;
return 0;
}
--
2.34.1
On Mon, Aug 26, 2024 at 10:16:34PM -0700, Ricardo Neri wrote:
> Linux remembers cpu_cachinfo::num_leaves per CPU, but x86 initializes all
> CPUs from the same global "num_cache_leaves".
>
> This is erroneous on systems such as Meteor Lake, where each CPU has a
> distinct num_leaves value. Delete the global "num_cache_leaves" and
> initialize num_leaves on each CPU.
>
> Reviewed-by: Len Brown <len.brown@intel.com>
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>

Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
Tested-by: Andreas Herrmann <aherrmann@suse.de>

Test was done with a system equipped with AMD Phenom II X6 1055T and
test kernels based on v6.11-rc5-176-g20371ba12063.

Thanks,
Andreas
On 27.08.24 г. 8:16 ч., Ricardo Neri wrote:
> Linux remembers cpu_cachinfo::num_leaves per CPU, but x86 initializes all
> CPUs from the same global "num_cache_leaves".
>
> This is erroneous on systems such as Meteor Lake, where each CPU has a
> distinct num_leaves value. Delete the global "num_cache_leaves" and
> initialize num_leaves on each CPU.
>
> Reviewed-by: Len Brown <len.brown@intel.com>
> Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> ---
> Cc: Andreas Herrmann <aherrmann@suse.com>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: Chen Yu <yu.c.chen@intel.com>
> Cc: Huang Ying <ying.huang@intel.com>
> Cc: Len Brown <len.brown@intel.com>
> Cc: Nikolay Borisov <nik.borisov@suse.com>
> Cc: Radu Rendec <rrendec@redhat.com>
> Cc: Pierre Gondois <Pierre.Gondois@arm.com>
> Cc: Pu Wen <puwen@hygon.cn>
> Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
> Cc: Sudeep Holla <sudeep.holla@arm.com>
> Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> Cc: Will Deacon <will@kernel.org>
> Cc: Zhang Rui <rui.zhang@intel.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Cc: stable@vger.kernel.org # 6.3+
> ---
> After this change, all CPUs will traverse CPUID leaf 0x4 when booted for
> the first time. On systems with symmetric cache topologies this is
> useless work.
>
> Creating a list of processor models that have asymmetric cache topologies
> was considered. The burden of maintaining such list would outweigh the
> performance benefit of skipping this extra step.
> ---
> Changes since v4:
> * None
>
> Changes since v3:
> * Rebased on v6.7-rc5.
>
> Changes since v2:
> * None
>
> Changes since v1:
> * Do not make num_cache_leaves a per-CPU variable. Instead, reuse the
> existing per-CPU ci_cpu_cacheinfo variable. (Dave Hansen)
> ---
Overall LGTM, one minor nit below which is not a deal breaker.
Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
> arch/x86/kernel/cpu/cacheinfo.c | 44 +++++++++++++++++++--------------
> 1 file changed, 26 insertions(+), 18 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
> index 392d09c936d6..b5e216677a46 100644
> --- a/arch/x86/kernel/cpu/cacheinfo.c
> +++ b/arch/x86/kernel/cpu/cacheinfo.c
> @@ -178,7 +178,16 @@ struct _cpuid4_info_regs {
> struct amd_northbridge *nb;
> };
>
> -static unsigned short num_cache_leaves;
> +static inline unsigned int get_num_cache_leaves(unsigned int cpu)
> +{
> + return get_cpu_cacheinfo(cpu)->num_leaves;
> +}
> +
> +static inline void
> +set_num_cache_leaves(unsigned int nr_leaves, unsigned int cpu)
> +{
nit: I think it's more natural to have the cpu parameter come first.
> + get_cpu_cacheinfo(cpu)->num_leaves = nr_leaves;
> +}
>
> /* AMD doesn't have CPUID4. Emulate it here to report the same
> information to the user. This makes some assumptions about the machine:
<snip>
On Wed, Aug 28, 2024 at 03:57:05PM +0300, Nikolay Borisov wrote:
>
>
> On 27.08.24 г. 8:16 ч., Ricardo Neri wrote:
> > Linux remembers cpu_cachinfo::num_leaves per CPU, but x86 initializes all
> > CPUs from the same global "num_cache_leaves".
> >
> > This is erroneous on systems such as Meteor Lake, where each CPU has a
> > distinct num_leaves value. Delete the global "num_cache_leaves" and
> > initialize num_leaves on each CPU.
> >
> > Reviewed-by: Len Brown <len.brown@intel.com>
> > Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
> > ---
> > Cc: Andreas Herrmann <aherrmann@suse.com>
> > Cc: Catalin Marinas <catalin.marinas@arm.com>
> > Cc: Chen Yu <yu.c.chen@intel.com>
> > Cc: Huang Ying <ying.huang@intel.com>
> > Cc: Len Brown <len.brown@intel.com>
> > Cc: Nikolay Borisov <nik.borisov@suse.com>
> > Cc: Radu Rendec <rrendec@redhat.com>
> > Cc: Pierre Gondois <Pierre.Gondois@arm.com>
> > Cc: Pu Wen <puwen@hygon.cn>
> > Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
> > Cc: Sudeep Holla <sudeep.holla@arm.com>
> > Cc: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
> > Cc: Will Deacon <will@kernel.org>
> > Cc: Zhang Rui <rui.zhang@intel.com>
> > Cc: linux-arm-kernel@lists.infradead.org
> > Cc: stable@vger.kernel.org # 6.3+
> > ---
> > After this change, all CPUs will traverse CPUID leaf 0x4 when booted for
> > the first time. On systems with symmetric cache topologies this is
> > useless work.
> >
> > Creating a list of processor models that have asymmetric cache topologies
> > was considered. The burden of maintaining such list would outweigh the
> > performance benefit of skipping this extra step.
> > ---
> > Changes since v4:
> > * None
> >
> > Changes since v3:
> > * Rebased on v6.7-rc5.
> >
> > Changes since v2:
> > * None
> >
> > Changes since v1:
> > * Do not make num_cache_leaves a per-CPU variable. Instead, reuse the
> > existing per-CPU ci_cpu_cacheinfo variable. (Dave Hansen)
> > ---
>
>
> Overall LGTM, one minor nit below which is not a deal breaker.
>
>
> Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
Thank you!
>
> > arch/x86/kernel/cpu/cacheinfo.c | 44 +++++++++++++++++++--------------
> > 1 file changed, 26 insertions(+), 18 deletions(-)
> >
> > diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
> > index 392d09c936d6..b5e216677a46 100644
> > --- a/arch/x86/kernel/cpu/cacheinfo.c
> > +++ b/arch/x86/kernel/cpu/cacheinfo.c
> > @@ -178,7 +178,16 @@ struct _cpuid4_info_regs {
> > struct amd_northbridge *nb;
> > };
> > -static unsigned short num_cache_leaves;
> > +static inline unsigned int get_num_cache_leaves(unsigned int cpu)
> > +{
> > + return get_cpu_cacheinfo(cpu)->num_leaves;
> > +}
> > +
> > +static inline void
> > +set_num_cache_leaves(unsigned int nr_leaves, unsigned int cpu)
> > +{
>
> nit: I think it's more natural to have the cpu parameter come first.
Sure! I will wait a few days for more feedback. Then I will post an
updated version with this change.
Best,
Ricardo
© 2016 - 2025 Red Hat, Inc.