[PATCH RFC] x86/cacheinfo: Remove get_cache_id() due to ID collisions

Ricardo Neri posted 1 patch 2 weeks, 3 days ago
arch/x86/kernel/cpu/cacheinfo.c | 34 ++++++++--------------------------
1 file changed, 8 insertions(+), 26 deletions(-)
[PATCH RFC] x86/cacheinfo: Remove get_cache_id() due to ID collisions
Posted by Ricardo Neri 2 weeks, 3 days ago
calc_cache_topo_id() and get_cache_id() both compute cache IDs used to
determine which CPUs share a cache. Intel processors use
calc_cache_topo_id(), while Hygon and recent AMD processors use
get_cache_id(). Intel processors use get_cache_id() as well, but only to
populate /sys/devices/system/cpu/cpuN/cache/index*/id. Besides duplicating
logic, get_cache_id() returns incorrect and colliding IDs.

calc_cache_topo_id() masks off the least-significant N bits of an APIC ID.
N is the smallest integer such that 2^N is greater than or equal to the
number of logical CPUs sharing a cache as reported in
CPUID.4.<level>:EAX[25:14]. This produces unique IDs for all CPUs that
share a cache.

get_cache_id() instead right-shifts the APIC ID by N. This only works when
N is consistent across CPUs. On systems with heterogeneous cores (e.g.,
Intel hybrid processors), different cores report different numbers of
logical CPUs sharing a cache. This results in inconsistent shifts and,
consequently, cache ID collisions between CPUs that do not actually share
the cache.

Consider the APIC IDs and CPUID leaf 4 data for the L1 cache of selected
CPUs from a Meteor Lake-based Lenovo ThinkPad T16 Gen 3:

    CPU  APICID num_threads_sharing calc_cache_topo_id() get_cache_id()
     2    0x18           2                 0x18              0xc
     3    0x19           2                 0x18              0xc
    10     0xc           1                  0xc              0xc

CPUs 2 and 3 are the two logical CPUs of one core. They share the L1 cache.
CPU 10 does not, yet get_cache_id() assigns it the same ID, illustrating
the collision.

While masking and shifting are equivalent when N is identical across CPUs,
using a shift makes the computation dependent on per-CPU topology and
therefore unsafe.

Switch all users to calc_cache_topo_id() and remove get_cache_id(). The
values exposed in /sys/devices/system/cpu/cpuN/cache/index*/id will change,
but they will remain unique and continue to reflect shared caches
correctly.

Reported-by: Len Brown <len.brown@intel.com>
Tested-by: Len Brown <len.brown@intel.com>
Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
---
Hi Prateek, Ahmed,

I would like to get your feedback on this patch before involving the x86
maintainers. I want to ensure that I don't break Hygon or AMD systems or
miss any subtle detail.

I tested this patch on Genoa and Turin systems, both of which used
get_cache_id() to compute L3 cache IDs. The IDs changed but the rest of
the files under /sys/devices/system/cpu/cpuN/cache/index* remain the
same.
---
 arch/x86/kernel/cpu/cacheinfo.c | 34 ++++++++--------------------------
 1 file changed, 8 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 51a95b07831f..56194ef55d8e 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -289,20 +289,14 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
 	return i;
 }
 
-/*
- * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input
- * ECX as cache index. Then right shift apicid by the number's order to get
- * cache id for this cache node.
- */
-static unsigned int get_cache_id(u32 apicid, const struct _cpuid4_info *id4)
+static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
 {
-	unsigned long num_threads_sharing;
+	unsigned int num_threads_sharing;
 	int index_msb;
 
 	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
 	index_msb = get_count_order(num_threads_sharing);
-
-	return apicid >> index_msb;
+	return c->topo.apicid & ~((1 << index_msb) - 1);
 }
 
 /*
@@ -332,7 +326,7 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id)
 		struct _cpuid4_info id4 = {};
 
 		if (!amd_fill_cpuid4_info(llc_index, &id4))
-			c->topo.llc_id = get_cache_id(c->topo.apicid, &id4);
+			c->topo.llc_id = calc_cache_topo_id(c, &id4);
 	}
 }
 
@@ -410,16 +404,6 @@ static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c)
 	intel_cacheinfo_done(c, l3, l2, l1i, l1d);
 }
 
-static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4)
-{
-	unsigned int num_threads_sharing;
-	int index_msb;
-
-	num_threads_sharing = 1 + id4->eax.split.num_threads_sharing;
-	index_msb = get_count_order(num_threads_sharing);
-	return c->topo.apicid & ~((1 << index_msb) - 1);
-}
-
 static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c)
 {
 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
@@ -551,7 +535,7 @@ static void __cache_cpumap_setup(unsigned int cpu, int index,
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 	struct cacheinfo *ci, *sibling_ci;
 	unsigned long num_threads_sharing;
-	int index_msb, i;
+	int i;
 
 	if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) {
 		if (__cache_amd_cpumap_setup(cpu, index, id4))
@@ -565,10 +549,9 @@ static void __cache_cpumap_setup(unsigned int cpu, int index,
 	if (num_threads_sharing == 1)
 		return;
 
-	index_msb = get_count_order(num_threads_sharing);
-
 	for_each_online_cpu(i)
-		if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) {
+		/* Caller calculated the cache ID of @cpu */
+		if (id4->id == calc_cache_topo_id(&cpu_data(i), id4)) {
 			struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i);
 
 			/* Skip if itself or no cacheinfo */
@@ -612,7 +595,6 @@ int populate_cache_leaves(unsigned int cpu)
 	struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 	struct cacheinfo *ci = this_cpu_ci->info_list;
 	u8 cpu_vendor = boot_cpu_data.x86_vendor;
-	u32 apicid = cpu_data(cpu).topo.apicid;
 	struct amd_northbridge *nb = NULL;
 	struct _cpuid4_info id4 = {};
 	int idx, ret;
@@ -622,7 +604,7 @@ int populate_cache_leaves(unsigned int cpu)
 		if (ret)
 			return ret;
 
-		id4.id = get_cache_id(apicid, &id4);
+		id4.id = calc_cache_topo_id(&cpu_data(cpu), &id4);
 
 		if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON)
 			nb = amd_init_l3_cache(idx);

---
base-commit: 9fefc4bad1a3db12ecf6ca8e9886b29721249e77
change-id: 20260220-rneri-x86-cacheinfoid-bug-71e3f7e28e94

Best regards,
-- 
Ricardo Neri <ricardo.neri-calderon@linux.intel.com>