[PATCH v3 22/29] x86/cpu: Use consolidated leaf 0x2 descriptor table

Ahmed S. Darwish posted 29 patches 9 months ago
There is a newer version of this series
[PATCH v3 22/29] x86/cpu: Use consolidated leaf 0x2 descriptor table
Posted by Ahmed S. Darwish 9 months ago
CPUID leaf 0x2 output is a stream of one-byte descriptors, each implying
certain details about the CPU's cache and TLB entries.

In previous commits, the mapping tables for such descriptors were merged
into one consolidated table.  The mapping was also converted from a
loop-based lookup for each descriptor into a hash lookup.

Use the new consolidated table and its hash-based lookup through the
for_each_leaf_0x2_entry() accessor.

Remove the TLB-specific mapping, intel_tlb_table[], as it is now no
longer used.  Remove the <cpuid/types.h> macro, for_each_leaf_0x2_desc(),
since the converted code was its last user.

Signed-off-by: Ahmed S. Darwish <darwi@linutronix.de>
---
 arch/x86/include/asm/cpuid/types.h | 10 ++++
 arch/x86/kernel/cpu/intel.c        | 83 +++---------------------------
 2 files changed, 17 insertions(+), 76 deletions(-)

diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h
index 4426198f2078..10d16f6bbe6f 100644
--- a/arch/x86/include/asm/cpuid/types.h
+++ b/arch/x86/include/asm/cpuid/types.h
@@ -111,4 +111,14 @@ struct leaf_0x2_table {
 
 extern const struct leaf_0x2_table cpuid_0x2_table[256];
 
+/*
+ * All of leaf 0x2's one-byte TLB descriptors imply the same number of entries
+ * for their respective TLB types.  TLB descriptor 0x63 is an exception: it
+ * implies 4 dTLB entries for 1GB pages and 32 dTLB entries for 2MB or 4MB pages.
+ *
+ * Encode that descriptor's dTLB entry count for 2MB/4MB pages here, as the entry
+ * count for dTLB 1GB pages is already encoded in the cpuid_0x2_table[] mapping.
+ */
+#define TLB_0x63_2M_4M_ENTRIES		32
+
 #endif /* _ASM_X86_CPUID_TYPES_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index def433e0081f..e5d814703406 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -626,81 +626,11 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
 }
 #endif
 
-/*
- * All of leaf 0x2's one-byte TLB descriptors implies the same number of
- * entries for their respective TLB types.  The 0x63 descriptor is an
- * exception: it implies 4 dTLB entries for 1GB pages 32 dTLB entries
- * for 2MB or 4MB pages.  Encode descriptor 0x63 dTLB entry count for
- * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the
- * intel_tlb_table[] mapping.
- */
-#define TLB_0x63_2M_4M_ENTRIES	32
-
-struct _tlb_table {
-	unsigned char descriptor;
-	enum _tlb_table_type type;
-	unsigned int entries;
-};
-
-static const struct _tlb_table intel_tlb_table[] = {
-	{ 0x01, TLB_INST_4K,		32},	/* TLB_INST 4 KByte pages, 4-way set associative */
-	{ 0x02, TLB_INST_4M,		2},	/* TLB_INST 4 MByte pages, full associative */
-	{ 0x03, TLB_DATA_4K,		64},	/* TLB_DATA 4 KByte pages, 4-way set associative */
-	{ 0x04, TLB_DATA_4M,		8},	/* TLB_DATA 4 MByte pages, 4-way set associative */
-	{ 0x05, TLB_DATA_4M,		32},	/* TLB_DATA 4 MByte pages, 4-way set associative */
-	{ 0x0b, TLB_INST_4M,		4},	/* TLB_INST 4 MByte pages, 4-way set associative */
-	{ 0x4f, TLB_INST_4K,		32},	/* TLB_INST 4 KByte pages */
-	{ 0x50, TLB_INST_ALL,		64},	/* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
-	{ 0x51, TLB_INST_ALL,		128},	/* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
-	{ 0x52, TLB_INST_ALL,		256},	/* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */
-	{ 0x55, TLB_INST_2M_4M,		7},	/* TLB_INST 2-MByte or 4-MByte pages, fully associative */
-	{ 0x56, TLB_DATA0_4M,		16},	/* TLB_DATA0 4 MByte pages, 4-way set associative */
-	{ 0x57, TLB_DATA0_4K,		16},	/* TLB_DATA0 4 KByte pages, 4-way associative */
-	{ 0x59, TLB_DATA0_4K,		16},	/* TLB_DATA0 4 KByte pages, fully associative */
-	{ 0x5a, TLB_DATA0_2M_4M,	32},	/* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */
-	{ 0x5b, TLB_DATA_4K_4M,		64},	/* TLB_DATA 4 KByte and 4 MByte pages */
-	{ 0x5c, TLB_DATA_4K_4M,		128},	/* TLB_DATA 4 KByte and 4 MByte pages */
-	{ 0x5d, TLB_DATA_4K_4M,		256},	/* TLB_DATA 4 KByte and 4 MByte pages */
-	{ 0x61, TLB_INST_4K,		48},	/* TLB_INST 4 KByte pages, full associative */
-	{ 0x63, TLB_DATA_1G_2M_4M,	4},	/* TLB_DATA 1 GByte pages, 4-way set associative
-						 * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */
-	{ 0x6b, TLB_DATA_4K,		256},	/* TLB_DATA 4 KByte pages, 8-way associative */
-	{ 0x6c, TLB_DATA_2M_4M,		128},	/* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */
-	{ 0x6d, TLB_DATA_1G,		16},	/* TLB_DATA 1 GByte pages, fully associative */
-	{ 0x76, TLB_INST_2M_4M,		8},	/* TLB_INST 2-MByte or 4-MByte pages, fully associative */
-	{ 0xb0, TLB_INST_4K,		128},	/* TLB_INST 4 KByte pages, 4-way set associative */
-	{ 0xb1, TLB_INST_2M_4M,		4},	/* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */
-	{ 0xb2, TLB_INST_4K,		64},	/* TLB_INST 4KByte pages, 4-way set associative */
-	{ 0xb3, TLB_DATA_4K,		128},	/* TLB_DATA 4 KByte pages, 4-way set associative */
-	{ 0xb4, TLB_DATA_4K,		256},	/* TLB_DATA 4 KByte pages, 4-way associative */
-	{ 0xb5, TLB_INST_4K,		64},	/* TLB_INST 4 KByte pages, 8-way set associative */
-	{ 0xb6, TLB_INST_4K,		128},	/* TLB_INST 4 KByte pages, 8-way set associative */
-	{ 0xba, TLB_DATA_4K,		64},	/* TLB_DATA 4 KByte pages, 4-way associative */
-	{ 0xc0, TLB_DATA_4K_4M,		8},	/* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */
-	{ 0xc1, STLB_4K_2M,		1024},	/* STLB 4 KByte and 2 MByte pages, 8-way associative */
-	{ 0xc2, TLB_DATA_2M_4M,		16},	/* TLB_DATA 2 MByte/4MByte pages, 4-way associative */
-	{ 0xca, STLB_4K,		512},	/* STLB 4 KByte pages, 4-way associative */
-	{ 0x00, 0, 0 }
-};
-
-static void intel_tlb_lookup(const unsigned char desc)
+static void intel_tlb_lookup(const struct leaf_0x2_table *entry)
 {
-	unsigned int entries;
-	unsigned char k;
-
-	if (desc == 0)
-		return;
-
-	/* look up this descriptor in the table */
-	for (k = 0; intel_tlb_table[k].descriptor != desc &&
-	     intel_tlb_table[k].descriptor != 0; k++)
-		;
-
-	if (intel_tlb_table[k].type == 0)
-		return;
+	short entries = entry->entries;
 
-	entries = intel_tlb_table[k].entries;
-	switch (intel_tlb_table[k].type) {
+	switch (entry->t_type) {
 	case STLB_4K:
 		tlb_lli_4k = max(tlb_lli_4k, entries);
 		tlb_lld_4k = max(tlb_lld_4k, entries);
@@ -757,15 +687,16 @@ static void intel_tlb_lookup(const unsigned char desc)
 
 static void intel_detect_tlb(struct cpuinfo_x86 *c)
 {
+	const struct leaf_0x2_table *entry;
 	union leaf_0x2_regs regs;
-	u8 *desc;
+	u8 *ptr;
 
 	if (c->cpuid_level < 2)
 		return;
 
 	cpuid_get_leaf_0x2_regs(&regs);
-	for_each_leaf_0x2_desc(regs, desc)
-		intel_tlb_lookup(*desc);
+	for_each_leaf_0x2_entry(regs, ptr, entry)
+		intel_tlb_lookup(entry);
 }
 
 static const struct cpu_dev intel_cpu_dev = {
-- 
2.48.1
Re: [PATCH v3 22/29] x86/cpu: Use consolidated leaf 0x2 descriptor table
Posted by kernel test robot 8 months, 3 weeks ago

Hello,

kernel test robot noticed "BUG:KASAN:stack-out-of-bounds_in_intel_detect_tlb" on:

commit: e114ca069e278f250be2b7bc49b2679dc5da4a95 ("[PATCH v3 22/29] x86/cpu: Use consolidated leaf 0x2 descriptor table")
url: https://github.com/intel-lab-lkp/linux/commits/Ahmed-S-Darwish/x86-cpu-Remove-leaf-0x2-parsing-loop/20250319-203156
patch link: https://lore.kernel.org/all/20250319122137.4004-23-darwi@linutronix.de/
patch subject: [PATCH v3 22/29] x86/cpu: Use consolidated leaf 0x2 descriptor table

in testcase: boot

config: x86_64-rhel-9.4-kselftests
compiler: gcc-12
test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G

(please refer to attached dmesg/kmsg for entire log/backtrace)


+---------------------------------------------------+------------+------------+
|                                                   | bf82706005 | e114ca069e |
+---------------------------------------------------+------------+------------+
| BUG:KASAN:stack-out-of-bounds_in_intel_detect_tlb | 0          | 12         |
+---------------------------------------------------+------------+------------+


If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202503241523.6b53646b-lkp@intel.com


[ 5.001760][ T0] BUG: KASAN: stack-out-of-bounds in intel_detect_tlb (arch/x86/kernel/cpu/intel.c:698 arch/x86/kernel/cpu/intel.c:688) 
[    5.001760][    T0] Read of size 1 at addr ffffffff8a607e80 by task swapper/0/0
[    5.001760][    T0]
[    5.001760][    T0] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.14.0-rc5-00152-ge114ca069e27 #1
[    5.001760][    T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
[    5.001760][    T0] Call Trace:
[    5.001760][    T0]  <TASK>
[ 5.001760][ T0] dump_stack_lvl (lib/dump_stack.c:124) 
[ 5.001760][ T0] print_address_description+0x2c/0x3f0 
[ 5.001760][ T0] ? intel_detect_tlb (arch/x86/kernel/cpu/intel.c:698 arch/x86/kernel/cpu/intel.c:688) 
[ 5.001760][ T0] print_report (mm/kasan/report.c:522) 
[ 5.001760][ T0] ? kasan_addr_to_slab (mm/kasan/common.c:37) 
[ 5.001760][ T0] ? intel_detect_tlb (arch/x86/kernel/cpu/intel.c:698 arch/x86/kernel/cpu/intel.c:688) 
[ 5.001760][ T0] kasan_report (mm/kasan/report.c:636) 
[ 5.001760][ T0] ? intel_detect_tlb (arch/x86/kernel/cpu/intel.c:698 arch/x86/kernel/cpu/intel.c:688) 
[ 5.001760][ T0] intel_detect_tlb (arch/x86/kernel/cpu/intel.c:698 arch/x86/kernel/cpu/intel.c:688) 
[ 5.001760][ T0] ? __pfx_intel_detect_tlb (arch/x86/kernel/cpu/intel.c:689) 
[ 5.001760][ T0] ? numa_add_cpu (include/linux/nodemask.h:272 (discriminator 2) mm/numa_emulation.c:560 (discriminator 2)) 
[ 5.001760][ T0] arch_cpu_finalize_init (arch/x86/kernel/cpu/common.c:862 arch/x86/kernel/cpu/common.c:1999 arch/x86/kernel/cpu/common.c:2409) 
[ 5.001760][ T0] start_kernel (init/main.c:1067) 
[ 5.001760][ T0] x86_64_start_reservations (arch/x86/kernel/head64.c:503) 
[ 5.001760][ T0] x86_64_start_kernel (arch/x86/kernel/head64.c:445 (discriminator 17)) 
[ 5.001760][ T0] ? soft_restart_cpu (arch/x86/kernel/head_64.S:459) 
[ 5.001760][ T0] common_startup_64 (arch/x86/kernel/head_64.S:421) 
[    5.001760][    T0]  </TASK>
[    5.001760][    T0]
[    5.001760][    T0] The buggy address belongs to stack of task swapper/0/0
[    5.001760][    T0]  and is located at offset 48 in frame:
[ 5.001760][ T0] intel_detect_tlb (arch/x86/kernel/cpu/intel.c:689) 
[    5.001760][    T0]
[    5.001760][    T0] This frame has 1 object:
[    5.001760][    T0]  [32, 48) 'regs'
[    5.001760][    T0]
[    5.001760][    T0] The buggy address belongs to the physical page:
[    5.001760][    T0] page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1ab407
[    5.001760][    T0] flags: 0x17ffffc0002000(reserved|node=0|zone=2|lastcpupid=0x1fffff)
[    5.001760][    T0] raw: 0017ffffc0002000 ffffea0006ad01c8 ffffea0006ad01c8 0000000000000000
[    5.001760][    T0] raw: 0000000000000000 0000000000000000 00000001ffffffff 0000000000000000
[    5.001760][    T0] page dumped because: kasan: bad access detected
[    5.001760][    T0]
[    5.001760][    T0] Memory state around the buggy address:
[    5.001760][    T0]  ffffffff8a607d80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    5.001760][    T0]  ffffffff8a607e00: 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 00
[    5.001760][    T0] >ffffffff8a607e80: f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    5.001760][    T0]                    ^
[    5.001760][    T0]  ffffffff8a607f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    5.001760][    T0]  ffffffff8a607f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
[    5.001760][    T0] ==================================================================
[    5.001775][    T0] Disabling lock debugging due to kernel taint


The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20250324/202503241523.6b53646b-lkp@intel.com



-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH v3 22/29] x86/cpu: Use consolidated leaf 0x2 descriptor table
Posted by Ahmed S. Darwish 8 months, 3 weeks ago
Hi,

On Mon, 24 Mar 2025, kernel test robot wrote:
>
> [ 5.001760][ T0] BUG: KASAN: stack-out-of-bounds in intel_detect_tlb (arch/x86/kernel/cpu/intel.c:698 arch/x86/kernel/cpu/intel.c:688)
> [    5.001760][    T0] Read of size 1 at addr ffffffff8a607e80 by task swapper/0/0
> [    5.001760][    T0]
> [    5.001760][    T0] CPU: 0 UID: 0 PID: 0 Comm: swapper/0 Not tainted 6.14.0-rc5-00152-ge114ca069e27 #1
> [    5.001760][    T0] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014
> [    5.001760][    T0] Call Trace:
> [    5.001760][    T0]  <TASK>
> [ 5.001760][ T0] dump_stack_lvl (lib/dump_stack.c:124)
> [ 5.001760][ T0] print_address_description+0x2c/0x3f0
> [ 5.001760][ T0] ? intel_detect_tlb (arch/x86/kernel/cpu/intel.c:698 arch/x86/kernel/cpu/intel.c:688)
>

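I've reproduced the KASAN report.  The old macro's increment clause
dereferenced __ptr right after advancing it, so after the last descriptor
it read one byte past the end of 'regs'.  The hunk below fixes it by doing
the table lookup in the loop condition, only after the bounds check: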

 | --- a/arch/x86/include/asm/cpuid/leaf_0x2_api.h
 | +++ b/arch/x86/include/asm/cpuid/leaf_0x2_api.h
 | @@ -88,9 +88,9 @@ static inline void cpuid_get_leaf_0x2_regs(union leaf_0x2_regs *regs)
 |   *		}
 |   *	}
 |   */
 | -#define for_each_leaf_0x2_entry(regs, __ptr, entry)			\
 | -	for (__ptr = &(regs).desc[1], entry = &cpuid_0x2_table[*__ptr];	\
 | -	     __ptr < &(regs).desc[16];					\
 | -	     __ptr++, entry = &cpuid_0x2_table[*__ptr])
 | +#define for_each_leaf_0x2_entry(regs, __ptr, entry)				\
 | +	for (__ptr = &(regs).desc[1];						\
 | +	     __ptr < &(regs).desc[16] && (entry = &cpuid_0x2_table[*__ptr]);	\
 | +	     __ptr++)

I'll include the fix in v4.

(It also makes sense that this was triggered in x86/cpu intel.c and not
 in x86/cacheinfo: cacheinfo.c always prefers CPUID(4) over CPUID(2)
 when CPUID(4) is available.)

Thanks!

--
Ahmed S. Darwish
Linutronix GmbH