[PATCH v4 28/34] x86/cacheinfo: Use parsed CPUID(0x80000005) and CPUID(0x80000006)

Ahmed S. Darwish posted 34 patches 1 month, 2 weeks ago
There is a newer version of this series
[PATCH v4 28/34] x86/cacheinfo: Use parsed CPUID(0x80000005) and CPUID(0x80000006)
Posted by Ahmed S. Darwish 1 month, 2 weeks ago
For the AMD CPUID(0x4)-emulation logic, use parsed CPUID(0x80000005) and
CPUID(0x80000006) APID access instead of invoking direct CPUID queries.

Beside centralizing CPUID access, this allows using the auto-generated
<cpuid/leaf_types.h> 'struct leaf_0x80000005_0' and 'struct
leaf_0x80000006_0' data types.

Remove the 'union {l1,l2,l3}_cache' definitions as they are no longer
needed.

Note, the expression:

    ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;

is replaced with:

    ci->num_leaves = cpuid_leaf(c, 0x80000006)->l3_assoc ? 4 : 3;

which is the same logic, since the 'l3_assoc' bitfield is 4 bits wide at
EDX offset 12.  Per AMD manuals, an L3 associativity of zero implies the
absence of an L3 cache on the CPU.

While at it, separate the 'Fallback AMD CPUID(0x4) emulation' comment
from the '@AMD_L2_L3_INVALID_ASSOC' one, since the former acts as a
source code section header.

Signed-off-by: Ahmed S. Darwish <darwi@linutronix.de>
---
 arch/x86/kernel/cpu/cacheinfo.c | 105 ++++++++++++--------------------
 1 file changed, 40 insertions(+), 65 deletions(-)

diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index f0540cba4bd4..de8e7125eedd 100644
--- a/arch/x86/kernel/cpu/cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -56,47 +56,17 @@ static const enum cache_type cache_type_map[] = {
 };
 
 /*
- * Fallback AMD CPUID(0x4) emulation
+ * Fallback AMD CPUID(0x4) emulation:
  * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d)
- *
+ */
+
+/*
  * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should
  * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006).
  */
-
 #define AMD_CPUID4_FULLY_ASSOCIATIVE	0xffff
 #define AMD_L2_L3_INVALID_ASSOC		0x9
 
-union l1_cache {
-	struct {
-		unsigned line_size	:8;
-		unsigned lines_per_tag	:8;
-		unsigned assoc		:8;
-		unsigned size_in_kb	:8;
-	};
-	unsigned int val;
-};
-
-union l2_cache {
-	struct {
-		unsigned line_size	:8;
-		unsigned lines_per_tag	:4;
-		unsigned assoc		:4;
-		unsigned size_in_kb	:16;
-	};
-	unsigned int val;
-};
-
-union l3_cache {
-	struct {
-		unsigned line_size	:8;
-		unsigned lines_per_tag	:4;
-		unsigned assoc		:4;
-		unsigned res		:2;
-		unsigned size_encoded	:14;
-	};
-	unsigned int val;
-};
-
 /* L2/L3 associativity mapping */
 static const unsigned short assocs[] = {
 	[1]		= 1,
@@ -117,50 +87,52 @@ static const unsigned short assocs[] = {
 static const unsigned char levels[] = { 1, 1, 2, 3 };
 static const unsigned char types[]  = { 1, 2, 3, 3 };
 
-static void legacy_amd_cpuid4(int index, struct leaf_0x4_0 *regs)
+static void legacy_amd_cpuid4(struct cpuinfo_x86 *c, int index, struct leaf_0x4_0 *regs)
 {
-	unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
-	union l1_cache l1i, l1d, *l1;
-	union l2_cache l2;
-	union l3_cache l3;
+	const struct leaf_0x80000005_0 *el5 = cpuid_leaf(c, 0x80000005);
+	const struct leaf_0x80000006_0 *el6 = cpuid_leaf(c, 0x80000006);
+	const struct cpuid_regs *el5_raw = (const struct cpuid_regs *)el5;
+	unsigned int line_size, lines_per_tag, assoc, size_in_kb;
 
 	*regs = (struct leaf_0x4_0){ };
 
-	cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val);
-	cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val);
-
-	l1 = &l1d;
 	switch (index) {
-	case 1:
-		l1 = &l1i;
-		fallthrough;
 	case 0:
-		if (!l1->val)
+		if (!el5 || !el5_raw->ecx)
 			return;
 
-		assoc		= (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc;
-		line_size	= l1->line_size;
-		lines_per_tag	= l1->lines_per_tag;
-		size_in_kb	= l1->size_in_kb;
+		assoc		= el5->l1_dcache_assoc;
+		line_size	= el5->l1_dcache_line_size;
+		lines_per_tag	= el5->l1_dcache_nlines;
+		size_in_kb	= el5->l1_dcache_size_kb;
+		break;
+	case 1:
+		if (!el5 || !el5_raw->edx)
+			return;
+
+		assoc		= el5->l1_icache_assoc;
+		line_size	= el5->l1_icache_line_size;
+		lines_per_tag	= el5->l1_icache_nlines;
+		size_in_kb	= el5->l1_icache_size_kb;
 		break;
 	case 2:
-		if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC)
+		if (!el6 || !el6->l2_assoc || el6->l2_assoc == AMD_L2_L3_INVALID_ASSOC)
 			return;
 
 		/* Use x86_cache_size as it might have K7 errata fixes */
-		assoc		= assocs[l2.assoc];
-		line_size	= l2.line_size;
-		lines_per_tag	= l2.lines_per_tag;
+		assoc		= assocs[el6->l2_assoc];
+		line_size	= el6->l2_line_size;
+		lines_per_tag	= el6->l2_nlines;
 		size_in_kb	= __this_cpu_read(cpu_info.x86_cache_size);
 		break;
 	case 3:
-		if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC)
+		if (!el6 || !el6->l3_assoc || el6->l3_assoc == AMD_L2_L3_INVALID_ASSOC)
 			return;
 
-		assoc		= assocs[l3.assoc];
-		line_size	= l3.line_size;
-		lines_per_tag	= l3.lines_per_tag;
-		size_in_kb	= l3.size_encoded * 512;
+		assoc		= assocs[el6->l3_assoc];
+		line_size	= el6->l3_line_size;
+		lines_per_tag	= el6->l3_nlines;
+		size_in_kb	= el6->l3_size_range * 512;
 		if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
 			size_in_kb	= size_in_kb >> 1;
 			assoc		= assoc >> 1;
@@ -170,6 +142,10 @@ static void legacy_amd_cpuid4(int index, struct leaf_0x4_0 *regs)
 		return;
 	}
 
+	/* For L1d and L1i caches, 0xff is the full associativity marker */
+	if ((index == 0 || index == 1) && assoc == 0xff)
+		assoc = AMD_CPUID4_FULLY_ASSOCIATIVE;
+
 	regs->cache_self_init		= 1;
 	regs->cache_type		= types[index];
 	regs->cache_level		= levels[index];
@@ -207,7 +183,7 @@ static int amd_fill_cpuid4_info(struct cpuinfo_x86 *c, int index, struct _cpuid4
 	if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
 		regs = *(struct leaf_0x4_0 *)cpuid_subleaf_index(c, 0x8000001d, index);
 	else
-		legacy_amd_cpuid4(index, &regs);
+		legacy_amd_cpuid4(c, index, &regs);
 
 	return cpuid4_info_fill_done(id4, &regs);
 }
@@ -279,10 +255,9 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
 {
 	struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
 
-	if (boot_cpu_has(X86_FEATURE_TOPOEXT))
-		ci->num_leaves = cpuid_subleaf_count(c, 0x8000001d);
-	else if (c->extended_cpuid_level >= 0x80000006)
-		ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3;
+	ci->num_leaves = boot_cpu_has(X86_FEATURE_TOPOEXT) ?
+		cpuid_subleaf_count(c, 0x8000001d) :
+		cpuid_leaf(c, 0x80000006)->l3_assoc ? 4 : 3;
 }
 
 void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
-- 
2.50.1
Re: [PATCH v4 28/34] x86/cacheinfo: Use parsed CPUID(0x80000005) and CPUID(0x80000006)
Posted by Dave Hansen 1 month, 2 weeks ago
On 8/15/25 00:02, Ahmed S. Darwish wrote:
> +static void legacy_amd_cpuid4(struct cpuinfo_x86 *c, int index, struct leaf_0x4_0 *regs)
>  {
> -	unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
> -	union l1_cache l1i, l1d, *l1;
> -	union l2_cache l2;
> -	union l3_cache l3;
> +	const struct leaf_0x80000005_0 *el5 = cpuid_leaf(c, 0x80000005);
> +	const struct leaf_0x80000006_0 *el6 = cpuid_leaf(c, 0x80000006);
> +	const struct cpuid_regs *el5_raw = (const struct cpuid_regs *)el5;

Is there any way we could get rid of the casts? The lack of type safety
on those always worries me. Maybe a helper like this:

    const struct cpuid_regs *el5_raw = cpuid_leaf_raw(c, 0x80000006);

(although that would probably just do casts internally). The other way
would be to rig the macros up so that each 'struct leaf_$FOO' had a:

	union {
		struct leaf_0x80000005_0 l;
		struct cpuid_regs raw;
	};

and then the macros just selected one of the two.
Re: [PATCH v4 28/34] x86/cacheinfo: Use parsed CPUID(0x80000005) and CPUID(0x80000006)
Posted by David Woodhouse 1 month, 2 weeks ago
On Fri, 2025-08-15 at 08:25 -0700, Dave Hansen wrote:
> On 8/15/25 00:02, Ahmed S. Darwish wrote:
> > +static void legacy_amd_cpuid4(struct cpuinfo_x86 *c, int index,
> > struct leaf_0x4_0 *regs)
> >   {
> > -	unsigned int dummy, line_size, lines_per_tag, assoc,
> > size_in_kb;
> > -	union l1_cache l1i, l1d, *l1;
> > -	union l2_cache l2;
> > -	union l3_cache l3;
> > +	const struct leaf_0x80000005_0 *el5 = cpuid_leaf(c,
> > 0x80000005);
> > +	const struct leaf_0x80000006_0 *el6 = cpuid_leaf(c,
> > 0x80000006);
> > +	const struct cpuid_regs *el5_raw = (const struct
> > cpuid_regs *)el5;
> 
> Is there any way we could get rid of the casts? The lack of type
> safety
> on those always worries me. Maybe a helper like this:
> 
>     const struct cpuid_regs *el5_raw = cpuid_leaf_raw(c, 0x80000006);
> 
> (although that would probably just do casts internally). The other
> way
> would be to rig the macros up so that each 'struct leaf_$FOO' had a:
> 
> 	union {
> 		struct leaf_0x80000005_0 l;
> 		struct cpuid_regs raw;
> 	};
> 
> and then the macros just selected one of the two.

How about automatically building the cast into the macro invocation...

#define cpuid_leaf(c, l) ((struct cpuid_leaf_ ## l *)__cpuid_leaf(c, l)
#define cpuid_lead_index(c, l, i) ((struct cpuid_leaf_ ## l ## _ ## i) __cpuid_leaf_index(c, l, i)

That's a bit ugly unless we can canonicalise the leaf and index values
though. It even forces us to use raw numbers instead of names. Can we
come up with something that works... ?

Re: [PATCH v4 28/34] x86/cacheinfo: Use parsed CPUID(0x80000005) and CPUID(0x80000006)
Posted by Ahmed S. Darwish 1 month, 2 weeks ago
Hi!

On Mon, 18 Aug 2025, David Woodhouse wrote:
>
> How about automatically building the cast into the macro invocation...
>
> #define cpuid_leaf(c, l) ((struct cpuid_leaf_ ## l *)__cpuid_leaf(c, l)
> #define cpuid_lead_index(c, l, i) ((struct cpuid_leaf_ ## l ## _ ## i) __cpuid_leaf_index(c, l, i)
>

There are zero casts needed by this model.  From patch 07/34 ("x86/cpuid:
Introduce a centralized CPUID data model"), the access CPUID parser APIs
just dissolve into:

    const struct leaf_0x7_0 *l7_0;
    const struct leaf_0x7_1 *l7_1;

    l7_0 = cpuid_subleaf(c, 0x7, 0);
                         |   |   └────────┐
                         |   └─────────┐  |
                         *             *  *
                        &c.cpuid.leaf_0x7_0[0]

    l7_1 = cpuid_subleaf(c, 0x7, 1);
                         |   |   └────────┐
                         |   └─────────┐  |
                         *             *  *
                        &c.cpuid.leaf_0x7_1[0]

where, for example, the data type of 'c.cpuid.leaf_0x7_0[0]' is native
'struct leaf_0x7_0', and so on.

The per-CPU CPUID table(s) are built in a /fully-typed/ form:

    struct leaf_0x0_0		leaf_0x0_0[1];
    struct leaf_0x1_0		leaf_0x1_0[1];
    struct leaf_0x2_0		leaf_0x2_0[1];
    struct leaf_0x4_0		leaf_0x4_0[8];
    struct leaf_0x16_0		leaf_0x16_0[1];
    struct leaf_0x80000000_0	leaf_0x80000000_0[1];
    struct leaf_0x80000005_0	leaf_0x80000005_0[1];
    struct leaf_0x80000006_0	leaf_0x80000006_0[1];
    struct leaf_0x8000001d_0	leaf_0x8000001d_0[8];

Then, the exported CPUID parser APIs do some CPP tokenization magic to
access such fully-typed data -- no casting needed.

The only exception is raw EAX->EDX register access:

    /**
     * cpuid_leaf_regs() - Access parsed CPUID data in raw format
     * @_cpuinfo:	CPU capability structure reference ('struct cpuinfo_x86')
     * @_leaf:		CPUID leaf, in compile-time 0xN format
     *
     * Similar to cpuid_leaf(), but returns a raw 'struct cpuid_regs' pointer to
     * the parsed CPUID data instead of a "typed" <cpuid/leaf_types.h> pointer.
     */
    #define cpuid_leaf_regs(_cpuinfo, _leaf)				\
	((struct cpuid_regs *)(cpuid_leaf(_cpuinfo, _leaf)))

But the usage of this raw-access API is very limited.  This is by design,
as one of the purpose of this work was to ensure type safety and avoid
call-site bit fiddling.

Thanks!

--
Ahmed S. Darwish
Linutronix GmbH
Re: [PATCH v4 28/34] x86/cacheinfo: Use parsed CPUID(0x80000005) and CPUID(0x80000006)
Posted by Ahmed S. Darwish 1 month, 2 weeks ago
On Fri, 15 Aug 2025, Dave Hansen wrote:
>
> On 8/15/25 00:02, Ahmed S. Darwish wrote:
> > +static void legacy_amd_cpuid4(struct cpuinfo_x86 *c, int index, struct leaf_0x4_0 *regs)
> >  {
> > -	unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb;
> > -	union l1_cache l1i, l1d, *l1;
> > -	union l2_cache l2;
> > -	union l3_cache l3;
> > +	const struct leaf_0x80000005_0 *el5 = cpuid_leaf(c, 0x80000005);
> > +	const struct leaf_0x80000006_0 *el6 = cpuid_leaf(c, 0x80000006);
> > +	const struct cpuid_regs *el5_raw = (const struct cpuid_regs *)el5;
>
> Is there any way we could get rid of the casts? The lack of type safety
> on those always worries me. Maybe a helper like this:
>
>     const struct cpuid_regs *el5_raw = cpuid_leaf_raw(c, 0x80000006);
>
> (although that would probably just do casts internally).
>

Indeed.

I already have at <asm/cpuid/api.h>:

    /**
     * cpuid_leaf_regs() - Access parsed CPUID data in raw format
     * @_cpuinfo:	CPU capability structure reference ('struct cpuinfo_x86')
     * @_leaf:		CPUID leaf, in compile-time 0xN format
     *
     * Similar to cpuid_leaf(), but returns a raw 'struct cpuid_regs' pointer to
     * the parsed CPUID data instead of a "typed" <cpuid/leaf_types.h> pointer.
     */
    #define cpuid_leaf_regs(_cpuinfo, _leaf)				\
	((struct cpuid_regs *)(cpuid_leaf(_cpuinfo, _leaf)))

The only reason I didn't use it in this patch was to avoid a repetion.
But, you're correct, using the API is better than call-site casting.

Thus, the code shall be:

    const struct leaf_0x80000005_0 *el5 = cpuid_leaf(c, 0x80000005);
    const struct leaf_0x80000006_0 *el6 = cpuid_leaf(c, 0x80000006);
    const struct cpuid_regs *el5_raw = cpuid_leaf_regs(c, 0x80000005);

I'll do it like that in v5.

Thanks!
Ahmed