[PATCH v1 10/40] x86/cpu: Remove leaf 0x2 parsing loop and add helpers

Ahmed S. Darwish posted 40 patches 1 year ago
There is a newer version of this series
[PATCH v1 10/40] x86/cpu: Remove leaf 0x2 parsing loop and add helpers
Posted by Ahmed S. Darwish 1 year ago
Leaf 0x2 output includes a "query count" byte where it was supposed to
specify the number of repeated cpuid leaf 0x2 subleaf 0 queries needed
to extract all of the hardware's cache and TLB descriptors.

Per current Intel manuals, all CPUs supporting this leaf "will always"
return an iteration count of 1.

Remove the leaf 0x2 query count loop and just query the hardware once.
Parse the output with C99 bitfields instead of ugly bitwise operations.
Provide leaf 0x2 parsing helpers at asm/cpuid/types.h to do all that.

Use the new leaf 0x2 parsing helpers at x86/cpu intel.c.  Further
commits will also use them for x86/cacheinfo.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ahmed S. Darwish <darwi@linutronix.de>
---
 arch/x86/include/asm/cpuid/types.h | 79 ++++++++++++++++++++++++++++++
 arch/x86/kernel/cpu/intel.c        | 24 +++------
 2 files changed, 85 insertions(+), 18 deletions(-)
 create mode 100644 arch/x86/include/asm/cpuid/types.h

diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h
new file mode 100644
index 000000000000..50f6046a57b9
--- /dev/null
+++ b/arch/x86/include/asm/cpuid/types.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_CPUID_TYPES_H
+#define _ASM_X86_CPUID_TYPES_H
+
+#include <linux/types.h>
+
+#include <asm/cpuid.h>
+
+/*
+ * CPUID(0x2) parsing helpers
+ * Check for_each_leaf_0x2_desc() documentation.
+ */
+
+struct leaf_0x2_reg {
+		u32		: 31,
+			invalid	: 1;
+};
+
+union leaf_0x2_regs {
+	struct leaf_0x2_reg	reg[4];
+	u32			regv[4];
+	u8			desc[16];
+};
+
+/**
+ * get_leaf_0x2_regs() - Return sanitized leaf 0x2 register output
+ * @regs:	Output parameter
+ *
+ * Get leaf 0x2 register output and store it in @regs.  Invalid byte
+ * descriptors returned by the hardware will be force set to zero (the
+ * NULL cache/TLB descriptor) before returning them to the caller.
+ */
+static inline void get_leaf_0x2_regs(union leaf_0x2_regs *regs)
+{
+	cpuid_leaf(0x2, regs);
+
+	/*
+	 * All Intel CPUs must report an iteration count of 1.  In case
+	 * of bogus hardware, treat all returned descriptors as NULL.
+	 */
+	if (regs->desc[0] != 0x01) {
+		for (int i = 0; i < 4; i++)
+			regs->regv[i] = 0;
+		return;
+	}
+
+	/*
+	 * The most significant bit (MSB) of each register must be clear.
+	 * If a register is invalid, replace its descriptors with NULL.
+	 */
+	for (int i = 0; i < 4; i++) {
+		if (regs->reg[i].invalid)
+			regs->regv[i] = 0;
+	}
+}
+
+/**
+ * for_each_leaf_0x2_desc() - Iterator for leaf 0x2 descriptors
+ * @regs:	Leaf 0x2 register output, as returned by get_leaf_0x2_regs()
+ * @desc:	Pointer to the returned descriptor for each iteration
+ *
+ * Loop over the 1-byte descriptors in the passed leaf 0x2 output registers
+ * @regs.  Provide each descriptor through @desc.
+ *
+ * Sample usage::
+ *
+ *	union leaf_0x2_regs regs;
+ *	u8 *desc;
+ *
+ *	get_leaf_0x2_regs(&regs);
+ *	for_each_leaf_0x2_desc(regs, desc) {
+ *		// Handle *desc value
+ *	}
+ */
+#define for_each_leaf_0x2_desc(regs, desc)				\
+	/* Skip the first byte as it is not a descriptor */		\
+	for (desc = &(regs).desc[1]; desc < &(regs).desc[16]; desc++)
+
+#endif /* _ASM_X86_CPUID_TYPES_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index cfd492cf9c3b..57e170ffe3ba 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -15,6 +15,7 @@
 #include <asm/cpu_device_id.h>
 #include <asm/cpufeature.h>
 #include <asm/cpu.h>
+#include <asm/cpuid/types.h>
 #include <asm/hwcap2.h>
 #include <asm/intel-family.h>
 #include <asm/microcode.h>
@@ -778,28 +779,15 @@ static void intel_tlb_lookup(const unsigned char desc)
 
 static void intel_detect_tlb(struct cpuinfo_x86 *c)
 {
-	int i, j, n;
-	unsigned int regs[4];
-	unsigned char *desc = (unsigned char *)regs;
+	union leaf_0x2_regs regs;
+	u8 *desc;
 
 	if (c->cpuid_level < 2)
 		return;
 
-	/* Number of times to iterate */
-	n = cpuid_eax(2) & 0xFF;
-
-	for (i = 0 ; i < n ; i++) {
-		cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
-
-		/* If bit 31 is set, this is an unknown format */
-		for (j = 0 ; j < 4 ; j++)
-			if (regs[j] & (1 << 31))
-				regs[j] = 0;
-
-		/* Byte 0 is level count, not a descriptor */
-		for (j = 1 ; j < 16 ; j++)
-			intel_tlb_lookup(desc[j]);
-	}
+	get_leaf_0x2_regs(&regs);
+	for_each_leaf_0x2_desc(regs, desc)
+		intel_tlb_lookup(*desc);
 }
 
 static const struct cpu_dev intel_cpu_dev = {
-- 
2.48.1
Re: [PATCH v1 10/40] x86/cpu: Remove leaf 0x2 parsing loop and add helpers
Posted by Ingo Molnar 1 year ago
* Ahmed S. Darwish <darwi@linutronix.de> wrote:

> Leaf 0x2 output includes a "query count" byte where it was supposed to
> specify the number of repeated cpuid leaf 0x2 subleaf 0 queries needed
> to extract all of the hardware's cache and TLB descriptors.

s/cpuid
 /CPUID

Please do this in the rest of the series too. (I did it for the first 9 
patches.)

> +++ b/arch/x86/include/asm/cpuid/types.h
> @@ -0,0 +1,79 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_X86_CPUID_TYPES_H
> +#define _ASM_X86_CPUID_TYPES_H
> +
> +#include <linux/types.h>
> +
> +#include <asm/cpuid.h>

So that header organization is a bit messy: if <asm/cpuid.h> is 
supposed to be the main header, why is there a <asm/cpuid/types.h>?

I'd suggest we follow the FPU header structure:

  starship:~/tip/arch/x86/include/asm/fpu> ls -l
  total 48
  -rw-rw-r-- 1 mingo mingo  5732 Feb 27 19:24 api.h
  -rw-rw-r-- 1 mingo mingo   671 Feb 26 16:13 regset.h
  -rw-rw-r-- 1 mingo mingo  2203 Feb 27 13:52 sched.h
  -rw-rw-r-- 1 mingo mingo  1110 Feb 27 19:24 signal.h
  -rw-rw-r-- 1 mingo mingo 14741 Feb 27 19:24 types.h
  -rw-rw-r-- 1 mingo mingo   811 Feb 26 16:13 xcr.h
  -rw-rw-r-- 1 mingo mingo  4401 Feb 27 23:01 xstate.h

With <asm/cpuid/api.h> being the main header - established via a 
separate preparatory patch.

This followup patch can then add <asm/cpuid/types.h> which will also be 
included in <asm/cpuid/api.h>.


> +/*
> + * CPUID(0x2) parsing helpers
> + * Check for_each_leaf_0x2_desc() documentation.
> + */
> +
> +struct leaf_0x2_reg {
> +		u32		: 31,
> +			invalid	: 1;
> +};
> +
> +union leaf_0x2_regs {
> +	struct leaf_0x2_reg	reg[4];
> +	u32			regv[4];
> +	u8			desc[16];
> +};
> +
> +/**
> + * get_leaf_0x2_regs() - Return sanitized leaf 0x2 register output
> + * @regs:	Output parameter
> + *
> + * Get leaf 0x2 register output and store it in @regs.  Invalid byte
> + * descriptors returned by the hardware will be force set to zero (the
> + * NULL cache/TLB descriptor) before returning them to the caller.
> + */
> +static inline void get_leaf_0x2_regs(union leaf_0x2_regs *regs)


Please prefix all new cpuid API functions and types with cpuid_.

> +#define for_each_leaf_0x2_desc(regs, desc)				\
> +	/* Skip the first byte as it is not a descriptor */		\
> +	for (desc = &(regs).desc[1]; desc < &(regs).desc[16]; desc++)

The comment line can come before the macro.

> +	get_leaf_0x2_regs(&regs);
> +	for_each_leaf_0x2_desc(regs, desc)
> +		intel_tlb_lookup(*desc);

Nice interface otherwise.

Thanks,

	Ingo
Re: [PATCH v1 10/40] x86/cpu: Remove leaf 0x2 parsing loop and add helpers
Posted by Ahmed S. Darwish 1 year ago
Hi Ingo,

On Tue, 04 Mar 2025, Ingo Molnar wrote:
> ...
> * Ahmed S. Darwish <darwi@linutronix.de> wrote:
> > +
> > +#include <linux/types.h>
> > +
> > +#include <asm/cpuid.h>
>
> So that header organization is a bit messy: if <asm/cpuid.h> is
> supposed to be the main header, why is there a <asm/cpuid/types.h>?
>
> I'd suggest we follow the FPU header structure:
>
>   starship:~/tip/arch/x86/include/asm/fpu> ls -l
>   total 48
>   -rw-rw-r-- 1 mingo mingo  5732 Feb 27 19:24 api.h
>   -rw-rw-r-- 1 mingo mingo   671 Feb 26 16:13 regset.h
>   -rw-rw-r-- 1 mingo mingo  2203 Feb 27 13:52 sched.h
>   -rw-rw-r-- 1 mingo mingo  1110 Feb 27 19:24 signal.h
>   -rw-rw-r-- 1 mingo mingo 14741 Feb 27 19:24 types.h
>   -rw-rw-r-- 1 mingo mingo   811 Feb 26 16:13 xcr.h
>   -rw-rw-r-- 1 mingo mingo  4401 Feb 27 23:01 xstate.h
>
> With <asm/cpuid/api.h> being the main header - established via a
> separate preparatory patch.
>
> This followup patch can then add <asm/cpuid/types.h> which will also be
> included in <asm/cpuid/api.h>.
>

Sounds sensible.  Thanks!

FYI, in our CPUID-model patch queue (the one after this), we have
something like:

    <asm/cpuid/>
    │
    ├── leaves.h   CPUID bitfields; auto-generated by x86-cpuid-db
    ├── data.h     Internal data structures for the model
    ├── api.h      The new CPUID-model API
    └── ops.h      The raw CPUID ops [Formerly <asm/cpuid.h>]

So doing this from within this PQ should fit nicely.

> > +
> > +/**
> > + * get_leaf_0x2_regs() - Return sanitized leaf 0x2 register output
> > + * @regs:	Output parameter
> > + *
> > + * Get leaf 0x2 register output and store it in @regs.  Invalid byte
> > + * descriptors returned by the hardware will be force set to zero (the
> > + * NULL cache/TLB descriptor) before returning them to the caller.
> > + */
> > +static inline void get_leaf_0x2_regs(union leaf_0x2_regs *regs)
>
> Please prefix all new cpuid API functions and types with cpuid_.
>

ACK.

> > +#define for_each_leaf_0x2_desc(regs, desc)				\
> > +	/* Skip the first byte as it is not a descriptor */		\
> > +	for (desc = &(regs).desc[1]; desc < &(regs).desc[16]; desc++)
>
> The comment line can come before the macro.
>

ACK.

> > +	get_leaf_0x2_regs(&regs);
> > +	for_each_leaf_0x2_desc(regs, desc)
> > +		intel_tlb_lookup(*desc);
>
> Nice interface otherwise.
>

Thanks!
Ahmed