[patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser

Thomas Gleixner posted 19 patches 1 year, 11 months ago
There is a newer version of this series
[patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Thomas Gleixner 1 year, 11 months ago
From: Thomas Gleixner <tglx@linutronix.de>

detect_extended_topology() along with it's early() variant is a classic
example for duct tape engineering:

  - It evaluates an array of subleafs with a boatload of local variables
    for the relevant topology levels instead of using an array to save the
    enumerated information and propagate it to the right level

  - It has no boundary checks for subleafs

  - It prevents updating the die_id with a crude workaround instead of
    checking for leaf 0xb which does not provide die information.

  - It's broken vs. the number of dies evaluation as it uses:

      num_processors[DIE_LEVEL] / num_processors[CORE_LEVEL]

    which "works" only correctly if there is none of the intermediate
    topology levels (MODULE/TILE) enumerated.

There is zero value in trying to "fix" that code as the only proper fix is
to rewrite it from scratch.

Implement a sane parser with proper code documentation, which will be used
for the consolidated topology evaluation in the next step.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Tested-by: Michael Kelley <mhklinux@outlook.com>

---
 arch/x86/kernel/cpu/Makefile       |    2 
 arch/x86/kernel/cpu/topology.h     |   12 +++
 arch/x86/kernel/cpu/topology_ext.c |  130 +++++++++++++++++++++++++++++++++++++
 3 files changed, 143 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/topology_ext.c
---
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -18,7 +18,7 @@ KMSAN_SANITIZE_common.o := n
 KCSAN_SANITIZE_common.o := n
 
 obj-y			:= cacheinfo.o scattered.o
-obj-y			+= topology_common.o topology.o
+obj-y			+= topology_common.o topology_ext.o topology.o
 obj-y			+= common.o
 obj-y			+= rdrand.o
 obj-y			+= match.o
--- a/arch/x86/kernel/cpu/topology.h
+++ b/arch/x86/kernel/cpu/topology.h
@@ -16,6 +16,7 @@ void cpu_init_topology(struct cpuinfo_x8
 void cpu_parse_topology(struct cpuinfo_x86 *c);
 void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
 		      unsigned int shift, unsigned int ncpus);
+bool cpu_parse_topology_ext(struct topo_scan *tscan);
 
 static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
 {
@@ -36,4 +37,15 @@ static inline u32 topo_domain_mask(enum
 	return (1U << x86_topo_system.dom_shifts[dom]) - 1;
 }
 
+/*
+ * Update a domain level after the fact without propagating. Used to fixup
+ * broken CPUID enumerations.
+ */
+static inline void topology_update_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
+				       unsigned int shift, unsigned int ncpus)
+{
+	tscan->dom_shifts[dom] = shift;
+	tscan->dom_ncpus[dom] = ncpus;
+}
+
 #endif /* ARCH_X86_TOPOLOGY_H */
--- /dev/null
+++ b/arch/x86/kernel/cpu/topology_ext.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/apic.h>
+#include <asm/memtype.h>
+#include <asm/processor.h>
+
+#include "cpu.h"
+
+enum topo_types {
+	INVALID_TYPE		= 0,
+	SMT_TYPE		= 1,
+	CORE_TYPE		= 2,
+	MAX_TYPE_0B		= 3,
+	MODULE_TYPE		= 3,
+	TILE_TYPE		= 4,
+	DIE_TYPE		= 5,
+	DIEGRP_TYPE		= 6,
+	MAX_TYPE_1F		= 7,
+};
+
+/*
+ * Use a lookup table for the case that there are future types > 6 which
+ * describe an intermediate domain level which does not exist today.
+ */
+static const unsigned int topo_domain_map_0b_1f[MAX_TYPE_1F] = {
+	[SMT_TYPE]	= TOPO_SMT_DOMAIN,
+	[CORE_TYPE]	= TOPO_CORE_DOMAIN,
+	[MODULE_TYPE]	= TOPO_MODULE_DOMAIN,
+	[TILE_TYPE]	= TOPO_TILE_DOMAIN,
+	[DIE_TYPE]	= TOPO_DIE_DOMAIN,
+	[DIEGRP_TYPE]	= TOPO_DIEGRP_DOMAIN,
+};
+
+static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf,
+				unsigned int *last_dom)
+{
+	unsigned int dom, maxtype;
+	const unsigned int *map;
+	struct {
+		// eax
+		u32	x2apic_shift	:  5, // Number of bits to shift APIC ID right
+					      // for the topology ID at the next level
+					: 27; // Reserved
+		// ebx
+		u32	num_processors	: 16, // Number of processors at current level
+					: 16; // Reserved
+		// ecx
+		u32	level		:  8, // Current topology level. Same as sub leaf number
+			type		:  8, // Level type. If 0, invalid
+					: 16; // Reserved
+		// edx
+		u32	x2apic_id	: 32; // X2APIC ID of the current logical processor
+	} sl;
+
+	switch (leaf) {
+	case 0x0b: maxtype = MAX_TYPE_0B; map = topo_domain_map_0b_1f; break;
+	case 0x1f: maxtype = MAX_TYPE_1F; map = topo_domain_map_0b_1f; break;
+	default: return false;
+	}
+
+	cpuid_subleaf(leaf, subleaf, &sl);
+
+	if (!sl.num_processors || sl.type == INVALID_TYPE)
+		return false;
+
+	if (sl.type >= maxtype) {
+		pr_err_once("Topology: leaf 0x%x:%d Unknown domain type %u\n",
+			    leaf, subleaf, sl.type);
+		/*
+		 * It really would have been too obvious to make the domain
+		 * type space sparse and leave a few reserved types between
+		 * the points which might change instead of following the
+		 * usual "this can be fixed in software" principle.
+		 */
+		dom = *last_dom + 1;
+	} else {
+		dom = map[sl.type];
+		*last_dom = dom;
+	}
+
+	if (!dom) {
+		tscan->c->topo.initial_apicid = sl.x2apic_id;
+	} else if (tscan->c->topo.initial_apicid != sl.x2apic_id) {
+		pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf %d APIC ID mismatch %x != %x\n",
+			     leaf, subleaf, tscan->c->topo.initial_apicid, sl.x2apic_id);
+	}
+
+	topology_set_dom(tscan, dom, sl.x2apic_shift, sl.num_processors);
+	return true;
+}
+
+static bool parse_topology_leaf(struct topo_scan *tscan, u32 leaf)
+{
+	unsigned int last_dom;
+	u32 subleaf;
+
+	/* Read all available subleafs and populate the levels */
+	for (subleaf = 0, last_dom = 0; topo_subleaf(tscan, leaf, subleaf, &last_dom); subleaf++);
+
+	/* If subleaf 0 failed to parse, give up */
+	if (!subleaf)
+		return false;
+
+	/*
+	 * There are machines in the wild which have shift 0 in the subleaf
+	 * 0, but advertise 2 logical processors at that level. They are
+	 * truly SMT.
+	 */
+	if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) {
+		unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+
+		pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs\n",
+			     leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+		topology_update_dom(tscan, TOPO_SMT_DOMAIN, sft, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
+	}
+
+	set_cpu_cap(tscan->c, X86_FEATURE_XTOPOLOGY);
+	return true;
+}
+
+bool cpu_parse_topology_ext(struct topo_scan *tscan)
+{
+	/* Intel: Try leaf 0x1F first. */
+	if (tscan->c->cpuid_level >= 0x1f && parse_topology_leaf(tscan, 0x1f))
+		return true;
+
+	/* Intel/AMD: Fall back to leaf 0xB if available */
+	return tscan->c->cpuid_level >= 0x0b && parse_topology_leaf(tscan, 0x0b);
+}
Re: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Borislav Petkov 1 year, 10 months ago
On Tue, Jan 23, 2024 at 01:53:39PM +0100, Thomas Gleixner wrote:
> +static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf,

"parse_topo_subleaf"?

With a verb in the name...

> +				unsigned int *last_dom)
> +{
> +	unsigned int dom, maxtype;
> +	const unsigned int *map;
> +	struct {
> +		// eax

Can we please not use those yucky // comments together with the
multiline ones?

> +		u32	x2apic_shift	:  5, // Number of bits to shift APIC ID right
> +					      // for the topology ID at the next level
> +					: 27; // Reserved
> +		// ebx
> +		u32	num_processors	: 16, // Number of processors at current level
> +					: 16; // Reserved
> +		// ecx
> +		u32	level		:  8, // Current topology level. Same as sub leaf number
> +			type		:  8, // Level type. If 0, invalid
> +					: 16; // Reserved
> +		// edx
> +		u32	x2apic_id	: 32; // X2APIC ID of the current logical processor
> +	} sl;

...

> +static bool parse_topology_leaf(struct topo_scan *tscan, u32 leaf)
> +{
> +	unsigned int last_dom;
> +	u32 subleaf;
> +
> +	/* Read all available subleafs and populate the levels */
> +	for (subleaf = 0, last_dom = 0; topo_subleaf(tscan, leaf, subleaf, &last_dom); subleaf++);
> +
> +	/* If subleaf 0 failed to parse, give up */
> +	if (!subleaf)
> +		return false;
> +
> +	/*
> +	 * There are machines in the wild which have shift 0 in the subleaf
> +	 * 0, but advertise 2 logical processors at that level. They are
> +	 * truly SMT.
> +	 */
> +	if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) {
> +		unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
> +
> +		pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs\n",
> +			     leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);

Do you really wanna warn about that? Hoping that someone would do
something about it while there's time...?

> +		topology_update_dom(tscan, TOPO_SMT_DOMAIN, sft, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
> +	}
> +
> +	set_cpu_cap(tscan->c, X86_FEATURE_XTOPOLOGY);
> +	return true;
> +}

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Thomas Gleixner 1 year, 10 months ago
On Tue, Jan 30 2024 at 20:31, Borislav Petkov wrote:
> On Tue, Jan 23, 2024 at 01:53:39PM +0100, Thomas Gleixner wrote:
>> +static inline bool topo_subleaf(struct topo_scan *tscan, u32 leaf, u32 subleaf,
>
> "parse_topo_subleaf"?
>
> With a verb in the name...
>
>> +				unsigned int *last_dom)
>> +{
>> +	unsigned int dom, maxtype;
>> +	const unsigned int *map;
>> +	struct {
>> +		// eax
>
> Can we please not use those yucky // comments together with the
> multiline ones?

TBH, the // comment style is really better for struct definitions. It's
denser and easier to parse.

		// eax
		u32	x2apic_shift	:  5, // Number of bits to shift APIC ID right
					      // for the topology ID at the next level
					: 27; // Reserved
		// ebx
		u32	num_processors	: 16, // Number of processors at current level
					: 16; // Reserved

versus:

		/* eax */
		u32	x2apic_shift	:  5, /*
                                               * Number of bits to shift APIC ID right
					       * for the topology ID at	the next level
                                               */
					: 27; /* Reserved */

		/* ebx */
		u32	num_processors	: 16, /* Number of processors at current level */
					: 16; /* Reserved */

Especially x2apic_shift is horrible and the comments of EBX are visually
impaired while with the C++ comments x2apic_shift looks natural and the
EBX comments are just open to the right and therefore simpler.

>> +	if (!tscan->dom_shifts[TOPO_SMT_DOMAIN] && tscan->dom_ncpus[TOPO_SMT_DOMAIN] > 1) {
>> +		unsigned int sft = get_count_order(tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
>> +
>> +		pr_warn_once(FW_BUG "CPUID leaf 0x%x subleaf 0 has shift level 0 but %u CPUs\n",
>> +			     leaf, tscan->dom_ncpus[TOPO_SMT_DOMAIN]);
>
> Do you really wanna warn about that? Hoping that someone would do
> something about it while there's time...?

If it's caught in early testing, this should be fixed, no?

Thanks,

        tglx
Re: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Thomas Gleixner 1 year, 10 months ago
On Mon, Feb 12 2024 at 15:17, Thomas Gleixner wrote:
> On Tue, Jan 30 2024 at 20:31, Borislav Petkov wrote:
> TBH, the // comment style is really better for struct definitions. It's
> denser and easier to parse.
>
> 		// eax
> 		u32	x2apic_shift	:  5, // Number of bits to shift APIC ID right
> 					      // for the topology ID at the next level
> 					: 27; // Reserved
> 		// ebx
> 		u32	num_processors	: 16, // Number of processors at current level
> 					: 16; // Reserved
>
> versus:
>
> 		/* eax */
> 		u32	x2apic_shift	:  5, /*
>                                                * Number of bits to shift APIC ID right
> 					       * for the topology ID at	the next level
>                                                */
> 					: 27; /* Reserved */
>
> 		/* ebx */
> 		u32	num_processors	: 16, /* Number of processors at current level */
> 					: 16; /* Reserved */
>
> Especially x2apic_shift is horrible and the comments of EBX are visually
> impaired while with the C++ comments x2apic_shift looks natural and the
> EBX comments are just open to the right and therefore simpler.

Aside of that it would make the struct generator in the CPUID data base
more complex.
Re: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Borislav Petkov 1 year, 10 months ago
On Mon, Feb 12, 2024 at 04:03:41PM +0100, Thomas Gleixner wrote:
> Aside of that it would make the struct generator in the CPUID data base
> more complex.

Hmm, that's a valid point... that thing being XML is already complex.
:-P

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Borislav Petkov 1 year, 10 months ago
On Mon, Feb 12, 2024 at 03:17:45PM +0100, Thomas Gleixner wrote:
> Especially x2apic_shift is horrible and the comments of EBX are visually
> impaired while with the C++ comments x2apic_shift looks natural and the
> EBX comments are just open to the right and therefore simpler.

I'd say, put comments *above* the member versus on the side. We don't
like side comments, if you remember. :-)

And, for example, the commenting in arch/x86/include/asm/fpu/types.h is
not half as bad and works real nice for struct definitions, I'd say.

But if you want to make that into a rule to have C++, side comments for
struct members I guess I'll get accustomed to it eventually.

> If it's caught in early testing, this should be fixed, no?

Hope dies last. :)

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Thomas Gleixner 1 year, 10 months ago
On Mon, Feb 12 2024 at 16:00, Borislav Petkov wrote:
> On Mon, Feb 12, 2024 at 03:17:45PM +0100, Thomas Gleixner wrote:
>> Especially x2apic_shift is horrible and the comments of EBX are visually
>> impaired while with the C++ comments x2apic_shift looks natural and the
>> EBX comments are just open to the right and therefore simpler.
>
> I'd say, put comments *above* the member versus on the side. We don't
> like side comments, if you remember. :-)

In code, no. For struct definitions if they are strictly tabular
formatted, they are actually nice as they are more compact and take less
space than the above member variant.

		// eax
		u32	x2apic_shift	:  5, // Number of bits to shift APIC ID right
					      // for the topology ID at the next level
					: 27; // Reserved
		// ebx
		u32	num_processors	: 16, // Number of processors at current level
					: 16; // Reserved

versus:

		/* eax */
                	/*
                         * Number of bits to shift APIC ID right for the topology ID
	                 * at the next level
                         */
		u32	x2apic_shift	:  5,
                	/* Reserved */
					: 27;

		/* ebx */
                	 /* Number of processors at current level */
		u32	num_processors	: 16,
                	 /* Reserved */
					: 16;

This really makes my eyes bleed.
Re: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Borislav Petkov 1 year, 10 months ago
On Mon, Feb 12, 2024 at 04:08:31PM +0100, Thomas Gleixner wrote:
> This really makes my eyes bleed.

From: Borislav Petkov (AMD) <bp@alien8.de>
Date:   Mon Feb 12 16:41:42 2024 +0100

Documentation/maintainer-tip: Add C++ tail comments exception

Document when C++-style, tail comments should be used.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>

diff --git a/Documentation/process/maintainer-tip.rst b/Documentation/process/maintainer-tip.rst
index 799359231b7f..497bb39727c8 100644
--- a/Documentation/process/maintainer-tip.rst
+++ b/Documentation/process/maintainer-tip.rst
@@ -480,7 +480,7 @@ Multi-line comments::
 	 * Larger multi-line comments should be split into paragraphs.
 	 */
 
-No tail comments:
+No tail comments (see below):
 
   Please refrain from using tail comments. Tail comments disturb the
   reading flow in almost all contexts, but especially in code::
@@ -501,6 +501,34 @@ No tail comments:
 	/* This magic initialization needs a comment. Maybe not? */
 	seed = MAGIC_CONSTANT;
 
+  Use C++ style, tail comments when documenting structs in headers to
+  achieve a more compact layout and better readability::
+
+        // eax
+        u32     x2apic_shift    :  5, // Number of bits to shift APIC ID right
+                                      // for the topology ID at the next level
+                                : 27; // Reserved
+        // ebx
+        u32     num_processors  : 16, // Number of processors at current level
+                                : 16; // Reserved
+
+  versus::
+
+	/* eax */
+	        /*
+	         * Number of bits to shift APIC ID right for the topology ID
+	         * at the next level
+	         */
+         u32     x2apic_shift    :  5,
+		 /* Reserved */
+				 : 27;
+
+	/* ebx */
+		/* Number of processors at current level */
+	u32     num_processors  : 16,
+		/* Reserved */
+				: 16;
+
 Comment the important things:
 
   Comments should be added where the operation is not obvious. Documenting



-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [patch v5 06/19] x86/cpu: Provide a sane leaf 0xb/0x1f parser
Posted by Thomas Gleixner 1 year, 10 months ago
On Mon, Feb 12 2024 at 16:43, Borislav Petkov wrote:
> On Mon, Feb 12, 2024 at 04:08:31PM +0100, Thomas Gleixner wrote:
>> This really makes my eyes bleed.
>
> From: Borislav Petkov (AMD) <bp@alien8.de>
> Date:   Mon Feb 12 16:41:42 2024 +0100
>
> Documentation/maintainer-tip: Add C++ tail comments exception
>
> Document when C++-style, tail comments should be used.
>
> Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>

Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
[tip: x86/misc] Documentation/maintainer-tip: Add C++ tail comments exception
Posted by tip-bot2 for Borislav Petkov (AMD) 1 year, 10 months ago
The following commit has been merged into the x86/misc branch of tip:

Commit-ID:     7dd0a21ccb5a937ca9f798afad34de4ba030f8d4
Gitweb:        https://git.kernel.org/tip/7dd0a21ccb5a937ca9f798afad34de4ba030f8d4
Author:        Borislav Petkov (AMD) <bp@alien8.de>
AuthorDate:    Mon, 12 Feb 2024 16:41:42 +01:00
Committer:     Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Tue, 13 Feb 2024 13:19:40 +01:00

Documentation/maintainer-tip: Add C++ tail comments exception

Document when C++-style, tail comments should be used.

Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Link: https://lore.kernel.org/r/20240130193102.GEZblOdor_bzoVhT0f@fat_crate.local
---
 Documentation/process/maintainer-tip.rst | 30 ++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/Documentation/process/maintainer-tip.rst b/Documentation/process/maintainer-tip.rst
index 7993592..497bb39 100644
--- a/Documentation/process/maintainer-tip.rst
+++ b/Documentation/process/maintainer-tip.rst
@@ -480,7 +480,7 @@ Multi-line comments::
 	 * Larger multi-line comments should be split into paragraphs.
 	 */
 
-No tail comments:
+No tail comments (see below):
 
   Please refrain from using tail comments. Tail comments disturb the
   reading flow in almost all contexts, but especially in code::
@@ -501,6 +501,34 @@ No tail comments:
 	/* This magic initialization needs a comment. Maybe not? */
 	seed = MAGIC_CONSTANT;
 
+  Use C++ style, tail comments when documenting structs in headers to
+  achieve a more compact layout and better readability::
+
+        // eax
+        u32     x2apic_shift    :  5, // Number of bits to shift APIC ID right
+                                      // for the topology ID at the next level
+                                : 27; // Reserved
+        // ebx
+        u32     num_processors  : 16, // Number of processors at current level
+                                : 16; // Reserved
+
+  versus::
+
+	/* eax */
+	        /*
+	         * Number of bits to shift APIC ID right for the topology ID
+	         * at the next level
+	         */
+         u32     x2apic_shift    :  5,
+		 /* Reserved */
+				 : 27;
+
+	/* ebx */
+		/* Number of processors at current level */
+	u32     num_processors  : 16,
+		/* Reserved */
+				: 16;
+
 Comment the important things:
 
   Comments should be added where the operation is not obvious. Documenting