[PATCH] x86/topology: Fix Hygon logical_die_id by mapping from amd_node_id

Aichun Shi posted 1 patch 1 month, 2 weeks ago
arch/x86/kernel/cpu/Makefile          |  2 +-
arch/x86/kernel/cpu/topology.h        |  5 ++
arch/x86/kernel/cpu/topology_common.c |  3 +
arch/x86/kernel/cpu/topology_hygon.c  | 86 +++++++++++++++++++++++++++
4 files changed, 95 insertions(+), 1 deletion(-)
create mode 100644 arch/x86/kernel/cpu/topology_hygon.c
[PATCH] x86/topology: Fix Hygon logical_die_id by mapping from amd_node_id
Posted by Aichun Shi 1 month, 2 weeks ago
On Hygon CPUs, for some cases such as EDAC error decoding, contiguous die
IDs are required. cpuinfo_topology.amd_node_id cannot be used directly as
it may be non-contiguous, thus cpuinfo_topology.logical_die_id can be used
in such cases.

When CPUID leaf 0x80000026 is not available on Hygon CPUs, die (or node)
topology is not encoded in APIC ID space. logical_die_id obtained from
topology_get_logical_id(apicid, TOPO_DIE_DOMAIN) therefore does not align
with die (or node) topology and is incorrect.

Fix this by adding a Hygon-only fixup in a new topology_hygon.c: Build a
bijective mapping from (logical_pkg_id, amd_node_id) to a contiguous
logical_die_id. Leave the original APIC-based path unchanged when CPUID
leaf 0x80000026 is available.

Signed-off-by: Aichun Shi <shiaichun@open-hieco.net>
---
 arch/x86/kernel/cpu/Makefile          |  2 +-
 arch/x86/kernel/cpu/topology.h        |  5 ++
 arch/x86/kernel/cpu/topology_common.c |  3 +
 arch/x86/kernel/cpu/topology_hygon.c  | 86 +++++++++++++++++++++++++++
 4 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kernel/cpu/topology_hygon.c

diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 2f8a58ef690e..4e80d0a5d0c9 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -38,10 +38,10 @@ obj-y					+= intel.o tsx.o
 obj-$(CONFIG_PM)			+= intel_epb.o
 endif
 obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
+obj-$(CONFIG_CPU_SUP_HYGON)		+= hygon.o topology_hygon.o
 ifeq ($(CONFIG_AMD_NB)$(CONFIG_SYSFS),yy)
 obj-y					+= amd_cache_disable.o
 endif
-obj-$(CONFIG_CPU_SUP_HYGON)		+= hygon.o
 obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
 obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
 obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h
index 37326297f80c..7203ff0457a4 100644
--- a/arch/x86/kernel/cpu/topology.h
+++ b/arch/x86/kernel/cpu/topology.h
@@ -22,6 +22,11 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
 bool cpu_parse_topology_ext(struct topo_scan *tscan);
 void cpu_parse_topology_amd(struct topo_scan *tscan);
 void cpu_topology_fixup_amd(struct topo_scan *tscan);
+#ifdef CONFIG_CPU_SUP_HYGON
+void cpu_topology_fixup_hygon(struct topo_scan *tscan);
+#else
+static inline void cpu_topology_fixup_hygon(struct topo_scan *tscan) { }
+#endif
 
 static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
 {
diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
index 71625795d711..82803744bd0e 100644
--- a/arch/x86/kernel/cpu/topology_common.c
+++ b/arch/x86/kernel/cpu/topology_common.c
@@ -199,6 +199,9 @@ static void topo_set_ids(struct topo_scan *tscan, bool early)
 
 	if (c->x86_vendor == X86_VENDOR_AMD)
 		cpu_topology_fixup_amd(tscan);
+
+	if (c->x86_vendor == X86_VENDOR_HYGON && !early)
+		cpu_topology_fixup_hygon(tscan);
 }
 
 void cpu_parse_topology(struct cpuinfo_x86 *c)
diff --git a/arch/x86/kernel/cpu/topology_hygon.c b/arch/x86/kernel/cpu/topology_hygon.c
new file mode 100644
index 000000000000..645c30524da6
--- /dev/null
+++ b/arch/x86/kernel/cpu/topology_hygon.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+#include <linux/printk.h>
+#include <linux/spinlock.h>
+
+#include <asm/processor.h>
+
+#include "topology.h"
+
+/* (logical_pkg_id, amd_node_id) -> logical_die_id mapping. */
+struct hygon_die_map_entry {
+	u32 logical_pkg_id;
+	u32 amd_node_id;
+	u32 logical_die_id;
+};
+
+static DEFINE_SPINLOCK(hygon_die_map_lock);
+static struct hygon_die_map_entry hygon_die_map[NR_CPUS];
+static u32 hygon_next_logical_die_id;
+static unsigned int hygon_die_map_nr;
+
+/*
+ * Build a bijective mapping from (logical_pkg_id, amd_node_id) to a contiguous
+ * logical_die_id for Hygon CPUs.
+ *
+ * Returns logical_die_id (>= 0) on success, or -EINVAL on error.
+ */
+static int hygon_get_logical_die_id(struct topo_scan *tscan)
+{
+	struct cpuinfo_x86 *c = tscan->c;
+	unsigned long flags;
+	int logical_die_id;
+	unsigned int i;
+	int logical_pkg_id = c->topo.logical_pkg_id;
+
+	if (c->x86_vendor != X86_VENDOR_HYGON)
+		return -EINVAL;
+
+	if (logical_pkg_id < 0)
+		return -EINVAL;
+
+	spin_lock_irqsave(&hygon_die_map_lock, flags);
+
+	/* Look up existing (logical_pkg_id, amd_node_id) -> logical_die_id. */
+	for (i = 0; i < hygon_die_map_nr; i++) {
+		if (hygon_die_map[i].logical_pkg_id == logical_pkg_id &&
+		    hygon_die_map[i].amd_node_id == tscan->amd_node_id) {
+			logical_die_id = hygon_die_map[i].logical_die_id;
+			goto out_unlock;
+		}
+	}
+
+	if (hygon_die_map_nr >= ARRAY_SIZE(hygon_die_map)) {
+		pr_warn_once("CPU topo: Hygon die map exhausted\n");
+		logical_die_id = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* Allocate new contiguous logical_die_id. */
+	logical_die_id = hygon_next_logical_die_id++;
+	hygon_die_map[hygon_die_map_nr++] = (struct hygon_die_map_entry) {
+		.logical_pkg_id = logical_pkg_id,
+		.amd_node_id = tscan->amd_node_id,
+		.logical_die_id = logical_die_id,
+	};
+
+out_unlock:
+	spin_unlock_irqrestore(&hygon_die_map_lock, flags);
+	return logical_die_id;
+}
+
+void cpu_topology_fixup_hygon(struct topo_scan *tscan)
+{
+	/* Skip the fixup if CPUID leaf 0x80000026 is available. */
+	if (tscan->c->extended_cpuid_level >= 0x80000026)
+		return;
+
+	/* When CPUID leaf 0x80000026 is not available, die (node) topology is
+	 * not encoded in APIC ID space. logical_die_id from the DIE domain is
+	 * incorrect, so fix it up.
+	 */
+	int logical_die_id = hygon_get_logical_die_id(tscan);
+
+	if (logical_die_id >= 0)
+		tscan->c->topo.logical_die_id = logical_die_id;
+}
-- 
2.47.3
Re: [PATCH] x86/topology: Fix Hygon logical_die_id by mapping from amd_node_id
Posted by Yazen Ghannam 3 weeks, 6 days ago
On Sun, Mar 01, 2026 at 10:11:56PM +0800, Aichun Shi wrote:
> On Hygon CPUs, for some cases such as EDAC error decoding, contiguous die
> IDs are required. cpuinfo_topology.amd_node_id cannot be used directly as
> it may be non-contiguous, thus cpuinfo_topology.logical_die_id can be used
> in such cases.
> 
> When CPUID leaf 0x80000026 is not available on Hygon CPUs, die (or node)
> topology is not encoded in APIC ID space. logical_die_id obtained from
> topology_get_logical_id(apicid, TOPO_DIE_DOMAIN) therefore does not align
> with die (or node) topology and is incorrect.
> 
> Fix this by adding a Hygon-only fixup in a new topology_hygon.c: Build a
> bijective mapping from (logical_pkg_id, amd_node_id) to a contiguous
> logical_die_id. Leave the original APIC-based path unchanged when CPUID
> leaf 0x80000026 is available.
> 
> Signed-off-by: Aichun Shi <shiaichun@open-hieco.net>
> ---
>  arch/x86/kernel/cpu/Makefile          |  2 +-
>  arch/x86/kernel/cpu/topology.h        |  5 ++
>  arch/x86/kernel/cpu/topology_common.c |  3 +
>  arch/x86/kernel/cpu/topology_hygon.c  | 86 +++++++++++++++++++++++++++
>  4 files changed, 95 insertions(+), 1 deletion(-)
>  create mode 100644 arch/x86/kernel/cpu/topology_hygon.c
> 
> diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
> index 2f8a58ef690e..4e80d0a5d0c9 100644
> --- a/arch/x86/kernel/cpu/Makefile
> +++ b/arch/x86/kernel/cpu/Makefile
> @@ -38,10 +38,10 @@ obj-y					+= intel.o tsx.o
>  obj-$(CONFIG_PM)			+= intel_epb.o
>  endif
>  obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
> +obj-$(CONFIG_CPU_SUP_HYGON)		+= hygon.o topology_hygon.o
>  ifeq ($(CONFIG_AMD_NB)$(CONFIG_SYSFS),yy)
>  obj-y					+= amd_cache_disable.o
>  endif
> -obj-$(CONFIG_CPU_SUP_HYGON)		+= hygon.o
>  obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
>  obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
>  obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
> diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h
> index 37326297f80c..7203ff0457a4 100644
> --- a/arch/x86/kernel/cpu/topology.h
> +++ b/arch/x86/kernel/cpu/topology.h
> @@ -22,6 +22,11 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
>  bool cpu_parse_topology_ext(struct topo_scan *tscan);
>  void cpu_parse_topology_amd(struct topo_scan *tscan);
>  void cpu_topology_fixup_amd(struct topo_scan *tscan);
> +#ifdef CONFIG_CPU_SUP_HYGON
> +void cpu_topology_fixup_hygon(struct topo_scan *tscan);
> +#else
> +static inline void cpu_topology_fixup_hygon(struct topo_scan *tscan) { }
> +#endif
>  
>  static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
>  {
> diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
> index 71625795d711..82803744bd0e 100644
> --- a/arch/x86/kernel/cpu/topology_common.c
> +++ b/arch/x86/kernel/cpu/topology_common.c
> @@ -199,6 +199,9 @@ static void topo_set_ids(struct topo_scan *tscan, bool early)
>  
>  	if (c->x86_vendor == X86_VENDOR_AMD)
>  		cpu_topology_fixup_amd(tscan);
> +
> +	if (c->x86_vendor == X86_VENDOR_HYGON && !early)
> +		cpu_topology_fixup_hygon(tscan);
>  }
>  
>  void cpu_parse_topology(struct cpuinfo_x86 *c)
> diff --git a/arch/x86/kernel/cpu/topology_hygon.c b/arch/x86/kernel/cpu/topology_hygon.c
> new file mode 100644
> index 000000000000..645c30524da6
> --- /dev/null
> +++ b/arch/x86/kernel/cpu/topology_hygon.c
> @@ -0,0 +1,86 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cpu.h>
> +#include <linux/printk.h>
> +#include <linux/spinlock.h>
> +
> +#include <asm/processor.h>

checkpatch --strict has this:
CHECK: Consider using #include <linux/processor.h> instead of <asm/processor.h>

> +
> +#include "topology.h"
> +
> +/* (logical_pkg_id, amd_node_id) -> logical_die_id mapping. */
> +struct hygon_die_map_entry {
> +	u32 logical_pkg_id;
> +	u32 amd_node_id;
> +	u32 logical_die_id;
> +};
> +
> +static DEFINE_SPINLOCK(hygon_die_map_lock);
> +static struct hygon_die_map_entry hygon_die_map[NR_CPUS];

This is a lot of structures to statically allocate. NR_CPUS can be set
up to 8192.

If you must allocate statically, is there a more conservative value?

For example, the max number of "AMD Nodes" per system is '8'. If the
goal is to map "amd_node_id" to another value, then you will still have
a limit of '8' for the other value.

Also, does amd_num_nodes() still work in this case? I understand that
the "ID" values are not contiguous, but is there still a discoverable
"total number"?

> +static u32 hygon_next_logical_die_id;
> +static unsigned int hygon_die_map_nr;
> +
> +/*
> + * Build a bijective mapping from (logical_pkg_id, amd_node_id) to a contiguous
> + * logical_die_id for Hygon CPUs.
> + *
> + * Returns logical_die_id (>= 0) on success, or -EINVAL on error.

I don't think you need a return value. You can update tscan on success
or leave it on error.

> + */
> +static int hygon_get_logical_die_id(struct topo_scan *tscan)
> +{
> +	struct cpuinfo_x86 *c = tscan->c;
> +	unsigned long flags;
> +	int logical_die_id;
> +	unsigned int i;
> +	int logical_pkg_id = c->topo.logical_pkg_id;
> +
> +	if (c->x86_vendor != X86_VENDOR_HYGON)
> +		return -EINVAL;
> +
> +	if (logical_pkg_id < 0)
> +		return -EINVAL;

Is this possible?

It is 'u32' in 'struct cpuinfo_topology'.

> +
> +	spin_lock_irqsave(&hygon_die_map_lock, flags);
> +
> +	/* Look up existing (logical_pkg_id, amd_node_id) -> logical_die_id. */
> +	for (i = 0; i < hygon_die_map_nr; i++) {
> +		if (hygon_die_map[i].logical_pkg_id == logical_pkg_id &&
> +		    hygon_die_map[i].amd_node_id == tscan->amd_node_id) {
> +			logical_die_id = hygon_die_map[i].logical_die_id;

Just update the 'tscan' data here.

> +			goto out_unlock;
> +		}
> +	}
> +
> +	if (hygon_die_map_nr >= ARRAY_SIZE(hygon_die_map)) {
> +		pr_warn_once("CPU topo: Hygon die map exhausted\n");
> +		logical_die_id = -EINVAL;
> +		goto out_unlock;
> +	}
> +
> +	/* Allocate new contiguous logical_die_id. */
> +	logical_die_id = hygon_next_logical_die_id++;
> +	hygon_die_map[hygon_die_map_nr++] = (struct hygon_die_map_entry) {
> +		.logical_pkg_id = logical_pkg_id,
> +		.amd_node_id = tscan->amd_node_id,
> +		.logical_die_id = logical_die_id,
> +	};
> +
> +out_unlock:
> +	spin_unlock_irqrestore(&hygon_die_map_lock, flags);
> +	return logical_die_id;
> +}
> +
> +void cpu_topology_fixup_hygon(struct topo_scan *tscan)
> +{
> +	/* Skip the fixup if CPUID leaf 0x80000026 is available. */
> +	if (tscan->c->extended_cpuid_level >= 0x80000026)
> +		return;
> +
> +	/* When CPUID leaf 0x80000026 is not available, die (node) topology is

Multi-line comments should start on the next line.

    /*
     * Example
     * comments
     */

> +	 * not encoded in APIC ID space. logical_die_id from the DIE domain is
> +	 * incorrect, so fix it up.
> +	 */
> +	int logical_die_id = hygon_get_logical_die_id(tscan);
> +
> +	if (logical_die_id >= 0)
> +		tscan->c->topo.logical_die_id = logical_die_id;
>

This function could be simpler:

  void cpu_topology_fixup_hygon(struct topo_scan *tscan)
  {
    if (!Hygon)
      return;

    if (CPUID >= 0x80000026)
      return;

    hygon_build_logical_die_id_map(tscan);
  }

Thanks,
Yazen
Re: [PATCH] x86/topology: Fix Hygon logical_die_id by mapping from amd_node_id
Posted by Aichun Shi 2 weeks, 6 days ago
On Thu, 19 Mar 2026 11:53:02 -0400, Yazen Ghannam wrote:
> On Sun, Mar 01, 2026 at 10:11:56PM +0800, Aichun Shi wrote:
> > On Hygon CPUs, for some cases such as EDAC error decoding, contiguous die
> > IDs are required. cpuinfo_topology.amd_node_id cannot be used directly as
> > it may be non-contiguous, thus cpuinfo_topology.logical_die_id can be used
> > in such cases.
> > 
> > When CPUID leaf 0x80000026 is not available on Hygon CPUs, die (or node)
> > topology is not encoded in APIC ID space. logical_die_id obtained from
> > topology_get_logical_id(apicid, TOPO_DIE_DOMAIN) therefore does not align
> > with die (or node) topology and is incorrect.
> > 
> > Fix this by adding a Hygon-only fixup in a new topology_hygon.c: Build a
> > bijective mapping from (logical_pkg_id, amd_node_id) to a contiguous
> > logical_die_id. Leave the original APIC-based path unchanged when CPUID
> > leaf 0x80000026 is available.
> > 
> > Signed-off-by: Aichun Shi <shiaichun@open-hieco.net>
> > ---
> >  arch/x86/kernel/cpu/Makefile          |  2 +-
> >  arch/x86/kernel/cpu/topology.h        |  5 ++
> >  arch/x86/kernel/cpu/topology_common.c |  3 +
> >  arch/x86/kernel/cpu/topology_hygon.c  | 86 +++++++++++++++++++++++++++
> >  4 files changed, 95 insertions(+), 1 deletion(-)
> >  create mode 100644 arch/x86/kernel/cpu/topology_hygon.c
> > 
> > diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
> > index 2f8a58ef690e..4e80d0a5d0c9 100644
> > --- a/arch/x86/kernel/cpu/Makefile
> > +++ b/arch/x86/kernel/cpu/Makefile
> > @@ -38,10 +38,10 @@ obj-y					+= intel.o tsx.o
> >  obj-$(CONFIG_PM)			+= intel_epb.o
> >  endif
> >  obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
> > +obj-$(CONFIG_CPU_SUP_HYGON)		+= hygon.o topology_hygon.o
> >  ifeq ($(CONFIG_AMD_NB)$(CONFIG_SYSFS),yy)
> >  obj-y					+= amd_cache_disable.o
> >  endif
> > -obj-$(CONFIG_CPU_SUP_HYGON)		+= hygon.o
> >  obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
> >  obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
> >  obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
> > diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h
> > index 37326297f80c..7203ff0457a4 100644
> > --- a/arch/x86/kernel/cpu/topology.h
> > +++ b/arch/x86/kernel/cpu/topology.h
> > @@ -22,6 +22,11 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
> >  bool cpu_parse_topology_ext(struct topo_scan *tscan);
> >  void cpu_parse_topology_amd(struct topo_scan *tscan);
> >  void cpu_topology_fixup_amd(struct topo_scan *tscan);
> > +#ifdef CONFIG_CPU_SUP_HYGON
> > +void cpu_topology_fixup_hygon(struct topo_scan *tscan);
> > +#else
> > +static inline void cpu_topology_fixup_hygon(struct topo_scan *tscan) { }
> > +#endif
> >  
> >  static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
> >  {
> > diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
> > index 71625795d711..82803744bd0e 100644
> > --- a/arch/x86/kernel/cpu/topology_common.c
> > +++ b/arch/x86/kernel/cpu/topology_common.c
> > @@ -199,6 +199,9 @@ static void topo_set_ids(struct topo_scan *tscan, bool early)
> >  
> >  	if (c->x86_vendor == X86_VENDOR_AMD)
> >  		cpu_topology_fixup_amd(tscan);
> > +
> > +	if (c->x86_vendor == X86_VENDOR_HYGON && !early)
> > +		cpu_topology_fixup_hygon(tscan);
> >  }
> >  
> >  void cpu_parse_topology(struct cpuinfo_x86 *c)
> > diff --git a/arch/x86/kernel/cpu/topology_hygon.c b/arch/x86/kernel/cpu/topology_hygon.c
> > new file mode 100644
> > index 000000000000..645c30524da6
> > --- /dev/null
> > +++ b/arch/x86/kernel/cpu/topology_hygon.c
> > @@ -0,0 +1,86 @@
> > +// SPDX-License-Identifier: GPL-2.0
> > +#include <linux/cpu.h>
> > +#include <linux/printk.h>
> > +#include <linux/spinlock.h>
> > +
> > +#include <asm/processor.h>
> 
> checkpatch --strict has this:
> CHECK: Consider using #include <linux/processor.h> instead of <asm/processor.h>

OK. I will change it, and check with "checkpatch --strict".

> > +
> > +#include "topology.h"
> > +
> > +/* (logical_pkg_id, amd_node_id) -> logical_die_id mapping. */
> > +struct hygon_die_map_entry {
> > +	u32 logical_pkg_id;
> > +	u32 amd_node_id;
> > +	u32 logical_die_id;
> > +};
> > +
> > +static DEFINE_SPINLOCK(hygon_die_map_lock);
> > +static struct hygon_die_map_entry hygon_die_map[NR_CPUS];
> 
> This is a lot of structures to statically allocate. NR_CPUS can be set
> up to 8192.
> 
> If you must allocate statically, is there a more conservative value?
> 
> For example, the max number of "AMD Nodes" per system is '8'. If the
> goal is to map "amd_node_id" to another value, then you will still have
> a limit of '8' for the other value.
> 
> Also, does amd_num_nodes() still work in this case? I understand that
> the "ID" values are not contiguous, but is there still a discoverable
> "total number"?

Yes, amd_num_nodes() also works in this case for Hygon system. I will use
it as the max number, and allocate the structures dynamically.

> > +static u32 hygon_next_logical_die_id;
> > +static unsigned int hygon_die_map_nr;
> > +
> > +/*
> > + * Build a bijective mapping from (logical_pkg_id, amd_node_id) to a contiguous
> > + * logical_die_id for Hygon CPUs.
> > + *
> > + * Returns logical_die_id (>= 0) on success, or -EINVAL on error.
> 
> I don't think you need a return value. You can update tscan on success
> or leave it on error.

You are right, this logic is more clear. I will update tscan in the function.

> > + */
> > +static int hygon_get_logical_die_id(struct topo_scan *tscan)
> > +{
> > +	struct cpuinfo_x86 *c = tscan->c;
> > +	unsigned long flags;
> > +	int logical_die_id;
> > +	unsigned int i;
> > +	int logical_pkg_id = c->topo.logical_pkg_id;
> > +
> > +	if (c->x86_vendor != X86_VENDOR_HYGON)
> > +		return -EINVAL;
> > +
> > +	if (logical_pkg_id < 0)
> > +		return -EINVAL;
> 
> Is this possible?
> 
> It is 'u32' in 'struct cpuinfo_topology'.

You are right, this check is unnecessary, I will remove it.

> > +
> > +	spin_lock_irqsave(&hygon_die_map_lock, flags);
> > +
> > +	/* Look up existing (logical_pkg_id, amd_node_id) -> logical_die_id. */
> > +	for (i = 0; i < hygon_die_map_nr; i++) {
> > +		if (hygon_die_map[i].logical_pkg_id == logical_pkg_id &&
> > +		    hygon_die_map[i].amd_node_id == tscan->amd_node_id) {
> > +			logical_die_id = hygon_die_map[i].logical_die_id;
> 
> Just update the 'tscan' data here.

OK.

> > +			goto out_unlock;
> > +		}
> > +	}
> > +
> > +	if (hygon_die_map_nr >= ARRAY_SIZE(hygon_die_map)) {
> > +		pr_warn_once("CPU topo: Hygon die map exhausted\n");
> > +		logical_die_id = -EINVAL;
> > +		goto out_unlock;
> > +	}
> > +
> > +	/* Allocate new contiguous logical_die_id. */
> > +	logical_die_id = hygon_next_logical_die_id++;
> > +	hygon_die_map[hygon_die_map_nr++] = (struct hygon_die_map_entry) {
> > +		.logical_pkg_id = logical_pkg_id,
> > +		.amd_node_id = tscan->amd_node_id,
> > +		.logical_die_id = logical_die_id,
> > +	};
> > +
> > +out_unlock:
> > +	spin_unlock_irqrestore(&hygon_die_map_lock, flags);
> > +	return logical_die_id;
> > +}
> > +
> > +void cpu_topology_fixup_hygon(struct topo_scan *tscan)
> > +{
> > +	/* Skip the fixup if CPUID leaf 0x80000026 is available. */
> > +	if (tscan->c->extended_cpuid_level >= 0x80000026)
> > +		return;
> > +
> > +	/* When CPUID leaf 0x80000026 is not available, die (node) topology is
> 
> Multi-line comments should start on the next line.
> 
>     /*
>      * Example
>      * comments
>      */

OK. I will update it.
 
> > +	 * not encoded in APIC ID space. logical_die_id from the DIE domain is
> > +	 * incorrect, so fix it up.
> > +	 */
> > +	int logical_die_id = hygon_get_logical_die_id(tscan);
> > +
> > +	if (logical_die_id >= 0)
> > +		tscan->c->topo.logical_die_id = logical_die_id;
> >
> 
> This function could be simpler:
> 
>   void cpu_topology_fixup_hygon(struct topo_scan *tscan)
>   {
>     if (!Hygon)
>       return;
> 
>     if (CPUID >= 0x80000026)
>       return;
> 
>     hygon_build_logical_die_id_map(tscan);
>   }

Good advice, I will follow it.
 
> Thanks,
> Yazen
> 

Thanks for your review and detailed comments!
I will send patch v2 soon.

Aichun Shi
Re: [PATCH] x86/topology: Fix Hygon logical_die_id by mapping from amd_node_id
Posted by Aichun Shi 1 week ago
On 3/26/2026 7:41 PM, Aichun Shi wrote:
> On Thu, 19 Mar 2026 11:53:02 -0400, Yazen Ghannam wrote:
>> On Sun, Mar 01, 2026 at 10:11:56PM +0800, Aichun Shi wrote:
>>> On Hygon CPUs, for some cases such as EDAC error decoding, contiguous die
>>> IDs are required. cpuinfo_topology.amd_node_id cannot be used directly as
>>> it may be non-contiguous, thus cpuinfo_topology.logical_die_id can be used
>>> in such cases.
>>>
>>> When CPUID leaf 0x80000026 is not available on Hygon CPUs, die (or node)
>>> topology is not encoded in APIC ID space. logical_die_id obtained from
>>> topology_get_logical_id(apicid, TOPO_DIE_DOMAIN) therefore does not align
>>> with die (or node) topology and is incorrect.
>>>
>>> Fix this by adding a Hygon-only fixup in a new topology_hygon.c: Build a
>>> bijective mapping from (logical_pkg_id, amd_node_id) to a contiguous
>>> logical_die_id. Leave the original APIC-based path unchanged when CPUID
>>> leaf 0x80000026 is available.
>>>
>>> Signed-off-by: Aichun Shi <shiaichun@open-hieco.net>
>>> ---
>>>   arch/x86/kernel/cpu/Makefile          |  2 +-
>>>   arch/x86/kernel/cpu/topology.h        |  5 ++
>>>   arch/x86/kernel/cpu/topology_common.c |  3 +
>>>   arch/x86/kernel/cpu/topology_hygon.c  | 86 +++++++++++++++++++++++++++
>>>   4 files changed, 95 insertions(+), 1 deletion(-)
>>>   create mode 100644 arch/x86/kernel/cpu/topology_hygon.c
>>>
>>> diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
>>> index 2f8a58ef690e..4e80d0a5d0c9 100644
>>> --- a/arch/x86/kernel/cpu/Makefile
>>> +++ b/arch/x86/kernel/cpu/Makefile
>>> @@ -38,10 +38,10 @@ obj-y					+= intel.o tsx.o
>>>   obj-$(CONFIG_PM)			+= intel_epb.o
>>>   endif
>>>   obj-$(CONFIG_CPU_SUP_AMD)		+= amd.o
>>> +obj-$(CONFIG_CPU_SUP_HYGON)		+= hygon.o topology_hygon.o
>>>   ifeq ($(CONFIG_AMD_NB)$(CONFIG_SYSFS),yy)
>>>   obj-y					+= amd_cache_disable.o
>>>   endif
>>> -obj-$(CONFIG_CPU_SUP_HYGON)		+= hygon.o
>>>   obj-$(CONFIG_CPU_SUP_CYRIX_32)		+= cyrix.o
>>>   obj-$(CONFIG_CPU_SUP_CENTAUR)		+= centaur.o
>>>   obj-$(CONFIG_CPU_SUP_TRANSMETA_32)	+= transmeta.o
>>> diff --git a/arch/x86/kernel/cpu/topology.h b/arch/x86/kernel/cpu/topology.h
>>> index 37326297f80c..7203ff0457a4 100644
>>> --- a/arch/x86/kernel/cpu/topology.h
>>> +++ b/arch/x86/kernel/cpu/topology.h
>>> @@ -22,6 +22,11 @@ void topology_set_dom(struct topo_scan *tscan, enum x86_topology_domains dom,
>>>   bool cpu_parse_topology_ext(struct topo_scan *tscan);
>>>   void cpu_parse_topology_amd(struct topo_scan *tscan);
>>>   void cpu_topology_fixup_amd(struct topo_scan *tscan);
>>> +#ifdef CONFIG_CPU_SUP_HYGON
>>> +void cpu_topology_fixup_hygon(struct topo_scan *tscan);
>>> +#else
>>> +static inline void cpu_topology_fixup_hygon(struct topo_scan *tscan) { }
>>> +#endif
>>>   
>>>   static inline u32 topo_shift_apicid(u32 apicid, enum x86_topology_domains dom)
>>>   {
>>> diff --git a/arch/x86/kernel/cpu/topology_common.c b/arch/x86/kernel/cpu/topology_common.c
>>> index 71625795d711..82803744bd0e 100644
>>> --- a/arch/x86/kernel/cpu/topology_common.c
>>> +++ b/arch/x86/kernel/cpu/topology_common.c
>>> @@ -199,6 +199,9 @@ static void topo_set_ids(struct topo_scan *tscan, bool early)
>>>   
>>>   	if (c->x86_vendor == X86_VENDOR_AMD)
>>>   		cpu_topology_fixup_amd(tscan);
>>> +
>>> +	if (c->x86_vendor == X86_VENDOR_HYGON && !early)
>>> +		cpu_topology_fixup_hygon(tscan);
>>>   }
>>>   
>>>   void cpu_parse_topology(struct cpuinfo_x86 *c)
>>> diff --git a/arch/x86/kernel/cpu/topology_hygon.c b/arch/x86/kernel/cpu/topology_hygon.c
>>> new file mode 100644
>>> index 000000000000..645c30524da6
>>> --- /dev/null
>>> +++ b/arch/x86/kernel/cpu/topology_hygon.c
>>> @@ -0,0 +1,86 @@
>>> +// SPDX-License-Identifier: GPL-2.0
>>> +#include <linux/cpu.h>
>>> +#include <linux/printk.h>
>>> +#include <linux/spinlock.h>
>>> +
>>> +#include <asm/processor.h>
>>
>> checkpatch --strict has this:
>> CHECK: Consider using #include <linux/processor.h> instead of <asm/processor.h>
> 
> OK. I will change it, and check with "checkpatch --strict".
> 
>>> +
>>> +#include "topology.h"
>>> +
>>> +/* (logical_pkg_id, amd_node_id) -> logical_die_id mapping. */
>>> +struct hygon_die_map_entry {
>>> +	u32 logical_pkg_id;
>>> +	u32 amd_node_id;
>>> +	u32 logical_die_id;
>>> +};
>>> +
>>> +static DEFINE_SPINLOCK(hygon_die_map_lock);
>>> +static struct hygon_die_map_entry hygon_die_map[NR_CPUS];
>>
>> This is a lot of structures to statically allocate. NR_CPUS can be set
>> up to 8192.
>>
>> If you must allocate statically, is there a more conservative value?
>>
>> For example, the max number of "AMD Nodes" per system is '8'. If the
>> goal is to map "amd_node_id" to another value, then you will still have
>> a limit of '8' for the other value.
>>
>> Also, does amd_num_nodes() still work in this case? I understand that
>> the "ID" values are not contiguous, but is there still a discoverable
>> "total number"?
> 
> Yes, amd_num_nodes() also works in this case for Hygon system. I will use
> it as the max number, and allocate the structures dynamically.
> 
>>> +static u32 hygon_next_logical_die_id;
>>> +static unsigned int hygon_die_map_nr;
>>> +
>>> +/*
>>> + * Build a bijective mapping from (logical_pkg_id, amd_node_id) to a contiguous
>>> + * logical_die_id for Hygon CPUs.
>>> + *
>>> + * Returns logical_die_id (>= 0) on success, or -EINVAL on error.
>>
>> I don't think you need a return value. You can update tscan on success
>> or leave it on error.
> 
> You are right, this logic is more clear. I will update tscan in the function.
> 
>>> + */
>>> +static int hygon_get_logical_die_id(struct topo_scan *tscan)
>>> +{
>>> +	struct cpuinfo_x86 *c = tscan->c;
>>> +	unsigned long flags;
>>> +	int logical_die_id;
>>> +	unsigned int i;
>>> +	int logical_pkg_id = c->topo.logical_pkg_id;
>>> +
>>> +	if (c->x86_vendor != X86_VENDOR_HYGON)
>>> +		return -EINVAL;
>>> +
>>> +	if (logical_pkg_id < 0)
>>> +		return -EINVAL;
>>
>> Is this possible?
>>
>> It is 'u32' in 'struct cpuinfo_topology'.
> 
> You are right, this check is unnecessary, I will remove it.
> 
>>> +
>>> +	spin_lock_irqsave(&hygon_die_map_lock, flags);
>>> +
>>> +	/* Look up existing (logical_pkg_id, amd_node_id) -> logical_die_id. */
>>> +	for (i = 0; i < hygon_die_map_nr; i++) {
>>> +		if (hygon_die_map[i].logical_pkg_id == logical_pkg_id &&
>>> +		    hygon_die_map[i].amd_node_id == tscan->amd_node_id) {
>>> +			logical_die_id = hygon_die_map[i].logical_die_id;
>>
>> Just update the 'tscan' data here.
> 
> OK.
> 
>>> +			goto out_unlock;
>>> +		}
>>> +	}
>>> +
>>> +	if (hygon_die_map_nr >= ARRAY_SIZE(hygon_die_map)) {
>>> +		pr_warn_once("CPU topo: Hygon die map exhausted\n");
>>> +		logical_die_id = -EINVAL;
>>> +		goto out_unlock;
>>> +	}
>>> +
>>> +	/* Allocate new contiguous logical_die_id. */
>>> +	logical_die_id = hygon_next_logical_die_id++;
>>> +	hygon_die_map[hygon_die_map_nr++] = (struct hygon_die_map_entry) {
>>> +		.logical_pkg_id = logical_pkg_id,
>>> +		.amd_node_id = tscan->amd_node_id,
>>> +		.logical_die_id = logical_die_id,
>>> +	};
>>> +
>>> +out_unlock:
>>> +	spin_unlock_irqrestore(&hygon_die_map_lock, flags);
>>> +	return logical_die_id;
>>> +}
>>> +
>>> +void cpu_topology_fixup_hygon(struct topo_scan *tscan)
>>> +{
>>> +	/* Skip the fixup if CPUID leaf 0x80000026 is available. */
>>> +	if (tscan->c->extended_cpuid_level >= 0x80000026)
>>> +		return;
>>> +
>>> +	/* When CPUID leaf 0x80000026 is not available, die (node) topology is
>>
>> Multi-line comments should start on the next line.
>>
>>      /*
>>       * Example
>>       * comments
>>       */
> 
> OK. I will update it.
>   
>>> +	 * not encoded in APIC ID space. logical_die_id from the DIE domain is
>>> +	 * incorrect, so fix it up.
>>> +	 */
>>> +	int logical_die_id = hygon_get_logical_die_id(tscan);
>>> +
>>> +	if (logical_die_id >= 0)
>>> +		tscan->c->topo.logical_die_id = logical_die_id;
>>>
>>
>> This function could be simpler:
>>
>>    void cpu_topology_fixup_hygon(struct topo_scan *tscan)
>>    {
>>      if (!Hygon)
>>        return;
>>
>>      if (CPUID >= 0x80000026)
>>        return;
>>
>>      hygon_build_logical_die_id_map(tscan);
>>    }
> 
> Good advice, I will follow it.
>   
>> Thanks,
>> Yazen
>>
> 
> Thanks for your review and detailed comments!
> I will send patch v2 soon.
> 

Hi Yazen,
   For the Hygon die ID non-contiguous issue, I will drop this 
workaround patch(sorry for it) as we have a more reliable solution 
provided in Lin Wang's "Hygon Node" RFC patch series [1] by exporting a 
proposed API hygon_cpu_to_logical_node().

   Lin's solution not only solves the non-contiguous issue, but also 
establishes a reliable mapping between two independent hardware domains: 
the Data Fabric(DF) register world (socket_id and DFID from PCI config 
space) and the CPU topology world (phys_node_id from CPUID). So that 
EDAC, MCE, and ATL users can translate a CPU reference to the correct DF 
node, or route a CPU-reported MCE to its DF node correctly.

   My RFC patch series "EDAC/RAS: Hygon Family 0x18 UMC ECC address 
translation" [2] will also use Lin's solution in the next version for 
hygon_get_die_id() in the patch [06/10] [3].

   I would really appreciate it if you could take a look at the RFC 
patch series [2] when you have time.

[1] 
https://lore.kernel.org/lkml/20260402111515.1155505-1-wanglin@open-hieco.net/
[2] 
https://lore.kernel.org/lkml/cover.1775213147.git.shiaichun@open-hieco.net/
[3] 
https://lore.kernel.org/lkml/c9fb6003ed90496dd32666b452eb62de45b731e1.1775213147.git.shiaichun@open-hieco.net/

Thanks!

Aichun