[PATCH] powerpc/topology: Support coregroup in PowerNV

Srikar Dronamraju posted 1 patch 16 hours ago
arch/powerpc/include/asm/topology.h | 15 +++--
arch/powerpc/mm/numa.c              | 87 ++++++++++++++++++++++-------
2 files changed, 73 insertions(+), 29 deletions(-)
[PATCH] powerpc/topology: Support coregroup in PowerNV
Posted by Srikar Dronamraju 16 hours ago
Coregroup support was only available on PowerPC on PowerVM LPARs.
However if firmware were to expose coregroup-id to the kernel, then
coregroup can even be supported on PowerNV too.

PowerPC Linux kernel will detect support for coregroup by looking at the
primary_domain_index. Till now on PowerNV systems, primary_domain_index
has been the penultimate domain in cpunode ibm,associativity device-tree
property. This would be taken as hint that coregroup support is not
available in the firmware.

If on PowerNV systems, primary_domain_index is not the penultimate
domain in cpunode ibm,associativity device-tree property, then it would
be taken as a hint that coregroup support is available in the firmware.

This logic makes it compatible with PowerVM Systems, where
primary_domain_index is not the penultimate domain in cpunode
ibm,associativity device-tree property.

$ lscpu
Architecture:        ppc64le
Byte Order:          Little Endian
CPU(s):              480
On-line CPU(s) list: 0-479
Thread(s) per core:  8
Core(s) per socket:  15
Socket(s):           4
NUMA node(s):        4
Model:               2.0 (pvr 0080 0200)
Model name:          POWER10, altivec supported
CPU max MHz:         3249.0000
CPU min MHz:         3249.0000
L1d cache:           32K
L1i cache:           48K
L2 cache:            1024K
L3 cache:            4096K
NUMA node0 CPU(s):   0-119
NUMA node1 CPU(s):   120-239
NUMA node2 CPU(s):   240-359
NUMA node3 CPU(s):   360-479

with-out patched firmware and/or Linux-kernel
---------------------------------------------
$ grep -h -r . /sys/devices/system/cpu/*/topology/die_id |sort | uniq -c | sort -n -r
    120 27
    120 18
    120 9
    120 0

$ grep -h -r . /sys/devices/system/cpu/*/topology/die_cpus_list |sort | uniq -c | sort -n -r
    120 360-479
    120 240-359
    120 120-239
    120 0-119

with patched firmware and Linux-kernel
--------------------------------------
grep -h -r . /sys/devices/system/cpu/*/topology/die_id |sort | uniq -c | sort -n -r
     64 6
     64 4
     64 2
     64 0
     56 7
     56 5
     56 3
     56 1
grep -h -r . /sys/devices/system/cpu/*/topology/die_cpus_list |sort | uniq -c | sort -n -r
     64 360-375,392-407,424-439,456-471
     64 240-255,272-287,296-311,328-343
     64 120-135,152-167,184-199,208-223
     64 0-15,32-47,64-79,88-103
     56 376-391,408-423,440-455,472-479
     56 256-271,288-295,312-327,344-359
     56 16-31,48-63,80-87,104-119
     56 136-151,168-183,200-207,224-239

Observation:
Without the patched kernel and/or skiboot, die-id and numa were same.
With patched kernel and/or skiboot, we see 2 die-id per node.
Signed-off-by: Srikar Dronamraju <srikar@linux.ibm.com>
---
 arch/powerpc/include/asm/topology.h | 15 +++--
 arch/powerpc/mm/numa.c              | 87 ++++++++++++++++++++++-------
 2 files changed, 73 insertions(+), 29 deletions(-)

diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 66ed5fe1b718..568e6bc55726 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -71,6 +71,7 @@ extern void map_cpu_to_node(int cpu, int node);
 extern void unmap_cpu_from_node(unsigned long cpu);
 #endif /* CONFIG_HOTPLUG_CPU */
 
+extern int cpu_to_coregroup_id(int cpu);
 #else
 
 static inline int early_cpu_to_node(int cpu) { return 0; }
@@ -107,14 +108,6 @@ static inline void map_cpu_to_node(int cpu, int node) {}
 static inline void unmap_cpu_from_node(unsigned long cpu) {}
 #endif /* CONFIG_HOTPLUG_CPU */
 #endif /* CONFIG_SMP */
-
-#endif /* CONFIG_NUMA */
-
-#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
-void find_and_update_cpu_nid(int cpu);
-extern int cpu_to_coregroup_id(int cpu);
-#else
-static inline void find_and_update_cpu_nid(int cpu) {}
 static inline int cpu_to_coregroup_id(int cpu)
 {
 #ifdef CONFIG_SMP
@@ -124,6 +117,12 @@ static inline int cpu_to_coregroup_id(int cpu)
 #endif
 }
 
+#endif /* CONFIG_NUMA */
+
+#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
+void find_and_update_cpu_nid(int cpu);
+#else
+static inline void find_and_update_cpu_nid(int cpu) {}
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
 #include <asm-generic/topology.h>
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f4cf3ae036de..9b45cc9e1f27 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -432,7 +432,7 @@ static void __init initialize_form2_numa_distance_lookup_table(void)
 
 static int __init find_primary_domain_index(void)
 {
-	int index;
+	int index = -1;
 	struct device_node *root;
 
 	/*
@@ -502,12 +502,9 @@ static int __init find_primary_domain_index(void)
 		distance_ref_points_depth = MAX_DISTANCE_REF_POINTS;
 	}
 
-	of_node_put(root);
-	return index;
-
 err:
 	of_node_put(root);
-	return -1;
+	return index;
 }
 
 static void __init get_n_mem_cells(int *n_addr_cells, int *n_size_cells)
@@ -892,12 +889,32 @@ static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
 	return 0;
 }
 
+/*
+ * If hierarchy extends beyond primary_domain_index + 1, then next
+ * level corresponds to coregroup.
+ */
+static int detect_and_enable_coregroup(const __be32 *associativity, int index)
+{
+	if (!associativity)
+		return -1;
+
+	if (!index) {
+		index = of_read_number(associativity, 1);
+
+		if (index > primary_domain_index + 1)
+			coregroup_enabled = 1;
+		else
+			index = -1;
+	}
+	return index;
+}
+
 static int __init parse_numa_properties(void)
 {
 	struct device_node *memory, *pci;
-	int default_nid = 0;
-	unsigned long i;
+	int default_nid = 0, index = 0;
 	const __be32 *associativity;
+	unsigned long i;
 
 	if (numa_enabled == 0) {
 		pr_warn("disabled by user\n");
@@ -930,7 +947,6 @@ static int __init parse_numa_properties(void)
 	 */
 	for_each_present_cpu(i) {
 		__be32 vphn_assoc[VPHN_ASSOC_BUFSIZE];
-		struct device_node *cpu;
 		int nid = NUMA_NO_NODE;
 
 		memset(vphn_assoc, 0, VPHN_ASSOC_BUFSIZE * sizeof(__be32));
@@ -938,7 +954,10 @@ static int __init parse_numa_properties(void)
 		if (__vphn_get_associativity(i, vphn_assoc) == 0) {
 			nid = associativity_to_nid(vphn_assoc);
 			initialize_form1_numa_distance(vphn_assoc);
+			if (!index)
+				index = detect_and_enable_coregroup(vphn_assoc, index);
 		} else {
+			struct device_node *cpu;
 
 			/*
 			 * Don't fall back to default_nid yet -- we will plug
@@ -951,6 +970,8 @@ static int __init parse_numa_properties(void)
 			associativity = of_get_associativity(cpu);
 			if (associativity) {
 				nid = associativity_to_nid(associativity);
+				if (!index)
+					index = detect_and_enable_coregroup(associativity, index);
 				initialize_form1_numa_distance(associativity);
 			}
 			of_node_put(cpu);
@@ -1431,9 +1452,26 @@ void find_and_update_cpu_nid(int cpu)
 	pr_debug("%s:%d cpu %d nid %d\n", __func__, __LINE__, cpu, new_nid);
 }
 
+static int topology_update_init(void)
+{
+	topology_inited = 1;
+	return 0;
+}
+device_initcall(topology_update_init);
+
+#else
+static long vphn_get_associativity(unsigned long cpu,
+					__be32 *associativity)
+{
+	return -1;
+}
+#endif /* CONFIG_PPC_SPLPAR */
+
 int cpu_to_coregroup_id(int cpu)
 {
-	__be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+	int coregroup_id = cpu_to_core_id(cpu);
+	struct device_node *cpunode = NULL;
+	const __be32 *associativity;
 	int index;
 
 	if (cpu < 0 || cpu > nr_cpu_ids)
@@ -1442,24 +1480,31 @@ int cpu_to_coregroup_id(int cpu)
 	if (!coregroup_enabled)
 		goto out;
 
-	if (!firmware_has_feature(FW_FEATURE_VPHN))
-		goto out;
+	if (firmware_has_feature(FW_FEATURE_VPHN)) {
+		__be32 tmp[VPHN_ASSOC_BUFSIZE] = {0};
 
-	if (vphn_get_associativity(cpu, associativity))
+		if (vphn_get_associativity(cpu, tmp))
+			goto out;
+
+		associativity = tmp;
+
+	} else {
+		cpunode = of_get_cpu_node(cpu, NULL);
+		if (!cpunode)
+			goto out;
+
+		associativity = of_get_associativity(cpunode);
+	}
+	if (!associativity)
 		goto out;
 
 	index = of_read_number(associativity, 1);
 	if (index > primary_domain_index + 1)
-		return of_read_number(&associativity[index - 1], 1);
+		coregroup_id = of_read_number(&associativity[index - 1], 1);
 
 out:
-	return cpu_to_core_id(cpu);
-}
+	if (cpunode)
+		of_node_put(cpunode);
 
-static int topology_update_init(void)
-{
-	topology_inited = 1;
-	return 0;
+	return coregroup_id;
 }
-device_initcall(topology_update_init);
-#endif /* CONFIG_PPC_SPLPAR */
-- 
2.43.0