arch/x86/include/asm/uv/uv_hub.h | 32 ++-- arch/x86/kernel/apic/x2apic_uv_x.c | 245 ++++++++++++++++------------- 2 files changed, 160 insertions(+), 117 deletions(-)
Sub-NUMA clustering (SNC) invalidates previous assumptions of a 1:1
relationship between blades, sockets, and nodes. Fix these
assumptions and build tables correctly when SNC is enabled.
Also replace uses of BUG() and BUG_ON() with WARN_ON_ONCE() followed by an early return, so that a failure here no longer halts the kernel.
Signed-off-by: Steve Wahl <steve.wahl@hpe.com>
---
arch/x86/include/asm/uv/uv_hub.h | 32 ++--
arch/x86/kernel/apic/x2apic_uv_x.c | 245 ++++++++++++++++-------------
2 files changed, 160 insertions(+), 117 deletions(-)
diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h
index d3e3197917be..5fa76c2ced51 100644
--- a/arch/x86/include/asm/uv/uv_hub.h
+++ b/arch/x86/include/asm/uv/uv_hub.h
@@ -177,6 +177,7 @@ struct uv_hub_info_s {
unsigned short nr_possible_cpus;
unsigned short nr_online_cpus;
short memory_nid;
+ unsigned short *node_to_socket;
};
/* CPU specific info with a pointer to the hub common info struct */
@@ -519,25 +520,30 @@ static inline int uv_socket_to_node(int socket)
return _uv_socket_to_node(socket, uv_hub_info->socket_to_node);
}
+static inline int uv_pnode_to_socket(int pnode)
+{
+ unsigned short *p2s = uv_hub_info->pnode_to_socket;
+
+ return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
+}
+
/* pnode, offset --> socket virtual */
static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset)
{
unsigned int m_val = uv_hub_info->m_val;
unsigned long base;
- unsigned short sockid, node, *p2s;
+ unsigned short sockid;
if (m_val)
return __va(((unsigned long)pnode << m_val) | offset);
- p2s = uv_hub_info->pnode_to_socket;
- sockid = p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode;
- node = uv_socket_to_node(sockid);
+ sockid = uv_pnode_to_socket(pnode);
/* limit address of previous socket is our base, except node 0 is 0 */
- if (!node)
+ if (sockid == 0)
return __va((unsigned long)offset);
- base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit);
+ base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit);
return __va(base << UV_GAM_RANGE_SHFT | offset);
}
@@ -644,7 +650,7 @@ static inline int uv_cpu_blade_processor_id(int cpu)
/* Blade number to Node number (UV2..UV4 is 1:1) */
static inline int uv_blade_to_node(int blade)
{
- return blade;
+ return uv_socket_to_node(blade);
}
/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */
@@ -656,23 +662,27 @@ static inline int uv_numa_blade_id(void)
/*
* Convert linux node number to the UV blade number.
* .. Currently for UV2 thru UV4 the node and the blade are identical.
- * .. If this changes then you MUST check references to this function!
+ * .. UV5 needs conversion when sub-numa clustering is enabled.
*/
static inline int uv_node_to_blade_id(int nid)
{
- return nid;
+ unsigned short *n2s = uv_hub_info->node_to_socket;
+
+ return n2s ? n2s[nid] : nid;
}
/* Convert a CPU number to the UV blade number */
static inline int uv_cpu_to_blade_id(int cpu)
{
- return uv_node_to_blade_id(cpu_to_node(cpu));
+ return uv_cpu_hub_info(cpu)->numa_blade_id;
}
/* Convert a blade id to the PNODE of the blade */
static inline int uv_blade_to_pnode(int bid)
{
- return uv_hub_info_list(uv_blade_to_node(bid))->pnode;
+ unsigned short *s2p = uv_hub_info->socket_to_pnode;
+
+ return s2p ? s2p[bid] : bid;
}
/* Nid of memory node on blade. -1 if no blade-local memory */
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index 482855227964..57bfc40470eb 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -546,7 +546,6 @@ unsigned long sn_rtc_cycles_per_second;
EXPORT_SYMBOL(sn_rtc_cycles_per_second);
/* The following values are used for the per node hub info struct */
-static __initdata unsigned short *_node_to_pnode;
static __initdata unsigned short _min_socket, _max_socket;
static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len;
static __initdata struct uv_gam_range_entry *uv_gre_table;
@@ -554,6 +553,7 @@ static __initdata struct uv_gam_parameters *uv_gp_table;
static __initdata unsigned short *_socket_to_node;
static __initdata unsigned short *_socket_to_pnode;
static __initdata unsigned short *_pnode_to_socket;
+static __initdata unsigned short *_node_to_socket;
static __initdata struct uv_gam_range_s *_gr_table;
@@ -617,7 +617,9 @@ static __init void build_uv_gr_table(void)
bytes = _gr_table_len * sizeof(struct uv_gam_range_s);
grt = kzalloc(bytes, GFP_KERNEL);
- BUG_ON(!grt);
+ WARN_ON_ONCE(!grt);
+ if (!grt)
+ return;
_gr_table = grt;
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
@@ -1292,6 +1294,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi)
hi->nasid_shift = uv_cpuid.nasid_shift;
hi->min_pnode = _min_pnode;
hi->min_socket = _min_socket;
+ hi->node_to_socket = _node_to_socket;
hi->pnode_to_socket = _pnode_to_socket;
hi->socket_to_node = _socket_to_node;
hi->socket_to_pnode = _socket_to_pnode;
@@ -1490,16 +1493,56 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info)
pr_info("UV: number nodes/possible blades %d\n", uv_pb);
}
+static int __init alloc_conv_table(int num_elem, unsigned short **table)
+{
+ int i;
+ size_t bytes;
+
+ bytes = num_elem * sizeof(*table[0]);
+ *table = kmalloc(bytes, GFP_KERNEL);
+ WARN_ON_ONCE(!*table);
+ if (!*table)
+ return -ENOMEM;
+ for (i = 0; i < num_elem; i++)
+ ((unsigned short *)*table)[i] = SOCK_EMPTY;
+ return 0;
+}
+
+/* Remove conversion table if it's 1:1 */
+#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2)
+
+static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2)
+{
+ int i;
+ unsigned short *table = *tp;
+
+ if (table == NULL)
+ return;
+ if (max != max2)
+ return;
+ for (i = 0; i < max; i++) {
+ if (i != table[i])
+ return;
+ }
+ kfree(table);
+ *tp = NULL;
+ pr_info("UV: %s is 1:1, conversion table removed\n", tname);
+}
+
+/*
+ * Build Socket Tables
+ * If there is more than one node per socket, the socket-to-node table
+ * will contain the lowest node number on that socket.
+ */
static void __init build_socket_tables(void)
{
struct uv_gam_range_entry *gre = uv_gre_table;
- int num, nump;
+ int nums, numn, nump;
int cpu, i, lnid;
int minsock = _min_socket;
int maxsock = _max_socket;
int minpnode = _min_pnode;
int maxpnode = _max_pnode;
- size_t bytes;
if (!gre) {
if (is_uv2_hub() || is_uv3_hub()) {
@@ -1507,39 +1550,36 @@ static void __init build_socket_tables(void)
return;
}
pr_err("UV: Error: UVsystab address translations not available!\n");
- BUG();
+ WARN_ON_ONCE(!gre);
+ return;
}
- /* Build socket id -> node id, pnode */
- num = maxsock - minsock + 1;
- bytes = num * sizeof(_socket_to_node[0]);
- _socket_to_node = kmalloc(bytes, GFP_KERNEL);
- _socket_to_pnode = kmalloc(bytes, GFP_KERNEL);
-
+ numn = num_possible_nodes();
nump = maxpnode - minpnode + 1;
- bytes = nump * sizeof(_pnode_to_socket[0]);
- _pnode_to_socket = kmalloc(bytes, GFP_KERNEL);
- BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket);
+ nums = maxsock - minsock + 1;
- for (i = 0; i < num; i++)
- _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY;
+ /* Allocate and clear tables */
+ if (alloc_conv_table(nump, &_pnode_to_socket) < 0)
+ return;
+ if (alloc_conv_table(nums, &_socket_to_pnode) < 0)
+ return;
- for (i = 0; i < nump; i++)
- _pnode_to_socket[i] = SOCK_EMPTY;
+ if (alloc_conv_table(numn, &_node_to_socket) < 0)
+ return;
+ if (alloc_conv_table(nums, &_socket_to_node) < 0)
+ return;
/* Fill in pnode/node/addr conversion list values: */
- pr_info("UV: GAM Building socket/pnode conversion tables\n");
for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) {
if (gre->type == UV_GAM_RANGE_TYPE_HOLE)
continue;
i = gre->sockid - minsock;
- /* Duplicate: */
- if (_socket_to_pnode[i] != SOCK_EMPTY)
- continue;
- _socket_to_pnode[i] = gre->pnode;
+ if (_socket_to_pnode[i] == SOCK_EMPTY)
+ _socket_to_pnode[i] = gre->pnode;
i = gre->pnode - minpnode;
- _pnode_to_socket[i] = gre->sockid;
+ if (_pnode_to_socket[i] == SOCK_EMPTY)
+ _pnode_to_socket[i] = gre->sockid;
pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n",
gre->sockid, gre->type, gre->nasid,
@@ -1549,66 +1589,39 @@ static void __init build_socket_tables(void)
/* Set socket -> node values: */
lnid = NUMA_NO_NODE;
- for_each_present_cpu(cpu) {
+ for_each_possible_cpu(cpu) {
int nid = cpu_to_node(cpu);
int apicid, sockid;
if (lnid == nid)
continue;
lnid = nid;
+
apicid = per_cpu(x86_cpu_to_apicid, cpu);
sockid = apicid >> uv_cpuid.socketid_shift;
- _socket_to_node[sockid - minsock] = nid;
- pr_info("UV: sid:%02x: apicid:%04x node:%2d\n",
- sockid, apicid, nid);
- }
- /* Set up physical blade to pnode translation from GAM Range Table: */
- bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]);
- _node_to_pnode = kmalloc(bytes, GFP_KERNEL);
- BUG_ON(!_node_to_pnode);
+ if (_socket_to_node[sockid - minsock] == SOCK_EMPTY)
+ _socket_to_node[sockid - minsock] = nid;
- for (lnid = 0; lnid < num_possible_nodes(); lnid++) {
- unsigned short sockid;
+ if (_node_to_socket[nid] == SOCK_EMPTY)
+ _node_to_socket[nid] = sockid;
- for (sockid = minsock; sockid <= maxsock; sockid++) {
- if (lnid == _socket_to_node[sockid - minsock]) {
- _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock];
- break;
- }
- }
- if (sockid > maxsock) {
- pr_err("UV: socket for node %d not found!\n", lnid);
- BUG();
- }
+ pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n",
+ sockid,
+ apicid,
+ _node_to_socket[nid],
+ nid,
+ _socket_to_node[sockid - minsock]);
}
/*
- * If socket id == pnode or socket id == node for all nodes,
+ * If e.g. socket id == pnode for all pnodes,
* system runs faster by removing corresponding conversion table.
*/
- pr_info("UV: Checking socket->node/pnode for identity maps\n");
- if (minsock == 0) {
- for (i = 0; i < num; i++)
- if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i])
- break;
- if (i >= num) {
- kfree(_socket_to_node);
- _socket_to_node = NULL;
- pr_info("UV: 1:1 socket_to_node table removed\n");
- }
- }
- if (minsock == minpnode) {
- for (i = 0; i < num; i++)
- if (_socket_to_pnode[i] != SOCK_EMPTY &&
- _socket_to_pnode[i] != i + minpnode)
- break;
- if (i >= num) {
- kfree(_socket_to_pnode);
- _socket_to_pnode = NULL;
- pr_info("UV: 1:1 socket_to_pnode table removed\n");
- }
- }
+ FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn);
+ FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn);
+ FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump);
+ FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump);
}
/* Check which reboot to use */
@@ -1692,12 +1705,13 @@ static __init int uv_system_init_hubless(void)
static void __init uv_system_init_hub(void)
{
struct uv_hub_info_s hub_info = {0};
- int bytes, cpu, nodeid;
+ int bytes, cpu, nodeid, bid;
unsigned short min_pnode = 9999, max_pnode = 0;
char *hub = is_uv5_hub() ? "UV500" :
is_uv4_hub() ? "UV400" :
is_uv3_hub() ? "UV300" :
is_uv2_hub() ? "UV2000/3000" : NULL;
+ struct uv_hub_info_s **uv_hub_info_list_blade;
if (!hub) {
pr_err("UV: Unknown/unsupported UV hub\n");
@@ -1720,9 +1734,12 @@ static void __init uv_system_init_hub(void)
build_uv_gr_table();
set_block_size();
uv_init_hub_info(&hub_info);
- uv_possible_blades = num_possible_nodes();
- if (!_node_to_pnode)
+ /* UV2 or UV3 without a GAM range table: get # blades from HW */
+ if (is_uv(UV2|UV3) && !uv_gre_table)
boot_init_possible_blades(&hub_info);
+ else
+ /* min/max sockets set in decode_gam_rng_tbl */
+ uv_possible_blades = (_max_socket - _min_socket) + 1;
/* uv_num_possible_blades() is really the hub count: */
pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus());
@@ -1731,79 +1748,92 @@ static void __init uv_system_init_hub(void)
hub_info.coherency_domain_number = sn_coherency_id;
uv_rtc_init();
+ /*
+ * __uv_hub_info_list[] is indexed by node, but there is only one hub_info
+ * structure per blade. First, allocate one structure per blade.
+ */
+
bytes = sizeof(void *) * uv_num_possible_blades();
- __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
- BUG_ON(!__uv_hub_info_list);
+ uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL);
+ WARN_ON_ONCE(!uv_hub_info_list_blade);
+ if (!uv_hub_info_list_blade)
+ return;
bytes = sizeof(struct uv_hub_info_s);
- for_each_node(nodeid) {
+ for_each_possible_blade(bid) {
struct uv_hub_info_s *new_hub;
- if (__uv_hub_info_list[nodeid]) {
- pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid);
- BUG();
- }
-
- /* Allocate new per hub info list */
- new_hub = (nodeid == 0) ? &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid);
- BUG_ON(!new_hub);
- __uv_hub_info_list[nodeid] = new_hub;
- new_hub = uv_hub_info_list(nodeid);
- BUG_ON(!new_hub);
+ /* Allocate & fill new per hub info list */
+ new_hub = (bid == 0) ? &uv_hub_info_node0
+ : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid));
+ WARN_ON_ONCE(!new_hub);
+ if (!new_hub)
+ return;
+ uv_hub_info_list_blade[bid] = new_hub;
*new_hub = hub_info;
/* Use information from GAM table if available: */
- if (_node_to_pnode)
- new_hub->pnode = _node_to_pnode[nodeid];
+ if (uv_gre_table)
+ new_hub->pnode = uv_blade_to_pnode(bid);
else /* Or fill in during CPU loop: */
new_hub->pnode = 0xffff;
- new_hub->numa_blade_id = uv_node_to_blade_id(nodeid);
+ new_hub->numa_blade_id = bid;
new_hub->memory_nid = NUMA_NO_NODE;
new_hub->nr_possible_cpus = 0;
new_hub->nr_online_cpus = 0;
}
+ /*
+ * Now populate __uv_hub_info_list[] for each node with the
+ * pointer to the struct for the blade it resides on.
+ */
+
+ bytes = sizeof(void *) * num_possible_nodes();
+ __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL);
+ WARN_ON_ONCE(!__uv_hub_info_list);
+ if (!__uv_hub_info_list)
+ return;
+
+ for_each_node(nodeid) {
+ __uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)];
+ }
+
/* Initialize per CPU info: */
for_each_possible_cpu(cpu) {
- int apicid = per_cpu(x86_cpu_to_apicid, cpu);
- int numa_node_id;
+ int apicid = early_per_cpu(x86_cpu_to_apicid, cpu);
+ unsigned short bid;
unsigned short pnode;
- nodeid = cpu_to_node(cpu);
- numa_node_id = numa_cpu_node(cpu);
pnode = uv_apicid_to_pnode(apicid);
+ bid = uv_pnode_to_socket(pnode) - _min_socket;
- uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid);
+ uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid];
uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++;
if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE)
uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu);
- /* Init memoryless node: */
- if (nodeid != numa_node_id &&
- uv_hub_info_list(numa_node_id)->pnode == 0xffff)
- uv_hub_info_list(numa_node_id)->pnode = pnode;
- else if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
+ if (uv_cpu_hub_info(cpu)->pnode == 0xffff)
uv_cpu_hub_info(cpu)->pnode = pnode;
}
- for_each_node(nodeid) {
- unsigned short pnode = uv_hub_info_list(nodeid)->pnode;
+ for_each_possible_blade(bid) {
+ unsigned short pnode = uv_hub_info_list_blade[bid]->pnode;
/* Add pnode info for pre-GAM list nodes without CPUs: */
if (pnode == 0xffff) {
unsigned long paddr;
- paddr = node_start_pfn(nodeid) << PAGE_SHIFT;
+ paddr = node_start_pfn(uv_blade_to_node(bid)) << PAGE_SHIFT;
pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr));
- uv_hub_info_list(nodeid)->pnode = pnode;
+ uv_hub_info_list_blade[bid]->pnode = pnode;
}
min_pnode = min(pnode, min_pnode);
max_pnode = max(pnode, max_pnode);
- pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n",
- nodeid,
- uv_hub_info_list(nodeid)->pnode,
- uv_hub_info_list(nodeid)->nr_possible_cpus);
+ pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n",
+ bid,
+ uv_hub_info_list_blade[bid]->pnode,
+ uv_hub_info_list_blade[bid]->nr_possible_cpus);
}
pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode);
@@ -1811,6 +1841,9 @@ static void __init uv_system_init_hub(void)
map_mmr_high(max_pnode);
map_mmioh_high(min_pnode, max_pnode);
+ kfree(uv_hub_info_list_blade);
+ uv_hub_info_list_blade = NULL;
+
uv_nmi_setup();
uv_cpu_init();
uv_setup_proc_files(0);
--
2.26.2
PLEASE IGNORE. I forgot to put x86/<etc> in the subject line. Let me fix it!! Thanks. --> Steve Wahl On Mon, Jan 23, 2023 at 03:57:21PM -0600, Steve Wahl wrote: > Sub-NUMA clustering (SNC) invalidates previous assumptions of a 1:1 > relationship between blades, sockets, and nodes. Fix these > assumptions and build tables correctly when SNC is enabled. > > Also replace uses of BUG() and BUG_ON() with WARN_ON() and recovery. > > Signed-off-by: Steve Wahl <steve.wahl@hpe.com> > --- > arch/x86/include/asm/uv/uv_hub.h | 32 ++-- > arch/x86/kernel/apic/x2apic_uv_x.c | 245 ++++++++++++++++------------- > 2 files changed, 160 insertions(+), 117 deletions(-) > > diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h > index d3e3197917be..5fa76c2ced51 100644 > --- a/arch/x86/include/asm/uv/uv_hub.h > +++ b/arch/x86/include/asm/uv/uv_hub.h > @@ -177,6 +177,7 @@ struct uv_hub_info_s { > unsigned short nr_possible_cpus; > unsigned short nr_online_cpus; > short memory_nid; > + unsigned short *node_to_socket; > }; > > /* CPU specific info with a pointer to the hub common info struct */ > @@ -519,25 +520,30 @@ static inline int uv_socket_to_node(int socket) > return _uv_socket_to_node(socket, uv_hub_info->socket_to_node); > } > > +static inline int uv_pnode_to_socket(int pnode) > +{ > + unsigned short *p2s = uv_hub_info->pnode_to_socket; > + > + return p2s ? p2s[pnode - uv_hub_info->min_pnode] : pnode; > +} > + > /* pnode, offset --> socket virtual */ > static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) > { > unsigned int m_val = uv_hub_info->m_val; > unsigned long base; > - unsigned short sockid, node, *p2s; > + unsigned short sockid; > > if (m_val) > return __va(((unsigned long)pnode << m_val) | offset); > > - p2s = uv_hub_info->pnode_to_socket; > - sockid = p2s ? 
p2s[pnode - uv_hub_info->min_pnode] : pnode; > - node = uv_socket_to_node(sockid); > + sockid = uv_pnode_to_socket(pnode); > > /* limit address of previous socket is our base, except node 0 is 0 */ > - if (!node) > + if (sockid == 0) > return __va((unsigned long)offset); > > - base = (unsigned long)(uv_hub_info->gr_table[node - 1].limit); > + base = (unsigned long)(uv_hub_info->gr_table[sockid - 1].limit); > return __va(base << UV_GAM_RANGE_SHFT | offset); > } > > @@ -644,7 +650,7 @@ static inline int uv_cpu_blade_processor_id(int cpu) > /* Blade number to Node number (UV2..UV4 is 1:1) */ > static inline int uv_blade_to_node(int blade) > { > - return blade; > + return uv_socket_to_node(blade); > } > > /* Blade number of current cpu. Numnbered 0 .. <#blades -1> */ > @@ -656,23 +662,27 @@ static inline int uv_numa_blade_id(void) > /* > * Convert linux node number to the UV blade number. > * .. Currently for UV2 thru UV4 the node and the blade are identical. > - * .. If this changes then you MUST check references to this function! > + * .. UV5 needs conversion when sub-numa clustering is enabled. > */ > static inline int uv_node_to_blade_id(int nid) > { > - return nid; > + unsigned short *n2s = uv_hub_info->node_to_socket; > + > + return n2s ? n2s[nid] : nid; > } > > /* Convert a CPU number to the UV blade number */ > static inline int uv_cpu_to_blade_id(int cpu) > { > - return uv_node_to_blade_id(cpu_to_node(cpu)); > + return uv_cpu_hub_info(cpu)->numa_blade_id; > } > > /* Convert a blade id to the PNODE of the blade */ > static inline int uv_blade_to_pnode(int bid) > { > - return uv_hub_info_list(uv_blade_to_node(bid))->pnode; > + unsigned short *s2p = uv_hub_info->socket_to_pnode; > + > + return s2p ? s2p[bid] : bid; > } > > /* Nid of memory node on blade. 
-1 if no blade-local memory */ > diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c > index 482855227964..57bfc40470eb 100644 > --- a/arch/x86/kernel/apic/x2apic_uv_x.c > +++ b/arch/x86/kernel/apic/x2apic_uv_x.c > @@ -546,7 +546,6 @@ unsigned long sn_rtc_cycles_per_second; > EXPORT_SYMBOL(sn_rtc_cycles_per_second); > > /* The following values are used for the per node hub info struct */ > -static __initdata unsigned short *_node_to_pnode; > static __initdata unsigned short _min_socket, _max_socket; > static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len; > static __initdata struct uv_gam_range_entry *uv_gre_table; > @@ -554,6 +553,7 @@ static __initdata struct uv_gam_parameters *uv_gp_table; > static __initdata unsigned short *_socket_to_node; > static __initdata unsigned short *_socket_to_pnode; > static __initdata unsigned short *_pnode_to_socket; > +static __initdata unsigned short *_node_to_socket; > > static __initdata struct uv_gam_range_s *_gr_table; > > @@ -617,7 +617,9 @@ static __init void build_uv_gr_table(void) > > bytes = _gr_table_len * sizeof(struct uv_gam_range_s); > grt = kzalloc(bytes, GFP_KERNEL); > - BUG_ON(!grt); > + WARN_ON_ONCE(!grt); > + if (!grt) > + return; > _gr_table = grt; > > for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { > @@ -1292,6 +1294,7 @@ static void __init uv_init_hub_info(struct uv_hub_info_s *hi) > hi->nasid_shift = uv_cpuid.nasid_shift; > hi->min_pnode = _min_pnode; > hi->min_socket = _min_socket; > + hi->node_to_socket = _node_to_socket; > hi->pnode_to_socket = _pnode_to_socket; > hi->socket_to_node = _socket_to_node; > hi->socket_to_pnode = _socket_to_pnode; > @@ -1490,16 +1493,56 @@ static __init void boot_init_possible_blades(struct uv_hub_info_s *hub_info) > pr_info("UV: number nodes/possible blades %d\n", uv_pb); > } > > +static int __init alloc_conv_table(int num_elem, unsigned short **table) > +{ > + int i; > + size_t bytes; > + > + bytes = num_elem * 
sizeof(*table[0]); > + *table = kmalloc(bytes, GFP_KERNEL); > + WARN_ON_ONCE(!*table); > + if (!*table) > + return -ENOMEM; > + for (i = 0; i < num_elem; i++) > + ((unsigned short *)*table)[i] = SOCK_EMPTY; > + return 0; > +} > + > +/* Remove conversion table if it's 1:1 */ > +#define FREE_1_TO_1_TABLE(tbl, min, max, max2) free_1_to_1_table(&tbl, #tbl, min, max, max2) > + > +static void __init free_1_to_1_table(unsigned short **tp, char *tname, int min, int max, int max2) > +{ > + int i; > + unsigned short *table = *tp; > + > + if (table == NULL) > + return; > + if (max != max2) > + return; > + for (i = 0; i < max; i++) { > + if (i != table[i]) > + return; > + } > + kfree(table); > + *tp = NULL; > + pr_info("UV: %s is 1:1, conversion table removed\n", tname); > +} > + > +/* > + * Build Socket Tables > + * If the number of nodes is >1 per socket, socket to node table will > + * contain lowest node number on that socket. > + */ > static void __init build_socket_tables(void) > { > struct uv_gam_range_entry *gre = uv_gre_table; > - int num, nump; > + int nums, numn, nump; > int cpu, i, lnid; > int minsock = _min_socket; > int maxsock = _max_socket; > int minpnode = _min_pnode; > int maxpnode = _max_pnode; > - size_t bytes; > > if (!gre) { > if (is_uv2_hub() || is_uv3_hub()) { > @@ -1507,39 +1550,36 @@ static void __init build_socket_tables(void) > return; > } > pr_err("UV: Error: UVsystab address translations not available!\n"); > - BUG(); > + WARN_ON_ONCE(!gre); > + return; > } > > - /* Build socket id -> node id, pnode */ > - num = maxsock - minsock + 1; > - bytes = num * sizeof(_socket_to_node[0]); > - _socket_to_node = kmalloc(bytes, GFP_KERNEL); > - _socket_to_pnode = kmalloc(bytes, GFP_KERNEL); > - > + numn = num_possible_nodes(); > nump = maxpnode - minpnode + 1; > - bytes = nump * sizeof(_pnode_to_socket[0]); > - _pnode_to_socket = kmalloc(bytes, GFP_KERNEL); > - BUG_ON(!_socket_to_node || !_socket_to_pnode || !_pnode_to_socket); > + nums = maxsock - minsock + 
1; > > - for (i = 0; i < num; i++) > - _socket_to_node[i] = _socket_to_pnode[i] = SOCK_EMPTY; > + /* Allocate and clear tables */ > + if (alloc_conv_table(nump, &_pnode_to_socket) < 0) > + return; > + if (alloc_conv_table(nums, &_socket_to_pnode) < 0) > + return; > > - for (i = 0; i < nump; i++) > - _pnode_to_socket[i] = SOCK_EMPTY; > + if (alloc_conv_table(numn, &_node_to_socket) < 0) > + return; > + if (alloc_conv_table(nums, &_socket_to_node) < 0) > + return; > > /* Fill in pnode/node/addr conversion list values: */ > - pr_info("UV: GAM Building socket/pnode conversion tables\n"); > for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { > if (gre->type == UV_GAM_RANGE_TYPE_HOLE) > continue; > i = gre->sockid - minsock; > - /* Duplicate: */ > - if (_socket_to_pnode[i] != SOCK_EMPTY) > - continue; > - _socket_to_pnode[i] = gre->pnode; > + if (_socket_to_pnode[i] == SOCK_EMPTY) > + _socket_to_pnode[i] = gre->pnode; > > i = gre->pnode - minpnode; > - _pnode_to_socket[i] = gre->sockid; > + if (_pnode_to_socket[i] == SOCK_EMPTY) > + _pnode_to_socket[i] = gre->sockid; > > pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n", > gre->sockid, gre->type, gre->nasid, > @@ -1549,66 +1589,39 @@ static void __init build_socket_tables(void) > > /* Set socket -> node values: */ > lnid = NUMA_NO_NODE; > - for_each_present_cpu(cpu) { > + for_each_possible_cpu(cpu) { > int nid = cpu_to_node(cpu); > int apicid, sockid; > > if (lnid == nid) > continue; > lnid = nid; > + > apicid = per_cpu(x86_cpu_to_apicid, cpu); > sockid = apicid >> uv_cpuid.socketid_shift; > - _socket_to_node[sockid - minsock] = nid; > - pr_info("UV: sid:%02x: apicid:%04x node:%2d\n", > - sockid, apicid, nid); > - } > > - /* Set up physical blade to pnode translation from GAM Range Table: */ > - bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]); > - _node_to_pnode = kmalloc(bytes, GFP_KERNEL); > - BUG_ON(!_node_to_pnode); > + if (_socket_to_node[sockid - minsock] == SOCK_EMPTY) > + 
_socket_to_node[sockid - minsock] = nid; > > - for (lnid = 0; lnid < num_possible_nodes(); lnid++) { > - unsigned short sockid; > + if (_node_to_socket[nid] == SOCK_EMPTY) > + _node_to_socket[nid] = sockid; > > - for (sockid = minsock; sockid <= maxsock; sockid++) { > - if (lnid == _socket_to_node[sockid - minsock]) { > - _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock]; > - break; > - } > - } > - if (sockid > maxsock) { > - pr_err("UV: socket for node %d not found!\n", lnid); > - BUG(); > - } > + pr_info("UV: sid:%02x: apicid:%04x socket:%02d node:%03x s2n:%03x\n", > + sockid, > + apicid, > + _node_to_socket[nid], > + nid, > + _socket_to_node[sockid - minsock]); > } > > /* > - * If socket id == pnode or socket id == node for all nodes, > + * If e.g. socket id == pnode for all pnodes, > * system runs faster by removing corresponding conversion table. > */ > - pr_info("UV: Checking socket->node/pnode for identity maps\n"); > - if (minsock == 0) { > - for (i = 0; i < num; i++) > - if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i]) > - break; > - if (i >= num) { > - kfree(_socket_to_node); > - _socket_to_node = NULL; > - pr_info("UV: 1:1 socket_to_node table removed\n"); > - } > - } > - if (minsock == minpnode) { > - for (i = 0; i < num; i++) > - if (_socket_to_pnode[i] != SOCK_EMPTY && > - _socket_to_pnode[i] != i + minpnode) > - break; > - if (i >= num) { > - kfree(_socket_to_pnode); > - _socket_to_pnode = NULL; > - pr_info("UV: 1:1 socket_to_pnode table removed\n"); > - } > - } > + FREE_1_TO_1_TABLE(_socket_to_node, _min_socket, nums, numn); > + FREE_1_TO_1_TABLE(_node_to_socket, _min_socket, nums, numn); > + FREE_1_TO_1_TABLE(_socket_to_pnode, _min_pnode, nums, nump); > + FREE_1_TO_1_TABLE(_pnode_to_socket, _min_pnode, nums, nump); > } > > /* Check which reboot to use */ > @@ -1692,12 +1705,13 @@ static __init int uv_system_init_hubless(void) > static void __init uv_system_init_hub(void) > { > struct uv_hub_info_s hub_info = {0}; > - int 
bytes, cpu, nodeid; > + int bytes, cpu, nodeid, bid; > unsigned short min_pnode = 9999, max_pnode = 0; > char *hub = is_uv5_hub() ? "UV500" : > is_uv4_hub() ? "UV400" : > is_uv3_hub() ? "UV300" : > is_uv2_hub() ? "UV2000/3000" : NULL; > + struct uv_hub_info_s **uv_hub_info_list_blade; > > if (!hub) { > pr_err("UV: Unknown/unsupported UV hub\n"); > @@ -1720,9 +1734,12 @@ static void __init uv_system_init_hub(void) > build_uv_gr_table(); > set_block_size(); > uv_init_hub_info(&hub_info); > - uv_possible_blades = num_possible_nodes(); > - if (!_node_to_pnode) > + /* If UV2 or UV3 may need to get # blades from HW */ > + if (is_uv(UV2|UV3) && !uv_gre_table) > boot_init_possible_blades(&hub_info); > + else > + /* min/max sockets set in decode_gam_rng_tbl */ > + uv_possible_blades = (_max_socket - _min_socket) + 1; > > /* uv_num_possible_blades() is really the hub count: */ > pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus()); > @@ -1731,79 +1748,92 @@ static void __init uv_system_init_hub(void) > hub_info.coherency_domain_number = sn_coherency_id; > uv_rtc_init(); > > + /* > + * __uv_hub_info_list[] is indexed by node, but there is only one hub_info > + * structure per blade. First, allocate one structure per blade. > + */ > + > bytes = sizeof(void *) * uv_num_possible_blades(); > - __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); > - BUG_ON(!__uv_hub_info_list); > + uv_hub_info_list_blade = kzalloc(bytes, GFP_KERNEL); > + WARN_ON_ONCE(!uv_hub_info_list_blade); > + if (!uv_hub_info_list_blade) > + return; > > bytes = sizeof(struct uv_hub_info_s); > - for_each_node(nodeid) { > + for_each_possible_blade(bid) { > struct uv_hub_info_s *new_hub; > > - if (__uv_hub_info_list[nodeid]) { > - pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid); > - BUG(); > - } > - > - /* Allocate new per hub info list */ > - new_hub = (nodeid == 0) ? 
&uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid); > - BUG_ON(!new_hub); > - __uv_hub_info_list[nodeid] = new_hub; > - new_hub = uv_hub_info_list(nodeid); > - BUG_ON(!new_hub); > + /* Allocate & fill new per hub info list */ > + new_hub = (bid == 0) ? &uv_hub_info_node0 > + : kzalloc_node(bytes, GFP_KERNEL, uv_blade_to_node(bid)); > + WARN_ON_ONCE(!new_hub); > + if (!new_hub) > + return; > + uv_hub_info_list_blade[bid] = new_hub; > *new_hub = hub_info; > > /* Use information from GAM table if available: */ > - if (_node_to_pnode) > - new_hub->pnode = _node_to_pnode[nodeid]; > + if (uv_gre_table) > + new_hub->pnode = uv_blade_to_pnode(bid); > else /* Or fill in during CPU loop: */ > new_hub->pnode = 0xffff; > > - new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); > + new_hub->numa_blade_id = bid; > new_hub->memory_nid = NUMA_NO_NODE; > new_hub->nr_possible_cpus = 0; > new_hub->nr_online_cpus = 0; > } > > + /* > + * Now populate __uv_hub_info_list[] for each node with the > + * pointer to the struct for the blade it resides on. 
> + */ > + > + bytes = sizeof(void *) * num_possible_nodes(); > + __uv_hub_info_list = kzalloc(bytes, GFP_KERNEL); > + WARN_ON_ONCE(!__uv_hub_info_list); > + if (!__uv_hub_info_list) > + return; > + > + for_each_node(nodeid) { > + __uv_hub_info_list[nodeid] = uv_hub_info_list_blade[uv_node_to_blade_id(nodeid)]; > + } > + > /* Initialize per CPU info: */ > for_each_possible_cpu(cpu) { > - int apicid = per_cpu(x86_cpu_to_apicid, cpu); > - int numa_node_id; > + int apicid = early_per_cpu(x86_cpu_to_apicid, cpu); > + unsigned short bid; > unsigned short pnode; > > - nodeid = cpu_to_node(cpu); > - numa_node_id = numa_cpu_node(cpu); > pnode = uv_apicid_to_pnode(apicid); > + bid = uv_pnode_to_socket(pnode) - _min_socket; > > - uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); > + uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list_blade[bid]; > uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++; > if (uv_cpu_hub_info(cpu)->memory_nid == NUMA_NO_NODE) > uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); > > - /* Init memoryless node: */ > - if (nodeid != numa_node_id && > - uv_hub_info_list(numa_node_id)->pnode == 0xffff) > - uv_hub_info_list(numa_node_id)->pnode = pnode; > - else if (uv_cpu_hub_info(cpu)->pnode == 0xffff) > + if (uv_cpu_hub_info(cpu)->pnode == 0xffff) > uv_cpu_hub_info(cpu)->pnode = pnode; > } > > - for_each_node(nodeid) { > - unsigned short pnode = uv_hub_info_list(nodeid)->pnode; > + for_each_possible_blade(bid) { > + unsigned short pnode = uv_hub_info_list_blade[bid]->pnode; > > /* Add pnode info for pre-GAM list nodes without CPUs: */ > if (pnode == 0xffff) { > unsigned long paddr; > > - paddr = node_start_pfn(nodeid) << PAGE_SHIFT; > + paddr = node_start_pfn(uv_blade_to_node(bid)) << PAGE_SHIFT; > pnode = uv_gpa_to_pnode(uv_soc_phys_ram_to_gpa(paddr)); > - uv_hub_info_list(nodeid)->pnode = pnode; > + uv_hub_info_list_blade[bid]->pnode = pnode; > } > min_pnode = min(pnode, min_pnode); > max_pnode = 
max(pnode, max_pnode); > - pr_info("UV: UVHUB node:%2d pn:%02x nrcpus:%d\n", > - nodeid, > - uv_hub_info_list(nodeid)->pnode, > - uv_hub_info_list(nodeid)->nr_possible_cpus); > + pr_info("UV: HUB:%2d pn:%02x nrcpus:%d\n", > + bid, > + uv_hub_info_list_blade[bid]->pnode, > + uv_hub_info_list_blade[bid]->nr_possible_cpus); > } > > pr_info("UV: min_pnode:%02x max_pnode:%02x\n", min_pnode, max_pnode); > @@ -1811,6 +1841,9 @@ static void __init uv_system_init_hub(void) > map_mmr_high(max_pnode); > map_mmioh_high(min_pnode, max_pnode); > > + kfree(uv_hub_info_list_blade); > + uv_hub_info_list_blade = NULL; > + > uv_nmi_setup(); > uv_cpu_init(); > uv_setup_proc_files(0); > -- > 2.26.2 > -- Steve Wahl, Hewlett Packard Enterprise
© 2016 - 2025 Red Hat, Inc.