This function can be reused by Arm device tree based
NUMA support. So we move it from x86 to common, as well
as its related variables and functions:
setup_node_bootmem, numa_init_array and numa_emulation.
As numa_initmem_init has been moved to common, _memnodemap
is not used cross files. We can restore _memnodemap to
static.
Signed-off-by: Wei Chen <wei.chen@arm.com>
---
xen/arch/x86/numa.c | 118 ----------------------------------
xen/common/numa.c | 122 +++++++++++++++++++++++++++++++++++-
xen/include/asm-x86/numa.h | 5 --
xen/include/asm-x86/setup.h | 1 -
xen/include/xen/numa.h | 8 ++-
5 files changed, 128 insertions(+), 126 deletions(-)
diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
index f2626b3968..6908738305 100644
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -38,7 +38,6 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
-bool numa_off;
s8 acpi_numa = 0;
int srat_disabled(void)
@@ -46,123 +45,6 @@ int srat_disabled(void)
return numa_off || acpi_numa < 0;
}
-/* initialize NODE_DATA given nodeid and start/end */
-void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
-{
- unsigned long start_pfn, end_pfn;
-
- start_pfn = start >> PAGE_SHIFT;
- end_pfn = end >> PAGE_SHIFT;
-
- NODE_DATA(nodeid)->node_start_pfn = start_pfn;
- NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
-
- node_set_online(nodeid);
-}
-
-void __init numa_init_array(void)
-{
- int rr, i;
-
- /* There are unfortunately some poorly designed mainboards around
- that only connect memory to a single CPU. This breaks the 1:1 cpu->node
- mapping. To avoid this fill in the mapping for all possible
- CPUs, as the number of CPUs is not known yet.
- We round robin the existing nodes. */
- rr = first_node(node_online_map);
- for ( i = 0; i < nr_cpu_ids; i++ )
- {
- if ( cpu_to_node[i] != NUMA_NO_NODE )
- continue;
- numa_set_node(i, rr);
- rr = cycle_node(rr, node_online_map);
- }
-}
-
-#ifdef CONFIG_NUMA_EMU
-static int numa_fake __initdata = 0;
-
-/* Numa emulation */
-static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
-{
- int i;
- struct node nodes[MAX_NUMNODES];
- u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
-
- /* Kludge needed for the hash function */
- if ( hweight64(sz) > 1 )
- {
- u64 x = 1;
- while ( (x << 1) < sz )
- x <<= 1;
- if ( x < sz/2 )
- printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
- sz = x;
- }
-
- memset(&nodes,0,sizeof(nodes));
- for ( i = 0; i < numa_fake; i++ )
- {
- nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
- if ( i == numa_fake - 1 )
- sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
- nodes[i].end = nodes[i].start + sz;
- printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
- i,
- nodes[i].start, nodes[i].end,
- (nodes[i].end - nodes[i].start) >> 20);
- node_set_online(i);
- }
- memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
- if ( memnode_shift < 0 )
- {
- memnode_shift = 0;
- printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
- return -1;
- }
- for_each_online_node ( i )
- setup_node_bootmem(i, nodes[i].start, nodes[i].end);
- numa_init_array();
-
- return 0;
-}
-#endif
-
-void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
- int i;
-
-#ifdef CONFIG_NUMA_EMU
- if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
- return;
-#endif
-
-#ifdef CONFIG_ACPI_NUMA
- if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
- (u64)end_pfn << PAGE_SHIFT) )
- return;
-#endif
-
- printk(KERN_INFO "%s\n",
- numa_off ? "NUMA turned off" : "No NUMA configuration found");
-
- printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
- (u64)start_pfn << PAGE_SHIFT,
- (u64)end_pfn << PAGE_SHIFT);
- /* setup dummy node covering all memory */
- memnode_shift = BITS_PER_LONG - 1;
- memnodemap = _memnodemap;
- memnodemapsize = ARRAY_SIZE(_memnodemap);
-
- nodes_clear(node_online_map);
- node_set_online(0);
- for ( i = 0; i < nr_cpu_ids; i++ )
- numa_set_node(i, 0);
- cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
- setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
- (u64)end_pfn << PAGE_SHIFT);
-}
-
void numa_set_node(int cpu, nodeid_t node)
{
cpu_to_node[cpu] = node;
diff --git a/xen/common/numa.c b/xen/common/numa.c
index 1facc8fe2b..26c0006d04 100644
--- a/xen/common/numa.c
+++ b/xen/common/numa.c
@@ -14,12 +14,13 @@
#include <xen/smp.h>
#include <xen/pfn.h>
#include <xen/sched.h>
+#include <asm/acpi.h>
struct node_data node_data[MAX_NUMNODES];
/* Mapping from pdx to node id */
int memnode_shift;
-typeof(*memnodemap) _memnodemap[64];
+static typeof(*memnodemap) _memnodemap[64];
unsigned long memnodemapsize;
u8 *memnodemap;
@@ -34,6 +35,8 @@ int num_node_memblks;
struct node node_memblk_range[NR_NODE_MEMBLKS];
nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
+bool numa_off;
+
/*
* Given a shift value, try to populate memnodemap[]
* Returns :
@@ -191,3 +194,120 @@ void numa_add_cpu(int cpu)
{
cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
}
+
+/* initialize NODE_DATA given nodeid and start/end */
+void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
+{
+ unsigned long start_pfn, end_pfn;
+
+ start_pfn = start >> PAGE_SHIFT;
+ end_pfn = end >> PAGE_SHIFT;
+
+ NODE_DATA(nodeid)->node_start_pfn = start_pfn;
+ NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
+
+ node_set_online(nodeid);
+}
+
+void __init numa_init_array(void)
+{
+ int rr, i;
+
+ /* There are unfortunately some poorly designed mainboards around
+ that only connect memory to a single CPU. This breaks the 1:1 cpu->node
+ mapping. To avoid this fill in the mapping for all possible
+ CPUs, as the number of CPUs is not known yet.
+ We round robin the existing nodes. */
+ rr = first_node(node_online_map);
+ for ( i = 0; i < nr_cpu_ids; i++ )
+ {
+ if ( cpu_to_node[i] != NUMA_NO_NODE )
+ continue;
+ numa_set_node(i, rr);
+ rr = cycle_node(rr, node_online_map);
+ }
+}
+
+#ifdef CONFIG_NUMA_EMU
+int numa_fake __initdata = 0;
+
+/* Numa emulation */
+static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
+{
+ int i;
+ struct node nodes[MAX_NUMNODES];
+ u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
+
+ /* Kludge needed for the hash function */
+ if ( hweight64(sz) > 1 )
+ {
+ u64 x = 1;
+ while ( (x << 1) < sz )
+ x <<= 1;
+ if ( x < sz/2 )
+ printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
+ sz = x;
+ }
+
+ memset(&nodes,0,sizeof(nodes));
+ for ( i = 0; i < numa_fake; i++ )
+ {
+ nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
+ if ( i == numa_fake - 1 )
+ sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
+ nodes[i].end = nodes[i].start + sz;
+ printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
+ i,
+ nodes[i].start, nodes[i].end,
+ (nodes[i].end - nodes[i].start) >> 20);
+ node_set_online(i);
+ }
+ memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
+ if ( memnode_shift < 0 )
+ {
+ memnode_shift = 0;
+ printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
+ return -1;
+ }
+ for_each_online_node ( i )
+ setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+ numa_init_array();
+
+ return 0;
+}
+#endif
+
+void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+ int i;
+
+#ifdef CONFIG_NUMA_EMU
+ if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
+ return;
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+ if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
+ (u64)end_pfn << PAGE_SHIFT) )
+ return;
+#endif
+
+ printk(KERN_INFO "%s\n",
+ numa_off ? "NUMA turned off" : "No NUMA configuration found");
+
+ printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
+ (u64)start_pfn << PAGE_SHIFT,
+ (u64)end_pfn << PAGE_SHIFT);
+ /* setup dummy node covering all memory */
+ memnode_shift = BITS_PER_LONG - 1;
+ memnodemap = _memnodemap;
+ memnodemapsize = ARRAY_SIZE(_memnodemap);
+
+ nodes_clear(node_online_map);
+ node_set_online(0);
+ for ( i = 0; i < nr_cpu_ids; i++ )
+ numa_set_node(i, 0);
+ cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
+ setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
+ (u64)end_pfn << PAGE_SHIFT);
+}
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index e8a92ad9df..f8e4e15586 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -21,16 +21,11 @@ extern nodeid_t pxm_to_node(unsigned int pxm);
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
-extern void numa_init_array(void);
-extern bool numa_off;
-
-
extern int srat_disabled(void);
extern void numa_set_node(int cpu, nodeid_t node);
extern nodeid_t setup_node(unsigned int pxm);
extern void srat_detect_node(int cpu);
-extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
extern nodeid_t apicid_to_node[];
extern void init_cpu_to_node(void);
diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h
index 24be46115d..63838ba2d1 100644
--- a/xen/include/asm-x86/setup.h
+++ b/xen/include/asm-x86/setup.h
@@ -17,7 +17,6 @@ void early_time_init(void);
void set_nr_cpu_ids(unsigned int max_cpus);
-void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
void arch_init_memory(void);
void subarch_init_memory(void);
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 67b79a73a3..258a5cb3db 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -26,7 +26,6 @@
extern int memnode_shift;
extern unsigned long memnodemapsize;
extern u8 *memnodemap;
-extern typeof(*memnodemap) _memnodemap[64];
struct node_data {
unsigned long node_start_pfn;
@@ -69,6 +68,13 @@ extern int conflicting_memblks(u64 start, u64 end);
extern void cutoff_node(int i, u64 start, u64 end);
extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
+extern void numa_init_array(void);
+extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern bool numa_off;
+extern int numa_fake;
+
+extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
+
#endif /* CONFIG_NUMA */
#endif /* _XEN_NUMA_H */
--
2.25.1
Hi Wei,
On 11/08/2021 11:23, Wei Chen wrote:
> This function can be reused by Arm device tree based
> NUMA support. So we move it from x86 to common, as well
> as its related variables and functions:
> setup_node_bootmem, numa_init_array and numa_emulation.
>
> As numa_initmem_init has been moved to common, _memnodemap
> is not used cross files. We can restore _memnodemap to
> static.
As we discussed on a previous patch, we should try to avoid this kind of
dance. I can help to find a split that would achieve that.
>
> Signed-off-by: Wei Chen <wei.chen@arm.com>
> ---
> xen/arch/x86/numa.c | 118 ----------------------------------
> xen/common/numa.c | 122 +++++++++++++++++++++++++++++++++++-
> xen/include/asm-x86/numa.h | 5 --
> xen/include/asm-x86/setup.h | 1 -
> xen/include/xen/numa.h | 8 ++-
> 5 files changed, 128 insertions(+), 126 deletions(-)
>
> diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
> index f2626b3968..6908738305 100644
> --- a/xen/arch/x86/numa.c
> +++ b/xen/arch/x86/numa.c
> @@ -38,7 +38,6 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
>
> nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
>
> -bool numa_off;
> s8 acpi_numa = 0;
>
> int srat_disabled(void)
> @@ -46,123 +45,6 @@ int srat_disabled(void)
> return numa_off || acpi_numa < 0;
> }
>
> -/* initialize NODE_DATA given nodeid and start/end */
> -void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
> -{
> - unsigned long start_pfn, end_pfn;
> -
> - start_pfn = start >> PAGE_SHIFT;
> - end_pfn = end >> PAGE_SHIFT;
> -
> - NODE_DATA(nodeid)->node_start_pfn = start_pfn;
> - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
> -
> - node_set_online(nodeid);
> -}
> -
> -void __init numa_init_array(void)
> -{
> - int rr, i;
> -
> - /* There are unfortunately some poorly designed mainboards around
> - that only connect memory to a single CPU. This breaks the 1:1 cpu->node
> - mapping. To avoid this fill in the mapping for all possible
> - CPUs, as the number of CPUs is not known yet.
> - We round robin the existing nodes. */
> - rr = first_node(node_online_map);
> - for ( i = 0; i < nr_cpu_ids; i++ )
> - {
> - if ( cpu_to_node[i] != NUMA_NO_NODE )
> - continue;
> - numa_set_node(i, rr);
> - rr = cycle_node(rr, node_online_map);
> - }
> -}
> -
> -#ifdef CONFIG_NUMA_EMU
> -static int numa_fake __initdata = 0;
> -
> -/* Numa emulation */
> -static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
> -{
> - int i;
> - struct node nodes[MAX_NUMNODES];
> - u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
> -
> - /* Kludge needed for the hash function */
> - if ( hweight64(sz) > 1 )
> - {
> - u64 x = 1;
> - while ( (x << 1) < sz )
> - x <<= 1;
> - if ( x < sz/2 )
> - printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
> - sz = x;
> - }
> -
> - memset(&nodes,0,sizeof(nodes));
> - for ( i = 0; i < numa_fake; i++ )
> - {
> - nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
> - if ( i == numa_fake - 1 )
> - sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
> - nodes[i].end = nodes[i].start + sz;
> - printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
> - i,
> - nodes[i].start, nodes[i].end,
> - (nodes[i].end - nodes[i].start) >> 20);
> - node_set_online(i);
> - }
> - memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
> - if ( memnode_shift < 0 )
> - {
> - memnode_shift = 0;
> - printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
> - return -1;
> - }
> - for_each_online_node ( i )
> - setup_node_bootmem(i, nodes[i].start, nodes[i].end);
> - numa_init_array();
> -
> - return 0;
> -}
> -#endif
> -
> -void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
> -{
> - int i;
> -
> -#ifdef CONFIG_NUMA_EMU
> - if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
> - return;
> -#endif
> -
> -#ifdef CONFIG_ACPI_NUMA
> - if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
> - (u64)end_pfn << PAGE_SHIFT) )
> - return;
> -#endif
> -
> - printk(KERN_INFO "%s\n",
> - numa_off ? "NUMA turned off" : "No NUMA configuration found");
> -
> - printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
> - (u64)start_pfn << PAGE_SHIFT,
> - (u64)end_pfn << PAGE_SHIFT);
> - /* setup dummy node covering all memory */
> - memnode_shift = BITS_PER_LONG - 1;
> - memnodemap = _memnodemap;
> - memnodemapsize = ARRAY_SIZE(_memnodemap);
> -
> - nodes_clear(node_online_map);
> - node_set_online(0);
> - for ( i = 0; i < nr_cpu_ids; i++ )
> - numa_set_node(i, 0);
> - cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
> - setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
> - (u64)end_pfn << PAGE_SHIFT);
> -}
> -
> void numa_set_node(int cpu, nodeid_t node)
> {
> cpu_to_node[cpu] = node;
> diff --git a/xen/common/numa.c b/xen/common/numa.c
> index 1facc8fe2b..26c0006d04 100644
> --- a/xen/common/numa.c
> +++ b/xen/common/numa.c
> @@ -14,12 +14,13 @@
> #include <xen/smp.h>
> #include <xen/pfn.h>
> #include <xen/sched.h>
NIT: We tend to add a newline betwen <xen/...> headers and <asm/...>
headers.
> +#include <asm/acpi.h>
>
> struct node_data node_data[MAX_NUMNODES];
>
> /* Mapping from pdx to node id */
> int memnode_shift;
> -typeof(*memnodemap) _memnodemap[64];
> +static typeof(*memnodemap) _memnodemap[64];
> unsigned long memnodemapsize;
> u8 *memnodemap;
>
> @@ -34,6 +35,8 @@ int num_node_memblks;
> struct node node_memblk_range[NR_NODE_MEMBLKS];
> nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
>
> +bool numa_off;
> +
> /*
> * Given a shift value, try to populate memnodemap[]
> * Returns :
> @@ -191,3 +194,120 @@ void numa_add_cpu(int cpu)
> {
> cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
> }
> +
> +/* initialize NODE_DATA given nodeid and start/end */
> +void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
From an abstract PoV, start and end should be paddr_t. This should be
done on a separate patch though.
> +{
> + unsigned long start_pfn, end_pfn;
> +
> + start_pfn = start >> PAGE_SHIFT;
> + end_pfn = end >> PAGE_SHIFT;
> +
> + NODE_DATA(nodeid)->node_start_pfn = start_pfn;
> + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
> +
> + node_set_online(nodeid);
> +}
> +
> +void __init numa_init_array(void)
> +{
> + int rr, i;
> +
> + /* There are unfortunately some poorly designed mainboards around
> + that only connect memory to a single CPU. This breaks the 1:1 cpu->node
> + mapping. To avoid this fill in the mapping for all possible
> + CPUs, as the number of CPUs is not known yet.
> + We round robin the existing nodes. */
> + rr = first_node(node_online_map);
> + for ( i = 0; i < nr_cpu_ids; i++ )
> + {
> + if ( cpu_to_node[i] != NUMA_NO_NODE )
> + continue;
> + numa_set_node(i, rr);
> + rr = cycle_node(rr, node_online_map);
> + }
> +}
> +
> +#ifdef CONFIG_NUMA_EMU
> +int numa_fake __initdata = 0;
> +
> +/* Numa emulation */
> +static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
Here, this should be either "unsigned long" or ideally "mfn_t".
Although, if you use "unsigned long", you will need to...
> +{
> + int i;
> + struct node nodes[MAX_NUMNODES];
> + u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
... cast "(end_pfn - start_pfn)" to uin64_t or use pfn_to_paddr().
> +
> + /* Kludge needed for the hash function */
> + if ( hweight64(sz) > 1 )
> + {
> + u64 x = 1;
> + while ( (x << 1) < sz )
> + x <<= 1;
> + if ( x < sz/2 )
> + printk(KERN_ERR "Numa emulation unbalanced. Complain to maintainer\n");
> + sz = x;
> + }
> +
> + memset(&nodes,0,sizeof(nodes));
> + for ( i = 0; i < numa_fake; i++ )
> + {
> + nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
> + if ( i == numa_fake - 1 )
> + sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
> + nodes[i].end = nodes[i].start + sz;
> + printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" (%"PRIu64"MB)\n",
> + i,
> + nodes[i].start, nodes[i].end,
> + (nodes[i].end - nodes[i].start) >> 20);
> + node_set_online(i);
> + }
> + memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
> + if ( memnode_shift < 0 )
> + {
> + memnode_shift = 0;
> + printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
> + return -1;
> + }
> + for_each_online_node ( i )
> + setup_node_bootmem(i, nodes[i].start, nodes[i].end);
> + numa_init_array();
> +
> + return 0;
> +}
> +#endif
> +
> +void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
> +{
> + int i;
> +
> +#ifdef CONFIG_NUMA_EMU
> + if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
> + return;
> +#endif
> +
> +#ifdef CONFIG_ACPI_NUMA
> + if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
> + (u64)end_pfn << PAGE_SHIFT) )
(u64)v << PAGE_SHIFT should be switched to use pfn_to_paddr() or
mfn_to_paddr() if you decide to make start_pfn and end_pfn typesafe.
> + return;
> +#endif
> +
> + printk(KERN_INFO "%s\n",
> + numa_off ? "NUMA turned off" : "No NUMA configuration found");
> +
> + printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
> + (u64)start_pfn << PAGE_SHIFT,
> + (u64)end_pfn << PAGE_SHIFT);
Same remark here. PRIx64 would also have to be switched to PRIpaddr.
> + /* setup dummy node covering all memory */
> + memnode_shift = BITS_PER_LONG - 1;
> + memnodemap = _memnodemap;
> + memnodemapsize = ARRAY_SIZE(_memnodemap);
> +
> + nodes_clear(node_online_map);
> + node_set_online(0);
> + for ( i = 0; i < nr_cpu_ids; i++ )
> + numa_set_node(i, 0);
> + cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
> + setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
> + (u64)end_pfn << PAGE_SHIFT);
> +}
> diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> index e8a92ad9df..f8e4e15586 100644
> --- a/xen/include/asm-x86/numa.h
> +++ b/xen/include/asm-x86/numa.h
> @@ -21,16 +21,11 @@ extern nodeid_t pxm_to_node(unsigned int pxm);
>
> #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
>
> -extern void numa_init_array(void);
> -extern bool numa_off;
> -
> -
> extern int srat_disabled(void);
> extern void numa_set_node(int cpu, nodeid_t node);
> extern nodeid_t setup_node(unsigned int pxm);
> extern void srat_detect_node(int cpu);
>
> -extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
> extern nodeid_t apicid_to_node[];
> extern void init_cpu_to_node(void);
>
> diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h
> index 24be46115d..63838ba2d1 100644
> --- a/xen/include/asm-x86/setup.h
> +++ b/xen/include/asm-x86/setup.h
> @@ -17,7 +17,6 @@ void early_time_init(void);
>
> void set_nr_cpu_ids(unsigned int max_cpus);
>
> -void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
> void arch_init_memory(void);
> void subarch_init_memory(void);
>
> diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> index 67b79a73a3..258a5cb3db 100644
> --- a/xen/include/xen/numa.h
> +++ b/xen/include/xen/numa.h
> @@ -26,7 +26,6 @@
> extern int memnode_shift;
> extern unsigned long memnodemapsize;
> extern u8 *memnodemap;
> -extern typeof(*memnodemap) _memnodemap[64];
>
> struct node_data {
> unsigned long node_start_pfn;
> @@ -69,6 +68,13 @@ extern int conflicting_memblks(u64 start, u64 end);
> extern void cutoff_node(int i, u64 start, u64 end);
> extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
>
> +extern void numa_init_array(void);
> +extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
> +extern bool numa_off;
> +extern int numa_fake;
> +
> +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
> +
> #endif /* CONFIG_NUMA */
>
> #endif /* _XEN_NUMA_H */
>
Cheers,
--
Julien Grall
Hi Julien,
> -----Original Message-----
> From: Julien Grall <julien@xen.org>
> Sent: 2021年8月25日 18:22
> To: Wei Chen <Wei.Chen@arm.com>; xen-devel@lists.xenproject.org;
> sstabellini@kernel.org; jbeulich@suse.com
> Cc: Bertrand Marquis <Bertrand.Marquis@arm.com>
> Subject: Re: [XEN RFC PATCH 12/40] xen/x86: Move numa_initmem_init to
> common
>
> Hi Wei,
>
> On 11/08/2021 11:23, Wei Chen wrote:
> > This function can be reused by Arm device tree based
> > NUMA support. So we move it from x86 to common, as well
> > as its related variables and functions:
> > setup_node_bootmem, numa_init_array and numa_emulation.
> >
> > As numa_initmem_init has been moved to common, _memnodemap
> > is not used cross files. We can restore _memnodemap to
> > static.
>
> As we discussed on a previous patch, we should try to avoid this kind of
> dance. I can help to find a split that would achieve that.
>
Yes, thanks!
> >
> > Signed-off-by: Wei Chen <wei.chen@arm.com>
> > ---
> > xen/arch/x86/numa.c | 118 ----------------------------------
> > xen/common/numa.c | 122 +++++++++++++++++++++++++++++++++++-
> > xen/include/asm-x86/numa.h | 5 --
> > xen/include/asm-x86/setup.h | 1 -
> > xen/include/xen/numa.h | 8 ++-
> > 5 files changed, 128 insertions(+), 126 deletions(-)
> >
> > diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
> > index f2626b3968..6908738305 100644
> > --- a/xen/arch/x86/numa.c
> > +++ b/xen/arch/x86/numa.c
> > @@ -38,7 +38,6 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
> >
> > nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
> >
> > -bool numa_off;
> > s8 acpi_numa = 0;
> >
> > int srat_disabled(void)
> > @@ -46,123 +45,6 @@ int srat_disabled(void)
> > return numa_off || acpi_numa < 0;
> > }
> >
> > -/* initialize NODE_DATA given nodeid and start/end */
> > -void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
> > -{
> > - unsigned long start_pfn, end_pfn;
> > -
> > - start_pfn = start >> PAGE_SHIFT;
> > - end_pfn = end >> PAGE_SHIFT;
> > -
> > - NODE_DATA(nodeid)->node_start_pfn = start_pfn;
> > - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
> > -
> > - node_set_online(nodeid);
> > -}
> > -
> > -void __init numa_init_array(void)
> > -{
> > - int rr, i;
> > -
> > - /* There are unfortunately some poorly designed mainboards around
> > - that only connect memory to a single CPU. This breaks the 1:1
> cpu->node
> > - mapping. To avoid this fill in the mapping for all possible
> > - CPUs, as the number of CPUs is not known yet.
> > - We round robin the existing nodes. */
> > - rr = first_node(node_online_map);
> > - for ( i = 0; i < nr_cpu_ids; i++ )
> > - {
> > - if ( cpu_to_node[i] != NUMA_NO_NODE )
> > - continue;
> > - numa_set_node(i, rr);
> > - rr = cycle_node(rr, node_online_map);
> > - }
> > -}
> > -
> > -#ifdef CONFIG_NUMA_EMU
> > -static int numa_fake __initdata = 0;
> > -
> > -/* Numa emulation */
> > -static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
> > -{
> > - int i;
> > - struct node nodes[MAX_NUMNODES];
> > - u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
> > -
> > - /* Kludge needed for the hash function */
> > - if ( hweight64(sz) > 1 )
> > - {
> > - u64 x = 1;
> > - while ( (x << 1) < sz )
> > - x <<= 1;
> > - if ( x < sz/2 )
> > - printk(KERN_ERR "Numa emulation unbalanced. Complain to
> maintainer\n");
> > - sz = x;
> > - }
> > -
> > - memset(&nodes,0,sizeof(nodes));
> > - for ( i = 0; i < numa_fake; i++ )
> > - {
> > - nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
> > - if ( i == numa_fake - 1 )
> > - sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
> > - nodes[i].end = nodes[i].start + sz;
> > - printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64"
> (%"PRIu64"MB)\n",
> > - i,
> > - nodes[i].start, nodes[i].end,
> > - (nodes[i].end - nodes[i].start) >> 20);
> > - node_set_online(i);
> > - }
> > - memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
> > - if ( memnode_shift < 0 )
> > - {
> > - memnode_shift = 0;
> > - printk(KERN_ERR "No NUMA hash function found. Emulation
> disabled.\n");
> > - return -1;
> > - }
> > - for_each_online_node ( i )
> > - setup_node_bootmem(i, nodes[i].start, nodes[i].end);
> > - numa_init_array();
> > -
> > - return 0;
> > -}
> > -#endif
> > -
> > -void __init numa_initmem_init(unsigned long start_pfn, unsigned long
> end_pfn)
> > -{
> > - int i;
> > -
> > -#ifdef CONFIG_NUMA_EMU
> > - if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
> > - return;
> > -#endif
> > -
> > -#ifdef CONFIG_ACPI_NUMA
> > - if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
> > - (u64)end_pfn << PAGE_SHIFT) )
> > - return;
> > -#endif
> > -
> > - printk(KERN_INFO "%s\n",
> > - numa_off ? "NUMA turned off" : "No NUMA configuration
> found");
> > -
> > - printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
> > - (u64)start_pfn << PAGE_SHIFT,
> > - (u64)end_pfn << PAGE_SHIFT);
> > - /* setup dummy node covering all memory */
> > - memnode_shift = BITS_PER_LONG - 1;
> > - memnodemap = _memnodemap;
> > - memnodemapsize = ARRAY_SIZE(_memnodemap);
> > -
> > - nodes_clear(node_online_map);
> > - node_set_online(0);
> > - for ( i = 0; i < nr_cpu_ids; i++ )
> > - numa_set_node(i, 0);
> > - cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
> > - setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
> > - (u64)end_pfn << PAGE_SHIFT);
> > -}
> > -
> > void numa_set_node(int cpu, nodeid_t node)
> > {
> > cpu_to_node[cpu] = node;
> > diff --git a/xen/common/numa.c b/xen/common/numa.c
> > index 1facc8fe2b..26c0006d04 100644
> > --- a/xen/common/numa.c
> > +++ b/xen/common/numa.c
> > @@ -14,12 +14,13 @@
> > #include <xen/smp.h>
> > #include <xen/pfn.h>
> > #include <xen/sched.h>
>
> NIT: We tend to add a newline betwen <xen/...> headers and <asm/...>
> headers.
>
got it
> > +#include <asm/acpi.h>
> >
> > struct node_data node_data[MAX_NUMNODES];
> >
> > /* Mapping from pdx to node id */
> > int memnode_shift;
> > -typeof(*memnodemap) _memnodemap[64];
> > +static typeof(*memnodemap) _memnodemap[64];
> > unsigned long memnodemapsize;
> > u8 *memnodemap;
> >
> > @@ -34,6 +35,8 @@ int num_node_memblks;
> > struct node node_memblk_range[NR_NODE_MEMBLKS];
> > nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
> >
> > +bool numa_off;
> > +
> > /*
> > * Given a shift value, try to populate memnodemap[]
> > * Returns :
> > @@ -191,3 +194,120 @@ void numa_add_cpu(int cpu)
> > {
> > cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
> > }
> > +
> > +/* initialize NODE_DATA given nodeid and start/end */
> > +void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
>
> From an abstract PoV, start and end should be paddr_t. This should be
> done on a separate patch though.
>
Ok.
> > +{
> > + unsigned long start_pfn, end_pfn;
> > +
> > + start_pfn = start >> PAGE_SHIFT;
> > + end_pfn = end >> PAGE_SHIFT;
> > +
> > + NODE_DATA(nodeid)->node_start_pfn = start_pfn;
> > + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
> > +
> > + node_set_online(nodeid);
> > +}
> > +
> > +void __init numa_init_array(void)
> > +{
> > + int rr, i;
> > +
> > + /* There are unfortunately some poorly designed mainboards around
> > + that only connect memory to a single CPU. This breaks the 1:1
> cpu->node
> > + mapping. To avoid this fill in the mapping for all possible
> > + CPUs, as the number of CPUs is not known yet.
> > + We round robin the existing nodes. */
> > + rr = first_node(node_online_map);
> > + for ( i = 0; i < nr_cpu_ids; i++ )
> > + {
> > + if ( cpu_to_node[i] != NUMA_NO_NODE )
> > + continue;
> > + numa_set_node(i, rr);
> > + rr = cycle_node(rr, node_online_map);
> > + }
> > +}
> > +
> > +#ifdef CONFIG_NUMA_EMU
> > +int numa_fake __initdata = 0;
> > +
> > +/* Numa emulation */
> > +static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
>
> Here, this should be either "unsigned long" or ideally "mfn_t".
> Although, if you use "unsigned long", you will need to...
>
Do we need a separate patch to do it?
> > +{
> > + int i;
> > + struct node nodes[MAX_NUMNODES];
> > + u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
>
> ... cast "(end_pfn - start_pfn)" to uin64_t or use pfn_to_paddr().
>
Ok
> > +
> > + /* Kludge needed for the hash function */
> > + if ( hweight64(sz) > 1 )
> > + {
> > + u64 x = 1;
> > + while ( (x << 1) < sz )
> > + x <<= 1;
> > + if ( x < sz/2 )
> > + printk(KERN_ERR "Numa emulation unbalanced. Complain to
> maintainer\n");
> > + sz = x;
> > + }
> > +
> > + memset(&nodes,0,sizeof(nodes));
> > + for ( i = 0; i < numa_fake; i++ )
> > + {
> > + nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
> > + if ( i == numa_fake - 1 )
> > + sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
> > + nodes[i].end = nodes[i].start + sz;
> > + printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64"
> (%"PRIu64"MB)\n",
> > + i,
> > + nodes[i].start, nodes[i].end,
> > + (nodes[i].end - nodes[i].start) >> 20);
> > + node_set_online(i);
> > + }
> > + memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
> > + if ( memnode_shift < 0 )
> > + {
> > + memnode_shift = 0;
> > + printk(KERN_ERR "No NUMA hash function found. Emulation
> disabled.\n");
> > + return -1;
> > + }
> > + for_each_online_node ( i )
> > + setup_node_bootmem(i, nodes[i].start, nodes[i].end);
> > + numa_init_array();
> > +
> > + return 0;
> > +}
> > +#endif
> > +
> > +void __init numa_initmem_init(unsigned long start_pfn, unsigned long
> end_pfn)
> > +{
> > + int i;
> > +
> > +#ifdef CONFIG_NUMA_EMU
> > + if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
> > + return;
> > +#endif
> > +
> > +#ifdef CONFIG_ACPI_NUMA
> > + if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
> > + (u64)end_pfn << PAGE_SHIFT) )
>
> (u64)v << PAGE_SHIFT should be switched to use pfn_to_paddr() or
> mfn_to_paddr() if you decide to make start_pfn and end_pfn typesafe.
>
Still need a separate patch to change it before move?
> > + return;
> > +#endif
> > +
> > + printk(KERN_INFO "%s\n",
> > + numa_off ? "NUMA turned off" : "No NUMA configuration
> found");
> > +
> > + printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
> > + (u64)start_pfn << PAGE_SHIFT,
> > + (u64)end_pfn << PAGE_SHIFT);
>
> Same remark here. PRIx64 would also have to be switched to PRIpaddr.
>
Hmm, It seems I'd better to use a separate patch to do PRIpaddr clean up
before move code.
> > + /* setup dummy node covering all memory */
> > + memnode_shift = BITS_PER_LONG - 1;
> > + memnodemap = _memnodemap;
> > + memnodemapsize = ARRAY_SIZE(_memnodemap);
> > +
> > + nodes_clear(node_online_map);
> > + node_set_online(0);
> > + for ( i = 0; i < nr_cpu_ids; i++ )
> > + numa_set_node(i, 0);
> > + cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
> > + setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
> > + (u64)end_pfn << PAGE_SHIFT);
> > +}
> > diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
> > index e8a92ad9df..f8e4e15586 100644
> > --- a/xen/include/asm-x86/numa.h
> > +++ b/xen/include/asm-x86/numa.h
> > @@ -21,16 +21,11 @@ extern nodeid_t pxm_to_node(unsigned int pxm);
> >
> > #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT))
> >
> > -extern void numa_init_array(void);
> > -extern bool numa_off;
> > -
> > -
> > extern int srat_disabled(void);
> > extern void numa_set_node(int cpu, nodeid_t node);
> > extern nodeid_t setup_node(unsigned int pxm);
> > extern void srat_detect_node(int cpu);
> >
> > -extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
> > extern nodeid_t apicid_to_node[];
> > extern void init_cpu_to_node(void);
> >
> > diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h
> > index 24be46115d..63838ba2d1 100644
> > --- a/xen/include/asm-x86/setup.h
> > +++ b/xen/include/asm-x86/setup.h
> > @@ -17,7 +17,6 @@ void early_time_init(void);
> >
> > void set_nr_cpu_ids(unsigned int max_cpus);
> >
> > -void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
> > void arch_init_memory(void);
> > void subarch_init_memory(void);
> >
> > diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
> > index 67b79a73a3..258a5cb3db 100644
> > --- a/xen/include/xen/numa.h
> > +++ b/xen/include/xen/numa.h
> > @@ -26,7 +26,6 @@
> > extern int memnode_shift;
> > extern unsigned long memnodemapsize;
> > extern u8 *memnodemap;
> > -extern typeof(*memnodemap) _memnodemap[64];
> >
> > struct node_data {
> > unsigned long node_start_pfn;
> > @@ -69,6 +68,13 @@ extern int conflicting_memblks(u64 start, u64 end);
> > extern void cutoff_node(int i, u64 start, u64 end);
> > extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
> >
> > +extern void numa_init_array(void);
> > +extern void numa_initmem_init(unsigned long start_pfn, unsigned long
> end_pfn);
> > +extern bool numa_off;
> > +extern int numa_fake;
> > +
> > +extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
> > +
> > #endif /* CONFIG_NUMA */
> >
> > #endif /* _XEN_NUMA_H */
> >
>
> Cheers,
>
> --
> Julien Grall
On 25/08/2021 12:15, Wei Chen wrote:
> Hi Julien,
Hi Wei,
>> -----Original Message-----
>> From: Julien Grall <julien@xen.org>
>> Sent: 2021年8月25日 18:22
>> To: Wei Chen <Wei.Chen@arm.com>; xen-devel@lists.xenproject.org;
>> sstabellini@kernel.org; jbeulich@suse.com
>> Cc: Bertrand Marquis <Bertrand.Marquis@arm.com>
>> Subject: Re: [XEN RFC PATCH 12/40] xen/x86: Move numa_initmem_init to
>> common
>>
>> Hi Wei,
>>
>> On 11/08/2021 11:23, Wei Chen wrote:
>>> This function can be reused by Arm device tree based
>>> NUMA support. So we move it from x86 to common, as well
>>> as its related variables and functions:
>>> setup_node_bootmem, numa_init_array and numa_emulation.
>>>
>>> As numa_initmem_init has been moved to common, _memnodemap
>>> is not used cross files. We can restore _memnodemap to
>>> static.
>>
>> As we discussed on a previous patch, we should try to avoid this kind of
>> dance. I can help to find a split that would achieve that.
>>
>
> Yes, thanks!
>
>>>
>>> Signed-off-by: Wei Chen <wei.chen@arm.com>
>>> ---
>>> xen/arch/x86/numa.c | 118 ----------------------------------
>>> xen/common/numa.c | 122 +++++++++++++++++++++++++++++++++++-
>>> xen/include/asm-x86/numa.h | 5 --
>>> xen/include/asm-x86/setup.h | 1 -
>>> xen/include/xen/numa.h | 8 ++-
>>> 5 files changed, 128 insertions(+), 126 deletions(-)
>>>
>>> diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
>>> index f2626b3968..6908738305 100644
>>> --- a/xen/arch/x86/numa.c
>>> +++ b/xen/arch/x86/numa.c
>>> @@ -38,7 +38,6 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
>>>
>>> nodemask_t __read_mostly node_online_map = { { [0] = 1UL } };
>>>
>>> -bool numa_off;
>>> s8 acpi_numa = 0;
>>>
>>> int srat_disabled(void)
>>> @@ -46,123 +45,6 @@ int srat_disabled(void)
>>> return numa_off || acpi_numa < 0;
>>> }
>>>
>>> -/* initialize NODE_DATA given nodeid and start/end */
>>> -void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
>>> -{
>>> - unsigned long start_pfn, end_pfn;
>>> -
>>> - start_pfn = start >> PAGE_SHIFT;
>>> - end_pfn = end >> PAGE_SHIFT;
>>> -
>>> - NODE_DATA(nodeid)->node_start_pfn = start_pfn;
>>> - NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
>>> -
>>> - node_set_online(nodeid);
>>> -}
>>> -
>>> -void __init numa_init_array(void)
>>> -{
>>> - int rr, i;
>>> -
>>> - /* There are unfortunately some poorly designed mainboards around
>>> - that only connect memory to a single CPU. This breaks the 1:1
>> cpu->node
>>> - mapping. To avoid this fill in the mapping for all possible
>>> - CPUs, as the number of CPUs is not known yet.
>>> - We round robin the existing nodes. */
>>> - rr = first_node(node_online_map);
>>> - for ( i = 0; i < nr_cpu_ids; i++ )
>>> - {
>>> - if ( cpu_to_node[i] != NUMA_NO_NODE )
>>> - continue;
>>> - numa_set_node(i, rr);
>>> - rr = cycle_node(rr, node_online_map);
>>> - }
>>> -}
>>> -
>>> -#ifdef CONFIG_NUMA_EMU
>>> -static int numa_fake __initdata = 0;
>>> -
>>> -/* Numa emulation */
>>> -static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
>>> -{
>>> - int i;
>>> - struct node nodes[MAX_NUMNODES];
>>> - u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
>>> -
>>> - /* Kludge needed for the hash function */
>>> - if ( hweight64(sz) > 1 )
>>> - {
>>> - u64 x = 1;
>>> - while ( (x << 1) < sz )
>>> - x <<= 1;
>>> - if ( x < sz/2 )
>>> - printk(KERN_ERR "Numa emulation unbalanced. Complain to
>> maintainer\n");
>>> - sz = x;
>>> - }
>>> -
>>> - memset(&nodes,0,sizeof(nodes));
>>> - for ( i = 0; i < numa_fake; i++ )
>>> - {
>>> - nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
>>> - if ( i == numa_fake - 1 )
>>> - sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
>>> - nodes[i].end = nodes[i].start + sz;
>>> - printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64"
>> (%"PRIu64"MB)\n",
>>> - i,
>>> - nodes[i].start, nodes[i].end,
>>> - (nodes[i].end - nodes[i].start) >> 20);
>>> - node_set_online(i);
>>> - }
>>> - memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
>>> - if ( memnode_shift < 0 )
>>> - {
>>> - memnode_shift = 0;
>>> - printk(KERN_ERR "No NUMA hash function found. Emulation
>> disabled.\n");
>>> - return -1;
>>> - }
>>> - for_each_online_node ( i )
>>> - setup_node_bootmem(i, nodes[i].start, nodes[i].end);
>>> - numa_init_array();
>>> -
>>> - return 0;
>>> -}
>>> -#endif
>>> -
>>> -void __init numa_initmem_init(unsigned long start_pfn, unsigned long
>> end_pfn)
>>> -{
>>> - int i;
>>> -
>>> -#ifdef CONFIG_NUMA_EMU
>>> - if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
>>> - return;
>>> -#endif
>>> -
>>> -#ifdef CONFIG_ACPI_NUMA
>>> - if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
>>> - (u64)end_pfn << PAGE_SHIFT) )
>>> - return;
>>> -#endif
>>> -
>>> - printk(KERN_INFO "%s\n",
>>> - numa_off ? "NUMA turned off" : "No NUMA configuration
>> found");
>>> -
>>> - printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
>>> - (u64)start_pfn << PAGE_SHIFT,
>>> - (u64)end_pfn << PAGE_SHIFT);
>>> - /* setup dummy node covering all memory */
>>> - memnode_shift = BITS_PER_LONG - 1;
>>> - memnodemap = _memnodemap;
>>> - memnodemapsize = ARRAY_SIZE(_memnodemap);
>>> -
>>> - nodes_clear(node_online_map);
>>> - node_set_online(0);
>>> - for ( i = 0; i < nr_cpu_ids; i++ )
>>> - numa_set_node(i, 0);
>>> - cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
>>> - setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
>>> - (u64)end_pfn << PAGE_SHIFT);
>>> -}
>>> -
>>> void numa_set_node(int cpu, nodeid_t node)
>>> {
>>> cpu_to_node[cpu] = node;
>>> diff --git a/xen/common/numa.c b/xen/common/numa.c
>>> index 1facc8fe2b..26c0006d04 100644
>>> --- a/xen/common/numa.c
>>> +++ b/xen/common/numa.c
>>> @@ -14,12 +14,13 @@
>>> #include <xen/smp.h>
>>> #include <xen/pfn.h>
>>> #include <xen/sched.h>
>>
>> NIT: We tend to add a newline betwen <xen/...> headers and <asm/...>
>> headers.
>>
>
> got it
>
>>> +#include <asm/acpi.h>
>>>
>>> struct node_data node_data[MAX_NUMNODES];
>>>
>>> /* Mapping from pdx to node id */
>>> int memnode_shift;
>>> -typeof(*memnodemap) _memnodemap[64];
>>> +static typeof(*memnodemap) _memnodemap[64];
>>> unsigned long memnodemapsize;
>>> u8 *memnodemap;
>>>
>>> @@ -34,6 +35,8 @@ int num_node_memblks;
>>> struct node node_memblk_range[NR_NODE_MEMBLKS];
>>> nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
>>>
>>> +bool numa_off;
>>> +
>>> /*
>>> * Given a shift value, try to populate memnodemap[]
>>> * Returns :
>>> @@ -191,3 +194,120 @@ void numa_add_cpu(int cpu)
>>> {
>>> cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
>>> }
>>> +
>>> +/* initialize NODE_DATA given nodeid and start/end */
>>> +void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
>>
>> From an abstract PoV, start and end should be paddr_t. This should be
>> done on a separate patch though.
>>
>
> Ok.
>
>>> +{
>>> + unsigned long start_pfn, end_pfn;
>>> +
>>> + start_pfn = start >> PAGE_SHIFT;
>>> + end_pfn = end >> PAGE_SHIFT;
>>> +
>>> + NODE_DATA(nodeid)->node_start_pfn = start_pfn;
>>> + NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
>>> +
>>> + node_set_online(nodeid);
>>> +}
>>> +
>>> +void __init numa_init_array(void)
>>> +{
>>> + int rr, i;
>>> +
>>> + /* There are unfortunately some poorly designed mainboards around
>>> + that only connect memory to a single CPU. This breaks the 1:1
>> cpu->node
>>> + mapping. To avoid this fill in the mapping for all possible
>>> + CPUs, as the number of CPUs is not known yet.
>>> + We round robin the existing nodes. */
>>> + rr = first_node(node_online_map);
>>> + for ( i = 0; i < nr_cpu_ids; i++ )
>>> + {
>>> + if ( cpu_to_node[i] != NUMA_NO_NODE )
>>> + continue;
>>> + numa_set_node(i, rr);
>>> + rr = cycle_node(rr, node_online_map);
>>> + }
>>> +}
>>> +
>>> +#ifdef CONFIG_NUMA_EMU
>>> +int numa_fake __initdata = 0;
>>> +
>>> +/* Numa emulation */
>>> +static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
>>
>> Here, this should be either "unsigned long" or ideally "mfn_t".
>> Although, if you use "unsigned long", you will need to...
>>
>
> Do we need a separate patch to do it?
I would prefer if the cleanups are done separately as this makes easier
to review code movement.
>
>>> +{
>>> + int i;
>>> + struct node nodes[MAX_NUMNODES];
>>> + u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
>>
>> ... cast "(end_pfn - start_pfn)" to uin64_t or use pfn_to_paddr().
>>
>
> Ok
>
>>> +
>>> + /* Kludge needed for the hash function */
>>> + if ( hweight64(sz) > 1 )
>>> + {
>>> + u64 x = 1;
>>> + while ( (x << 1) < sz )
>>> + x <<= 1;
>>> + if ( x < sz/2 )
>>> + printk(KERN_ERR "Numa emulation unbalanced. Complain to
>> maintainer\n");
>>> + sz = x;
>>> + }
>>> +
>>> + memset(&nodes,0,sizeof(nodes));
>>> + for ( i = 0; i < numa_fake; i++ )
>>> + {
>>> + nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
>>> + if ( i == numa_fake - 1 )
>>> + sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
>>> + nodes[i].end = nodes[i].start + sz;
>>> + printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64"
>> (%"PRIu64"MB)\n",
>>> + i,
>>> + nodes[i].start, nodes[i].end,
>>> + (nodes[i].end - nodes[i].start) >> 20);
>>> + node_set_online(i);
>>> + }
>>> + memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
>>> + if ( memnode_shift < 0 )
>>> + {
>>> + memnode_shift = 0;
>>> + printk(KERN_ERR "No NUMA hash function found. Emulation
>> disabled.\n");
>>> + return -1;
>>> + }
>>> + for_each_online_node ( i )
>>> + setup_node_bootmem(i, nodes[i].start, nodes[i].end);
>>> + numa_init_array();
>>> +
>>> + return 0;
>>> +}
>>> +#endif
>>> +
>>> +void __init numa_initmem_init(unsigned long start_pfn, unsigned long
>> end_pfn)
>>> +{
>>> + int i;
>>> +
>>> +#ifdef CONFIG_NUMA_EMU
>>> + if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
>>> + return;
>>> +#endif
>>> +
>>> +#ifdef CONFIG_ACPI_NUMA
>>> + if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
>>> + (u64)end_pfn << PAGE_SHIFT) )
>>
>> (u64)v << PAGE_SHIFT should be switched to use pfn_to_paddr() or
>> mfn_to_paddr() if you decide to make start_pfn and end_pfn typesafe.
>>
>
> Still need a separate patch to change it before move?
Yes.
Cheers,
--
Julien Grall
© 2016 - 2026 Red Hat, Inc.