[PATCH] x86/numa: Bump NR_NODE_MEMBLKS to MAX_NUMNODES * 4
Posted by Nikolay Kuratov 1 year, 8 months ago
With modern AMD EPYC platforms we can see up to 3 memblocks
per NUMA node, so bump the MAX_NUMNODES multiplier from 2 to 4.
The problem becomes apparent when MAX_NUMNODES is close to the
real number of nodes, leaving us with a `too many memblk ranges`
dmesg error.

Also bump the maximum count of immovable regions accordingly.

Signed-off-by: Nikolay Kuratov <kniv@yandex-team.ru>
---
 If the overhead of the doubled arrays is too undesirable, maybe we
 should consider a config option for this? It appears that
 NR_NODE_MEMBLKS is used only on LoongArch and x86 (the macro in
 asm-generic is orphaned).
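
 For context, the error quoted above is produced by the memblk
 accounting in numa_add_memblk_to() (arch/x86/mm/numa.c); the check
 looks roughly like this (paraphrased and abridged here, not part of
 this patch):

	static int __init numa_add_memblk_to(int nid, u64 start, u64 end,
					     struct numa_meminfo *mi)
	{
		/* ... zero-length and invalid-range checks elided ... */

		/*
		 * One slot is consumed per SRAT memory affinity entry;
		 * bail out once the fixed-size array sized by
		 * NR_NODE_MEMBLKS is full.
		 */
		if (mi->nr_blks >= NR_NODE_MEMBLKS) {
			pr_err("too many memblk ranges\n");
			return -EINVAL;
		}

		/* ... record the [start, end) range for node 'nid' ... */
		return 0;
	}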

 arch/x86/boot/compressed/acpi.c | 6 +++---
 arch/x86/boot/compressed/misc.h | 2 +-
 arch/x86/include/asm/numa.h     | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
index f196b1d1ddf8..74575a900924 100644
--- a/arch/x86/boot/compressed/acpi.c
+++ b/arch/x86/boot/compressed/acpi.c
@@ -17,9 +17,9 @@
 
 /*
  * Immovable memory regions representation. Max amount of memory regions is
- * MAX_NUMNODES*2.
+ * MAX_NUMNODES*4.
  */
-struct mem_vector immovable_mem[MAX_NUMNODES*2];
+struct mem_vector immovable_mem[MAX_NUMNODES*4];
 
 static acpi_physical_address
 __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
@@ -305,7 +305,7 @@ int count_immovable_mem_regions(void)
 				num++;
 			}
 
-			if (num >= MAX_NUMNODES*2) {
+			if (num >= ARRAY_SIZE(immovable_mem)) {
 				debug_putstr("Too many immovable memory regions, aborting.\n");
 				return 0;
 			}
diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
index b353a7be380c..a756569852e5 100644
--- a/arch/x86/boot/compressed/misc.h
+++ b/arch/x86/boot/compressed/misc.h
@@ -174,7 +174,7 @@ static inline acpi_physical_address get_rsdp_addr(void) { return 0; }
 #endif
 
 #if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
-extern struct mem_vector immovable_mem[MAX_NUMNODES*2];
+extern struct mem_vector immovable_mem[MAX_NUMNODES*4];
 int count_immovable_mem_regions(void);
 #else
 static inline int count_immovable_mem_regions(void) { return 0; }
diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
index ef2844d69173..057eafe6fed5 100644
--- a/arch/x86/include/asm/numa.h
+++ b/arch/x86/include/asm/numa.h
@@ -10,7 +10,7 @@
 
 #ifdef CONFIG_NUMA
 
-#define NR_NODE_MEMBLKS		(MAX_NUMNODES*2)
+#define NR_NODE_MEMBLKS		(MAX_NUMNODES*4)
 
 extern int numa_off;
 
-- 
2.34.1
Re: [PATCH] x86/numa: Bump NR_NODE_MEMBLKS to MAX_NUMNODES * 4
Posted by Claude Bing 1 year, 4 months ago
On 5/20/24 10:50, Nikolay Kuratov wrote:
> With modern AMD EPYC platforms we can see up to 3 memblocks
> per NUMA node, so bump the MAX_NUMNODES multiplier from 2 to 4.
> The problem becomes apparent when MAX_NUMNODES is close to the
> real number of nodes, leaving us with a `too many memblk ranges`
> dmesg error.
> 
> Also bump the maximum count of immovable regions accordingly.
> 
> Signed-off-by: Nikolay Kuratov <kniv@yandex-team.ru>
> ---
>  If the overhead of the doubled arrays is too undesirable, maybe we
>  should consider a config option for this? It appears that
>  NR_NODE_MEMBLKS is used only on LoongArch and x86 (the macro in
>  asm-generic is orphaned).
> 
>  arch/x86/boot/compressed/acpi.c | 6 +++---
>  arch/x86/boot/compressed/misc.h | 2 +-
>  arch/x86/include/asm/numa.h     | 2 +-
>  3 files changed, 5 insertions(+), 5 deletions(-)
> 
> diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c
> index f196b1d1ddf8..74575a900924 100644
> --- a/arch/x86/boot/compressed/acpi.c
> +++ b/arch/x86/boot/compressed/acpi.c
> @@ -17,9 +17,9 @@
>  
>  /*
>   * Immovable memory regions representation. Max amount of memory regions is
> - * MAX_NUMNODES*2.
> + * MAX_NUMNODES*4.
>   */
> -struct mem_vector immovable_mem[MAX_NUMNODES*2];
> +struct mem_vector immovable_mem[MAX_NUMNODES*4];
>  
>  static acpi_physical_address
>  __efi_get_rsdp_addr(unsigned long cfg_tbl_pa, unsigned int cfg_tbl_len)
> @@ -305,7 +305,7 @@ int count_immovable_mem_regions(void)
>  				num++;
>  			}
>  
> -			if (num >= MAX_NUMNODES*2) {
> +			if (num >= ARRAY_SIZE(immovable_mem)) {
>  				debug_putstr("Too many immovable memory regions, aborting.\n");
>  				return 0;
>  			}
> diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
> index b353a7be380c..a756569852e5 100644
> --- a/arch/x86/boot/compressed/misc.h
> +++ b/arch/x86/boot/compressed/misc.h
> @@ -174,7 +174,7 @@ static inline acpi_physical_address get_rsdp_addr(void) { return 0; }
>  #endif
>  
>  #if defined(CONFIG_RANDOMIZE_BASE) && defined(CONFIG_MEMORY_HOTREMOVE) && defined(CONFIG_ACPI)
> -extern struct mem_vector immovable_mem[MAX_NUMNODES*2];
> +extern struct mem_vector immovable_mem[MAX_NUMNODES*4];
>  int count_immovable_mem_regions(void);
>  #else
>  static inline int count_immovable_mem_regions(void) { return 0; }
> diff --git a/arch/x86/include/asm/numa.h b/arch/x86/include/asm/numa.h
> index ef2844d69173..057eafe6fed5 100644
> --- a/arch/x86/include/asm/numa.h
> +++ b/arch/x86/include/asm/numa.h
> @@ -10,7 +10,7 @@
>  
>  #ifdef CONFIG_NUMA
>  
> -#define NR_NODE_MEMBLKS		(MAX_NUMNODES*2)
> +#define NR_NODE_MEMBLKS		(MAX_NUMNODES*4)
>  
>  extern int numa_off;
>  
Our testing confirms this patch resolves at least one case where a
quad-CPU system (Supermicro X11QPH+) registered only a single NUMA
node. Would it be possible to have this patch merged after a review?

Debug messages seen prior to applying the patch:

  ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x7fffffff]
  ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x207fffffff]
  ACPI: SRAT: Node 0 PXM 0 [mem 0x2080000000-0x407fffffff]
  ACPI: SRAT: Node 1 PXM 1 [mem 0x4080000000-0x607fffffff]
  ACPI: SRAT: Node 1 PXM 1 [mem 0x6080000000-0x807fffffff]
  ACPI: SRAT: Node 2 PXM 2 [mem 0x8080000000-0xa07fffffff]
  ACPI: SRAT: Node 2 PXM 2 [mem 0xa080000000-0xc07fffffff]
  ACPI: SRAT: Node 3 PXM 3 [mem 0xc080000000-0xe07fffffff]
  too many memblk ranges
  ACPI: SRAT: Failed to add memblk to node 3 [mem
  0xe080000000-0x1007fffffff]
  ACPI: SRAT: SRAT not used.
  NUMA: Initialized distance table, cnt=4
  No NUMA configuration found
  Faking a node at [mem 0x0000000000000000-0x000001007fffffff]
  NODE_DATA(0) allocated [mem 0x1007fffa000-0x1007fffbfff]

After the patch, all four nodes were recognized:

  ACPI: SRAT: Node 0 PXM 0 [mem 0x00000000-0x7fffffff]
  ACPI: SRAT: Node 0 PXM 0 [mem 0x100000000-0x207fffffff]
  ACPI: SRAT: Node 0 PXM 0 [mem 0x2080000000-0x407fffffff]
  ACPI: SRAT: Node 1 PXM 1 [mem 0x4080000000-0x607fffffff]
  ACPI: SRAT: Node 1 PXM 1 [mem 0x6080000000-0x807fffffff]
  ACPI: SRAT: Node 2 PXM 2 [mem 0x8080000000-0xa07fffffff]
  ACPI: SRAT: Node 2 PXM 2 [mem 0xa080000000-0xc07fffffff]
  ACPI: SRAT: Node 3 PXM 3 [mem 0xc080000000-0xe07fffffff]
  ACPI: SRAT: Node 3 PXM 3 [mem 0xe080000000-0x1007fffffff]
  NUMA: Initialized distance table, cnt=4
  NUMA: Node 0 [mem 0x00000000-0x7fffffff] + [mem
  0x100000000-0x207fffffff] -> [mem 0x00000000-0x207fffffff]
  NUMA: Node 0 [mem 0x00000000-0x207fffffff] + [mem
  0x2080000000-0x407fffffff] -> [mem 0x00000000-0x407fffffff]
  NUMA: Node 1 [mem 0x4080000000-0x607fffffff] + [mem
  0x6080000000-0x807fffffff] -> [mem 0x4080000000-0x807fffffff]
  NUMA: Node 2 [mem 0x8080000000-0xa07fffffff] + [mem
  0xa080000000-0xc07fffffff] -> [mem 0x8080000000-0xc07fffffff]
  NUMA: Node 3 [mem 0xc080000000-0xe07fffffff] + [mem
  0xe080000000-0x1007fffffff] -> [mem 0xc080000000-0x1007fffffff]
  NODE_DATA(0) allocated [mem 0x407fffe000-0x407fffffff]
  NODE_DATA(1) allocated [mem 0x807fffe000-0x807fffffff]
  NODE_DATA(2) allocated [mem 0xc07fffe000-0xc07fffffff]
  NODE_DATA(3) allocated [mem 0x1007fff9000-0x1007fffafff]

Tested-by: Claude Bing <cbing@cybernetics.com>