[PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c

Tiezhu Yang posted 4 patches 1 year, 10 months ago
There is a newer version of this series
[PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
Posted by Tiezhu Yang 1 year, 10 months ago
Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the
arch-specific setup_per_cpu_areas() in arch/loongarch/kernel/numa.c is
not built and the generic setup_per_cpu_areas() in mm/percpu.c is used
instead. This is not reasonable and does not work as intended.

Let us select HAVE_SETUP_PER_CPU_AREA unconditionally and then move
CONFIG_HAVE_SETUP_PER_CPU_AREA related code from numa.c to smp.c to
avoid this problem.

Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
---
 arch/loongarch/Kconfig       |  2 +-
 arch/loongarch/kernel/numa.c | 58 -----------------------------------
 arch/loongarch/kernel/smp.c  | 59 ++++++++++++++++++++++++++++++++++++
 3 files changed, 60 insertions(+), 59 deletions(-)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index a5f300ec6f28..64052ae2c2af 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -151,7 +151,7 @@ config LOONGARCH
 	select HAVE_RUST
 	select HAVE_SAMPLE_FTRACE_DIRECT
 	select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
-	select HAVE_SETUP_PER_CPU_AREA if NUMA
+	select HAVE_SETUP_PER_CPU_AREA
 	select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
 	select HAVE_STACKPROTECTOR
 	select HAVE_SYSCALL_TRACEPOINTS
diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
index 8fe21f868f72..49dc1d932ce2 100644
--- a/arch/loongarch/kernel/numa.c
+++ b/arch/loongarch/kernel/numa.c
@@ -48,64 +48,6 @@ EXPORT_SYMBOL(__cpuid_to_node);
 
 nodemask_t numa_nodes_parsed __initdata;
 
-#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
-unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
-EXPORT_SYMBOL(__per_cpu_offset);
-
-static int __init pcpu_cpu_to_node(int cpu)
-{
-	return early_cpu_to_node(cpu);
-}
-
-static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
-{
-	if (early_cpu_to_node(from) == early_cpu_to_node(to))
-		return LOCAL_DISTANCE;
-	else
-		return REMOTE_DISTANCE;
-}
-
-void __init pcpu_populate_pte(unsigned long addr)
-{
-	populate_kernel_pte(addr);
-}
-
-void __init setup_per_cpu_areas(void)
-{
-	unsigned long delta;
-	unsigned int cpu;
-	int rc = -EINVAL;
-
-	if (pcpu_chosen_fc == PCPU_FC_AUTO) {
-		if (nr_node_ids >= 8)
-			pcpu_chosen_fc = PCPU_FC_PAGE;
-		else
-			pcpu_chosen_fc = PCPU_FC_EMBED;
-	}
-
-	/*
-	 * Always reserve area for module percpu variables.  That's
-	 * what the legacy allocator did.
-	 */
-	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
-		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
-					    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
-					    pcpu_cpu_distance, pcpu_cpu_to_node);
-		if (rc < 0)
-			pr_warn("%s allocator failed (%d), falling back to page size\n",
-				pcpu_fc_names[pcpu_chosen_fc], rc);
-	}
-	if (rc < 0)
-		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
-	if (rc < 0)
-		panic("cannot initialize percpu area (err=%d)", rc);
-
-	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
-	for_each_possible_cpu(cpu)
-		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
-}
-#endif
-
 /*
  * Get nodeid by logical cpu number.
  * __cpuid_to_node maps phyical cpu id to node, so we
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index aabee0b280fe..88b9c6b68d1e 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -29,6 +29,7 @@
 #include <asm/loongson.h>
 #include <asm/mmu_context.h>
 #include <asm/numa.h>
+#include <asm/pgalloc.h>
 #include <asm/processor.h>
 #include <asm/setup.h>
 #include <asm/time.h>
@@ -717,3 +718,61 @@ void flush_tlb_one(unsigned long vaddr)
 	on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
 }
 EXPORT_SYMBOL(flush_tlb_one);
+
+#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
+unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(__per_cpu_offset);
+
+static int __init pcpu_cpu_to_node(int cpu)
+{
+	return early_cpu_to_node(cpu);
+}
+
+static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
+{
+	if (early_cpu_to_node(from) == early_cpu_to_node(to))
+		return LOCAL_DISTANCE;
+	else
+		return REMOTE_DISTANCE;
+}
+
+void __init pcpu_populate_pte(unsigned long addr)
+{
+	populate_kernel_pte(addr);
+}
+
+void __init setup_per_cpu_areas(void)
+{
+	unsigned long delta;
+	unsigned int cpu;
+	int rc = -EINVAL;
+
+	if (pcpu_chosen_fc == PCPU_FC_AUTO) {
+		if (nr_node_ids >= 8)
+			pcpu_chosen_fc = PCPU_FC_PAGE;
+		else
+			pcpu_chosen_fc = PCPU_FC_EMBED;
+	}
+
+	/*
+	 * Always reserve area for module percpu variables.  That's
+	 * what the legacy allocator did.
+	 */
+	if (pcpu_chosen_fc != PCPU_FC_PAGE) {
+		rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
+					    PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
+					    pcpu_cpu_distance, pcpu_cpu_to_node);
+		if (rc < 0)
+			pr_warn("%s allocator failed (%d), falling back to page size\n",
+				pcpu_fc_names[pcpu_chosen_fc], rc);
+	}
+	if (rc < 0)
+		rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
+	if (rc < 0)
+		panic("cannot initialize percpu area (err=%d)", rc);
+
+	delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
+	for_each_possible_cpu(cpu)
+		__per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
+}
+#endif
-- 
2.42.0
Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
Posted by Huacai Chen 1 year, 10 months ago
On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
> actually used, this is not reasonable and does not work as intended.
Why is the generic version not reasonable? We need a custom version
only because it can put the percpu variables on the best node. If NUMA
is disabled, software can only see one node, so what is there to optimize?

Huacai

>
> Let us select HAVE_SETUP_PER_CPU_AREA unconditionally and then move
> CONFIG_HAVE_SETUP_PER_CPU_AREA related code from numa.c to smp.c to
> avoid this problem.
>
> Signed-off-by: Tiezhu Yang <yangtiezhu@loongson.cn>
> ---
>  arch/loongarch/Kconfig       |  2 +-
>  arch/loongarch/kernel/numa.c | 58 -----------------------------------
>  arch/loongarch/kernel/smp.c  | 59 ++++++++++++++++++++++++++++++++++++
>  3 files changed, 60 insertions(+), 59 deletions(-)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index a5f300ec6f28..64052ae2c2af 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -151,7 +151,7 @@ config LOONGARCH
>         select HAVE_RUST
>         select HAVE_SAMPLE_FTRACE_DIRECT
>         select HAVE_SAMPLE_FTRACE_DIRECT_MULTI
> -       select HAVE_SETUP_PER_CPU_AREA if NUMA
> +       select HAVE_SETUP_PER_CPU_AREA
>         select HAVE_STACK_VALIDATION if HAVE_OBJTOOL
>         select HAVE_STACKPROTECTOR
>         select HAVE_SYSCALL_TRACEPOINTS
> diff --git a/arch/loongarch/kernel/numa.c b/arch/loongarch/kernel/numa.c
> index 8fe21f868f72..49dc1d932ce2 100644
> --- a/arch/loongarch/kernel/numa.c
> +++ b/arch/loongarch/kernel/numa.c
> @@ -48,64 +48,6 @@ EXPORT_SYMBOL(__cpuid_to_node);
>
>  nodemask_t numa_nodes_parsed __initdata;
>
> -#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> -unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> -EXPORT_SYMBOL(__per_cpu_offset);
> -
> -static int __init pcpu_cpu_to_node(int cpu)
> -{
> -       return early_cpu_to_node(cpu);
> -}
> -
> -static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> -{
> -       if (early_cpu_to_node(from) == early_cpu_to_node(to))
> -               return LOCAL_DISTANCE;
> -       else
> -               return REMOTE_DISTANCE;
> -}
> -
> -void __init pcpu_populate_pte(unsigned long addr)
> -{
> -       populate_kernel_pte(addr);
> -}
> -
> -void __init setup_per_cpu_areas(void)
> -{
> -       unsigned long delta;
> -       unsigned int cpu;
> -       int rc = -EINVAL;
> -
> -       if (pcpu_chosen_fc == PCPU_FC_AUTO) {
> -               if (nr_node_ids >= 8)
> -                       pcpu_chosen_fc = PCPU_FC_PAGE;
> -               else
> -                       pcpu_chosen_fc = PCPU_FC_EMBED;
> -       }
> -
> -       /*
> -        * Always reserve area for module percpu variables.  That's
> -        * what the legacy allocator did.
> -        */
> -       if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> -               rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> -                                           PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
> -                                           pcpu_cpu_distance, pcpu_cpu_to_node);
> -               if (rc < 0)
> -                       pr_warn("%s allocator failed (%d), falling back to page size\n",
> -                               pcpu_fc_names[pcpu_chosen_fc], rc);
> -       }
> -       if (rc < 0)
> -               rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
> -       if (rc < 0)
> -               panic("cannot initialize percpu area (err=%d)", rc);
> -
> -       delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> -       for_each_possible_cpu(cpu)
> -               __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> -}
> -#endif
> -
>  /*
>   * Get nodeid by logical cpu number.
>   * __cpuid_to_node maps phyical cpu id to node, so we
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index aabee0b280fe..88b9c6b68d1e 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -29,6 +29,7 @@
>  #include <asm/loongson.h>
>  #include <asm/mmu_context.h>
>  #include <asm/numa.h>
> +#include <asm/pgalloc.h>
>  #include <asm/processor.h>
>  #include <asm/setup.h>
>  #include <asm/time.h>
> @@ -717,3 +718,61 @@ void flush_tlb_one(unsigned long vaddr)
>         on_each_cpu(flush_tlb_one_ipi, (void *)vaddr, 1);
>  }
>  EXPORT_SYMBOL(flush_tlb_one);
> +
> +#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
> +unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
> +EXPORT_SYMBOL(__per_cpu_offset);
> +
> +static int __init pcpu_cpu_to_node(int cpu)
> +{
> +       return early_cpu_to_node(cpu);
> +}
> +
> +static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
> +{
> +       if (early_cpu_to_node(from) == early_cpu_to_node(to))
> +               return LOCAL_DISTANCE;
> +       else
> +               return REMOTE_DISTANCE;
> +}
> +
> +void __init pcpu_populate_pte(unsigned long addr)
> +{
> +       populate_kernel_pte(addr);
> +}
> +
> +void __init setup_per_cpu_areas(void)
> +{
> +       unsigned long delta;
> +       unsigned int cpu;
> +       int rc = -EINVAL;
> +
> +       if (pcpu_chosen_fc == PCPU_FC_AUTO) {
> +               if (nr_node_ids >= 8)
> +                       pcpu_chosen_fc = PCPU_FC_PAGE;
> +               else
> +                       pcpu_chosen_fc = PCPU_FC_EMBED;
> +       }
> +
> +       /*
> +        * Always reserve area for module percpu variables.  That's
> +        * what the legacy allocator did.
> +        */
> +       if (pcpu_chosen_fc != PCPU_FC_PAGE) {
> +               rc = pcpu_embed_first_chunk(PERCPU_MODULE_RESERVE,
> +                                           PERCPU_DYNAMIC_RESERVE, PMD_SIZE,
> +                                           pcpu_cpu_distance, pcpu_cpu_to_node);
> +               if (rc < 0)
> +                       pr_warn("%s allocator failed (%d), falling back to page size\n",
> +                               pcpu_fc_names[pcpu_chosen_fc], rc);
> +       }
> +       if (rc < 0)
> +               rc = pcpu_page_first_chunk(PERCPU_MODULE_RESERVE, pcpu_cpu_to_node);
> +       if (rc < 0)
> +               panic("cannot initialize percpu area (err=%d)", rc);
> +
> +       delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
> +       for_each_possible_cpu(cpu)
> +               __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
> +}
> +#endif
> --
> 2.42.0
>
>
Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
Posted by Tiezhu Yang 1 year, 10 months ago

On 04/12/2024 12:12 PM, Huacai Chen wrote:
> On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>>
>> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
>> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
>> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
>> actually used, this is not reasonable and does not work as intended.
> Why is the generic version not reasonable? We need a custom version
> just because it can put the percpu variable in the best node. If NUMA
> disabled, software can only see one node, how to optimize?

The initial aim is to use the arch-specific setup_per_cpu_areas()
in all cases under CONFIG_SMP. This patch can be dropped if it is
meaningless for the case of !CONFIG_NUMA and CONFIG_SMP.

Thanks,
Tiezhu

Re: [PATCH v3 1/4] LoongArch: Move CONFIG_HAVE_SETUP_PER_CPU_AREA related code to smp.c
Posted by Huacai Chen 1 year, 10 months ago
On Fri, Apr 12, 2024 at 5:27 PM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
>
>
>
> On 04/12/2024 12:12 PM, Huacai Chen wrote:
> > On Thu, Apr 11, 2024 at 9:05 AM Tiezhu Yang <yangtiezhu@loongson.cn> wrote:
> >>
> >> Currently, if CONFIG_NUMA is not set but CONFIG_SMP is set, the arch
> >> specified setup_per_cpu_areas() in arch/loongarch/kernel/numa.c will
> >> not be built and the generic setup_per_cpu_areas() in mm/percpu.c is
> >> actually used, this is not reasonable and does not work as intended.
> > Why is the generic version not reasonable? We need a custom version
> > just because it can put the percpu variable in the best node. If NUMA
> > disabled, software can only see one node, how to optimize?
>
> The initial aim is to use the arch specific setup_per_cpu_areas()
> in any case under CONFIG_SMP, this patch can be dropped if it is
> meaningless for the case of !CONFIG_NUMA and CONFIG_SMP.
Yes, it is better to drop this patch.

Huacai
>
> Thanks,
> Tiezhu
>
>