[PATCH V2] LoongArch: Support CONFIG_SCHED_MC

Tianyang Zhang posted 1 patch 9 months, 2 weeks ago
arch/loongarch/Kconfig                |  9 ++++++
arch/loongarch/include/asm/smp.h      |  1 +
arch/loongarch/include/asm/topology.h |  8 +++++
arch/loongarch/kernel/smp.c           | 46 +++++++++++++++++++++++++++
4 files changed, 64 insertions(+)
[PATCH V2] LoongArch: Support CONFIG_SCHED_MC
Posted by Tianyang Zhang 9 months, 2 weeks ago
From: wanghongliang <wanghongliang@loongson.cn>

In order to achieve more reasonable load balancing behavior, support
for SCHED_MC (multi-core scheduling) has been added.
The LLC (last level cache) distribution on LoongArch is consistent with
the NUMA node layout, so the SCHED_MC balancing domain can effectively
reduce the cases where processes are woken up onto an SMT sibling.

Co-developed-by: wanghongliang <wanghongliang@loongson.cn>
Signed-off-by: wanghongliang <wanghongliang@loongson.cn>
Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
---
 arch/loongarch/Kconfig                |  9 ++++++
 arch/loongarch/include/asm/smp.h      |  1 +
 arch/loongarch/include/asm/topology.h |  8 +++++
 arch/loongarch/kernel/smp.c           | 46 +++++++++++++++++++++++++++
 4 files changed, 64 insertions(+)

diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 1a2cf012b..3d6d129ee 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -456,6 +456,15 @@ config SCHED_SMT
 	  Improves scheduler's performance when there are multiple
 	  threads in one physical core.
 
+config SCHED_MC
+	bool "Multi-core scheduler support"
+	depends on SMP
+	default y
+	help
+	  Multi-core scheduler support improves the CPU scheduler's decision
+	  making when dealing with multi-core CPU chips at a cost of slightly
+	  increased overhead in some places.
+
 config SMP
 	bool "Multi-Processing support"
 	help
diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
index b87d1d5e5..13955d726 100644
--- a/arch/loongarch/include/asm/smp.h
+++ b/arch/loongarch/include/asm/smp.h
@@ -26,6 +26,7 @@ extern int num_processors;
 extern int disabled_cpus;
 extern cpumask_t cpu_sibling_map[];
 extern cpumask_t cpu_core_map[];
+extern cpumask_t cpu_llc_shared_map[];
 extern cpumask_t cpu_foreign_map[];
 
 void loongson_smp_setup(void);
diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h
index 50273c918..dfaf45d57 100644
--- a/arch/loongarch/include/asm/topology.h
+++ b/arch/loongarch/include/asm/topology.h
@@ -36,6 +36,14 @@ void numa_set_distance(int from, int to, int distance);
 #define topology_sibling_cpumask(cpu)		(&cpu_sibling_map[cpu])
 #endif
 
+/*
+ * return cpus that shares the last level cache.
+ */
+static inline const struct cpumask *cpu_coregroup_mask(int cpu)
+{
+	return &cpu_llc_shared_map[cpu];
+}
+
 #include <asm-generic/topology.h>
 
 static inline void arch_fix_phys_package_id(int num, u32 slot) { }
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 4b24589c0..7b9e996a1 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -50,6 +50,9 @@ EXPORT_SYMBOL(cpu_sibling_map);
 cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
 EXPORT_SYMBOL(cpu_core_map);
 
+cpumask_t cpu_llc_shared_map[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(cpu_llc_shared_map);
+
 static DECLARE_COMPLETION(cpu_starting);
 static DECLARE_COMPLETION(cpu_running);
 
@@ -66,6 +69,10 @@ static cpumask_t cpu_sibling_setup_map;
 /* representing cpus for which core maps can be computed */
 static cpumask_t cpu_core_setup_map;
 
+/* representing cpus for which llc sibling maps can be computed */
+static cpumask_t cpu_llc_shared_setup_map;
+
+
 struct secondary_data cpuboot_data;
 static DEFINE_PER_CPU(int, cpu_state);
 
@@ -102,6 +109,42 @@ static inline void set_cpu_core_map(int cpu)
 	}
 }
 
+static inline bool cpus_are_shared_llc(int cpua, int cpub)
+{
+	if (cpu_to_node(cpua) != cpu_to_node(cpub))
+		return false;
+
+	return true;
+}
+
+static inline void set_cpu_llc_shared_map(int cpu)
+{
+	int i;
+
+	cpumask_set_cpu(cpu, &cpu_llc_shared_setup_map);
+
+	for_each_cpu(i, &cpu_llc_shared_setup_map) {
+		if (cpus_are_shared_llc(cpu, i)) {
+			cpumask_set_cpu(i, &cpu_llc_shared_map[cpu]);
+			cpumask_set_cpu(cpu, &cpu_llc_shared_map[i]);
+		}
+	}
+}
+
+static inline void clear_cpu_llc_shared_map(int cpu)
+{
+	int i;
+
+	for_each_cpu(i, &cpu_llc_shared_setup_map) {
+		if (cpus_are_shared_llc(cpu, i)) {
+			cpumask_clear_cpu(i, &cpu_llc_shared_map[cpu]);
+			cpumask_clear_cpu(cpu, &cpu_llc_shared_map[i]);
+		}
+	}
+
+	cpumask_clear_cpu(cpu, &cpu_llc_shared_setup_map);
+}
+
 static inline void set_cpu_sibling_map(int cpu)
 {
 	int i;
@@ -406,6 +449,7 @@ int loongson_cpu_disable(void)
 #endif
 	set_cpu_online(cpu, false);
 	clear_cpu_sibling_map(cpu);
+	clear_cpu_llc_shared_map(cpu);
 	calculate_cpu_foreign_map();
 	local_irq_save(flags);
 	irq_migrate_all_off_this_cpu();
@@ -573,6 +617,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	loongson_prepare_cpus(max_cpus);
 	set_cpu_sibling_map(0);
 	set_cpu_core_map(0);
+	set_cpu_llc_shared_map(0);
 	calculate_cpu_foreign_map();
 #ifndef CONFIG_HOTPLUG_CPU
 	init_cpu_present(cpu_possible_mask);
@@ -614,6 +659,7 @@ asmlinkage void start_secondary(void)
 
 	set_cpu_sibling_map(cpu);
 	set_cpu_core_map(cpu);
+	set_cpu_llc_shared_map(cpu);
 
 	notify_cpu_starting(cpu);
 
-- 
2.43.0
Re: [PATCH V2] LoongArch:support CONFIG_SCHED_MC
Posted by Huacai Chen 9 months, 1 week ago
Hi, Tianyang,

On Wed, Apr 30, 2025 at 5:08 PM Tianyang Zhang
<zhangtianyang@loongson.cn> wrote:
>
> From: wanghongliang <wanghongliang@loongson.cn>
>
> In order to achieve more reasonable load balancing behavior,
> support for SCHED_MC has been added.
> The LLC distribution of Loongarch now is consistent with numa-node,
> the balancing domain of SCHED_MC can effectively reduce the situation
> where processes are awakened to smt_sibling
>
> Co-developed-by: wanghongliang <wanghongliang@loongson.cn>
> Signed-off-by: wanghongliang <wanghongliang@loongson.cn>
> Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn>
> ---
>  arch/loongarch/Kconfig                |  9 ++++++
>  arch/loongarch/include/asm/smp.h      |  1 +
>  arch/loongarch/include/asm/topology.h |  8 +++++
>  arch/loongarch/kernel/smp.c           | 46 +++++++++++++++++++++++++++
>  4 files changed, 64 insertions(+)
>
> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 1a2cf012b..3d6d129ee 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -456,6 +456,15 @@ config SCHED_SMT
>           Improves scheduler's performance when there are multiple
>           threads in one physical core.
>
> +config SCHED_MC
> +       prompt "Multi-core scheduler support"
This should be bool....

To save your time, I have fixed this typo, refresh the title and
commit message [1], you can download it and then submit V3.

And if needed, also update the version in the internal repo.

[1] https://github.com/chenhuacai/linux/commit/17c9a8142696c428e2da37109ac30deb5133eaf7


Huacai

> +       depends on SMP
> +       default y
> +       help
> +         Multi-core scheduler support improves the CPU scheduler's decision
> +         making when dealing with multi-core CPU chips at a cost of slightly
> +         increased overhead in some places.
> +
>  config SMP
>         bool "Multi-Processing support"
>         help
> diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h
> index b87d1d5e5..13955d726 100644
> --- a/arch/loongarch/include/asm/smp.h
> +++ b/arch/loongarch/include/asm/smp.h
> @@ -26,6 +26,7 @@ extern int num_processors;
>  extern int disabled_cpus;
>  extern cpumask_t cpu_sibling_map[];
>  extern cpumask_t cpu_core_map[];
> +extern cpumask_t cpu_llc_shared_map[];
>  extern cpumask_t cpu_foreign_map[];
>
>  void loongson_smp_setup(void);
> diff --git a/arch/loongarch/include/asm/topology.h b/arch/loongarch/include/asm/topology.h
> index 50273c918..dfaf45d57 100644
> --- a/arch/loongarch/include/asm/topology.h
> +++ b/arch/loongarch/include/asm/topology.h
> @@ -36,6 +36,14 @@ void numa_set_distance(int from, int to, int distance);
>  #define topology_sibling_cpumask(cpu)          (&cpu_sibling_map[cpu])
>  #endif
>
> +/*
> + * return cpus that shares the last level cache.
> + */
> +static inline const struct cpumask *cpu_coregroup_mask(int cpu)
> +{
> +       return &cpu_llc_shared_map[cpu];
> +}
> +
>  #include <asm-generic/topology.h>
>
>  static inline void arch_fix_phys_package_id(int num, u32 slot) { }
> diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
> index 4b24589c0..7b9e996a1 100644
> --- a/arch/loongarch/kernel/smp.c
> +++ b/arch/loongarch/kernel/smp.c
> @@ -50,6 +50,9 @@ EXPORT_SYMBOL(cpu_sibling_map);
>  cpumask_t cpu_core_map[NR_CPUS] __read_mostly;
>  EXPORT_SYMBOL(cpu_core_map);
>
> +cpumask_t cpu_llc_shared_map[NR_CPUS] __read_mostly;
> +EXPORT_SYMBOL(cpu_llc_shared_map);
> +
>  static DECLARE_COMPLETION(cpu_starting);
>  static DECLARE_COMPLETION(cpu_running);
>
> @@ -66,6 +69,10 @@ static cpumask_t cpu_sibling_setup_map;
>  /* representing cpus for which core maps can be computed */
>  static cpumask_t cpu_core_setup_map;
>
> +/* representing cpus for which llc sibling maps can be computed */
> +static cpumask_t cpu_llc_shared_setup_map;
> +
> +
>  struct secondary_data cpuboot_data;
>  static DEFINE_PER_CPU(int, cpu_state);
>
> @@ -102,6 +109,42 @@ static inline void set_cpu_core_map(int cpu)
>         }
>  }
>
> +static inline bool cpus_are_shared_llc(int cpua, int cpub)
> +{
> +       if (cpu_to_node(cpua) != cpu_to_node(cpub))
> +               return false;
> +
> +       return true;
> +}
> +
> +static inline void set_cpu_llc_shared_map(int cpu)
> +{
> +       int i;
> +
> +       cpumask_set_cpu(cpu, &cpu_llc_shared_setup_map);
> +
> +       for_each_cpu(i, &cpu_llc_shared_setup_map) {
> +               if (cpus_are_shared_llc(cpu, i)) {
> +                       cpumask_set_cpu(i, &cpu_llc_shared_map[cpu]);
> +                       cpumask_set_cpu(cpu, &cpu_llc_shared_map[i]);
> +               }
> +       }
> +}
> +
> +static inline void clear_cpu_llc_shared_map(int cpu)
> +{
> +       int i;
> +
> +       for_each_cpu(i, &cpu_llc_shared_setup_map) {
> +               if (cpus_are_shared_llc(cpu, i)) {
> +                       cpumask_clear_cpu(i, &cpu_llc_shared_map[cpu]);
> +                       cpumask_clear_cpu(cpu, &cpu_llc_shared_map[i]);
> +               }
> +       }
> +
> +       cpumask_clear_cpu(cpu, &cpu_llc_shared_setup_map);
> +}
> +
>  static inline void set_cpu_sibling_map(int cpu)
>  {
>         int i;
> @@ -406,6 +449,7 @@ int loongson_cpu_disable(void)
>  #endif
>         set_cpu_online(cpu, false);
>         clear_cpu_sibling_map(cpu);
> +       clear_cpu_llc_shared_map(cpu);
>         calculate_cpu_foreign_map();
>         local_irq_save(flags);
>         irq_migrate_all_off_this_cpu();
> @@ -573,6 +617,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
>         loongson_prepare_cpus(max_cpus);
>         set_cpu_sibling_map(0);
>         set_cpu_core_map(0);
> +       set_cpu_llc_shared_map(0);
>         calculate_cpu_foreign_map();
>  #ifndef CONFIG_HOTPLUG_CPU
>         init_cpu_present(cpu_possible_mask);
> @@ -614,6 +659,7 @@ asmlinkage void start_secondary(void)
>
>         set_cpu_sibling_map(cpu);
>         set_cpu_core_map(cpu);
> +       set_cpu_llc_shared_map(cpu);
>
>         notify_cpu_starting(cpu);
>
> --
> 2.43.0
>
>