arch/mips/Kconfig | 1 + arch/mips/include/asm/smp.h | 1 - arch/mips/loongson64/smp.c | 35 ++--------------------------------- 3 files changed, 3 insertions(+), 34 deletions(-)
Nowadays SYNC_R4K is performing better than Loongson64's
custom sync mechanism.
Switch to SYNC_R4K to improve performance and reduce code
duplication.
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
Last minute for 6.11 :-)
---
arch/mips/Kconfig | 1 +
arch/mips/include/asm/smp.h | 1 -
arch/mips/loongson64/smp.c | 35 ++---------------------------------
3 files changed, 3 insertions(+), 34 deletions(-)
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 1236ea122061..e163059dd4d3 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -478,6 +478,7 @@ config MACH_LOONGSON64
select BOARD_SCACHE
select CSRC_R4K
select CEVT_R4K
+ select SYNC_R4K
select FORCE_PCI
select ISA
select I8259
diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
index bc2c240f414b..2427d76f953f 100644
--- a/arch/mips/include/asm/smp.h
+++ b/arch/mips/include/asm/smp.h
@@ -50,7 +50,6 @@ extern int __cpu_logical_map[NR_CPUS];
#define SMP_CALL_FUNCTION 0x2
/* Octeon - Tell another core to flush its icache */
#define SMP_ICACHE_FLUSH 0x4
-#define SMP_ASK_C0COUNT 0x8
/* Mask of CPUs which are currently definitely operating coherently */
extern cpumask_t cpu_coherent_mask;
diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
index 66d049cdcf14..147acd972a07 100644
--- a/arch/mips/loongson64/smp.c
+++ b/arch/mips/loongson64/smp.c
@@ -33,7 +33,6 @@ static void __iomem *ipi_clear0_regs[16];
static void __iomem *ipi_status0_regs[16];
static void __iomem *ipi_en0_regs[16];
static void __iomem *ipi_mailbox_buf[16];
-static uint32_t core0_c0count[NR_CPUS];
static u32 (*ipi_read_clear)(int cpu);
static void (*ipi_write_action)(int cpu, u32 action);
@@ -382,11 +381,10 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action)
ipi_write_action(cpu_logical_map(i), (u32)action);
}
-
static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
{
- int i, cpu = smp_processor_id();
- unsigned int action, c0count;
+ int cpu = smp_processor_id();
+ unsigned int action;
action = ipi_read_clear(cpu);
@@ -399,26 +397,14 @@ static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
irq_exit();
}
- if (action & SMP_ASK_C0COUNT) {
- BUG_ON(cpu != 0);
- c0count = read_c0_count();
- c0count = c0count ? c0count : 1;
- for (i = 1; i < nr_cpu_ids; i++)
- core0_c0count[i] = c0count;
- nudge_writes(); /* Let others see the result ASAP */
- }
-
return IRQ_HANDLED;
}
-#define MAX_LOOPS 800
/*
* SMP init and finish on secondary CPUs
*/
static void loongson3_init_secondary(void)
{
- int i;
- uint32_t initcount;
unsigned int cpu = smp_processor_id();
unsigned int imask = STATUSF_IP7 | STATUSF_IP6 |
STATUSF_IP3 | STATUSF_IP2;
@@ -432,23 +418,6 @@ static void loongson3_init_secondary(void)
cpu_logical_map(cpu) % loongson_sysconf.cores_per_package);
cpu_data[cpu].package =
cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
-
- i = 0;
- core0_c0count[cpu] = 0;
- loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
- while (!core0_c0count[cpu]) {
- i++;
- cpu_relax();
- }
-
- if (i > MAX_LOOPS)
- i = MAX_LOOPS;
- if (cpu_data[cpu].package)
- initcount = core0_c0count[cpu] + i;
- else /* Local access is faster for loops */
- initcount = core0_c0count[cpu] + i/2;
-
- write_c0_count(initcount);
}
static void loongson3_smp_finish(void)
---
base-commit: 0b58e108042b0ed28a71cd7edf5175999955b233
change-id: 20240714-loongson64-cevt-r4k-eb74d4ad984c
Best regards,
--
Jiaxun Yang <jiaxun.yang@flygoat.com>
On Sun, Jul 14, 2024 at 10:41:05AM +0800, Jiaxun Yang wrote:
> Nowadays SYNC_R4K is performing better than Loongson64's
> custom sync mechanism.
>
> Switch to SYNC_R4K to improve performance and reduce code
> duplication.
>
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
> Last minute for 6.11 :-)
> ---
> arch/mips/Kconfig | 1 +
> arch/mips/include/asm/smp.h | 1 -
> arch/mips/loongson64/smp.c | 35 ++---------------------------------
> 3 files changed, 3 insertions(+), 34 deletions(-)

applied to mips-next.

Thomas.

--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]
在2024年7月14日七月 上午10:41,Jiaxun Yang写道:
> Nowadays SYNC_R4K is performing better than Loongson64's
> custom sync mechanism.
>
> Switch to SYNC_R4K to improve performance and reduce code
> duplication.
>
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
> Last minute for 6.11 :-)
Hi Thomas,
Could you please apply this to the 6.11 PR, or to 6.11 fixes?
This is technically a leftover from the previous clock source series, and it does help
prevent random RCU stalls on multi-node Loongson-3 systems.
Thanks
- Jiaxun
> ---
> arch/mips/Kconfig | 1 +
> arch/mips/include/asm/smp.h | 1 -
> arch/mips/loongson64/smp.c | 35 ++---------------------------------
> 3 files changed, 3 insertions(+), 34 deletions(-)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 1236ea122061..e163059dd4d3 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -478,6 +478,7 @@ config MACH_LOONGSON64
> select BOARD_SCACHE
> select CSRC_R4K
> select CEVT_R4K
> + select SYNC_R4K
> select FORCE_PCI
> select ISA
> select I8259
> diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
> index bc2c240f414b..2427d76f953f 100644
> --- a/arch/mips/include/asm/smp.h
> +++ b/arch/mips/include/asm/smp.h
> @@ -50,7 +50,6 @@ extern int __cpu_logical_map[NR_CPUS];
> #define SMP_CALL_FUNCTION 0x2
> /* Octeon - Tell another core to flush its icache */
> #define SMP_ICACHE_FLUSH 0x4
> -#define SMP_ASK_C0COUNT 0x8
>
> /* Mask of CPUs which are currently definitely operating coherently */
> extern cpumask_t cpu_coherent_mask;
> diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
> index 66d049cdcf14..147acd972a07 100644
> --- a/arch/mips/loongson64/smp.c
> +++ b/arch/mips/loongson64/smp.c
> @@ -33,7 +33,6 @@ static void __iomem *ipi_clear0_regs[16];
> static void __iomem *ipi_status0_regs[16];
> static void __iomem *ipi_en0_regs[16];
> static void __iomem *ipi_mailbox_buf[16];
> -static uint32_t core0_c0count[NR_CPUS];
>
> static u32 (*ipi_read_clear)(int cpu);
> static void (*ipi_write_action)(int cpu, u32 action);
> @@ -382,11 +381,10 @@ loongson3_send_ipi_mask(const struct cpumask
> *mask, unsigned int action)
> ipi_write_action(cpu_logical_map(i), (u32)action);
> }
>
> -
> static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
> {
> - int i, cpu = smp_processor_id();
> - unsigned int action, c0count;
> + int cpu = smp_processor_id();
> + unsigned int action;
>
> action = ipi_read_clear(cpu);
>
> @@ -399,26 +397,14 @@ static irqreturn_t loongson3_ipi_interrupt(int
> irq, void *dev_id)
> irq_exit();
> }
>
> - if (action & SMP_ASK_C0COUNT) {
> - BUG_ON(cpu != 0);
> - c0count = read_c0_count();
> - c0count = c0count ? c0count : 1;
> - for (i = 1; i < nr_cpu_ids; i++)
> - core0_c0count[i] = c0count;
> - nudge_writes(); /* Let others see the result ASAP */
> - }
> -
> return IRQ_HANDLED;
> }
>
> -#define MAX_LOOPS 800
> /*
> * SMP init and finish on secondary CPUs
> */
> static void loongson3_init_secondary(void)
> {
> - int i;
> - uint32_t initcount;
> unsigned int cpu = smp_processor_id();
> unsigned int imask = STATUSF_IP7 | STATUSF_IP6 |
> STATUSF_IP3 | STATUSF_IP2;
> @@ -432,23 +418,6 @@ static void loongson3_init_secondary(void)
> cpu_logical_map(cpu) % loongson_sysconf.cores_per_package);
> cpu_data[cpu].package =
> cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
> -
> - i = 0;
> - core0_c0count[cpu] = 0;
> - loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
> - while (!core0_c0count[cpu]) {
> - i++;
> - cpu_relax();
> - }
> -
> - if (i > MAX_LOOPS)
> - i = MAX_LOOPS;
> - if (cpu_data[cpu].package)
> - initcount = core0_c0count[cpu] + i;
> - else /* Local access is faster for loops */
> - initcount = core0_c0count[cpu] + i/2;
> -
> - write_c0_count(initcount);
> }
>
> static void loongson3_smp_finish(void)
>
> ---
> base-commit: 0b58e108042b0ed28a71cd7edf5175999955b233
> change-id: 20240714-loongson64-cevt-r4k-eb74d4ad984c
>
> Best regards,
> --
> Jiaxun Yang <jiaxun.yang@flygoat.com>
--
- Jiaxun
On Thu, Jul 18, 2024 at 03:34:30PM +0800, Jiaxun Yang wrote:
>
>
> 在2024年7月14日七月 上午10:41,Jiaxun Yang写道:
> > Nowadays SYNC_R4K is performing better than Loongson64's
> > custom sync mechanism.
> >
> > Switch to SYNC_R4K to improve performance and reduce code
> > duplication.
> >
> > Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> > ---
> > Last minute for 6.11 :-)
>
> Hi Thomas,
>
> Could you please apply this to 6.11 PR, or 6.11 fixes?
>
> This is technically a left over of previous clock source series, and it does help
> on preventing random RCU stall for multi-node Loongson-3 systems.

if Huacai is ok with it, I'll add it to a second PR for 6.11.

Thomas.

--
Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
good idea. [ RFC1925, 2.3 ]
Reviewed-by: Huacai Chen <chenhuacai@loongson.cn>

On Fri, Jul 19, 2024 at 1:29 AM Thomas Bogendoerfer <tsbogend@alpha.franken.de> wrote:
>
> On Thu, Jul 18, 2024 at 03:34:30PM +0800, Jiaxun Yang wrote:
> >
> >
> > 在2024年7月14日七月 上午10:41,Jiaxun Yang写道:
> > > Nowadays SYNC_R4K is performing better than Loongson64's
> > > custom sync mechanism.
> > >
> > > Switch to SYNC_R4K to improve performance and reduce code
> > > duplication.
> > >
> > > Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> > > ---
> > > Last minute for 6.11 :-)
> >
> > Hi Thomas,
> >
> > Could you please apply this to 6.11 PR, or 6.11 fixes?
> >
> > This is technically a left over of previous clock source series, and it does help
> > on preventing random RCU stall for multi-node Loongson-3 systems.
>
> if Huacai is ok with it, I'll add it to a second PR for 6.11.
>
> Thomas.
>
> --
> Crap can work. Given enough thrust pigs will fly, but it's not necessarily a
> good idea. [ RFC1925, 2.3 ]
On Sun, Jul 14, 2024 at 10:41 AM Jiaxun Yang <jiaxun.yang@flygoat.com> wrote:
>
> Nowadays SYNC_R4K is performing better than Loongson64's
> custom sync mechanism.
The precision of Loongson64's custom sync mechanism is significantly better than that of SYNC_R4K.
Huacai
>
> Switch to SYNC_R4K to improve performance and reduce code
> duplication.
>
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
> Last minute for 6.11 :-)
> ---
> arch/mips/Kconfig | 1 +
> arch/mips/include/asm/smp.h | 1 -
> arch/mips/loongson64/smp.c | 35 ++---------------------------------
> 3 files changed, 3 insertions(+), 34 deletions(-)
>
> diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
> index 1236ea122061..e163059dd4d3 100644
> --- a/arch/mips/Kconfig
> +++ b/arch/mips/Kconfig
> @@ -478,6 +478,7 @@ config MACH_LOONGSON64
> select BOARD_SCACHE
> select CSRC_R4K
> select CEVT_R4K
> + select SYNC_R4K
> select FORCE_PCI
> select ISA
> select I8259
> diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h
> index bc2c240f414b..2427d76f953f 100644
> --- a/arch/mips/include/asm/smp.h
> +++ b/arch/mips/include/asm/smp.h
> @@ -50,7 +50,6 @@ extern int __cpu_logical_map[NR_CPUS];
> #define SMP_CALL_FUNCTION 0x2
> /* Octeon - Tell another core to flush its icache */
> #define SMP_ICACHE_FLUSH 0x4
> -#define SMP_ASK_C0COUNT 0x8
>
> /* Mask of CPUs which are currently definitely operating coherently */
> extern cpumask_t cpu_coherent_mask;
> diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c
> index 66d049cdcf14..147acd972a07 100644
> --- a/arch/mips/loongson64/smp.c
> +++ b/arch/mips/loongson64/smp.c
> @@ -33,7 +33,6 @@ static void __iomem *ipi_clear0_regs[16];
> static void __iomem *ipi_status0_regs[16];
> static void __iomem *ipi_en0_regs[16];
> static void __iomem *ipi_mailbox_buf[16];
> -static uint32_t core0_c0count[NR_CPUS];
>
> static u32 (*ipi_read_clear)(int cpu);
> static void (*ipi_write_action)(int cpu, u32 action);
> @@ -382,11 +381,10 @@ loongson3_send_ipi_mask(const struct cpumask *mask, unsigned int action)
> ipi_write_action(cpu_logical_map(i), (u32)action);
> }
>
> -
> static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
> {
> - int i, cpu = smp_processor_id();
> - unsigned int action, c0count;
> + int cpu = smp_processor_id();
> + unsigned int action;
>
> action = ipi_read_clear(cpu);
>
> @@ -399,26 +397,14 @@ static irqreturn_t loongson3_ipi_interrupt(int irq, void *dev_id)
> irq_exit();
> }
>
> - if (action & SMP_ASK_C0COUNT) {
> - BUG_ON(cpu != 0);
> - c0count = read_c0_count();
> - c0count = c0count ? c0count : 1;
> - for (i = 1; i < nr_cpu_ids; i++)
> - core0_c0count[i] = c0count;
> - nudge_writes(); /* Let others see the result ASAP */
> - }
> -
> return IRQ_HANDLED;
> }
>
> -#define MAX_LOOPS 800
> /*
> * SMP init and finish on secondary CPUs
> */
> static void loongson3_init_secondary(void)
> {
> - int i;
> - uint32_t initcount;
> unsigned int cpu = smp_processor_id();
> unsigned int imask = STATUSF_IP7 | STATUSF_IP6 |
> STATUSF_IP3 | STATUSF_IP2;
> @@ -432,23 +418,6 @@ static void loongson3_init_secondary(void)
> cpu_logical_map(cpu) % loongson_sysconf.cores_per_package);
> cpu_data[cpu].package =
> cpu_logical_map(cpu) / loongson_sysconf.cores_per_package;
> -
> - i = 0;
> - core0_c0count[cpu] = 0;
> - loongson3_send_ipi_single(0, SMP_ASK_C0COUNT);
> - while (!core0_c0count[cpu]) {
> - i++;
> - cpu_relax();
> - }
> -
> - if (i > MAX_LOOPS)
> - i = MAX_LOOPS;
> - if (cpu_data[cpu].package)
> - initcount = core0_c0count[cpu] + i;
> - else /* Local access is faster for loops */
> - initcount = core0_c0count[cpu] + i/2;
> -
> - write_c0_count(initcount);
> }
>
> static void loongson3_smp_finish(void)
>
> ---
> base-commit: 0b58e108042b0ed28a71cd7edf5175999955b233
> change-id: 20240714-loongson64-cevt-r4k-eb74d4ad984c
>
> Best regards,
> --
> Jiaxun Yang <jiaxun.yang@flygoat.com>
>
© 2016 - 2026 Red Hat, Inc.