[PATCH 2/3] loongarch: Introduce sys_loongarch_flush_icache syscall

Jiaxun Yang posted 3 patches 1 year, 1 month ago
[PATCH 2/3] loongarch: Introduce sys_loongarch_flush_icache syscall
Posted by Jiaxun Yang 1 year, 1 month ago
On LoongArch CPUs with ICACHET, writes automatically sync to both local and
remote instruction caches. CPUs without this feature lack userspace cache
flush instructions, requiring a syscall to maintain I/D cache coherence and
propagate to remote caches.

sys_loongarch_flush_icache() is defined to flush the instruction cache
over an address range, with the flush applying to either all threads or
just the caller.

Currently all LoongArch64 implementations from Loongson come with ICACHET;
however, most LoongArch32 implementations, including openLA500, and emerging
third-party LoongArch64 implementations such as WiredNG come without
ICACHET.

Sadly, many user space applications assume ICACHET support, and we can't
recall those binaries. So we'd better get the UAPI for cacheflush ready
soonish and encourage applications to start using it.

The syscall resolves to an ibar for now; it should be revised when we have
actual non-ICACHET support in the kernel.

Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
 arch/loongarch/include/asm/cacheflush.h |  6 ++++++
 arch/loongarch/include/asm/syscall.h    |  2 ++
 arch/loongarch/kernel/Makefile.syscalls |  3 +--
 arch/loongarch/kernel/syscall.c         | 28 ++++++++++++++++++++++++++++
 scripts/syscall.tbl                     |  2 ++
 5 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index f8754d08a31ab07490717c31b9253871668b9a76..94f4a47f00860977db0b360965a22ff0a461c098 100644
--- a/arch/loongarch/include/asm/cacheflush.h
+++ b/arch/loongarch/include/asm/cacheflush.h
@@ -80,6 +80,12 @@ static inline void flush_cache_line(int leaf, unsigned long addr)
 	}
 }
 
+/*
+ * Bits in sys_loongarch_flush_icache()'s flags argument.
+ */
+#define SYS_LOONGARCH_FLUSH_ICACHE_LOCAL 1UL
+#define SYS_LOONGARCH_FLUSH_ICACHE_ALL   (SYS_LOONGARCH_FLUSH_ICACHE_LOCAL)
+
 #include <asm-generic/cacheflush.h>
 
 #endif /* _ASM_CACHEFLUSH_H */
diff --git a/arch/loongarch/include/asm/syscall.h b/arch/loongarch/include/asm/syscall.h
index e286dc58476e6e6c5d126866a8590a96e4b4089a..6bd414a98a757de3c1bc78643fa1749f07efb1c0 100644
--- a/arch/loongarch/include/asm/syscall.h
+++ b/arch/loongarch/include/asm/syscall.h
@@ -71,4 +71,6 @@ static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
 	return false;
 }
 
+asmlinkage long sys_loongarch_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+
 #endif	/* __ASM_LOONGARCH_SYSCALL_H */
diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls
index ab7d9baa29152da97932c7e447a183fba265451c..11665e3000beffd24ef9d683a4ac337554e0b320 100644
--- a/arch/loongarch/kernel/Makefile.syscalls
+++ b/arch/loongarch/kernel/Makefile.syscalls
@@ -1,4 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
 
-# No special ABIs on loongarch so far
-syscall_abis_64 +=
+syscall_abis_64 += loongarch
diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
index b267db6ed79c20199504247c181cc245ef86abfd..2bc164d972b4d41c39e91481803d42bfd0184d3f 100644
--- a/arch/loongarch/kernel/syscall.c
+++ b/arch/loongarch/kernel/syscall.c
@@ -15,6 +15,7 @@
 #include <linux/unistd.h>
 
 #include <asm/asm.h>
+#include <asm/cacheflush.h>
 #include <asm/exception.h>
 #include <asm/loongarch.h>
 #include <asm/signal.h>
@@ -51,6 +52,33 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long,
 }
 #endif
 
+/*
+ * On LoongArch CPUs with ICACHET, writes automatically sync to both local and
+ * remote instruction caches. CPUs without this feature lack userspace cache
+ * flush instructions, requiring a syscall to maintain I/D cache coherence and
+ * propagate to remote caches.
+ *
+ * sys_loongarch_flush_icache() is defined to flush the instruction cache
+ * over an address range, with the flush applying to either all threads or
+ * just the caller.
+ */
+SYSCALL_DEFINE3(loongarch_flush_icache, uintptr_t, start, uintptr_t, end,
+	uintptr_t, flags)
+{
+	/* Check the reserved flags. */
+	if (unlikely(flags & ~SYS_LOONGARCH_FLUSH_ICACHE_ALL))
+		return -EINVAL;
+
+	/*
+	 * SYS_LOONGARCH_FLUSH_ICACHE_LOCAL is not handled so far, needs
+	 * to be realized when non-ICACHET CPUs are supported.
+	 */
+
+	flush_icache_user_range(start, end);
+
+	return 0;
+}
+
 void *sys_call_table[__NR_syscalls] = {
 	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
 #ifdef CONFIG_64BIT
diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644
--- a/scripts/syscall.tbl
+++ b/scripts/syscall.tbl
@@ -298,6 +298,8 @@
 244	csky	set_thread_area			sys_set_thread_area
 245	csky	cacheflush			sys_cacheflush
 
+259	loongarch       loongarch_flush_icache	sys_loongarch_flush_icache
+
 244	nios2	cacheflush			sys_cacheflush
 
 244	or1k	or1k_atomic			sys_or1k_atomic

-- 
2.43.0
Re: [PATCH 2/3] loongarch: Introduce sys_loongarch_flush_icache syscall
Posted by Xi Ruoyao 1 year, 1 month ago
On Thu, 2025-01-02 at 18:34 +0000, Jiaxun Yang wrote:

/* snip */

> Sadly many user space applications are assuming ICACHET support, we can't
> recall those binaries. So we'd better get UAPI for cacheflush ready soonish
> and encourage application to start using it.

To encourage developers to change ibar to loongarch_flush_icache, we
should minimize the extra overhead on mainstream systems.  We can add a
vDSO layer so that, if the CPU has ICACHET:

int vdso_loongarch_flush_icache(...)
{
  asm ("ibar 0");
  return 0;
}

And otherwise the vDSO wrapper invokes the real syscall.  I've
implemented the boot-time alternative runtime patching for vDSO at
https://lore.kernel.org/loongarch/20240816110717.10249-3-xry111@xry111.site/.

> The syscall resolves to a ibar for now, it should be revised when we have
> actual non-ICACHET support in kernel.

/* snip */

> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
> index f8754d08a31ab07490717c31b9253871668b9a76..94f4a47f00860977db0b360965a22ff0a461c098 100644
> --- a/arch/loongarch/include/asm/cacheflush.h
> +++ b/arch/loongarch/include/asm/cacheflush.h
> @@ -80,6 +80,12 @@ static inline void flush_cache_line(int leaf, unsigned long addr)
>  	}
>  }
>  
> +/*
> + * Bits in sys_loongarch_flush_icache()'s flags argument.
> + */
> +#define SYS_LOONGARCH_FLUSH_ICACHE_LOCAL 1UL
> +#define SYS_LOONGARCH_FLUSH_ICACHE_ALL   (SYS_LOONGARCH_FLUSH_ICACHE_LOCAL)

Not a UAPI header so not usable by the user?  How would they specify
flags then?

If you meant to add them for UAPI, it would be very problematic.  When a
new cache type emerges in the hardware implementations, we need to grow
SYS_LOONGARCH_FLUSH_ICACHE_ALL in the UAPI header, but we cannot change
the already compiled JIT applications.  Thus all JIT applications have
to be recompiled with the latest UAPI header.  This just seems an
unnecessarily severe burden on the packagers.

Instead, IMO it's better not to expose so many details to userspace.
Just remove the flags argument and flush all the icaches the kernel
knows, so with a new cache type the user (and distro) just need to
update or patch their kernel, w/o recompiling all JIT apps.


-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
Re: [PATCH 2/3] loongarch: Introduce sys_loongarch_flush_icache syscall
Posted by Jiaxun Yang 1 year, 1 month ago

在2025年1月4日一月 上午9:31,Xi Ruoyao写道:
> On Thu, 2025-01-02 at 18:34 +0000, Jiaxun Yang wrote:
>
> /* snip */
>
>> Sadly many user space applications are assuming ICACHET support, we can't
>> recall those binaries. So we'd better get UAPI for cacheflush ready soonish
>> and encourage application to start using it.
>
> To encourage the developers changing ibar to loongarch_flush_icache, we
> should minimize the extra overhead on mainstream systems.  We can add an
> vDSO layer so if the CPU has ICACHET:

I'm a little bit confused as that's exactly what I'm doing in PATCH 3.

>
> int vdso_loongarch_flush_icache(...)
> {
>   asm ("ibar 0");
>   return 0;
> }
>
> And otherwise the vDSO wrapper invokes the real syscall.  I've
> implemented the boot-time alternative runtime patching for vDSO at
> https://lore.kernel.org/loongarch/20240816110717.10249-3-xry111@xry111.site/.

Thanks! Noted.

>
>> The syscall resolves to a ibar for now, it should be revised when we have
>> actual non-ICACHET support in kernel.
>
> /* snip */
>
>> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
>> index f8754d08a31ab07490717c31b9253871668b9a76..94f4a47f00860977db0b360965a22ff0a461c098 100644
>> --- a/arch/loongarch/include/asm/cacheflush.h
>> +++ b/arch/loongarch/include/asm/cacheflush.h
>> @@ -80,6 +80,12 @@ static inline void flush_cache_line(int leaf, unsigned long addr)
>>  	}
>>  }
>>  
>> +/*
>> + * Bits in sys_loongarch_flush_icache()'s flags argument.
>> + */
>> +#define SYS_LOONGARCH_FLUSH_ICACHE_LOCAL 1UL
>> +#define SYS_LOONGARCH_FLUSH_ICACHE_ALL   (SYS_LOONGARCH_FLUSH_ICACHE_LOCAL)
>
> Not a UAPI header so not usable by the user?  How would they specify
> flags then?

We are following RISC-V's convention of not exposing the flags in a UAPI
header for now, as it's not really ready.

>
> If you meant to add them for UAPI, it would be very problematic.  When a
> new cache type emerges in the hardware implementations, we need to grow
> SYS_LOONGARCH_FLUSH_ICACHE_ALL in the UAPI header, but we cannot change
> the already compiled JIT applications.  Thus all JIT applications have
> to be recompiled with the latest UAPI header.  This just seems an
> unnecessary severe burden to the packagers.

The _LOCAL flag is not meant to select a hardware cache level but the scope
(i.e. all threads or just the caller). The vast majority of applications
shouldn't need this level of granularity, so they can just set flags to zero.

However, applications that want fine-grained optimisations should probe
the availability of flags before using them. Thus the kernel should reject
all unknown flags to assist application probing.

>
> Instead IMO it's better not to expose so much details to the userspace.
> Just remove the flags argument and flush all the icaches the kernel
> knows, so with a new cache type the user (and distro) just need to
> update or patch their kernel, w/o recompiling all JIT apps.

There is no need to change anything in user space usage when a new cache
type emerges. See explanations above.

Thanks
>
> -- 
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University

-- 
- Jiaxun
Re: [PATCH 2/3] loongarch: Introduce sys_loongarch_flush_icache syscall
Posted by Jinyang Shen 1 year, 1 month ago
Hi, Jiaxun,

On 2025/1/3 02:34, Jiaxun Yang wrote:
> On LoongArch CPUs with ICACHET, writes automatically sync to both local and
> remote instruction caches. CPUs without this feature lack userspace cache
> flush instructions, requiring a syscall to maintain I/D cache coherence and
> propagate to remote caches.
> 
> sys_loongarch_flush_icache() is defined to flush the instruction cache
> over an address range, with the flush applying to either all threads or
> just the caller.
> 
> Currently all LoongArch64 implementations from Loongson comes with ICACHET,
> however most LoongArch32 implementations including openLA500 and emerging
> third party LoongArch64 implementations such as WiredNG are coming without
> ICACHET.
> 
> Sadly many user space applications are assuming ICACHET support, we can't
> recall those binaries. So we'd better get UAPI for cacheflush ready soonish
> and encourage application to start using it.
> 
> The syscall resolves to a ibar for now, it should be revised when we have
> actual non-ICACHET support in kernel.
> 
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
>   arch/loongarch/include/asm/cacheflush.h |  6 ++++++
>   arch/loongarch/include/asm/syscall.h    |  2 ++
>   arch/loongarch/kernel/Makefile.syscalls |  3 +--
>   arch/loongarch/kernel/syscall.c         | 28 ++++++++++++++++++++++++++++
>   scripts/syscall.tbl                     |  2 ++
>   5 files changed, 39 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
> index f8754d08a31ab07490717c31b9253871668b9a76..94f4a47f00860977db0b360965a22ff0a461c098 100644
> --- a/arch/loongarch/include/asm/cacheflush.h
> +++ b/arch/loongarch/include/asm/cacheflush.h
> @@ -80,6 +80,12 @@ static inline void flush_cache_line(int leaf, unsigned long addr)
>   	}
>   }
>   
> +/*
> + * Bits in sys_loongarch_flush_icache()'s flags argument.
> + */
> +#define SYS_LOONGARCH_FLUSH_ICACHE_LOCAL 1UL
> +#define SYS_LOONGARCH_FLUSH_ICACHE_ALL   (SYS_LOONGARCH_FLUSH_ICACHE_LOCAL)
> +
>   #include <asm-generic/cacheflush.h>
>   
>   #endif /* _ASM_CACHEFLUSH_H */
> diff --git a/arch/loongarch/include/asm/syscall.h b/arch/loongarch/include/asm/syscall.h
> index e286dc58476e6e6c5d126866a8590a96e4b4089a..6bd414a98a757de3c1bc78643fa1749f07efb1c0 100644
> --- a/arch/loongarch/include/asm/syscall.h
> +++ b/arch/loongarch/include/asm/syscall.h
> @@ -71,4 +71,6 @@ static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
>   	return false;
>   }
>   
> +asmlinkage long sys_loongarch_flush_icache(uintptr_t, uintptr_t, uintptr_t);
> +
>   #endif	/* __ASM_LOONGARCH_SYSCALL_H */
> diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls
> index ab7d9baa29152da97932c7e447a183fba265451c..11665e3000beffd24ef9d683a4ac337554e0b320 100644
> --- a/arch/loongarch/kernel/Makefile.syscalls
> +++ b/arch/loongarch/kernel/Makefile.syscalls
> @@ -1,4 +1,3 @@
>   # SPDX-License-Identifier: GPL-2.0
>   
> -# No special ABIs on loongarch so far
> -syscall_abis_64 +=
> +syscall_abis_64 += loongarch

LoongArch64 needs an arch-specific syscall, but LoongArch32 doesn't?

> diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
> index b267db6ed79c20199504247c181cc245ef86abfd..2bc164d972b4d41c39e91481803d42bfd0184d3f 100644
> --- a/arch/loongarch/kernel/syscall.c
> +++ b/arch/loongarch/kernel/syscall.c
> @@ -15,6 +15,7 @@
>   #include <linux/unistd.h>
>   
>   #include <asm/asm.h>
> +#include <asm/cacheflush.h>
>   #include <asm/exception.h>
>   #include <asm/loongarch.h>
>   #include <asm/signal.h>
> @@ -51,6 +52,33 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long,
>   }
>   #endif
>   
> +/*
> + * On LoongArch CPUs with ICACHET, writes automatically sync to both local and
> + * remote instruction caches. CPUs without this feature lack userspace cache
> + * flush instructions, requiring a syscall to maintain I/D cache coherence and
> + * propagate to remote caches.
> + *
> + * sys_loongarch_flush_icache() is defined to flush the instruction cache
> + * over an address range, with the flush applying to either all threads or
> + * just the caller.
> + */
> +SYSCALL_DEFINE3(loongarch_flush_icache, uintptr_t, start, uintptr_t, end,
> +	uintptr_t, flags)
> +{
> +	/* Check the reserved flags. */
> +	if (unlikely(flags & ~SYS_LOONGARCH_FLUSH_ICACHE_ALL))
> +		return -EINVAL;
> +
> +	/*
> +	 * SYS_LOONGARCH_FLUSH_ICACHE_LOCAL is not handled so far, needs
> +	 * to be realized when non-ICACHET CPUs are supported.
> +	 */
> +
> +	flush_icache_user_range(start, end);
> +
> +	return 0;
> +}
> +
>   void *sys_call_table[__NR_syscalls] = {
>   	[0 ... __NR_syscalls - 1] = sys_ni_syscall,
>   #ifdef CONFIG_64BIT
> diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
> index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644
> --- a/scripts/syscall.tbl
> +++ b/scripts/syscall.tbl
> @@ -298,6 +298,8 @@
>   244	csky	set_thread_area			sys_set_thread_area
>   245	csky	cacheflush			sys_cacheflush
>   
> +259	loongarch       loongarch_flush_icache	sys_loongarch_flush_icache

Can we use cacheflush, as arc, csky and nios2 do?

Jinyang

> +
>   244	nios2	cacheflush			sys_cacheflush
>   
>   244	or1k	or1k_atomic			sys_or1k_atomic
>
Re: [PATCH 2/3] loongarch: Introduce sys_loongarch_flush_icache syscall
Posted by Arnd Bergmann 1 year, 1 month ago
On Sat, Jan 4, 2025, at 10:04, Jinyang Shen wrote:
> On 2025/1/3 02:34, Jiaxun Yang wrote:
>> +/*
>> + * On LoongArch CPUs with ICACHET, writes automatically sync to both local and
>> + * remote instruction caches. CPUs without this feature lack userspace cache
>> + * flush instructions, requiring a syscall to maintain I/D cache coherence and
>> + * propagate to remote caches.
>> + *
>> + * sys_loongarch_flush_icache() is defined to flush the instruction cache
>> + * over an address range, with the flush applying to either all threads or
>> + * just the caller.
>> + */
>> +SYSCALL_DEFINE3(loongarch_flush_icache, uintptr_t, start, uintptr_t, end,
>> +	uintptr_t, flags)


I think for consistency with other architectures, we want start/length/flags
instead of start/end/flags.

The meaning of the third argument is rather inconsistent between
architectures already, but at least the second argument is always
length so far.


>> diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
>> index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644
>> --- a/scripts/syscall.tbl
>> +++ b/scripts/syscall.tbl
>> @@ -298,6 +298,8 @@
>>   244	csky	set_thread_area			sys_set_thread_area
>>   245	csky	cacheflush			sys_cacheflush
>>   
>> +259	loongarch       loongarch_flush_icache	sys_loongarch_flush_icache
>
> Can we use cacheflush as arc, csky and nios2?

Agreed. I would also use the number 244 instead of 259 here.

     Arnd
Re: [PATCH 2/3] loongarch: Introduce sys_loongarch_flush_icache syscall
Posted by Jiaxun Yang 1 year, 1 month ago

在2025年1月4日一月 下午3:07,Arnd Bergmann写道:
[...]

Hi Arnd,

Thanks for your comments!

>
> I think for consistency with other architectures, we want start/length/flags
> instead of start/end/flags.
>
> The meaning of the third argument is rather inconsistent between
> architectures already, but at least the second argument is always
> length so far.

So this is actually designed to be aligned with RISC-V's semantics,
and thus all arguments are aligned with RISC-V.

IMO RISC-V's semantics is a better design that we should take, as
I replied to Jinyang above.

>
>
>>> diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
>>> index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644
>>> --- a/scripts/syscall.tbl
>>> +++ b/scripts/syscall.tbl
>>> @@ -298,6 +298,8 @@
>>>   244	csky	set_thread_area			sys_set_thread_area
>>>   245	csky	cacheflush			sys_cacheflush
>>>   
>>> +259	loongarch       loongarch_flush_icache	sys_loongarch_flush_icache
>>
>> Can we use cacheflush as arc, csky and nios2?
>
> Agreed. I would also use the number 244 instead of 259 here.

259 is also selected to be aligned with RISC-V.

Thanks
>
>      Arnd

-- 
- Jiaxun
Re: [PATCH 2/3] loongarch: Introduce sys_loongarch_flush_icache syscall
Posted by Jiaxun Yang 1 year, 1 month ago

在2025年1月4日一月 上午9:04,Jinyang Shen写道:
[...]
>>   
>> -# No special ABIs on loongarch so far
>> -syscall_abis_64 +=
>> +syscall_abis_64 += loongarch
>
> LoongArch64 need arch-specific syscall, but LoongArch32 needn't?

My bad, lost in rebasing :-)

>
[...]
>> diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
>> index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644
>> --- a/scripts/syscall.tbl
>> +++ b/scripts/syscall.tbl
>> @@ -298,6 +298,8 @@
>>   244	csky	set_thread_area			sys_set_thread_area
>>   245	csky	cacheflush			sys_cacheflush
>>   
>> +259	loongarch       loongarch_flush_icache	sys_loongarch_flush_icache
>
> Can we use cacheflush as arc, csky and nios2?

I think cacheflush syscall is more or less an outdated design inherited
from...MIPS...

Exposing flushes of other cache levels to user space is not wise from a
security perspective. The design of the cacheflush syscall is also not vDSO
friendly. riscv_flush_icache is designed to avoid those drawbacks, and we
should follow it.

Thanks

>
> Jinyang
>
[...]

-- 
- Jiaxun