On LoongArch CPUs with ICACHET, writes automatically sync to both local and
remote instruction caches. CPUs without this feature lack userspace cache
flush instructions, requiring a syscall to maintain I/D cache coherence and
propagate to remote caches.
sys_loongarch_flush_icache() is defined to flush the instruction cache
over an address range, with the flush applying to either all threads or
just the caller.
Currently all LoongArch64 implementations from Loongson come with ICACHET;
however, most LoongArch32 implementations, including openLA500, and emerging
third-party LoongArch64 implementations such as WiredNG come without
ICACHET.
Sadly, many user space applications assume ICACHET support, and we can't
recall those binaries. So we'd better get a UAPI for cacheflush ready soon
and encourage applications to start using it.
The syscall resolves to an ibar for now; it should be revised when we have
actual non-ICACHET support in the kernel.
Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
---
arch/loongarch/include/asm/cacheflush.h | 6 ++++++
arch/loongarch/include/asm/syscall.h | 2 ++
arch/loongarch/kernel/Makefile.syscalls | 3 +--
arch/loongarch/kernel/syscall.c | 28 ++++++++++++++++++++++++++++
scripts/syscall.tbl | 2 ++
5 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
index f8754d08a31ab07490717c31b9253871668b9a76..94f4a47f00860977db0b360965a22ff0a461c098 100644
--- a/arch/loongarch/include/asm/cacheflush.h
+++ b/arch/loongarch/include/asm/cacheflush.h
@@ -80,6 +80,12 @@ static inline void flush_cache_line(int leaf, unsigned long addr)
}
}
+/*
+ * Bits in sys_loongarch_flush_icache()'s flags argument.
+ */
+#define SYS_LOONGARCH_FLUSH_ICACHE_LOCAL 1UL
+#define SYS_LOONGARCH_FLUSH_ICACHE_ALL (SYS_LOONGARCH_FLUSH_ICACHE_LOCAL)
+
#include <asm-generic/cacheflush.h>
#endif /* _ASM_CACHEFLUSH_H */
diff --git a/arch/loongarch/include/asm/syscall.h b/arch/loongarch/include/asm/syscall.h
index e286dc58476e6e6c5d126866a8590a96e4b4089a..6bd414a98a757de3c1bc78643fa1749f07efb1c0 100644
--- a/arch/loongarch/include/asm/syscall.h
+++ b/arch/loongarch/include/asm/syscall.h
@@ -71,4 +71,6 @@ static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
return false;
}
+asmlinkage long sys_loongarch_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+
#endif /* __ASM_LOONGARCH_SYSCALL_H */
diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls
index ab7d9baa29152da97932c7e447a183fba265451c..11665e3000beffd24ef9d683a4ac337554e0b320 100644
--- a/arch/loongarch/kernel/Makefile.syscalls
+++ b/arch/loongarch/kernel/Makefile.syscalls
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0
-# No special ABIs on loongarch so far
-syscall_abis_64 +=
+syscall_abis_64 += loongarch
diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
index b267db6ed79c20199504247c181cc245ef86abfd..2bc164d972b4d41c39e91481803d42bfd0184d3f 100644
--- a/arch/loongarch/kernel/syscall.c
+++ b/arch/loongarch/kernel/syscall.c
@@ -15,6 +15,7 @@
#include <linux/unistd.h>
#include <asm/asm.h>
+#include <asm/cacheflush.h>
#include <asm/exception.h>
#include <asm/loongarch.h>
#include <asm/signal.h>
@@ -51,6 +52,33 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long,
}
#endif
+/*
+ * On LoongArch CPUs with ICACHET, writes automatically sync to both local and
+ * remote instruction caches. CPUs without this feature lack userspace cache
+ * flush instructions, requiring a syscall to maintain I/D cache coherence and
+ * propagate to remote caches.
+ *
+ * sys_loongarch_flush_icache() is defined to flush the instruction cache
+ * over an address range, with the flush applying to either all threads or
+ * just the caller.
+ */
+SYSCALL_DEFINE3(loongarch_flush_icache, uintptr_t, start, uintptr_t, end,
+ uintptr_t, flags)
+{
+ /* Check the reserved flags. */
+ if (unlikely(flags & ~SYS_LOONGARCH_FLUSH_ICACHE_ALL))
+ return -EINVAL;
+
+ /*
+ * SYS_LOONGARCH_FLUSH_ICACHE_LOCAL is not handled so far, needs
+ * to be realized when non-ICACHET CPUs are supported.
+ */
+
+ flush_icache_user_range(start, end);
+
+ return 0;
+}
+
void *sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = sys_ni_syscall,
#ifdef CONFIG_64BIT
diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644
--- a/scripts/syscall.tbl
+++ b/scripts/syscall.tbl
@@ -298,6 +298,8 @@
244 csky set_thread_area sys_set_thread_area
245 csky cacheflush sys_cacheflush
+259 loongarch loongarch_flush_icache sys_loongarch_flush_icache
+
244 nios2 cacheflush sys_cacheflush
244 or1k or1k_atomic sys_or1k_atomic
--
2.43.0
On Thu, 2025-01-02 at 18:34 +0000, Jiaxun Yang wrote:
/* snip */
> Sadly many user space applications are assuming ICACHET support, we can't
> recall those binaries. So we'd better get UAPI for cacheflush ready soonish
> and encourage application to start using it.
To encourage developers to change ibar to loongarch_flush_icache, we
should minimize the extra overhead on mainstream systems. We can add a
vDSO layer so that, if the CPU has ICACHET:
int vdso_loongarch_flush_icache(...)
{
asm ("ibar 0");
return 0;
}
And otherwise the vDSO wrapper invokes the real syscall. I've
implemented the boot-time alternative runtime patching for vDSO at
https://lore.kernel.org/loongarch/20240816110717.10249-3-xry111@xry111.site/.
> The syscall resolves to a ibar for now, it should be revised when we have
> actual non-ICACHET support in kernel.
/* snip */
> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
> index f8754d08a31ab07490717c31b9253871668b9a76..94f4a47f00860977db0b360965a22ff0a461c098 100644
> --- a/arch/loongarch/include/asm/cacheflush.h
> +++ b/arch/loongarch/include/asm/cacheflush.h
> @@ -80,6 +80,12 @@ static inline void flush_cache_line(int leaf, unsigned long addr)
> }
> }
>
> +/*
> + * Bits in sys_loongarch_flush_icache()'s flags argument.
> + */
> +#define SYS_LOONGARCH_FLUSH_ICACHE_LOCAL 1UL
> +#define SYS_LOONGARCH_FLUSH_ICACHE_ALL (SYS_LOONGARCH_FLUSH_ICACHE_LOCAL)
Not a UAPI header so not usable by the user? How would they specify
flags then?
If you meant to add them for UAPI, it would be very problematic. When a
new cache type emerges in the hardware implementations, we would need to grow
SYS_LOONGARCH_FLUSH_ICACHE_ALL in the UAPI header, but we cannot change
the already compiled JIT applications. Thus all JIT applications would have
to be recompiled with the latest UAPI header. This just seems an
unnecessarily severe burden on the packagers.
Instead, IMO it's better not to expose so many details to userspace.
Just remove the flags argument and flush all the icaches the kernel
knows about, so with a new cache type the user (and distro) just need to
update or patch their kernel, w/o recompiling all JIT apps.
--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University
在2025年1月4日一月 上午9:31,Xi Ruoyao写道:
> On Thu, 2025-01-02 at 18:34 +0000, Jiaxun Yang wrote:
>
> /* snip */
>
>> Sadly many user space applications are assuming ICACHET support, we can't
>> recall those binaries. So we'd better get UAPI for cacheflush ready soonish
>> and encourage application to start using it.
>
> To encourage the developers changing ibar to loongarch_flush_icache, we
> should minimize the extra overhead on mainstream systems. We can add an
> vDSO layer so if the CPU has ICACHET:
I'm a little bit confused as that's exactly what I'm doing in PATCH 3.
>
> int vdso_loongarch_flush_icache(...)
> {
> asm ("ibar 0");
> return 0;
> }
>
> And otherwise the vDSO wrapper invokes the real syscall. I've
> implemented the boot-time alternative runtime patching for vDSO at
> https://lore.kernel.org/loongarch/20240816110717.10249-3-xry111@xry111.site/.
Thanks! Noted.
>
>> The syscall resolves to a ibar for now, it should be revised when we have
>> actual non-ICACHET support in kernel.
>
> /* snip */
>
>> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
>> index f8754d08a31ab07490717c31b9253871668b9a76..94f4a47f00860977db0b360965a22ff0a461c098 100644
>> --- a/arch/loongarch/include/asm/cacheflush.h
>> +++ b/arch/loongarch/include/asm/cacheflush.h
>> @@ -80,6 +80,12 @@ static inline void flush_cache_line(int leaf, unsigned long addr)
>> }
>> }
>>
>> +/*
>> + * Bits in sys_loongarch_flush_icache()'s flags argument.
>> + */
>> +#define SYS_LOONGARCH_FLUSH_ICACHE_LOCAL 1UL
>> +#define SYS_LOONGARCH_FLUSH_ICACHE_ALL (SYS_LOONGARCH_FLUSH_ICACHE_LOCAL)
>
> Not a UAPI header so not usable by the user? How would they specify
> flags then?
We are following RISC-V's convention of not exposing the flags in a UAPI
header for now, as it's not really ready.
>
> If you meant to add them for UAPI, it would be very problematic. When a
> new cache type emerges in the hardware implementations, we need to grow
> SYS_LOONGARCH_FLUSH_ICACHE_ALL in the UAPI header, but we cannot change
> the already compiled JIT applications. Thus all JIT applications have
> to be recompiled with the latest UAPI header. This just seems an
> unnecessary severe burden to the packagers.
The _LOCAL flag is not meant to describe the hardware cache level but the scope
(i.e. all threads or just the caller). The vast majority of applications
shouldn't need this level of granularity, so they can just set flags to zero.
However, applications that want fine-grained optimisations should
probe the availability of flags before using them. Thus the kernel should reject
all unknown flags to assist application probing.
>
> Instead IMO it's better not to expose so much details to the userspace.
> Just remove the flags argument and flush all the icaches the kernel
> knows, so with a new cache type the user (and distro) just need to
> update or patch their kernel, w/o recompiling all JIT apps.
There is no need to change anything in user space usage when a new cache
type emerges. See the explanations above.
Thanks
>
> --
> Xi Ruoyao <xry111@xry111.site>
> School of Aerospace Science and Technology, Xidian University
--
- Jiaxun
Hi, Jiaxun,
On 2025/1/3 02:34, Jiaxun Yang wrote:
> On LoongArch CPUs with ICACHET, writes automatically sync to both local and
> remote instruction caches. CPUs without this feature lack userspace cache
> flush instructions, requiring a syscall to maintain I/D cache coherence and
> propagate to remote caches.
>
> sys_loongarch_flush_icache() is defined to flush the instruction cache
> over an address range, with the flush applying to either all threads or
> just the caller.
>
> Currently all LoongArch64 implementations from Loongson comes with ICACHET,
> however most LoongArch32 implementations including openLA500 and emerging
> third party LoongArch64 implementations such as WiredNG are coming without
> ICACHET.
>
> Sadly many user space applications are assuming ICACHET support, we can't
> recall those binaries. So we'd better get UAPI for cacheflush ready soonish
> and encourage application to start using it.
>
> The syscall resolves to a ibar for now, it should be revised when we have
> actual non-ICACHET support in kernel.
>
> Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
> ---
> arch/loongarch/include/asm/cacheflush.h | 6 ++++++
> arch/loongarch/include/asm/syscall.h | 2 ++
> arch/loongarch/kernel/Makefile.syscalls | 3 +--
> arch/loongarch/kernel/syscall.c | 28 ++++++++++++++++++++++++++++
> scripts/syscall.tbl | 2 ++
> 5 files changed, 39 insertions(+), 2 deletions(-)
>
> diff --git a/arch/loongarch/include/asm/cacheflush.h b/arch/loongarch/include/asm/cacheflush.h
> index f8754d08a31ab07490717c31b9253871668b9a76..94f4a47f00860977db0b360965a22ff0a461c098 100644
> --- a/arch/loongarch/include/asm/cacheflush.h
> +++ b/arch/loongarch/include/asm/cacheflush.h
> @@ -80,6 +80,12 @@ static inline void flush_cache_line(int leaf, unsigned long addr)
> }
> }
>
> +/*
> + * Bits in sys_loongarch_flush_icache()'s flags argument.
> + */
> +#define SYS_LOONGARCH_FLUSH_ICACHE_LOCAL 1UL
> +#define SYS_LOONGARCH_FLUSH_ICACHE_ALL (SYS_LOONGARCH_FLUSH_ICACHE_LOCAL)
> +
> #include <asm-generic/cacheflush.h>
>
> #endif /* _ASM_CACHEFLUSH_H */
> diff --git a/arch/loongarch/include/asm/syscall.h b/arch/loongarch/include/asm/syscall.h
> index e286dc58476e6e6c5d126866a8590a96e4b4089a..6bd414a98a757de3c1bc78643fa1749f07efb1c0 100644
> --- a/arch/loongarch/include/asm/syscall.h
> +++ b/arch/loongarch/include/asm/syscall.h
> @@ -71,4 +71,6 @@ static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
> return false;
> }
>
> +asmlinkage long sys_loongarch_flush_icache(uintptr_t, uintptr_t, uintptr_t);
> +
> #endif /* __ASM_LOONGARCH_SYSCALL_H */
> diff --git a/arch/loongarch/kernel/Makefile.syscalls b/arch/loongarch/kernel/Makefile.syscalls
> index ab7d9baa29152da97932c7e447a183fba265451c..11665e3000beffd24ef9d683a4ac337554e0b320 100644
> --- a/arch/loongarch/kernel/Makefile.syscalls
> +++ b/arch/loongarch/kernel/Makefile.syscalls
> @@ -1,4 +1,3 @@
> # SPDX-License-Identifier: GPL-2.0
>
> -# No special ABIs on loongarch so far
> -syscall_abis_64 +=
> +syscall_abis_64 += loongarch
LoongArch64 needs an arch-specific syscall, but LoongArch32 doesn't?
> diff --git a/arch/loongarch/kernel/syscall.c b/arch/loongarch/kernel/syscall.c
> index b267db6ed79c20199504247c181cc245ef86abfd..2bc164d972b4d41c39e91481803d42bfd0184d3f 100644
> --- a/arch/loongarch/kernel/syscall.c
> +++ b/arch/loongarch/kernel/syscall.c
> @@ -15,6 +15,7 @@
> #include <linux/unistd.h>
>
> #include <asm/asm.h>
> +#include <asm/cacheflush.h>
> #include <asm/exception.h>
> #include <asm/loongarch.h>
> #include <asm/signal.h>
> @@ -51,6 +52,33 @@ SYSCALL_DEFINE6(mmap2, unsigned long, addr, unsigned long, len, unsigned long,
> }
> #endif
>
> +/*
> + * On LoongArch CPUs with ICACHET, writes automatically sync to both local and
> + * remote instruction caches. CPUs without this feature lack userspace cache
> + * flush instructions, requiring a syscall to maintain I/D cache coherence and
> + * propagate to remote caches.
> + *
> + * sys_loongarch_flush_icache() is defined to flush the instruction cache
> + * over an address range, with the flush applying to either all threads or
> + * just the caller.
> + */
> +SYSCALL_DEFINE3(loongarch_flush_icache, uintptr_t, start, uintptr_t, end,
> + uintptr_t, flags)
> +{
> + /* Check the reserved flags. */
> + if (unlikely(flags & ~SYS_LOONGARCH_FLUSH_ICACHE_ALL))
> + return -EINVAL;
> +
> + /*
> + * SYS_LOONGARCH_FLUSH_ICACHE_LOCAL is not handled so far, needs
> + * to be realized when non-ICACHET CPUs are supported.
> + */
> +
> + flush_icache_user_range(start, end);
> +
> + return 0;
> +}
> +
> void *sys_call_table[__NR_syscalls] = {
> [0 ... __NR_syscalls - 1] = sys_ni_syscall,
> #ifdef CONFIG_64BIT
> diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
> index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644
> --- a/scripts/syscall.tbl
> +++ b/scripts/syscall.tbl
> @@ -298,6 +298,8 @@
> 244 csky set_thread_area sys_set_thread_area
> 245 csky cacheflush sys_cacheflush
>
> +259 loongarch loongarch_flush_icache sys_loongarch_flush_icache
Can we use cacheflush, as arc, csky and nios2 do?
Jinyang
> +
> 244 nios2 cacheflush sys_cacheflush
>
> 244 or1k or1k_atomic sys_or1k_atomic
>
On Sat, Jan 4, 2025, at 10:04, Jinyang Shen wrote:
> On 2025/1/3 02:34, Jiaxun Yang wrote:
>> +/*
>> + * On LoongArch CPUs with ICACHET, writes automatically sync to both local and
>> + * remote instruction caches. CPUs without this feature lack userspace cache
>> + * flush instructions, requiring a syscall to maintain I/D cache coherence and
>> + * propagate to remote caches.
>> + *
>> + * sys_loongarch_flush_icache() is defined to flush the instruction cache
>> + * over an address range, with the flush applying to either all threads or
>> + * just the caller.
>> + */
>> +SYSCALL_DEFINE3(loongarch_flush_icache, uintptr_t, start, uintptr_t, end,
>> + uintptr_t, flags)
I think for consistency with other architectures, we want start/length/flags
instead of start/end/flags.
The meaning of the third argument is rather inconsistent between
architectures already, but at least the second argument is always
length so far.
>> diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl
>> index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644
>> --- a/scripts/syscall.tbl
>> +++ b/scripts/syscall.tbl
>> @@ -298,6 +298,8 @@
>> 244 csky set_thread_area sys_set_thread_area
>> 245 csky cacheflush sys_cacheflush
>>
>> +259 loongarch loongarch_flush_icache sys_loongarch_flush_icache
>
> Can we use cacheflush as arc, csky and nios2?
Agreed. I would also use the number 244 instead of 259 here.
Arnd
在2025年1月4日一月 下午3:07,Arnd Bergmann写道: [...] Hi Arnd, Thanks for your comments! > > I think for consistency with other architectures, we want start/length/flags > instead of start/end/flags. > > The meaning of the third argument is rather inconsistent between > architectures already, but at least the second argument is always > length so far. So this is actually designed to be aligned with RISC-V's semantics, and thus all arguments are aligned with RISC-V. IMO RISC-V's semantics is a better design that we should take, as I replied to Jinyang above. > > >>> diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl >>> index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644 >>> --- a/scripts/syscall.tbl >>> +++ b/scripts/syscall.tbl >>> @@ -298,6 +298,8 @@ >>> 244 csky set_thread_area sys_set_thread_area >>> 245 csky cacheflush sys_cacheflush >>> >>> +259 loongarch loongarch_flush_icache sys_loongarch_flush_icache >> >> Can we use cacheflush as arc, csky and nios2? > > Agreed. I would also use the number 244 instead of 259 here. 259 is also selected to be aligned with RISC-V. Thanks > > Arnd -- - Jiaxun
在2025年1月4日一月 上午9:04,Jinyang Shen写道: [...] >> >> -# No special ABIs on loongarch so far >> -syscall_abis_64 += >> +syscall_abis_64 += loongarch > > LoongArch64 need arch-specific syscall, but LoongArch32 needn't? My bad, lost in rebasing :-) > [...] >> diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl >> index ebbdb3c42e9f74613b003014c0baf44c842bb756..723fe859956809f26d6ec50ad7812933531ef687 100644 >> --- a/scripts/syscall.tbl >> +++ b/scripts/syscall.tbl >> @@ -298,6 +298,8 @@ >> 244 csky set_thread_area sys_set_thread_area >> 245 csky cacheflush sys_cacheflush >> >> +259 loongarch loongarch_flush_icache sys_loongarch_flush_icache > > Can we use cacheflush as arc, csky and nios2? I think cacheflush syscall is more or less an outdated design inherited from...MIPS... Exposing flush of other cache levels to user space is not wise in security perspective. The design of cacheflush syscall is also not vDSO friendly. riscv_flush_icache is designed to avoid those drawbacks, and we should follow. Thanks > > Jinyang > [...] -- - Jiaxun
© 2016 - 2026 Red Hat, Inc.