From: Ard Biesheuvel <ardb@kernel.org>
Commit aefbab8e77eb16b5
("arm64: fpsimd: Preserve/restore kernel mode NEON at context switch")
added a 'kernel_fpsimd_state' field to struct thread_struct, which is
the arch-specific portion of struct task_struct, and is allocated for
each task in the system. The size of this field is 528 bytes, resulting
in non-trivial bloat of task_struct, and the associated memory overhead
may impact performance on systems with many processes.
This allocation is only used if the task is scheduled out or interrupted
by a softirq while using the FP/SIMD unit in kernel mode, and so it is
possible to transparently allocate this buffer on the caller's stack
instead.
So tweak the 'ksimd' scoped guard implementation so that a stack buffer
is allocated and passed to both kernel_neon_begin() and
kernel_neon_end(), and recorded in the task struct. Passing the address
to both functions, and checking it for consistency, ensures
that callers of the updated bare begin/end API use it in a manner that
is consistent with the new context switch semantics.
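For example (an illustrative sketch, not part of the diff below), a
caller of the bare API now provides the buffer itself:

	struct user_fpsimd_state buf;

	kernel_neon_begin(&buf);
	/* ... use the FP/SIMD registers ... */
	kernel_neon_end(&buf);

whereas users of the scoped guard get the stack buffer transparently,
via the compound literal in scoped_ksimd():

	scoped_ksimd() {
		/* ... use the FP/SIMD registers ... */
	}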
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/include/asm/neon.h | 4 +--
arch/arm64/include/asm/processor.h | 2 +-
arch/arm64/include/asm/simd.h | 7 ++--
arch/arm64/kernel/fpsimd.c | 34 +++++++++++++-------
4 files changed, 31 insertions(+), 16 deletions(-)
diff --git a/arch/arm64/include/asm/neon.h b/arch/arm64/include/asm/neon.h
index d4b1d172a79b..acebee4605b5 100644
--- a/arch/arm64/include/asm/neon.h
+++ b/arch/arm64/include/asm/neon.h
@@ -13,7 +13,7 @@
#define cpu_has_neon() system_supports_fpsimd()
-void kernel_neon_begin(void);
-void kernel_neon_end(void);
+void kernel_neon_begin(struct user_fpsimd_state *);
+void kernel_neon_end(struct user_fpsimd_state *);
#endif /* ! __ASM_NEON_H */
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 4f8d677b73ee..93bca4d454d7 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -172,7 +172,7 @@ struct thread_struct {
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
- struct user_fpsimd_state kernel_fpsimd_state;
+ struct user_fpsimd_state *kernel_fpsimd_state;
unsigned int kernel_fpsimd_cpu;
#ifdef CONFIG_ARM64_PTR_AUTH
struct ptrauth_keys_user keys_user;
diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
index d9f83c478736..7ddb25df5c98 100644
--- a/arch/arm64/include/asm/simd.h
+++ b/arch/arm64/include/asm/simd.h
@@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
#endif /* ! CONFIG_KERNEL_MODE_NEON */
-DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
+DEFINE_LOCK_GUARD_1(ksimd,
+ struct user_fpsimd_state,
+ kernel_neon_begin(_T->lock),
+ kernel_neon_end(_T->lock))
-#define scoped_ksimd() scoped_guard(ksimd)
+#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
#endif
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index c37f02d7194e..ea9192a180aa 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -1488,21 +1488,23 @@ static void fpsimd_load_kernel_state(struct task_struct *task)
* Elide the load if this CPU holds the most recent kernel mode
* FPSIMD context of the current task.
*/
- if (last->st == &task->thread.kernel_fpsimd_state &&
+ if (last->st == task->thread.kernel_fpsimd_state &&
task->thread.kernel_fpsimd_cpu == smp_processor_id())
return;
- fpsimd_load_state(&task->thread.kernel_fpsimd_state);
+ fpsimd_load_state(task->thread.kernel_fpsimd_state);
}
static void fpsimd_save_kernel_state(struct task_struct *task)
{
struct cpu_fp_state cpu_fp_state = {
- .st = &task->thread.kernel_fpsimd_state,
+ .st = task->thread.kernel_fpsimd_state,
.to_save = FP_STATE_FPSIMD,
};
- fpsimd_save_state(&task->thread.kernel_fpsimd_state);
+ BUG_ON(!cpu_fp_state.st);
+
+ fpsimd_save_state(task->thread.kernel_fpsimd_state);
fpsimd_bind_state_to_cpu(&cpu_fp_state);
task->thread.kernel_fpsimd_cpu = smp_processor_id();
@@ -1773,6 +1775,7 @@ void fpsimd_update_current_state(struct user_fpsimd_state const *state)
void fpsimd_flush_task_state(struct task_struct *t)
{
t->thread.fpsimd_cpu = NR_CPUS;
+ t->thread.kernel_fpsimd_state = NULL;
/*
* If we don't support fpsimd, bail out after we have
* reset the fpsimd_cpu for this task and clear the
@@ -1833,7 +1836,7 @@ void fpsimd_save_and_flush_cpu_state(void)
* The caller may freely use the FPSIMD registers until kernel_neon_end() is
* called.
*/
-void kernel_neon_begin(void)
+void kernel_neon_begin(struct user_fpsimd_state *s)
{
if (WARN_ON(!system_supports_fpsimd()))
return;
@@ -1866,8 +1869,16 @@ void kernel_neon_begin(void)
* mode in task context. So in this case, setting the flag here
* is always appropriate.
*/
- if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
+ /*
+ * Record the caller provided buffer as the kernel mode
+ * FP/SIMD buffer for this task, so that the state can
+ * be preserved and restored on a context switch.
+ */
+ if (cmpxchg(&current->thread.kernel_fpsimd_state, NULL, s))
+ BUG();
set_thread_flag(TIF_KERNEL_FPSTATE);
+ }
}
/* Invalidate any task state remaining in the fpsimd regs: */
@@ -1886,7 +1897,7 @@ EXPORT_SYMBOL_GPL(kernel_neon_begin);
* The caller must not use the FPSIMD registers after this function is called,
* unless kernel_neon_begin() is called again in the meantime.
*/
-void kernel_neon_end(void)
+void kernel_neon_end(struct user_fpsimd_state *s)
{
if (!system_supports_fpsimd())
return;
@@ -1899,8 +1910,9 @@ void kernel_neon_end(void)
if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
test_thread_flag(TIF_KERNEL_FPSTATE))
fpsimd_load_kernel_state(current);
- else
- clear_thread_flag(TIF_KERNEL_FPSTATE);
+ else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE))
+ if (cmpxchg(&current->thread.kernel_fpsimd_state, s, NULL) != s)
+ BUG();
}
EXPORT_SYMBOL_GPL(kernel_neon_end);
@@ -1936,7 +1948,7 @@ void __efi_fpsimd_begin(void)
WARN_ON(preemptible());
if (may_use_simd()) {
- kernel_neon_begin();
+ kernel_neon_begin(&efi_fpsimd_state);
} else {
/*
* If !efi_sve_state, SVE can't be in use yet and doesn't need
@@ -1985,7 +1997,7 @@ void __efi_fpsimd_end(void)
return;
if (!efi_fpsimd_state_used) {
- kernel_neon_end();
+ kernel_neon_end(&efi_fpsimd_state);
} else {
if (system_supports_sve() && efi_sve_state_used) {
bool ffr = true;
--
2.51.0.618.g983fd99d29-goog
On Wed, Oct 01, 2025 at 11:02:22PM +0200, Ard Biesheuvel wrote:
> diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> index 4f8d677b73ee..93bca4d454d7 100644
> --- a/arch/arm64/include/asm/processor.h
> +++ b/arch/arm64/include/asm/processor.h
> @@ -172,7 +172,7 @@ struct thread_struct {
> unsigned long fault_code; /* ESR_EL1 value */
> struct debug_info debug; /* debugging */
>
> - struct user_fpsimd_state kernel_fpsimd_state;
> + struct user_fpsimd_state *kernel_fpsimd_state;
Is TIF_KERNEL_FPSTATE redundant with kernel_fpsimd_state != NULL? If
so, should we have both?
> +void kernel_neon_begin(struct user_fpsimd_state *s)
> {
> if (WARN_ON(!system_supports_fpsimd()))
> return;
> @@ -1866,8 +1869,16 @@ void kernel_neon_begin(void)
> * mode in task context. So in this case, setting the flag here
> * is always appropriate.
> */
> - if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
> + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
> + /*
> + * Record the caller provided buffer as the kernel mode
> + * FP/SIMD buffer for this task, so that the state can
> + * be preserved and restored on a context switch.
> + */
> + if (cmpxchg(&current->thread.kernel_fpsimd_state, NULL, s))
> + BUG();
Does this really need to be a cmpxchg, vs. a regular load and store?
It's just operating on current.
> + else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE))
> + if (cmpxchg(&current->thread.kernel_fpsimd_state, s, NULL) != s)
> + BUG();
Likewise.
- Eric
On Fri, 3 Oct 2025 at 22:18, Eric Biggers <ebiggers@kernel.org> wrote:
>
> On Wed, Oct 01, 2025 at 11:02:22PM +0200, Ard Biesheuvel wrote:
> > diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
> > index 4f8d677b73ee..93bca4d454d7 100644
> > --- a/arch/arm64/include/asm/processor.h
> > +++ b/arch/arm64/include/asm/processor.h
> > @@ -172,7 +172,7 @@ struct thread_struct {
> > unsigned long fault_code; /* ESR_EL1 value */
> > struct debug_info debug; /* debugging */
> >
> > - struct user_fpsimd_state kernel_fpsimd_state;
> > + struct user_fpsimd_state *kernel_fpsimd_state;
>
> Is TIF_KERNEL_FPSTATE redundant with kernel_fpsimd_state != NULL?
Not entirely.
The generic kernel_fpu_begin/end API that the amdgpu driver uses
cannot straightforwardly use a stack buffer, given how the API is
implemented. Since this code already disables preemption when using
the FPU, and should not assume being able to use kernel mode FP in
softirq context, I think we'll end up doing something like the
following for arm64
void kernel_fpu_begin(void)
{
	preempt_disable();
	local_bh_disable();
	kernel_neon_begin(NULL);
}
etc., as it never actually needs this buffer to begin with.
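The matching kernel_fpu_end() would then presumably just undo this in
reverse order; again only a sketch, under the same assumption:

void kernel_fpu_end(void)
{
	kernel_neon_end(NULL);
	local_bh_enable();
	preempt_enable();
}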
Technically, setting TIF_KERNEL_FPSTATE is not necessary when no
scheduling or softirq interruptions may be expected, but I still think
it is better to keep these separate.
> > +void kernel_neon_begin(struct user_fpsimd_state *s)
> > {
> > if (WARN_ON(!system_supports_fpsimd()))
> > return;
> > @@ -1866,8 +1869,16 @@ void kernel_neon_begin(void)
> > * mode in task context. So in this case, setting the flag here
> > * is always appropriate.
> > */
> > - if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq())
> > + if (IS_ENABLED(CONFIG_PREEMPT_RT) || !in_serving_softirq()) {
> > + /*
> > + * Record the caller provided buffer as the kernel mode
> > + * FP/SIMD buffer for this task, so that the state can
> > + * be preserved and restored on a context switch.
> > + */
> > + if (cmpxchg(&current->thread.kernel_fpsimd_state, NULL, s))
> > + BUG();
>
> Does this really need to be a cmpxchg, vs. a regular load and store?
> It's just operating on current.
>
It does not need to be atomic, no. I moved this around a bit while I
was working on it, but in its current form, we can just BUG/WARN on
the old value being wrong, and set the new one using an ordinary store
(in both cases).
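I.e., something like this on the kernel_neon_begin() side (only a
sketch):

	WARN_ON(current->thread.kernel_fpsimd_state != NULL);
	current->thread.kernel_fpsimd_state = s;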
On Wed, Oct 01, 2025 at 11:02:22PM +0200, Ard Biesheuvel wrote:
> [...]
> diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
> index d9f83c478736..7ddb25df5c98 100644
> --- a/arch/arm64/include/asm/simd.h
> +++ b/arch/arm64/include/asm/simd.h
> @@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
>
> #endif /* ! CONFIG_KERNEL_MODE_NEON */
>
> -DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
> +DEFINE_LOCK_GUARD_1(ksimd,
> + struct user_fpsimd_state,
> + kernel_neon_begin(_T->lock),
> + kernel_neon_end(_T->lock))
>
> -#define scoped_ksimd() scoped_guard(ksimd)
> +#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
I love it!
> [...]
> -void kernel_neon_end(void)
> +void kernel_neon_end(struct user_fpsimd_state *s)
> {
> if (!system_supports_fpsimd())
> return;
> @@ -1899,8 +1910,9 @@ void kernel_neon_end(void)
> if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
> test_thread_flag(TIF_KERNEL_FPSTATE))
> fpsimd_load_kernel_state(current);
> - else
> - clear_thread_flag(TIF_KERNEL_FPSTATE);
> + else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE))
> + if (cmpxchg(&current->thread.kernel_fpsimd_state, s, NULL) != s)
> + BUG();
I always question BUG() uses -- is there a recoverable way to deal with
a mismatch here? I assume not and that this is the best we can do, but I
thought I'd just explicitly ask. :)
-Kees
--
Kees Cook
On Thu, 2 Oct 2025 at 18:22, Kees Cook <kees@kernel.org> wrote:
>
> On Wed, Oct 01, 2025 at 11:02:22PM +0200, Ard Biesheuvel wrote:
> > [...]
> > diff --git a/arch/arm64/include/asm/simd.h b/arch/arm64/include/asm/simd.h
> > index d9f83c478736..7ddb25df5c98 100644
> > --- a/arch/arm64/include/asm/simd.h
> > +++ b/arch/arm64/include/asm/simd.h
> > @@ -43,8 +43,11 @@ static __must_check inline bool may_use_simd(void) {
> >
> > #endif /* ! CONFIG_KERNEL_MODE_NEON */
> >
> > -DEFINE_LOCK_GUARD_0(ksimd, kernel_neon_begin(), kernel_neon_end())
> > +DEFINE_LOCK_GUARD_1(ksimd,
> > + struct user_fpsimd_state,
> > + kernel_neon_begin(_T->lock),
> > + kernel_neon_end(_T->lock))
> >
> > -#define scoped_ksimd() scoped_guard(ksimd)
> > +#define scoped_ksimd() scoped_guard(ksimd, &(struct user_fpsimd_state){})
>
> I love it!
>
> > [...]
> > -void kernel_neon_end(void)
> > +void kernel_neon_end(struct user_fpsimd_state *s)
> > {
> > if (!system_supports_fpsimd())
> > return;
> > @@ -1899,8 +1910,9 @@ void kernel_neon_end(void)
> > if (!IS_ENABLED(CONFIG_PREEMPT_RT) && in_serving_softirq() &&
> > test_thread_flag(TIF_KERNEL_FPSTATE))
> > fpsimd_load_kernel_state(current);
> > - else
> > - clear_thread_flag(TIF_KERNEL_FPSTATE);
> > + else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE))
> > + if (cmpxchg(&current->thread.kernel_fpsimd_state, s, NULL) != s)
> > + BUG();
>
> I always question BUG() uses -- is there a recoverable way to deal with
> a mismatch here? I assume not and that this is the best we can do, but I
> thought I'd just explicitly ask. :)
>
So this fires when kernel_neon_end() is passed a different buffer than
the preceding kernel_neon_begin(), and the only purpose of passing it
twice is this particular check.
So perhaps this one can be demoted to a WARN() instead. The preceding
one will fire if kernel_neon_begin() is called and the recorded buffer
pointer is still set from a previous NEON block. Perhaps the same
applies there too: the warning is about something that already
happened, and we can actually proceed as usual.
TL;DR yes let's WARN() instead in both cases.
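For the kernel_neon_end() side, the demoted check would then look
something like (sketch):

	else if (test_and_clear_thread_flag(TIF_KERNEL_FPSTATE)) {
		WARN_ON(current->thread.kernel_fpsimd_state != s);
		current->thread.kernel_fpsimd_state = NULL;
	}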