[PATCH 3/3, v4] x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks

Ingo Molnar posted 1 patch 1 year, 6 months ago
arch/x86/include/asm/processor.h |  6 +++++-
arch/x86/kernel/fpu/core.c       | 13 ++++++++++---
arch/x86/kernel/fpu/init.c       |  5 ++---
arch/x86/kernel/fpu/xstate.c     |  3 ---
arch/x86/kernel/vmlinux.lds.S    |  4 ----
5 files changed, 17 insertions(+), 14 deletions(-)
[PATCH 3/3, v4] x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks
Posted by Ingo Molnar 1 year, 6 months ago

* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Wed, 5 Jun 2024 at 09:27, Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > Yes, but kernel_fpu_begin() never does save_fpregs_to_fpstate() if
> > current->flags & PF_KTHREAD ?
> 
> Ahh, and init_thread does have PF_KTHREAD.
> 
> Ok, looks fine to me, except I think the commit message should be cleared up.
> 
> The whole sentence about
> 
>  "But the init task isn't supposed to be using the FPU in any case ..."
> 
> is just simply not true.
>
> It should be more along the lines of "kernel threads don't need an FPU
> save area, because their FPU use is not preemptible or reentrant and
> they don't return to user space".

Indeed - I fixed & clarified the changelog accordingly - see the new patch 
further below.

I changed the debug check to test for PF_KTHREAD, and to return NULL:

+#ifdef CONFIG_X86_DEBUG_FPU
+struct fpu *x86_task_fpu(struct task_struct *task)
+{
+	if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
+		return NULL;
+
+	return (void *)task + sizeof(*task);
+}
+#endif

... and the NULL we return will likely crash & exit any kthreads attempting 
to use the FPU context area - which I think is preferable to returning 
invalid memory that may then be corrupted.

Hopefully this remains a hypothethical concern. :-)

Alternatively, this may be one of the very few cases where a BUG_ON() might 
be justified? This condition is not recoverable in any sane fashion IMO.

Thanks,

	Ingo

============================>
From: Ingo Molnar <mingo@kernel.org>
Date: Wed, 5 Jun 2024 10:35:57 +0200
Subject: [PATCH] x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks

init_task's FPU state initialization was a bit of a hack:

		__x86_init_fpu_begin = .;
		. = __x86_init_fpu_begin + 128*PAGE_SIZE;
		__x86_init_fpu_end = .;

But the init task isn't supposed to be using the FPU context
in any case, so remove the hack and add in some debug warnings.

As Linus noted in the discussion, the init task (and other
PF_KTHREAD tasks) *can* use the FPU via kernel_fpu_begin()/_end(),
but they don't need the context area because their FPU use is not
preemptible or reentrant, and they don't return to user-space.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Uros Bizjak <ubizjak@gmail.com>
Link: https://lore.kernel.org/r/20240605083557.2051480-4-mingo@kernel.org
---
 arch/x86/include/asm/processor.h |  6 +++++-
 arch/x86/kernel/fpu/core.c       | 13 ++++++++++---
 arch/x86/kernel/fpu/init.c       |  5 ++---
 arch/x86/kernel/fpu/xstate.c     |  3 ---
 arch/x86/kernel/vmlinux.lds.S    |  4 ----
 5 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 249c5fa20de4..ed8981866f4d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -504,7 +504,11 @@ struct thread_struct {
 #endif
 };
 
-#define x86_task_fpu(task) ((struct fpu *)((void *)task + sizeof(*task)))
+#ifdef CONFIG_X86_DEBUG_FPU
+extern struct fpu *x86_task_fpu(struct task_struct *task);
+#else
+# define x86_task_fpu(task) ((struct fpu *)((void *)task + sizeof(*task)))
+#endif
 
 /*
  * X86 doesn't need any embedded-FPU-struct quirks:
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 0ccabcd3bf62..d9ff8ca5b32d 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -51,6 +51,16 @@ static DEFINE_PER_CPU(bool, in_kernel_fpu);
  */
 DEFINE_PER_CPU(struct fpu *, fpu_fpregs_owner_ctx);
 
+#ifdef CONFIG_X86_DEBUG_FPU
+struct fpu *x86_task_fpu(struct task_struct *task)
+{
+	if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
+		return NULL;
+
+	return (void *)task + sizeof(*task);
+}
+#endif
+
 /*
  * Can we use the FPU in kernel mode with the
  * whole "kernel_fpu_begin/end()" sequence?
@@ -591,10 +601,8 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
 	 * This is safe because task_struct size is a multiple of cacheline size.
 	 */
 	struct fpu *dst_fpu = (void *)dst + sizeof(*dst);
-	struct fpu *src_fpu = x86_task_fpu(current);
 
 	BUILD_BUG_ON(sizeof(*dst) % SMP_CACHE_BYTES != 0);
-	BUG_ON(!src_fpu);
 
 	/* The new task's FPU state cannot be valid in the hardware. */
 	dst_fpu->last_cpu = -1;
@@ -657,7 +665,6 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
 	if (update_fpu_shstk(dst, ssp))
 		return 1;
 
-	trace_x86_fpu_copy_src(src_fpu);
 	trace_x86_fpu_copy_dst(dst_fpu);
 
 	return 0;
diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c
index 11aa31410df2..53580e59e5db 100644
--- a/arch/x86/kernel/fpu/init.c
+++ b/arch/x86/kernel/fpu/init.c
@@ -38,7 +38,7 @@ static void fpu__init_cpu_generic(void)
 	/* Flush out any pending x87 state: */
 #ifdef CONFIG_MATH_EMULATION
 	if (!boot_cpu_has(X86_FEATURE_FPU))
-		fpstate_init_soft(&x86_task_fpu(current)->fpstate->regs.soft);
+		;
 	else
 #endif
 		asm volatile ("fninit");
@@ -164,7 +164,7 @@ static void __init fpu__init_task_struct_size(void)
 	 * Subtract off the static size of the register state.
 	 * It potentially has a bunch of padding.
 	 */
-	task_size -= sizeof(x86_task_fpu(current)->__fpstate.regs);
+	task_size -= sizeof(union fpregs_state);
 
 	/*
 	 * Add back the dynamically-calculated register state
@@ -209,7 +209,6 @@ static void __init fpu__init_system_xstate_size_legacy(void)
 	fpu_kernel_cfg.default_size = size;
 	fpu_user_cfg.max_size = size;
 	fpu_user_cfg.default_size = size;
-	fpstate_reset(x86_task_fpu(current));
 }
 
 /*
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 90b11671e943..1f37da22ddbe 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -844,9 +844,6 @@ void __init fpu__init_system_xstate(unsigned int legacy_size)
 	if (err)
 		goto out_disable;
 
-	/* Reset the state for the current task */
-	fpstate_reset(x86_task_fpu(current));
-
 	/*
 	 * Update info used for ptrace frames; use standard-format size and no
 	 * supervisor xstates:
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 226244a894da..3509afc6a672 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -170,10 +170,6 @@ SECTIONS
 		/* equivalent to task_pt_regs(&init_task) */
 		__top_init_kernel_stack = __end_init_stack - TOP_OF_KERNEL_STACK_PADDING - PTREGS_SIZE;
 
-		__x86_init_fpu_begin = .;
-		. = __x86_init_fpu_begin + 128*PAGE_SIZE;
-		__x86_init_fpu_end = .;
-
 #ifdef CONFIG_X86_32
 		/* 32 bit has nosave before _edata */
 		NOSAVE_DATA
Re: [PATCH 3/3, v4] x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks
Posted by Oleg Nesterov 1 year, 6 months ago
On 06/06, Ingo Molnar wrote:
>
> I changed the debug check to test for PF_KTHREAD, and to return NULL:
>
> +#ifdef CONFIG_X86_DEBUG_FPU
> +struct fpu *x86_task_fpu(struct task_struct *task)
> +{
> +	if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
> +		return NULL;
> +
> +	return (void *)task + sizeof(*task);
> +}
> +#endif

How many users enable CONFIG_X86_DEBUG_FPU? Perhaps it makes sense
to check PF_KTHREAD unconditionally for the start, them add
if (IS_ENABLED(X86_DEBUG_FPU)). But I won't insist.


For the record, I think we can later change this code to check

	task->flags & (PF_KTHREAD | PF_USER_WORKER)

but I guess this needs some (simple) changes in the ptrace/coredump
paths.

Oleg.
Re: [PATCH 3/3, v4] x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks
Posted by Ingo Molnar 1 year, 6 months ago
* Oleg Nesterov <oleg@redhat.com> wrote:

> On 06/06, Ingo Molnar wrote:
> >
> > I changed the debug check to test for PF_KTHREAD, and to return NULL:
> >
> > +#ifdef CONFIG_X86_DEBUG_FPU
> > +struct fpu *x86_task_fpu(struct task_struct *task)
> > +{
> > +	if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
> > +		return NULL;
> > +
> > +	return (void *)task + sizeof(*task);
> > +}
> > +#endif
> 
> How many users enable CONFIG_X86_DEBUG_FPU?

Ubuntu does:

  kepler:~/tip> grep X86_DEBUG_FPU /boot/config-6.*
  /boot/config-6.8.0-35-generic:CONFIG_X86_DEBUG_FPU=y

Fedora doesn't:

  kepler:~/s/tmp> grep X86_DEBUG_FPU ./lib/modules/6.9.0-64.fc41.x86_64/config
  # CONFIG_X86_DEBUG_FPU is not set

So it's a bit of a hit and miss, but at least one major distribution does, 
which is all that we need really.

> [...] Perhaps it makes sense to check PF_KTHREAD unconditionally for the 
> start, them add if (IS_ENABLED(X86_DEBUG_FPU)). But I won't insist.

I think Ubuntu ought to add enough debug coverage - and this check isn't 
exactly cheap, given how it replaces a simple build time structure offset 
with a full-blown function call ...

> For the record, I think we can later change this code to check
> 
> 	task->flags & (PF_KTHREAD | PF_USER_WORKER)
> 
> but I guess this needs some (simple) changes in the ptrace/coredump
> paths.

Sounds useful, would you be interested in cooking up a series on top of 
tip:master (or tip:WIP.x86/fpu), if you are interested and have the time? 

I'll send out one last iteration of this series today, otherwise the 
changes seem close to final for an upstream merge via the tip:x86/fpu tree.

Thanks,

	Ingo
Re: [PATCH 3/3, v4] x86/fpu: Remove init_task FPU state dependencies, add debugging warning for PF_KTHREAD tasks
Posted by Ingo Molnar 1 year, 6 months ago
* Ingo Molnar <mingo@kernel.org> wrote:

> I changed the debug check to test for PF_KTHREAD, and to return NULL:
> 
> +#ifdef CONFIG_X86_DEBUG_FPU
> +struct fpu *x86_task_fpu(struct task_struct *task)
> +{
> +	if (WARN_ON_ONCE(task->flags & PF_KTHREAD))
> +		return NULL;
> +
> +	return (void *)task + sizeof(*task);
> +}
> +#endif
> 
> ... and the NULL we return will likely crash & exit any kthreads attempting 
> to use the FPU context area - which I think is preferable to returning 
> invalid memory that may then be corrupted.
> 
> Hopefully this remains a hypothethical concern. :-)
> 
> Alternatively, this may be one of the very few cases where a BUG_ON() might 
> be justified? This condition is not recoverable in any sane fashion IMO.

And promptly this triggered in live testing, because while kthreads do not 
use the FPU context area, the current fpu__drop() code does call 
x86_task_cpu() even for kthreads...

See the two new 4/3 and 5/3 patches in this thread I've sent that clean 
this up:

  [PATCH 4/3] x86/fpu: Push 'fpu' pointer calculation into the fpu__drop() call
  [PATCH 5/3] x86/fpu: Make sure x86_task_fpu() doesn't get called for PF_KTHREAD tasks during exit

I'll also reorder the patches to apply these fpu__drop() changes before 
adding the debug warning.

Thanks,

	Ingo
[PATCH 4/3] x86/fpu: Push 'fpu' pointer calculation into the fpu__drop() call
Posted by Ingo Molnar 1 year, 6 months ago
This encapsulates the fpu__drop() functionality better, and it
will also enable other changes that want to check a task for
PF_KTHREAD before calling x86_task_fpu().

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fpu/sched.h | 2 +-
 arch/x86/kernel/fpu/core.c       | 4 +++-
 arch/x86/kernel/process.c        | 3 +--
 3 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/fpu/sched.h b/arch/x86/include/asm/fpu/sched.h
index 1feaa68b7567..5fd12634bcc4 100644
--- a/arch/x86/include/asm/fpu/sched.h
+++ b/arch/x86/include/asm/fpu/sched.h
@@ -10,7 +10,7 @@
 #include <asm/trace/fpu.h>
 
 extern void save_fpregs_to_fpstate(struct fpu *fpu);
-extern void fpu__drop(struct fpu *fpu);
+extern void fpu__drop(struct task_struct *tsk);
 extern int  fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
 		      unsigned long shstk_addr);
 extern void fpu_flush_thread(void);
diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index d9ff8ca5b32d..3223eb3dd09d 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -679,8 +679,10 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
  * a state-restore is coming: either an explicit one,
  * or a reschedule.
  */
-void fpu__drop(struct fpu *fpu)
+void fpu__drop(struct task_struct *tsk)
 {
+	struct fpu *fpu = x86_task_fpu(tsk);
+
 	preempt_disable();
 
 	if (fpu == x86_task_fpu(current)) {
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4184c085627e..0a24997c8cc6 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -114,7 +114,6 @@ void arch_release_task_struct(struct task_struct *tsk)
 void exit_thread(struct task_struct *tsk)
 {
 	struct thread_struct *t = &tsk->thread;
-	struct fpu *fpu = x86_task_fpu(tsk);
 
 	if (test_thread_flag(TIF_IO_BITMAP))
 		io_bitmap_exit(tsk);
@@ -122,7 +121,7 @@ void exit_thread(struct task_struct *tsk)
 	free_vm86(t);
 
 	shstk_free(tsk);
-	fpu__drop(fpu);
+	fpu__drop(tsk);
 }
 
 static int set_new_tls(struct task_struct *p, unsigned long tls)
[PATCH 5/3] x86/fpu: Make sure x86_task_fpu() doesn't get called for PF_KTHREAD tasks during exit
Posted by Ingo Molnar 1 year, 6 months ago
fpu__drop() calls x86_task_fpu() unconditionally, while the FPU context
area will not be present if it's the init task, and should not be in
use when it's some other type of kthread.

Return early for PF_KTHREAD tasks. The debug warning in x86_task_fpu()
will catch any kthreads attempting to use the FPU save area.

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/fpu/core.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c
index 3223eb3dd09d..db608afa686f 100644
--- a/arch/x86/kernel/fpu/core.c
+++ b/arch/x86/kernel/fpu/core.c
@@ -681,7 +681,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags, bool minimal,
  */
 void fpu__drop(struct task_struct *tsk)
 {
-	struct fpu *fpu = x86_task_fpu(tsk);
+	struct fpu *fpu;
+
+	/* PF_KTHREAD tasks do not use the FPU context area: */
+	if (tsk->flags & PF_KTHREAD)
+		return;
+
+	fpu = x86_task_fpu(tsk);
 
 	preempt_disable();