[v1] x86/fpu: Correct pkru/xstate inconsistency

[PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Brian Geffon 4 years, 4 months ago

When eagerly switching PKRU in switch_fpu_finish() it checks that
current is not a kernel thread as kernel threads will never use PKRU.
It's possible that this_cpu_read_stable() on current_task
(ie. get_current()) is returning an old cached value. To resolve this
reference next_p directly rather than relying on current.

As written it's possible when switching from a kernel thread to a
userspace thread to observe a cached PF_KTHREAD flag and never restore
the PKRU. And as a result this issue only occurs when switching
from a kernel thread to a userspace thread, switching from a non kernel
thread works perfectly fine because all that is considered in that
situation are the flags from some other non kernel task and the next fpu
is passed in to switch_fpu_finish().

This behavior only exists between 5.2 and 5.13 when it was fixed by a
rewrite decoupling PKRU from xstate, in:
  commit 954436989cc5 ("x86/fpu: Remove PKRU handling from switch_fpu_finish()")

Unfortunately backporting the fix from 5.13 is probably not realistic as
it's part of a 60+ patch series which rewrites most of the PKRU handling.

Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
Signed-off-by: Brian Geffon <bgeffon@google.com>
Signed-off-by: Willis Kung <williskung@google.com>
Tested-by: Willis Kung <williskung@google.com>
Cc: <stable@vger.kernel.org> # v5.4.x
Cc: <stable@vger.kernel.org> # v5.10.x
---
 arch/x86/include/asm/fpu/internal.h | 13 ++++++++-----
 arch/x86/kernel/process_32.c        |  6 ++----
 arch/x86/kernel/process_64.c        |  6 ++----
 3 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h
index 03b3de491b5e..5ed702e2c55f 100644
--- a/arch/x86/include/asm/fpu/internal.h
+++ b/arch/x86/include/asm/fpu/internal.h
@@ -560,9 +560,11 @@ static inline void __fpregs_load_activate(void)
  * The FPU context is only stored/restored for a user task and
  * PF_KTHREAD is used to distinguish between kernel and user threads.
  */
-static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
+static inline void switch_fpu_prepare(struct task_struct *prev, int cpu)
 {
-	if (static_cpu_has(X86_FEATURE_FPU) && !(current->flags & PF_KTHREAD)) {
+	struct fpu *old_fpu = &prev->thread.fpu;
+
+	if (static_cpu_has(X86_FEATURE_FPU) && !(prev->flags & PF_KTHREAD)) {
 		if (!copy_fpregs_to_fpstate(old_fpu))
 			old_fpu->last_cpu = -1;
 		else
@@ -581,10 +583,11 @@ static inline void switch_fpu_prepare(struct fpu *old_fpu, int cpu)
  * Load PKRU from the FPU context if available. Delay loading of the
  * complete FPU state until the return to userland.
  */
-static inline void switch_fpu_finish(struct fpu *new_fpu)
+static inline void switch_fpu_finish(struct task_struct *next)
 {
 	u32 pkru_val = init_pkru_value;
 	struct pkru_state *pk;
+	struct fpu *next_fpu = &next->thread.fpu;
 
 	if (!static_cpu_has(X86_FEATURE_FPU))
 		return;
@@ -598,7 +601,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 	 * PKRU state is switched eagerly because it needs to be valid before we
 	 * return to userland e.g. for a copy_to_user() operation.
 	 */
-	if (!(current->flags & PF_KTHREAD)) {
+	if (!(next->flags & PF_KTHREAD)) {
 		/*
 		 * If the PKRU bit in xsave.header.xfeatures is not set,
 		 * then the PKRU component was in init state, which means
@@ -607,7 +610,7 @@ static inline void switch_fpu_finish(struct fpu *new_fpu)
 		 * in memory is not valid. This means pkru_val has to be
 		 * set to 0 and not to init_pkru_value.
 		 */
-		pk = get_xsave_addr(&new_fpu->state.xsave, XFEATURE_PKRU);
+		pk = get_xsave_addr(&next_fpu->state.xsave, XFEATURE_PKRU);
 		pkru_val = pk ? pk->pkru : 0;
 	}
 	__write_pkru(pkru_val);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index b8ceec4974fe..352f876950ab 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -229,14 +229,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread,
 			     *next = &next_p->thread;
-	struct fpu *prev_fpu = &prev->fpu;
-	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 
 	/* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
 
 	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_prepare(prev_fpu, cpu);
+		switch_fpu_prepare(prev_p, cpu);
 
 	/*
 	 * Save away %gs. No need to save %fs, as it was saved on the
@@ -292,7 +290,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 
 	this_cpu_write(current_task, next_p);
 
-	switch_fpu_finish(next_fpu);
+	switch_fpu_finish(next_p);
 
 	/* Load the Intel cache allocation PQR MSR. */
 	resctrl_sched_in();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index da3cc3a10d63..633788362906 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -505,15 +505,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 {
 	struct thread_struct *prev = &prev_p->thread;
 	struct thread_struct *next = &next_p->thread;
-	struct fpu *prev_fpu = &prev->fpu;
-	struct fpu *next_fpu = &next->fpu;
 	int cpu = smp_processor_id();
 
 	WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
 		     this_cpu_read(irq_count) != -1);
 
 	if (!test_thread_flag(TIF_NEED_FPU_LOAD))
-		switch_fpu_prepare(prev_fpu, cpu);
+		switch_fpu_prepare(prev_p, cpu);
 
 	/* We must save %fs and %gs before load_TLS() because
 	 * %fs and %gs may be cleared by load_TLS().
@@ -565,7 +563,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	this_cpu_write(current_task, next_p);
 	this_cpu_write(cpu_current_top_of_stack, task_top_of_stack(next_p));
 
-	switch_fpu_finish(next_fpu);
+	switch_fpu_finish(next_p);
 
 	/* Reload sp0. */
 	update_task_stack(next_p);
-- 
2.35.1.265.g69c8d7142f-goog

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Greg KH 4 years, 4 months ago

On Tue, Feb 15, 2022 at 11:22:33AM -0800, Brian Geffon wrote:
> When eagerly switching PKRU in switch_fpu_finish() it checks that
> current is not a kernel thread as kernel threads will never use PKRU.
> It's possible that this_cpu_read_stable() on current_task
> (ie. get_current()) is returning an old cached value. To resolve this
> reference next_p directly rather than relying on current.
> 
> As written it's possible when switching from a kernel thread to a
> userspace thread to observe a cached PF_KTHREAD flag and never restore
> the PKRU. And as a result this issue only occurs when switching
> from a kernel thread to a userspace thread, switching from a non kernel
> thread works perfectly fine because all that is considered in that
> situation are the flags from some other non kernel task and the next fpu
> is passed in to switch_fpu_finish().
> 
> This behavior only exists between 5.2 and 5.13 when it was fixed by a
> rewrite decoupling PKRU from xstate, in:
>   commit 954436989cc5 ("x86/fpu: Remove PKRU handling from switch_fpu_finish()")
> 
> Unfortunately backporting the fix from 5.13 is probably not realistic as
> it's part of a 60+ patch series which rewrites most of the PKRU handling.
> 
> Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> Signed-off-by: Willis Kung <williskung@google.com>
> Tested-by: Willis Kung <williskung@google.com>
> Cc: <stable@vger.kernel.org> # v5.4.x
> Cc: <stable@vger.kernel.org> # v5.10.x
> ---
>  arch/x86/include/asm/fpu/internal.h | 13 ++++++++-----
>  arch/x86/kernel/process_32.c        |  6 ++----
>  arch/x86/kernel/process_64.c        |  6 ++----
>  3 files changed, 12 insertions(+), 13 deletions(-)

So this is ONLY for 5.4.y and 5.10.y?  I'm really really loath to take
non-upstream changes as 95% of the time (really) it goes wrong.

How was this tested, and what do the maintainers of this subsystem
think?  And will you be around to fix the bugs in this when they are
found?

And finally, what's wrong with 60+ patches to backport to fix a severe
issue?  What's preventing that from happening?  Did you try it and see
what exactly is involved?

thanks,

greg k-h

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Brian Geffon 4 years, 4 months ago

On Tue, Feb 15, 2022 at 2:45 PM Greg KH <gregkh@linuxfoundation.org> wrote:
>
> On Tue, Feb 15, 2022 at 11:22:33AM -0800, Brian Geffon wrote:
> > When eagerly switching PKRU in switch_fpu_finish() it checks that
> > current is not a kernel thread as kernel threads will never use PKRU.
> > It's possible that this_cpu_read_stable() on current_task
> > (ie. get_current()) is returning an old cached value. To resolve this
> > reference next_p directly rather than relying on current.
> >
> > As written it's possible when switching from a kernel thread to a
> > userspace thread to observe a cached PF_KTHREAD flag and never restore
> > the PKRU. And as a result this issue only occurs when switching
> > from a kernel thread to a userspace thread, switching from a non kernel
> > thread works perfectly fine because all that is considered in that
> > situation are the flags from some other non kernel task and the next fpu
> > is passed in to switch_fpu_finish().
> >
> > This behavior only exists between 5.2 and 5.13 when it was fixed by a
> > rewrite decoupling PKRU from xstate, in:
> >   commit 954436989cc5 ("x86/fpu: Remove PKRU handling from switch_fpu_finish()")
> >
> > Unfortunately backporting the fix from 5.13 is probably not realistic as
> > it's part of a 60+ patch series which rewrites most of the PKRU handling.
> >
> > Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
> > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > Signed-off-by: Willis Kung <williskung@google.com>
> > Tested-by: Willis Kung <williskung@google.com>
> > Cc: <stable@vger.kernel.org> # v5.4.x
> > Cc: <stable@vger.kernel.org> # v5.10.x
> > ---
> >  arch/x86/include/asm/fpu/internal.h | 13 ++++++++-----
> >  arch/x86/kernel/process_32.c        |  6 ++----
> >  arch/x86/kernel/process_64.c        |  6 ++----
> >  3 files changed, 12 insertions(+), 13 deletions(-)
>
> So this is ONLY for 5.4.y and 5.10.y?  I'm really really loath to take
> non-upstream changes as 95% of the time (really) it goes wrong.

That's correct, this bug was introduced in 5.2 and that code was
completely refactored in 5.13 indirectly fixing it.

>
> How was this tested, and what do the maintainers of this subsystem
> think?  And will you be around to fix the bugs in this when they are
> found?

This has been trivial to reproduce, I've used a small repro which I've
put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
, I also was able to reproduce this using the protection_keys self
tests on a 11th Gen Core i5-1135G7. I'm happy to commit to addressing
any bugs that may appear. I'll see what the maintainers say, but there
is also a smaller fix that just involves using this_cpu_read() in
switch_fpu_finish() for this specific issue, although that approach
isn't as clean.

>
> And finally, what's wrong with 60+ patches to backport to fix a severe
> issue?  What's preventing that from happening?  Did you try it and see
> what exactly is involved?

It was quite a substantial rewrite of that code with fixes layered on since.

>
> thanks,
>
> greg k-h

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Dave Hansen 4 years, 4 months ago

On 2/15/22 13:32, Brian Geffon wrote:
>> How was this tested, and what do the maintainers of this subsystem
>> think?  And will you be around to fix the bugs in this when they are
>> found?
> This has been trivial to reproduce, I've used a small repro which I've
> put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
> , I also was able to reproduce this using the protection_keys self
> tests on a 11th Gen Core i5-1135G7. 

I've got an i7-1165G7, but I'm not seeing any failures on a
5.11 distro kernel.

How long does this take for you to reproduce?

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Brian Geffon 4 years, 4 months ago

On Tue, Feb 15, 2022 at 4:42 PM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 2/15/22 13:32, Brian Geffon wrote:
> >> How was this tested, and what do the maintainers of this subsystem
> >> think?  And will you be around to fix the bugs in this when they are
> >> found?
> > This has been trivial to reproduce, I've used a small repro which I've
> > put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
> > , I also was able to reproduce this using the protection_keys self
> > tests on a 11th Gen Core i5-1135G7.
>
> I've got an i7-1165G7, but I'm not seeing any failures on a
> 5.11 distro kernel.
>
> How long does this take for you to reproduce?

It reproduces for me in just a few seconds. I'm not sure what could be
different about my setup, I've tested on a vanilla 5.10 kernel and it
reproduced the same way.

Brian

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Brian Geffon 4 years, 4 months ago

On Tue, Feb 15, 2022 at 4:42 PM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 2/15/22 13:32, Brian Geffon wrote:
> >> How was this tested, and what do the maintainers of this subsystem
> >> think?  And will you be around to fix the bugs in this when they are
> >> found?
> > This has been trivial to reproduce, I've used a small repro which I've
> > put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
> > , I also was able to reproduce this using the protection_keys self
> > tests on a 11th Gen Core i5-1135G7.
>
> I've got an i7-1165G7, but I'm not seeing any failures on a
> 5.11 distro kernel.
>

Hi Dave,
I suspect the reason you're not seeing it is toolchain related, I'm
building with clang 14.0.0 and it produces the sequence of
instructions which use the cached value. Let me know if there is
anything I can do to help you investigate this further.

Brian

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Greg KH 4 years, 4 months ago

On Tue, Feb 15, 2022 at 09:01:54PM -0500, Brian Geffon wrote:
> On Tue, Feb 15, 2022 at 4:42 PM Dave Hansen <dave.hansen@intel.com> wrote:
> >
> > On 2/15/22 13:32, Brian Geffon wrote:
> > >> How was this tested, and what do the maintainers of this subsystem
> > >> think?  And will you be around to fix the bugs in this when they are
> > >> found?
> > > This has been trivial to reproduce, I've used a small repro which I've
> > > put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
> > > , I also was able to reproduce this using the protection_keys self
> > > tests on a 11th Gen Core i5-1135G7.
> >
> > I've got an i7-1165G7, but I'm not seeing any failures on a
> > 5.11 distro kernel.
> >
> 
> Hi Dave,
> I suspect the reason you're not seeing it is toolchain related, I'm
> building with clang 14.0.0 and it produces the sequence of
> instructions which use the cached value. Let me know if there is
> anything I can do to help you investigate this further.

Do older versions of clang have this problem?

thanks,

greg k-h

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Greg KH 4 years, 4 months ago

On Tue, Feb 15, 2022 at 04:32:48PM -0500, Brian Geffon wrote:
> On Tue, Feb 15, 2022 at 2:45 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> >
> > On Tue, Feb 15, 2022 at 11:22:33AM -0800, Brian Geffon wrote:
> > > When eagerly switching PKRU in switch_fpu_finish() it checks that
> > > current is not a kernel thread as kernel threads will never use PKRU.
> > > It's possible that this_cpu_read_stable() on current_task
> > > (ie. get_current()) is returning an old cached value. To resolve this
> > > reference next_p directly rather than relying on current.
> > >
> > > As written it's possible when switching from a kernel thread to a
> > > userspace thread to observe a cached PF_KTHREAD flag and never restore
> > > the PKRU. And as a result this issue only occurs when switching
> > > from a kernel thread to a userspace thread, switching from a non kernel
> > > thread works perfectly fine because all that is considered in that
> > > situation are the flags from some other non kernel task and the next fpu
> > > is passed in to switch_fpu_finish().
> > >
> > > This behavior only exists between 5.2 and 5.13 when it was fixed by a
> > > rewrite decoupling PKRU from xstate, in:
> > >   commit 954436989cc5 ("x86/fpu: Remove PKRU handling from switch_fpu_finish()")
> > >
> > > Unfortunately backporting the fix from 5.13 is probably not realistic as
> > > it's part of a 60+ patch series which rewrites most of the PKRU handling.
> > >
> > > Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
> > > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > > Signed-off-by: Willis Kung <williskung@google.com>
> > > Tested-by: Willis Kung <williskung@google.com>
> > > Cc: <stable@vger.kernel.org> # v5.4.x
> > > Cc: <stable@vger.kernel.org> # v5.10.x
> > > ---
> > >  arch/x86/include/asm/fpu/internal.h | 13 ++++++++-----
> > >  arch/x86/kernel/process_32.c        |  6 ++----
> > >  arch/x86/kernel/process_64.c        |  6 ++----
> > >  3 files changed, 12 insertions(+), 13 deletions(-)
> >
> > So this is ONLY for 5.4.y and 5.10.y?  I'm really really loath to take
> > non-upstream changes as 95% of the time (really) it goes wrong.
> 
> That's correct, this bug was introduced in 5.2 and that code was
> completely refactored in 5.13 indirectly fixing it.

What series of commits contain that work?

And again, why not just take them?  What is wrong with that if this is
such a big issue?

> > How was this tested, and what do the maintainers of this subsystem
> > think?  And will you be around to fix the bugs in this when they are
> > found?
> 
> This has been trivial to reproduce, I've used a small repro which I've
> put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
> , I also was able to reproduce this using the protection_keys self
> tests on a 11th Gen Core i5-1135G7. I'm happy to commit to addressing
> any bugs that may appear. I'll see what the maintainers say, but there
> is also a smaller fix that just involves using this_cpu_read() in
> switch_fpu_finish() for this specific issue, although that approach
> isn't as clean.

Can you add the test to the in-kernel tests so that we make sure it is
fixed and never comes back?

thanks,

greg k-h

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Brian Geffon 4 years, 4 months ago

On Wed, Feb 16, 2022 at 5:05 AM Greg KH <gregkh@linuxfoundation.org> wrote:
>
> On Tue, Feb 15, 2022 at 04:32:48PM -0500, Brian Geffon wrote:
> > On Tue, Feb 15, 2022 at 2:45 PM Greg KH <gregkh@linuxfoundation.org> wrote:
> > >
> > > On Tue, Feb 15, 2022 at 11:22:33AM -0800, Brian Geffon wrote:
> > > > When eagerly switching PKRU in switch_fpu_finish() it checks that
> > > > current is not a kernel thread as kernel threads will never use PKRU.
> > > > It's possible that this_cpu_read_stable() on current_task
> > > > (ie. get_current()) is returning an old cached value. To resolve this
> > > > reference next_p directly rather than relying on current.
> > > >
> > > > As written it's possible when switching from a kernel thread to a
> > > > userspace thread to observe a cached PF_KTHREAD flag and never restore
> > > > the PKRU. And as a result this issue only occurs when switching
> > > > from a kernel thread to a userspace thread, switching from a non kernel
> > > > thread works perfectly fine because all that is considered in that
> > > > situation are the flags from some other non kernel task and the next fpu
> > > > is passed in to switch_fpu_finish().
> > > >
> > > > This behavior only exists between 5.2 and 5.13 when it was fixed by a
> > > > rewrite decoupling PKRU from xstate, in:
> > > >   commit 954436989cc5 ("x86/fpu: Remove PKRU handling from switch_fpu_finish()")
> > > >
> > > > Unfortunately backporting the fix from 5.13 is probably not realistic as
> > > > it's part of a 60+ patch series which rewrites most of the PKRU handling.
> > > >
> > > > Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
> > > > Signed-off-by: Brian Geffon <bgeffon@google.com>
> > > > Signed-off-by: Willis Kung <williskung@google.com>
> > > > Tested-by: Willis Kung <williskung@google.com>
> > > > Cc: <stable@vger.kernel.org> # v5.4.x
> > > > Cc: <stable@vger.kernel.org> # v5.10.x
> > > > ---
> > > >  arch/x86/include/asm/fpu/internal.h | 13 ++++++++-----
> > > >  arch/x86/kernel/process_32.c        |  6 ++----
> > > >  arch/x86/kernel/process_64.c        |  6 ++----
> > > >  3 files changed, 12 insertions(+), 13 deletions(-)
> > >
> > > So this is ONLY for 5.4.y and 5.10.y?  I'm really really loath to take
> > > non-upstream changes as 95% of the time (really) it goes wrong.
> >
> > That's correct, this bug was introduced in 5.2 and that code was
> > completely refactored in 5.13 indirectly fixing it.
>

Hi Greg,

> What series of commits contain that work?

This is the series,
https://lore.kernel.org/all/20210623120127.327154589@linutronix.de/,
it does quite a bit of cleanup to correct the larger problem of having
pkru merged into xstate.

> And again, why not just take them?  What is wrong with that if this is
> such a big issue?

Anything is possible I suppose but looking through the series it seems
that it's not going to apply cleanly so we're going to end up with
something that, if we made it work, would look very different and
would touch a much larger cross section of code. If the concern here
is risk of things going wrong, attempting to pull back or cherry-pick
parts of this series seems riskier. However, if we don't attempt to
pull back all those patches I will likely be proposing at least one
more small patch for 5.4 and 5.10 to fix PKRU in these kernels because
right now it's broken, particularly on AMD processors as Dave
mentioned.

>
> > > How was this tested, and what do the maintainers of this subsystem
> > > think?  And will you be around to fix the bugs in this when they are
> > > found?
> >
> > This has been trivial to reproduce, I've used a small repro which I've
> > put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
> > , I also was able to reproduce this using the protection_keys self
> > tests on a 11th Gen Core i5-1135G7. I'm happy to commit to addressing
> > any bugs that may appear. I'll see what the maintainers say, but there
> > is also a smaller fix that just involves using this_cpu_read() in
> > switch_fpu_finish() for this specific issue, although that approach
> > isn't as clean.
>
> Can you add the test to the in-kernel tests so that we make sure it is
> fixed and never comes back?

I'm already able to reproduce it with the kernel selftests. To be
honest, I'm not sure why this hasn't been reported yet. I could be
doing something horribly wrong. But it seems the likely reason is that
my compiler is doing what it's allowed to do, which is optimize the
load of current_task. current -> get_current() ->
this_cpu_read_stable(current_task) is allowed to read a cached value.
Perhaps gcc is just not taking advantage of that optimization, I'm not
sure. But writing to current_task and then immediately reading it back
via this_cpu_read_stable() can be problematic for this reason, and the
disassembled code shows that this happening.

Brian

>
> thanks,
>
> greg k-h

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Dave Hansen 4 years, 4 months ago

On 2/16/22 02:05, Greg KH wrote:
>>> How was this tested, and what do the maintainers of this subsystem
>>> think?  And will you be around to fix the bugs in this when they are
>>> found?
>> This has been trivial to reproduce, I've used a small repro which I've
>> put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
>> , I also was able to reproduce this using the protection_keys self
>> tests on a 11th Gen Core i5-1135G7. I'm happy to commit to addressing
>> any bugs that may appear. I'll see what the maintainers say, but there
>> is also a smaller fix that just involves using this_cpu_read() in
>> switch_fpu_finish() for this specific issue, although that approach
>> isn't as clean.
> Can you add the test to the in-kernel tests so that we make sure it is
> fixed and never comes back?

It would be great if Brian could confirm this.  But, I'm 99% sure that
this can be reproduced in the vm/protection_keys.c selftest, if you run
it for long enough.

The symptom here is corruption of the PKRU register.  I created *lots*
of bugs like this during protection keys development so the selftest
keeps a shadow copy of the register to specifically watch for corruption.

It's _plausible_ that no one ever ran the pkey selftests with a
clang-compiled kernel for long enough to hit this issue.

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Brian Geffon 4 years, 4 months ago

On Wed, Feb 16, 2022 at 10:16 AM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 2/16/22 02:05, Greg KH wrote:
> >>> How was this tested, and what do the maintainers of this subsystem
> >>> think?  And will you be around to fix the bugs in this when they are
> >>> found?
> >> This has been trivial to reproduce, I've used a small repro which I've
> >> put here: https://gist.github.com/bgaff/9f8cbfc8dd22e60f9492e4f0aff8f04f
> >> , I also was able to reproduce this using the protection_keys self
> >> tests on a 11th Gen Core i5-1135G7. I'm happy to commit to addressing
> >> any bugs that may appear. I'll see what the maintainers say, but there
> >> is also a smaller fix that just involves using this_cpu_read() in
> >> switch_fpu_finish() for this specific issue, although that approach
> >> isn't as clean.
> > Can you add the test to the in-kernel tests so that we make sure it is
> > fixed and never comes back?
>
> It would be great if Brian could confirm this.  But, I'm 99% sure that
> this can be reproduced in the vm/protection_keys.c selftest, if you run
> it for long enough.

Hi Dave,
Yes, this is reproduced by the protection keys selfs tests. If your
kernel was compiled in a way which caches current_task when read via
this_cpu_read_stable(), then when switching from a kernel thread to a
user thread you will observe this behavior, so the only situation
where it's timing related is waiting for that switch from a kernel to
user thread. If your kernel is compiled in a way which does not cache
the value of current_task then you will never observe this behavior.
My understanding is that this is perfectly valid for the compiler to
produce that code.

>
> The symptom here is corruption of the PKRU register.  I created *lots*
> of bugs like this during protection keys development so the selftest
> keeps a shadow copy of the register to specifically watch for corruption.
>
> It's _plausible_ that no one ever ran the pkey selftests with a
> clang-compiled kernel for long enough to hit this issue.

For ChromeOS we use clang and when I tested a vanilla 5.10 kernel
_built with clang_ it also reproduced, so I suspect you're right that
the self tests just haven't been run against clang built kernels all
that often.

How would you and Greg KH like to proceed with this? I'm happy to help
however I can.

Brian

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Dave Hansen 4 years, 4 months ago

On 2/17/22 05:31, Brian Geffon wrote:
> How would you and Greg KH like to proceed with this? I'm happy to help
> however I can.

If I could wave a magic wand, I'd just apply the whole FPU rewrite to
stable.

My second choice would be to stop managing PKRU with XSAVE.
x86_pkru_load() uses WRPKRU instead of XSAVE and keeps the task's PKRU
in task->pkru instead of the XSAVE buffer.  Doing that will take some
care, including pulling XFEATURE_PKRU out of the feature mask (RFBM) at
XRSTOR.  I _think_ that can be done in a manageable set of patches which
 will keep stable close to mainline.  I recognize that more bugs might
get introduced in the process which are unique to stable.

If you give that a shot and realize that it's not feasible to do a
subset, then we can fall back to the minimal fix.  I'm not asking for a
multi-month engineering effort here.  Maybe an hour or two to see if
it's really as scary as it looks.

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Brian Geffon 4 years, 4 months ago

On Thu, Feb 17, 2022 at 11:44 AM Dave Hansen <dave.hansen@intel.com> wrote:
>
> On 2/17/22 05:31, Brian Geffon wrote:
> > How would you and Greg KH like to proceed with this? I'm happy to help
> > however I can.
>
> If I could wave a magic wand, I'd just apply the whole FPU rewrite to
> stable.
>
> My second choice would be to stop managing PKRU with XSAVE.
> x86_pkru_load() uses WRPKRU instead of XSAVE and keeps the task's PKRU
> in task->pkru instead of the XSAVE buffer.  Doing that will take some
> care, including pulling XFEATURE_PKRU out of the feature mask (RFBM) at
> XRSTOR.  I _think_ that can be done in a manageable set of patches which
>  will keep stable close to mainline.  I recognize that more bugs might
> get introduced in the process which are unique to stable.

Hi Dave,
I did take some time to look through that series and pick out what I
think is the minimum set that would pull out PKRU from xstate, that
list is:

   9782a712eb  x86/fpu: Add PKRU storage outside of task XSAVE buffer
   784a46618f   x86/pkeys: Move read_pkru() and write_pkru()
   ff7ebff47c59  x86/pkru: Provide pkru_write_default()
   739e2eec0f   x86/pkru: Provide pkru_get_init_value()
   fa8c84b77a   x86/cpu: Write the default PKRU value when enabling PKE
   72a6c08c44  x86/pkru: Remove xstate fiddling from write_pkru()
   2ebe81c6d8  x86/fpu: Dont restore PKRU in fpregs_restore_userspace()
   71ef453355   x86/kvm: Avoid looking up PKRU in XSAVE buffer
   954436989c  x86/fpu: Remove PKRU handling from switch_fpu_finish()
   e84ba47e31  x86/fpu: Hook up PKRU into ptrace()
   30a304a138  x86/fpu: Mask PKRU from kernel XRSTOR[S] operations
   0e8c54f6b2c  x86/fpu: Don't store PKRU in xstate in fpu_reset_fpstate()

The majority of these don't apply cleanly to 5.4, and there are some
other patches we'd have to pull back too that moved code and some of
the those won't be needed for 5.10 though. TBH, I'm not sure it makes
sense to try to do this just given the fact that most do not cleanly
apply.

Brian

>
> If you give that a shot and realize that it's not feasible to do a
> subset, then we can fall back to the minimal fix.  I'm not asking for a
> multi-month engineering effort here.  Maybe an hour or two to see if
> it's really as scary as it looks.

Re: [PATCH stable 5.4,5.10] x86/fpu: Correct pkru/xstate inconsistency

Posted by Dave Hansen 4 years, 4 months ago

On 2/15/22 11:22, Brian Geffon wrote:
> When eagerly switching PKRU in switch_fpu_finish() it checks that
> current is not a kernel thread as kernel threads will never use PKRU.
> It's possible that this_cpu_read_stable() on current_task
> (ie. get_current()) is returning an old cached value. To resolve this
> reference next_p directly rather than relying on current.
> 
> As written it's possible when switching from a kernel thread to a
> userspace thread to observe a cached PF_KTHREAD flag and never restore
> the PKRU. And as a result this issue only occurs when switching
> from a kernel thread to a userspace thread, switching from a non kernel
> thread works perfectly fine because all that is considered in that
> situation are the flags from some other non kernel task and the next fpu
> is passed in to switch_fpu_finish().
> 
> This behavior only exists between 5.2 and 5.13 when it was fixed by a
> rewrite decoupling PKRU from xstate, in:
>   commit 954436989cc5 ("x86/fpu: Remove PKRU handling from switch_fpu_finish()")
> 
> Unfortunately backporting the fix from 5.13 is probably not realistic as
> it's part of a 60+ patch series which rewrites most of the PKRU handling.
> 
> Fixes: 0cecca9d03c9 ("x86/fpu: Eager switch PKRU state")
> Signed-off-by: Brian Geffon <bgeffon@google.com>
> Signed-off-by: Willis Kung <williskung@google.com>
> Tested-by: Willis Kung <williskung@google.com>
> Cc: <stable@vger.kernel.org> # v5.4.x
> Cc: <stable@vger.kernel.org> # v5.10.x

I don't like forking the stable code from mainline.  But I also think
that backporting the FPU reworking that changed the PKRU handling is
likely to cause more bugs in stable than it fixes.

This fix is at least isolated to the protection keys code.

Acked-by: Dave Hansen <dave.hansen@linux.intel.com>