[PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature

Amit Shah posted 2 patches 3 weeks, 3 days ago
There is a newer version of this series
[PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Amit Shah 3 weeks, 3 days ago
From: Amit Shah <amit.shah@amd.com>

Remove explicit RET stuffing / filling on VMEXITs and context
switches on AMD CPUs with the ERAPS feature (Turin+).

With the Enhanced Return Address Prediction Security feature, any
hardware TLB flush results in flushing of the RSB (aka RAP in AMD spec).
This guarantees an RSB flush across context switches.  The feature also
explicitly tags host and guest addresses - eliminating the need for
explicit flushing of the RSB on VMEXIT.

The BTC_NO feature in AMD CPUs ensures RET predictions do not speculate
from outside the RSB. Together, the BTC_NO and ERAPS features ensure no
flushing or stuffing of the RSB is necessary anymore.

Feature documented in AMD PPR 57238.

Signed-off-by: Amit Shah <amit.shah@amd.com>
---
 Documentation/admin-guide/hw-vuln/spectre.rst |  5 +--
 arch/x86/include/asm/cpufeatures.h            |  1 +
 arch/x86/include/asm/nospec-branch.h          | 11 ++++++
 arch/x86/kernel/cpu/bugs.c                    | 36 +++++++++++++------
 4 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index 132e0bc6007e..647c10c0307a 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -417,9 +417,10 @@ The possible values in this file are:
 
   - Return stack buffer (RSB) protection status:
 
-  =============   ===========================================
+  =============   ========================================================
   'RSB filling'   Protection of RSB on context switch enabled
-  =============   ===========================================
+  'ERAPS'         Hardware RSB flush on context switches + guest/host tags
+  =============   ========================================================
 
   - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 913fd3a7bac6..665032b12871 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -458,6 +458,7 @@
 #define X86_FEATURE_AUTOIBRS		(20*32+ 8) /* Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR	(20*32+ 9) /* SMM_CTL MSR is not present */
 
+#define X86_FEATURE_ERAPS		(20*32+24) /* Enhanced RAP / RSB / RAS Security */
 #define X86_FEATURE_SBPB		(20*32+27) /* Selective Branch Prediction Barrier */
 #define X86_FEATURE_IBPB_BRTYPE		(20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
 #define X86_FEATURE_SRSO_NO		(20*32+29) /* CPU is not affected by SRSO */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 96b410b1d4e8..24d0fe5d5a8b 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -117,6 +117,17 @@
  * We define a CPP macro such that it can be used from both .S files and
  * inline assembly. It's possible to do a .macro and then include that
  * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ *
+ * AMD CPUs with the ERAPS feature may have a larger default RSB.  These CPUs
+ * use the default number of entries on a host, and can optionally (based on
+ * hypervisor setup) use 32 (old) or the new default in a guest.  The number
+ * of default entries is reflected in CPUID 8000_0021:EBX[23:16].
+ *
+ * With the ERAPS feature, RSB filling is not necessary anymore: the RSB is
+ * auto-cleared on a TLB flush (i.e. a context switch).  Adapting the value of
+ * RSB_CLEAR_LOOPS below for ERAPS would change it to a runtime variable
+ * instead of the current compile-time constant, so leave it as-is, as this
+ * works for both older CPUs, as well as newer ones with ERAPS.
  */
 
 #define RETPOLINE_THUNK_SIZE	32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 47a01d4028f6..83b34a522dd7 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1828,9 +1828,6 @@ static void __init spectre_v2_select_mitigation(void)
 	 *    speculated return targets may come from the branch predictor,
 	 *    which could have a user-poisoned BTB or BHB entry.
 	 *
-	 *    AMD has it even worse: *all* returns are speculated from the BTB,
-	 *    regardless of the state of the RSB.
-	 *
 	 *    When IBRS or eIBRS is enabled, the "user -> kernel" attack
 	 *    scenario is mitigated by the IBRS branch prediction isolation
 	 *    properties, so the RSB buffer filling wouldn't be necessary to
@@ -1838,6 +1835,15 @@ static void __init spectre_v2_select_mitigation(void)
 	 *
 	 *    The "user -> user" attack scenario is mitigated by RSB filling.
 	 *
+	 *    AMD CPUs without the BTC_NO bit may speculate return targets
+	 *    from the BTB. CPUs with BTC_NO do not speculate return targets
+	 *    from the BTB, even on RSB underflow.
+	 *
+	 *    The ERAPS CPU feature (which implies the presence of BTC_NO)
+	 *    adds an RSB flush each time a TLB flush happens (i.e., on every
+	 *    context switch).  So, RSB filling is not necessary for this
+	 *    attack type with ERAPS present.
+	 *
 	 * 2) Poisoned RSB entry
 	 *
 	 *    If the 'next' in-kernel return stack is shorter than 'prev',
@@ -1848,17 +1854,24 @@ static void __init spectre_v2_select_mitigation(void)
 	 *    eIBRS.
 	 *
 	 *    The "user -> user" scenario, also known as SpectreBHB, requires
-	 *    RSB clearing.
+	 *    RSB clearing on processors without ERAPS.
 	 *
 	 * So to mitigate all cases, unconditionally fill RSB on context
-	 * switches.
-	 *
-	 * FIXME: Is this pointless for retbleed-affected AMD?
+	 * switches when ERAPS is not present.
 	 */
-	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
-	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+	if (!boot_cpu_has(X86_FEATURE_ERAPS)) {
+		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+		pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
 
-	spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+		/*
+		 * For guest -> host (or vice versa) RSB poisoning scenarios,
+		 * determine the mitigation mode here.  With ERAPS, RSB
+		 * entries are tagged as host or guest - ensuring that neither
+		 * the host nor the guest have to clear or fill RSB entries to
+		 * avoid poisoning, skip RSB filling at VMEXIT in that case.
+		 */
+		spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+	}
 
 	/*
 	 * Retpoline protects the kernel, but doesn't protect firmware.  IBRS
@@ -2871,7 +2884,7 @@ static ssize_t spectre_v2_show_state(char *buf)
 	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
 		return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
 
-	return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
+	return sysfs_emit(buf, "%s%s%s%s%s%s%s%s%s\n",
 			  spectre_v2_strings[spectre_v2_enabled],
 			  ibpb_state(),
 			  boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
@@ -2879,6 +2892,7 @@ static ssize_t spectre_v2_show_state(char *buf)
 			  boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
 			  pbrsb_eibrs_state(),
 			  spectre_bhi_state(),
+			  boot_cpu_has(X86_FEATURE_ERAPS) ? "; ERAPS hardware RSB flush" : "",
 			  /* this should always be at the end */
 			  spectre_v2_module_string());
 }
-- 
2.47.0
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Dave Hansen 3 weeks, 3 days ago
On 10/31/24 08:39, Amit Shah wrote:
...
> With the Enhanced Return Address Prediction Security feature,  any
> hardware TLB flush results in flushing of the RSB (aka RAP in AMD spec).
> This guarantees an RSB flush across context switches. 

Check out the APM, volume 2: "5.5.1 Process Context Identifier"

	... when system software switches address spaces (by writing ...
	CR3[62:12]), the processor may use TLB mappings previously
	stored for that address space and PCID, providing that bit 63 of
	the source operand is set to 1.

tl;dr: PCIDs mean you don't necessarily flush the TLB on context switches.
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Shah, Amit 2 weeks, 6 days ago
On Thu, 2024-10-31 at 16:11 -0700, Dave Hansen wrote:
> On 10/31/24 08:39, Amit Shah wrote:
> ...
> > With the Enhanced Return Address Prediction Security feature,  any
> > hardware TLB flush results in flushing of the RSB (aka RAP in AMD
> > spec).
> > This guarantees an RSB flush across context switches. 
> 
> Check out the APM, volume 2: "5.5.1 Process Context Identifier"
> 
> 	... when system software switches address spaces (by writing
> ...
> 	CR3[62:12]), the processor may use TLB mappings previously
> 	stored for that address space and PCID, providing that bit
> 63 of
> 	the source operand is set to 1.
> 
> tl;dr: PCIDs mean you don't necessarily flush the TLB on context
> switches.

Right - thanks, I'll have to reword that to say the RSB is flushed
along with the TLB - so any action that causes the TLB to be flushed
will also cause the RSB to be flushed.
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Dave Hansen 2 weeks, 6 days ago
On 11/4/24 00:58, Shah, Amit wrote:
> Right - thanks, I'll have to reword that to say the RSB is flushed
> along with the TLB - so any action that causes the TLB to be flushed
> will also cause the RSB to be flushed.

Hold on though.

Is there a need for the RSB to be flushed at context switch?  You talked
about it like there was a need:

> any hardware TLB flush results in flushing of the RSB (aka RAP in
> AMD spec). This guarantees an RSB flush across context switches.
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Shah, Amit 2 weeks, 6 days ago
On Mon, 2024-11-04 at 08:11 -0800, Dave Hansen wrote:
> On 11/4/24 00:58, Shah, Amit wrote:
> > Right - thanks, I'll have to reword that to say the RSB is flushed
> > along with the TLB - so any action that causes the TLB to be
> > flushed
> > will also cause the RSB to be flushed.
> 
> Hold on though.
> 
> Is there a need for the RSB to be flushed at context switch?  You
> talked
> about it like there was a need:
> 
> > any hardware TLB flush results in flushing of the RSB (aka RAP in
> > AMD spec). This guarantees an RSB flush across context switches.

I want to justify that not setting X86_FEATURE_RSB_CTXSW is still doing
the right thing, albeit in hardware.
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Dave Hansen 2 weeks, 6 days ago
On 11/4/24 08:13, Shah, Amit wrote:
> I want to justify that not setting X86_FEATURE_RSB_CTXSW is still doing
> the right thing, albeit in hardware.

Let's back up a bit.

In the kernel, we have security concerns if RSB contents remain across
context switches.  If process A's RSB entries are left and then process
B uses them, there's a problem.

Today, we mitigate that issue with manual kernel RSB state zapping on
context switches (X86_FEATURE_RSB_CTXSW).

You're saying that this fancy new ERAPS feature includes a new mechanism
to zap RSB state.  But that only triggers "each time a TLB flush happens".

So what you're saying above is that you are concerned about RSB contents
sticking around across context switches.  But instead of using
X86_FEATURE_RSB_CTXSW, you believe that the new TLB-flush-triggered
ERAPS flush can be used instead.

Are we all on the same page so far?

I think you're wrong.  We can't depend on ERAPS for this.  Linux doesn't
flush the TLB on context switches when PCIDs are in play.  Thus, ERAPS
won't flush the RSB and will leave bad state in there and will leave the
system vulnerable.

Or what am I missing?
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Shah, Amit 2 weeks, 6 days ago
On Mon, 2024-11-04 at 08:26 -0800, Dave Hansen wrote:
> On 11/4/24 08:13, Shah, Amit wrote:
> > I want to justify that not setting X86_FEATURE_RSB_CTXSW is still
> > doing
> > the right thing, albeit in hardware.
> 
> Let's back up a bit.
> 
> In the kernel, we have security concerns if RSB contents remain
> across
> context switches.  If process A's RSB entries are left and then
> process
> B uses them, there's a problem.
> 
> Today, we mitigate that issue with manual kernel RSB state zapping on
> context switches (X86_FEATURE_RSB_CTXSW).
> 
> You're saying that this fancy new ERAPS feature includes a new
> mechanism
> to zap RSB state.  But that only triggers "each time a TLB flush
> happens".
> 
> So what you're saying above is that you are concerned about RSB
> contents
> sticking around across context switches.  But instead of using
> X86_FEATURE_RSB_CTXSW, you believe that the new TLB-flush-triggered
> ERAPS flush can be used instead.
> 
> Are we all on the same page so far?

All good so far.

> I think you're wrong.  We can't depend on ERAPS for this.  Linux
> doesn't
> flush the TLB on context switches when PCIDs are in play.  Thus,
> ERAPS
> won't flush the RSB and will leave bad state in there and will leave
> the
> system vulnerable.
> 
> Or what am I missing?

I just received confirmation from our hardware engineers on this too:

1. the RSB is flushed when CR3 is updated
2. the RSB is flushed when INVPCID is issued (except type 0 - single
address).

I didn't mention 1. so far, which led to your question, right?  Does
this now cover all the cases?

		Amit
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Dave Hansen 2 weeks, 6 days ago
On 11/4/24 09:22, Shah, Amit wrote:
>> I think you're wrong.  We can't depend on ERAPS for this.  Linux 
>> doesn't flush the TLB on context switches when PCIDs are in play.
>> Thus, ERAPS won't flush the RSB and will leave bad state in there
>> and will leave the system vulnerable.
>>
>> Or what am I missing?
> I just received confirmation from our hardware engineers on this too:
> 
> 1. the RSB is flushed when CR3 is updated
> 2. the RSB is flushed when INVPCID is issued (except type 0 - single
> address).
> 
> I didn't mention 1. so far, which led to your question, right?  

Not only did you not mention it, you said something _completely_
different.  So, where the documentation for this thing?  I dug through
the 57230 .zip file and I see the CPUID bit:

	24 ERAPS. Read-only. Reset: 1. Indicates support for enhanced
		  return address predictor security.

but nothing telling us how it works.

> Does this now cover all the cases?

Nope, it's worse than I thought.  Look at:

> SYM_FUNC_START(__switch_to_asm)
...
>         FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW

which does the RSB fill at the same time it switches RSP.

So we feel the need to flush the RSB on *ALL* task switches.  That
includes switches between threads in a process *AND* switches over to
kernel threads from user ones.

So, I'll flip this back around.  Today, X86_FEATURE_RSB_CTXSW zaps the
RSB whenever RSP is updated to a new task stack.  Please convince me
that ERAPS provides superior coverage or is unnecessary in all the
possible combinations switching between:

	different thread, same mm
	user=>kernel, same mm
	kernel=>user, same mm
	different mm (we already covered this)

Because several of those switches can happen without a CR3 write or INVPCID.
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Shah, Amit 2 weeks, 5 days ago
On Mon, 2024-11-04 at 09:45 -0800, Dave Hansen wrote:
> On 11/4/24 09:22, Shah, Amit wrote:
> > > I think you're wrong.  We can't depend on ERAPS for this.  Linux 
> > > doesn't flush the TLB on context switches when PCIDs are in play.
> > > Thus, ERAPS won't flush the RSB and will leave bad state in there
> > > and will leave the system vulnerable.
> > > 
> > > Or what am I missing?
> > I just received confirmation from our hardware engineers on this
> > too:
> > 
> > 1. the RSB is flushed when CR3 is updated
> > 2. the RSB is flushed when INVPCID is issued (except type 0 -
> > single
> > address).
> > 
> > I didn't mention 1. so far, which led to your question, right?  
> 
> Not only did you not mention it, you said something _completely_
> different.  So, where the documentation for this thing?  I dug
> through
> the 57230 .zip file and I see the CPUID bit:
> 
> 	24 ERAPS. Read-only. Reset: 1. Indicates support for
> enhanced
> 		  return address predictor security.
> 
> but nothing telling us how it works.

I'm expecting the APM update come out soon, but I have put together

https://amitshah.net/2024/11/eraps-reduces-software-tax-for-hardware-bugs/

based on information I have.  I think it's mostly consistent with what
I've said so far - with the exception of the mov-CR3 flush only
confirmed yesterday.

> > Does this now cover all the cases?
> 
> Nope, it's worse than I thought.  Look at:
> 
> > SYM_FUNC_START(__switch_to_asm)
> ...
> >         FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS,
> > X86_FEATURE_RSB_CTXSW
> 
> which does the RSB fill at the same time it switches RSP.
> 
> So we feel the need to flush the RSB on *ALL* task switches.  That
> includes switches between threads in a process *AND* switches over to
> kernel threads from user ones.

(since these cases are the same as those listed below, I'll only reply
in one place)

> So, I'll flip this back around.  Today, X86_FEATURE_RSB_CTXSW zaps
> the
> RSB whenever RSP is updated to a new task stack.  Please convince me
> that ERAPS provides superior coverage or is unnecessary in all the
> possible combinations switching between:
> 
> 	different thread, same mm

This case is the same userspace process with valid addresses in the RSB
for that process.  An invalid speculation isn't security sensitive,
just a misprediction that won't be retired.  So we are good here.

>	user=>kernel, same mm
>	kernel=>user, same mm

user-kernel is protected with SMEP.  Also, we don't call
FILL_RETURN_BUFFER for these switches?

> 	different mm (we already covered this)
> 
> Because several of those switches can happen without a CR3 write or
> INVPCID.


(that covers all of them IIRC)

		Amit
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Dave Hansen 2 weeks, 5 days ago
On 11/5/24 02:39, Shah, Amit wrote:
> On Mon, 2024-11-04 at 09:45 -0800, Dave Hansen wrote:
> I'm expecting the APM update come out soon, but I have put together
> 
> https://amitshah.net/2024/11/eraps-reduces-software-tax-for-hardware-bugs/
> 
> based on information I have.  I think it's mostly consistent with what
> I've said so far - with the exception of the mov-CR3 flush only
> confirmed yesterday.

That's better.  But your original cover letter did say:

	Feature documented in AMD PPR 57238.

which is technically true because the _bit_ is defined.  But it's far,
far from being sufficiently documented for Linux to actually use it.

Could we please be more careful about these in the future?

>> So, I'll flip this back around.  Today, X86_FEATURE_RSB_CTXSW zaps
>> the
>> RSB whenever RSP is updated to a new task stack.  Please convince me
>> that ERAPS provides superior coverage or is unnecessary in all the
>> possible combinations switching between:
>>
>> 	different thread, same mm
> 
> This case is the same userspace process with valid addresses in the RSB
> for that process.  An invalid speculation isn't security sensitive,
> just a misprediction that won't be retired.  So we are good here.

Does that match what the __switch_to_asm comment says, though?

>         /*
>          * When switching from a shallower to a deeper call stack
>          * the RSB may either underflow or use entries populated
>          * with userspace addresses. On CPUs where those concerns
>          * exist, overwrite the RSB with entries which capture
>          * speculative execution to prevent attack.
>          */

It is also talking just about call depth, not about same-address-space
RSB entries being harmless.  That's because this is also trying to avoid
having the kernel consume any user-placed RSB entries, regardless of
whether they're from the same mm or not.

>> 	user=>kernel, same mm
>> 	kernel=>user, same mm
> 
> user-kernel is protected with SMEP.  Also, we don't call
> FILL_RETURN_BUFFER for these switches?

Amit, I'm beginning to fear that you haven't gone and looked at the
relevant code here.  Please go look at SYM_FUNC_START(__switch_to_asm)
in arch/x86/entry/entry_64.S.  I believe this code is called for all
task switches, including switching from a user task to a kernel task.  I
also believe that FILL_RETURN_BUFFER is used unconditionally for every
__switch_to_asm call (when X86_FEATURE_RSB_CTXSW is on of course).

Could we please start over on this patch?

Let's get the ERAPS+TLB-flush nonsense out of the kernel and get the
commit message right.

Then let's go from there.
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Shah, Amit 2 weeks, 5 days ago
On Tue, 2024-11-05 at 08:19 -0800, Dave Hansen wrote:
> On 11/5/24 02:39, Shah, Amit wrote:
> > On Mon, 2024-11-04 at 09:45 -0800, Dave Hansen wrote:
> > I'm expecting the APM update come out soon, but I have put together
> > 
> > https://amitshah.net/2024/11/eraps-reduces-software-tax-for-hardware-bugs/
> > 
> > based on information I have.  I think it's mostly consistent with
> > what
> > I've said so far - with the exception of the mov-CR3 flush only
> > confirmed yesterday.
> 
> That's better.  But your original cover letter did say:
> 
> 	Feature documented in AMD PPR 57238.
> 
> which is technically true because the _bit_ is defined.  But it's
> far,
> far from being sufficiently documented for Linux to actually use it.

Yea; apologies.

> Could we please be more careful about these in the future?
> 
> > > So, I'll flip this back around.  Today, X86_FEATURE_RSB_CTXSW
> > > zaps
> > > the
> > > RSB whenever RSP is updated to a new task stack.  Please convince
> > > me
> > > that ERAPS provides superior coverage or is unnecessary in all
> > > the
> > > possible combinations switching between:
> > > 
> > > 	different thread, same mm
> > 
> > This case is the same userspace process with valid addresses in the
> > RSB
> > for that process.  An invalid speculation isn't security sensitive,
> > just a misprediction that won't be retired.  So we are good here.
> 
> Does that match what the __switch_to_asm comment says, though?
> 
> >         /*
> >          * When switching from a shallower to a deeper call stack
> >          * the RSB may either underflow or use entries populated
> >          * with userspace addresses. On CPUs where those concerns
> >          * exist, overwrite the RSB with entries which capture
> >          * speculative execution to prevent attack.
> >          */
> 
> It is also talking just about call depth, not about same-address-
> space
> RSB entries being harmless.  That's because this is also trying to
> avoid
> having the kernel consume any user-placed RSB entries, regardless of
> whether they're from the same mm or not.
> 
> > > 	user=>kernel, same mm
> > > 	kernel=>user, same mm
> > 
> > user-kernel is protected with SMEP.  Also, we don't call
> > FILL_RETURN_BUFFER for these switches?
> 
> Amit, I'm beginning to fear that you haven't gone and looked at the
> relevant code here.  Please go look at
> SYM_FUNC_START(__switch_to_asm)
> in arch/x86/entry/entry_64.S.  I believe this code is called for all
> task switches, including switching from a user task to a kernel
> task.  I
> also believe that FILL_RETURN_BUFFER is used unconditionally for
> every
> __switch_to_asm call (when X86_FEATURE_RSB_CTXSW is on of course).
> 
> Could we please start over on this patch?
> 
> Let's get the ERAPS+TLB-flush nonsense out of the kernel and get the
> commit message right.
> 
> Then let's go from there.

Alright - you've been really patient, so thanks for that.  I agree I'll
post a v2 with updated commit messages, and then continue this
discussion on user/kernel task switch.  And I'll also add an RFC tag to
it to ensure it doesn't get picked up.

		Amit
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Andrew Cooper 2 weeks, 5 days ago
> So, I'll flip this back around.  Today, X86_FEATURE_RSB_CTXSW zaps the
> RSB whenever RSP is updated to a new task stack.  Please convince me
> that ERAPS provides superior coverage or is unnecessary in all the
> possible combinations switching between:
>
> 	different thread, same mm
> 	user=>kernel, same mm
> 	kernel=>user, same mm
> 	different mm (we already covered this)
>
> Because several of those switches can happen without a CR3 write or INVPCID.

user=>kernel=>user, same mm explicitly does not want to flush the RAS,
because if the system call is shallow enough, some of the userspace RAS
is still intact on when you get back into user mode.

The case which I expect will go wrong is user=>kernel=>different kthread
because this stays on the same mm.

That does need to flush the RAS and won't hit any TLB maintenance
instructions that I'm aware of.

~Andrew
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Pawan Gupta 3 weeks, 3 days ago
On Thu, Oct 31, 2024 at 04:39:24PM +0100, Amit Shah wrote:
> From: Amit Shah <amit.shah@amd.com>
> 
> Remove explicit RET stuffing / filling on VMEXITs and context
> switches on AMD CPUs with the ERAPS feature (Turin+).
> 
> With the Enhanced Return Address Prediction Security feature,  any
> hardware TLB flush results in flushing of the RSB (aka RAP in AMD spec).
> This guarantees an RSB flush across context switches.

Is it that the mov to CR3 triggers the RSB flush?

> Feature documented in AMD PPR 57238.

I couldn't find ERAPS feature description here, I could only manage to find
the bit position:

24 	ERAPS. Read-only. Reset: 1. Indicates support for enhanced return
	address predictor security.

Could you please point me to the document/section where this is described?
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Shah, Amit 2 weeks, 6 days ago
On Thu, 2024-10-31 at 16:03 -0700, Pawan Gupta wrote:
> On Thu, Oct 31, 2024 at 04:39:24PM +0100, Amit Shah wrote:
> > From: Amit Shah <amit.shah@amd.com>
> > 
> > Remove explicit RET stuffing / filling on VMEXITs and context
> > switches on AMD CPUs with the ERAPS feature (Turin+).
> > 
> > With the Enhanced Return Address Prediction Security feature,  any
> > hardware TLB flush results in flushing of the RSB (aka RAP in AMD
> > spec).
> > This guarantees an RSB flush across context switches.
> 
> Is it that the mov to CR3 triggers the RSB flush?

The INVPCID instruction, that causes the TLB flush, is the trigger
here.

> > Feature documented in AMD PPR 57238.
> 
> I couldn't find ERAPS feature description here, I could only manage
> to find
> the bit position:
> 
> 24 	ERAPS. Read-only. Reset: 1. Indicates support for enhanced
> return
> 	address predictor security.
> 
> Could you please point me to the document/section where this is
> described?

Unfortunately, that's all we have right now in the official
documentation.

I've put up some notes in
https://amitshah.net/2024/11/eraps-reduces-software-tax-for-hardware-bugs/

Thanks,
		Amit
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Andrew Cooper 2 weeks, 6 days ago
> Unfortunately, that's all we have right now in the official
> documentation.
>
> I've put up some notes in
> https://amitshah.net/2024/11/eraps-reduces-software-tax-for-hardware-bugs/

I appreciate the attempt to get a few details out, but this is very
confused on bunch of details.

Most importantly, you've described Intel RSB underflows, but named it
AMD BTC.

"Retbleed" is two totally different things.   I begged the discoverers
to give it two names, and I also begged the x86 maintainers to not alias
them in Linux's view of the world, but alas.

AMD's BTC comes from a bad branch type prediction, and a late resteer
from the ret uop executing.   It has nothing to do with RAS/RSB
underflow conditions.

~Andrew
Re: [PATCH 1/2] x86: cpu/bugs: add support for AMD ERAPS feature
Posted by Shah, Amit 2 weeks, 6 days ago
On Mon, 2024-11-04 at 14:52 +0000, Andrew Cooper wrote:
> > Unfortunately, that's all we have right now in the official
> > documentation.
> > 
> > I've put up some notes in
> > https://amitshah.net/2024/11/eraps-reduces-software-tax-for-hardware-bugs/
> 
> I appreciate the attempt to get a few details out, but this is very
> confused on bunch of details.
> 
> Most importantly, you've described Intel RSB underflows, but named it
> AMD BTC.
> 
> "Retbleed" is two totally different things.   I begged the
> discoverers
> to give it two names, and I also begged the x86 maintainers to not
> alias
> them in Linux's view of the world, but alas.
> 
> AMD's BTC comes from a bad branch type prediction, and a late resteer
> from the ret uop executing.   It has nothing to do with RAS/RSB
> underflow conditions.

BTC indeed is only branch-type confusion.  The point I wanted to make
there is that to entirely get rid of X86_FEATURE_RSB_CTXSW, I had to
confirm that AMD CPUs do not speculate return addresses from the BTB or
BHB since BTC was fixed.  (Or, in other words, to clarify the previous
comments there that said that AMD predicts from the BTB/BHB in every
case).

So - the only point in saying BTC_NO is relevant here is me confirming
that AMD is not going to speculate return addresses from outside of the
RSB. And that comment can now reflect reality.

		Amit
[tip: x86/cpu] x86/bugs: Add support for AMD ERAPS feature
Posted by tip-bot2 for Amit Shah 2 weeks, 6 days ago
The following commit has been merged into the x86/cpu branch of tip:

Commit-ID:     b5cbd5ff79a06395a17f8f524f6f8e90dcfe42d1
Gitweb:        https://git.kernel.org/tip/b5cbd5ff79a06395a17f8f524f6f8e90dcfe42d1
Author:        Amit Shah <amit.shah@amd.com>
AuthorDate:    Thu, 31 Oct 2024 16:39:24 +01:00
Committer:     Borislav Petkov (AMD) <bp@alien8.de>
CommitterDate: Mon, 04 Nov 2024 06:20:22 +01:00

x86/bugs: Add support for AMD ERAPS feature

Remove explicit RET stuffing / filling on VMEXITs and context switches
on AMD CPUs with the ERAPS feature (Zen5).

With the Enhanced Return Address Prediction Security feature, any
hardware TLB flush results in flushing of the RSB (aka RAP in AMD spec).
This guarantees an RSB flush across context switches.  The feature also
explicitly tags host and guest addresses - eliminating the need for
explicit flushing of the RSB on VMEXIT.

The BTC_NO feature in AMD CPUs ensures RET predictions do not speculate
from outside the RSB. Together, the BTC_NO and ERAPS features ensure no
flushing or stuffing of the RSB is necessary anymore.

Feature documented in AMD PPR 57238.

  [ bp: Massage commit message. ]

Signed-off-by: Amit Shah <amit.shah@amd.com>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Link: https://lore.kernel.org/r/20241031153925.36216-2-amit@kernel.org
---
 Documentation/admin-guide/hw-vuln/spectre.rst |  5 +--
 arch/x86/include/asm/cpufeatures.h            |  1 +-
 arch/x86/include/asm/nospec-branch.h          | 11 ++++++-
 arch/x86/kernel/cpu/bugs.c                    | 36 ++++++++++++------
 4 files changed, 40 insertions(+), 13 deletions(-)

diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst
index 132e0bc..647c10c 100644
--- a/Documentation/admin-guide/hw-vuln/spectre.rst
+++ b/Documentation/admin-guide/hw-vuln/spectre.rst
@@ -417,9 +417,10 @@ The possible values in this file are:
 
   - Return stack buffer (RSB) protection status:
 
-  =============   ===========================================
+  =============   ========================================================
   'RSB filling'   Protection of RSB on context switch enabled
-  =============   ===========================================
+  'ERAPS'         Hardware RSB flush on context switches + guest/host tags
+  =============   ========================================================
 
   - EIBRS Post-barrier Return Stack Buffer (PBRSB) protection status:
 
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 05e985c..7f78212 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -457,6 +457,7 @@
 #define X86_FEATURE_AUTOIBRS		(20*32+ 8) /* Automatic IBRS */
 #define X86_FEATURE_NO_SMM_CTL_MSR	(20*32+ 9) /* SMM_CTL MSR is not present */
 
+#define X86_FEATURE_ERAPS		(20*32+24) /* Enhanced RAP / RSB / RAS Security */
 #define X86_FEATURE_SBPB		(20*32+27) /* Selective Branch Prediction Barrier */
 #define X86_FEATURE_IBPB_BRTYPE		(20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */
 #define X86_FEATURE_SRSO_NO		(20*32+29) /* CPU is not affected by SRSO */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index ff5f1ec..d7587b4 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -117,6 +117,17 @@
  * We define a CPP macro such that it can be used from both .S files and
  * inline assembly. It's possible to do a .macro and then include that
  * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ *
+ * AMD CPUs with the ERAPS feature may have a larger default RSB.  These CPUs
+ * use the default number of entries on a host, and can optionally (based on
+ * hypervisor setup) use 32 (old) or the new default in a guest.  The number
+ * of default entries is reflected in CPUID 8000_0021:EBX[23:16].
+ *
+ * With the ERAPS feature, RSB filling is not necessary anymore: the RSB is
+ * auto-cleared on a TLB flush (i.e. a context switch).  Adapting the value of
+ * RSB_CLEAR_LOOPS below for ERAPS would change it to a runtime variable
+ * instead of the current compile-time constant, so leave it as-is, as this
+ * works for both older CPUs, as well as newer ones with ERAPS.
  */
 
 #define RETPOLINE_THUNK_SIZE	32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index d191542..3825779 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -1811,9 +1811,6 @@ static void __init spectre_v2_select_mitigation(void)
 	 *    speculated return targets may come from the branch predictor,
 	 *    which could have a user-poisoned BTB or BHB entry.
 	 *
-	 *    AMD has it even worse: *all* returns are speculated from the BTB,
-	 *    regardless of the state of the RSB.
-	 *
 	 *    When IBRS or eIBRS is enabled, the "user -> kernel" attack
 	 *    scenario is mitigated by the IBRS branch prediction isolation
 	 *    properties, so the RSB buffer filling wouldn't be necessary to
@@ -1821,6 +1818,15 @@ static void __init spectre_v2_select_mitigation(void)
 	 *
 	 *    The "user -> user" attack scenario is mitigated by RSB filling.
 	 *
+	 *    AMD CPUs without the BTC_NO bit may speculate return targets
+	 *    from the BTB. CPUs with BTC_NO do not speculate return targets
+	 *    from the BTB, even on RSB underflow.
+	 *
+	 *    The ERAPS CPU feature (which implies the presence of BTC_NO)
+	 *    adds an RSB flush each time a TLB flush happens (i.e., on every
+	 *    context switch).  So, RSB filling is not necessary for this
+	 *    attack type with ERAPS present.
+	 *
 	 * 2) Poisoned RSB entry
 	 *
 	 *    If the 'next' in-kernel return stack is shorter than 'prev',
@@ -1831,17 +1837,24 @@ static void __init spectre_v2_select_mitigation(void)
 	 *    eIBRS.
 	 *
 	 *    The "user -> user" scenario, also known as SpectreBHB, requires
-	 *    RSB clearing.
+	 *    RSB clearing on processors without ERAPS.
 	 *
 	 * So to mitigate all cases, unconditionally fill RSB on context
-	 * switches.
-	 *
-	 * FIXME: Is this pointless for retbleed-affected AMD?
+	 * switches when ERAPS is not present.
 	 */
-	setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
-	pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
+	if (!boot_cpu_has(X86_FEATURE_ERAPS)) {
+		setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
+		pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
 
-	spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+		/*
+		 * For guest -> host (or vice versa) RSB poisoning scenarios,
+		 * determine the mitigation mode here.  With ERAPS, RSB
+		 * entries are tagged as host or guest - ensuring that neither
+		 * the host nor the guest has to clear or fill RSB entries to
+		 * avoid poisoning; skip RSB filling at VMEXIT in that case.
+		 */
+		spectre_v2_determine_rsb_fill_type_at_vmexit(mode);
+	}
 
 	/*
 	 * Retpoline protects the kernel, but doesn't protect firmware.  IBRS
@@ -2839,7 +2852,7 @@ static ssize_t spectre_v2_show_state(char *buf)
 	    spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE)
 		return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n");
 
-	return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n",
+	return sysfs_emit(buf, "%s%s%s%s%s%s%s%s%s\n",
 			  spectre_v2_strings[spectre_v2_enabled],
 			  ibpb_state(),
 			  boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "",
@@ -2847,6 +2860,7 @@ static ssize_t spectre_v2_show_state(char *buf)
 			  boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "",
 			  pbrsb_eibrs_state(),
 			  spectre_bhi_state(),
+			  boot_cpu_has(X86_FEATURE_ERAPS) ? "; ERAPS hardware RSB flush" : "",
 			  /* this should always be at the end */
 			  spectre_v2_module_string());
 }