Restructure mmio mitigation to use select/update/apply functions to
create consistent vulnerability handling.
Signed-off-by: David Kaplan <david.kaplan@amd.com>
---
arch/x86/kernel/cpu/bugs.c | 77 +++++++++++++++++++++++++-------------
1 file changed, 51 insertions(+), 26 deletions(-)
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 2fd58b7089c4..a727f7998bec 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -68,6 +68,8 @@ static void __init taa_select_mitigation(void);
static void __init taa_update_mitigation(void);
static void __init taa_apply_mitigation(void);
static void __init mmio_select_mitigation(void);
+static void __init mmio_update_mitigation(void);
+static void __init mmio_apply_mitigation(void);
static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
static void __init srso_select_mitigation(void);
@@ -194,6 +196,7 @@ void __init cpu_select_mitigations(void)
l1tf_select_mitigation();
mds_select_mitigation();
taa_select_mitigation();
+ mmio_select_mitigation();
md_clear_select_mitigation();
srbds_select_mitigation();
l1d_flush_select_mitigation();
@@ -211,9 +214,11 @@ void __init cpu_select_mitigations(void)
*/
mds_update_mitigation();
taa_update_mitigation();
+ mmio_update_mitigation();
mds_apply_mitigation();
taa_apply_mitigation();
+ mmio_apply_mitigation();
}
/*
@@ -511,24 +516,60 @@ static void __init mmio_select_mitigation(void)
return;
}
- if (mmio_mitigation == MMIO_MITIGATION_OFF)
- return;
+ /* Microcode will be checked in mmio_update_mitigation(). */
+ if (mmio_mitigation == MMIO_MITIGATION_AUTO)
+ mmio_mitigation = MMIO_MITIGATION_VERW;
/*
* Enable CPU buffer clear mitigation for host and VMM, if also affected
- * by MDS or TAA. Otherwise, enable mitigation for VMM only.
+ * by MDS or TAA.
*/
- if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
- boot_cpu_has(X86_FEATURE_RTM)))
- setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
+ if (boot_cpu_has_bug(X86_BUG_MDS) || taa_vulnerable())
+ verw_mitigation_selected = true;
+}
+
+static void __init mmio_update_mitigation(void)
+{
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || cpu_mitigations_off())
+ return;
+
+ if (verw_mitigation_selected)
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+
+ if (mmio_mitigation == MMIO_MITIGATION_VERW) {
+ /*
+ * Check if the system has the right microcode.
+ *
+ * CPU Fill buffer clear mitigation is enumerated by either an explicit
+ * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
+ * affected systems.
+ */
+ if (!((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
+ (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
+ boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
+ !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))))
+ mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
+ }
+
+ if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
+ pr_info("Unknown: No mitigations\n");
+ else
+ pr_info("%s\n", mmio_strings[mmio_mitigation]);
+}
+
+static void __init mmio_apply_mitigation(void)
+{
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return;
/*
- * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based
- * mitigations, disable KVM-only mitigation in that case.
+ * Only enable the VMM mitigation if the CPU buffer clear mitigation is
+ * not being used.
*/
- if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
+ if (verw_mitigation_selected) {
+ setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
static_branch_disable(&mmio_stale_data_clear);
- else
+ } else
static_branch_enable(&mmio_stale_data_clear);
/*
@@ -539,21 +580,6 @@ static void __init mmio_select_mitigation(void)
if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO))
static_branch_enable(&mds_idle_clear);
- /*
- * Check if the system has the right microcode.
- *
- * CPU Fill buffer clear mitigation is enumerated by either an explicit
- * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
- * affected systems.
- */
- if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
- (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
- boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
- !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)))
- mmio_mitigation = MMIO_MITIGATION_VERW;
- else
- mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
-
if (mmio_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
}
@@ -676,7 +702,6 @@ static void __init md_clear_update_mitigation(void)
static void __init md_clear_select_mitigation(void)
{
- mmio_select_mitigation();
rfds_select_mitigation();
/*
--
2.34.1
On Mon, Mar 10, 2025 at 11:39:50AM -0500, David Kaplan wrote:
> @@ -511,24 +516,60 @@ static void __init mmio_select_mitigation(void)
> return;
> }
>
> - if (mmio_mitigation == MMIO_MITIGATION_OFF)
> - return;
> + /* Microcode will be checked in mmio_update_mitigation(). */
> + if (mmio_mitigation == MMIO_MITIGATION_AUTO)
> + mmio_mitigation = MMIO_MITIGATION_VERW;
>
> /*
> * Enable CPU buffer clear mitigation for host and VMM, if also affected
> - * by MDS or TAA. Otherwise, enable mitigation for VMM only.
> + * by MDS or TAA.
> */
> - if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
> - boot_cpu_has(X86_FEATURE_RTM)))
> - setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
> + if (boot_cpu_has_bug(X86_BUG_MDS) || taa_vulnerable())
> + verw_mitigation_selected = true;
> +}
So applied this reads strange:
if (mmio_mitigation == MMIO_MITIGATION_AUTO)
mmio_mitigation = MMIO_MITIGATION_VERW;
if (boot_cpu_has_bug(X86_BUG_MDS) || taa_vulnerable())
verw_mitigation_selected = true;
I'd expect to see:
if (mmio_mitigation == MMIO_MITIGATION_AUTO) {
mmio_mitigation = MMIO_MITIGATION_VERW;
verw_mitigation_selected = true;
}
if (boot_cpu_has_bug(X86_BUG_MDS) || taa_vulnerable())
verw_mitigation_selected = true;
because the above branch already selected MMIO_MITIGATION_VERW so we might as
well set verw_mitigation_selected, right?
> +static void __init mmio_update_mitigation(void)
> +{
> + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || cpu_mitigations_off())
> + return;
> +
> + if (verw_mitigation_selected)
> + mmio_mitigation = MMIO_MITIGATION_VERW;
... and the above change would obviate this one.
Looking at that verw_mitigation_selected switch - that seems like the higher
prio thing we should be looking at first as in: *something* selected VERW
mitigation so we must honor it.
And then the *_select_mitigation() functions will simply use the respective
*_mitigation variable to perhaps override it only when really needed.
I think.
Or maybe I'm missing an aspect.
Because if we make verw_mitigation_selected the higher prio thing, we can
remove some of that additional checking.
Or?
> +
> + if (mmio_mitigation == MMIO_MITIGATION_VERW) {
I.e., in mmio_update_mitigation(), the only check you need to do is:
if (verw_mitigation_selected)
and then adjust mmio_mitigation depending on microcode presence or not.
> + /*
> + * Check if the system has the right microcode.
> + *
> + * CPU Fill buffer clear mitigation is enumerated by either an explicit
> + * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
> + * affected systems.
> + */
> + if (!((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) ||
> + (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
> + boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
> + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))))
> + mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
... as you do here.
> + }
> +
> + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
Btw, that UNKNOWN thing is just silly. Looking at git history:
7df548840c49 ("x86/bugs: Add "unknown" reporting for MMIO Stale Data")
this was added just so that it doesn't say "Not affected" about those CPUs but
"unknown."
But
"Mitigation is not deployed when the status is unknown."
so if it is only about reporting, I think we can synthesize the logic of this:
if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
}
into a separate function and get rid of that X86_BUG_MMIO_UNKNOWN thing.
Pawan?
I'll try to whack it later to see how ugly it gets.
> + pr_info("Unknown: No mitigations\n");
> + else
> + pr_info("%s\n", mmio_strings[mmio_mitigation]);
> +}
> +
> +static void __init mmio_apply_mitigation(void)
> +{
> + if (mmio_mitigation == MMIO_MITIGATION_OFF)
> + return;
>
> /*
> - * X86_FEATURE_CLEAR_CPU_BUF could be enabled by other VERW based
> - * mitigations, disable KVM-only mitigation in that case.
> + * Only enable the VMM mitigation if the CPU buffer clear mitigation is
> + * not being used.
So this comment doesn't fit with what the code now does...
> */
> - if (boot_cpu_has(X86_FEATURE_CLEAR_CPU_BUF))
> + if (verw_mitigation_selected) {
... which is to check whether something enabled the VERW mitigation...
> + setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF);
> static_branch_disable(&mmio_stale_data_clear);
> - else
> + } else
> static_branch_enable(&mmio_stale_data_clear);
{ } around the else branch too pls.
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Thu, Mar 13, 2025 at 10:36:17AM +0100, Borislav Petkov wrote:
> I'd expect to see:
>
> if (mmio_mitigation == MMIO_MITIGATION_AUTO) {
> mmio_mitigation = MMIO_MITIGATION_VERW;
> verw_mitigation_selected = true;
> }
>
> if (boot_cpu_has_bug(X86_BUG_MDS) || taa_vulnerable())
> verw_mitigation_selected = true;
>
> because the above branch already selected MMIO_MITIGATION_VERW so we might as
> well set verw_mitigation_selected, right?
There is a subtle difference between setting verw_mitigation_selected and
MMIO_MITIGATION_VERW. The former is a system-wide switch that indicates
VERW is needed at both kernel-exit and VMenter. MMIO Stale Data is
different from other VERW based mitigations because it only requires VERW
at VMenter, when not affected by MDS/TAA. So, turning the system-wide knob
here would be wrong.
> > +static void __init mmio_update_mitigation(void)
> > +{
> > + if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || cpu_mitigations_off())
> > + return;
> > +
> > + if (verw_mitigation_selected)
> > + mmio_mitigation = MMIO_MITIGATION_VERW;
[...]
> > + if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
>
> Btw, that UNKNOWN thing is just silly. Looking at git history:
>
> 7df548840c49 ("x86/bugs: Add "unknown" reporting for MMIO Stale Data")
>
> this was added just so that it doesn't say "Not affected" about those CPUs but
> "unknown."
>
> But
>
> "Mitigation is not deployed when the status is unknown."
>
> so if it is only about reporting, I think we can synthesize the logic of this:
>
> if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
> if (cpu_matches(cpu_vuln_blacklist, MMIO))
> setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
> else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
> setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
> }
>
> into a separate function and get rid of that X86_BUG_MMIO_UNKNOWN thing.
Hmm, that would not be straightforward, especially for sysfs status. The
above logic requires parsing the cpu_vuln_whitelist which is not available
after init. Moreover, sysfs reads would become slower if it has to read an
MSR and parse tables.
Also, cpu_show_common() by default shows "Not affected" in the absence of
bug bit. So setting X86_BUG_MMIO_UNKNOWN is simpler overall.
cpu_show_common(bug)
{
if (!boot_cpu_has_bug(bug))
return sysfs_emit(buf, "Not affected\n");
On Thu, Mar 13, 2025 at 12:26:06PM -0700, Pawan Gupta wrote:
> Hmm, that would not be straightforward, especially for sysfs status.
See below:
- the unknown thing is done only for this vuln and not for the others
- it doesn't do anything besides reporting things differently - it doesn't
apply any mitigations - it is simply causing unnecessary complications which
don't bring anything besides maintenance overhead. Unless I'm missing an
angle...
- all the currently unaffected CPUs can also be in "unknown" status so why is
this special?
IOW, just whack the thing.
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 46935f29805c..75d77a5c28d7 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -518,7 +518,7 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 4386aa6c69e1..a91a1cac6183 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -428,7 +428,6 @@ static const char * const mmio_strings[] = {
static void __init mmio_select_mitigation(void)
{
if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
- boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) ||
cpu_mitigations_off()) {
mmio_mitigation = MMIO_MITIGATION_OFF;
return;
@@ -591,8 +590,6 @@ static void __init md_clear_update_mitigation(void)
pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
- else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
- pr_info("MMIO Stale Data: Unknown: No mitigations\n");
if (boot_cpu_has_bug(X86_BUG_RFDS))
pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]);
}
@@ -2819,9 +2816,6 @@ static ssize_t tsx_async_abort_show_state(char *buf)
static ssize_t mmio_stale_data_show_state(char *buf)
{
- if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
- return sysfs_emit(buf, "Unknown: No mitigations\n");
-
if (mmio_mitigation == MMIO_MITIGATION_OFF)
return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
@@ -3006,7 +3000,6 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr
return srbds_show_state(buf);
case X86_BUG_MMIO_STALE_DATA:
- case X86_BUG_MMIO_UNKNOWN:
return mmio_stale_data_show_state(buf);
case X86_BUG_RETBLEED:
@@ -3075,10 +3068,7 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *
ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN))
- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN);
- else
- return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
}
ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 12126adbc3a9..4ada55f126ae 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1402,15 +1402,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
* Affected CPU list is generally enough to enumerate the vulnerability,
* but for virtualization case check for ARCH_CAP MSR bits also, VMM may
* not want the guest to enumerate the bug.
- *
- * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist,
- * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits.
*/
if (!arch_cap_mmio_immune(x86_arch_cap_msr)) {
if (cpu_matches(cpu_vuln_blacklist, MMIO))
setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
- else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO))
- setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN);
}
if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h
index 9e3fa7942e7d..e88500d90309 100644
--- a/tools/arch/x86/include/asm/cpufeatures.h
+++ b/tools/arch/x86/include/asm/cpufeatures.h
@@ -508,7 +508,7 @@
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */
#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */
-#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
+/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) * "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */
#define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */
#define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */
#define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Mar 24, 2025 at 10:29:15AM +0100, Borislav Petkov wrote:
> On Thu, Mar 13, 2025 at 12:26:06PM -0700, Pawan Gupta wrote:
> > Hmm, that would not be straightforward, especially for sysfs status.
>
> See below:
>
> - the unknown thing is done only for this vuln and not for the others
>
> - it doesn't do anything besides reporting things differently - it doesn't
> apply any mitigations - it is simply causing unnecessary complications which
> don't bring anything besides maintenance overhead. Unless I'm missing an
> angle...

"Unknown" status reporting was requested by Andrew Cooper. I am not able to
find that conversation though. IIRC, the reason was out-of-service CPUs were
not tested for the presence of vulnerability. Adding Andrew to Cc.

> - all the currently unaffected CPUs can also be in "unknown" status so why is
> this special?

Makes sense.

> IOW, just whack the thing.

I will let Andrew comment on this.
On Mon, Mar 24, 2025 at 10:41:15AM -0700, Pawan Gupta wrote:
> "Unknown" status reporting was requested by Andrew Cooper. I am not able to
> find that conversation though. IIRC, the reason was out-of-service CPUs
> were not tested for the presence of vulnerability. Adding Andrew to Cc.
Right, except we don't do that for other vulns... let's wait for him to
respond though...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
Carving this thing out into a separate thread:
On Thu, Mar 13, 2025 at 12:26:06PM -0700, Pawan Gupta wrote:
> On Thu, Mar 13, 2025 at 10:36:17AM +0100, Borislav Petkov wrote:
> > I'd expect to see:
> >
> > if (mmio_mitigation == MMIO_MITIGATION_AUTO) {
> > mmio_mitigation = MMIO_MITIGATION_VERW;
> > verw_mitigation_selected = true;
> > }
> >
> > if (boot_cpu_has_bug(X86_BUG_MDS) || taa_vulnerable())
> > verw_mitigation_selected = true;
> >
> > because the above branch already selected MMIO_MITIGATION_VERW so we might as
> > well set verw_mitigation_selected, right?
>
> There is a subtle difference between setting verw_mitigation_selected and
> MMIO_MITIGATION_VERW. The former is a system-wide switch that indicates
> VERW is needed at both kernel-exit and VMenter. MMIO Stale Data is
> different from other VERW based mitigations because it only requires VERW
> at VMenter, when not affected by MDS/TAA. So, turning the system-wide knob
> here would be wrong.
Realistically speaking, do we have a machine where you *only* enable VERW on
VMENTER?
I'm not talking about some experimentation scenario where one measures which
mitigations cost how much.
Do we have a real-life hw configuration where the *only* VERW mitigation
needed is at VMENTER because that machine is affected *only* by MMIO and no
other VERW-based mitigation is needed?
Because if not, we might as well drop that too-special distinction and
simplify that maze of nasty conditional spaghetti...
Hmmm?
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Mar 18, 2025 at 03:16:59PM +0100, Borislav Petkov wrote:
> Carving this thing out into a separate thread:
>
> On Thu, Mar 13, 2025 at 12:26:06PM -0700, Pawan Gupta wrote:
> > On Thu, Mar 13, 2025 at 10:36:17AM +0100, Borislav Petkov wrote:
> > > I'd expect to see:
> > >
> > > if (mmio_mitigation == MMIO_MITIGATION_AUTO) {
> > > mmio_mitigation = MMIO_MITIGATION_VERW;
> > > verw_mitigation_selected = true;
> > > }
> > >
> > > if (boot_cpu_has_bug(X86_BUG_MDS) || taa_vulnerable())
> > > verw_mitigation_selected = true;
> > >
> > > because the above branch already selected MMIO_MITIGATION_VERW so we might as
> > > well set verw_mitigation_selected, right?
> >
> > There is a subtle difference between setting verw_mitigation_selected and
> > MMIO_MITIGATION_VERW. The former is a system-wide switch that indicates
> > VERW is needed at both kernel-exit and VMenter. MMIO Stale Data is
> > different from other VERW based mitigations because it only requires VERW
> > at VMenter, when not affected by MDS/TAA. So, turning the system-wide knob
> > here would be wrong.
>
> Realistically speaking, do we have a machine where you *only* enable VERW on
> VMENTER?
Yes, more on it below.
> I'm not talking about some experimentation scenario where one measures which
> mitigations cost how much.
>
> Do we have a real-life hw configuration where the *only* VERW mitigation
> needed is at VMENTER because that machine is affected *only* by MMIO and no
> other VERW-based mitigation is needed?
Rocket Lake, Comet Lake, Ice Lake with tsx=off only require VERW at
VMENTER. There are other MMIO affected CPUs that are not affected by MDS
and do not support TSX or disable it by default.
On Tue, Mar 18, 2025 at 09:25:05AM -0700, Pawan Gupta wrote:
> Rocket Lake, Comet Lake, Ice Lake with tsx=off only require VERW at
> VMENTER. There are other MMIO affected CPUs that are not affected by MDS
> and do not support TSX or disable it by default.
So all those CPUs are only affected by MMIO and not affected by neither of
those:
TAA, RFDS, MDS
?
Or is that the case only when TSX is not enabled/not present there?
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Mar 18, 2025 at 05:34:51PM +0100, Borislav Petkov wrote:
> On Tue, Mar 18, 2025 at 09:25:05AM -0700, Pawan Gupta wrote:
> > Rocket Lake, Comet Lake, Ice Lake with tsx=off only require VERW at
> > VMENTER. There are other MMIO affected CPUs that are not affected by MDS
> > and do not support TSX or disable it by default.
>
> So all those CPUs are only affected by MMIO and not affected by neither of
> those:
>
> TAA, RFDS, MDS

That is correct, they are not affected by MDS, TAA and RFDS.

> Or is that the case only when TSX is not enabled/not present there?

As per the affected CPU table [1], Ice Lake is not affected by TAA even if
TSX is enabled.

[1] https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html#tab-blade-1-2
On Tue, Mar 18, 2025 at 09:56:45AM -0700, Pawan Gupta wrote:
> On Tue, Mar 18, 2025 at 05:34:51PM +0100, Borislav Petkov wrote:
> > On Tue, Mar 18, 2025 at 09:25:05AM -0700, Pawan Gupta wrote:
> > > Rocket Lake, Comet Lake, Ice Lake with tsx=off only require VERW at
> > > VMENTER. There are other MMIO affected CPUs that are not affected by MDS
> > > and do not support TSX or disable it by default.
> >
> > So all those CPUs are only affected by MMIO and not affected by neither of
> > those:
> >
> > TAA, RFDS, MDS
>
> That is correct, they are not affected by MDS, TAA and RFDS.
>
> > Or is that the case only when TSX is not enabled/not present there?
>
> As per the affected CPU table [1], Ice Lake is not affected by TAA even if
> TSX is enabled.
That table is insane - I need at least 4 monitors to stare at it properly. :-P
Anyway, so I'm wondering if we special-case those CPUs and have them select
a special
MMIO_MITIGATION_VERW_VM
case and keep them separate from that whole
CPUs-can-be-affected-by-multiple-vulns and the mitigation for all of them is
VERW.
They will enable mmio_stale_data_clear and will be out of the equation.
Which will make this other logic simpler.
Hmm...
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Mar 18, 2025 at 06:37:08PM +0100, Borislav Petkov wrote:
> On Tue, Mar 18, 2025 at 09:56:45AM -0700, Pawan Gupta wrote:
> > On Tue, Mar 18, 2025 at 05:34:51PM +0100, Borislav Petkov wrote:
> > > On Tue, Mar 18, 2025 at 09:25:05AM -0700, Pawan Gupta wrote:
> > > > Rocket Lake, Comet Lake, Ice Lake with tsx=off only require VERW at
> > > > VMENTER. There are other MMIO affected CPUs that are not affected by MDS
> > > > and do not support TSX or disable it by default.
> > >
> > > So all those CPUs are only affected by MMIO and not affected by neither of
> > > those:
> > >
> > > TAA, RFDS, MDS
> >
> > That is correct, they are not affected by MDS, TAA and RFDS.
> >
> > > Or is that the case only when TSX is not enabled/not present there?
> >
> > As per the affected CPU table [1], Ice Lake is not affected by TAA even if
> > TSX is enabled.
>
> That table is insane - I need at least 4 monitors to stare at it properly. :-P

:D Totally agree. A machine readable format is here:
https://github.com/intel/Intel-affected-processor-list/blob/main/Intel_affected_processor_list.csv

> Anyway, so I'm wondering if we special-case those CPUs and have them select
> a special
>
> MMIO_MITIGATION_VERW_VM
>
> case and keep them separate from that whole
> CPUs-can-be-affected-by-multiple-vulns and the mitigation for all of them is
> VERW.
>
> They will enable mmio_stale_data_clear and will be out of the equation.
>
> Which will make this other logic simpler.

Likely yes, I will give this a shot and see how it compares with the
current implementation. Thanks for the suggestion.
On Tue, Mar 18, 2025 at 11:15:15AM -0700, Pawan Gupta wrote:
> :D Totally agree. A machine readable format is here:
> https://github.com/intel/Intel-affected-processor-list/blob/main/Intel_affected_processor_list.csv
Nice.
> Likely yes, I will give this a shot and see how it compares with the
> current implementation. Thanks for the suggestion.
Thanks!
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
© 2016 - 2026 Red Hat, Inc.