[PATCH for-4.21 v2] x86/AMD: avoid REP MOVSB for Zen3/4

Jan Beulich posted 1 patch 2 weeks, 3 days ago
Patches applied successfully (tree, apply log)
git fetch https://gitlab.com/xen-project/patchew/xen tags/patchew/2e5698e1-a21b-489f-863e-9e77e5dfaa13@suse.com
[PATCH for-4.21 v2] x86/AMD: avoid REP MOVSB for Zen3/4
Posted by Jan Beulich 2 weeks, 3 days ago
Along with Zen2 (which doesn't expose ERMS), both families reportedly
suffer from sub-optimal aliasing detection when deciding whether REP MOVSB
can actually be carried out the accelerated way. Therefore we want to
avoid its use in the common case of memcpy(); copy_page_hot() is fine, as
its two pointers are always going to be having the same low 5 bits.

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Signed-off-by: Jan Beulich <jbeulich@suse.com>
Reviewed-by: Jason Andryuk <jason.andryuk@amd.com>
Acked-by: Roger Pau Monné <roger.pau@citrix.com>
Release-Acked-by: Oleksii Kurochko<oleksii.kurochko@gmail.com>
---
Trying to amend the comment in cpufeatures.h (e.g. "..., i.e. ERMS minus
the Zen3/4 pointer aliasing issue") makes it get longish, so I kept it at
the shortened form.
---
v2: Leave page copying alone.

--- a/xen/arch/x86/cpu/amd.c
+++ b/xen/arch/x86/cpu/amd.c
@@ -1386,6 +1386,10 @@ static void cf_check init_amd(struct cpu
 
 	check_syscfg_dram_mod_en();
 
+	if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS)
+	    && c->family != 0x19 /* Zen3/4 */)
+		setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB);
+
 	amd_log_freq(c);
 }
 
--- a/xen/arch/x86/cpu/intel.c
+++ b/xen/arch/x86/cpu/intel.c
@@ -684,6 +684,9 @@ static void cf_check init_intel(struct c
 	 */
 	if (c == &boot_cpu_data && c->vfm == INTEL_SKYLAKE_X)
 		setup_clear_cpu_cap(X86_FEATURE_CLWB);
+
+	if (c == &boot_cpu_data && cpu_has(c, X86_FEATURE_ERMS))
+		setup_force_cpu_cap(X86_FEATURE_XEN_REP_MOVSB);
 }
 
 const struct cpu_dev __initconst_cf_clobber intel_cpu_dev = {
--- a/xen/arch/x86/include/asm/cpufeatures.h
+++ b/xen/arch/x86/include/asm/cpufeatures.h
@@ -7,7 +7,7 @@
 #define FSCAPINTS FEATURESET_NR_ENTRIES
 
 /* Synthetic words follow the featureset words. */
-#define X86_NR_SYNTH 1
+#define X86_NR_SYNTH 2
 #define X86_SYNTH(x) (FSCAPINTS * 32 + (x))
 
 /* Synthetic features */
@@ -43,6 +43,7 @@ XEN_CPUFEATURE(IBPB_ENTRY_PV,     X86_SY
 XEN_CPUFEATURE(IBPB_ENTRY_HVM,    X86_SYNTH(29)) /* MSR_PRED_CMD used by Xen for HVM */
 XEN_CPUFEATURE(USE_VMCALL,        X86_SYNTH(30)) /* Use VMCALL instead of VMMCALL */
 XEN_CPUFEATURE(PDX_COMPRESSION,   X86_SYNTH(31)) /* PDX compression */
+XEN_CPUFEATURE(XEN_REP_MOVSB,     X86_SYNTH(32)) /* REP MOVSB used for memcpy() */
 
 /* Bug words follow the synthetic words. */
 #define X86_NR_BUG 1
--- a/xen/arch/x86/memcpy.S
+++ b/xen/arch/x86/memcpy.S
@@ -10,7 +10,7 @@ FUNC(memcpy)
          * precautions were taken).
          */
         ALTERNATIVE "and $7, %edx; shr $3, %rcx", \
-                    STR(rep movsb; RET), X86_FEATURE_ERMS
+                    STR(rep movsb; RET), X86_FEATURE_XEN_REP_MOVSB
         rep movsq
         or      %edx, %ecx
         jz      1f