From: Eric Biggers <ebiggers@google.com>
Lift zmm_exclusion_list in aesni-intel_glue.c into the x86 CPU setup
code, and add a new x86 CPU feature flag X86_FEATURE_PREFER_YMM that is
set when the CPU is on this list.
This allows other code in arch/x86/, such as the CRC library code, to
apply the same exclusion list when deciding whether to execute 256-bit
or 512-bit optimized functions.
Note that full AVX512 support including ZMM registers is still exposed
to userspace and is still supported for in-kernel use. This flag just
indicates whether in-kernel code should prefer to use YMM registers.
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Acked-by: Ingo Molnar <mingo@kernel.org>
Acked-by: Keith Busch <kbusch@kernel.org>
Reviewed-by: "Martin K. Petersen" <martin.petersen@oracle.com>
Signed-off-by: Eric Biggers <ebiggers@google.com>
---
arch/x86/crypto/aesni-intel_glue.c | 22 +---------------------
arch/x86/include/asm/cpufeatures.h | 1 +
arch/x86/kernel/cpu/intel.c | 22 ++++++++++++++++++++++
3 files changed, 24 insertions(+), 21 deletions(-)
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 11e95fc62636e..3e9ab5cdade47 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1534,30 +1534,10 @@ DEFINE_GCM_ALGS(vaes_avx10_256, FLAG_AVX10_256,
DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
"generic-gcm-vaes-avx10_512", "rfc4106-gcm-vaes-avx10_512",
AES_GCM_KEY_AVX10_SIZE, 800);
#endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
-/*
- * This is a list of CPU models that are known to suffer from downclocking when
- * zmm registers (512-bit vectors) are used. On these CPUs, the AES mode
- * implementations with zmm registers won't be used by default. Implementations
- * with ymm registers (256-bit vectors) will be used by default instead.
- */
-static const struct x86_cpu_id zmm_exclusion_list[] = {
- X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
- X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
- X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
- X86_MATCH_VFM(INTEL_ICELAKE, 0),
- X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
- X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
- X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
- X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
- /* Allow Rocket Lake and later, and Sapphire Rapids and later. */
- /* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
- {},
-};
-
static int __init register_avx_algs(void)
{
int err;
if (!boot_cpu_has(X86_FEATURE_AVX))
@@ -1598,11 +1578,11 @@ static int __init register_avx_algs(void)
ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
aes_gcm_simdalgs_vaes_avx10_256);
if (err)
return err;
- if (x86_match_cpu(zmm_exclusion_list)) {
+ if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
int i;
aes_xts_alg_vaes_avx10_512.base.cra_priority = 1;
for (i = 0; i < ARRAY_SIZE(aes_gcm_algs_vaes_avx10_512); i++)
aes_gcm_algs_vaes_avx10_512[i].base.cra_priority = 1;
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 508c0dad116bc..99334026a26c9 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -481,10 +481,11 @@
#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */
#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */
#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */
#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */
/*
* BUG word(s)
*/
#define X86_BUG(x) (NCAPINTS*32 + (x))
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3dce22f00dc34..c3005c4ec46a9 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -519,10 +519,29 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
msr = this_cpu_read(msr_misc_features_shadow);
wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
}
+/*
+ * This is a list of Intel CPUs that are known to suffer from downclocking when
+ * ZMM registers (512-bit vectors) are used. On these CPUs, when the kernel
+ * executes SIMD-optimized code such as cryptography functions or CRCs, it
+ * should prefer 256-bit (YMM) code to 512-bit (ZMM) code.
+ */
+static const struct x86_cpu_id zmm_exclusion_list[] = {
+ X86_MATCH_VFM(INTEL_SKYLAKE_X, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
+ X86_MATCH_VFM(INTEL_ICELAKE_NNPI, 0),
+ X86_MATCH_VFM(INTEL_TIGERLAKE_L, 0),
+ X86_MATCH_VFM(INTEL_TIGERLAKE, 0),
+ /* Allow Rocket Lake and later, and Sapphire Rapids and later. */
+ {},
+};
+
static void init_intel(struct cpuinfo_x86 *c)
{
early_init_intel(c);
intel_workarounds(c);
@@ -599,10 +618,13 @@ static void init_intel(struct cpuinfo_x86 *c)
if (p)
strcpy(c->x86_model_id, p);
}
#endif
+ if (x86_match_cpu(zmm_exclusion_list))
+ set_cpu_cap(c, X86_FEATURE_PREFER_YMM);
+
/* Work around errata */
srat_detect_node(c);
init_ia32_feat_ctl(c);
--
2.48.1
On Mon, Feb 10, 2025 at 09:45:35AM -0800, Eric Biggers wrote:
> @@ -1598,11 +1578,11 @@ static int __init register_avx_algs(void)
> ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
> aes_gcm_simdalgs_vaes_avx10_256);
> if (err)
> return err;
>
> - if (x86_match_cpu(zmm_exclusion_list)) {
> + if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
s/boot_cpu_has/cpu_feature_enabled/
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Feb 10, 2025 at 09:40:30PM +0100, Borislav Petkov wrote:
> On Mon, Feb 10, 2025 at 09:45:35AM -0800, Eric Biggers wrote:
> > @@ -1598,11 +1578,11 @@ static int __init register_avx_algs(void)
> > ARRAY_SIZE(aes_gcm_algs_vaes_avx10_256),
> > aes_gcm_simdalgs_vaes_avx10_256);
> > if (err)
> > return err;
> >
> > - if (x86_match_cpu(zmm_exclusion_list)) {
> > + if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
>
> s/boot_cpu_has/cpu_feature_enabled/
>
$ git grep boot_cpu_has arch/x86/crypto | wc -l
87
$ git grep cpu_feature_enabled arch/x86/crypto | wc -l
0
It wouldn't make sense to change just this one. Should they really all be
changed?
I see that cpu_feature_enabled() uses code patching while boot_cpu_has() does
not. All these checks occur once at module load time, though, so code patching
wouldn't be beneficial.
- Eric
On Mon, Feb 10, 2025 at 01:01:03PM -0800, Eric Biggers wrote:
> I see that cpu_feature_enabled() uses code patching while boot_cpu_has() does
> not. All these checks occur once at module load time, though, so code patching
> wouldn't be beneficial.
We want to convert all code to use a single interface for testing CPU features
- cpu_feature_enabled() - and the implementation shouldn't be important to
users - it should just work.
Since you're adding new code, you might as well use the proper interface. As
to converting crypto/ and the rest of the tree, that should happen at some
point... eventually...
Thx.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
On Mon, Feb 10, 2025 at 10:17:10PM +0100, Borislav Petkov wrote: > On Mon, Feb 10, 2025 at 01:01:03PM -0800, Eric Biggers wrote: > > I see that cpu_feature_enabled() uses code patching while boot_cpu_has() does > > not. All these checks occur once at module load time, though, so code patching > > wouldn't be beneficial. > > We want to convert all code to use a single interface for testing CPU features > - cpu_feature_enabled() - and the implementation shouldn't be important to > users - it should just work. > > Since you're adding new code, you might as well use the proper interface. As > to converting crypto/ and the rest of the tree, that should happen at some > point... eventually... Well, it's new code in a function that already has a bunch of boot_cpu_has() checks. I don't really like leaving around random inconsistencies. If there is a new way to do it, we should just update it everywhere. I'll also note that boot_cpu_has() is missing a comment that says it is deprecated (if it really is). - Eric
On Mon, Feb 10, 2025 at 01:37:05PM -0800, Eric Biggers wrote:
> I'll also note that boot_cpu_has() is missing a comment that says it is
> deprecated (if it really is).
/*
* This is the default CPU features testing macro to use in code.
^^^^^^^^^^^^^
*
* It is for detection of features which need kernel infrastructure to be
* used. It may *not* directly test the CPU itself. Use the cpu_has() family
* if you want true runtime testing of CPU features, like in hypervisor code
* where you are supporting a possible guest feature where host support for it
* is not relevant.
*/
#define cpu_feature_enabled(bit) \
(__builtin_constant_p(bit) && DISABLED_MASK_BIT_SET(bit) ? 0 : static_cpu_has(bit))
#define boot_cpu_has(bit) cpu_has(&boot_cpu_data, bit)
---
Forget what I said - we'll convert everything when the time comes.
--
Regards/Gruss,
Boris.
https://people.kernel.org/tglx/notes-about-netiquette
© 2016 - 2025 Red Hat, Inc.