[PATCH] crypto: x86/chacha - Remove SIMD fallback path

Posted by Herbert Xu 10 months, 1 week ago
On Wed, Apr 02, 2025 at 08:59:34PM -0700, Eric Biggers wrote:
>
> But in a lot of cases there is also no reason to even add that restriction.  I'm
> not sure why you're so eager to make the library functions harder to use.

I have no intention of making any changes to siphash.  It doesn't
even use SIMD.

All I want to do is get rid of the crypto_simd_usable() fallback
paths that we currently have in arch/x86/crypto.  This code is
never used in hardirq context (and should never be).
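
For reference, crypto_simd_usable() on x86 boils down to
may_use_simd(), i.e. irq_fpu_usable(), so the check only has teeth
when the caller might be running in hard IRQ (or NMI) context.
Simplified sketch, not the verbatim headers:

	/* include/crypto/internal/simd.h, ignoring the
	 * crypto_simd_disabled_for_test self-test knob */
	static __always_inline bool crypto_simd_usable(void)
	{
		return may_use_simd();
	}

	/* arch/x86/include/asm/simd.h */
	static __must_check inline bool may_use_simd(void)
	{
		return irq_fpu_usable();
	}

With SIMD always usable in softirq context, that branch is dead code
for these drivers.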

For example:

---8<---
Get rid of the fallback path as SIMD is now always usable in softirq
context.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index 8bb74a272879..6a3d60cf3192 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -6,9 +6,7 @@
  * Copyright (C) 2015 Martin Willi
  */
 
-#include <crypto/algapi.h>
 #include <crypto/internal/chacha.h>
-#include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
@@ -35,7 +33,6 @@ asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 					   unsigned int len, int nrounds);
 
-static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
 
@@ -123,23 +120,15 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 
 void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
 {
-	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable()) {
-		hchacha_block_generic(state, stream, nrounds);
-	} else {
-		kernel_fpu_begin();
-		hchacha_block_ssse3(state, stream, nrounds);
-		kernel_fpu_end();
-	}
+	kernel_fpu_begin();
+	hchacha_block_ssse3(state, stream, nrounds);
+	kernel_fpu_end();
 }
 EXPORT_SYMBOL(hchacha_block_arch);
 
 void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
 		       int nrounds)
 {
-	if (!static_branch_likely(&chacha_use_simd) || !crypto_simd_usable() ||
-	    bytes <= CHACHA_BLOCK_SIZE)
-		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
-
 	do {
 		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
 
@@ -171,18 +160,11 @@ static int chacha_simd_stream_xor(struct skcipher_request *req,
 		if (nbytes < walk.total)
 			nbytes = round_down(nbytes, walk.stride);
 
-		if (!static_branch_likely(&chacha_use_simd) ||
-		    !crypto_simd_usable()) {
-			chacha_crypt_generic(state, walk.dst.virt.addr,
-					     walk.src.virt.addr, nbytes,
-					     ctx->nrounds);
-		} else {
-			kernel_fpu_begin();
-			chacha_dosimd(state, walk.dst.virt.addr,
-				      walk.src.virt.addr, nbytes,
-				      ctx->nrounds);
-			kernel_fpu_end();
-		}
+		kernel_fpu_begin();
+		chacha_dosimd(state, walk.dst.virt.addr,
+			      walk.src.virt.addr, nbytes,
+			      ctx->nrounds);
+		kernel_fpu_end();
 		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
@@ -207,13 +189,9 @@ static int xchacha_simd(struct skcipher_request *req)
 
 	chacha_init(state, ctx->key, req->iv);
 
-	if (req->cryptlen > CHACHA_BLOCK_SIZE && crypto_simd_usable()) {
-		kernel_fpu_begin();
-		hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
-		kernel_fpu_end();
-	} else {
-		hchacha_block_generic(state, subctx.key, ctx->nrounds);
-	}
+	kernel_fpu_begin();
+	hchacha_block_ssse3(state, subctx.key, ctx->nrounds);
+	kernel_fpu_end();
 	subctx.nrounds = ctx->nrounds;
 
 	memcpy(&real_iv[0], req->iv + 24, 8);
@@ -275,8 +253,6 @@ static int __init chacha_simd_mod_init(void)
 	if (!boot_cpu_has(X86_FEATURE_SSSE3))
 		return 0;
 
-	static_branch_enable(&chacha_use_simd);
-
 	if (boot_cpu_has(X86_FEATURE_AVX) &&
 	    boot_cpu_has(X86_FEATURE_AVX2) &&
 	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
Re: [PATCH] crypto: x86/chacha - Remove SIMD fallback path
Posted by Eric Biggers 10 months ago
On Thu, Apr 03, 2025 at 12:14:50PM +0800, Herbert Xu wrote:
> On Wed, Apr 02, 2025 at 08:59:34PM -0700, Eric Biggers wrote:
> >
> > But in a lot of cases there is also no reason to even add that restriction.  I'm
> > not sure why you're so eager to make the library functions harder to use.
> 
> I have no intention of making any changes to siphash.  It doesn't
> even use SIMD.
> 
> All I want to do is get rid of the crypto_simd_usable() fallback
> paths that we currently have in arch/x86/crypto.  This code is
> never used in hardirq context (and should never be).
> 
> For example:
> 
> ---8<---
> Get rid of the fallback path as SIMD is now always usable in softirq
> context.
> 
> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
> 

It looks like this broken patch already got applied for some reason.

First, there doesn't seem to be agreement yet that the library functions should
have requirements on the calling context.

Second, your patch made unrelated changes that deleted the checks for SSSE3
support, thereby dropping support for CPUs that lack SSSE3.

- Eric
[PATCH] crypto: x86/chacha - Restore SSSE3 fallback path
Posted by Herbert Xu 10 months ago
On Mon, Apr 07, 2025 at 09:48:42AM -0700, Eric Biggers wrote:
> 
> First, there doesn't seem to be agreement yet that the library functions should
> have requirements on the calling context.

Do you have a real example of hard IRQ usage for chacha? Not some
imaginary post-crash scenario that ends up calling into generic code.

And if you really wanted to do that, it's much better to fix up
kernel_fpu_begin to support hard IRQs rather than adding useless
may_use_simd() checks all over the place.
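
Concretely, keeping hard IRQ support at the library level means every
arch entry point carries a branch like this (a rough sketch reusing the
helpers from the existing chacha glue code, not a proposed change):

	void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src,
			       unsigned int bytes, int nrounds)
	{
		if (!may_use_simd())	/* e.g. hard IRQ with live FPU state */
			return chacha_crypt_generic(state, dst, src,
						    bytes, nrounds);

		kernel_fpu_begin();
		chacha_dosimd(state, dst, src, bytes, nrounds);
		kernel_fpu_end();
	}

Handling the hard IRQ case inside kernel_fpu_begin() instead would keep
all of these call sites unconditional.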

> Second, your patch made unrelated changes that deleted the checks for SSSE3
> support, thereby dropping support for CPUs that lack SSSE3.

Sorry.  That was an oversight.

---8<---
The chacha_use_simd static branch is required for x86 machines that
lack SSSE3 support.  Restore it and the generic fallback code.

Reported-by: Eric Biggers <ebiggers@kernel.org>
Fixes: 9b4400215e0e ("crypto: x86/chacha - Remove SIMD fallback path")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

diff --git a/arch/x86/crypto/chacha_glue.c b/arch/x86/crypto/chacha_glue.c
index b7fd7a1f0e15..fcc14c006bde 100644
--- a/arch/x86/crypto/chacha_glue.c
+++ b/arch/x86/crypto/chacha_glue.c
@@ -5,11 +5,12 @@
  * Copyright (C) 2015 Martin Willi
  */
 
+#include <asm/simd.h>
 #include <crypto/chacha.h>
+#include <linux/jump_label.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/sizes.h>
-#include <asm/simd.h>
 
 asmlinkage void chacha_block_xor_ssse3(u32 *state, u8 *dst, const u8 *src,
 				       unsigned int len, int nrounds);
@@ -31,6 +32,7 @@ asmlinkage void chacha_4block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 asmlinkage void chacha_8block_xor_avx512vl(u32 *state, u8 *dst, const u8 *src,
 					   unsigned int len, int nrounds);
 
+static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_simd);
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx2);
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(chacha_use_avx512vl);
 
@@ -117,15 +119,23 @@ static void chacha_dosimd(u32 *state, u8 *dst, const u8 *src,
 
 void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds)
 {
-	kernel_fpu_begin();
-	hchacha_block_ssse3(state, stream, nrounds);
-	kernel_fpu_end();
+	if (!static_branch_likely(&chacha_use_simd)) {
+		hchacha_block_generic(state, stream, nrounds);
+	} else {
+		kernel_fpu_begin();
+		hchacha_block_ssse3(state, stream, nrounds);
+		kernel_fpu_end();
+	}
 }
 EXPORT_SYMBOL(hchacha_block_arch);
 
 void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes,
 		       int nrounds)
 {
+	if (!static_branch_likely(&chacha_use_simd) ||
+	    bytes <= CHACHA_BLOCK_SIZE)
+		return chacha_crypt_generic(state, dst, src, bytes, nrounds);
+
 	do {
 		unsigned int todo = min_t(unsigned int, bytes, SZ_4K);
 
@@ -142,7 +152,7 @@ EXPORT_SYMBOL(chacha_crypt_arch);
 
 bool chacha_is_arch_optimized(void)
 {
-	return true;
+	return static_key_enabled(&chacha_use_simd);
 }
 EXPORT_SYMBOL(chacha_is_arch_optimized);
 
@@ -151,6 +161,8 @@ static int __init chacha_simd_mod_init(void)
 	if (!boot_cpu_has(X86_FEATURE_SSSE3))
 		return 0;
 
+	static_branch_enable(&chacha_use_simd);
+
 	if (boot_cpu_has(X86_FEATURE_AVX) &&
 	    boot_cpu_has(X86_FEATURE_AVX2) &&
 	    cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) {
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt