From nobody Wed Oct 8 00:23:30 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D73411E47CC; Fri, 4 Jul 2025 02:42:07 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1751596927; cv=none; b=Fg7IhPal6/mLkzbjLf9DtAtRDq8k5ecJTj7KGfpzXXhCpytrj/xCvIZyTlRYqN3AkcjWMiJBIYRxkCq9Pn2M93EueE69BYH008ndxLUpSgGCeX01DYwx6q4Sj+qnoWLdt4qcVGg9sbdFYrILCJRcejzfj8b9WEIj4pUAhiQumc0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1751596927; c=relaxed/simple; bh=cJEKeMrrEFzU02uZcEkZZnobeGg0XAEqgJDDvcatu80=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=PSENTGrq/brw5PXO0HFXnYgS4z2JMwHLR6HH2ooFlg8kePk+P4mRl+gyUagSkC2akE/MCoPbFYOqPT+yN8zbEcKtYK8KTUV5qOTH1yyyQguCGMdUVm0Ny3QOeYFsrqUog5W8SIL0QHFQe7grN0bJpBEJYaz0MNCH937AhQ878lI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=XgYL/dPx; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="XgYL/dPx" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 39C81C4CEEE; Fri, 4 Jul 2025 02:42:07 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1751596927; bh=cJEKeMrrEFzU02uZcEkZZnobeGg0XAEqgJDDvcatu80=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=XgYL/dPxx3tVST0ATkylNrYDgusq5kAsc1dJaMR/SHzed1jkA5IbKkjm2442fzjHu ZfwubcuqqRl7I+toiC4LYuPm3sceLMWz73Hh8C/+xkMssRfS9mrC7HGD2XQNJcZaHW hcA0ifxSALVhYQbh470HfuVklE5bUMK2/eojNh7a1gxA4ziuXnLkXE91Z9xosIQ+ce dj5ct2NqPspv7wICaktv9t4rsvQqSRPWNzepTe7ZMjjd5QF0DKawyCkuuBCWFJOxF8 
Jl69RLJ8n8ApU9YBM3G6mfm9NTIbmH6qqgMqsUj4VKJp8bpqsdEda4U6HYBAbATECO K4jrPqYAstABA== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org, x86@kernel.org, Ard Biesheuvel , "Jason A . Donenfeld" , Eric Biggers Subject: [PATCH 1/2] lib/crypto: x86/sha256: Move static_call above kernel-mode FPU section Date: Thu, 3 Jul 2025 19:39:57 -0700 Message-ID: <20250704023958.73274-2-ebiggers@kernel.org> X-Mailer: git-send-email 2.50.0 In-Reply-To: <20250704023958.73274-1-ebiggers@kernel.org> References: <20250704023958.73274-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" As I did for sha512_blocks(), reorganize x86's sha256_blocks() to be just a static_call. To achieve that, for each assembly function add a C function that handles the kernel-mode FPU section and fallback. While this increases total code size slightly, the amount of code actually executed on a given system does not increase, and it is slightly more efficient since it eliminates the extra static_key. It also makes the assembly functions be called with standard direct calls instead of static calls, eliminating the need for ANNOTATE_NOENDBR. 
Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel --- lib/crypto/x86/sha256-avx-asm.S | 3 -- lib/crypto/x86/sha256-avx2-asm.S | 3 -- lib/crypto/x86/sha256-ni-asm.S | 2 -- lib/crypto/x86/sha256-ssse3-asm.S | 2 -- lib/crypto/x86/sha256.h | 48 ++++++++++++++++--------------- 5 files changed, 25 insertions(+), 33 deletions(-) diff --git a/lib/crypto/x86/sha256-avx-asm.S b/lib/crypto/x86/sha256-avx-as= m.S index 73bcff2b548f4..798a7f07fa013 100644 --- a/lib/crypto/x86/sha256-avx-asm.S +++ b/lib/crypto/x86/sha256-avx-asm.S @@ -46,11 +46,10 @@ ######################################################################## # This code schedules 1 block at a time, with 4 lanes per block ######################################################################## =20 #include -#include =20 ## assume buffers not aligned #define VMOVDQ vmovdqu =20 ################################ Define Macros @@ -344,12 +343,10 @@ a =3D TMP_ ## void sha256_transform_avx(struct sha256_block_state *state, ## const u8 *data, size_t nblocks); ######################################################################## .text SYM_FUNC_START(sha256_transform_avx) - ANNOTATE_NOENDBR # since this is called only via static_call - pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 diff --git a/lib/crypto/x86/sha256-avx2-asm.S b/lib/crypto/x86/sha256-avx2-= asm.S index 45787570387f2..62a46993359e6 100644 --- a/lib/crypto/x86/sha256-avx2-asm.S +++ b/lib/crypto/x86/sha256-avx2-asm.S @@ -47,11 +47,10 @@ ######################################################################## # This code schedules 2 blocks at a time, with 4 lanes per block ######################################################################## =20 #include -#include =20 ## assume buffers not aligned #define VMOVDQ vmovdqu =20 ################################ Define Macros @@ -521,12 +520,10 @@ STACK_SIZE =3D _CTX + _CTX_SIZE ## void sha256_transform_rorx(struct sha256_block_state *state, ## const u8 *data, size_t nblocks); 
######################################################################## .text SYM_FUNC_START(sha256_transform_rorx) - ANNOTATE_NOENDBR # since this is called only via static_call - pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 diff --git a/lib/crypto/x86/sha256-ni-asm.S b/lib/crypto/x86/sha256-ni-asm.S index 4af7d22e29e47..9ebbacbb9c13b 100644 --- a/lib/crypto/x86/sha256-ni-asm.S +++ b/lib/crypto/x86/sha256-ni-asm.S @@ -52,11 +52,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * */ =20 #include -#include =20 #define STATE_PTR %rdi /* 1st arg */ #define DATA_PTR %rsi /* 2nd arg */ #define NUM_BLKS %rdx /* 3rd arg */ =20 @@ -109,11 +108,10 @@ * void sha256_ni_transform(struct sha256_block_state *state, * const u8 *data, size_t nblocks); */ .text SYM_FUNC_START(sha256_ni_transform) - ANNOTATE_NOENDBR # since this is called only via static_call =20 shl $6, NUM_BLKS /* convert to bytes */ jz .Ldone_hash add DATA_PTR, NUM_BLKS /* pointer to end of data */ =20 diff --git a/lib/crypto/x86/sha256-ssse3-asm.S b/lib/crypto/x86/sha256-ssse= 3-asm.S index 407b30adcd37f..3b602b7d43fad 100644 --- a/lib/crypto/x86/sha256-ssse3-asm.S +++ b/lib/crypto/x86/sha256-ssse3-asm.S @@ -351,12 +351,10 @@ a =3D TMP_ ## void sha256_transform_ssse3(struct sha256_block_state *state, ## const u8 *data, size_t nblocks); ######################################################################## .text SYM_FUNC_START(sha256_transform_ssse3) - ANNOTATE_NOENDBR # since this is called only via static_call - pushq %rbx pushq %r12 pushq %r13 pushq %r14 pushq %r15 diff --git a/lib/crypto/x86/sha256.h b/lib/crypto/x86/sha256.h index 3b5456c222ba6..669bc06538b67 100644 --- a/lib/crypto/x86/sha256.h +++ b/lib/crypto/x86/sha256.h @@ -6,50 +6,52 @@ */ #include #include #include =20 -asmlinkage void sha256_transform_ssse3(struct sha256_block_state *state, - const u8 *data, size_t nblocks); -asmlinkage void sha256_transform_avx(struct sha256_block_state *state, - const u8 
*data, size_t nblocks); -asmlinkage void sha256_transform_rorx(struct sha256_block_state *state, - const u8 *data, size_t nblocks); -asmlinkage void sha256_ni_transform(struct sha256_block_state *state, - const u8 *data, size_t nblocks); +DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_blocks_generic); =20 -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_sha256_x86); +#define DEFINE_X86_SHA256_FN(c_fn, asm_fn) = \ + asmlinkage void asm_fn(struct sha256_block_state *state, \ + const u8 *data, size_t nblocks); \ + static void c_fn(struct sha256_block_state *state, const u8 *data, \ + size_t nblocks) \ + { \ + if (likely(crypto_simd_usable())) { \ + kernel_fpu_begin(); \ + asm_fn(state, data, nblocks); \ + kernel_fpu_end(); \ + } else { \ + sha256_blocks_generic(state, data, nblocks); \ + } \ + } =20 -DEFINE_STATIC_CALL(sha256_blocks_x86, sha256_transform_ssse3); +DEFINE_X86_SHA256_FN(sha256_blocks_ssse3, sha256_transform_ssse3); +DEFINE_X86_SHA256_FN(sha256_blocks_avx, sha256_transform_avx); +DEFINE_X86_SHA256_FN(sha256_blocks_avx2, sha256_transform_rorx); +DEFINE_X86_SHA256_FN(sha256_blocks_ni, sha256_ni_transform); =20 static void sha256_blocks(struct sha256_block_state *state, const u8 *data, size_t nblocks) { - if (static_branch_likely(&have_sha256_x86) && crypto_simd_usable()) { - kernel_fpu_begin(); - static_call(sha256_blocks_x86)(state, data, nblocks); - kernel_fpu_end(); - } else { - sha256_blocks_generic(state, data, nblocks); - } + static_call(sha256_blocks_x86)(state, data, nblocks); } =20 #define sha256_mod_init_arch sha256_mod_init_arch static inline void sha256_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_SHA_NI)) { - static_call_update(sha256_blocks_x86, sha256_ni_transform); + static_call_update(sha256_blocks_x86, sha256_blocks_ni); } else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL) && boot_cpu_has(X86_FEATURE_AVX)) { if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) 
static_call_update(sha256_blocks_x86, - sha256_transform_rorx); + sha256_blocks_avx2); else static_call_update(sha256_blocks_x86, - sha256_transform_avx); - } else if (!boot_cpu_has(X86_FEATURE_SSSE3)) { - return; + sha256_blocks_avx); + } else if (boot_cpu_has(X86_FEATURE_SSSE3)) { + static_call_update(sha256_blocks_x86, sha256_blocks_ssse3); } - static_branch_enable(&have_sha256_x86); } --=20 2.50.0 From nobody Wed Oct 8 00:23:30 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3D1771E5B69; Fri, 4 Jul 2025 02:42:07 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1751596928; cv=none; b=nS9PqcEpVxwxxzbu+Hj5/lCMO+2C1uFQhcslinhz3nLhmuesW1g1pSEtiD+U4fYXYJIZOIJc6Pz96bQkBXoy4jF1rNFcnMlEZi0kTbHnMGzSBA4eei6VsvlP4t1LAAfY90iwjfHVRfuuV2EkwwZ6go27VfucRzvXqOwBHaYEVRs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1751596928; c=relaxed/simple; bh=ZmWFyCvhpkEvRK5ZvfnfogthS9shinEKaV9jCAXnFN4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=YlmIc0PLsyQyoMedfEFysOOyvBj9xGj7m42n3Vw+tHZOXReXicnp8TruVy83R/4h3CgFGE9UDpHXHeRHi1MhaxlZIgHcCr6YeZJ2CD1VAH3aNnzW7jvB+6R6K0uhDgkw6sOsd/PTik21+Yh+bYAuRTNs23aok+c0I30Wn332qJo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=cIx+dLHv; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="cIx+dLHv" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9387DC4CEF1; Fri, 4 Jul 2025 02:42:07 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; 
s=k20201202; t=1751596927; bh=ZmWFyCvhpkEvRK5ZvfnfogthS9shinEKaV9jCAXnFN4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cIx+dLHvnD7vp3FPJybU2kv4jSWxAyNj5jnP4psbbrCgaVDbbNBW4LzV3aFWwr3H4 5TLrX21MwCQVxP5mDdBzmT/T+S2RhFa7m/0OheQKgQa+l6ymMmWAwNgmvczbvhOwu9 WwGaW3Q3x3osRu3Rj0KTdE9vND2dGwtuaL0/GdJR04gQAnoMzYrVSSNhppXSc8ZTGM N+bAek2IvuOWUlTfnHLXGxFqzcQT4b4ZXsi7pOQ1RkE2Mf6yYxsb9V+avhEtYP0S5B yiWIcWxGE95EgKC7xXxGnwmPfRkL/sdM7tqSoM8bxng1D7+Na+bzeFHgFNXR8L2Hw1 PffSvqfkkhmSg== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org, x86@kernel.org, Ard Biesheuvel , "Jason A . Donenfeld" , Eric Biggers Subject: [PATCH 2/2] lib/crypto: x86/sha256: Remove unnecessary checks for nblocks==0 Date: Thu, 3 Jul 2025 19:39:58 -0700 Message-ID: <20250704023958.73274-3-ebiggers@kernel.org> X-Mailer: git-send-email 2.50.0 In-Reply-To: <20250704023958.73274-1-ebiggers@kernel.org> References: <20250704023958.73274-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Since sha256_blocks() is called only with nblocks >= 1, remove unnecessary checks for nblocks == 0 from the x86 SHA-256 assembly code. 
Signed-off-by: Eric Biggers Acked-by: Ard Biesheuvel --- lib/crypto/x86/sha256-avx-asm.S | 3 --- lib/crypto/x86/sha256-avx2-asm.S | 1 - lib/crypto/x86/sha256-ni-asm.S | 3 --- lib/crypto/x86/sha256-ssse3-asm.S | 3 --- 4 files changed, 10 deletions(-) diff --git a/lib/crypto/x86/sha256-avx-asm.S b/lib/crypto/x86/sha256-avx-as= m.S index 798a7f07fa013..c1aceb3ba3a3a 100644 --- a/lib/crypto/x86/sha256-avx-asm.S +++ b/lib/crypto/x86/sha256-avx-asm.S @@ -355,11 +355,10 @@ SYM_FUNC_START(sha256_transform_avx) =20 subq $STACK_SIZE, %rsp # allocate stack space and $~15, %rsp # align stack pointer =20 shl $6, NUM_BLKS # convert to bytes - jz .Ldone_hash add INP, NUM_BLKS # pointer to end of data mov NUM_BLKS, _INP_END(%rsp) =20 ## load initial digest mov 4*0(CTX), a @@ -444,12 +443,10 @@ SYM_FUNC_START(sha256_transform_avx) mov _INP(%rsp), INP add $64, INP cmp _INP_END(%rsp), INP jne .Lloop0 =20 -.Ldone_hash: - mov %rbp, %rsp popq %rbp popq %r15 popq %r14 popq %r13 diff --git a/lib/crypto/x86/sha256-avx2-asm.S b/lib/crypto/x86/sha256-avx2-= asm.S index 62a46993359e6..eb8836fb9695c 100644 --- a/lib/crypto/x86/sha256-avx2-asm.S +++ b/lib/crypto/x86/sha256-avx2-asm.S @@ -533,11 +533,10 @@ SYM_FUNC_START(sha256_transform_rorx) =20 subq $STACK_SIZE, %rsp and $-32, %rsp # align rsp to 32 byte boundary =20 shl $6, NUM_BLKS # convert to bytes - jz .Ldone_hash lea -64(INP, NUM_BLKS), NUM_BLKS # pointer to last block mov NUM_BLKS, _INP_END(%rsp) =20 cmp NUM_BLKS, INP je .Lonly_one_block diff --git a/lib/crypto/x86/sha256-ni-asm.S b/lib/crypto/x86/sha256-ni-asm.S index 9ebbacbb9c13b..4bd9490ffc662 100644 --- a/lib/crypto/x86/sha256-ni-asm.S +++ b/lib/crypto/x86/sha256-ni-asm.S @@ -110,11 +110,10 @@ */ .text SYM_FUNC_START(sha256_ni_transform) =20 shl $6, NUM_BLKS /* convert to bytes */ - jz .Ldone_hash add DATA_PTR, NUM_BLKS /* pointer to end of data */ =20 /* * load initial hash values * Need to reorder these appropriately @@ -161,12 +160,10 @@ SYM_FUNC_START(sha256_ni_transform) 
pshufd $0x1B, STATE1, STATE1 /* DCBA */ =20 movdqu STATE1, 0*16(STATE_PTR) movdqu STATE0, 1*16(STATE_PTR) =20 -.Ldone_hash: - RET SYM_FUNC_END(sha256_ni_transform) =20 .section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 diff --git a/lib/crypto/x86/sha256-ssse3-asm.S b/lib/crypto/x86/sha256-ssse= 3-asm.S index 3b602b7d43fad..0a2719661784c 100644 --- a/lib/crypto/x86/sha256-ssse3-asm.S +++ b/lib/crypto/x86/sha256-ssse3-asm.S @@ -363,11 +363,10 @@ SYM_FUNC_START(sha256_transform_ssse3) =20 subq $STACK_SIZE, %rsp and $~15, %rsp =20 shl $6, NUM_BLKS # convert to bytes - jz .Ldone_hash add INP, NUM_BLKS mov NUM_BLKS, _INP_END(%rsp) # pointer to end of data =20 ## load initial digest mov 4*0(CTX), a @@ -456,12 +455,10 @@ SYM_FUNC_START(sha256_transform_ssse3) mov _INP(%rsp), INP add $64, INP cmp _INP_END(%rsp), INP jne .Lloop0 =20 -.Ldone_hash: - mov %rbp, %rsp popq %rbp popq %r15 popq %r14 popq %r13 --=20 2.50.0