From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A62176FD5; Thu, 17 Oct 2024 00:02:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123333; cv=none; b=pbJk1JT9moWag3342Tb3KVaJrN0rzH7ndIFSnbiju4U/NfPkjUmKbL8WM+VPdlenU4J34/sMNxvPgchbws6++/gOmBhrE8I3LGXK/P7pIhn9kR+/D3P7oQiChbZoeUja8G3PuGz9vtAv6tzkRWexJE4e6sBE6XJ3aGyofDAXJww= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123333; c=relaxed/simple; bh=+IlcoQ78tiRH6TqYz3pDNxBORgQDSbjuYlpc1Mf+M5c=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=E3TEyAt39RPM8XLcZIORYWpRBg9TAxK4c2jm5k0lGxSOrswbL2N1uaPWw4pR+/8Pan2ibPgraVWqJr18+vABfbRYOfsBMwO9KAfW3vRv+4x5thIKB/hkjK4DFRklqrIMXuIv1hJU84ONcsDyYOthWj39WiGOa+fSyuyR/WdsYgg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=G7u7toA+; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="G7u7toA+" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 148CFC4CEC7; Thu, 17 Oct 2024 00:02:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123333; bh=+IlcoQ78tiRH6TqYz3pDNxBORgQDSbjuYlpc1Mf+M5c=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=G7u7toA+c7UTTGMsGNxnJmApGFCsgT1efvMFQLnH61y/kIx5J1+tPWO6y3t5+slHV L3DDZegfw0lIY9U3YGputtZlJk7bTmJDUPB1GY1USZSZIAraDBbNB8rI9nLMva7zrM b/PMeDRpMkYOyV2n15vgRfJaql2PTv+ObfVti8CWQZ3zWD74ohRiIIKypG+TkERHzA x+8g7IVWmfuDGUHvnc/hX+5Hb25tkqZP7cw3lsCDv7YBAepxEG+vo6YawXN3m9WKc/ 
N0FvXHFOk2X5RWbriEA4lDHWv14+zHBOte/pw3iwtD5MmIsQSDn87GUB0tAZET42qM 7jWS0GlHHRmIw== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org, stable@vger.kernel.org Subject: [PATCH v2 01/10] crypto: x86/aegis128 - access 32-bit arguments as 32-bit Date: Wed, 16 Oct 2024 17:00:42 -0700 Message-ID: <20241017000051.228294-2-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Fix the AEGIS assembly code to access 'unsigned int' arguments as 32-bit values instead of 64-bit, since the upper bits of the corresponding 64-bit registers are not guaranteed to be zero. Note: there haven't been any reports of this bug actually causing incorrect behavior. Neither gcc nor clang guarantee zero-extension to 64 bits, but zero-extension is likely to happen in practice because most instructions that operate on 32-bit registers zero-extend to 64 bits. 
Fixes: 1d373d4e8e15 ("crypto: x86 - Add optimized AEGIS implementations") Cc: stable@vger.kernel.org Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 29 ++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index ad7f4c8916256..2de859173940e 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -19,11 +19,11 @@ #define MSG %xmm5 #define T0 %xmm6 #define T1 %xmm7 =20 #define STATEP %rdi -#define LEN %rsi +#define LEN %esi #define SRC %rdx #define DST %rcx =20 .section .rodata.cst16.aegis128_const, "aM", @progbits, 32 .align 16 @@ -74,50 +74,50 @@ */ SYM_FUNC_START_LOCAL(__load_partial) xor %r9d, %r9d pxor MSG, MSG =20 - mov LEN, %r8 + mov LEN, %r8d and $0x1, %r8 jz .Lld_partial_1 =20 - mov LEN, %r8 + mov LEN, %r8d and $0x1E, %r8 add SRC, %r8 mov (%r8), %r9b =20 .Lld_partial_1: - mov LEN, %r8 + mov LEN, %r8d and $0x2, %r8 jz .Lld_partial_2 =20 - mov LEN, %r8 + mov LEN, %r8d and $0x1C, %r8 add SRC, %r8 shl $0x10, %r9 mov (%r8), %r9w =20 .Lld_partial_2: - mov LEN, %r8 + mov LEN, %r8d and $0x4, %r8 jz .Lld_partial_4 =20 - mov LEN, %r8 + mov LEN, %r8d and $0x18, %r8 add SRC, %r8 shl $32, %r9 mov (%r8), %r8d xor %r8, %r9 =20 .Lld_partial_4: movq %r9, MSG =20 - mov LEN, %r8 + mov LEN, %r8d and $0x8, %r8 jz .Lld_partial_8 =20 - mov LEN, %r8 + mov LEN, %r8d and $0x10, %r8 add SRC, %r8 pslldq $8, MSG movq (%r8), T0 pxor T0, MSG @@ -137,11 +137,11 @@ SYM_FUNC_END(__load_partial) * %r8 * %r9 * %r10 */ SYM_FUNC_START_LOCAL(__store_partial) - mov LEN, %r8 + mov LEN, %r8d mov DST, %r9 =20 movq T0, %r10 =20 cmp $8, %r8 @@ -675,11 +675,11 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) =20 movdqa MSG, T0 call __store_partial =20 /* mask with byte count: */ - movq LEN, T0 + movd LEN, T0 punpcklbw T0, T0 punpcklbw T0, T0 punpcklbw T0, T0 punpcklbw T0, T0 movdqa 
.Laegis128_counter(%rip), T1 @@ -700,11 +700,12 @@ SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) RET SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) =20 /* * void crypto_aegis128_aesni_final(void *state, void *tag_xor, - * u64 assoclen, u64 cryptlen); + * unsigned int assoclen, + * unsigned int cryptlen); */ SYM_FUNC_START(crypto_aegis128_aesni_final) FRAME_BEGIN =20 /* load the state: */ @@ -713,12 +714,12 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 =20 /* prepare length block: */ - movq %rdx, MSG - movq %rcx, T0 + movd %edx, MSG + movd %ecx, T0 pslldq $8, T0 pxor T0, MSG psllq $3, MSG /* multiply by 8 (to get bit count) */ =20 pxor STATE3, MSG --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id C919F8BEE; Thu, 17 Oct 2024 00:02:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123333; cv=none; b=ntq1tLSlbJLvqT5k5IiqEEfVzP5Q+4Nsc0Jn6vKPjQOSLgYdWpyZGg2iQtNOp72F9iYWTH4whteuAksrC88ezzjs0lGmvLDfNYEuq3cj8TJkRyZdLEb3fDV5FFgeARCRxHc/WcZY97mTikbWpJz61iPaGvy23YKyeEA/aC1lIzc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123333; c=relaxed/simple; bh=GOg2tfF66KOFcc1MBoC10XwLYKnKiGkh8fnJesLZqp4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=rQmaGl4lDQtO9zPrElZ7z44buu/f1eUq+M+ehaArLWs9eCiF3OfXBLPWZOObiBFFqU1S8U461gVq/uAAaXHTiSufl3/4IQ4KNxd9jh8iYCICk/L1lk9hz2cvOWljkczbGeT7xyisYFqUgGSFjP7NiQab+h3OV56HeMjAg5cp4p0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org 
header.b=cMgU5Vxv; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="cMgU5Vxv" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5DF28C4CED2; Thu, 17 Oct 2024 00:02:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123333; bh=GOg2tfF66KOFcc1MBoC10XwLYKnKiGkh8fnJesLZqp4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=cMgU5VxvlVj3Foc0L25L1fWmDitGPxp+p3FTXXRPCmPxRHRQckFndpwjDZiu17Ce0 H/sa6SF7t5riuz/3trlNBsvFykwTteaJSY+b8Gt6l3yCzAOQ2VgtVjZTH73LX71bCk +xxccrAS+fIMLX+jdMjtt0N0BTTzcrRYKp2/5FsuRRRSfQ1OxaQuI0BnnkJZNHKKye xnvYRhlz6I8hNiTWmbUrAKrIyxCblJD4bk2TmNzzdgtWlY7KXhekC9eoMvYyxU7Nhg 0C7P5/9Y99uQlBrnWf2QsRB2WC/OZWy7Pj5/0gb5C5x0AuqKwQPAxzqkeHt3MzRAp1 w3eNZpPU16U7A== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 02/10] crypto: x86/aegis128 - remove no-op init and exit functions Date: Wed, 16 Oct 2024 17:00:43 -0700 Message-ID: <20241017000051.228294-3-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Don't bother providing empty stubs for the init and exit methods in struct aead_alg, since they are optional anyway. 
Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-glue.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis1= 28-aesni-glue.c index 4623189000d89..96586470154e0 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -225,26 +225,15 @@ static int crypto_aegis128_aesni_decrypt(struct aead_= request *req) crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); =20 return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; } =20 -static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead) -{ - return 0; -} - -static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead) -{ -} - static struct aead_alg crypto_aegis128_aesni_alg =3D { .setkey =3D crypto_aegis128_aesni_setkey, .setauthsize =3D crypto_aegis128_aesni_setauthsize, .encrypt =3D crypto_aegis128_aesni_encrypt, .decrypt =3D crypto_aegis128_aesni_decrypt, - .init =3D crypto_aegis128_aesni_init_tfm, - .exit =3D crypto_aegis128_aesni_exit_tfm, =20 .ivsize =3D AEGIS128_NONCE_SIZE, .maxauthsize =3D AEGIS128_MAX_AUTH_SIZE, .chunksize =3D AEGIS128_BLOCK_SIZE, =20 --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 32AC52F28; Thu, 17 Oct 2024 00:02:13 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123334; cv=none; b=AddrhsuFpHm3DctvjW+QcS/6WT2MTcyaivLiMFocfpEOPF902tccyXJgYiEyL+7G/BRQGp4kvXSeGTTBx+maue6dHiTZLSxGqQWc/7U3wFOSyDelp5ASappWg8aiq3uv69soYUVM0IjUmF/B1FuHLDgA5XAhChnEwXkEpGt9dGg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; 
t=1729123334; c=relaxed/simple; bh=CCDkUk+BogzAuwSYHqJdOGDwQa77QBwBfOFPld6YMqA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=cZNe1/R6PpLl0gWTRKBteB6ZwfJahx+xixmmJjcRYK+RbR0gB7deFBLlQH/n5bdORFy4iBv8hIze7WimAdxwGoKQdChZ44BeYCC4/jKVvwhXbc01VlSusLHVzEkHzKGXRX7ur9VY1TIyPJcgvqC16B4oZ0OzJQLgYL+91D3DxZo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ZjHzXNI5; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ZjHzXNI5" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9F765C4CECF; Thu, 17 Oct 2024 00:02:13 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123333; bh=CCDkUk+BogzAuwSYHqJdOGDwQa77QBwBfOFPld6YMqA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ZjHzXNI5JKlZF4kuc/yNhusuLsjR3g/6DZre9zC7Y2kfTMSt3Lv75jGYQbiBAqI+S mvKNbXCzFAmrLbFnx7gDHs5FnAuuWVzAneLnKoQJlgRfuntgsBMOwuht+LRUcnUxro GWo8q7zAHYUt17VG3twU/Pr+FcXrmNhuqeLwG4I3VV0pFF4fIjKW6RuzOicxlWCJnN 88blNIXjN+Z/DUcxQ8fHQGDOg1rEEp42CP25Z/kffc/gWG0H8nYQeMl0lA8LX64kyP 7gbaH57rYV+13fE2qIxWSmO2qOCci4IbbHZmc/124Ff3ehkmUAqMbckNohESfqhMmn XapBT26/x0FJA== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 03/10] crypto: x86/aegis128 - eliminate some indirect calls Date: Wed, 16 Oct 2024 17:00:44 -0700 Message-ID: <20241017000051.228294-4-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Instead of using a 
struct of function pointers to decide whether to call the encryption or decryption assembly functions, use a conditional branch on a bool. Force-inline the functions to avoid actually generating the branch. This improves performance slightly since indirect calls are slow. Remove the now-unnecessary CFI stubs. Note that just force-inlining the existing functions might cause the compiler to optimize out the indirect branches, but that would not be a reliable way to do it and the CFI stubs would still be required. Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 9 ++-- arch/x86/crypto/aegis128-aesni-glue.c | 74 +++++++++++++-------------- 2 files changed, 40 insertions(+), 43 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index 2de859173940e..1b57558548c78 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -5,11 +5,10 @@ * Copyright (c) 2017-2018 Ondrej Mosnacek * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. 
*/ =20 #include <linux/linkage.h> -#include <linux/cfi_types.h> #include <asm/frame.h> =20 #define STATE0 %xmm0 #define STATE1 %xmm1 #define STATE2 %xmm2 @@ -401,11 +400,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad) =20 /* * void crypto_aegis128_aesni_enc(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc) +SYM_FUNC_START(crypto_aegis128_aesni_enc) FRAME_BEGIN =20 cmp $0x10, LEN jb .Lenc_out =20 @@ -498,11 +497,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc) =20 /* * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_TYPED_FUNC_START(crypto_aegis128_aesni_enc_tail) +SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 @@ -555,11 +554,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) =20 /* * void crypto_aegis128_aesni_dec(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec) +SYM_FUNC_START(crypto_aegis128_aesni_dec) FRAME_BEGIN =20 cmp $0x10, LEN jb .Ldec_out =20 @@ -652,11 +651,11 @@ SYM_FUNC_END(crypto_aegis128_aesni_dec) =20 /* * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, * const void *src, void *dst); */ -SYM_TYPED_FUNC_START(crypto_aegis128_aesni_dec_tail) +SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis1= 28-aesni-glue.c index 96586470154e0..deb39cef0be1a 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -54,20 +54,10 @@ struct aegis_state { =20 struct aegis_ctx { struct aegis_block key; }; =20 -struct aegis_crypt_ops { - int (*skcipher_walk_init)(struct skcipher_walk *walk, - struct aead_request *req, bool atomic); - - void (*crypt_blocks)(void *state, unsigned int length, const void *src, - void *dst);
- void (*crypt_tail)(void *state, unsigned int length, const void *src, - void *dst); -}; - static void crypto_aegis128_aesni_process_ad( struct aegis_state *state, struct scatterlist *sg_src, unsigned int assoclen) { struct scatter_walk walk; @@ -112,24 +102,41 @@ static void crypto_aegis128_aesni_process_ad( memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos); crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes); } } =20 -static void crypto_aegis128_aesni_process_crypt( - struct aegis_state *state, struct skcipher_walk *walk, - const struct aegis_crypt_ops *ops) +static __always_inline void +crypto_aegis128_aesni_process_crypt(struct aegis_state *state, + struct skcipher_walk *walk, bool enc) { while (walk->nbytes >=3D AEGIS128_BLOCK_SIZE) { - ops->crypt_blocks(state, - round_down(walk->nbytes, AEGIS128_BLOCK_SIZE), - walk->src.virt.addr, walk->dst.virt.addr); + if (enc) + crypto_aegis128_aesni_enc( + state, + round_down(walk->nbytes, + AEGIS128_BLOCK_SIZE), + walk->src.virt.addr, + walk->dst.virt.addr); + else + crypto_aegis128_aesni_dec( + state, + round_down(walk->nbytes, + AEGIS128_BLOCK_SIZE), + walk->src.virt.addr, + walk->dst.virt.addr); skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE); } =20 if (walk->nbytes) { - ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr, - walk->dst.virt.addr); + if (enc) + crypto_aegis128_aesni_enc_tail(state, walk->nbytes, + walk->src.virt.addr, + walk->dst.virt.addr); + else + crypto_aegis128_aesni_dec_tail(state, walk->nbytes, + walk->src.virt.addr, + walk->dst.virt.addr); skcipher_walk_done(walk, 0); } } =20 static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aea= d) @@ -160,71 +167,62 @@ static int crypto_aegis128_aesni_setauthsize(struct c= rypto_aead *tfm, if (authsize < AEGIS128_MIN_AUTH_SIZE) return -EINVAL; return 0; } =20 -static void crypto_aegis128_aesni_crypt(struct aead_request *req, - struct aegis_block *tag_xor, - unsigned int cryptlen, - const struct 
aegis_crypt_ops *ops) +static __always_inline void +crypto_aegis128_aesni_crypt(struct aead_request *req, + struct aegis_block *tag_xor, + unsigned int cryptlen, bool enc) { struct crypto_aead *tfm =3D crypto_aead_reqtfm(req); struct aegis_ctx *ctx =3D crypto_aegis128_aesni_ctx(tfm); struct skcipher_walk walk; struct aegis_state state; =20 - ops->skcipher_walk_init(&walk, req, true); + if (enc) + skcipher_walk_aead_encrypt(&walk, req, true); + else + skcipher_walk_aead_decrypt(&walk, req, true); =20 kernel_fpu_begin(); =20 crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv); crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); - crypto_aegis128_aesni_process_crypt(&state, &walk, ops); + crypto_aegis128_aesni_process_crypt(&state, &walk, enc); crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); =20 kernel_fpu_end(); } =20 static int crypto_aegis128_aesni_encrypt(struct aead_request *req) { - static const struct aegis_crypt_ops OPS =3D { - .skcipher_walk_init =3D skcipher_walk_aead_encrypt, - .crypt_blocks =3D crypto_aegis128_aesni_enc, - .crypt_tail =3D crypto_aegis128_aesni_enc_tail, - }; - struct crypto_aead *tfm =3D crypto_aead_reqtfm(req); struct aegis_block tag =3D {}; unsigned int authsize =3D crypto_aead_authsize(tfm); unsigned int cryptlen =3D req->cryptlen; =20 - crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); + crypto_aegis128_aesni_crypt(req, &tag, cryptlen, true); =20 scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen, authsize, 1); return 0; } =20 static int crypto_aegis128_aesni_decrypt(struct aead_request *req) { static const struct aegis_block zeros =3D {}; =20 - static const struct aegis_crypt_ops OPS =3D { - .skcipher_walk_init =3D skcipher_walk_aead_decrypt, - .crypt_blocks =3D crypto_aegis128_aesni_dec, - .crypt_tail =3D crypto_aegis128_aesni_dec_tail, - }; - struct crypto_aead *tfm =3D crypto_aead_reqtfm(req); struct aegis_block tag; unsigned int authsize =3D 
crypto_aead_authsize(tfm); unsigned int cryptlen =3D req->cryptlen - authsize; =20 scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen, authsize, 0); =20 - crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); + crypto_aegis128_aesni_crypt(req, &tag, cryptlen, false); =20 return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; } =20 static struct aead_alg crypto_aegis128_aesni_alg =3D { --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2E1B02CA8; Thu, 17 Oct 2024 00:02:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123334; cv=none; b=JXBWzSXa0ERTvfe8Fa8koPnFIIM7/0n6ItQi/5aMbSG//80hpvqJZP47XBFmm+9UZZKOVQ76KJEItsMJ2TTW6gCAiEhE5hlMmeZgpZAnsxbccKsuvmeB2LKkcmXRNLbSWbagOpnM/t1sqLiNSPj3lzAwYHiy3zL06N953vhL9B8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123334; c=relaxed/simple; bh=Shx8VLH3EJ7HHdCQzE2K/aZTn7yLlymeq4hWFW2B38A=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Xqeb5hMzLp3VQeBZyE70NkISWsLJnZi2ldxxPCwfRognG2R3Ywl49QLotJ7UwM8/nY4mFEMc3PQ57zI/xicsqNLjajm/R86v0tZgI9YzuR/d0O9DoQwU5ddFUQZrKOk0iOWEIbY1J3Nq9YbJtLXctVMDg3xIreRT8jjn+jwHtzU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=c+O2Y6ks; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="c+O2Y6ks" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E1166C4CED7; Thu, 17 Oct 2024 00:02:13 +0000 (UTC) DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123334; bh=Shx8VLH3EJ7HHdCQzE2K/aZTn7yLlymeq4hWFW2B38A=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=c+O2Y6ks1BGV80bYEBIyl50fyfu6KLGLmDbb/haP/qh8qrfMdW+ybbCl1aOqa87s2 o8CD0+lBnIZTicm2iKbkVb8/XVfqQRI1rs8qfxnSs7QklLR9c04XfRoENBP+Pf+rsw pW9YE/2bnaKY/Lh2hnJBAccJchRpkDT0uskHOLnCh1xSA2dwgzs32tOKAv9DQvrFjS /EdHhrNesBPqRsFjxd/aLdW9+tHD5tSAKqWloFTrCm370RUYsUwR1SonP4C1S+BoTS 3KAkdlLfO2qxM19UTygHwuT3vRhCarALNgLFhr0lAmAXt9NzQSot+YvWzCUTDlGo0x 1K1bZ+H8GxkSQ== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 04/10] crypto: x86/aegis128 - don't bother with special code for aligned data Date: Wed, 16 Oct 2024 17:00:45 -0700 Message-ID: <20241017000051.228294-5-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Remove the AEGIS assembly code paths that were "optimized" to operate on 16-byte aligned data using movdqa, and instead just use the code paths that use movdqu and can handle data with any alignment. This does not reduce performance. movdqa is basically a historical artifact; on aligned data, movdqu and movdqa have had the same performance since Intel Nehalem (2008) and AMD Bulldozer (2011). And code that requires AES-NI cannot run on CPUs older than those anyway. 
Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 122 +++++---------------------- 1 file changed, 22 insertions(+), 100 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index 1b57558548c78..5541aca2fd0dd 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -243,56 +243,12 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 =20 - mov SRC, %r8 - and $0xF, %r8 - jnz .Lad_u_loop - -.align 8 -.Lad_a_loop: - movdqa 0x00(SRC), MSG - aegis128_update - pxor MSG, STATE4 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_1 - - movdqa 0x10(SRC), MSG - aegis128_update - pxor MSG, STATE3 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_2 - - movdqa 0x20(SRC), MSG - aegis128_update - pxor MSG, STATE2 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_3 - - movdqa 0x30(SRC), MSG - aegis128_update - pxor MSG, STATE1 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_4 - - movdqa 0x40(SRC), MSG - aegis128_update - pxor MSG, STATE0 - sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_0 - - add $0x50, SRC - jmp .Lad_a_loop - .align 8 -.Lad_u_loop: +.Lad_loop: movdqu 0x00(SRC), MSG aegis128_update pxor MSG, STATE4 sub $0x10, LEN cmp $0x10, LEN @@ -325,11 +281,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) sub $0x10, LEN cmp $0x10, LEN jl .Lad_out_0 =20 add $0x50, SRC - jmp .Lad_u_loop + jmp .Lad_loop =20 /* store the state: */ .Lad_out_0: movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) @@ -378,19 +334,19 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) .Lad_out: FRAME_END RET SYM_FUNC_END(crypto_aegis128_aesni_ad) =20 -.macro encrypt_block a s0 s1 s2 s3 s4 i - movdq\a (\i * 0x10)(SRC), MSG +.macro encrypt_block s0 s1 s2 s3 s4 i + movdqu (\i * 0x10)(SRC), MSG movdqa MSG, T0 pxor \s1, T0 pxor \s4, T0 movdqa \s2, T1 pand \s3, T1 pxor T1, T0 - movdq\a 
T0, (\i * 0x10)(DST) + movdqu T0, (\i * 0x10)(DST) =20 aegis128_update pxor MSG, \s4 =20 sub $0x10, LEN @@ -413,38 +369,21 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 =20 - mov SRC, %r8 - or DST, %r8 - and $0xF, %r8 - jnz .Lenc_u_loop - .align 8 -.Lenc_a_loop: - encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 - encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 - encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 - encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 - encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 +.Lenc_loop: + encrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0 + encrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1 + encrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2 + encrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3 + encrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4 =20 add $0x50, SRC add $0x50, DST - jmp .Lenc_a_loop - -.align 8 -.Lenc_u_loop: - encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 - encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 - encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 - encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 - encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 - - add $0x50, SRC - add $0x50, DST - jmp .Lenc_u_loop + jmp .Lenc_loop =20 /* store the state: */ .Lenc_out_0: movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) @@ -533,18 +472,18 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) =20 FRAME_END RET SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) =20 -.macro decrypt_block a s0 s1 s2 s3 s4 i - movdq\a (\i * 0x10)(SRC), MSG +.macro decrypt_block s0 s1 s2 s3 s4 i + movdqu (\i * 0x10)(SRC), MSG pxor \s1, MSG pxor \s4, MSG movdqa \s2, T1 pand \s3, T1 pxor T1, MSG - movdq\a MSG, (\i * 0x10)(DST) + movdqu MSG, (\i * 0x10)(DST) =20 aegis128_update pxor MSG, \s4 =20 sub $0x10, LEN @@ -567,38 +506,21 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) movdqu 0x10(STATEP), 
STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 =20 - mov SRC, %r8 - or DST, %r8 - and $0xF, %r8 - jnz .Ldec_u_loop - -.align 8 -.Ldec_a_loop: - decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 - decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 - decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 - decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 - decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 - - add $0x50, SRC - add $0x50, DST - jmp .Ldec_a_loop - .align 8 -.Ldec_u_loop: - decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 - decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 - decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 - decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 - decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 +.Ldec_loop: + decrypt_block STATE0 STATE1 STATE2 STATE3 STATE4 0 + decrypt_block STATE4 STATE0 STATE1 STATE2 STATE3 1 + decrypt_block STATE3 STATE4 STATE0 STATE1 STATE2 2 + decrypt_block STATE2 STATE3 STATE4 STATE0 STATE1 3 + decrypt_block STATE1 STATE2 STATE3 STATE4 STATE0 4 =20 add $0x50, SRC add $0x50, DST - jmp .Ldec_u_loop + jmp .Ldec_loop =20 /* store the state: */ .Ldec_out_0: movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CADA0168BD; Thu, 17 Oct 2024 00:02:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123334; cv=none; b=gFDcp3D/XGj7cMkyEkkVhv9NFWlsi35VoNTlBHUlnMEn+Q7HnsJvC7yCAOrFSqK0Bdq4aH0lHSUqQPfflBJFMZqx06B39N+XDbdxuQEh81W9esBDMPP1FJUCiosKZDXDZSTX82RTVsNfqPNEDlz8YHKDm4FiVE7JMB3ZYneUghM= ARC-Message-Signature: i=1; 
a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123334; c=relaxed/simple; bh=T8iFEXgTwDU+HSkJacYTZ/UcnjmIgvhhm4kIZh3HL/k=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=AOAutLgT4M/aSGzBjcQlR4hT9zSqKD92Kxf2GWHENMsGBrGnbFMYbKkgbZjGZgzwMI46sw+DZDattG9YU8LZ1u6jkG+GGA5DyFUEUeq31mEGaFq9HJVRdQZ/cC9thSTc0SUCUM1nvJz3GlJe5WoijjRJYgX5y9fPg34HmMay/58= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=NiUx8ipl; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="NiUx8ipl" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2E646C4CEDB; Thu, 17 Oct 2024 00:02:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123334; bh=T8iFEXgTwDU+HSkJacYTZ/UcnjmIgvhhm4kIZh3HL/k=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=NiUx8ipl1LwhQOM8F+55jyMiXI1tt+bffGhdRG/TAPcq1VO5aJID26ERsVkEaBulQ BqxGepu4cJsVOJsY4wPS5K8KrNMhedScBHEYjRp+d6hhE9z4yg2/JzjRTRrk76j4xE EVsI8UgfhtLxow8olx59pwpAQg/ey0PuWkve7BwO/r6Dwsy9vh6OkQP215Eluaj0DJ DHqaPBVAcCLcsOK7ARZNofZXKyc1Eeu2sAumngDKX/8Lk8F1PT3wVuaMuO0YeBmsLx WWZCx2+KTYM4LGoHl3AuicPt+aJKnuVrP12S8mKUO5gdbHZfzxv9H+vKyoz02GTN5B gJGhmSaTZP56A== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 05/10] crypto: x86/aegis128 - optimize length block preparation using SSE4.1 Date: Wed, 16 Oct 2024 17:00:46 -0700 Message-ID: <20241017000051.228294-6-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: 
text/plain; charset="utf-8" From: Eric Biggers Start using SSE4.1 instructions in the AES-NI AEGIS code, with the first use case being preparing the length block in fewer instructions. In practice this does not reduce the set of CPUs on which the code can run, because all Intel and AMD CPUs with AES-NI also have SSE4.1. Upgrade the existing SSE2 feature check to SSE4.1, though it seems this check is not strictly necessary; the aesni-intel module has been getting away with using SSE4.1 despite checking for AES-NI only. Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/Kconfig | 4 ++-- arch/x86/crypto/aegis128-aesni-asm.S | 6 ++---- arch/x86/crypto/aegis128-aesni-glue.c | 6 +++--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 7b1bebed879df..3d2e38ba52403 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -361,20 +361,20 @@ config CRYPTO_CHACHA20_X86_64 - SSSE3 (Supplemental SSE3) - AVX2 (Advanced Vector Extensions 2) - AVX-512VL (Advanced Vector Extensions-512VL) =20 config CRYPTO_AEGIS128_AESNI_SSE2 - tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE2)" + tristate "AEAD ciphers: AEGIS-128 (AES-NI/SSE4.1)" depends on X86 && 64BIT select CRYPTO_AEAD select CRYPTO_SIMD help AEGIS-128 AEAD algorithm =20 Architecture: x86_64 using: - AES-NI (AES New Instructions) - - SSE2 (Streaming SIMD Extensions 2) + - SSE4.1 (Streaming SIMD Extensions 4.1) =20 config CRYPTO_NHPOLY1305_SSE2 tristate "Hash functions: NHPoly1305 (SSE2)" depends on X86 && 64BIT select CRYPTO_NHPOLY1305 diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index 5541aca2fd0dd..6ed4bc452c292 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * AES-NI + SSE2 implementation of AEGIS-128 + * AES-NI + SSE4.1 implementation of AEGIS-128 * * Copyright (c) 
2017-2018 Ondrej Mosnacek * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. */ =20 @@ -636,13 +636,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 =20 /* prepare length block: */ movd %edx, MSG - movd %ecx, T0 - pslldq $8, T0 - pxor T0, MSG + pinsrd $2, %ecx, MSG psllq $3, MSG /* multiply by 8 (to get bit count) */ =20 pxor STATE3, MSG =20 /* update state: */ diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis1= 28-aesni-glue.c index deb39cef0be1a..4dd2d981a514f 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* * The AEGIS-128 Authenticated-Encryption Algorithm - * Glue for AES-NI + SSE2 implementation + * Glue for AES-NI + SSE4.1 implementation * * Copyright (c) 2017-2018 Ondrej Mosnacek * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. */ =20 @@ -252,11 +252,11 @@ static struct aead_alg crypto_aegis128_aesni_alg =3D { =20 static struct simd_aead_alg *simd_alg; =20 static int __init crypto_aegis128_aesni_module_init(void) { - if (!boot_cpu_has(X86_FEATURE_XMM2) || + if (!boot_cpu_has(X86_FEATURE_XMM4_1) || !boot_cpu_has(X86_FEATURE_AES) || !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL)) return -ENODEV; =20 return simd_register_aeads_compat(&crypto_aegis128_aesni_alg, 1, @@ -271,8 +271,8 @@ static void __exit crypto_aegis128_aesni_module_exit(vo= id) module_init(crypto_aegis128_aesni_module_init); module_exit(crypto_aegis128_aesni_module_exit); =20 MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ondrej Mosnacek "); -MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation"= ); +MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE4.1 implementatio= n"); MODULE_ALIAS_CRYPTO("aegis128"); MODULE_ALIAS_CRYPTO("aegis128-aesni"); --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org 
[10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 002A8182D2; Thu, 17 Oct 2024 00:02:14 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123335; cv=none; b=G8hV4Ta3R89PVgtYIUhIYsAUy2q1/wJaPsNm7UQ8hl9SWpI8pFZqGhQfcd7YyNn7sUIib2Jc6nPUQczNA14M6rVtyC7eTanare2RE7k2wl175QKpmGHts81IbjL9vBbtUPAfXs9bb9E90HATUtQHQ7oyevoksj6HPCg7zkOEUic= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123335; c=relaxed/simple; bh=STEao61qdOBFlm96DR2zxYNVqZ0pbmgKZZdE/wkRW1A=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=lW4Rjjm4yAXzDW15fR8feSMAN5SlT1UOLMpl3IOneZTQtxnz3f461aIqkL1Ufc5NCafXhnBBx3CDUQtX99IeGrhyTYtGYosxcWt8ghGX7B+eOyVWbztZalW4DDcDRteWv5Exyh7E+2ElV5f6JFFMy/Qt+s6ggV2B/PCseRc7yzQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=EYJeYE1I; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="EYJeYE1I" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7037DC4CECF; Thu, 17 Oct 2024 00:02:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123334; bh=STEao61qdOBFlm96DR2zxYNVqZ0pbmgKZZdE/wkRW1A=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=EYJeYE1ImDoxqPK9bSjsfZDYHns6DbqLZKSthg2Obiw6c54xrGL8xnkjXEsT8vIc1 WULIOavByGbSdp3SQ2LpYPKyTGvieXyqzc+cdZV0FzpJDTg7h9Dro1knwCETMQwNl1 IkLem2dOIGU3s9KoQsFSrOFnfOqiKiUXgPL8gHLk4gO6mXTXWzTJ3hM3OeGUzU96DE eb8vpCZhAtc+cOG7MmFtsH9PZJOQB8wiaw+R36S6LxSMBWUIfefG9WuUqoyUyLTP3T AoslyZV+lRpriROmRYE8M1f+j+GyAPshyrZBKwzQ5TzVqj0eZsbQL5FNYxuAaCFmut vPst+5QBmqvLg== From: Eric Biggers To: 
linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 06/10] crypto: x86/aegis128 - improve assembly function prototypes Date: Wed, 16 Oct 2024 17:00:47 -0700 Message-ID: <20241017000051.228294-7-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Adjust the prototypes of the AEGIS assembly functions: - Use proper types instead of 'void *', when applicable. - Move the length parameter to after the buffers it describes rather than before, to match the usual convention. Also shorten its name to just len (which is the name used in the assembly code). - Declare register aliases at the beginning of each function rather than once per file. This was necessary because len was moved, but also it allows adding some aliases where raw registers were used before. - Put assoclen and cryptlen in the correct order when declaring the finalization function in the .c file. - Remove the unnecessary "crypto_" prefix. 
Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 105 ++++++++++++++++---------- arch/x86/crypto/aegis128-aesni-glue.c | 92 +++++++++++----------- 2 files changed, 112 insertions(+), 85 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index 6ed4bc452c292..9dfdbe0b1fb83 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -17,15 +17,10 @@ #define KEY %xmm5 #define MSG %xmm5 #define T0 %xmm6 #define T1 %xmm7 =20 -#define STATEP %rdi -#define LEN %esi -#define SRC %rdx -#define DST %rcx - .section .rodata.cst16.aegis128_const, "aM", @progbits, 32 .align 16 .Laegis128_const_0: .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 @@ -70,10 +65,12 @@ * T0 * %r8 * %r9 */ SYM_FUNC_START_LOCAL(__load_partial) + .set LEN, %ecx + .set SRC, %rsi xor %r9d, %r9d pxor MSG, MSG =20 mov LEN, %r8d and $0x1, %r8 @@ -136,10 +133,12 @@ SYM_FUNC_END(__load_partial) * %r8 * %r9 * %r10 */ SYM_FUNC_START_LOCAL(__store_partial) + .set LEN, %ecx + .set DST, %rdx mov LEN, %r8d mov DST, %r9 =20 movq T0, %r10 =20 @@ -182,20 +181,25 @@ SYM_FUNC_START_LOCAL(__store_partial) .Lst_partial_1: RET SYM_FUNC_END(__store_partial) =20 /* - * void crypto_aegis128_aesni_init(void *state, const void *key, const voi= d *iv); + * void aegis128_aesni_init(struct aegis_state *state, + * const struct aegis_block *key, + * const u8 iv[AEGIS128_NONCE_SIZE]); */ -SYM_FUNC_START(crypto_aegis128_aesni_init) +SYM_FUNC_START(aegis128_aesni_init) + .set STATEP, %rdi + .set KEYP, %rsi + .set IVP, %rdx FRAME_BEGIN =20 /* load IV: */ - movdqu (%rdx), T1 + movdqu (IVP), T1 =20 /* load key: */ - movdqa (%rsi), KEY + movdqa (KEYP), KEY pxor KEY, T1 movdqa T1, STATE0 movdqa KEY, STATE3 movdqa KEY, STATE4 =20 @@ -224,17 +228,20 @@ SYM_FUNC_START(crypto_aegis128_aesni_init) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) 
=20 FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_init) +SYM_FUNC_END(aegis128_aesni_init) =20 /* - * void crypto_aegis128_aesni_ad(void *state, unsigned int length, - * const void *data); + * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, + * unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_ad) +SYM_FUNC_START(aegis128_aesni_ad) + .set STATEP, %rdi + .set SRC, %rsi + .set LEN, %edx FRAME_BEGIN =20 cmp $0x10, LEN jb .Lad_out =20 @@ -332,11 +339,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_ad) RET =20 .Lad_out: FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_ad) +SYM_FUNC_END(aegis128_aesni_ad) =20 .macro encrypt_block s0 s1 s2 s3 s4 i movdqu (\i * 0x10)(SRC), MSG movdqa MSG, T0 pxor \s1, T0 @@ -353,14 +360,18 @@ SYM_FUNC_END(crypto_aegis128_aesni_ad) cmp $0x10, LEN jl .Lenc_out_\i .endm =20 /* - * void crypto_aegis128_aesni_enc(void *state, unsigned int length, - * const void *src, void *dst); + * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *d= st, + * unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_enc) +SYM_FUNC_START(aegis128_aesni_enc) + .set STATEP, %rdi + .set SRC, %rsi + .set DST, %rdx + .set LEN, %ecx FRAME_BEGIN =20 cmp $0x10, LEN jb .Lenc_out =20 @@ -430,17 +441,21 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc) RET =20 .Lenc_out: FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_enc) +SYM_FUNC_END(aegis128_aesni_enc) =20 /* - * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, - * const void *src, void *dst); + * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src, + * u8 *dst, unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) +SYM_FUNC_START(aegis128_aesni_enc_tail) + .set STATEP, %rdi + .set SRC, %rsi + .set DST, %rdx + .set LEN, %ecx FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 @@ -470,11 +485,11 @@ SYM_FUNC_START(crypto_aegis128_aesni_enc_tail) movdqu STATE2, 0x30(STATEP) movdqu 
STATE3, 0x40(STATEP) =20 FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) +SYM_FUNC_END(aegis128_aesni_enc_tail) =20 .macro decrypt_block s0 s1 s2 s3 s4 i movdqu (\i * 0x10)(SRC), MSG pxor \s1, MSG pxor \s4, MSG @@ -490,14 +505,18 @@ SYM_FUNC_END(crypto_aegis128_aesni_enc_tail) cmp $0x10, LEN jl .Ldec_out_\i .endm =20 /* - * void crypto_aegis128_aesni_dec(void *state, unsigned int length, - * const void *src, void *dst); + * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *d= st, + * unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_dec) +SYM_FUNC_START(aegis128_aesni_dec) + .set STATEP, %rdi + .set SRC, %rsi + .set DST, %rdx + .set LEN, %ecx FRAME_BEGIN =20 cmp $0x10, LEN jb .Ldec_out =20 @@ -567,17 +586,21 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec) RET =20 .Ldec_out: FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_dec) +SYM_FUNC_END(aegis128_aesni_dec) =20 /* - * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, - * const void *src, void *dst); + * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src, + * u8 *dst, unsigned int len); */ -SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) +SYM_FUNC_START(aegis128_aesni_dec_tail) + .set STATEP, %rdi + .set SRC, %rsi + .set DST, %rdx + .set LEN, %ecx FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 @@ -617,30 +640,34 @@ SYM_FUNC_START(crypto_aegis128_aesni_dec_tail) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) =20 FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_dec_tail) +SYM_FUNC_END(aegis128_aesni_dec_tail) =20 /* - * void crypto_aegis128_aesni_final(void *state, void *tag_xor, - * unsigned int assoclen, - * unsigned int cryptlen); + * void aegis128_aesni_final(struct aegis_state *state, + * struct aegis_block *tag_xor, + * unsigned int assoclen, unsigned int cryptlen); */ -SYM_FUNC_START(crypto_aegis128_aesni_final) +SYM_FUNC_START(aegis128_aesni_final) + .set STATEP, %rdi + .set 
TAG_XOR, %rsi + .set ASSOCLEN, %edx + .set CRYPTLEN, %ecx FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 =20 /* prepare length block: */ - movd %edx, MSG - pinsrd $2, %ecx, MSG + movd ASSOCLEN, MSG + pinsrd $2, CRYPTLEN, MSG psllq $3, MSG /* multiply by 8 (to get bit count) */ =20 pxor STATE3, MSG =20 /* update state: */ @@ -651,18 +678,18 @@ SYM_FUNC_START(crypto_aegis128_aesni_final) aegis128_update; pxor MSG, STATE0 aegis128_update; pxor MSG, STATE4 aegis128_update; pxor MSG, STATE3 =20 /* xor tag: */ - movdqu (%rsi), MSG + movdqu (TAG_XOR), MSG =20 pxor STATE0, MSG pxor STATE1, MSG pxor STATE2, MSG pxor STATE3, MSG pxor STATE4, MSG =20 - movdqu MSG, (%rsi) + movdqu MSG, (TAG_XOR) =20 FRAME_END RET -SYM_FUNC_END(crypto_aegis128_aesni_final) +SYM_FUNC_END(aegis128_aesni_final) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis1= 28-aesni-glue.c index 4dd2d981a514f..9555958e4089d 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -21,31 +21,10 @@ #define AEGIS128_STATE_BLOCKS 5 #define AEGIS128_KEY_SIZE 16 #define AEGIS128_MIN_AUTH_SIZE 8 #define AEGIS128_MAX_AUTH_SIZE 16 =20 -asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *i= v); - -asmlinkage void crypto_aegis128_aesni_ad( - void *state, unsigned int length, const void *data); - -asmlinkage void crypto_aegis128_aesni_enc( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128_aesni_dec( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128_aesni_enc_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128_aesni_dec_tail( - void *state, unsigned int length, const void *src, void *dst); - -asmlinkage void crypto_aegis128_aesni_final( - void *state, void 
*tag_xor, unsigned int cryptlen, - unsigned int assoclen); - struct aegis_block { u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN); }; =20 struct aegis_state { @@ -54,10 +33,36 @@ struct aegis_state { =20 struct aegis_ctx { struct aegis_block key; }; =20 +asmlinkage void aegis128_aesni_init(struct aegis_state *state, + const struct aegis_block *key, + const u8 iv[AEGIS128_NONCE_SIZE]); + +asmlinkage void aegis128_aesni_ad(struct aegis_state *state, const u8 *dat= a, + unsigned int len); + +asmlinkage void aegis128_aesni_enc(struct aegis_state *state, const u8 *sr= c, + u8 *dst, unsigned int len); + +asmlinkage void aegis128_aesni_dec(struct aegis_state *state, const u8 *sr= c, + u8 *dst, unsigned int len); + +asmlinkage void aegis128_aesni_enc_tail(struct aegis_state *state, + const u8 *src, u8 *dst, + unsigned int len); + +asmlinkage void aegis128_aesni_dec_tail(struct aegis_state *state, + const u8 *src, u8 *dst, + unsigned int len); + +asmlinkage void aegis128_aesni_final(struct aegis_state *state, + struct aegis_block *tag_xor, + unsigned int assoclen, + unsigned int cryptlen); + static void crypto_aegis128_aesni_process_ad( struct aegis_state *state, struct scatterlist *sg_src, unsigned int assoclen) { struct scatter_walk walk; @@ -73,19 +78,18 @@ static void crypto_aegis128_aesni_process_ad( =20 if (pos + size >=3D AEGIS128_BLOCK_SIZE) { if (pos > 0) { unsigned int fill =3D AEGIS128_BLOCK_SIZE - pos; memcpy(buf.bytes + pos, src, fill); - crypto_aegis128_aesni_ad(state, - AEGIS128_BLOCK_SIZE, - buf.bytes); + aegis128_aesni_ad(state, buf.bytes, + AEGIS128_BLOCK_SIZE); pos =3D 0; left -=3D fill; src +=3D fill; } =20 - crypto_aegis128_aesni_ad(state, left, src); + aegis128_aesni_ad(state, src, left); =20 src +=3D left & ~(AEGIS128_BLOCK_SIZE - 1); left &=3D AEGIS128_BLOCK_SIZE - 1; } =20 @@ -98,45 +102,41 @@ static void crypto_aegis128_aesni_process_ad( scatterwalk_done(&walk, 0, assoclen); } =20 if (pos > 0) { memset(buf.bytes + pos, 0, 
AEGIS128_BLOCK_SIZE - pos); - crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes); + aegis128_aesni_ad(state, buf.bytes, AEGIS128_BLOCK_SIZE); } } =20 static __always_inline void crypto_aegis128_aesni_process_crypt(struct aegis_state *state, struct skcipher_walk *walk, bool enc) { while (walk->nbytes >=3D AEGIS128_BLOCK_SIZE) { if (enc) - crypto_aegis128_aesni_enc( - state, - round_down(walk->nbytes, - AEGIS128_BLOCK_SIZE), - walk->src.virt.addr, - walk->dst.virt.addr); + aegis128_aesni_enc(state, walk->src.virt.addr, + walk->dst.virt.addr, + round_down(walk->nbytes, + AEGIS128_BLOCK_SIZE)); else - crypto_aegis128_aesni_dec( - state, - round_down(walk->nbytes, - AEGIS128_BLOCK_SIZE), - walk->src.virt.addr, - walk->dst.virt.addr); + aegis128_aesni_dec(state, walk->src.virt.addr, + walk->dst.virt.addr, + round_down(walk->nbytes, + AEGIS128_BLOCK_SIZE)); skcipher_walk_done(walk, walk->nbytes % AEGIS128_BLOCK_SIZE); } =20 if (walk->nbytes) { if (enc) - crypto_aegis128_aesni_enc_tail(state, walk->nbytes, - walk->src.virt.addr, - walk->dst.virt.addr); + aegis128_aesni_enc_tail(state, walk->src.virt.addr, + walk->dst.virt.addr, + walk->nbytes); else - crypto_aegis128_aesni_dec_tail(state, walk->nbytes, - walk->src.virt.addr, - walk->dst.virt.addr); + aegis128_aesni_dec_tail(state, walk->src.virt.addr, + walk->dst.virt.addr, + walk->nbytes); skcipher_walk_done(walk, 0); } } =20 static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aea= d) @@ -184,14 +184,14 @@ crypto_aegis128_aesni_crypt(struct aead_request *req, else skcipher_walk_aead_decrypt(&walk, req, true); =20 kernel_fpu_begin(); =20 - crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv); + aegis128_aesni_init(&state, &ctx->key, req->iv); crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); crypto_aegis128_aesni_process_crypt(&state, &walk, enc); - crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); + aegis128_aesni_final(&state, tag_xor, 
req->assoclen, cryptlen); =20 kernel_fpu_end(); } =20 static int crypto_aegis128_aesni_encrypt(struct aead_request *req) --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 09F3B18E3F; Thu, 17 Oct 2024 00:02:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123335; cv=none; b=kR96i9mEYl9YBGql91iaJOWwcV7n3RMxx6UR8Xpw00ChwmYQjzGWTg1KOmYAR47XUbqLs3dhFOghud/JxMdjGRckZOyq7jPmGr8px0GGoLeyzTL+QYW99iiV5keElHoYmZgKTpLwbS7erH2AB7KO8onyIuf6EmW5JMXRofv+umA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123335; c=relaxed/simple; bh=9a54tPMqOVZMyr0Ex0zfIp+tWVsw6jMjfj7xf1MVcY4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=HzXmBixp7ak6cjpmDp7Tn6tNUdf2wP+w50RUE8UuIypgbtfdm+HE+s/XP0ZLui0xW6DFD5KVPTKX9x7vUUM/cDOs8EGSXeqhZgo1PHXQN53rzu9U3u2ljGFEvA4Oge+2AGLVQFqJ9z2oJF2LvVUVvMt2CR3VCzmYviIHmTW7C0U= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=gbLl0XXf; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="gbLl0XXf" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BF310C4AF0E; Thu, 17 Oct 2024 00:02:14 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123334; bh=9a54tPMqOVZMyr0Ex0zfIp+tWVsw6jMjfj7xf1MVcY4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=gbLl0XXfROI2OlJXgtzuHNJzwwp+GnbkbgNq6MftbxlcT9Q9MJZ9KorHkQlsW6AKH RmJV6Sx1b0pE07vZZ+YF54vxfev7nAhoscXhvn4Cx/I0g4osBN3omN0+itOUuqxNdu 
ne0v+Mocp5hw1HqGubqVdvsGB0yXGKeB7FanplRIkSjfuIV23/7wnmFJaVxoLFwpGi 7lz/XiiFwa1lQh4LO6hxAlsWb3O5+rUnejc3LwxJdfSacDCNA+0b4WrbIfaEe5q8Oh Ap7Hp+/V9Uyy8htBf2bR1LrujHt5y9pj2gVBYBd2HcmhoiGkP03XcM9ZEU83ihM80N Sh57S4TzJ6BJg== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 07/10] crypto: x86/aegis128 - optimize partial block handling using SSE4.1 Date: Wed, 16 Oct 2024 17:00:48 -0700 Message-ID: <20241017000051.228294-8-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Optimize the code that loads and stores partial blocks, taking advantage of SSE4.1. The code is adapted from that in aes-gcm-aesni-x86_64.S. Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 236 +++++++++++---------------- 1 file changed, 95 insertions(+), 141 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index 9dfdbe0b1fb83..e650330ef6951 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -2,10 +2,11 @@ /* * AES-NI + SSE4.1 implementation of AEGIS-128 * * Copyright (c) 2017-2018 Ondrej Mosnacek * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. 
+ * Copyright 2024 Google LLC */ =20 #include #include =20 @@ -26,15 +27,15 @@ .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 .Laegis128_const_1: .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd =20 -.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16 -.align 16 -.Laegis128_counter: - .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 - .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f +.section .rodata.cst32.zeropad_mask, "aM", @progbits, 32 +.align 32 +.Lzeropad_mask: + .octa 0xffffffffffffffffffffffffffffffff + .octa 0 =20 .text =20 /* * aegis128_update @@ -53,136 +54,90 @@ aesenc STATE3, STATE2 aesenc T0, STATE3 .endm =20 /* - * __load_partial: internal ABI - * input: - * LEN - bytes - * SRC - src - * output: - * MSG - message block - * changed: - * T0 - * %r8 - * %r9 + * Load 1 <=3D LEN (%ecx) <=3D 15 bytes from the pointer SRC into the xmm = register + * MSG and zeroize any remaining bytes. Clobbers %rax, %rcx, and %r8. 
*/ -SYM_FUNC_START_LOCAL(__load_partial) - .set LEN, %ecx - .set SRC, %rsi - xor %r9d, %r9d - pxor MSG, MSG - - mov LEN, %r8d - and $0x1, %r8 - jz .Lld_partial_1 - - mov LEN, %r8d - and $0x1E, %r8 - add SRC, %r8 - mov (%r8), %r9b - -.Lld_partial_1: - mov LEN, %r8d - and $0x2, %r8 - jz .Lld_partial_2 - - mov LEN, %r8d - and $0x1C, %r8 - add SRC, %r8 - shl $0x10, %r9 - mov (%r8), %r9w - -.Lld_partial_2: - mov LEN, %r8d - and $0x4, %r8 - jz .Lld_partial_4 - - mov LEN, %r8d - and $0x18, %r8 - add SRC, %r8 - shl $32, %r9 - mov (%r8), %r8d - xor %r8, %r9 - -.Lld_partial_4: - movq %r9, MSG - - mov LEN, %r8d - and $0x8, %r8 - jz .Lld_partial_8 - - mov LEN, %r8d - and $0x10, %r8 - add SRC, %r8 - pslldq $8, MSG - movq (%r8), T0 - pxor T0, MSG - -.Lld_partial_8: - RET -SYM_FUNC_END(__load_partial) +.macro load_partial + sub $8, %ecx /* LEN - 8 */ + jle .Lle8\@ + + /* Load 9 <=3D LEN <=3D 15 bytes: */ + movq (SRC), MSG /* Load first 8 bytes */ + mov (SRC, %rcx), %rax /* Load last 8 bytes */ + neg %ecx + shl $3, %ecx + shr %cl, %rax /* Discard overlapping bytes */ + pinsrq $1, %rax, MSG + jmp .Ldone\@ + +.Lle8\@: + add $4, %ecx /* LEN - 4 */ + jl .Llt4\@ + + /* Load 4 <=3D LEN <=3D 8 bytes: */ + mov (SRC), %eax /* Load first 4 bytes */ + mov (SRC, %rcx), %r8d /* Load last 4 bytes */ + jmp .Lcombine\@ + +.Llt4\@: + /* Load 1 <=3D LEN <=3D 3 bytes: */ + add $2, %ecx /* LEN - 2 */ + movzbl (SRC), %eax /* Load first byte */ + jl .Lmovq\@ + movzwl (SRC, %rcx), %r8d /* Load last 2 bytes */ +.Lcombine\@: + shl $3, %ecx + shl %cl, %r8 + or %r8, %rax /* Combine the two parts */ +.Lmovq\@: + movq %rax, MSG +.Ldone\@: +.endm =20 /* - * __store_partial: internal ABI - * input: - * LEN - bytes - * DST - dst - * output: - * T0 - message block - * changed: - * %r8 - * %r9 - * %r10 + * Store 1 <=3D LEN (%ecx) <=3D 15 bytes from the xmm register \msg to the= pointer + * DST. Clobbers %rax, %rcx, and %r8. 
*/ -SYM_FUNC_START_LOCAL(__store_partial) - .set LEN, %ecx - .set DST, %rdx - mov LEN, %r8d - mov DST, %r9 - - movq T0, %r10 - - cmp $8, %r8 - jl .Lst_partial_8 - - mov %r10, (%r9) - psrldq $8, T0 - movq T0, %r10 - - sub $8, %r8 - add $8, %r9 - -.Lst_partial_8: - cmp $4, %r8 - jl .Lst_partial_4 - - mov %r10d, (%r9) - shr $32, %r10 - - sub $4, %r8 - add $4, %r9 - -.Lst_partial_4: - cmp $2, %r8 - jl .Lst_partial_2 - - mov %r10w, (%r9) - shr $0x10, %r10 - - sub $2, %r8 - add $2, %r9 - -.Lst_partial_2: - cmp $1, %r8 - jl .Lst_partial_1 - - mov %r10b, (%r9) - -.Lst_partial_1: - RET -SYM_FUNC_END(__store_partial) +.macro store_partial msg + sub $8, %ecx /* LEN - 8 */ + jl .Llt8\@ + + /* Store 8 <=3D LEN <=3D 15 bytes: */ + pextrq $1, \msg, %rax + mov %ecx, %r8d + shl $3, %ecx + ror %cl, %rax + mov %rax, (DST, %r8) /* Store last LEN - 8 bytes */ + movq \msg, (DST) /* Store first 8 bytes */ + jmp .Ldone\@ + +.Llt8\@: + add $4, %ecx /* LEN - 4 */ + jl .Llt4\@ + + /* Store 4 <=3D LEN <=3D 7 bytes: */ + pextrd $1, \msg, %eax + mov %ecx, %r8d + shl $3, %ecx + ror %cl, %eax + mov %eax, (DST, %r8) /* Store last LEN - 4 bytes */ + movd \msg, (DST) /* Store first 4 bytes */ + jmp .Ldone\@ + +.Llt4\@: + /* Store 1 <=3D LEN <=3D 3 bytes: */ + pextrb $0, \msg, 0(DST) + cmp $-2, %ecx /* LEN - 4 =3D=3D -2, i.e. LEN =3D=3D 2? 
*/ + jl .Ldone\@ + pextrb $1, \msg, 1(DST) + je .Ldone\@ + pextrb $2, \msg, 2(DST) +.Ldone\@: +.endm =20 /* * void aegis128_aesni_init(struct aegis_state *state, * const struct aegis_block *key, * const u8 iv[AEGIS128_NONCE_SIZE]); @@ -451,31 +406,33 @@ SYM_FUNC_END(aegis128_aesni_enc) */ SYM_FUNC_START(aegis128_aesni_enc_tail) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx - .set LEN, %ecx + .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 =20 /* encrypt message: */ - call __load_partial + mov LEN, %r9d + load_partial =20 movdqa MSG, T0 pxor STATE1, T0 pxor STATE4, T0 movdqa STATE2, T1 pand STATE3, T1 pxor T1, T0 =20 - call __store_partial + mov %r9d, LEN + store_partial T0 =20 aegis128_update pxor MSG, STATE4 =20 /* store the state: */ @@ -596,40 +553,37 @@ SYM_FUNC_END(aegis128_aesni_dec) */ SYM_FUNC_START(aegis128_aesni_dec_tail) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx - .set LEN, %ecx + .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 movdqu 0x40(STATEP), STATE4 =20 /* decrypt message: */ - call __load_partial + mov LEN, %r9d + load_partial =20 pxor STATE1, MSG pxor STATE4, MSG movdqa STATE2, T1 pand STATE3, T1 pxor T1, MSG =20 - movdqa MSG, T0 - call __store_partial + mov %r9d, LEN + store_partial MSG =20 /* mask with byte count: */ - movd LEN, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - punpcklbw T0, T0 - movdqa .Laegis128_counter(%rip), T1 - pcmpgtb T1, T0 + lea .Lzeropad_mask+16(%rip), %rax + sub %r9, %rax + movdqu (%rax), T0 pand T0, MSG =20 aegis128_update pxor MSG, STATE4 =20 --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org 
(aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 57B0A1E535; Thu, 17 Oct 2024 00:02:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123335; cv=none; b=WxMbQeq6iLTZ1Gxfru2dYQfEK2B6EJ8wc3sBE3h60Xb5bRJonG8UYNktMpSYIjQ5DqTBhscgJzlgUNzQPL93dpHRnqUdKwDFcDHN9EYO4RQM1qxINW+/TgDMGWWGIjlieM2OEipk1neuQCePiBa+YQVrUFp9QeDoacTDpzQTpdQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123335; c=relaxed/simple; bh=mnIFXtbSEJHKpgclCqmzD+cNjbdqMN9ncYjpYF69/Vs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=oOfER1Vrd8F7SLg8ZHE/yE4PZtC4aj595Q8QXsqN7BR4bA8QipktwHL43Cm7rX15+If18V8C6m5TiOFNvHrArqYXDPey/t/yIP82swB17Xcypn3rbR5n1kuhMoFog6wuJwKsY6McUAmA4cuztP0O/G3LJh6np5iwENLgBXiwef8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=P1KppPXB; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="P1KppPXB" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 10A71C4CEDA; Thu, 17 Oct 2024 00:02:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123335; bh=mnIFXtbSEJHKpgclCqmzD+cNjbdqMN9ncYjpYF69/Vs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=P1KppPXBGvcgYWV/2FnWbCakG+wL1YmhoAmDdSbF9yDWTQslTHanecuPQQAQQ9N/U t04jg8oKam8H1Q9FsQPPKwBBhObkgyMA0VDNr3SvfWenz9fSACjTkjaXyahSEvZ9dE g52Fw+sEmwtCkgSpFLpV9RG6PdxHeOwaS1xEvfEB6D5Lop/FyB6X1eHXtdRCqfwpth PJq/PlxW80ZowuWWiUkW4blAADSveb0p6gRAI45r4wr3nxeFzE0tasnKBWBsc0WvVj 3i/JPU2lHjv43VDAIg3seE9/W3BW85J51XseW5RWqeBODLNCKfkIHhqbNO0cw0WWMZ 
wi0XkVHzXyvQA== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 08/10] crypto: x86/aegis128 - take advantage of block-aligned len Date: Wed, 16 Oct 2024 17:00:49 -0700 Message-ID: <20241017000051.228294-9-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Update a caller of aegis128_aesni_ad() to round down the length to a block boundary. After that, aegis128_aesni_ad(), aegis128_aesni_enc(), and aegis128_aesni_dec() are only passed whole blocks. Update the assembly code to take advantage of that, which eliminates some unneeded instructions. For aegis128_aesni_enc() and aegis128_aesni_dec(), the length is also always nonzero, so stop checking for zero length. Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 37 +++++++++++---------------- arch/x86/crypto/aegis128-aesni-glue.c | 4 +-- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index e650330ef6951..345b1eafe45af 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -188,19 +188,21 @@ SYM_FUNC_START(aegis128_aesni_init) SYM_FUNC_END(aegis128_aesni_init) =20 /* * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, * unsigned int len); + * + * len must be a multiple of 16. 
*/ SYM_FUNC_START(aegis128_aesni_ad) .set STATEP, %rdi .set SRC, %rsi .set LEN, %edx FRAME_BEGIN =20 - cmp $0x10, LEN - jb .Lad_out + test LEN, LEN + jz .Lad_out =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -211,40 +213,35 @@ SYM_FUNC_START(aegis128_aesni_ad) .Lad_loop: movdqu 0x00(SRC), MSG aegis128_update pxor MSG, STATE4 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_1 + jz .Lad_out_1 =20 movdqu 0x10(SRC), MSG aegis128_update pxor MSG, STATE3 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_2 + jz .Lad_out_2 =20 movdqu 0x20(SRC), MSG aegis128_update pxor MSG, STATE2 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_3 + jz .Lad_out_3 =20 movdqu 0x30(SRC), MSG aegis128_update pxor MSG, STATE1 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_4 + jz .Lad_out_4 =20 movdqu 0x40(SRC), MSG aegis128_update pxor MSG, STATE0 sub $0x10, LEN - cmp $0x10, LEN - jl .Lad_out_0 + jz .Lad_out_0 =20 add $0x50, SRC jmp .Lad_loop =20 /* store the state: */ @@ -310,28 +307,26 @@ SYM_FUNC_END(aegis128_aesni_ad) =20 aegis128_update pxor MSG, \s4 =20 sub $0x10, LEN - cmp $0x10, LEN - jl .Lenc_out_\i + jz .Lenc_out_\i .endm =20 /* * void aegis128_aesni_enc(struct aegis_state *state, const u8 *src, u8 *d= st, * unsigned int len); + * + * len must be nonzero and a multiple of 16. */ SYM_FUNC_START(aegis128_aesni_enc) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx FRAME_BEGIN =20 - cmp $0x10, LEN - jb .Lenc_out - /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 @@ -457,28 +452,26 @@ SYM_FUNC_END(aegis128_aesni_enc_tail) =20 aegis128_update pxor MSG, \s4 =20 sub $0x10, LEN - cmp $0x10, LEN - jl .Ldec_out_\i + jz .Ldec_out_\i .endm =20 /* * void aegis128_aesni_dec(struct aegis_state *state, const u8 *src, u8 *d= st, * unsigned int len); + * + * len must be nonzero and a multiple of 16. 
*/ SYM_FUNC_START(aegis128_aesni_dec) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx FRAME_BEGIN =20 - cmp $0x10, LEN - jb .Ldec_out - /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 movdqu 0x30(STATEP), STATE3 diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis1= 28-aesni-glue.c index 9555958e4089d..c19d8e3d96a35 100644 --- a/arch/x86/crypto/aegis128-aesni-glue.c +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -85,12 +85,12 @@ static void crypto_aegis128_aesni_process_ad( pos =3D 0; left -=3D fill; src +=3D fill; } =20 - aegis128_aesni_ad(state, src, left); - + aegis128_aesni_ad(state, src, + left & ~(AEGIS128_BLOCK_SIZE - 1)); src +=3D left & ~(AEGIS128_BLOCK_SIZE - 1); left &=3D AEGIS128_BLOCK_SIZE - 1; } =20 memcpy(buf.bytes + pos, src, left); --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 90CEA2209D; Thu, 17 Oct 2024 00:02:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123335; cv=none; b=tn6FobFA1iQ85EzfNC205V1T+plx+CYWx6gutel/oGlRshRFZfxiGlhqChYW5YiRXyTddEZ0aSvXcc3TOb9nORHfcIMVOm5fe27B85J1fpQ38WyTWvReQiUqY5WoGYJKnqauBAUDNqQKi14seJo4CwRztQ/BzEg55MA8e7vgwl0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123335; c=relaxed/simple; bh=h2nr4+ISSvbvy89jbUAMYsXqu+yopR2HzEiAJ4mZcys=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=WC/nfoRZlvyuZXluDcaiEGjuJQJGwHczavTr3Ct3badjxxkBPNo3QowYOL8oz2DhMZITbMNyyJnLLKxbV3yllaOu6rYYdd/UfPIH0AhXrDY0jVk2GK9HpdjfW0wjf/J6OpHOzPG3NnucBnEsKCGHrlF8yplqtTaCDj7r+r3fj3g= ARC-Authentication-Results: 
i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=EBwu2Oz8; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="EBwu2Oz8" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 52449C4CECF; Thu, 17 Oct 2024 00:02:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123335; bh=h2nr4+ISSvbvy89jbUAMYsXqu+yopR2HzEiAJ4mZcys=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=EBwu2Oz8n/4FfbYFrh4gNLOuf+nQPECwO2bUZOAUdMRn0wvbJva564Y6lmNi3V0Re aQg2ob8GuKqTmi7VuFDzEfxfNulfcDtk+pvpKiViUtk73YgTzHbV0i3Io+LOuu1xL9 PKxUn+LzVEEaf+UDd8ptGArdqsjnTCmQBHVOtMMAoHVJEhtaLCRZJOYZ2wNkld1hve JGFlTlixwLeAHIgYGqRkzDSA9wsQZfLmFEcwGyiYo6CeEhJ9CqPRdGLDm3p1Kr9IDv RZA4j9IhqsVaVDREu+SI+mkpNyrelDWnTuF1DnH3l57TVrAgOriB4A3mYbfr0MQUMq ONCS95JkUPdfA== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 09/10] crypto: x86/aegis128 - remove unneeded FRAME_BEGIN and FRAME_END Date: Wed, 16 Oct 2024 17:00:50 -0700 Message-ID: <20241017000051.228294-10-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Stop using FRAME_BEGIN and FRAME_END in the AEGIS assembly functions, since all these functions are now leaf functions. This eliminates some unnecessary instructions. 
Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 34 ---------------------------------- 1 file changed, 34 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index 345b1eafe45af..42f25fea4e082 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -6,11 +6,10 @@ * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. * Copyright 2024 Google LLC */ =20 #include <linux/linkage.h> -#include <asm/frame.h> =20 #define STATE0 %xmm0 #define STATE1 %xmm1 #define STATE2 %xmm2 #define STATE3 %xmm3 @@ -144,11 +143,10 @@ */ SYM_FUNC_START(aegis128_aesni_init) .set STATEP, %rdi .set KEYP, %rsi .set IVP, %rdx - FRAME_BEGIN =20 /* load IV: */ movdqu (IVP), T1 =20 /* load key: */ @@ -180,12 +178,10 @@ SYM_FUNC_START(aegis128_aesni_init) movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - - FRAME_END RET SYM_FUNC_END(aegis128_aesni_init) =20 /* * void aegis128_aesni_ad(struct aegis_state *state, const u8 *data, @@ -195,11 +191,10 @@ SYM_FUNC_END(aegis128_aesni_init) */ SYM_FUNC_START(aegis128_aesni_ad) .set STATEP, %rdi .set SRC, %rsi .set LEN, %edx - FRAME_BEGIN =20 test LEN, LEN jz .Lad_out =20 /* load the state: */ @@ -249,51 +244,45 @@ SYM_FUNC_START(aegis128_aesni_ad) movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - FRAME_END RET =20 .Lad_out_1: movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - FRAME_END RET =20 .Lad_out_2: movdqu STATE3, 0x00(STATEP) movdqu STATE4, 0x10(STATEP) movdqu STATE0, 0x20(STATEP) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) - FRAME_END RET =20 .Lad_out_3: movdqu STATE2, 0x00(STATEP) movdqu STATE3, 0x10(STATEP) movdqu STATE4, 0x20(STATEP) movdqu STATE0,
0x30(STATEP) movdqu STATE1, 0x40(STATEP) - FRAME_END RET =20 .Lad_out_4: movdqu STATE1, 0x00(STATEP) movdqu STATE2, 0x10(STATEP) movdqu STATE3, 0x20(STATEP) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) - FRAME_END RET =20 .Lad_out: - FRAME_END RET SYM_FUNC_END(aegis128_aesni_ad) =20 .macro encrypt_block s0 s1 s2 s3 s4 i movdqu (\i * 0x10)(SRC), MSG @@ -321,11 +310,10 @@ SYM_FUNC_END(aegis128_aesni_ad) SYM_FUNC_START(aegis128_aesni_enc) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx - FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -349,51 +337,45 @@ SYM_FUNC_START(aegis128_aesni_enc) movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - FRAME_END RET =20 .Lenc_out_1: movdqu STATE3, 0x00(STATEP) movdqu STATE4, 0x10(STATEP) movdqu STATE0, 0x20(STATEP) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) - FRAME_END RET =20 .Lenc_out_2: movdqu STATE2, 0x00(STATEP) movdqu STATE3, 0x10(STATEP) movdqu STATE4, 0x20(STATEP) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) - FRAME_END RET =20 .Lenc_out_3: movdqu STATE1, 0x00(STATEP) movdqu STATE2, 0x10(STATEP) movdqu STATE3, 0x20(STATEP) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) - FRAME_END RET =20 .Lenc_out_4: movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - FRAME_END RET =20 .Lenc_out: - FRAME_END RET SYM_FUNC_END(aegis128_aesni_enc) =20 /* * void aegis128_aesni_enc_tail(struct aegis_state *state, const u8 *src, @@ -402,11 +384,10 @@ SYM_FUNC_END(aegis128_aesni_enc) SYM_FUNC_START(aegis128_aesni_enc_tail) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ - FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 
0x20(STATEP), STATE2 @@ -434,12 +415,10 @@ SYM_FUNC_START(aegis128_aesni_enc_tail) movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - - FRAME_END RET SYM_FUNC_END(aegis128_aesni_enc_tail) =20 .macro decrypt_block s0 s1 s2 s3 s4 i movdqu (\i * 0x10)(SRC), MSG @@ -466,11 +445,10 @@ SYM_FUNC_END(aegis128_aesni_enc_tail) SYM_FUNC_START(aegis128_aesni_dec) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx - FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -494,51 +472,45 @@ SYM_FUNC_START(aegis128_aesni_dec) movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - FRAME_END RET =20 .Ldec_out_1: movdqu STATE3, 0x00(STATEP) movdqu STATE4, 0x10(STATEP) movdqu STATE0, 0x20(STATEP) movdqu STATE1, 0x30(STATEP) movdqu STATE2, 0x40(STATEP) - FRAME_END RET =20 .Ldec_out_2: movdqu STATE2, 0x00(STATEP) movdqu STATE3, 0x10(STATEP) movdqu STATE4, 0x20(STATEP) movdqu STATE0, 0x30(STATEP) movdqu STATE1, 0x40(STATEP) - FRAME_END RET =20 .Ldec_out_3: movdqu STATE1, 0x00(STATEP) movdqu STATE2, 0x10(STATEP) movdqu STATE3, 0x20(STATEP) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) - FRAME_END RET =20 .Ldec_out_4: movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - FRAME_END RET =20 .Ldec_out: - FRAME_END RET SYM_FUNC_END(aegis128_aesni_dec) =20 /* * void aegis128_aesni_dec_tail(struct aegis_state *state, const u8 *src, @@ -547,11 +519,10 @@ SYM_FUNC_END(aegis128_aesni_dec) SYM_FUNC_START(aegis128_aesni_dec_tail) .set STATEP, %rdi .set SRC, %rsi .set DST, %rdx .set LEN, %ecx /* {load,store}_partial rely on this being %ecx */ - FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 
0x20(STATEP), STATE2 @@ -584,12 +555,10 @@ SYM_FUNC_START(aegis128_aesni_dec_tail) movdqu STATE4, 0x00(STATEP) movdqu STATE0, 0x10(STATEP) movdqu STATE1, 0x20(STATEP) movdqu STATE2, 0x30(STATEP) movdqu STATE3, 0x40(STATEP) - - FRAME_END RET SYM_FUNC_END(aegis128_aesni_dec_tail) =20 /* * void aegis128_aesni_final(struct aegis_state *state, @@ -599,11 +568,10 @@ SYM_FUNC_END(aegis128_aesni_dec_tail) SYM_FUNC_START(aegis128_aesni_final) .set STATEP, %rdi .set TAG_XOR, %rsi .set ASSOCLEN, %edx .set CRYPTLEN, %ecx - FRAME_BEGIN =20 /* load the state: */ movdqu 0x00(STATEP), STATE0 movdqu 0x10(STATEP), STATE1 movdqu 0x20(STATEP), STATE2 @@ -634,9 +602,7 @@ SYM_FUNC_START(aegis128_aesni_final) pxor STATE2, MSG pxor STATE3, MSG pxor STATE4, MSG =20 movdqu MSG, (TAG_XOR) - - FRAME_END RET SYM_FUNC_END(aegis128_aesni_final) --=20 2.47.0 From nobody Tue Nov 26 15:19:26 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 46B572B9BC; Thu, 17 Oct 2024 00:02:15 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123336; cv=none; b=IlCWrvSrGOIyipfs6aYq4Z4f65MZpUVI7xSKxyZ4m6+eK2Z6NGlZOCCrew8johMNyLDoPP307mU6Yx0sTB7FoCgMN+8TXEWQlk6sdEMngRMmEtT/stEteLtjGZdhD9+So+7zn6A1qQUswqCU09bQwNSM95u2GBsPIDIb7tYHZKk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729123336; c=relaxed/simple; bh=hYzZxMzvH6bhj9Qc158JhHNrZoSgHCIZdmOezInyl9c=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=kVB49+4NKGwQb8lJ63DKNKyvgCKh6kA41KeOzM3FeutEJ+S2W8jSfMyXca1SrHfnPTyKOHPcBbJPVgmImUJ7H01AgrrE+JtO1A08tYpmn/7iRx2TJIbZpd5jKvFgxwGb4ukwASlzt1bFjOb0eMiGysX2NG2RvsYFAN/lKbDIWPY= ARC-Authentication-Results: i=1; 
smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=nRt7VT4y; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="nRt7VT4y" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 956BBC4CEDA; Thu, 17 Oct 2024 00:02:15 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729123335; bh=hYzZxMzvH6bhj9Qc158JhHNrZoSgHCIZdmOezInyl9c=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=nRt7VT4yqdUDci1D4NMkpyRyDO/GlRMb9tHP63k7yvAOd0x0lBd/jbcoHt+OqG8VY UfoiYKdcseqn2Vc8o35sTKVZTBlvvtDX24a+98NJqP9pu8I4YjwI+pXXuif2627LFz wXHPASgDbLS5wIWioEJi0g0uuE6pF6y0bZVFihOrOYQewSt73amfwG4IW18VupyFrO D5lCm9WLWJlKn/GnM9CHpxOLHu5sH6nOydHBI/CNAmIsNDI4mIPIoZ3pIM/DDmR7tb iTsW85DnTj8E0JxcJQyoOvjkUNDdQyZSeCWngioIzXGvxIKEfxiDKgPD1qGAHtBRoC zabntDlaCf/5g== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: x86@kernel.org, Ondrej Mosnacek , linux-kernel@vger.kernel.org Subject: [PATCH v2 10/10] crypto: x86/aegis128 - remove unneeded RETs Date: Wed, 16 Oct 2024 17:00:51 -0700 Message-ID: <20241017000051.228294-11-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241017000051.228294-1-ebiggers@kernel.org> References: <20241017000051.228294-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Remove returns that are immediately followed by another return. 
Reviewed-by: Ondrej Mosnacek Signed-off-by: Eric Biggers --- arch/x86/crypto/aegis128-aesni-asm.S | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis12= 8-aesni-asm.S index 42f25fea4e082..7294dc0ee7baa 100644 --- a/arch/x86/crypto/aegis128-aesni-asm.S +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -276,12 +276,10 @@ SYM_FUNC_START(aegis128_aesni_ad) movdqu STATE1, 0x00(STATEP) movdqu STATE2, 0x10(STATEP) movdqu STATE3, 0x20(STATEP) movdqu STATE4, 0x30(STATEP) movdqu STATE0, 0x40(STATEP) - RET - .Lad_out: RET SYM_FUNC_END(aegis128_aesni_ad) =20 .macro encrypt_block s0 s1 s2 s3 s4 i @@ -369,12 +367,10 @@ SYM_FUNC_START(aegis128_aesni_enc) movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - RET - .Lenc_out: RET SYM_FUNC_END(aegis128_aesni_enc) =20 /* @@ -504,12 +500,10 @@ SYM_FUNC_START(aegis128_aesni_dec) movdqu STATE0, 0x00(STATEP) movdqu STATE1, 0x10(STATEP) movdqu STATE2, 0x20(STATEP) movdqu STATE3, 0x30(STATEP) movdqu STATE4, 0x40(STATEP) - RET - .Ldec_out: RET SYM_FUNC_END(aegis128_aesni_dec) =20 /* --=20 2.47.0