From nobody Fri Oct 3 14:34:23 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A702C322DC4; Fri, 29 Aug 2025 15:26:26 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756481186; cv=none; b=Es9bXJTL8lYVvi253jUbPm1a0ZLnesXtRqZrRjwgVQf9lhn5ryq8CGi1uUxMUY3uhVJVstuICta8Ly4lmr40eMyFedZkdYJsxj+N1S/a3fdQHdKVGPUYfvay8NK6LI91WniI8Vw87i5CbneNf0/sIW0U948+b6HbcDQ47OmNN/w= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756481186; c=relaxed/simple; bh=6I+Wb/aEB8jPWbBTlpEsierId3E1MIMBuEPA7Zmdpeo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=uNvmoI2u4OBrIo2JXW0q+Ag5ZIPBYRWFuPKgPRhluXxzIJCWvzeTSoYNivE3Eg1sz2vd2vu/a7VvivN69tpHMjbQdOqb5S6fNB7QvIxpsrrRPUfJLodfdk7MIDrULFviDlUDou+9+J2jZP+W4WNmbsKNpic6ghqZQm86Qxd8dN4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=LxhDN1X1; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="LxhDN1X1" Received: by smtp.kernel.org (Postfix) with ESMTPSA id F3A1FC4CEF5; Fri, 29 Aug 2025 15:26:25 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1756481186; bh=6I+Wb/aEB8jPWbBTlpEsierId3E1MIMBuEPA7Zmdpeo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LxhDN1X1MDSozZ8VKfnUvne6zetiil28tw1KT7dak9g52EJYAWdSpOfQx483QkVk4 jpe2f0jSpbNzvLSlD3BZ57RtVg5PH+mBRMsIxLwgKSBpc301s2ImW9VcDs0tCJOqhz 0lbaKxEVOYdXweVCU1zGq4NrbFUzVqfrS3ex2a8hwm81m5Oly4jeTBk3wZbJ9a5t12 DcYJQlmSBnADKLQ1BtrgOO+MCWtS1cAmjc5Twr+YH+87rd4v5ffKHbmw+ouDhi3OTd 
w12fZrCXLiGn/90Khknd2zk4TsGV54wnOMfqV2t6SVWjcTzz5hIjs9KHzR50gOWy9D JN8/ixfXucB8w== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org, linux-riscv@lists.infradead.org, Ard Biesheuvel , "Jason A . Donenfeld" , Zhihang Shao , Andy Polyakov , Eric Biggers Subject: [PATCH v3 1/3] lib/crypto: poly1305: Remove unused function poly1305_is_arch_optimized() Date: Fri, 29 Aug 2025 08:25:11 -0700 Message-ID: <20250829152513.92459-2-ebiggers@kernel.org> X-Mailer: git-send-email 2.50.1 In-Reply-To: <20250829152513.92459-1-ebiggers@kernel.org> References: <20250829152513.92459-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" poly1305_is_arch_optimized() is unused, so remove it. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- include/crypto/poly1305.h | 9 --------- lib/crypto/arm/poly1305-glue.c | 7 ------- lib/crypto/arm64/poly1305-glue.c | 7 ------- lib/crypto/mips/poly1305-glue.c | 6 ------ lib/crypto/powerpc/poly1305-p10-glue.c | 6 ------ lib/crypto/x86/poly1305_glue.c | 6 ------ 6 files changed, 41 deletions(-) diff --git a/include/crypto/poly1305.h b/include/crypto/poly1305.h index e54abda8cfe95..d4daeec8da19d 100644 --- a/include/crypto/poly1305.h +++ b/include/crypto/poly1305.h @@ -62,15 +62,6 @@ void poly1305_init(struct poly1305_desc_ctx *desc, const u8 key[POLY1305_KEY_SIZE]); void poly1305_update(struct poly1305_desc_ctx *desc, const u8 *src, unsigned int nbytes); void poly1305_final(struct poly1305_desc_ctx *desc, u8 *digest); =20 -#if IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305) -bool poly1305_is_arch_optimized(void); -#else -static inline bool poly1305_is_arch_optimized(void) -{ - return false; -} -#endif - #endif diff --git a/lib/crypto/arm/poly1305-glue.c b/lib/crypto/arm/poly1305-glue.c index 2d86c78af8837..9e513e319e37c 100644 --- 
a/lib/crypto/arm/poly1305-glue.c +++ b/lib/crypto/arm/poly1305-glue.c @@ -49,17 +49,10 @@ void poly1305_blocks_arch(struct poly1305_block_state *= state, const u8 *src, } else poly1305_blocks_arm(state, src, len, padbit); } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); =20 -bool poly1305_is_arch_optimized(void) -{ - /* We always can use at least the ARM scalar implementation. */ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init arm_poly1305_mod_init(void) { if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && (elf_hwcap & HWCAP_NEON)) static_branch_enable(&have_neon); diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-g= lue.c index 31aea21ce42f7..d4a522e7d25a9 100644 --- a/lib/crypto/arm64/poly1305-glue.c +++ b/lib/crypto/arm64/poly1305-glue.c @@ -48,17 +48,10 @@ void poly1305_blocks_arch(struct poly1305_block_state *= state, const u8 *src, } else poly1305_blocks(state, src, len, padbit); } EXPORT_SYMBOL_GPL(poly1305_blocks_arch); =20 -bool poly1305_is_arch_optimized(void) -{ - /* We always can use at least the ARM64 scalar implementation. 
*/ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init neon_poly1305_mod_init(void) { if (cpu_have_named_feature(ASIMD)) static_branch_enable(&have_neon); return 0; diff --git a/lib/crypto/mips/poly1305-glue.c b/lib/crypto/mips/poly1305-glu= e.c index 764a38a652002..002f50f710aba 100644 --- a/lib/crypto/mips/poly1305-glue.c +++ b/lib/crypto/mips/poly1305-glue.c @@ -21,13 +21,7 @@ EXPORT_SYMBOL_GPL(poly1305_blocks_arch); asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]); EXPORT_SYMBOL_GPL(poly1305_emit_arch); =20 -bool poly1305_is_arch_optimized(void) -{ - return true; -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated"); MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/powerpc/poly1305-p10-glue.c b/lib/crypto/powerpc/po= ly1305-p10-glue.c index 3f1664a724b65..184a71f9c1dee 100644 --- a/lib/crypto/powerpc/poly1305-p10-glue.c +++ b/lib/crypto/powerpc/poly1305-p10-glue.c @@ -70,16 +70,10 @@ void poly1305_emit_arch(const struct poly1305_state *st= ate, return poly1305_emit_generic(state, digest, nonce); poly1305_emit_64(state, nonce, digest); } EXPORT_SYMBOL_GPL(poly1305_emit_arch); =20 -bool poly1305_is_arch_optimized(void) -{ - return static_key_enabled(&have_p10); -} -EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init poly1305_p10_init(void) { if (cpu_has_feature(CPU_FTR_ARCH_31)) static_branch_enable(&have_p10); return 0; diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305_glue.c index 856d48fd422b0..deb5841cb0ada 100644 --- a/lib/crypto/x86/poly1305_glue.c +++ b/lib/crypto/x86/poly1305_glue.c @@ -139,16 +139,10 @@ void poly1305_emit_arch(const struct poly1305_state *= ctx, else poly1305_emit_avx(ctx, mac, nonce); } EXPORT_SYMBOL_GPL(poly1305_emit_arch); =20 -bool poly1305_is_arch_optimized(void) -{ - return static_key_enabled(&poly1305_use_avx); -} 
-EXPORT_SYMBOL(poly1305_is_arch_optimized); - static int __init poly1305_simd_mod_init(void) { if (boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) static_branch_enable(&poly1305_use_avx); --=20 2.50.1 From nobody Fri Oct 3 14:34:23 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 10D2D322DD5; Fri, 29 Aug 2025 15:26:26 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756481187; cv=none; b=YL7kHbQVcNk7JRS/38voEdQjX2g/M9OzO6wiYvmyzzkUfE49+WHUv2Rc86m+6K4VD/QCw6mWktiFWoi46HLpDSoz7UE6K4m+3jzGu6OVvKdKDNrFC5uydNTJOLzqdq2wrp5om0u0QKGHoA4sSTE1ee7XamlhgqXuQClvoqys074= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756481187; c=relaxed/simple; bh=7gBq+9rpxnMYV00k2zrrkq7REFGoZXK6EBtJntVmOgc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=EjilAddSL7iXggwp89y20zsf1ZJLhCsmvTwVM07kGUEJPcP6dQRr3H0KpG2axZQLJjw61g/VvFO95/it7jjz84uxxMV1uHAScP3vQ21Xz4KUmRMBvPBE3kurAz73t94DP5mYNv039CctPzVbJyitLdbc82q+MaGEc3fd2JDXPJs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=eevTrGTJ; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="eevTrGTJ" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5C2C6C4CEF7; Fri, 29 Aug 2025 15:26:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1756481186; bh=7gBq+9rpxnMYV00k2zrrkq7REFGoZXK6EBtJntVmOgc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=eevTrGTJG848waYjjkJ2XClPWVwqggkOGDZKIWYmupkaRIhDjvo5dJs5ToSM/dRrt /5ymRPY5YHNix2Z8ibSWHo9TCu567hqR/P8aoa8m7MAUIful/rjz48CJQu/AOZ/4hJ +qfmurCwxDbx4Os0hFjBBMMmSetdh57LoPnP8fCgXiNW4JX473TrF4pxdQQJ46+LnP hbak+KGowmd66Bt16mb7lPeyQuIvjYunfM5W4E30hQ70r3a6sIQ/zQewIy5wk6mmai 0yiyssIgjmdiv1FDrW3vq+xKVonXsopo1kKP03hVosETdyrE05u7kWrW2Qg6AzjzWQ +MuIhm85CVh9Q== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org, linux-riscv@lists.infradead.org, Ard Biesheuvel , "Jason A . Donenfeld" , Zhihang Shao , Andy Polyakov , Eric Biggers Subject: [PATCH v3 2/3] lib/crypto: poly1305: Consolidate into single module Date: Fri, 29 Aug 2025 08:25:12 -0700 Message-ID: <20250829152513.92459-3-ebiggers@kernel.org> X-Mailer: git-send-email 2.50.1 In-Reply-To: <20250829152513.92459-1-ebiggers@kernel.org> References: <20250829152513.92459-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" Consolidate the Poly1305 code into a single module, similar to various other algorithms (SHA-1, SHA-256, SHA-512, etc.): - Each arch now provides a header file lib/crypto/$(SRCARCH)/poly1305.h, replacing lib/crypto/$(SRCARCH)/poly1305*.c. The header defines poly1305_block_init(), poly1305_blocks(), poly1305_emit(), and optionally poly1305_mod_init_arch(). It is included by lib/crypto/poly1305.c, and thus the code gets built into the single libpoly1305 module, with improved inlining in some cases. - Whether arch-optimized Poly1305 is buildable is now controlled centrally by lib/crypto/Kconfig instead of by lib/crypto/$(SRCARCH)/Kconfig. The conditions for enabling it remain the same as before, and it remains enabled by default. (The PPC64 one remains unconditionally disabled due to 'depends on BROKEN'.) 
- Any additional arch-specific translation units for the optimized Poly1305 code, such as assembly files, are now compiled by lib/crypto/Makefile instead of lib/crypto/$(SRCARCH)/Makefile. A special consideration is needed because the Adiantum code uses the poly1305_core_*() functions directly. For now, just carry forward that approach. This means retaining the CRYPTO_LIB_POLY1305_GENERIC kconfig symbol, and keeping the poly1305_core_*() functions in separate translation units. So it's not quite as streamlined as I've done with the other hash functions, but we still get a single libpoly1305 module. Note: to see the diff from the arm, arm64, and x86 .c files to the new .h files, view this commit with 'git show -M10'. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- crypto/Kconfig | 2 + include/crypto/internal/poly1305.h | 16 ++-- lib/crypto/Kconfig | 50 ++++++------ lib/crypto/Makefile | 59 ++++++++++++-- lib/crypto/arm/Kconfig | 5 -- lib/crypto/arm/Makefile | 18 ----- lib/crypto/arm/poly1305-armv4.pl | 3 +- lib/crypto/arm/poly1305-glue.c | 69 ---------------- lib/crypto/arm/poly1305.h | 53 ++++++++++++ lib/crypto/arm64/Kconfig | 6 -- lib/crypto/arm64/Makefile | 13 --- lib/crypto/arm64/poly1305-armv8.pl | 3 + lib/crypto/arm64/poly1305-glue.c | 67 --------------- lib/crypto/arm64/poly1305.h | 50 ++++++++++++ lib/crypto/mips/Kconfig | 5 -- lib/crypto/mips/Makefile | 14 ---- lib/crypto/mips/poly1305-glue.c | 27 ------- lib/crypto/mips/poly1305-mips.pl | 8 +- lib/crypto/mips/poly1305.h | 14 ++++ lib/crypto/poly1305-generic.c | 25 ------ lib/crypto/poly1305.c | 81 ++++++++++++------- lib/crypto/powerpc/Kconfig | 8 -- lib/crypto/powerpc/Makefile | 3 - .../{poly1305-p10-glue.c =3D> poly1305.h} | 34 +++----- lib/crypto/x86/Kconfig | 6 -- lib/crypto/x86/Makefile | 10 --- lib/crypto/x86/poly1305-x86_64-cryptogams.pl | 33 +++----- .../x86/{poly1305_glue.c =3D> poly1305.h} | 41 ++++------ 28 files changed, 299 insertions(+), 424 deletions(-) delete mode 100644 
lib/crypto/arm/poly1305-glue.c create mode 100644 lib/crypto/arm/poly1305.h delete mode 100644 lib/crypto/arm64/poly1305-glue.c create mode 100644 lib/crypto/arm64/poly1305.h delete mode 100644 lib/crypto/mips/poly1305-glue.c create mode 100644 lib/crypto/mips/poly1305.h delete mode 100644 lib/crypto/poly1305-generic.c rename lib/crypto/powerpc/{poly1305-p10-glue.c =3D> poly1305.h} (66%) rename lib/crypto/x86/{poly1305_glue.c =3D> poly1305.h} (85%) diff --git a/crypto/Kconfig b/crypto/Kconfig index 1575dbec084d6..e8ccf5f51b855 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -607,10 +607,11 @@ endmenu menu "Length-preserving ciphers and modes" =20 config CRYPTO_ADIANTUM tristate "Adiantum" select CRYPTO_CHACHA20 + select CRYPTO_LIB_POLY1305 select CRYPTO_LIB_POLY1305_GENERIC select CRYPTO_NHPOLY1305 select CRYPTO_MANAGER help Adiantum tweakable, length-preserving encryption mode @@ -768,10 +769,11 @@ config CRYPTO_XTS multiple of 16 bytes. =20 config CRYPTO_NHPOLY1305 tristate select CRYPTO_HASH + select CRYPTO_LIB_POLY1305 select CRYPTO_LIB_POLY1305_GENERIC =20 endmenu =20 menu "AEAD (authenticated encryption with associated data) ciphers" diff --git a/include/crypto/internal/poly1305.h b/include/crypto/internal/p= oly1305.h index c60315f475623..a72fff409ab85 100644 --- a/include/crypto/internal/poly1305.h +++ b/include/crypto/internal/poly1305.h @@ -28,28 +28,26 @@ void poly1305_core_blocks(struct poly1305_state *state, const struct poly1305_core_key *key, const void *src, unsigned int nblocks, u32 hibit); void poly1305_core_emit(const struct poly1305_state *state, const u32 nonc= e[4], void *dst); =20 -void poly1305_block_init_arch(struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -void poly1305_block_init_generic(struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *sr= c, - unsigned int len, u32 padbit); +static inline void 
+poly1305_block_init_generic(struct poly1305_block_state *desc, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + poly1305_core_init(&desc->h); + poly1305_core_setkey(&desc->core_r, raw_key); +} =20 static inline void poly1305_blocks_generic(struct poly1305_block_state *st= ate, const u8 *src, unsigned int len, u32 padbit) { poly1305_core_blocks(&state->h, &state->core_r, src, len / POLY1305_BLOCK_SIZE, padbit); } =20 -void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]); - static inline void poly1305_emit_generic(const struct poly1305_state *stat= e, u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]) { poly1305_core_emit(state, nonce, digest); diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 79b848448e07f..9991118c41a9d 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -112,40 +112,44 @@ config CRYPTO_LIB_MD5_ARCH depends on CRYPTO_LIB_MD5 && !UML default y if MIPS && CPU_CAVIUM_OCTEON default y if PPC default y if SPARC64 =20 +config CRYPTO_LIB_POLY1305 + tristate + help + The Poly1305 library functions. Select this if your module uses any + of the functions from . + +config CRYPTO_LIB_POLY1305_ARCH + bool + depends on CRYPTO_LIB_POLY1305 && !UML + default y if ARM + default y if ARM64 && KERNEL_MODE_NEON + default y if MIPS + # The PPC64 code needs to be fixed to work in softirq context. + default y if PPC64 && CPU_LITTLE_ENDIAN && VSX && BROKEN + default y if X86_64 + +# This symbol controls the inclusion of the Poly1305 generic code. This d= iffers +# from most of the other algorithms, which handle the generic code +# "automatically" via __maybe_unused. This is needed so that the Adiantum= code, +# which calls the poly1305_core_*() functions directly, can enable them. +config CRYPTO_LIB_POLY1305_GENERIC + bool + depends on CRYPTO_LIB_POLY1305 + # Enable if there's no arch impl or the arch impl requires the generic + # impl as a fallback. (Or if selected explicitly.) 
+ default y if !CRYPTO_LIB_POLY1305_ARCH || PPC64 + config CRYPTO_LIB_POLY1305_RSIZE int default 2 if MIPS default 11 if X86_64 default 9 if ARM || ARM64 default 1 =20 -config CRYPTO_ARCH_HAVE_LIB_POLY1305 - bool - help - Declares whether the architecture provides an arch-specific - accelerated implementation of the Poly1305 library interface, - either builtin or as a module. - -config CRYPTO_LIB_POLY1305_GENERIC - tristate - default CRYPTO_LIB_POLY1305 if !CRYPTO_ARCH_HAVE_LIB_POLY1305 - help - This symbol can be selected by arch implementations of the Poly1305 - library interface that require the generic code as a fallback, e.g., - for SIMD implementations. If no arch specific implementation is - enabled, this implementation serves the users of CRYPTO_LIB_POLY1305. - -config CRYPTO_LIB_POLY1305 - tristate - help - Enable the Poly1305 library interface. This interface may be fulfilled - by either the generic implementation or an arch-specific one, if one - is available and enabled. - config CRYPTO_LIB_CHACHA20POLY1305 tristate select CRYPTO_LIB_CHACHA select CRYPTO_LIB_POLY1305 select CRYPTO_LIB_UTILS diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index d362636a22d38..e0536e3b3a04c 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -69,17 +69,64 @@ libmd5-$(CONFIG_PPC) +=3D powerpc/md5-asm.o libmd5-$(CONFIG_SPARC) +=3D sparc/md5_asm.o endif # CONFIG_CRYPTO_LIB_MD5_ARCH =20 ##########################################################################= ###### =20 -obj-$(CONFIG_CRYPTO_LIB_POLY1305) +=3D libpoly1305.o -libpoly1305-y +=3D poly1305.o +obj-$(CONFIG_CRYPTO_LIB_POLY1305) +=3D libpoly1305.o +libpoly1305-y :=3D poly1305.o +ifeq ($(CONFIG_ARCH_SUPPORTS_INT128),y) +libpoly1305-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) +=3D poly1305-donna64.o +else +libpoly1305-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) +=3D poly1305-donna32.o +endif + +ifeq ($(CONFIG_CRYPTO_LIB_POLY1305_ARCH),y) +CFLAGS_poly1305.o +=3D -I$(src)/$(SRCARCH) + +ifeq ($(CONFIG_ARM),y) 
+libpoly1305-y +=3D arm/poly1305-core.o +$(obj)/arm/poly1305-core.S: $(src)/arm/poly1305-armv4.pl + $(call cmd,perlasm) +# massage the perlasm code a bit so we only get the NEON routine if we nee= d it +poly1305-aflags-$(CONFIG_CPU_V7) :=3D -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_A= RCH__=3D5 +poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) :=3D -U__LINUX_ARM_ARCH__ -D__L= INUX_ARM_ARCH__=3D7 +AFLAGS_arm/poly1305-core.o +=3D $(poly1305-aflags-y) $(aflags-thumb2-y) +endif + +ifeq ($(CONFIG_ARM64),y) +libpoly1305-y +=3D arm64/poly1305-core.o +$(obj)/arm64/poly1305-core.S: $(src)/arm64/poly1305-armv8.pl + $(call cmd,perlasm_with_args) +endif + +ifeq ($(CONFIG_MIPS),y) +libpoly1305-y +=3D mips/poly1305-core.o +poly1305-perlasm-flavour-$(CONFIG_32BIT) :=3D o32 +poly1305-perlasm-flavour-$(CONFIG_64BIT) :=3D 64 +quiet_cmd_perlasm_poly1305 =3D PERLASM $@ + cmd_perlasm_poly1305 =3D $(PERL) $< $(poly1305-perlasm-flavour-y) $@ +# Use if_changed instead of cmd, in case the flavour changed. +$(obj)/mips/poly1305-core.S: $(src)/mips/poly1305-mips.pl FORCE + $(call if_changed,perlasm_poly1305) +targets +=3D mips/poly1305-core.S +endif =20 -obj-$(CONFIG_CRYPTO_LIB_POLY1305_GENERIC) +=3D libpoly1305-generic.o -libpoly1305-generic-y :=3D poly1305-donna32.o -libpoly1305-generic-$(CONFIG_ARCH_SUPPORTS_INT128) :=3D poly1305-donna64.o -libpoly1305-generic-y +=3D poly1305-generic.o +libpoly1305-$(CONFIG_PPC) +=3D powerpc/poly1305-p10le_64.o + +ifeq ($(CONFIG_X86),y) +libpoly1305-y +=3D x86/poly1305-x86_64-cryptogams.o +$(obj)/x86/poly1305-x86_64-cryptogams.S: $(src)/x86/poly1305-x86_64-crypto= gams.pl + $(call cmd,perlasm) +endif + +endif # CONFIG_CRYPTO_LIB_POLY1305_ARCH + +# clean-files must be defined unconditionally +clean-files +=3D arm/poly1305-core.S \ + arm64/poly1305-core.S \ + mips/poly1305-core.S \ + x86/poly1305-x86_64-cryptogams.S =20 ##########################################################################= ###### =20 obj-$(CONFIG_CRYPTO_LIB_SHA1) +=3D libsha1.o libsha1-y :=3D 
sha1.o diff --git a/lib/crypto/arm/Kconfig b/lib/crypto/arm/Kconfig index e8444fd0aae30..0d821e282c645 100644 --- a/lib/crypto/arm/Kconfig +++ b/lib/crypto/arm/Kconfig @@ -15,10 +15,5 @@ config CRYPTO_BLAKE2S_ARM =20 config CRYPTO_CHACHA20_NEON tristate default CRYPTO_LIB_CHACHA select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_ARM - tristate - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/arm/Makefile b/lib/crypto/arm/Makefile index 4c042a4c77ed6..9f70e61d419e2 100644 --- a/lib/crypto/arm/Makefile +++ b/lib/crypto/arm/Makefile @@ -4,23 +4,5 @@ obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) +=3D libblake2s-arm.o libblake2s-arm-y :=3D blake2s-core.o blake2s-glue.o =20 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) +=3D chacha-neon.o chacha-neon-y :=3D chacha-scalar-core.o chacha-glue.o chacha-neon-$(CONFIG_KERNEL_MODE_NEON) +=3D chacha-neon-core.o - -obj-$(CONFIG_CRYPTO_POLY1305_ARM) +=3D poly1305-arm.o -poly1305-arm-y :=3D poly1305-core.o poly1305-glue.o - -quiet_cmd_perl =3D PERL $@ - cmd_perl =3D $(PERL) $(<) > $(@) - -$(obj)/%-core.S: $(src)/%-armv4.pl - $(call cmd,perl) - -clean-files +=3D poly1305-core.S - -aflags-thumb2-$(CONFIG_THUMB2_KERNEL) :=3D -U__thumb2__ -D__thumb2__=3D1 - -# massage the perlasm code a bit so we only get the NEON routine if we nee= d it -poly1305-aflags-$(CONFIG_CPU_V7) :=3D -U__LINUX_ARM_ARCH__ -D__LINUX_ARM_A= RCH__=3D5 -poly1305-aflags-$(CONFIG_KERNEL_MODE_NEON) :=3D -U__LINUX_ARM_ARCH__ -D__L= INUX_ARM_ARCH__=3D7 -AFLAGS_poly1305-core.o +=3D $(poly1305-aflags-y) $(aflags-thumb2-y) diff --git a/lib/crypto/arm/poly1305-armv4.pl b/lib/crypto/arm/poly1305-arm= v4.pl index dd7a996361a71..34c11b7b44bd7 100644 --- a/lib/crypto/arm/poly1305-armv4.pl +++ b/lib/crypto/arm/poly1305-armv4.pl @@ -41,13 +41,12 @@ $code.=3D<<___; #ifndef __KERNEL__ # include "arm_arch.h" #else # define __ARM_ARCH__ __LINUX_ARM_ARCH__ # define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__ -# define poly1305_init poly1305_block_init_arch +# 
define poly1305_init poly1305_block_init # define poly1305_blocks poly1305_blocks_arm -# define poly1305_emit poly1305_emit_arch #endif =20 #if defined(__thumb2__) .syntax unified .thumb diff --git a/lib/crypto/arm/poly1305-glue.c b/lib/crypto/arm/poly1305-glue.c deleted file mode 100644 index 9e513e319e37c..0000000000000 --- a/lib/crypto/arm/poly1305-glue.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM - * - * Copyright (C) 2019 Linaro Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *sr= c, - unsigned int len, u32 padbit) -{ - len =3D round_down(len, POLY1305_BLOCK_SIZE); - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - static_branch_likely(&have_neon) && likely(may_use_simd())) { - do { - unsigned int todo =3D min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, padbit); - kernel_neon_end(); - - len -=3D todo; - src +=3D todo; - } while (len); - } else - poly1305_blocks_arm(state, src, len, padbit); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -static int __init arm_poly1305_mod_init(void) -{ - if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && - (elf_hwcap & HWCAP_NEON)) - 
static_branch_enable(&have_neon); - return 0; -} -subsys_initcall(arm_poly1305_mod_init); - -static void __exit arm_poly1305_mod_exit(void) -{ -} -module_exit(arm_poly1305_mod_exit); - -MODULE_DESCRIPTION("Accelerated Poly1305 transform for ARM"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm/poly1305.h b/lib/crypto/arm/poly1305.h new file mode 100644 index 0000000000000..0021cf368307c --- /dev/null +++ b/lib/crypto/arm/poly1305.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for ARM + * + * Copyright (C) 2019 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_arm(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *= src, + unsigned int len, u32 padbit) +{ + if (IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && + static_branch_likely(&have_neon) && likely(may_use_simd())) { + do { + unsigned int todo =3D min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -=3D todo; + src +=3D todo; + } while (len); + } else + poly1305_blocks_arm(state, src, len, padbit); +} + +#ifdef CONFIG_KERNEL_MODE_NEON +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (elf_hwcap & HWCAP_NEON) + static_branch_enable(&have_neon); +} +#endif /* CONFIG_KERNEL_MODE_NEON */ diff --git a/lib/crypto/arm64/Kconfig 
b/lib/crypto/arm64/Kconfig index 0b903ef524d85..07c8a4f0ab03a 100644 --- a/lib/crypto/arm64/Kconfig +++ b/lib/crypto/arm64/Kconfig @@ -4,11 +4,5 @@ config CRYPTO_CHACHA20_NEON tristate depends on KERNEL_MODE_NEON default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_NEON - tristate - depends on KERNEL_MODE_NEON - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/arm64/Makefile b/lib/crypto/arm64/Makefile index 6207088397a73..d49cceca3d1ca 100644 --- a/lib/crypto/arm64/Makefile +++ b/lib/crypto/arm64/Makefile @@ -1,17 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only =20 obj-$(CONFIG_CRYPTO_CHACHA20_NEON) +=3D chacha-neon.o chacha-neon-y :=3D chacha-neon-core.o chacha-neon-glue.o - -obj-$(CONFIG_CRYPTO_POLY1305_NEON) +=3D poly1305-neon.o -poly1305-neon-y :=3D poly1305-core.o poly1305-glue.o -AFLAGS_poly1305-core.o +=3D -Dpoly1305_init=3Dpoly1305_block_init_arch -AFLAGS_poly1305-core.o +=3D -Dpoly1305_emit=3Dpoly1305_emit_arch - -quiet_cmd_perlasm =3D PERLASM $@ - cmd_perlasm =3D $(PERL) $(<) void $(@) - -$(obj)/%-core.S: $(src)/%-armv8.pl - $(call cmd,perlasm) - -clean-files +=3D poly1305-core.S diff --git a/lib/crypto/arm64/poly1305-armv8.pl b/lib/crypto/arm64/poly1305= -armv8.pl index 22c9069c06505..f1930c6b55cee 100644 --- a/lib/crypto/arm64/poly1305-armv8.pl +++ b/lib/crypto/arm64/poly1305-armv8.pl @@ -48,10 +48,13 @@ my ($h0,$h1,$h2,$r0,$r1,$s1,$t0,$t1,$d0,$d1,$d2) =3D ma= p("x$_",(4..14)); =20 $code.=3D<<___; #ifndef __KERNEL__ # include "arm_arch.h" .extern OPENSSL_armcap_P +#else +# define poly1305_init poly1305_block_init +# define poly1305_blocks poly1305_blocks_arm64 #endif =20 .text =20 // forward "declarations" are required for Apple diff --git a/lib/crypto/arm64/poly1305-glue.c b/lib/crypto/arm64/poly1305-g= lue.c deleted file mode 100644 index d4a522e7d25a9..0000000000000 --- a/lib/crypto/arm64/poly1305-glue.c +++ /dev/null @@ -1,67 +0,0 @@ -// 
SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 - * - * Copyright (C) 2019 Linaro Ltd. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); - -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *sr= c, - unsigned int len, u32 padbit) -{ - len =3D round_down(len, POLY1305_BLOCK_SIZE); - if (static_branch_likely(&have_neon) && likely(may_use_simd())) { - do { - unsigned int todo =3D min_t(unsigned int, len, SZ_4K); - - kernel_neon_begin(); - poly1305_blocks_neon(state, src, todo, padbit); - kernel_neon_end(); - - len -=3D todo; - src +=3D todo; - } while (len); - } else - poly1305_blocks(state, src, len, padbit); -} -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); - -static int __init neon_poly1305_mod_init(void) -{ - if (cpu_have_named_feature(ASIMD)) - static_branch_enable(&have_neon); - return 0; -} -subsys_initcall(neon_poly1305_mod_init); - -static void __exit neon_poly1305_mod_exit(void) -{ -} -module_exit(neon_poly1305_mod_exit); - -MODULE_DESCRIPTION("Poly1305 authenticator (ARM64 optimized)"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/arm64/poly1305.h b/lib/crypto/arm64/poly1305.h new file mode 100644 index 0000000000000..aed5921ccd9a1 --- /dev/null +++ b/lib/crypto/arm64/poly1305.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: 
GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for arm64 + * + * Copyright (C) 2019 Linaro Ltd. + */ + +#include +#include +#include +#include +#include +#include + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks_arm64(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_blocks_neon(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon); + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *= src, + unsigned int len, u32 padbit) +{ + if (static_branch_likely(&have_neon) && likely(may_use_simd())) { + do { + unsigned int todo =3D min_t(unsigned int, len, SZ_4K); + + kernel_neon_begin(); + poly1305_blocks_neon(state, src, todo, padbit); + kernel_neon_end(); + + len -=3D todo; + src +=3D todo; + } while (len); + } else + poly1305_blocks_arm64(state, src, len, padbit); +} + +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) +{ + if (cpu_have_named_feature(ASIMD)) + static_branch_enable(&have_neon); +} diff --git a/lib/crypto/mips/Kconfig b/lib/crypto/mips/Kconfig index 0670a170c1be0..94c1a0892c203 100644 --- a/lib/crypto/mips/Kconfig +++ b/lib/crypto/mips/Kconfig @@ -3,10 +3,5 @@ config CRYPTO_CHACHA_MIPS tristate depends on CPU_MIPS32_R2 default CRYPTO_LIB_CHACHA select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_MIPS - tristate - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/mips/Makefile b/lib/crypto/mips/Makefile index 804488c7adedc..b5ea0e25c21ef 100644 --- a/lib/crypto/mips/Makefile +++ b/lib/crypto/mips/Makefile @@ -1,19 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only =20 
obj-$(CONFIG_CRYPTO_CHACHA_MIPS) +=3D chacha-mips.o chacha-mips-y :=3D chacha-core.o chacha-glue.o AFLAGS_chacha-core.o +=3D -O2 # needed to fill branch delay slots - -obj-$(CONFIG_CRYPTO_POLY1305_MIPS) +=3D poly1305-mips.o -poly1305-mips-y :=3D poly1305-core.o poly1305-glue.o - -perlasm-flavour-$(CONFIG_32BIT) :=3D o32 -perlasm-flavour-$(CONFIG_64BIT) :=3D 64 - -quiet_cmd_perlasm =3D PERLASM $@ - cmd_perlasm =3D $(PERL) $(<) $(perlasm-flavour-y) $(@) - -$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE - $(call if_changed,perlasm) - -targets +=3D poly1305-core.S diff --git a/lib/crypto/mips/poly1305-glue.c b/lib/crypto/mips/poly1305-glu= e.c deleted file mode 100644 index 002f50f710aba..0000000000000 --- a/lib/crypto/mips/poly1305-glue.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS - * - * Copyright (C) 2019 Linaro Ltd. - */ - -#include -#include -#include -#include -#include - -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); -asmlinkage void poly1305_blocks_arch(struct poly1305_block_state *state, - const u8 *src, u32 len, u32 hibit); -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); -asmlinkage void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]); -EXPORT_SYMBOL_GPL(poly1305_emit_arch); - -MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated"); -MODULE_LICENSE("GPL v2"); diff --git a/lib/crypto/mips/poly1305-mips.pl b/lib/crypto/mips/poly1305-mi= ps.pl index 399f10c3e3850..71347f34f4f9f 100644 --- a/lib/crypto/mips/poly1305-mips.pl +++ b/lib/crypto/mips/poly1305-mips.pl @@ -91,13 +91,11 @@ $code.=3D<<___; # define mflo(rd,rs,rt) mflo rd # define mfhi(rd,rs,rt) mfhi rd #endif =20 #ifdef __KERNEL__ -# define poly1305_init poly1305_block_init_arch -# define poly1305_blocks poly1305_blocks_arch -# 
define poly1305_emit poly1305_emit_arch +# define poly1305_init poly1305_block_init #endif =20 #if defined(__MIPSEB__) && !defined(MIPSEB) # define MIPSEB #endif @@ -563,13 +561,11 @@ $code.=3D<<___; # define mflo(rd,rs,rt) mflo rd # define mfhi(rd,rs,rt) mfhi rd #endif =20 #ifdef __KERNEL__ -# define poly1305_init poly1305_block_init_arch -# define poly1305_blocks poly1305_blocks_arch -# define poly1305_emit poly1305_emit_arch +# define poly1305_init poly1305_block_init #endif =20 #if defined(__MIPSEB__) && !defined(MIPSEB) # define MIPSEB #endif diff --git a/lib/crypto/mips/poly1305.h b/lib/crypto/mips/poly1305.h new file mode 100644 index 0000000000000..85de450f1a93d --- /dev/null +++ b/lib/crypto/mips/poly1305.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS + * + * Copyright (C) 2019 Linaro Ltd. + */ + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); diff --git a/lib/crypto/poly1305-generic.c b/lib/crypto/poly1305-generic.c deleted file mode 100644 index 71a16c5c538b4..0000000000000 --- a/lib/crypto/poly1305-generic.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Poly1305 authenticator algorithm, RFC7539 - * - * Copyright (C) 2015 Martin Willi - * - * Based on public domain code by Andrew Moon and Daniel J. Bernstein. 
- */ - -#include -#include -#include -#include - -void poly1305_block_init_generic(struct poly1305_block_state *desc, - const u8 raw_key[POLY1305_BLOCK_SIZE]) -{ - poly1305_core_init(&desc->h); - poly1305_core_setkey(&desc->core_r, raw_key); -} -EXPORT_SYMBOL_GPL(poly1305_block_init_generic); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); -MODULE_DESCRIPTION("Poly1305 algorithm (generic implementation)"); diff --git a/lib/crypto/poly1305.c b/lib/crypto/poly1305.c index a6dc182b6c22d..f313ccc4b4dd2 100644 --- a/lib/crypto/poly1305.c +++ b/lib/crypto/poly1305.c @@ -5,71 +5,96 @@ * Copyright (C) 2015 Martin Willi * * Based on public domain code by Andrew Moon and Daniel J. Bernstein. */ =20 -#include #include #include #include #include #include #include =20 +#ifdef CONFIG_CRYPTO_LIB_POLY1305_ARCH +#include "poly1305.h" /* $(SRCARCH)/poly1305.h */ +#else +#define poly1305_block_init poly1305_block_init_generic +#define poly1305_blocks poly1305_blocks_generic +#define poly1305_emit poly1305_emit_generic +#endif + void poly1305_init(struct poly1305_desc_ctx *desc, const u8 key[POLY1305_KEY_SIZE]) { desc->s[0] =3D get_unaligned_le32(key + 16); desc->s[1] =3D get_unaligned_le32(key + 20); desc->s[2] =3D get_unaligned_le32(key + 24); desc->s[3] =3D get_unaligned_le32(key + 28); desc->buflen =3D 0; - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_block_init_arch(&desc->state, key); - else - poly1305_block_init_generic(&desc->state, key); + poly1305_block_init(&desc->state, key); } EXPORT_SYMBOL(poly1305_init); =20 -static inline void poly1305_blocks(struct poly1305_block_state *state, - const u8 *src, unsigned int len) -{ - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_blocks_arch(state, src, len, 1); - else - poly1305_blocks_generic(state, src, len, 1); -} - void poly1305_update(struct poly1305_desc_ctx *desc, const u8 *src, unsigned int nbytes) { - desc->buflen =3D BLOCK_HASH_UPDATE(poly1305_blocks, &desc->state, - src, 
nbytes, POLY1305_BLOCK_SIZE, - desc->buf, desc->buflen); + if (desc->buflen + nbytes >=3D POLY1305_BLOCK_SIZE) { + unsigned int bulk_len; + + if (desc->buflen) { + unsigned int l =3D POLY1305_BLOCK_SIZE - desc->buflen; + + memcpy(&desc->buf[desc->buflen], src, l); + src +=3D l; + nbytes -=3D l; + + poly1305_blocks(&desc->state, desc->buf, + POLY1305_BLOCK_SIZE, 1); + desc->buflen =3D 0; + } + + bulk_len =3D round_down(nbytes, POLY1305_BLOCK_SIZE); + nbytes %=3D POLY1305_BLOCK_SIZE; + + if (bulk_len) { + poly1305_blocks(&desc->state, src, bulk_len, 1); + src +=3D bulk_len; + } + } + if (nbytes) { + memcpy(&desc->buf[desc->buflen], src, nbytes); + desc->buflen +=3D nbytes; + } } EXPORT_SYMBOL(poly1305_update); =20 void poly1305_final(struct poly1305_desc_ctx *desc, u8 *dst) { if (unlikely(desc->buflen)) { desc->buf[desc->buflen++] =3D 1; memset(desc->buf + desc->buflen, 0, POLY1305_BLOCK_SIZE - desc->buflen); - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_blocks_arch(&desc->state, desc->buf, - POLY1305_BLOCK_SIZE, 0); - else - poly1305_blocks_generic(&desc->state, desc->buf, - POLY1305_BLOCK_SIZE, 0); + poly1305_blocks(&desc->state, desc->buf, POLY1305_BLOCK_SIZE, + 0); } =20 - if (IS_ENABLED(CONFIG_CRYPTO_ARCH_HAVE_LIB_POLY1305)) - poly1305_emit_arch(&desc->state.h, dst, desc->s); - else - poly1305_emit_generic(&desc->state.h, dst, desc->s); + poly1305_emit(&desc->state.h, dst, desc->s); *desc =3D (struct poly1305_desc_ctx){}; } EXPORT_SYMBOL(poly1305_final); =20 +#ifdef poly1305_mod_init_arch +static int __init poly1305_mod_init(void) +{ + poly1305_mod_init_arch(); + return 0; +} +subsys_initcall(poly1305_mod_init); + +static void __exit poly1305_mod_exit(void) +{ +} +module_exit(poly1305_mod_exit); +#endif + MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Willi "); MODULE_DESCRIPTION("Poly1305 authenticator algorithm, RFC7539"); diff --git a/lib/crypto/powerpc/Kconfig b/lib/crypto/powerpc/Kconfig index 2eaeb7665a6a0..e41012a61876e 100644 --- 
a/lib/crypto/powerpc/Kconfig +++ b/lib/crypto/powerpc/Kconfig @@ -4,13 +4,5 @@ config CRYPTO_CHACHA20_P10 tristate depends on PPC64 && CPU_LITTLE_ENDIAN && VSX default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_P10 - tristate - depends on PPC64 && CPU_LITTLE_ENDIAN && VSX - depends on BROKEN # Needs to be fixed to work in softirq context - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 - select CRYPTO_LIB_POLY1305_GENERIC diff --git a/lib/crypto/powerpc/Makefile b/lib/crypto/powerpc/Makefile index 5709ae14258a0..778a04edd226c 100644 --- a/lib/crypto/powerpc/Makefile +++ b/lib/crypto/powerpc/Makefile @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only =20 obj-$(CONFIG_CRYPTO_CHACHA20_P10) +=3D chacha-p10-crypto.o chacha-p10-crypto-y :=3D chacha-p10-glue.o chacha-p10le-8x.o - -obj-$(CONFIG_CRYPTO_POLY1305_P10) +=3D poly1305-p10-crypto.o -poly1305-p10-crypto-y :=3D poly1305-p10-glue.o poly1305-p10le_64.o diff --git a/lib/crypto/powerpc/poly1305-p10-glue.c b/lib/crypto/powerpc/po= ly1305.h similarity index 66% rename from lib/crypto/powerpc/poly1305-p10-glue.c rename to lib/crypto/powerpc/poly1305.h index 184a71f9c1dee..b8ed098a0e95f 100644 --- a/lib/crypto/powerpc/poly1305-p10-glue.c +++ b/lib/crypto/powerpc/poly1305.h @@ -1,17 +1,15 @@ -// SPDX-License-Identifier: GPL-2.0 +/* SPDX-License-Identifier: GPL-2.0 */ /* * Poly1305 authenticator algorithm, RFC7539. * * Copyright 2023- IBM Corp. All rights reserved. 
*/ #include -#include #include #include #include -#include #include =20 asmlinkage void poly1305_p10le_4blocks(struct poly1305_block_state *state,= const u8 *m, u32 mlen); asmlinkage void poly1305_64s(struct poly1305_block_state *state, const u8 = *m, u32 mlen, int highbit); asmlinkage void poly1305_emit_64(const struct poly1305_state *state, const= u32 nonce[4], u8 digest[POLY1305_DIGEST_SIZE]); @@ -28,24 +26,23 @@ static void vsx_end(void) { disable_kernel_vsx(); preempt_enable(); } =20 -void poly1305_block_init_arch(struct poly1305_block_state *dctx, - const u8 raw_key[POLY1305_BLOCK_SIZE]) +static void poly1305_block_init(struct poly1305_block_state *dctx, + const u8 raw_key[POLY1305_BLOCK_SIZE]) { if (!static_key_enabled(&have_p10)) return poly1305_block_init_generic(dctx, raw_key); =20 dctx->h =3D (struct poly1305_state){}; dctx->core_r.key.r64[0] =3D get_unaligned_le64(raw_key + 0); dctx->core_r.key.r64[1] =3D get_unaligned_le64(raw_key + 8); } -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); =20 -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *sr= c, - unsigned int len, u32 padbit) +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *= src, + unsigned int len, u32 padbit) { if (!static_key_enabled(&have_p10)) return poly1305_blocks_generic(state, src, len, padbit); vsx_begin(); if (len >=3D POLY1305_BLOCK_SIZE * 4) { @@ -58,33 +55,20 @@ void poly1305_blocks_arch(struct poly1305_block_state *= state, const u8 *src, len -=3D POLY1305_BLOCK_SIZE; src +=3D POLY1305_BLOCK_SIZE; } vsx_end(); } -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); =20 -void poly1305_emit_arch(const struct poly1305_state *state, - u8 digest[POLY1305_DIGEST_SIZE], - const u32 nonce[4]) +static void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], const u32 nonce[4]) { if (!static_key_enabled(&have_p10)) return poly1305_emit_generic(state, digest, nonce); poly1305_emit_64(state, nonce, digest); } 
-EXPORT_SYMBOL_GPL(poly1305_emit_arch); =20 -static int __init poly1305_p10_init(void) +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) { if (cpu_has_feature(CPU_FTR_ARCH_31)) static_branch_enable(&have_p10); - return 0; } -subsys_initcall(poly1305_p10_init); - -static void __exit poly1305_p10_exit(void) -{ -} -module_exit(poly1305_p10_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Danny Tsen "); -MODULE_DESCRIPTION("Optimized Poly1305 for P10"); diff --git a/lib/crypto/x86/Kconfig b/lib/crypto/x86/Kconfig index 546fe2afe0b51..24dc9a59b2728 100644 --- a/lib/crypto/x86/Kconfig +++ b/lib/crypto/x86/Kconfig @@ -16,11 +16,5 @@ config CRYPTO_CHACHA20_X86_64 tristate depends on 64BIT default CRYPTO_LIB_CHACHA select CRYPTO_LIB_CHACHA_GENERIC select CRYPTO_ARCH_HAVE_LIB_CHACHA - -config CRYPTO_POLY1305_X86_64 - tristate - depends on 64BIT - default CRYPTO_LIB_POLY1305 - select CRYPTO_ARCH_HAVE_LIB_POLY1305 diff --git a/lib/crypto/x86/Makefile b/lib/crypto/x86/Makefile index c2ff8c5f1046e..16c9d76f99472 100644 --- a/lib/crypto/x86/Makefile +++ b/lib/crypto/x86/Makefile @@ -3,15 +3,5 @@ obj-$(CONFIG_CRYPTO_BLAKE2S_X86) +=3D libblake2s-x86_64.o libblake2s-x86_64-y :=3D blake2s-core.o blake2s-glue.o =20 obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) +=3D chacha-x86_64.o chacha-x86_64-y :=3D chacha-avx2-x86_64.o chacha-ssse3-x86_64.o chacha-avx= 512vl-x86_64.o chacha_glue.o - -obj-$(CONFIG_CRYPTO_POLY1305_X86_64) +=3D poly1305-x86_64.o -poly1305-x86_64-y :=3D poly1305-x86_64-cryptogams.o poly1305_glue.o -targets +=3D poly1305-x86_64-cryptogams.S - -quiet_cmd_perlasm =3D PERLASM $@ - cmd_perlasm =3D $(PERL) $< > $@ - -$(obj)/%.S: $(src)/%.pl FORCE - $(call if_changed,perlasm) diff --git a/lib/crypto/x86/poly1305-x86_64-cryptogams.pl b/lib/crypto/x86/= poly1305-x86_64-cryptogams.pl index 501827254fed7..409ec6955733a 100644 --- a/lib/crypto/x86/poly1305-x86_64-cryptogams.pl +++ b/lib/crypto/x86/poly1305-x86_64-cryptogams.pl @@ -116,34 
+116,21 @@ sub declare_function() { $code .=3D ".align $align\n"; $code .=3D "$name:\n"; } } =20 -sub declare_typed_function() { - my ($name, $align, $nargs) =3D @_; - if($kernel) { - $code .=3D "SYM_TYPED_FUNC_START($name)\n"; - $code .=3D ".L$name:\n"; - } else { - $code .=3D ".globl $name\n"; - $code .=3D ".type $name,\@function,$nargs\n"; - $code .=3D ".align $align\n"; - $code .=3D "$name:\n"; - } -} - sub end_function() { my ($name) =3D @_; if($kernel) { $code .=3D "SYM_FUNC_END($name)\n"; } else { $code .=3D ".size $name,.-$name\n"; } } =20 $code.=3D<<___ if $kernel; -#include +#include ___ =20 if ($avx) { $code.=3D<<___ if $kernel; .section .rodata @@ -247,18 +234,18 @@ $code.=3D<<___; .text ___ $code.=3D<<___ if (!$kernel); .extern OPENSSL_ia32cap_P =20 -.globl poly1305_block_init_arch -.hidden poly1305_block_init_arch +.globl poly1305_init_x86_64 +.hidden poly1305_init_x86_64 .globl poly1305_blocks_x86_64 .hidden poly1305_blocks_x86_64 .globl poly1305_emit_x86_64 .hidden poly1305_emit_x86_64 ___ -&declare_typed_function("poly1305_block_init_arch", 32, 3); +&declare_function("poly1305_init_x86_64", 32, 3); $code.=3D<<___; xor %eax,%eax mov %rax,0($ctx) # initialize hash value mov %rax,8($ctx) mov %rax,16($ctx) @@ -309,11 +296,11 @@ ___ $code.=3D<<___; mov \$1,%eax .Lno_key: RET ___ -&end_function("poly1305_block_init_arch"); +&end_function("poly1305_init_x86_64"); =20 &declare_function("poly1305_blocks_x86_64", 32, 4); $code.=3D<<___; .cfi_startproc .Lblocks: @@ -4116,13 +4103,13 @@ avx_handler: RET .size avx_handler,.-avx_handler =20 .section .pdata .align 4 - .rva .LSEH_begin_poly1305_block_init_arch - .rva .LSEH_end_poly1305_block_init_arch - .rva .LSEH_info_poly1305_block_init_arch + .rva .LSEH_begin_poly1305_init_x86_64 + .rva .LSEH_end_poly1305_init_x86_64 + .rva .LSEH_info_poly1305_init_x86_64 =20 .rva .LSEH_begin_poly1305_blocks_x86_64 .rva .LSEH_end_poly1305_blocks_x86_64 .rva .LSEH_info_poly1305_blocks_x86_64 =20 @@ -4166,14 +4153,14 @@ 
$code.=3D<<___ if ($avx>2); .rva .LSEH_info_poly1305_blocks_avx512 ___ $code.=3D<<___; .section .xdata .align 8 -.LSEH_info_poly1305_block_init_arch: +.LSEH_info_poly1305_init_x86_64: .byte 9,0,0,0 .rva se_handler - .rva .LSEH_begin_poly1305_block_init_arch,.LSEH_begin_poly1305_block_init= _arch + .rva .LSEH_begin_poly1305_init_x86_64,.LSEH_begin_poly1305_init_x86_64 =20 .LSEH_info_poly1305_blocks_x86_64: .byte 9,0,0,0 .rva se_handler .rva .Lblocks_body,.Lblocks_epilogue diff --git a/lib/crypto/x86/poly1305_glue.c b/lib/crypto/x86/poly1305.h similarity index 85% rename from lib/crypto/x86/poly1305_glue.c rename to lib/crypto/x86/poly1305.h index deb5841cb0ada..ee92e3740a787 100644 --- a/lib/crypto/x86/poly1305_glue.c +++ b/lib/crypto/x86/poly1305.h @@ -1,18 +1,15 @@ -// SPDX-License-Identifier: GPL-2.0 OR MIT +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ /* * Copyright (C) 2015-2019 Jason A. Donenfeld . All Right= s Reserved. */ =20 #include #include -#include #include #include -#include #include -#include =20 struct poly1305_arch_internal { union { struct { u32 h[5]; @@ -59,14 +56,12 @@ static void convert_to_base2_64(void *ctx) state->hs[2] +=3D ULT(state->hs[1], cy); #undef ULT state->is_base2_26 =3D 0; } =20 -asmlinkage void poly1305_block_init_arch( - struct poly1305_block_state *state, - const u8 raw_key[POLY1305_BLOCK_SIZE]); -EXPORT_SYMBOL_GPL(poly1305_block_init_arch); +asmlinkage void poly1305_init_x86_64(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); asmlinkage void poly1305_blocks_x86_64(struct poly1305_arch_internal *ctx, const u8 *inp, const size_t len, const u32 padbit); asmlinkage void poly1305_emit_x86_64(const struct poly1305_state *ctx, u8 mac[POLY1305_DIGEST_SIZE], @@ -86,12 +81,18 @@ asmlinkage void poly1305_blocks_avx512(struct poly1305_= arch_internal *ctx, =20 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx); static __ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx2); static 
__ro_after_init DEFINE_STATIC_KEY_FALSE(poly1305_use_avx512); =20 -void poly1305_blocks_arch(struct poly1305_block_state *state, const u8 *in= p, - unsigned int len, u32 padbit) +static void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]) +{ + poly1305_init_x86_64(state, raw_key); +} + +static void poly1305_blocks(struct poly1305_block_state *state, const u8 *= inp, + unsigned int len, u32 padbit) { struct poly1305_arch_internal *ctx =3D container_of(&state->h.h, struct poly1305_arch_internal, h); =20 /* SIMD disables preemption, so relax after processing each page. */ @@ -127,23 +128,22 @@ void poly1305_blocks_arch(struct poly1305_block_state= *state, const u8 *inp, =20 len -=3D bytes; inp +=3D bytes; } while (len); } -EXPORT_SYMBOL_GPL(poly1305_blocks_arch); =20 -void poly1305_emit_arch(const struct poly1305_state *ctx, - u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) +static void poly1305_emit(const struct poly1305_state *ctx, + u8 mac[POLY1305_DIGEST_SIZE], const u32 nonce[4]) { if (!static_branch_likely(&poly1305_use_avx)) poly1305_emit_x86_64(ctx, mac, nonce); else poly1305_emit_avx(ctx, mac, nonce); } -EXPORT_SYMBOL_GPL(poly1305_emit_arch); =20 -static int __init poly1305_simd_mod_init(void) +#define poly1305_mod_init_arch poly1305_mod_init_arch +static void poly1305_mod_init_arch(void) { if (boot_cpu_has(X86_FEATURE_AVX) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL)) static_branch_enable(&poly1305_use_avx); if (boot_cpu_has(X86_FEATURE_AVX) && boot_cpu_has(X86_FEATURE_AVX2) && @@ -153,17 +153,6 @@ static int __init poly1305_simd_mod_init(void) boot_cpu_has(X86_FEATURE_AVX512F) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM | XFEATURE_MA= SK_AVX512, NULL) && /* Skylake downclocks unacceptably much when using zmm, but later gen= erations are fast. 
*/ boot_cpu_data.x86_vfm !=3D INTEL_SKYLAKE_X) static_branch_enable(&poly1305_use_avx512); - return 0; } -subsys_initcall(poly1305_simd_mod_init); - -static void __exit poly1305_simd_mod_exit(void) -{ -} -module_exit(poly1305_simd_mod_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jason A. Donenfeld "); -MODULE_DESCRIPTION("Poly1305 authenticator"); --=20 2.50.1 From nobody Fri Oct 3 14:34:23 2025 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7A480326D58; Fri, 29 Aug 2025 15:26:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756481187; cv=none; b=KPcKLnKjMm2DUHekRUh6dpmvGZX5f8kAUC+jrtf9QBW7OVZNe6CEy226fu+hS50pmsJHOOc3QlZoECNErG264fWFhzumYeNIAiJB3LXYr8zm976WQWNfxlSSMa+2qkII8nZ6Z0/C6jyF7CFTtubH0I12O+jnSbZRHn8rf4G6fqc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1756481187; c=relaxed/simple; bh=z3ex74hX2vS0/bj5gkeyYBZm8aR4041ai4D65ght5tg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=EljEjzwTgj+2YLaJjEMgte3Dm72hTGFqVZFxOKt7krwrj6Cvk1Cbrgk4bDyMUKcLgOKjK/oou6D75Blui7rFlIiz1QF+TCekGWgiwyOI/7O+12fVpTLI2lgq6dPRFUp5liulAuDMzPWk6XyO78IKpZFSHWBpTRlJ5M8HxKTyLWs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=EOD/biG0; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="EOD/biG0" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C1DB7C4CEF8; Fri, 29 Aug 2025 15:26:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1756481187; 
bh=z3ex74hX2vS0/bj5gkeyYBZm8aR4041ai4D65ght5tg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=EOD/biG00f6b7caMGdbn/txUVX0b9JyUq/CX2F6EBaLl4WXnC3OU0luJjO++hx7Bg iCw5oQVzUDNl8+qe6G+ZJ+mSID8s7rlbjxZMCrysONN79Z9VCYkTOTcV+7CLN0rN5A iFmJyLXQmi8p4eLlYSsjy3u3uWNaRDZoJK/KrDjwZXzwMJqq1Kv+FL49JfghSwEjUB vwav1QmYZ+enijPSRNixyEZ0cda0NQkxRdnWu882khGGyv+dojBo7jRBop2GeG4Fmu 2T6VgvSIhWNOVT7c7DIeZrEM/8UQB/IPliD0G92h2hN7d52RY4gMQna1AOGBuA1z4S C4hrSffXkmQOg== From: Eric Biggers To: linux-crypto@vger.kernel.org Cc: linux-kernel@vger.kernel.org, linux-riscv@lists.infradead.org, Ard Biesheuvel , "Jason A . Donenfeld" , Zhihang Shao , Andy Polyakov , Eric Biggers , Chunyan Zhang Subject: [PATCH v3 3/3] lib/crypto: riscv/poly1305: Import OpenSSL/CRYPTOGAMS implementation Date: Fri, 29 Aug 2025 08:25:13 -0700 Message-ID: <20250829152513.92459-4-ebiggers@kernel.org> X-Mailer: git-send-email 2.50.1 In-Reply-To: <20250829152513.92459-1-ebiggers@kernel.org> References: <20250829152513.92459-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Zhihang Shao This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for riscv authored by Andy Polyakov. The file 'poly1305-riscv.pl' is taken straight from https://github.com/dot-asm/cryptogams commit 5e3fba73576244708a752fa61a8e93e587f271bb. This patch was tested on SpacemiT X60, with 2~2.5x improvement over generic implementation. 
Signed-off-by: Chunyan Zhang Signed-off-by: Zhihang Shao [EB: ported to lib/crypto/riscv/] Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- lib/crypto/Kconfig | 3 +- lib/crypto/Makefile | 14 + lib/crypto/riscv/poly1305-riscv.pl | 847 +++++++++++++++++++++++++++++ lib/crypto/riscv/poly1305.h | 14 + 4 files changed, 877 insertions(+), 1 deletion(-) create mode 100644 lib/crypto/riscv/poly1305-riscv.pl create mode 100644 lib/crypto/riscv/poly1305.h diff --git a/lib/crypto/Kconfig b/lib/crypto/Kconfig index 9991118c41a9d..cb4e056a98faf 100644 --- a/lib/crypto/Kconfig +++ b/lib/crypto/Kconfig @@ -126,10 +126,11 @@ config CRYPTO_LIB_POLY1305_ARCH default y if ARM default y if ARM64 && KERNEL_MODE_NEON default y if MIPS # The PPC64 code needs to be fixed to work in softirq context. default y if PPC64 && CPU_LITTLE_ENDIAN && VSX && BROKEN + default y if RISCV default y if X86_64 =20 # This symbol controls the inclusion of the Poly1305 generic code. This d= iffers # from most of the other algorithms, which handle the generic code # "automatically" via __maybe_unused. This is needed so that the Adiantum= code, @@ -141,11 +142,11 @@ config CRYPTO_LIB_POLY1305_GENERIC # impl as a fallback. (Or if selected explicitly.) 
default y if !CRYPTO_LIB_POLY1305_ARCH || PPC64 =20 config CRYPTO_LIB_POLY1305_RSIZE int - default 2 if MIPS + default 2 if MIPS || RISCV default 11 if X86_64 default 9 if ARM || ARM64 default 1 =20 config CRYPTO_LIB_CHACHA20POLY1305 diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile index e0536e3b3a04c..cd460e5e3dd24 100644 --- a/lib/crypto/Makefile +++ b/lib/crypto/Makefile @@ -110,10 +110,23 @@ $(obj)/mips/poly1305-core.S: $(src)/mips/poly1305-mip= s.pl FORCE targets +=3D mips/poly1305-core.S endif =20 libpoly1305-$(CONFIG_PPC) +=3D powerpc/poly1305-p10le_64.o =20 +ifeq ($(CONFIG_RISCV),y) +libpoly1305-y +=3D riscv/poly1305-core.o +poly1305-perlasm-flavour-$(CONFIG_32BIT) :=3D 32 +poly1305-perlasm-flavour-$(CONFIG_64BIT) :=3D 64 +quiet_cmd_perlasm_poly1305 =3D PERLASM $@ + cmd_perlasm_poly1305 =3D $(PERL) $< $(poly1305-perlasm-flavour-y) $@ +# Use if_changed instead of cmd, in case the flavour changed. +$(obj)/riscv/poly1305-core.S: $(src)/riscv/poly1305-riscv.pl FORCE + $(call if_changed,perlasm_poly1305) +targets +=3D riscv/poly1305-core.S +AFLAGS_riscv/poly1305-core.o +=3D -Dpoly1305_init=3Dpoly1305_block_init +endif + ifeq ($(CONFIG_X86),y) libpoly1305-y +=3D x86/poly1305-x86_64-cryptogams.o $(obj)/x86/poly1305-x86_64-cryptogams.S: $(src)/x86/poly1305-x86_64-crypto= gams.pl $(call cmd,perlasm) endif @@ -122,10 +135,11 @@ endif # CONFIG_CRYPTO_LIB_POLY1305_ARCH =20 # clean-files must be defined unconditionally clean-files +=3D arm/poly1305-core.S \ arm64/poly1305-core.S \ mips/poly1305-core.S \ + riscv/poly1305-core.S \ x86/poly1305-x86_64-cryptogams.S =20 ##########################################################################= ###### =20 obj-$(CONFIG_CRYPTO_LIB_SHA1) +=3D libsha1.o diff --git a/lib/crypto/riscv/poly1305-riscv.pl b/lib/crypto/riscv/poly1305= -riscv.pl new file mode 100644 index 0000000000000..e25e6338a9ac1 --- /dev/null +++ b/lib/crypto/riscv/poly1305-riscv.pl @@ -0,0 +1,847 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ 
OR BSD-3-Clause +# +# =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D +# Written by Andy Polyakov, @dot-asm, initially for use with OpenSSL. +# =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D +# +# Poly1305 hash for RISC-V. +# +# February 2019 +# +# In the essence it's pretty straightforward transliteration of MIPS +# module [without big-endian option]. +# +# 1.8 cycles per byte on U74, >100% faster than compiler-generated +# code. 1.9 cpb on C910, ~75% improvement. 3.3 on Spacemit X60, ~69% +# improvement. +# +# June 2024. +# +# Add CHERI support. +# +###################################################################### +# +($zero,$ra,$sp,$gp,$tp)=3Dmap("x$_",(0..4)); +($t0,$t1,$t2,$t3,$t4,$t5,$t6)=3Dmap("x$_",(5..7,28..31)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=3Dmap("x$_",(10..17)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=3Dmap("x$_",(8,9,18..2= 7)); +# +###################################################################### + +$flavour =3D shift || "64"; + +for (@ARGV) { $output=3D$_ if (/\w[\w\-]*\.\w+$/); } +open STDOUT,">$output"; + +$code.=3D<<___; +#ifdef __KERNEL__ +# ifdef __riscv_zicfilp +# undef __riscv_zicfilp // calls are expected to be direct +# endif +#endif + +#if defined(__CHERI_PURE_CAPABILITY__) && !defined(__riscv_misaligned_fast) +# define __riscv_misaligned_fast 1 +#endif +___ + +if ($flavour =3D~ /64/) {{{ +###################################################################### +# 64-bit code path... 
+# +my ($ctx,$inp,$len,$padbit) =3D ($a0,$a1,$a2,$a3); +my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) =3D ($a4,$a5,$a6,$a7,$t0,$t1,= $t2); + +$code.=3D<<___; +#if __riscv_xlen =3D=3D 64 +# if __SIZEOF_POINTER__ =3D=3D 16 +# define PUSH csc +# define POP clc +# else +# define PUSH sd +# define POP ld +# endif +#else +# error "unsupported __riscv_xlen" +#endif + +.option pic +.text + +.globl poly1305_init +.type poly1305_init,\@function +poly1305_init: +#ifdef __riscv_zicfilp + lpad 0 +#endif + sd $zero,0($ctx) + sd $zero,8($ctx) + sd $zero,16($ctx) + + beqz $inp,.Lno_key + +#ifndef __riscv_misaligned_fast + andi $tmp0,$inp,7 # $inp % 8 + andi $inp,$inp,-8 # align $inp + slli $tmp0,$tmp0,3 # byte to bit offset +#endif + ld $in0,0($inp) + ld $in1,8($inp) +#ifndef __riscv_misaligned_fast + beqz $tmp0,.Laligned_key + + ld $tmp2,16($inp) + neg $tmp1,$tmp0 # implicit &63 in sll + srl $in0,$in0,$tmp0 + sll $tmp3,$in1,$tmp1 + srl $in1,$in1,$tmp0 + sll $tmp2,$tmp2,$tmp1 + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 + +.Laligned_key: +#endif + li $tmp0,1 + slli $tmp0,$tmp0,32 # 0x0000000100000000 + addi $tmp0,$tmp0,-63 # 0x00000000ffffffc1 + slli $tmp0,$tmp0,28 # 0x0ffffffc10000000 + addi $tmp0,$tmp0,-1 # 0x0ffffffc0fffffff + + and $in0,$in0,$tmp0 + addi $tmp0,$tmp0,-3 # 0x0ffffffc0ffffffc + and $in1,$in1,$tmp0 + + sd $in0,24($ctx) + srli $tmp0,$in1,2 + sd $in1,32($ctx) + add $tmp0,$tmp0,$in1 # s1 =3D r1 + (r1 >> 2) + sd $tmp0,40($ctx) + +.Lno_key: + li $a0,0 # return 0 + ret +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) =3D + ($s0,$s1,$s2,$s3,$t3,$t4,$in0,$in1,$t2); +my ($shr,$shl) =3D ($t5,$t6); # used on R6 + +$code.=3D<<___; +.globl poly1305_blocks +.type poly1305_blocks,\@function +poly1305_blocks: +#ifdef __riscv_zicfilp + lpad 0 +#endif + andi $len,$len,-16 # complete blocks only + beqz $len,.Lno_data + + caddi $sp,$sp,-4*__SIZEOF_POINTER__ + PUSH $s0,3*__SIZEOF_POINTER__($sp) + PUSH $s1,2*__SIZEOF_POINTER__($sp) + PUSH 
$s2,1*__SIZEOF_POINTER__($sp) + PUSH $s3,0*__SIZEOF_POINTER__($sp) + +#ifndef __riscv_misaligned_fast + andi $shr,$inp,7 + andi $inp,$inp,-8 # align $inp + slli $shr,$shr,3 # byte to bit offset + neg $shl,$shr # implicit &63 in sll +#endif + + ld $h0,0($ctx) # load hash value + ld $h1,8($ctx) + ld $h2,16($ctx) + + ld $r0,24($ctx) # load key + ld $r1,32($ctx) + ld $rs1,40($ctx) + + add $len,$len,$inp # end of buffer + +.Loop: + ld $in0,0($inp) # load input + ld $in1,8($inp) +#ifndef __riscv_misaligned_fast + beqz $shr,.Laligned_inp + + ld $tmp2,16($inp) + srl $in0,$in0,$shr + sll $tmp3,$in1,$shl + srl $in1,$in1,$shr + sll $tmp2,$tmp2,$shl + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 + +.Laligned_inp: +#endif + caddi $inp,$inp,16 + + andi $tmp0,$h2,-4 # modulo-scheduled reduction + srli $tmp1,$h2,2 + andi $h2,$h2,3 + + add $d0,$h0,$in0 # accumulate input + add $tmp1,$tmp1,$tmp0 + sltu $tmp0,$d0,$h0 + add $d0,$d0,$tmp1 # ... and residue + sltu $tmp1,$d0,$tmp1 + add $d1,$h1,$in1 + add $tmp0,$tmp0,$tmp1 + sltu $tmp1,$d1,$h1 + add $d1,$d1,$tmp0 + + add $d2,$h2,$padbit + sltu $tmp0,$d1,$tmp0 + mulhu $h1,$r0,$d0 # h0*r0 + mul $h0,$r0,$d0 + + add $d2,$d2,$tmp1 + add $d2,$d2,$tmp0 + mulhu $tmp1,$rs1,$d1 # h1*5*r1 + mul $tmp0,$rs1,$d1 + + mulhu $h2,$r1,$d0 # h0*r1 + mul $tmp2,$r1,$d0 + add $h0,$h0,$tmp0 + add $h1,$h1,$tmp1 + sltu $tmp0,$h0,$tmp0 + + add $h1,$h1,$tmp0 + add $h1,$h1,$tmp2 + mulhu $tmp1,$r0,$d1 # h1*r0 + mul $tmp0,$r0,$d1 + + sltu $tmp2,$h1,$tmp2 + add $h2,$h2,$tmp2 + mul $tmp2,$rs1,$d2 # h2*5*r1 + + add $h1,$h1,$tmp0 + add $h2,$h2,$tmp1 + mul $tmp3,$r0,$d2 # h2*r0 + sltu $tmp0,$h1,$tmp0 + add $h2,$h2,$tmp0 + + add $h1,$h1,$tmp2 + sltu $tmp2,$h1,$tmp2 + add $h2,$h2,$tmp2 + add $h2,$h2,$tmp3 + + bne $inp,$len,.Loop + + sd $h0,0($ctx) # store hash value + sd $h1,8($ctx) + sd $h2,16($ctx) + + POP $s0,3*__SIZEOF_POINTER__($sp) # epilogue + POP $s1,2*__SIZEOF_POINTER__($sp) + POP $s2,1*__SIZEOF_POINTER__($sp) + POP $s3,0*__SIZEOF_POINTER__($sp) + caddi 
$sp,$sp,4*__SIZEOF_POINTER__ + +.Lno_data: + ret +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce) =3D ($a0,$a1,$a2); + +$code.=3D<<___; +.globl poly1305_emit +.type poly1305_emit,\@function +poly1305_emit: +#ifdef __riscv_zicfilp + lpad 0 +#endif + ld $tmp2,16($ctx) + ld $tmp0,0($ctx) + ld $tmp1,8($ctx) + + andi $in0,$tmp2,-4 # final reduction + srl $in1,$tmp2,2 + andi $tmp2,$tmp2,3 + add $in0,$in0,$in1 + + add $tmp0,$tmp0,$in0 + sltu $in1,$tmp0,$in0 + addi $in0,$tmp0,5 # compare to modulus + add $tmp1,$tmp1,$in1 + sltiu $tmp3,$in0,5 + sltu $tmp4,$tmp1,$in1 + add $in1,$tmp1,$tmp3 + add $tmp2,$tmp2,$tmp4 + sltu $tmp3,$in1,$tmp3 + add $tmp2,$tmp2,$tmp3 + + srli $tmp2,$tmp2,2 # see if it carried/borrowed + neg $tmp2,$tmp2 + + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + and $in0,$in0,$tmp2 + and $in1,$in1,$tmp2 + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + + lwu $tmp0,0($nonce) # load nonce + lwu $tmp1,4($nonce) + lwu $tmp2,8($nonce) + lwu $tmp3,12($nonce) + slli $tmp1,$tmp1,32 + slli $tmp3,$tmp3,32 + or $tmp0,$tmp0,$tmp1 + or $tmp2,$tmp2,$tmp3 + + add $in0,$in0,$tmp0 # accumulate nonce + add $in1,$in1,$tmp2 + sltu $tmp0,$in0,$tmp0 + add $in1,$in1,$tmp0 + +#ifdef __riscv_misaligned_fast + sd $in0,0($mac) # write mac value + sd $in1,8($mac) +#else + srli $tmp0,$in0,8 # write mac value + srli $tmp1,$in0,16 + srli $tmp2,$in0,24 + sb $in0,0($mac) + srli $tmp3,$in0,32 + sb $tmp0,1($mac) + srli $tmp0,$in0,40 + sb $tmp1,2($mac) + srli $tmp1,$in0,48 + sb $tmp2,3($mac) + srli $tmp2,$in0,56 + sb $tmp3,4($mac) + srli $tmp3,$in1,8 + sb $tmp0,5($mac) + srli $tmp0,$in1,16 + sb $tmp1,6($mac) + srli $tmp1,$in1,24 + sb $tmp2,7($mac) + + sb $in1,8($mac) + srli $tmp2,$in1,32 + sb $tmp3,9($mac) + srli $tmp3,$in1,40 + sb $tmp0,10($mac) + srli $tmp0,$in1,48 + sb $tmp1,11($mac) + srli $tmp1,$in1,56 + sb $tmp2,12($mac) + sb $tmp3,13($mac) + sb $tmp0,14($mac) + sb $tmp1,15($mac) +#endif + + ret +.size poly1305_emit,.-poly1305_emit +.string "Poly1305 for RISC-V, 
CRYPTOGAMS by \@dot-asm" +___ +} +}}} else {{{ +###################################################################### +# 32-bit code path +# + +my ($ctx,$inp,$len,$padbit) =3D ($a0,$a1,$a2,$a3); +my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) =3D + ($a4,$a5,$a6,$a7,$t0,$t1,$t2,$t3); + +$code.=3D<<___; +#if __riscv_xlen =3D=3D 32 +# if __SIZEOF_POINTER__ =3D=3D 8 +# define PUSH csc +# define POP clc +# else +# define PUSH sw +# define POP lw +# endif +# define MULX(hi,lo,a,b) mulhu hi,a,b; mul lo,a,b +# define srliw srli +# define srlw srl +# define sllw sll +# define addw add +# define addiw addi +# define mulw mul +#elif __riscv_xlen =3D=3D 64 +# if __SIZEOF_POINTER__ =3D=3D 16 +# define PUSH csc +# define POP clc +# else +# define PUSH sd +# define POP ld +# endif +# define MULX(hi,lo,a,b) slli b,b,32; srli b,b,32; mul hi,a,b; addiw lo,hi= ,0; srai hi,hi,32 +#else +# error "unsupported __riscv_xlen" +#endif + +.option pic +.text + +.globl poly1305_init +.type poly1305_init,\@function +poly1305_init: +#ifdef __riscv_zicfilp + lpad 0 +#endif + sw $zero,0($ctx) + sw $zero,4($ctx) + sw $zero,8($ctx) + sw $zero,12($ctx) + sw $zero,16($ctx) + + beqz $inp,.Lno_key + +#ifndef __riscv_misaligned_fast + andi $tmp0,$inp,3 # $inp % 4 + sub $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset +#endif + lw $in0,0($inp) + lw $in1,4($inp) + lw $in2,8($inp) + lw $in3,12($inp) +#ifndef __riscv_misaligned_fast + beqz $tmp0,.Laligned_key + + lw $tmp2,16($inp) + sub $tmp1,$zero,$tmp0 + srlw $in0,$in0,$tmp0 + sllw $tmp3,$in1,$tmp1 + srlw $in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + sllw $tmp3,$in2,$tmp1 + srlw $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + sllw $tmp3,$in3,$tmp1 + srlw $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + sllw $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +.Laligned_key: +#endif + + lui $tmp0,0x10000 + addi $tmp0,$tmp0,-1 # 0x0fffffff + and $in0,$in0,$tmp0 + addi $tmp0,$tmp0,-3 # 0x0ffffffc + and $in1,$in1,$tmp0 + and $in2,$in2,$tmp0 + and $in3,$in3,$tmp0 + + sw 
$in0,20($ctx) + sw $in1,24($ctx) + sw $in2,28($ctx) + sw $in3,32($ctx) + + srlw $tmp1,$in1,2 + srlw $tmp2,$in2,2 + srlw $tmp3,$in3,2 + addw $in1,$in1,$tmp1 # s1 =3D r1 + (r1 >> 2) + addw $in2,$in2,$tmp2 + addw $in3,$in3,$tmp3 + sw $in1,36($ctx) + sw $in2,40($ctx) + sw $in3,44($ctx) +.Lno_key: + li $a0,0 + ret +.size poly1305_init,.-poly1305_init +___ +{ +my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) =3D + ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $t0,$t1,$t2); +my ($d0,$d1,$d2,$d3) =3D + ($a4,$a5,$a6,$a7); +my $shr =3D $ra; # used on R6 + +$code.=3D<<___; +.globl poly1305_blocks +.type poly1305_blocks,\@function +poly1305_blocks: +#ifdef __riscv_zicfilp + lpad 0 +#endif + andi $len,$len,-16 # complete blocks only + beqz $len,.Labort + +#ifdef __riscv_zcmp + cm.push {ra,s0-s8}, -48 +#else + caddi $sp,$sp,-__SIZEOF_POINTER__*12 + PUSH $ra, __SIZEOF_POINTER__*11($sp) + PUSH $s0, __SIZEOF_POINTER__*10($sp) + PUSH $s1, __SIZEOF_POINTER__*9($sp) + PUSH $s2, __SIZEOF_POINTER__*8($sp) + PUSH $s3, __SIZEOF_POINTER__*7($sp) + PUSH $s4, __SIZEOF_POINTER__*6($sp) + PUSH $s5, __SIZEOF_POINTER__*5($sp) + PUSH $s6, __SIZEOF_POINTER__*4($sp) + PUSH $s7, __SIZEOF_POINTER__*3($sp) + PUSH $s8, __SIZEOF_POINTER__*2($sp) +#endif + +#ifndef __riscv_misaligned_fast + andi $shr,$inp,3 + andi $inp,$inp,-4 # align $inp + slli $shr,$shr,3 # byte to bit offset +#endif + + lw $h0,0($ctx) # load hash value + lw $h1,4($ctx) + lw $h2,8($ctx) + lw $h3,12($ctx) + lw $h4,16($ctx) + + lw $r0,20($ctx) # load key + lw $r1,24($ctx) + lw $r2,28($ctx) + lw $r3,32($ctx) + lw $rs1,36($ctx) + lw $rs2,40($ctx) + lw $rs3,44($ctx) + + add $len,$len,$inp # end of buffer + +.Loop: + lw $d0,0($inp) # load input + lw $d1,4($inp) + lw $d2,8($inp) + lw $d3,12($inp) +#ifndef __riscv_misaligned_fast + beqz $shr,.Laligned_inp + + lw $t4,16($inp) + sub $t5,$zero,$shr + srlw $d0,$d0,$shr + sllw $t3,$d1,$t5 + srlw $d1,$d1,$shr + or $d0,$d0,$t3 + sllw $t3,$d2,$t5 + srlw $d2,$d2,$shr + or $d1,$d1,$t3 + sllw 
$t3,$d3,$t5 + srlw $d3,$d3,$shr + or $d2,$d2,$t3 + sllw $t4,$t4,$t5 + or $d3,$d3,$t4 + +.Laligned_inp: +#endif + srliw $t3,$h4,2 # modulo-scheduled reduction + andi $t4,$h4,-4 + andi $h4,$h4,3 + + addw $d0,$d0,$h0 # accumulate input + addw $t4,$t4,$t3 + sltu $h0,$d0,$h0 + addw $d0,$d0,$t4 # ... and residue + sltu $t4,$d0,$t4 + + addw $d1,$d1,$h1 + addw $h0,$h0,$t4 # carry + sltu $h1,$d1,$h1 + addw $d1,$d1,$h0 + sltu $h0,$d1,$h0 + + addw $d2,$d2,$h2 + addw $h1,$h1,$h0 # carry + sltu $h2,$d2,$h2 + addw $d2,$d2,$h1 + sltu $h1,$d2,$h1 + + addw $d3,$d3,$h3 + addw $h2,$h2,$h1 # carry + sltu $h3,$d3,$h3 + addw $d3,$d3,$h2 + + MULX ($h1,$h0,$r0,$d0) # d0*r0 + + sltu $h2,$d3,$h2 + addw $h3,$h3,$h2 # carry + + MULX ($t4,$t3,$rs3,$d1) # d1*s3 + + addw $h4,$h4,$padbit + caddi $inp,$inp,16 + addw $h4,$h4,$h3 + + MULX ($t6,$a3,$rs2,$d2) # d2*s2 + addw $h0,$h0,$t3 + addw $h1,$h1,$t4 + sltu $t3,$h0,$t3 + addw $h1,$h1,$t3 + + MULX ($t4,$t3,$rs1,$d3) # d3*s1 + addw $h0,$h0,$a3 + addw $h1,$h1,$t6 + sltu $a3,$h0,$a3 + addw $h1,$h1,$a3 + + + MULX ($h2,$a3,$r1,$d0) # d0*r1 + addw $h0,$h0,$t3 + addw $h1,$h1,$t4 + sltu $t3,$h0,$t3 + addw $h1,$h1,$t3 + + MULX ($t4,$t3,$r0,$d1) # d1*r0 + addw $h1,$h1,$a3 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + MULX ($t6,$a3,$rs3,$d2) # d2*s3 + addw $h1,$h1,$t3 + addw $h2,$h2,$t4 + sltu $t3,$h1,$t3 + addw $h2,$h2,$t3 + + MULX ($t4,$t3,$rs2,$d3) # d3*s2 + addw $h1,$h1,$a3 + addw $h2,$h2,$t6 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + mulw $a3,$rs1,$h4 # h4*s1 + addw $h1,$h1,$t3 + addw $h2,$h2,$t4 + sltu $t3,$h1,$t3 + addw $h2,$h2,$t3 + + + MULX ($h3,$t3,$r2,$d0) # d0*r2 + addw $h1,$h1,$a3 + sltu $a3,$h1,$a3 + addw $h2,$h2,$a3 + + MULX ($t6,$a3,$r1,$d1) # d1*r1 + addw $h2,$h2,$t3 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + MULX ($t4,$t3,$r0,$d2) # d2*r0 + addw $h2,$h2,$a3 + addw $h3,$h3,$t6 + sltu $a3,$h2,$a3 + addw $h3,$h3,$a3 + + MULX ($t6,$a3,$rs3,$d3) # d3*s3 + addw $h2,$h2,$t3 + addw $h3,$h3,$t4 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + mulw $t3,$rs2,$h4 # 
h4*s2 + addw $h2,$h2,$a3 + addw $h3,$h3,$t6 + sltu $a3,$h2,$a3 + addw $h3,$h3,$a3 + + + MULX ($t6,$a3,$r3,$d0) # d0*r3 + addw $h2,$h2,$t3 + sltu $t3,$h2,$t3 + addw $h3,$h3,$t3 + + MULX ($t4,$t3,$r2,$d1) # d1*r2 + addw $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addw $t6,$t6,$a3 + + MULX ($a3,$d3,$r0,$d3) # d3*r0 + addw $h3,$h3,$t3 + addw $t6,$t6,$t4 + sltu $t3,$h3,$t3 + addw $t6,$t6,$t3 + + MULX ($t4,$t3,$r1,$d2) # d2*r1 + addw $h3,$h3,$d3 + addw $t6,$t6,$a3 + sltu $d3,$h3,$d3 + addw $t6,$t6,$d3 + + mulw $a3,$rs3,$h4 # h4*s3 + addw $h3,$h3,$t3 + addw $t6,$t6,$t4 + sltu $t3,$h3,$t3 + addw $t6,$t6,$t3 + + + mulw $h4,$r0,$h4 # h4*r0 + addw $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addw $t6,$t6,$a3 + addw $h4,$t6,$h4 + + li $padbit,1 # if we loop, padbit is 1 + + bne $inp,$len,.Loop + + sw $h0,0($ctx) # store hash value + sw $h1,4($ctx) + sw $h2,8($ctx) + sw $h3,12($ctx) + sw $h4,16($ctx) + +#ifdef __riscv_zcmp + cm.popret {ra,s0-s8}, 48 +#else + POP $ra, __SIZEOF_POINTER__*11($sp) + POP $s0, __SIZEOF_POINTER__*10($sp) + POP $s1, __SIZEOF_POINTER__*9($sp) + POP $s2, __SIZEOF_POINTER__*8($sp) + POP $s3, __SIZEOF_POINTER__*7($sp) + POP $s4, __SIZEOF_POINTER__*6($sp) + POP $s5, __SIZEOF_POINTER__*5($sp) + POP $s6, __SIZEOF_POINTER__*4($sp) + POP $s7, __SIZEOF_POINTER__*3($sp) + POP $s8, __SIZEOF_POINTER__*2($sp) + caddi $sp,$sp,__SIZEOF_POINTER__*12 +#endif +.Labort: + ret +.size poly1305_blocks,.-poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce,$tmp4) =3D ($a0,$a1,$a2,$a3); + +$code.=3D<<___; +.globl poly1305_emit +.type poly1305_emit,\@function +poly1305_emit: +#ifdef __riscv_zicfilp + lpad 0 +#endif + lw $tmp4,16($ctx) + lw $tmp0,0($ctx) + lw $tmp1,4($ctx) + lw $tmp2,8($ctx) + lw $tmp3,12($ctx) + + srliw $ctx,$tmp4,2 # final reduction + andi $in0,$tmp4,-4 + andi $tmp4,$tmp4,3 + addw $ctx,$ctx,$in0 + + addw $tmp0,$tmp0,$ctx + sltu $ctx,$tmp0,$ctx + addiw $in0,$tmp0,5 # compare to modulus + addw $tmp1,$tmp1,$ctx + sltiu $in1,$in0,5 + sltu $ctx,$tmp1,$ctx + addw $in1,$in1,$tmp1 + addw 
$tmp2,$tmp2,$ctx + sltu $in2,$in1,$tmp1 + sltu $ctx,$tmp2,$ctx + addw $in2,$in2,$tmp2 + addw $tmp3,$tmp3,$ctx + sltu $in3,$in2,$tmp2 + sltu $ctx,$tmp3,$ctx + addw $in3,$in3,$tmp3 + addw $tmp4,$tmp4,$ctx + sltu $ctx,$in3,$tmp3 + addw $ctx,$ctx,$tmp4 + + srl $ctx,$ctx,2 # see if it carried/borrowed + sub $ctx,$zero,$ctx + + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + xor $in2,$in2,$tmp2 + xor $in3,$in3,$tmp3 + and $in0,$in0,$ctx + and $in1,$in1,$ctx + and $in2,$in2,$ctx + and $in3,$in3,$ctx + xor $in0,$in0,$tmp0 + xor $in1,$in1,$tmp1 + xor $in2,$in2,$tmp2 + xor $in3,$in3,$tmp3 + + lw $tmp0,0($nonce) # load nonce + lw $tmp1,4($nonce) + lw $tmp2,8($nonce) + lw $tmp3,12($nonce) + + addw $in0,$in0,$tmp0 # accumulate nonce + sltu $ctx,$in0,$tmp0 + + addw $in1,$in1,$tmp1 + sltu $tmp1,$in1,$tmp1 + addw $in1,$in1,$ctx + sltu $ctx,$in1,$ctx + addw $ctx,$ctx,$tmp1 + + addw $in2,$in2,$tmp2 + sltu $tmp2,$in2,$tmp2 + addw $in2,$in2,$ctx + sltu $ctx,$in2,$ctx + addw $ctx,$ctx,$tmp2 + + addw $in3,$in3,$tmp3 + addw $in3,$in3,$ctx + +#ifdef __riscv_misaligned_fast + sw $in0,0($mac) # write mac value + sw $in1,4($mac) + sw $in2,8($mac) + sw $in3,12($mac) +#else + srl $tmp0,$in0,8 # write mac value + srl $tmp1,$in0,16 + srl $tmp2,$in0,24 + sb $in0, 0($mac) + sb $tmp0,1($mac) + srl $tmp0,$in1,8 + sb $tmp1,2($mac) + srl $tmp1,$in1,16 + sb $tmp2,3($mac) + srl $tmp2,$in1,24 + sb $in1, 4($mac) + sb $tmp0,5($mac) + srl $tmp0,$in2,8 + sb $tmp1,6($mac) + srl $tmp1,$in2,16 + sb $tmp2,7($mac) + srl $tmp2,$in2,24 + sb $in2, 8($mac) + sb $tmp0,9($mac) + srl $tmp0,$in3,8 + sb $tmp1,10($mac) + srl $tmp1,$in3,16 + sb $tmp2,11($mac) + srl $tmp2,$in3,24 + sb $in3, 12($mac) + sb $tmp0,13($mac) + sb $tmp1,14($mac) + sb $tmp2,15($mac) +#endif + + ret +.size poly1305_emit,.-poly1305_emit +.string "Poly1305 for RISC-V, CRYPTOGAMS by \@dot-asm" +___ +} +}}} + +foreach (split("\n", $code)) { + if ($flavour =3D~ /^cheri/) { + s/\(x([0-9]+)\)/(c$1)/ and s/\b([ls][bhwd]u?)\b/c$1/; + 
s/\b(PUSH|POP)(\s+)x([0-9]+)/$1$2c$3/ or + s/\b(ret|jal)\b/c$1/; + s/\bcaddi?\b/cincoffset/ and s/\bx([0-9]+,)/c$1/g or + m/\bcmove\b/ and s/\bx([0-9]+)/c$1/g; + } else { + s/\bcaddi?\b/add/ or + s/\bcmove\b/mv/; + } + print $_, "\n"; +} + +close STDOUT; diff --git a/lib/crypto/riscv/poly1305.h b/lib/crypto/riscv/poly1305.h new file mode 100644 index 0000000000000..88f3df44e355e --- /dev/null +++ b/lib/crypto/riscv/poly1305.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for riscv + * + * Copyright (C) 2025 Institute of Software, CAS. + */ + +asmlinkage void poly1305_block_init(struct poly1305_block_state *state, + const u8 raw_key[POLY1305_BLOCK_SIZE]); +asmlinkage void poly1305_blocks(struct poly1305_block_state *state, + const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit(const struct poly1305_state *state, + u8 digest[POLY1305_DIGEST_SIZE], + const u32 nonce[4]); --=20 2.50.1