From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D4705188736; Sun, 3 Nov 2024 22:32:24 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673146; cv=none; b=tXezR0S1dhR7TgCT0FpAepnANKRxJYNSL/kK1Zrm+5MYEu6En5OqKUClvAbecupSEv44iYhiJHbAZM2ACdAmCgA7WZ1wtm1l0GhOssvT7TN182H8X4x3/AVvs78fHJLa9f/2eDpcFGofk1pHLdAg2zFrPP5h/Y4G1SPmOv2mev8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673146; c=relaxed/simple; bh=ABiaGm+ovhik7YFigewZFubogtN32tz5BD1p0OQq720=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=TWQ5DRAeg9JUMevQ5O8zLycZUZMvOuEeEHLy+IUrAZqFFWM2J2vbZQ8lft8fHeMJty5j14GMHCPoMTUe29YUTWuD022obpkgJ5dVbCyokR3xjrPkGN7y1keyhnVlc6VH7i2NxC+Bb2clXj2Ylkq7Pm95e/Rnfzmz1aYKER8QFCE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=WVk0uHfu; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="WVk0uHfu" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 42EC9C4CED3; Sun, 3 Nov 2024 22:32:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673144; bh=ABiaGm+ovhik7YFigewZFubogtN32tz5BD1p0OQq720=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=WVk0uHfugLJOnyBhphvAvhRdO9ietw2HwNP1FG0tEe51/9J+ho1lZSwSW5fWWkUbJ cnx/nwvwkunqtyQfdB6zu5TZYLBOWj+8hIzGXkelZw/EeczfL6dFUH50WsD+A0L6wp 1ea3C8GpKzWp8UGmLqvWkmVACMRW7Hz5uHzbXU2Ekqvq3CV5Hvw+yULjXXbYd5nmM5 evs/zM1913odV1Hs8XVfpJhyXYdLZ/5asXzrkmzN0LR04luqCSG3sPYmAnYNFu82YK 
z5A7rR9fIOw7wD5HLScsDlu1CWetycbJKeEEVPCotgB8D+8Nk/ySUHj/DWVvgFqAih CURkLUSqbIfRQ== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 01/18] lib/crc32: drop leading underscores from __crc32c_le_base Date: Sun, 3 Nov 2024 14:31:37 -0800 Message-ID: <20241103223154.136127-2-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Remove the leading underscores from __crc32c_le_base(). This is in preparation for adding crc32c_le_arch() and eventually renaming __crc32c_le() to crc32c_le(). 
Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/arm64/lib/crc32-glue.c | 2 +- arch/riscv/lib/crc32.c | 2 +- crypto/crc32c_generic.c | 8 ++++---- include/linux/crc32.h | 2 +- lib/crc32.c | 4 ++-- lib/crc32test.c | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index 295ae3e6b997..ad015223d15d 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -42,11 +42,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, si= ze_t len) } =20 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) - return __crc32c_le_base(crc, p, len); + return crc32c_le_base(crc, p, len); =20 if (len >=3D min_len && cpu_have_named_feature(PMULL) && crypto_simd_usab= le()) { kernel_neon_begin(); crc =3D crc32c_le_arm64_4way(crc, p, len); kernel_neon_end(); diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c index d7dc599af3ef..333fb7af1192 100644 --- a/arch/riscv/lib/crc32.c +++ b/arch/riscv/lib/crc32.c @@ -224,11 +224,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, = size_t len) } =20 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, - CRC32C_POLY_QT_LE, __crc32c_le_base); + CRC32C_POLY_QT_LE, crc32c_le_base); } =20 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, size_t len) { diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 7c2357c30fdf..635599b255ec 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -83,11 +83,11 @@ static int chksum_setkey(struct crypto_shash *tfm, cons= t u8 *key, static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length) { struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); =20 - ctx->crc =3D __crc32c_le_base(ctx->crc, data, length); + ctx->crc =3D crc32c_le_base(ctx->crc, data, length); return 0; } =20 
static int chksum_update_arch(struct shash_desc *desc, const u8 *data, unsigned int length) @@ -106,11 +106,11 @@ static int chksum_final(struct shash_desc *desc, u8 *= out) return 0; } =20 static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 = *out) { - put_unaligned_le32(~__crc32c_le_base(*crcp, data, len), out); + put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out); return 0; } =20 static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len, u8 *out) @@ -198,16 +198,16 @@ static struct shash_alg algs[] =3D {{ }}; =20 static int __init crc32c_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&__crc32c_le !=3D &__crc32c_le_= base)); + return crypto_register_shashes(algs, 1 + (&__crc32c_le !=3D &crc32c_le_ba= se)); } =20 static void __exit crc32c_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&__crc32c_le !=3D &__crc32c_le_base)= ); + crypto_unregister_shashes(algs, 1 + (&__crc32c_le !=3D &crc32c_le_base)); } =20 subsys_initcall(crc32c_mod_init); module_exit(crc32c_mod_fini); =20 diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 87f788c0d607..5b07fc9081c4 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -37,11 +37,11 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, = size_t len2) { return crc32_le_shift(crc1, len2) ^ crc2; } =20 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len); =20 /** * __crc32c_le_combine - Combine two crc32c check values into one. 
For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, __crc32c_le() check values were calculated diff --git a/lib/crc32.c b/lib/crc32.c index ff587fee3893..c67059b0082b 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -205,12 +205,12 @@ EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); =20 u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32= _le); EXPORT_SYMBOL(crc32_le_base); =20 -u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__= crc32c_le); -EXPORT_SYMBOL(__crc32c_le_base); +u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__cr= c32c_le); +EXPORT_SYMBOL(crc32c_le_base); =20 u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32= _be); =20 /* * This multiplies the polynomials x and y modulo the given modulus. diff --git a/lib/crc32test.c b/lib/crc32test.c index 03cf5c1f2f5d..30b8da4d8be4 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -824,11 +824,11 @@ static void crc32test_regenerate(void) for (i =3D 0; i < ARRAY_SIZE(test); i++) { pr_info("{0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x},\n", test[i].crc, test[i].start, test[i].length, crc32_le_base(test[i].crc, test_buf + test[i].start, test[i].length), crc32_be_base(test[i].crc, test_buf + test[i].start, test[i].length), - __crc32c_le_base(test[i].crc, test_buf + test[i].start, test[i].length)= ); + crc32c_le_base(test[i].crc, test_buf + test[i].start, test[i].length)); } } =20 static int __init crc32test_init(void) { --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D475918CC08; Sun, 3 Nov 2024 22:32:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; 
d=subspace.kernel.org; s=arc-20240116; t=1730673146; cv=none; b=YJexOmOb1Un68ymJ4KaRS3zHYRMj7YQsU4TlE38DuRm4y6DLt8msrFQS81ha/ZPuh+4uMlfXieDheVrdy0D5kKchm7ZQcJz7OzBIKGY1Vvjc6awHTxLgHJzUW/vl1dwSIOCmXRjpS6f/oCZlJZK0P5XeOF3t6AYk2CREoSBSGLE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673146; c=relaxed/simple; bh=aQx2of0YxY8WIGZoMkP00SggKkaZtnWYsiXvyH2QOMg=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=X41Lxecti65G4GbeNbOkRy08sWDpVD3mwF/KQUFU5mn7QLMc10irbJ1l+jVWF4MbaLQZoJhAK5n4zuj8T5f1ecxUCinH0x0tmPyndAZnloJmffMATGmX2RmxBQ4iOTapK6bksX6NX0VZ6NR6Dcwj3HRyJgQM/Bn+zEpfR3ldmcc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=VtOJZhHa; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="VtOJZhHa" Received: by smtp.kernel.org (Postfix) with ESMTPSA id CAE5CC4CED4; Sun, 3 Nov 2024 22:32:24 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673145; bh=aQx2of0YxY8WIGZoMkP00SggKkaZtnWYsiXvyH2QOMg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=VtOJZhHa7cYAybApcWeSaKbpiudoVOFGuFDH8l6SegXZ2A40or+s7/q8DtnBsf0vG wbjsUVEeQhrHMrNRM9Lc6QRpLufHetMUuXNbVW837WyVju0Dxo0nIbv8iY+13/ZrCs IJFYvAGkty3sioHEgihQt4Figk5+JGLnPJ5kkv2zDFuYYVIiTHcq3rtS5BzCQ0Fuje XpxxkUh9SDZGF9+paViPv8+5wuzGpFa4jy6YgrbXBqTwjQKRedPXy7G6GS0L3QS1kY ZIg1ppJVdfoq/RrbtQICFwficNgBUuJ+dJ9qO1Q50qy7QLp4lppuFxYrGwhAKsQ1iy /2NiwTwXRSEfg== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, 
loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 02/18] lib/crc32: improve support for arch-specific overrides Date: Sun, 3 Nov 2024 14:31:38 -0800 Message-ID: <20241103223154.136127-3-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Currently the CRC32 library functions are defined as weak symbols, and the arm64 and riscv architectures override them. This method of arch-specific overrides has the limitation that it only works when both the base and arch code is built-in. Also, it makes the arch-specific code be silently not used if it is accidentally built with lib-y instead of obj-y; unfortunately the RISC-V code does this. This commit reorganizes the code to have explicit *_arch() functions that are called when they are enabled, similar to how some of the crypto library code works (e.g. chacha_crypt() calls chacha_crypt_arch()). Make the existing kconfig choice for the CRC32 implementation also control whether the arch-optimized implementation (if one is available) is enabled or not. Make it enabled by default if CRC32 is also enabled. The result is that arch-optimized CRC32 library functions will be included automatically when appropriate, but it is now possible to disable them. They can also now be built as a loadable module if the CRC32 library functions happen to be used only by loadable modules, in which case the arch and base CRC32 modules will be automatically loaded via direct symbol dependency when appropriate. 
Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/arm64/Kconfig | 1 + arch/arm64/lib/Makefile | 3 +- arch/arm64/lib/crc32-glue.c | 13 ++++- arch/riscv/Kconfig | 1 + arch/riscv/lib/Makefile | 3 +- arch/riscv/lib/{crc32.c =3D> crc32-riscv.c} | 13 ++++- crypto/crc32_generic.c | 4 +- crypto/crc32c_generic.c | 4 +- include/linux/crc32.h | 35 +++++++++--- lib/Kconfig | 70 +++++++++++++++++------ lib/crc32.c | 22 +++---- 11 files changed, 118 insertions(+), 51 deletions(-) rename arch/riscv/lib/{crc32.c =3D> crc32-riscv.c} (94%) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fd9df6dcc593..1e48f40f654e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -19,10 +19,11 @@ config ARM64 select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_OPS if XEN select ARCH_HAS_DMA_PREP_COHERENT diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 8e882f479d98..5fbcf0d56665 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -11,11 +11,12 @@ CFLAGS_xor-neon.o +=3D $(CC_FLAGS_FPU) CFLAGS_REMOVE_xor-neon.o +=3D $(CC_FLAGS_NO_FPU) endif =20 lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) +=3D uaccess_flushcache.o =20 -obj-$(CONFIG_CRC32) +=3D crc32.o crc32-glue.o +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-arm64.o +crc32-arm64-y :=3D crc32.o crc32-glue.o =20 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o =20 obj-$(CONFIG_ARM64_MTE) +=3D mte.o =20 diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index ad015223d15d..d7f6e1cbf0d2 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only =20 #include #include +#include =20 #include 
#include #include #include @@ -19,11 +20,11 @@ asmlinkage u32 crc32_be_arm64(u32 crc, unsigned char co= nst *p, size_t len); =20 asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t= len); asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_= t len); asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t= len); =20 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_le_base(crc, p, len); =20 if (len >=3D min_len && cpu_have_named_feature(PMULL) && crypto_simd_usab= le()) { @@ -38,12 +39,13 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, si= ze_t len) return crc; } =20 return crc32_le_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32_le_arch); =20 -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32c_le_base(crc, p, len); =20 if (len >=3D min_len && cpu_have_named_feature(PMULL) && crypto_simd_usab= le()) { @@ -58,12 +60,13 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p,= size_t len) return crc; } =20 return crc32c_le_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32c_le_arch); =20 -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_be_base(crc, p, len); =20 if (len >=3D min_len && cpu_have_named_feature(PMULL) && crypto_simd_usab= le()) { @@ -78,5 +81,9 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size= _t len) return crc; } =20 return crc32_be_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32_be_arch); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index f4c570538d55..046f0cb119da 100644 --- 
a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -22,10 +22,11 @@ config RISCV select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CRC32 if RISCV_ISA_ZBC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 8eec6b69a875..79368a895fee 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -13,10 +13,9 @@ ifeq ($(CONFIG_MMU), y) lib-$(CONFIG_RISCV_ISA_V) +=3D uaccess_vector.o endif lib-$(CONFIG_MMU) +=3D uaccess.o lib-$(CONFIG_64BIT) +=3D tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) +=3D clear_page.o -lib-$(CONFIG_RISCV_ISA_ZBC) +=3D crc32.o - +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-riscv.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o lib-$(CONFIG_RISCV_ISA_V) +=3D xor.o lib-$(CONFIG_RISCV_ISA_V) +=3D riscv_v_helpers.o diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32-riscv.c similarity index 94% rename from arch/riscv/lib/crc32.c rename to arch/riscv/lib/crc32-riscv.c index 333fb7af1192..a3ff7db2a1ce 100644 --- a/arch/riscv/lib/crc32.c +++ b/arch/riscv/lib/crc32-riscv.c @@ -12,10 +12,11 @@ #include #include #include #include #include +#include =20 /* * Refer to https://www.corsix.org/content/barrett-reduction-polynomials f= or * better understanding of how this math works. 
* @@ -215,21 +216,23 @@ static inline u32 __pure crc32_le_generic(u32 crc, un= signed char const *p, =20 legacy: return crc_fb(crc, p, len); } =20 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, crc32_le_base); } +EXPORT_SYMBOL(crc32_le_arch); =20 -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, CRC32C_POLY_QT_LE, crc32c_le_base); } +EXPORT_SYMBOL(crc32c_le_arch); =20 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, size_t len) { size_t bits =3D len * 8; @@ -251,11 +254,11 @@ static inline u32 crc32_be_unaligned(u32 crc, unsigne= d char const *p, crc ^=3D crc_low; =20 return crc; } =20 -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) { size_t offset, head_len, tail_len; unsigned long const *p_ul; unsigned long s; =20 @@ -290,5 +293,9 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, si= ze_t len) return crc; =20 legacy: return crc32_be_base(crc, p, len); } +EXPORT_SYMBOL(crc32_be_arch); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c index 6a55d206fab3..cc064ea8240e 100644 --- a/crypto/crc32_generic.c +++ b/crypto/crc32_generic.c @@ -158,16 +158,16 @@ static struct shash_alg algs[] =3D {{ }}; =20 static int __init crc32_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&crc32_le !=3D &crc32_le_base)); + return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } =20 static void __exit crc32_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&crc32_le !=3D 
&crc32_le_base)); + crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } =20 subsys_initcall(crc32_mod_init); module_exit(crc32_mod_fini); =20 diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 635599b255ec..04b03d825cf4 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -198,16 +198,16 @@ static struct shash_alg algs[] =3D {{ }}; =20 static int __init crc32c_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&__crc32c_le !=3D &crc32c_le_ba= se)); + return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } =20 static void __exit crc32c_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&__crc32c_le !=3D &crc32c_le_base)); + crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } =20 subsys_initcall(crc32c_mod_init); module_exit(crc32c_mod_fini); =20 diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 5b07fc9081c4..58c632533b08 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -6,14 +6,38 @@ #define _LINUX_CRC32_H =20 #include #include =20 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len); +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len); + +static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_le_arch(crc, p, len); + return crc32_le_base(crc, p, len); +} + +static 
inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_be_arch(crc, p, len); + return crc32_be_base(crc, p, len); +} + +/* TODO: leading underscores should be dropped once callers have been upda= ted */ +static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32c_le_arch(crc, p, len); + return crc32c_le_base(crc, p, len); +} =20 /** * crc32_le_combine - Combine two crc32 check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, crc32_le() check values were calculated @@ -36,13 +60,10 @@ u32 __attribute_const__ crc32_le_shift(u32 crc, size_t = len); static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) { return crc32_le_shift(crc1, len2) ^ crc2; } =20 -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len); - /** * __crc32c_le_combine - Combine two crc32c check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, __crc32c_le() check values were calculated * for each, crc1 and crc2. diff --git a/lib/Kconfig b/lib/Kconfig index b38849af6f13..07afcf214f35 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -176,10 +176,13 @@ config CRC32 This option is provided for the case where no in-kernel-tree modules require CRC32/CRC32c functions, but a module built outside the kernel tree does. Such modules that use library CRC32/CRC32c functions require M here. =20 +config ARCH_HAS_CRC32 + bool + config CRC32_SELFTEST tristate "CRC32 perform self test on init" depends on CRC32 help This option enables the CRC32 library functions to perform a @@ -188,54 +191,89 @@ config CRC32_SELFTEST and computes the total elapsed time and number of bytes processed. 
=20 choice prompt "CRC32 implementation" depends on CRC32 - default CRC32_SLICEBY8 + default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32 + default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32 help - This option allows a kernel builder to override the default choice - of CRC32 algorithm. Choose the default ("slice by 8") unless you - know that you need one of the others. + This option allows you to override the default choice of CRC32 + implementation. Choose the default unless you know that you need one + of the others. =20 -config CRC32_SLICEBY8 +config CRC32_IMPL_ARCH_PLUS_SLICEBY8 + bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32 + help + Use architecture-optimized implementation of CRC32. Fall back to + slice-by-8 in cases where the arch-optimized implementation cannot be + used, e.g. if the CPU lacks support for the needed instructions. + + This is the default when an arch-optimized implementation exists. + +config CRC32_IMPL_ARCH_PLUS_SLICEBY1 + bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32 + help + Use architecture-optimized implementation of CRC32, but fall back to + slice-by-1 instead of slice-by-8 in order to reduce the binary size. + +config CRC32_IMPL_SLICEBY8 bool "Slice by 8 bytes" help Calculate checksum 8 bytes at a time with a clever slicing algorithm. - This is the fastest algorithm, but comes with a 8KiB lookup table. - Most modern processors have enough cache to hold this table without - thrashing the cache. - - This is the default implementation choice. Choose this one unless - you have a good reason not to. + This is much slower than the architecture-optimized implementation of + CRC32 (if the selected arch has one), but it is portable and is the + fastest implementation when no arch-optimized implementation is + available. It uses an 8KiB lookup table. Most modern processors have + enough cache to hold this table without thrashing the cache. 
=20 -config CRC32_SLICEBY4 +config CRC32_IMPL_SLICEBY4 bool "Slice by 4 bytes" help Calculate checksum 4 bytes at a time with a clever slicing algorithm. This is a bit slower than slice by 8, but has a smaller 4KiB lookup table. =20 Only choose this option if you know what you are doing. =20 -config CRC32_SARWATE - bool "Sarwate's Algorithm (one byte at a time)" +config CRC32_IMPL_SLICEBY1 + bool "Slice by 1 byte (Sarwate's algorithm)" help Calculate checksum a byte at a time using Sarwate's algorithm. This - is not particularly fast, but has a small 256 byte lookup table. + is not particularly fast, but has a small 1KiB lookup table. =20 Only choose this option if you know what you are doing. =20 -config CRC32_BIT +config CRC32_IMPL_BIT bool "Classic Algorithm (one bit at a time)" help Calculate checksum one bit at a time. This is VERY slow, but has no lookup table. This is provided as a debugging option. =20 Only choose this option if you are debugging crc32. =20 endchoice =20 +config CRC32_ARCH + tristate + default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SL= ICEBY1 + +config CRC32_SLICEBY8 + bool + default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8 + +config CRC32_SLICEBY4 + bool + default y if CRC32_IMPL_SLICEBY4 + +config CRC32_SARWATE + bool + default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1 + +config CRC32_BIT + bool + default y if CRC32_IMPL_BIT + config CRC64 tristate "CRC64 functions" help This option is provided for the case where no in-kernel-tree modules require CRC64 functions, but a module built outside diff --git a/lib/crc32.c b/lib/crc32.c index c67059b0082b..47151624332e 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -181,39 +181,31 @@ static inline u32 __pure crc32_le_generic(u32 crc, un= signed char const *p, #endif return crc; } =20 #if CRC_LE_BITS =3D=3D 1 -u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) { 
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } -u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else -u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE); } -u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); } #endif -EXPORT_SYMBOL(crc32_le); -EXPORT_SYMBOL(__crc32c_le); - -u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32= _le); EXPORT_SYMBOL(crc32_le_base); - -u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__cr= c32c_le); EXPORT_SYMBOL(crc32c_le_base); =20 -u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32= _be); - /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit * represents the highest power of x, and the msbit represents x^0. 
*/ @@ -333,16 +325,16 @@ static inline u32 __pure crc32_be_generic(u32 crc, un= signed char const *p, # endif return crc; } =20 #if CRC_BE_BITS =3D=3D 1 -u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); } #else -u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); } #endif -EXPORT_SYMBOL(crc32_be); +EXPORT_SYMBOL(crc32_be_base); --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 99E0F1AB530; Sun, 3 Nov 2024 22:32:25 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673148; cv=none; b=j9XJ6+ctG2y7BlaPb/5ck44/7ZcGr1EYWipdKTMNF9A5+R3xcyRFYfSfkBo+afbrfYSJBGG2749YGn1G/0NiRrAXxoin7kf65sBqE2/mq8sTxHDaEBRZTNhAixF16iVftTiHGWtJWkaDbjXPO5Sb/RupULs7Ap2rlPRSvM2RDF8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673148; c=relaxed/simple; bh=R3RIsL1uK8EQAGi0SGqk7fPI8kAR8rBJM+i88HKwo+I=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=X/oemhjuoosILHuteiZn9lxZ2FLdP77o+ECH74gHd4ypgRDi0qmxy1ZKOnU45KNmiIZsQTisUHaj3GajxTraouNioJJsMaBImwAmoHralytnaIWTtWN+LviECrl/DTtVIPh/E2zgAY2X+8uUHLqhFXZCo+uPdX2oxxxr+rTD8ec= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=TrX8vnYi; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) 
header.d=kernel.org header.i=@kernel.org header.b="TrX8vnYi" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5F0A3C4AF09; Sun, 3 Nov 2024 22:32:25 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673145; bh=R3RIsL1uK8EQAGi0SGqk7fPI8kAR8rBJM+i88HKwo+I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=TrX8vnYi6RABnxqWW60rA5vLS2/aG1Tnc49VRVlwICi9yJ7QBnDWco/Yks+8wcuHa RJuv3uzU+Z6DTR7yfQGoDUX9bYZhQ8O6Hq7pMkhfl3QqRSKQV3ExIVZu2uhcpBKIjR fiwyAeVZAnX/xcIlgG4UBHb2UTaDQHawqpOvN2/n/J7uPYSvzXmIV9xhwyN2GFDagS H4ngn3Fodl7aHmeNPUSVT2tVlOXuaBpibrQaV+wveq6oJO8Y8WsWgddFxjNtz0h7QF GPGvqV/QcxpUiurjY8UUYUNlQ8oSIkZuJkJ/X9GpLLyNaLm2a8lC9ndMRajJMGOHq2 j8og8QUJ6tDSg== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH v3 03/18] lib/crc32: expose whether the lib is really optimized at runtime Date: Sun, 3 Nov 2024 14:31:39 -0800 Message-ID: <20241103223154.136127-4-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Make the CRC32 library export a function crc32_optimizations() which returns flags that indicate which CRC32 functions are actually executing optimized code at runtime. 
This will be used to determine whether the crc32[c]-$arch shash algorithms should be registered in the crypto API. btrfs could also start using these flags instead of the hack that it currently uses where it parses the crypto_shash_driver_name. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/arm64/lib/crc32-glue.c | 10 ++++++++++ arch/riscv/lib/crc32-riscv.c | 10 ++++++++++ include/linux/crc32.h | 15 +++++++++++++++ 3 files changed, 35 insertions(+) diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index d7f6e1cbf0d2..15c4c9db573e 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -83,7 +83,17 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t le= n) =20 return crc32_be_arm64(crc, p, len); } EXPORT_SYMBOL(crc32_be_arch); =20 +u32 crc32_optimizations(void) +{ + if (alternative_has_cap_likely(ARM64_HAS_CRC32)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c index a3ff7db2a1ce..53d56ab422c7 100644 --- a/arch/riscv/lib/crc32-riscv.c +++ b/arch/riscv/lib/crc32-riscv.c @@ -295,7 +295,17 @@ u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t = len) legacy: return crc32_be_base(crc, p, len); } EXPORT_SYMBOL(crc32_be_arch); =20 +u32 crc32_optimizations(void) +{ + if (riscv_has_extension_likely(RISCV_ISA_EXT_ZBC)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 58c632533b08..e9bd40056687 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -35,10 +35,25 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 
= *p, size_t len) if (IS_ENABLED(CONFIG_CRC32_ARCH)) return crc32c_le_arch(crc, p, len); return crc32c_le_base(crc, p, len); } =20 +/* + * crc32_optimizations() returns flags that indicate which CRC32 library + * functions are using architecture-specific optimizations. Unlike + * IS_ENABLED(CONFIG_CRC32_ARCH) it takes into account the different CRC32 + * variants and also whether any needed CPU features are available at runt= ime. + */ +#define CRC32_LE_OPTIMIZATION BIT(0) /* crc32_le() is optimized */ +#define CRC32_BE_OPTIMIZATION BIT(1) /* crc32_be() is optimized */ +#define CRC32C_OPTIMIZATION BIT(2) /* __crc32c_le() is optimized */ +#if IS_ENABLED(CONFIG_CRC32_ARCH) +u32 crc32_optimizations(void); +#else +static inline u32 crc32_optimizations(void) { return 0; } +#endif + /** * crc32_le_combine - Combine two crc32 check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, crc32_le() check values were calculated * for each, crc1 and crc2. --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 64F2718E055; Sun, 3 Nov 2024 22:32:26 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673146; cv=none; b=WKlnxRrnG1+XulUDgmtOAP9iJv+/BcRKwvAm80PSmzwNiN/Yfr+UGMhx5sxa/aKUc+yGTuN8naUAOGIihKiN5Ebdocp46pmQOjZCKJe6Zog1Y3t+9jb1JVCsCGjm/zbWfK+al58kjP6bK5FurIkVhPhq+rrSCLKigSIlQXDGCak= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673146; c=relaxed/simple; bh=tFO0F6yNPt11cX0x9fLuUHzrKqGkpHig/cPtPasc7Zs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=lheSrq4TaYfcmJvCF3Yi7+AgndXiRu60TpsD1Oelzkb9l84i57mBwZ13oQf7n+iqVQqgretF8uoE/8CNMiS2H2TEN3ZATxMVSuX1gt9AX7P4r2nbAqCt8rYDAOaWRNYGMY1q0ktDv3WdJMZnQuTbQuu3/u+NWV5gQtCQAKnfTPo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=FLEXF5On; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="FLEXF5On" Received: by smtp.kernel.org (Postfix) with ESMTPSA id DF3C1C4CED6; Sun, 3 Nov 2024 22:32:25 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673146; bh=tFO0F6yNPt11cX0x9fLuUHzrKqGkpHig/cPtPasc7Zs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=FLEXF5OnjtoHBLz2W1SudB1KfgQeNbVamfWQca0Lr3ls4Ajd6zui8pPZphDG8wGHi LMeOxQgpUfQZBOrr9Xxbh82UmIw4KF07FJGWQHV31qEzfhzAW4QITRvihj6T8dNJwD 7VM4SISyDF+9nWY8v1TcdePZO8vlrUSxuR0C2sAycL5JfMzno08WrlDBIWB428LYwV H74oQ1WRR+qGqF2fzD79IPcenVru6oQPFG/aiSVpU1viylHUP1PVVW0SlpHWBah85+ GLRmjZ4czJdS+k2WUI0O0AY7XjAnn6mserV11No6DhCIo1gKixC/7dgNgepAp/Vyod VKf8STCElVs0g== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH v3 04/18] crypto: crc32 - don't unnecessarily register arch algorithms Date: Sun, 3 Nov 2024 14:31:40 -0800 Message-ID: <20241103223154.136127-5-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: 
List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Instead of registering the crc32-$arch and crc32c-$arch algorithms if the arch-specific code was built, only register them when that code was built *and* is not falling back to the base implementation at runtime. This avoids confusing users like btrfs which checks the shash driver name to determine whether it is crc32c-generic. (It would also make sense to change btrfs to test the crc32_optimization flags itself, so that it doesn't have to use the weird hack of parsing the driver name. This change still makes sense either way though.) Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- crypto/crc32_generic.c | 8 ++++++-- crypto/crc32c_generic.c | 8 ++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c index cc064ea8240e..783a30b27398 100644 --- a/crypto/crc32_generic.c +++ b/crypto/crc32_generic.c @@ -155,19 +155,23 @@ static struct shash_alg algs[] =3D {{ .base.cra_ctxsize =3D sizeof(u32), .base.cra_module =3D THIS_MODULE, .base.cra_init =3D crc32_cra_init, }}; =20 +static int num_algs; + static int __init crc32_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); + num_algs =3D 1 + ((crc32_optimizations() & CRC32_LE_OPTIMIZATION) !=3D 0); + + return crypto_register_shashes(algs, num_algs); } =20 static void __exit crc32_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); + crypto_unregister_shashes(algs, num_algs); } =20 subsys_initcall(crc32_mod_init); module_exit(crc32_mod_fini); =20 diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 04b03d825cf4..985da981d6e2 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -195,19 +195,23 @@ static struct shash_alg 
algs[] =3D {{ .base.cra_ctxsize =3D sizeof(struct chksum_ctx), .base.cra_module =3D THIS_MODULE, .base.cra_init =3D crc32c_cra_init, }}; =20 +static int num_algs; + static int __init crc32c_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); + num_algs =3D 1 + ((crc32_optimizations() & CRC32C_OPTIMIZATION) !=3D 0); + + return crypto_register_shashes(algs, num_algs); } =20 static void __exit crc32c_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); + crypto_unregister_shashes(algs, num_algs); } =20 subsys_initcall(crc32c_mod_init); module_exit(crc32c_mod_fini); =20 --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EDD4A19597F; Sun, 3 Nov 2024 22:32:26 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673147; cv=none; b=Pjbklnc3nDYgaGWcmNpgR3msVetVYS7YOeOadrhabIrKg6PcbV5ANS/qJ5EMHjRQOnHUYSBgSVAQtytWe+icbNESvsFoSmS2oGCbg4wMj6FwQi+Hdh7Hyoli9v2tGqEYI7ZMa3EO+xAGzasaMQg4Z9K73Q5bi2n11rM85JK1Jzs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673147; c=relaxed/simple; bh=owy60ENgJkwWJWOUwVDHrl9hP+iOOa+moutmYcaocSo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=lVQMezJy57Lc6pCsDFC5eacLgup2ENF3IR2CN9Mdbav/Q7CuMawfQ27O0C3Kvk6m9qCxziqKol8FqkEDU9avsccMd3Wsw/3y3AD/BSAJd7xQX+WetTiYwXYIiyr3B/ZGcjt56uEG9Tts97GCvy8+MDW2quDLu8Gf37rRZGz/vkg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=P0OHK/nP; arc=none 
smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="P0OHK/nP" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 6C42EC4CEDE; Sun, 3 Nov 2024 22:32:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673146; bh=owy60ENgJkwWJWOUwVDHrl9hP+iOOa+moutmYcaocSo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=P0OHK/nP1usaibeqRyPL//+5/CCrFD/XhXYqTAcjIDQo7jLj8PoZHuQYn/fNQX202 2iSAXETRaoxKpK5FCj4BtkoUqUIYgg7x7ELSBe/mgyeaHaePeTqz6/YwZFNhexPKj1 /jyvYJATuW4vFjS74aku1HuHpON3uCDmhiBMxsGoaj4YItuJq+Yu6WIWHwSmyfr2Gv LS8v42YSl26n1u7AeKgh5dqRgPnOnF6Q5mqdH3xsMGreQ68Z1J6k4GK2ZivQ/p8vOy N7ZpoPIrHeqgJv2E64VLone1TljRQOlwNY6tEh7HLro7ZoP5RMP4jpamwhHS/fCjMA R2PV0vP2ZZBuw== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 05/18] arm/crc32: expose CRC32 functions through lib Date: Sun, 3 Nov 2024 14:31:41 -0800 Message-ID: <20241103223154.136127-6-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the arm CRC32 assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. 
It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/arm/crypto/crc32-ce-glue.c to arch/arm/lib/crc32-glue.c, view this commit with 'git show -M10'. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/arm/Kconfig | 1 + arch/arm/configs/milbeaut_m10v_defconfig | 1 - arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/crypto/Kconfig | 14 - arch/arm/crypto/Makefile | 2 - arch/arm/crypto/crc32-ce-glue.c | 247 ------------------ arch/arm/lib/Makefile | 3 + .../crc32-ce-core.S =3D> lib/crc32-core.S} | 0 arch/arm/lib/crc32-glue.c | 123 +++++++++ 9 files changed, 127 insertions(+), 265 deletions(-) delete mode 100644 arch/arm/crypto/crc32-ce-glue.c rename arch/arm/{crypto/crc32-ce-core.S =3D> lib/crc32-core.S} (100%) create mode 100644 arch/arm/lib/crc32-glue.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 749179a1d162..851260303234 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,10 +5,11 @@ config ARM select ARCH_32BIT_OFF_T select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_P= OINTER && !ARM_UNWIND select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_CPU_FINALIZE_INIT if MMU + select ARCH_HAS_CRC32 if KERNEL_MODE_NEON select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DMA_ALLOC if MMU select ARCH_HAS_DMA_OPS select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/mi= lbeaut_m10v_defconfig index f5eeac9c65c3..acd16204f8d7 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -105,11 +105,10 @@ CONFIG_CRYPTO_SHA2_ARM_CE=3Dm CONFIG_CRYPTO_SHA512_ARM=3Dm CONFIG_CRYPTO_AES_ARM=3Dm CONFIG_CRYPTO_AES_ARM_BS=3Dm CONFIG_CRYPTO_AES_ARM_CE=3Dm CONFIG_CRYPTO_CHACHA20_NEON=3Dm 
-CONFIG_CRYPTO_CRC32_ARM_CE=3Dm # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=3Dm CONFIG_CRC_ITU_T=3Dm CONFIG_DMA_CMA=3Dy CONFIG_CMA_SIZE_MBYTES=3D64 diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v= 7_defconfig index 9a5f5c439b87..287ca055965f 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1304,11 +1304,10 @@ CONFIG_CRYPTO_SHA2_ARM_CE=3Dm CONFIG_CRYPTO_SHA512_ARM=3Dm CONFIG_CRYPTO_AES_ARM=3Dm CONFIG_CRYPTO_AES_ARM_BS=3Dm CONFIG_CRYPTO_AES_ARM_CE=3Dm CONFIG_CRYPTO_CHACHA20_NEON=3Dm -CONFIG_CRYPTO_CRC32_ARM_CE=3Dm CONFIG_CRYPTO_DEV_SUN4I_SS=3Dm CONFIG_CRYPTO_DEV_FSL_CAAM=3Dm CONFIG_CRYPTO_DEV_EXYNOS_RNG=3Dm CONFIG_CRYPTO_DEV_S5P=3Dm CONFIG_CRYPTO_DEV_ATMEL_AES=3Dm diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 5ff49a5e9afc..ea0ebf336d0d 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -220,24 +220,10 @@ config CRYPTO_CHACHA20_NEON stream cipher algorithms =20 Architecture: arm using: - NEON (Advanced SIMD) extensions =20 -config CRYPTO_CRC32_ARM_CE - tristate "CRC32C and CRC32" - depends on KERNEL_MODE_NEON - depends on CRC32 - select CRYPTO_HASH - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - and CRC32 CRC algorithm (IEEE 802.3) - - Architecture: arm using: - - CRC and/or PMULL instructions - - Drivers: crc32-arm-ce and crc32c-arm-ce - config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF" depends on KERNEL_MODE_NEON depends on CRC_T10DIF select CRYPTO_HASH diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 13e62c7c25dc..38ec5cc1e844 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -19,11 +19,10 @@ obj-$(CONFIG_CRYPTO_CURVE25519_NEON) +=3D curve25519-ne= on.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) +=3D aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) +=3D sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) +=3D sha2-arm-ce.o obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) +=3D ghash-arm-ce.o 
obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) +=3D crct10dif-arm-ce.o -obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) +=3D crc32-arm-ce.o =20 aes-arm-y :=3D aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y :=3D aes-neonbs-core.o aes-neonbs-glue.o sha1-arm-y :=3D sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y :=3D sha1-armv7-neon.o sha1_neon_glue.o @@ -36,11 +35,10 @@ blake2b-neon-y :=3D blake2b-neon-core.o blake2b-neon-g= lue.o sha1-arm-ce-y :=3D sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y :=3D sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y :=3D aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y :=3D ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y :=3D crct10dif-ce-core.o crct10dif-ce-glue.o -crc32-arm-ce-y:=3D crc32-ce-core.o crc32-ce-glue.o chacha-neon-y :=3D chacha-scalar-core.o chacha-glue.o chacha-neon-$(CONFIG_KERNEL_MODE_NEON) +=3D chacha-neon-core.o poly1305-arm-y :=3D poly1305-core.o poly1305-glue.o nhpoly1305-neon-y :=3D nh-neon-core.o nhpoly1305-neon-glue.o curve25519-neon-y :=3D curve25519-core.o curve25519-glue.o diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glu= e.c deleted file mode 100644 index 20b4dff13e3a..000000000000 --- a/arch/arm/crypto/crc32-ce-glue.c +++ /dev/null @@ -1,247 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instruct= ions - * - * Copyright (C) 2016 Linaro Ltd - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#define PMULL_MIN_LEN 64L /* minimum size of buffer - * for crc32_pmull_le_16 */ -#define SCALE_F 16L /* size of NEON register */ - -asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); -asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); - -asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); -asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); - -static u32 (*fallback_crc32)(u32 init_crc, const u8 
buf[], u32 len); -static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], u32 len); - -static int crc32_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D 0; - return 0; -} - -static int crc32c_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D ~0; - return 0; -} - -static int crc32_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crc =3D shash_desc_ctx(desc); - - *crc =3D *mctx; - return 0; -} - -static int crc32_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc =3D shash_desc_ctx(desc); - - *crc =3D crc32_armv8_le(*crc, data, length); - return 0; -} - -static int crc32c_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc =3D shash_desc_ctx(desc); - - *crc =3D crc32c_armv8_le(*crc, data, length); - return 0; -} - -static int crc32_final(struct shash_desc *desc, u8 *out) -{ - u32 *crc =3D shash_desc_ctx(desc); - - put_unaligned_le32(*crc, out); - return 0; -} - -static int crc32c_final(struct shash_desc *desc, u8 *out) -{ - u32 *crc =3D shash_desc_ctx(desc); - - put_unaligned_le32(~*crc, out); - return 0; -} - -static int crc32_pmull_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc =3D shash_desc_ctx(desc); - unsigned int l; - - if (crypto_simd_usable()) { - if ((u32)data % SCALE_F) { - l =3D min_t(u32, length, SCALE_F - ((u32)data % SCALE_F)); - - *crc =3D fallback_crc32(*crc, data, l); - - data +=3D l; - length -=3D l; - } - - if (length >=3D PMULL_MIN_LEN) { - l =3D round_down(length, SCALE_F); - - kernel_neon_begin(); - *crc =3D crc32_pmull_le(data, l, *crc); - kernel_neon_end(); - - data +=3D l; - length -=3D l; - 
} - } - - if (length > 0) - *crc =3D fallback_crc32(*crc, data, length); - - return 0; -} - -static int crc32c_pmull_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc =3D shash_desc_ctx(desc); - unsigned int l; - - if (crypto_simd_usable()) { - if ((u32)data % SCALE_F) { - l =3D min_t(u32, length, SCALE_F - ((u32)data % SCALE_F)); - - *crc =3D fallback_crc32c(*crc, data, l); - - data +=3D l; - length -=3D l; - } - - if (length >=3D PMULL_MIN_LEN) { - l =3D round_down(length, SCALE_F); - - kernel_neon_begin(); - *crc =3D crc32c_pmull_le(data, l, *crc); - kernel_neon_end(); - - data +=3D l; - length -=3D l; - } - } - - if (length > 0) - *crc =3D fallback_crc32c(*crc, data, length); - - return 0; -} - -static struct shash_alg crc32_pmull_algs[] =3D { { - .setkey =3D crc32_setkey, - .init =3D crc32_init, - .update =3D crc32_update, - .final =3D crc32_final, - .descsize =3D sizeof(u32), - .digestsize =3D sizeof(u32), - - .base.cra_ctxsize =3D sizeof(u32), - .base.cra_init =3D crc32_cra_init, - .base.cra_name =3D "crc32", - .base.cra_driver_name =3D "crc32-arm-ce", - .base.cra_priority =3D 200, - .base.cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize =3D 1, - .base.cra_module =3D THIS_MODULE, -}, { - .setkey =3D crc32_setkey, - .init =3D crc32_init, - .update =3D crc32c_update, - .final =3D crc32c_final, - .descsize =3D sizeof(u32), - .digestsize =3D sizeof(u32), - - .base.cra_ctxsize =3D sizeof(u32), - .base.cra_init =3D crc32c_cra_init, - .base.cra_name =3D "crc32c", - .base.cra_driver_name =3D "crc32c-arm-ce", - .base.cra_priority =3D 200, - .base.cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize =3D 1, - .base.cra_module =3D THIS_MODULE, -} }; - -static int __init crc32_pmull_mod_init(void) -{ - if (elf_hwcap2 & HWCAP2_PMULL) { - crc32_pmull_algs[0].update =3D crc32_pmull_update; - crc32_pmull_algs[1].update =3D crc32c_pmull_update; - - if (elf_hwcap2 & HWCAP2_CRC32) { - fallback_crc32 =3D crc32_armv8_le; - 
fallback_crc32c =3D crc32c_armv8_le; - } else { - fallback_crc32 =3D crc32_le; - fallback_crc32c =3D __crc32c_le; - } - } else if (!(elf_hwcap2 & HWCAP2_CRC32)) { - return -ENODEV; - } - - return crypto_register_shashes(crc32_pmull_algs, - ARRAY_SIZE(crc32_pmull_algs)); -} - -static void __exit crc32_pmull_mod_exit(void) -{ - crypto_unregister_shashes(crc32_pmull_algs, - ARRAY_SIZE(crc32_pmull_algs)); -} - -static const struct cpu_feature __maybe_unused crc32_cpu_feature[] =3D { - { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { } -}; -MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature); - -module_init(crc32_pmull_mod_init); -module_exit(crc32_pmull_mod_exit); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Ex= tensions"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32c"); diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 0ca5aae1bcc3..01cd4db2ed47 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -43,5 +43,8 @@ ifeq ($(CONFIG_KERNEL_MODE_NEON),y) CFLAGS_xor-neon.o +=3D $(CC_FLAGS_FPU) obj-$(CONFIG_XOR_BLOCKS) +=3D xor-neon.o endif =20 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o + +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-arm.o +crc32-arm-y :=3D crc32-glue.o crc32-core.o diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/lib/crc32-core.S similarity index 100% rename from arch/arm/crypto/crc32-ce-core.S rename to arch/arm/lib/crc32-core.S diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32-glue.c new file mode 100644 index 000000000000..2d56fb2b0a1c --- /dev/null +++ b/arch/arm/lib/crc32-glue.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instruct= ions + * + * Copyright (C) 2016 Linaro Ltd + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static 
DEFINE_STATIC_KEY_FALSE(have_crc32); +static DEFINE_STATIC_KEY_FALSE(have_pmull); + +#define PMULL_MIN_LEN 64 /* min size of buffer for pmull functions */ + +asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +static u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len) +{ + if (static_branch_likely(&have_crc32)) + return crc32_armv8_le(crc, p, len); + return crc32_le_base(crc, p, len); +} + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >=3D PMULL_MIN_LEN + 15 && + crypto_simd_usable() && static_branch_likely(&have_pmull)) { + size_t n =3D -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc =3D crc32_le_scalar(crc, p, n); + p +=3D n; + len -=3D n; + } + n =3D round_down(len, 16); + kernel_neon_begin(); + crc =3D crc32_pmull_le(p, n, crc); + kernel_neon_end(); + p +=3D n; + len -=3D n; + } + return crc32_le_scalar(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len) +{ + if (static_branch_likely(&have_crc32)) + return crc32c_armv8_le(crc, p, len); + return crc32c_le_base(crc, p, len); +} + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >=3D PMULL_MIN_LEN + 15 && + crypto_simd_usable() && static_branch_likely(&have_pmull)) { + size_t n =3D -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc =3D crc32c_le_scalar(crc, p, n); + p +=3D n; + len -=3D n; + } + n =3D round_down(len, 16); + kernel_neon_begin(); + crc =3D crc32c_pmull_le(p, n, crc); + kernel_neon_end(); + p +=3D n; + len -=3D n; + } + return crc32c_le_scalar(crc, p, len); +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); 
+ +static int __init crc32_arm_init(void) +{ + if (elf_hwcap2 & HWCAP2_CRC32) + static_branch_enable(&have_crc32); + if (elf_hwcap2 & HWCAP2_PMULL) + static_branch_enable(&have_pmull); + return 0; +} +arch_initcall(crc32_arm_init); + +static void __exit crc32_arm_exit(void) +{ +} +module_exit(crc32_arm_exit); + +u32 crc32_optimizations(void) +{ + if (elf_hwcap2 & (HWCAP2_CRC32 | HWCAP2_PMULL)) + return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Ex= tensions"); +MODULE_LICENSE("GPL v2"); --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 83CBD19D086; Sun, 3 Nov 2024 22:32:27 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673147; cv=none; b=KK/rztoQ485EHXhYlPAd5cyBR3YGKel8/wsrmeXQ3vv4skiZw9G2GyrqP6Y84ShepKwuiiGEdex3QFZm93Kre38tTkKMd8wLU42K4iEmlyCYOTDTnYZhxZP4+Inkn97rENfaKxBhQ50/nfyJP7pIxcKFazGg2ekD86kKkuIydJg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673147; c=relaxed/simple; bh=fyXFCTsA+89TB68z6paNi5DpZCT4DbCdr5pa5vU2M38=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=E2+dFFuP92c24wzz4S2IZABweAeEei657zbRz2V7PPfke9ODHPS2GGO4PUZPOl5s68dJB2pkoGShitO1j7ynyiG6WRGmVih1HL+b9wsJ+MZmRX28j/HvFd+k9jZPjXAid9QmjRrf2EJgvB8hjw1uO0L7M2E1XbbcOI3+k1PijCo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=mVkzwwFU; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: 
smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="mVkzwwFU" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 01678C4CED7; Sun, 3 Nov 2024 22:32:26 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673147; bh=fyXFCTsA+89TB68z6paNi5DpZCT4DbCdr5pa5vU2M38=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=mVkzwwFUQE/Gkoxrt6Gd1IWMvZVXe3X7U7ZEoxNM1yq982ixS8BWhUzx+CkNKRXe8 Ues5h8S2DCdHN7NBvXNEFvzaT95/9d33uCtvxBhHgxwTUCbJZPriktJjXWwgVPnwUT mQC4LQfvZpTEvFp7jvyArWL4YdrzwtQk/OIiipVrYASgs9X5TPCE6Wsu0v/uNP/ZM+ z5Gb0m5wZMkXHFnPP7V6317AQwAHZmpvPyS0IyIA1SzJdPuygS/w1hrOMpB9jQ5g0s /szC51c2/j6PH5LWaWRf0Dlv5enlNTxgW9VirhfLQZNX2ZxSXA46hMFXKFGNFWPiQz 3qyUSoJRhIRwA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 06/18] loongarch/crc32: expose CRC32 functions through lib Date: Sun, 3 Nov 2024 14:31:42 -0800 Message-ID: <20241103223154.136127-7-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the loongarch CRC32 assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. 
It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/loongarch/crypto/crc32-loongarch.c to arch/loongarch/lib/crc32-loongarch.c, view this commit with 'git show -M10'. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/loongarch/Kconfig | 1 + arch/loongarch/configs/loongson3_defconfig | 1 - arch/loongarch/crypto/Kconfig | 9 - arch/loongarch/crypto/Makefile | 2 - arch/loongarch/crypto/crc32-loongarch.c | 300 --------------------- arch/loongarch/lib/Makefile | 2 + arch/loongarch/lib/crc32-loongarch.c | 135 ++++++++++ 7 files changed, 138 insertions(+), 312 deletions(-) delete mode 100644 arch/loongarch/crypto/crc32-loongarch.c create mode 100644 arch/loongarch/lib/crc32-loongarch.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index bb35c34f86d2..455f1af0bf88 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -13,10 +13,11 @@ config LOONGARCH select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/co= nfigs/loongson3_defconfig index 75b366407a60..0487ac21b38b 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -967,11 +967,10 @@ CONFIG_CRYPTO_LZ4=3Dm CONFIG_CRYPTO_LZ4HC=3Dm CONFIG_CRYPTO_USER_API_HASH=3Dm CONFIG_CRYPTO_USER_API_SKCIPHER=3Dm CONFIG_CRYPTO_USER_API_RNG=3Dm CONFIG_CRYPTO_USER_API_AEAD=3Dm -CONFIG_CRYPTO_CRC32_LOONGARCH=3Dm CONFIG_CRYPTO_DEV_VIRTIO=3Dm CONFIG_DMA_CMA=3Dy CONFIG_DMA_NUMA_CMA=3Dy 
CONFIG_CMA_SIZE_MBYTES=3D0 CONFIG_PRINTK_TIME=3Dy diff --git a/arch/loongarch/crypto/Kconfig b/arch/loongarch/crypto/Kconfig index 200a6e8b43b1..a0270b3e5b30 100644 --- a/arch/loongarch/crypto/Kconfig +++ b/arch/loongarch/crypto/Kconfig @@ -1,14 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 =20 menu "Accelerated Cryptographic Algorithms for CPU (loongarch)" =20 -config CRYPTO_CRC32_LOONGARCH - tristate "CRC32c and CRC32" - select CRC32 - select CRYPTO_HASH - help - CRC32c and CRC32 CRC algorithms - - Architecture: LoongArch with CRC32 instructions - endmenu diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile index d22613d27ce9..ba83755dde2b 100644 --- a/arch/loongarch/crypto/Makefile +++ b/arch/loongarch/crypto/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 # # Makefile for LoongArch crypto files.. # - -obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) +=3D crc32-loongarch.o diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypt= o/crc32-loongarch.c deleted file mode 100644 index b7d9782827f5..000000000000 --- a/arch/loongarch/crypto/crc32-loongarch.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions - * - * Module based on mips/crypto/crc32-mips.c - * - * Copyright (C) 2014 Linaro Ltd - * Copyright (C) 2018 MIPS Tech, LLC - * Copyright (C) 2020-2023 Loongson Technology Corporation Limited - */ - -#include -#include - -#include -#include - -#define _CRC32(crc, value, size, type) \ -do { \ - __asm__ __volatile__( \ - #type ".w." 
#size ".w" " %0, %1, %0\n\t"\ - : "+r" (crc) \ - : "r" (value) \ - : "memory"); \ -} while (0) - -#define CRC32(crc, value, size) _CRC32(crc, value, size, crc) -#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) - -static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc =3D crc_; - - while (len >=3D sizeof(u64)) { - u64 value =3D get_unaligned_le64(p); - - CRC32(crc, value, d); - p +=3D sizeof(u64); - len -=3D sizeof(u64); - } - - if (len & sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32(crc, value, w); - p +=3D sizeof(u32); - } - - if (len & sizeof(u16)) { - u16 value =3D get_unaligned_le16(p); - - CRC32(crc, value, h); - p +=3D sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value =3D *p++; - - CRC32(crc, value, b); - } - - return crc; -} - -static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc =3D crc_; - - while (len >=3D sizeof(u64)) { - u64 value =3D get_unaligned_le64(p); - - CRC32C(crc, value, d); - p +=3D sizeof(u64); - len -=3D sizeof(u64); - } - - if (len & sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32C(crc, value, w); - p +=3D sizeof(u32); - } - - if (len & sizeof(u16)) { - u16 value =3D get_unaligned_le16(p); - - CRC32C(crc, value, h); - p +=3D sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value =3D *p++; - - CRC32C(crc, value, b); - } - - return crc; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -struct chksum_ctx { - u32 key; -}; - -struct chksum_desc_ctx { - u32 crc; -}; - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D mctx->key; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set the s= eed. 
- */ -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned= int keylen) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(tfm); - - if (keylen !=3D sizeof(mctx->key)) - return -EINVAL; - - mctx->key =3D get_unaligned_le32(key); - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned= int length) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D crc32_loongarch_hw(ctx->crc, data, length); - return 0; -} - -static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigne= d int length) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D crc32c_loongarch_hw(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - put_unaligned_le32(ctx->crc, out); - return 0; -} - -static int chksumc_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - put_unaligned_le32(~ctx->crc, out); - return 0; -} - -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *o= ut) -{ - put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out); - return 0; -} - -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *= out) -{ - put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned = int len, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned= int len, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - return __chksumc_finup(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned= int length, u8 *out) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - - 
return __chksum_finup(mctx->key, data, length, out); -} - -static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigne= d int length, u8 *out) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - - return __chksumc_finup(mctx->key, data, length, out); -} - -static int chksum_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D 0; - return 0; -} - -static int chksumc_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D ~0; - return 0; -} - -static struct shash_alg crc32_alg =3D { - .digestsize =3D CHKSUM_DIGEST_SIZE, - .setkey =3D chksum_setkey, - .init =3D chksum_init, - .update =3D chksum_update, - .final =3D chksum_final, - .finup =3D chksum_finup, - .digest =3D chksum_digest, - .descsize =3D sizeof(struct chksum_desc_ctx), - .base =3D { - .cra_name =3D "crc32", - .cra_driver_name =3D "crc32-loongarch", - .cra_priority =3D 300, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct chksum_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D chksum_cra_init, - } -}; - -static struct shash_alg crc32c_alg =3D { - .digestsize =3D CHKSUM_DIGEST_SIZE, - .setkey =3D chksum_setkey, - .init =3D chksum_init, - .update =3D chksumc_update, - .final =3D chksumc_final, - .finup =3D chksumc_finup, - .digest =3D chksumc_digest, - .descsize =3D sizeof(struct chksum_desc_ctx), - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-loongarch", - .cra_priority =3D 300, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct chksum_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D chksumc_cra_init, - } -}; - -static int __init crc32_mod_init(void) -{ - int err; - - if (!cpu_has(CPU_FEATURE_CRC32)) - return 0; - - err =3D crypto_register_shash(&crc32_alg); - if (err) - return err; - - err =3D 
crypto_register_shash(&crc32c_alg); - if (err) - return err; - - return 0; -} - -static void __exit crc32_mod_exit(void) -{ - if (!cpu_has(CPU_FEATURE_CRC32)) - return; - - crypto_unregister_shash(&crc32_alg); - crypto_unregister_shash(&crc32c_alg); -} - -module_init(crc32_mod_init); -module_exit(crc32_mod_exit); - -MODULE_AUTHOR("Min Zhou "); -MODULE_AUTHOR("Huacai Chen "); -MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index ccea3bbd4353..fae77809048b 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -9,5 +9,7 @@ lib-y +=3D delay.o memset.o memcpy.o memmove.o \ obj-$(CONFIG_ARCH_SUPPORTS_INT128) +=3D tishift.o =20 obj-$(CONFIG_CPU_HAS_LSX) +=3D xor_simd.o xor_simd_glue.o =20 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o + +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-loongarch.o diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc3= 2-loongarch.c new file mode 100644 index 000000000000..ce862e398eb3 --- /dev/null +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CRC32 and CRC32C using LoongArch crc* instructions + * + * Module based on mips/crypto/crc32-mips.c + * + * Copyright (C) 2014 Linaro Ltd + * Copyright (C) 2018 MIPS Tech, LLC + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +#define _CRC32(crc, value, size, type) \ +do { \ + __asm__ __volatile__( \ + #type ".w." 
#size ".w" " %0, %1, %0\n\t"\ + : "+r" (crc) \ + : "r" (value) \ + : "memory"); \ +} while (0) + +#define CRC32(crc, value, size) _CRC32(crc, value, size, crc) +#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) + +static DEFINE_STATIC_KEY_FALSE(have_crc32); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32_le_base(crc, p, len); + + while (len >=3D sizeof(u64)) { + u64 value =3D get_unaligned_le64(p); + + CRC32(crc, value, d); + p +=3D sizeof(u64); + len -=3D sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32(crc, value, w); + p +=3D sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value =3D get_unaligned_le16(p); + + CRC32(crc, value, h); + p +=3D sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value =3D *p++; + + CRC32(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + while (len >=3D sizeof(u64)) { + u64 value =3D get_unaligned_le64(p); + + CRC32C(crc, value, d); + p +=3D sizeof(u64); + len -=3D sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32C(crc, value, w); + p +=3D sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value =3D get_unaligned_le16(p); + + CRC32C(crc, value, h); + p +=3D sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value =3D *p++; + + CRC32C(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_loongarch_init(void) +{ + if (cpu_has(CPU_FEATURE_CRC32)) + static_branch_enable(&have_crc32); + return 0; +} +arch_initcall(crc32_loongarch_init); + +static void __exit crc32_loongarch_exit(void) +{ +} +module_exit(crc32_loongarch_exit); + +u32 
crc32_optimizations(void) +{ + if (static_key_enabled(&have_crc32)) + return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Min Zhou "); +MODULE_AUTHOR("Huacai Chen "); +MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions"); +MODULE_LICENSE("GPL v2"); --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 20BB71AA792; Sun, 3 Nov 2024 22:32:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673148; cv=none; b=AaNcIJ3yWU321GkZr9Po6VYkvne/ZHqYwCjlFgptw7ta9//QrRKTbCaeJqAyabloy0WyKXXpPwp5EKJEhJwN2r2owSjVogZvPKmIQaLMk7Bxup6RPrk/btf34Wc64cXQJmeEaO4endm5OvM8KQZuJtM6hwp/t4KGWeisxelWenU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673148; c=relaxed/simple; bh=srZaNiGZ8gMoQFPqdEH2dAlb++rURb+TdhBd5TFMA40=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=lF/8YKyiu8mjHiQJympYmYkSEZ2bW2SjtQkIN8WVKVI5AKeaPnE+Jy4ANP8dXgWZF+JCPtKfpZ+X5XVv6K0fedI/xCP5qoDmKC5B6Ul9wLHgnK8sXh+v2ynSqYuIf7H/DbbGsQXUQC7ukbJJQE05mkzZ1ifQuCjAbGXuknU449A= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=V1i2jBBa; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="V1i2jBBa" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 8AE18C4CED4; Sun, 3 Nov 2024 22:32:27 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673148; 
bh=srZaNiGZ8gMoQFPqdEH2dAlb++rURb+TdhBd5TFMA40=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=V1i2jBBaTfB4N5ChgcdHalKuIUTTPdkUr08K5qesOFmTJcS2yTC+5x78uXSSKEwv1 8Es1MHqzCn23bxTYo360exE1QaH8Uf1WP2/gPffL0PoxBc1cGxXMlMQwGsO08uzaQ6 wdQyYMCryJEy9WzsP6+muxt1WVDkKzFwRjnHkfd1paJ4SNDDvu/4eX059n7CaW79uS BAZX44Fuhs5A+20G6Jhe1cyBxL2S8b0ajUBl+Ylw+627OgWFl5dmoo8Ru54Uy2NwOy IpSnQ75RJw6VEYaq6nfMR1h85uum+A5NfY/8dzP2AlZJrNWNO5qmPz+FIkPtRCgSRw 9rTk1VMi0zXBg== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 07/18] mips/crc32: expose CRC32 functions through lib Date: Sun, 3 Nov 2024 14:31:43 -0800 Message-ID: <20241103223154.136127-8-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the mips CRC32 assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/mips/crypto/crc32-mips.c to arch/mips/lib/crc32-mips.c, view this commit with 'git show -M10'. 
Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/mips/Kconfig | 5 +- arch/mips/configs/eyeq5_defconfig | 1 - arch/mips/configs/eyeq6_defconfig | 1 - arch/mips/configs/generic/32r6.config | 2 - arch/mips/configs/generic/64r6.config | 1 - arch/mips/crypto/Kconfig | 9 - arch/mips/crypto/Makefile | 2 - arch/mips/crypto/crc32-mips.c | 354 -------------------------- arch/mips/lib/Makefile | 2 + arch/mips/lib/crc32-mips.c | 192 ++++++++++++++ 10 files changed, 195 insertions(+), 374 deletions(-) delete mode 100644 arch/mips/crypto/crc32-mips.c create mode 100644 arch/mips/lib/crc32-mips.c diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 397edf05dd72..f80ea80d792f 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1993,15 +1993,15 @@ config CPU_MIPSR5 select MIPS_SPRAM =20 config CPU_MIPSR6 bool default y if CPU_MIPS32_R6 || CPU_MIPS64_R6 + select ARCH_HAS_CRC32 select CPU_HAS_RIXI select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select HAVE_ARCH_BITREVERSE select MIPS_ASID_BITS_VARIABLE - select MIPS_CRC_SUPPORT select MIPS_SPRAM =20 config TARGET_ISA_REV int default 1 if CPU_MIPSR1 @@ -2473,13 +2473,10 @@ config MIPS_ASID_BITS default 8 =20 config MIPS_ASID_BITS_VARIABLE bool =20 -config MIPS_CRC_SUPPORT - bool - # R4600 erratum. Due to the lack of errata information the exact # technical details aren't known. I've experimentally found that disabling # interrupts during indexed I-cache flushes seems to be sufficient to deal # with the issue. 
config WAR_R4600_V1_INDEX_ICACHEOP diff --git a/arch/mips/configs/eyeq5_defconfig b/arch/mips/configs/eyeq5_de= fconfig index ae9a09b16e40..ff7af5dc6d9d 100644 --- a/arch/mips/configs/eyeq5_defconfig +++ b/arch/mips/configs/eyeq5_defconfig @@ -97,11 +97,10 @@ CONFIG_NFS_FS=3Dy CONFIG_NFS_V3_ACL=3Dy CONFIG_NFS_V4=3Dy CONFIG_NFS_V4_1=3Dy CONFIG_NFS_V4_2=3Dy CONFIG_ROOT_NFS=3Dy -CONFIG_CRYPTO_CRC32_MIPS=3Dy CONFIG_FRAME_WARN=3D1024 CONFIG_DEBUG_FS=3Dy # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set CONFIG_CMDLINE_BOOL=3Dy diff --git a/arch/mips/configs/eyeq6_defconfig b/arch/mips/configs/eyeq6_de= fconfig index 6597d5e88b33..0afbb45a78e8 100644 --- a/arch/mips/configs/eyeq6_defconfig +++ b/arch/mips/configs/eyeq6_defconfig @@ -100,11 +100,10 @@ CONFIG_NFS_FS=3Dy CONFIG_NFS_V3_ACL=3Dy CONFIG_NFS_V4=3Dy CONFIG_NFS_V4_1=3Dy CONFIG_NFS_V4_2=3Dy CONFIG_ROOT_NFS=3Dy -CONFIG_CRYPTO_CRC32_MIPS=3Dy CONFIG_FRAME_WARN=3D1024 CONFIG_DEBUG_FS=3Dy # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set CONFIG_CMDLINE_BOOL=3Dy diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/gene= ric/32r6.config index 1a5d5ea4ab2b..ca606e71f4d0 100644 --- a/arch/mips/configs/generic/32r6.config +++ b/arch/mips/configs/generic/32r6.config @@ -1,4 +1,2 @@ CONFIG_CPU_MIPS32_R6=3Dy CONFIG_HIGHMEM=3Dy - -CONFIG_CRYPTO_CRC32_MIPS=3Dy diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/gene= ric/64r6.config index 63b4e95f303d..23a300914957 100644 --- a/arch/mips/configs/generic/64r6.config +++ b/arch/mips/configs/generic/64r6.config @@ -2,7 +2,6 @@ CONFIG_CPU_MIPS64_R6=3Dy CONFIG_64BIT=3Dy CONFIG_MIPS32_O32=3Dy CONFIG_MIPS32_N32=3Dy =20 CONFIG_CPU_HAS_MSA=3Dy -CONFIG_CRYPTO_CRC32_MIPS=3Dy CONFIG_VIRTUALIZATION=3Dy diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig index 9003a5c1e879..7decd40c4e20 100644 --- a/arch/mips/crypto/Kconfig +++ b/arch/mips/crypto/Kconfig @@ -1,18 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 =20 menu "Accelerated 
Cryptographic Algorithms for CPU (mips)" =20 -config CRYPTO_CRC32_MIPS - tristate "CRC32c and CRC32" - depends on MIPS_CRC_SUPPORT - select CRYPTO_HASH - help - CRC32c and CRC32 CRC algorithms - - Architecture: mips - config CRYPTO_POLY1305_MIPS tristate "Hash functions: Poly1305" depends on MIPS select CRYPTO_ARCH_HAVE_LIB_POLY1305 help diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index 5e4105cccf9f..fddc88281412 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -1,12 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 # # Makefile for MIPS crypto files.. # =20 -obj-$(CONFIG_CRYPTO_CRC32_MIPS) +=3D crc32-mips.o - obj-$(CONFIG_CRYPTO_CHACHA_MIPS) +=3D chacha-mips.o chacha-mips-y :=3D chacha-core.o chacha-glue.o AFLAGS_chacha-core.o +=3D -O2 # needed to fill branch delay slots =20 obj-$(CONFIG_CRYPTO_POLY1305_MIPS) +=3D poly1305-mips.o diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c deleted file mode 100644 index 90eacf00cfc3..000000000000 --- a/arch/mips/crypto/crc32-mips.c +++ /dev/null @@ -1,354 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions - * - * Module based on arm64/crypto/crc32-arm.c - * - * Copyright (C) 2014 Linaro Ltd - * Copyright (C) 2018 MIPS Tech, LLC - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -enum crc_op_size { - b, h, w, d, -}; - -enum crc_type { - crc32, - crc32c, -}; - -#ifndef TOOLCHAIN_SUPPORTS_CRC -#define _ASM_SET_CRC(OP, SZ, TYPE) \ -_ASM_MACRO_3R(OP, rt, rs, rt2, \ - ".ifnc \\rt, \\rt2\n\t" \ - ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ - ".endif\n\t" \ - _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \ - ((SZ) << 6) | ((TYPE) << 8)) \ - _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ - ((SZ) << 14) | ((TYPE) << 3))) -#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" -#else /* 
!TOOLCHAIN_SUPPORTS_CRC */ -#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" -#define _ASM_UNSET_CRC(op, SZ, TYPE) -#endif - -#define __CRC32(crc, value, op, SZ, TYPE) \ -do { \ - __asm__ __volatile__( \ - ".set push\n\t" \ - _ASM_SET_CRC(op, SZ, TYPE) \ - #op " %0, %1, %0\n\t" \ - _ASM_UNSET_CRC(op, SZ, TYPE) \ - ".set pop" \ - : "+r" (crc) \ - : "r" (value)); \ -} while (0) - -#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) -#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) -#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) -#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) -#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) -#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) -#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) -#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) - -#define _CRC32(crc, value, size, op) \ - _CRC32_##op##size(crc, value) - -#define CRC32(crc, value, size) \ - _CRC32(crc, value, size, crc32) - -#define CRC32C(crc, value, size) \ - _CRC32(crc, value, size, crc32c) - -static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc =3D crc_; - - if (IS_ENABLED(CONFIG_64BIT)) { - for (; len >=3D sizeof(u64); p +=3D sizeof(u64), len -=3D sizeof(u64)) { - u64 value =3D get_unaligned_le64(p); - - CRC32(crc, value, d); - } - - if (len & sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32(crc, value, w); - p +=3D sizeof(u32); - } - } else { - for (; len >=3D sizeof(u32); len -=3D sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32(crc, value, w); - p +=3D sizeof(u32); - } - } - - if (len & sizeof(u16)) { - u16 value =3D get_unaligned_le16(p); - - CRC32(crc, value, h); - p +=3D sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value =3D *p++; - - CRC32(crc, value, b); - } - - return crc; -} - -static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned 
int len) -{ - u32 crc =3D crc_; - - if (IS_ENABLED(CONFIG_64BIT)) { - for (; len >=3D sizeof(u64); p +=3D sizeof(u64), len -=3D sizeof(u64)) { - u64 value =3D get_unaligned_le64(p); - - CRC32C(crc, value, d); - } - - if (len & sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32C(crc, value, w); - p +=3D sizeof(u32); - } - } else { - for (; len >=3D sizeof(u32); len -=3D sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32C(crc, value, w); - p +=3D sizeof(u32); - } - } - - if (len & sizeof(u16)) { - u16 value =3D get_unaligned_le16(p); - - CRC32C(crc, value, h); - p +=3D sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value =3D *p++; - - CRC32C(crc, value, b); - } - return crc; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -struct chksum_ctx { - u32 key; -}; - -struct chksum_desc_ctx { - u32 crc; -}; - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D mctx->key; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. 
- */ -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(tfm); - - if (keylen !=3D sizeof(mctx->key)) - return -EINVAL; - mctx->key =3D get_unaligned_le32(key); - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D crc32_mips_le_hw(ctx->crc, data, length); - return 0; -} - -static int chksumc_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D crc32c_mips_le_hw(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - put_unaligned_le32(ctx->crc, out); - return 0; -} - -static int chksumc_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - put_unaligned_le32(~ctx->crc, out); - return 0; -} - -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *o= ut) -{ - put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out); - return 0; -} - -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *= out) -{ - put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksumc_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - return __chksumc_finup(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - - return 
__chksum_finup(mctx->key, data, length, out); -} - -static int chksumc_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - - return __chksumc_finup(mctx->key, data, length, out); -} - -static int chksum_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D ~0; - return 0; -} - -static struct shash_alg crc32_alg =3D { - .digestsize =3D CHKSUM_DIGEST_SIZE, - .setkey =3D chksum_setkey, - .init =3D chksum_init, - .update =3D chksum_update, - .final =3D chksum_final, - .finup =3D chksum_finup, - .digest =3D chksum_digest, - .descsize =3D sizeof(struct chksum_desc_ctx), - .base =3D { - .cra_name =3D "crc32", - .cra_driver_name =3D "crc32-mips-hw", - .cra_priority =3D 300, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct chksum_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D chksum_cra_init, - } -}; - -static struct shash_alg crc32c_alg =3D { - .digestsize =3D CHKSUM_DIGEST_SIZE, - .setkey =3D chksum_setkey, - .init =3D chksum_init, - .update =3D chksumc_update, - .final =3D chksumc_final, - .finup =3D chksumc_finup, - .digest =3D chksumc_digest, - .descsize =3D sizeof(struct chksum_desc_ctx), - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-mips-hw", - .cra_priority =3D 300, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct chksum_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D chksum_cra_init, - } -}; - -static int __init crc32_mod_init(void) -{ - int err; - - err =3D crypto_register_shash(&crc32_alg); - - if (err) - return err; - - err =3D crypto_register_shash(&crc32c_alg); - - if (err) { - crypto_unregister_shash(&crc32_alg); - return err; - } - - return 0; -} - -static void __exit crc32_mod_exit(void) -{ - crypto_unregister_shash(&crc32_alg); - 
crypto_unregister_shash(&crc32c_alg); -} - -MODULE_AUTHOR("Marcin Nowakowski + * Copyright (C) 2018 MIPS Tech, LLC + */ + +#include +#include +#include +#include +#include +#include +#include + +enum crc_op_size { + b, h, w, d, +}; + +enum crc_type { + crc32, + crc32c, +}; + +#ifndef TOOLCHAIN_SUPPORTS_CRC +#define _ASM_SET_CRC(OP, SZ, TYPE) \ +_ASM_MACRO_3R(OP, rt, rs, rt2, \ + ".ifnc \\rt, \\rt2\n\t" \ + ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ + ".endif\n\t" \ + _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \ + ((SZ) << 6) | ((TYPE) << 8)) \ + _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ + ((SZ) << 14) | ((TYPE) << 3))) +#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" +#else /* !TOOLCHAIN_SUPPORTS_CRC */ +#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" +#define _ASM_UNSET_CRC(op, SZ, TYPE) +#endif + +#define __CRC32(crc, value, op, SZ, TYPE) \ +do { \ + __asm__ __volatile__( \ + ".set push\n\t" \ + _ASM_SET_CRC(op, SZ, TYPE) \ + #op " %0, %1, %0\n\t" \ + _ASM_UNSET_CRC(op, SZ, TYPE) \ + ".set pop" \ + : "+r" (crc) \ + : "r" (value)); \ +} while (0) + +#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) +#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) +#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) +#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) +#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) +#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) +#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) +#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) + +#define _CRC32(crc, value, size, op) \ + _CRC32_##op##size(crc, value) + +#define CRC32(crc, value, size) \ + _CRC32(crc, value, size, crc32) + +#define CRC32C(crc, value, size) \ + _CRC32(crc, value, size, crc32c) + +static DEFINE_STATIC_KEY_FALSE(have_crc32); + +u32 crc32_le_arch(u32 crc, 
const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_64BIT)) { + for (; len >=3D sizeof(u64); p +=3D sizeof(u64), len -=3D sizeof(u64)) { + u64 value =3D get_unaligned_le64(p); + + CRC32(crc, value, d); + } + + if (len & sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32(crc, value, w); + p +=3D sizeof(u32); + } + } else { + for (; len >=3D sizeof(u32); len -=3D sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32(crc, value, w); + p +=3D sizeof(u32); + } + } + + if (len & sizeof(u16)) { + u16 value =3D get_unaligned_le16(p); + + CRC32(crc, value, h); + p +=3D sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value =3D *p++; + + CRC32(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_64BIT)) { + for (; len >=3D sizeof(u64); p +=3D sizeof(u64), len -=3D sizeof(u64)) { + u64 value =3D get_unaligned_le64(p); + + CRC32C(crc, value, d); + } + + if (len & sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32C(crc, value, w); + p +=3D sizeof(u32); + } + } else { + for (; len >=3D sizeof(u32); len -=3D sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32C(crc, value, w); + p +=3D sizeof(u32); + } + } + + if (len & sizeof(u16)) { + u16 value =3D get_unaligned_le16(p); + + CRC32C(crc, value, h); + p +=3D sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value =3D *p++; + + CRC32C(crc, value, b); + } + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_mips_init(void) +{ + if (cpu_have_feature(cpu_feature(MIPS_CRC32))) + static_branch_enable(&have_crc32); + return 0; +} +arch_initcall(crc32_mips_init); + +static void 
__exit crc32_mips_exit(void) +{ +} +module_exit(crc32_mips_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_crc32)) + return CRC32_LE_OPTIMIZATION | CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Marcin Nowakowski To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 08/18] powerpc/crc32: expose CRC32 functions through lib Date: Sun, 3 Nov 2024 14:31:44 -0800 Message-ID: <20241103223154.136127-9-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the powerpc CRC32C assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/powerpc/crypto/crc32c-vpmsum_glue.c to arch/powerpc/lib/crc32-glue.c, view this commit with 'git show -M10'. 
Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers Acked-by: Michael Ellerman (powerpc) --- arch/powerpc/Kconfig | 1 + arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/crypto/Kconfig | 15 +- arch/powerpc/crypto/Makefile | 2 - arch/powerpc/crypto/crc32c-vpmsum_glue.c | 173 ------------------ arch/powerpc/crypto/crct10dif-vpmsum_asm.S | 2 +- arch/powerpc/lib/Makefile | 3 + arch/powerpc/lib/crc32-glue.c | 92 ++++++++++ .../{crypto =3D> lib}/crc32-vpmsum_core.S | 0 .../{crypto =3D> lib}/crc32c-vpmsum_asm.S | 0 11 files changed, 98 insertions(+), 192 deletions(-) delete mode 100644 arch/powerpc/crypto/crc32c-vpmsum_glue.c create mode 100644 arch/powerpc/lib/crc32-glue.c rename arch/powerpc/{crypto =3D> lib}/crc32-vpmsum_core.S (100%) rename arch/powerpc/{crypto =3D> lib}/crc32c-vpmsum_asm.S (100%) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8094a01974cc..b05889400b04 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -125,10 +125,11 @@ config PPC select ARCH_DISABLE_KASAN_INLINE if PPC_RADIX_MMU select ARCH_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_COPY_MC if PPC64 + select ARCH_HAS_CRC32 if PPC64 && ALTIVEC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/= powernv_defconfig index ee84ade7a033..4a7ddea05b4d 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -318,11 +318,10 @@ CONFIG_FTR_FIXUP_SELFTEST=3Dy CONFIG_MSI_BITMAP_SELFTEST=3Dy CONFIG_XMON=3Dy CONFIG_CRYPTO_TEST=3Dm CONFIG_CRYPTO_PCBC=3Dm CONFIG_CRYPTO_HMAC=3Dy -CONFIG_CRYPTO_CRC32C_VPMSUM=3Dm CONFIG_CRYPTO_CRCT10DIF_VPMSUM=3Dm CONFIG_CRYPTO_MD5_PPC=3Dm CONFIG_CRYPTO_MICHAEL_MIC=3Dm 
CONFIG_CRYPTO_SHA1_PPC=3Dm CONFIG_CRYPTO_SHA256=3Dy diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/pp= c64_defconfig index a5e3e7f97f4d..ea01c0d6705f 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -388,11 +388,10 @@ CONFIG_CRYPTO_TWOFISH=3Dm CONFIG_CRYPTO_PCBC=3Dm CONFIG_CRYPTO_MICHAEL_MIC=3Dm CONFIG_CRYPTO_SHA256=3Dy CONFIG_CRYPTO_WP512=3Dm CONFIG_CRYPTO_LZO=3Dm -CONFIG_CRYPTO_CRC32C_VPMSUM=3Dm CONFIG_CRYPTO_CRCT10DIF_VPMSUM=3Dm CONFIG_CRYPTO_VPMSUM_TESTER=3Dm CONFIG_CRYPTO_MD5_PPC=3Dm CONFIG_CRYPTO_SHA1_PPC=3Dm CONFIG_CRYPTO_AES_GCM_P10=3Dm diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 46a4c85e85e2..5c016ec39530 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -11,23 +11,10 @@ config CRYPTO_CURVE25519_PPC64 Curve25519 algorithm =20 Architecture: PowerPC64 - Little-endian =20 -config CRYPTO_CRC32C_VPMSUM - tristate "CRC32c" - depends on PPC64 && ALTIVEC - select CRYPTO_HASH - select CRC32 - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: powerpc64 using - - AltiVec extensions - - Enable on POWER8 and newer processors for improved performance. - config CRYPTO_CRCT10DIF_VPMSUM tristate "CRC32T10DIF" depends on PPC64 && ALTIVEC && CRC_T10DIF select CRYPTO_HASH help @@ -38,11 +25,11 @@ config CRYPTO_CRCT10DIF_VPMSUM =20 Enable on POWER8 and newer processors for improved performance. =20 config CRYPTO_VPMSUM_TESTER tristate "CRC32c and CRC32T10DIF hardware acceleration tester" - depends on CRYPTO_CRCT10DIF_VPMSUM && CRYPTO_CRC32C_VPMSUM + depends on CRYPTO_CRCT10DIF_VPMSUM && CRC32_ARCH help Stress test for CRC32c and CRCT10DIF algorithms implemented with powerpc64 AltiVec extensions (POWER8 vpmsum instructions). Unless you are testing these algorithms, you don't need this. 
=20 diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 59808592f0a1..54486192273c 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -8,11 +8,10 @@ obj-$(CONFIG_CRYPTO_AES_PPC_SPE) +=3D aes-ppc-spe.o obj-$(CONFIG_CRYPTO_MD5_PPC) +=3D md5-ppc.o obj-$(CONFIG_CRYPTO_SHA1_PPC) +=3D sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) +=3D sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) +=3D sha256-ppc-spe.o -obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) +=3D crc32c-vpmsum.o obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) +=3D crct10dif-vpmsum.o obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) +=3D crc-vpmsum_test.o obj-$(CONFIG_CRYPTO_AES_GCM_P10) +=3D aes-gcm-p10-crypto.o obj-$(CONFIG_CRYPTO_CHACHA20_P10) +=3D chacha-p10-crypto.o obj-$(CONFIG_CRYPTO_POLY1305_P10) +=3D poly1305-p10-crypto.o @@ -22,11 +21,10 @@ obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) +=3D curve25519-p= pc64le.o aes-ppc-spe-y :=3D aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-mode= s.o aes-spe-glue.o md5-ppc-y :=3D md5-asm.o md5-glue.o sha1-powerpc-y :=3D sha1-powerpc-asm.o sha1.o sha1-ppc-spe-y :=3D sha1-spe-asm.o sha1-spe-glue.o sha256-ppc-spe-y :=3D sha256-spe-asm.o sha256-spe-glue.o -crc32c-vpmsum-y :=3D crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o crct10dif-vpmsum-y :=3D crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o aes-gcm-p10-crypto-y :=3D aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o = aesp10-ppc.o chacha-p10-crypto-y :=3D chacha-p10-glue.o chacha-p10le-8x.o poly1305-p10-crypto-y :=3D poly1305-p10-glue.o poly1305-p10le_64.o vmx-crypto-objs :=3D vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_c= tr.o aes_xts.o ghash.o diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto= /crc32c-vpmsum_glue.c deleted file mode 100644 index 63760b7dbb76..000000000000 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ /dev/null @@ -1,173 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include -#include -#include -#include 
-#include -#include - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define VMX_ALIGN 16 -#define VMX_ALIGN_MASK (VMX_ALIGN-1) - -#define VECTOR_BREAKPOINT 512 - -u32 __crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len); - -static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len) -{ - unsigned int prealign; - unsigned int tail; - - if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable()) - return __crc32c_le(crc, p, len); - - if ((unsigned long)p & VMX_ALIGN_MASK) { - prealign =3D VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); - crc =3D __crc32c_le(crc, p, prealign); - len -=3D prealign; - p +=3D prealign; - } - - if (len & ~VMX_ALIGN_MASK) { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - crc =3D __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); - disable_kernel_altivec(); - pagefault_enable(); - preempt_enable(); - } - - tail =3D len & VMX_ALIGN_MASK; - if (tail) { - p +=3D len & ~VMX_ALIGN_MASK; - crc =3D __crc32c_le(crc, p, tail); - } - - return crc; -} - -static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D ~0; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. 
- */ -static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32c_vpmsum_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D *mctx; - - return 0; -} - -static int crc32c_vpmsum_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D crc32c_vpmsum(*crcp, data, len); - - return 0; -} - -static int __crc32c_vpmsum_finup(u32 *crcp, const u8 *data, unsigned int l= en, - u8 *out) -{ - *(__le32 *)out =3D ~cpu_to_le32(crc32c_vpmsum(*crcp, data, len)); - - return 0; -} - -static int crc32c_vpmsum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_vpmsum_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_vpmsum_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *(__le32 *)out =3D ~cpu_to_le32p(crcp); - - return 0; -} - -static int crc32c_vpmsum_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_vpmsum_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg =3D { - .setkey =3D crc32c_vpmsum_setkey, - .init =3D crc32c_vpmsum_init, - .update =3D crc32c_vpmsum_update, - .final =3D crc32c_vpmsum_final, - .finup =3D crc32c_vpmsum_finup, - .digest =3D crc32c_vpmsum_digest, - .descsize =3D sizeof(u32), - .digestsize =3D CHKSUM_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-vpmsum", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(u32), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32c_vpmsum_cra_init, - } -}; - -static int __init 
crc32c_vpmsum_mod_init(void) -{ - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return crypto_register_shash(&alg); -} - -static void __exit crc32c_vpmsum_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_= init); -module_exit(crc32c_vpmsum_mod_fini); - -MODULE_AUTHOR("Anton Blanchard "); -MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructio= ns"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vpmsum"); diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S b/arch/powerpc/cryp= to/crct10dif-vpmsum_asm.S index f0b93a0fe168..0a52261bf859 100644 --- a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S +++ b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S @@ -840,6 +840,6 @@ .octa 0x000000000000000000000001f65a57f8 /* x^64 div p(x) */ /* Barrett constant n */ .octa 0x0000000000000000000000018bb70000 =20 #define CRC_FUNCTION_NAME __crct10dif_vpmsum -#include "crc32-vpmsum_core.S" +#include "../lib/crc32-vpmsum_core.S" diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index f14ecab674a3..da9381a1c95b 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -76,6 +76,9 @@ obj-$(CONFIG_FTR_FIXUP_SELFTEST) +=3D feature-fixups-test= .o obj-$(CONFIG_ALTIVEC) +=3D xor_vmx.o xor_vmx_glue.o CFLAGS_xor_vmx.o +=3D -mhard-float -maltivec $(call cc-option,-mabi=3Dalti= vec) # Enable CFLAGS_xor_vmx.o +=3D -isystem $(shell $(CC) -print-file-name=3Dinclude) =20 +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-powerpc.o +crc32-powerpc-y :=3D crc32-glue.o crc32c-vpmsum_asm.o + obj-$(CONFIG_PPC64) +=3D $(obj64-y) diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c new file mode 100644 index 000000000000..e9730f028afb --- /dev/null +++ b/arch/powerpc/lib/crc32-glue.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include 
+#include + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 512 + +static DEFINE_STATIC_KEY_FALSE(have_vec_crypto); + +u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_le_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable() || + !static_branch_likely(&have_vec_crypto)) + return crc32c_le_base(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign =3D VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc =3D crc32c_le_base(crc, p, prealign); + len -=3D prealign; + p +=3D prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + crc =3D __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); + pagefault_enable(); + preempt_enable(); + } + + tail =3D len & VMX_ALIGN_MASK; + if (tail) { + p +=3D len & ~VMX_ALIGN_MASK; + crc =3D crc32c_le_base(crc, p, tail); + } + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_powerpc_init(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + static_branch_enable(&have_vec_crypto); + return 0; +} +arch_initcall(crc32_powerpc_init); + +static void __exit crc32_powerpc_exit(void) +{ +} +module_exit(crc32_powerpc_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_vec_crypto)) + return CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Anton Blanchard "); +MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructio= ns"); +MODULE_LICENSE("GPL"); diff --git 
a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/lib/crc= 32-vpmsum_core.S similarity index 100% rename from arch/powerpc/crypto/crc32-vpmsum_core.S rename to arch/powerpc/lib/crc32-vpmsum_core.S diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/lib/crc= 32c-vpmsum_asm.S similarity index 100% rename from arch/powerpc/crypto/crc32c-vpmsum_asm.S rename to arch/powerpc/lib/crc32c-vpmsum_asm.S --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 909201ADFEB; Sun, 3 Nov 2024 22:32:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673149; cv=none; b=T7ut0ZYcVD4s9UYDuPNzTTIUMuTN3zVi7CXW9ZOsiznTZUXpsBCdBSETlGLWI2frzErQRvsws0izChKwpVRbCDWxlIfdRxuhzW5VY15PMG0/mwsmV6TxOqlfGsONKzzaSHxxzsl/G/KHWC+YB7qfCCZBTR0E7g6UWaN9ZM70xYU= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673149; c=relaxed/simple; bh=f+2N4brYUYTtI9VkPIVGdMQfynotEJ/v7dbK1tF3nNY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=LAcGfp60wbpvRj83+Vfr/8hcgUTJnVI/6GABo35/X/NK/TKG7b14bCK7hGah6eZyDAPotuB2p8glSSAx0pPJDkj2XR/gaU4z8pEbmOlHWpRJVwwtKuhCfFuIz4bUGK2+xZW6acTa99QRv+aBhFDnXqtL+ywybdMjKCko0Ks0xHQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=YR8n7ggR; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="YR8n7ggR" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B1437C4CED9; Sun, 3 Nov 2024 22:32:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673149; bh=f+2N4brYUYTtI9VkPIVGdMQfynotEJ/v7dbK1tF3nNY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=YR8n7ggR1x/a46HsXpDJR0KCidbbb+D2BqsxTJPcwx5U+T5i9ktY56EzFM7iCdNZQ QBHYqbHw5NqZeWXH1K6kO8TzYQhZlA+VZFwKnsgKUvvDF0Ukj7/DxERud/3ThBO4We jyj/a2Xx/I0Zl/wU3IlmPfbV4u8rnpc46gYseM7v9V/qrY2v6wJUVL2yPDZvz1bfbJ TCfRDNESvGvZMeca3HSwHZP+thUXwY69aHXdBPQoHJ0VFTt8M9HaehrJj4AmWXuKH6 s2DXiRWhdEz5EsYtL1vck83b6Viah4WV6vvh/0kqtuRjD56AQe40CG21gUrx1/T6Gd iNgi3UjuaZHSQ== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 09/18] s390/crc32: expose CRC32 functions through lib Date: Sun, 3 Nov 2024 14:31:45 -0800 Message-ID: <20241103223154.136127-10-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the s390 CRC32 assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/s390/crypto/crc32-vx.c to arch/s390/lib/crc32-glue.c, view this commit with 'git show -M10'. 
Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/s390/Kconfig | 1 + arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - arch/s390/crypto/Kconfig | 12 - arch/s390/crypto/Makefile | 2 - arch/s390/crypto/crc32-vx.c | 306 ------------------------- arch/s390/lib/Makefile | 3 + arch/s390/lib/crc32-glue.c | 92 ++++++++ arch/s390/{crypto =3D> lib}/crc32-vx.h | 0 arch/s390/{crypto =3D> lib}/crc32be-vx.c | 0 arch/s390/{crypto =3D> lib}/crc32le-vx.c | 0 11 files changed, 96 insertions(+), 322 deletions(-) delete mode 100644 arch/s390/crypto/crc32-vx.c create mode 100644 arch/s390/lib/crc32-glue.c rename arch/s390/{crypto =3D> lib}/crc32-vx.h (100%) rename arch/s390/{crypto =3D> lib}/crc32be-vx.c (100%) rename arch/s390/{crypto =3D> lib}/crc32le-vx.c (100%) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d339fe4fdedf..d1fde8b941d2 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -63,10 +63,11 @@ config S390 select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_de= fconfig index fb0e9a1d9be2..fd83d8958f0b 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -792,11 +792,10 @@ CONFIG_CRYPTO_ZSTD=3Dm CONFIG_CRYPTO_ANSI_CPRNG=3Dm CONFIG_CRYPTO_USER_API_HASH=3Dm CONFIG_CRYPTO_USER_API_SKCIPHER=3Dm CONFIG_CRYPTO_USER_API_RNG=3Dm CONFIG_CRYPTO_USER_API_AEAD=3Dm -CONFIG_CRYPTO_CRC32_S390=3Dy CONFIG_CRYPTO_SHA512_S390=3Dm CONFIG_CRYPTO_SHA1_S390=3Dm CONFIG_CRYPTO_SHA256_S390=3Dm CONFIG_CRYPTO_SHA3_256_S390=3Dm CONFIG_CRYPTO_SHA3_512_S390=3Dm diff --git a/arch/s390/configs/defconfig 
b/arch/s390/configs/defconfig index 88be0a734b60..3bdeb6d5cbd9 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -779,11 +779,10 @@ CONFIG_CRYPTO_ANSI_CPRNG=3Dm CONFIG_CRYPTO_JITTERENTROPY_OSR=3D1 CONFIG_CRYPTO_USER_API_HASH=3Dm CONFIG_CRYPTO_USER_API_SKCIPHER=3Dm CONFIG_CRYPTO_USER_API_RNG=3Dm CONFIG_CRYPTO_USER_API_AEAD=3Dm -CONFIG_CRYPTO_CRC32_S390=3Dy CONFIG_CRYPTO_SHA512_S390=3Dm CONFIG_CRYPTO_SHA1_S390=3Dm CONFIG_CRYPTO_SHA256_S390=3Dm CONFIG_CRYPTO_SHA3_256_S390=3Dm CONFIG_CRYPTO_SHA3_512_S390=3Dm diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index d3eb3a233693..b760232537f1 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -1,21 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 =20 menu "Accelerated Cryptographic Algorithms for CPU (s390)" =20 -config CRYPTO_CRC32_S390 - tristate "CRC32c and CRC32" - depends on S390 - select CRYPTO_HASH - select CRC32 - help - CRC32c and CRC32 CRC algorithms - - Architecture: s390 - - It is available with IBM z13 or later. 
- config CRYPTO_SHA512_S390 tristate "Hash functions: SHA-384 and SHA-512" depends on S390 select CRYPTO_HASH help diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index a0cb96937c3d..14dafadbcbed 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -12,11 +12,9 @@ obj-$(CONFIG_CRYPTO_DES_S390) +=3D des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) +=3D aes_s390.o obj-$(CONFIG_CRYPTO_PAES_S390) +=3D paes_s390.o obj-$(CONFIG_CRYPTO_CHACHA_S390) +=3D chacha_s390.o obj-$(CONFIG_S390_PRNG) +=3D prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) +=3D ghash_s390.o -obj-$(CONFIG_CRYPTO_CRC32_S390) +=3D crc32-vx_s390.o obj-$(CONFIG_CRYPTO_HMAC_S390) +=3D hmac_s390.o obj-y +=3D arch_random.o =20 -crc32-vx_s390-y :=3D crc32-vx.o crc32le-vx.o crc32be-vx.o chacha_s390-y :=3D chacha-glue.o chacha-s390.o diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c deleted file mode 100644 index 89a10337e6ea..000000000000 --- a/arch/s390/crypto/crc32-vx.c +++ /dev/null @@ -1,306 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Crypto-API module for CRC-32 algorithms implemented with the - * z/Architecture Vector Extension Facility. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ -#define KMSG_COMPONENT "crc32-vx" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include -#include -#include -#include -#include -#include "crc32-vx.h" - -#define CRC32_BLOCK_SIZE 1 -#define CRC32_DIGEST_SIZE 4 - -#define VX_MIN_LEN 64 -#define VX_ALIGNMENT 16L -#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) - -struct crc_ctx { - u32 key; -}; - -struct crc_desc_ctx { - u32 crc; -}; - -/* - * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension - * - * Creates a function to perform a particular CRC-32 computation. Depending - * on the message buffer, the hardware-accelerated or software implementat= ion - * is used. Note that the message buffer is aligned to improve fetch - * operations of VECTOR LOAD MULTIPLE instructions. 
- * - */ -#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ - static u32 __pure ___fname(u32 crc, \ - unsigned char const *data, size_t datalen) \ - { \ - unsigned long prealign, aligned, remaining; \ - DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ - \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ - return ___crc32_sw(crc, data, datalen); \ - \ - if ((unsigned long)data & VX_ALIGN_MASK) { \ - prealign =3D VX_ALIGNMENT - \ - ((unsigned long)data & VX_ALIGN_MASK); \ - datalen -=3D prealign; \ - crc =3D ___crc32_sw(crc, data, prealign); \ - data =3D (void *)((unsigned long)data + prealign); \ - } \ - \ - aligned =3D datalen & ~VX_ALIGN_MASK; \ - remaining =3D datalen & VX_ALIGN_MASK; \ - \ - kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ - crc =3D ___crc32_vx(crc, data, aligned); \ - kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ - \ - if (remaining) \ - crc =3D ___crc32_sw(crc, data + aligned, remaining); \ - \ - return crc; \ - } - -DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) -DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) -DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) - - -static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D 0; - return 0; -} - -static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D ~0; - return 0; -} - -static int crc32_vx_init(struct shash_desc *desc) -{ - struct crc_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D mctx->key; - return 0; -} - -static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx =3D crypto_shash_ctx(tfm); - - if (newkeylen !=3D sizeof(mctx->key)) - return -EINVAL; - mctx->key =3D le32_to_cpu(*(__le32 *)newkey); - return 0; -} - -static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned 
int newkeylen) -{ - struct crc_ctx *mctx =3D crypto_shash_ctx(tfm); - - if (newkeylen !=3D sizeof(mctx->key)) - return -EINVAL; - mctx->key =3D be32_to_cpu(*(__be32 *)newkey); - return 0; -} - -static int crc32le_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); - - *(__le32 *)out =3D cpu_to_le32p(&ctx->crc); - return 0; -} - -static int crc32be_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); - - *(__be32 *)out =3D cpu_to_be32p(&ctx->crc); - return 0; -} - -static int crc32c_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); - - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out =3D ~cpu_to_le32p(&ctx->crc); - return 0; -} - -static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out =3D cpu_to_le32(crc32_le_vx(*crc, data, len)); - return 0; -} - -static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__be32 *)out =3D cpu_to_be32(crc32_be_vx(*crc, data, len)); - return 0; -} - -static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. 
- */ - *(__le32 *)out =3D ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); - return 0; -} - - -#define CRC32_VX_FINUP(alg, func) \ - static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ - data, datalen, out); \ - } - -CRC32_VX_FINUP(crc32le, crc32_le_vx) -CRC32_VX_FINUP(crc32be, crc32_be_vx) -CRC32_VX_FINUP(crc32c, crc32c_le_vx) - -#define CRC32_VX_DIGEST(alg, func) \ - static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ - unsigned int len, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ - data, len, out); \ - } - -CRC32_VX_DIGEST(crc32le, crc32_le_vx) -CRC32_VX_DIGEST(crc32be, crc32_be_vx) -CRC32_VX_DIGEST(crc32c, crc32c_le_vx) - -#define CRC32_VX_UPDATE(alg, func) \ - static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen) \ - { \ - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); \ - ctx->crc =3D func(ctx->crc, data, datalen); \ - return 0; \ - } - -CRC32_VX_UPDATE(crc32le, crc32_le_vx) -CRC32_VX_UPDATE(crc32be, crc32_be_vx) -CRC32_VX_UPDATE(crc32c, crc32c_le_vx) - - -static struct shash_alg crc32_vx_algs[] =3D { - /* CRC-32 LE */ - { - .init =3D crc32_vx_init, - .setkey =3D crc32_vx_setkey, - .update =3D crc32le_vx_update, - .final =3D crc32le_vx_final, - .finup =3D crc32le_vx_finup, - .digest =3D crc32le_vx_digest, - .descsize =3D sizeof(struct crc_desc_ctx), - .digestsize =3D CRC32_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32", - .cra_driver_name =3D "crc32-vx", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CRC32_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct crc_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32_vx_cra_init_zero, - }, - }, - /* CRC-32 BE */ - { - .init =3D crc32_vx_init, - .setkey =3D crc32be_vx_setkey, - .update =3D crc32be_vx_update, - .final =3D crc32be_vx_final, - .finup =3D 
crc32be_vx_finup, - .digest =3D crc32be_vx_digest, - .descsize =3D sizeof(struct crc_desc_ctx), - .digestsize =3D CRC32_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32be", - .cra_driver_name =3D "crc32be-vx", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CRC32_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct crc_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32_vx_cra_init_zero, - }, - }, - /* CRC-32C LE */ - { - .init =3D crc32_vx_init, - .setkey =3D crc32_vx_setkey, - .update =3D crc32c_vx_update, - .final =3D crc32c_vx_final, - .finup =3D crc32c_vx_finup, - .digest =3D crc32c_vx_digest, - .descsize =3D sizeof(struct crc_desc_ctx), - .digestsize =3D CRC32_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-vx", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CRC32_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct crc_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32_vx_cra_init_invert, - }, - }, -}; - - -static int __init crc_vx_mod_init(void) -{ - return crypto_register_shashes(crc32_vx_algs, - ARRAY_SIZE(crc32_vx_algs)); -} - -static void __exit crc_vx_mod_exit(void) -{ - crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init); -module_exit(crc_vx_mod_exit); - -MODULE_AUTHOR("Hendrik Brueckner "); -MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extensio= n Facility"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-vx"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index f43f897d3fc0..14bbfe50033c 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -22,5 +22,8 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) +=3D test_modules.o obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) +=3D test_modules_helpers.o =20 
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o =20 obj-$(CONFIG_EXPOLINE_EXTERN) +=3D expoline.o + +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-s390.o +crc32-s390-y :=3D crc32-glue.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c new file mode 100644 index 000000000000..137080e61f90 --- /dev/null +++ b/arch/s390/lib/crc32-glue.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CRC-32 implemented with the z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include "crc32-vx.h" + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +static DEFINE_STATIC_KEY_FALSE(have_vxrs); + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementat= ion + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. 
+ */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + u32 ___fname(u32 crc, const u8 *data, size_t datalen) \ + { \ + unsigned long prealign, aligned, remaining; \ + DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ + \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \ + !static_branch_likely(&have_vxrs)) \ + return ___crc32_sw(crc, data, datalen); \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign =3D VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -=3D prealign; \ + crc =3D ___crc32_sw(crc, data, prealign); \ + data =3D (void *)((unsigned long)data + prealign); \ + } \ + \ + aligned =3D datalen & ~VX_ALIGN_MASK; \ + remaining =3D datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc =3D ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ + \ + if (remaining) \ + crc =3D ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } \ + EXPORT_SYMBOL(___fname); + +DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) +DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) +DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base) + +static int __init crc32_s390_init(void) +{ + if (cpu_have_feature(S390_CPU_FEATURE_VXRS)) + static_branch_enable(&have_vxrs); + return 0; +} +arch_initcall(crc32_s390_init); + +static void __exit crc32_s390_exit(void) +{ +} +module_exit(crc32_s390_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_vxrs)) + return CRC32_LE_OPTIMIZATION | + CRC32_BE_OPTIMIZATION | + CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_AUTHOR("Hendrik Brueckner "); +MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extensio= n Facility"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/lib/crc32-vx.h similarity index 100% rename from arch/s390/crypto/crc32-vx.h rename to arch/s390/lib/crc32-vx.h diff --git 
a/arch/s390/crypto/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c similarity index 100% rename from arch/s390/crypto/crc32be-vx.c rename to arch/s390/lib/crc32be-vx.c diff --git a/arch/s390/crypto/crc32le-vx.c b/arch/s390/lib/crc32le-vx.c similarity index 100% rename from arch/s390/crypto/crc32le-vx.c rename to arch/s390/lib/crc32le-vx.c --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1A7F31AE875; Sun, 3 Nov 2024 22:32:29 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673150; cv=none; b=BGVInediTlUrH4SXPNSfZfgbWVYLCjJyZxvECwMAGBP7vEVTOPhfCIVCFr/CBvCw8z82LsQVBajpHFKNcY1fdV/xh/SbyyjofH4upj1U5CZeBHV9OPyh3zBiJAOpFNgsuTrWpBHoD+oMbJwP0wIBxCC/DPsnRiwGFLEijESq5xc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673150; c=relaxed/simple; bh=fnuKyp+06/WpTmmC60ypZO4FJZaHFxjnVVTOpeE5DQA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=NmIKD+nd2qrBshTrVpVQGASXXv+hhNKEF6lBKtmj4nj70G0dcx2YKA0pbuUumH0c/6mskQhZrJa6eG4PVWHk2fpBD1kiqkkADycdI4gadDUIjzmST026dDi+zL5kwlyf90t+mqBHC+DgIOmyFnKAmykNLkdDFqhBRNfHEXQsTqo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=aDRhvo56; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="aDRhvo56" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 458CFC4CECF; Sun, 3 Nov 2024 22:32:29 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673149; 
bh=fnuKyp+06/WpTmmC60ypZO4FJZaHFxjnVVTOpeE5DQA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=aDRhvo56Z19nFZqZtFLodnd48686q5iREqnG/WgJSpfGnqUD7QeC6zOjhrQdqeM/P t8EqO6ZML4DuwU5V7Mqv8yvuMjQSBmea2FoczsqIbhTRMtdnGwoGBFTfBvpby0Cd2P IyPBMkVn+XXYxUl2cbqpLz52uJTDujfw5HhfKKlbVoQiiRHiAcyKjhPRGiZsYaRvOA Zi6HwUhTH+m90tNH08z9z92yRhbQFXGKe+GGEVp5BqtRcSJdCYbLM+P3weepBlAgqy QYNLHsMOBEqtjjs65opoModOZ4uAE7G0MP53EgbyIIvVzIy0AD2VOR19EFmF50tSBI x8/i0mVktOJ3A== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 10/18] sparc/crc32: expose CRC32 functions through lib Date: Sun, 3 Nov 2024 14:31:46 -0800 Message-ID: <20241103223154.136127-11-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the sparc CRC32C assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/sparc/crypto/crc32c_glue.c to arch/sparc/lib/crc32_glue.c, view this commit with 'git show -M10'. 
Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/sparc/Kconfig | 1 + arch/sparc/crypto/Kconfig | 10 -- arch/sparc/crypto/Makefile | 4 - arch/sparc/crypto/crc32c_glue.c | 184 ------------------------ arch/sparc/lib/Makefile | 2 + arch/sparc/lib/crc32_glue.c | 93 ++++++++++++ arch/sparc/{crypto =3D> lib}/crc32c_asm.S | 2 +- 7 files changed, 97 insertions(+), 199 deletions(-) delete mode 100644 arch/sparc/crypto/crc32c_glue.c create mode 100644 arch/sparc/lib/crc32_glue.c rename arch/sparc/{crypto =3D> lib}/crc32c_asm.S (92%) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index dcfdb7f1dae9..0f88123925a4 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -108,10 +108,11 @@ config SPARC64 select ARCH_HAS_GIGANTIC_PAGE select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SETUP_PER_CPU_AREA select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK + select ARCH_HAS_CRC32 =20 config ARCH_PROC_KCORE_TEXT def_bool y =20 config CPU_BIG_ENDIAN diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig index cfe5102b1c68..e858597de89d 100644 --- a/arch/sparc/crypto/Kconfig +++ b/arch/sparc/crypto/Kconfig @@ -14,20 +14,10 @@ config CRYPTO_DES_SPARC64 Length-preserving ciphers: DES with ECB and CBC modes Length-preserving ciphers: Tripe DES EDE with ECB and CBC modes =20 Architecture: sparc64 =20 -config CRYPTO_CRC32C_SPARC64 - tristate "CRC32c" - depends on SPARC64 - select CRYPTO_HASH - select CRC32 - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: sparc64 - config CRYPTO_MD5_SPARC64 tristate "Digests: MD5" depends on SPARC64 select CRYPTO_MD5 select CRYPTO_HASH diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index d257186c27d1..a2d7fca40cb4 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -10,17 +10,13 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) +=3D md5-sparc64.o =20 obj-$(CONFIG_CRYPTO_AES_SPARC64) +=3D aes-sparc64.o 
obj-$(CONFIG_CRYPTO_DES_SPARC64) +=3D des-sparc64.o obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) +=3D camellia-sparc64.o =20 -obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) +=3D crc32c-sparc64.o - sha1-sparc64-y :=3D sha1_asm.o sha1_glue.o sha256-sparc64-y :=3D sha256_asm.o sha256_glue.o sha512-sparc64-y :=3D sha512_asm.o sha512_glue.o md5-sparc64-y :=3D md5_asm.o md5_glue.o =20 aes-sparc64-y :=3D aes_asm.o aes_glue.o des-sparc64-y :=3D des_asm.o des_glue.o camellia-sparc64-y :=3D camellia_asm.o camellia_glue.o - -crc32c-sparc64-y :=3D crc32c_asm.o crc32c_glue.o diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glu= e.c deleted file mode 100644 index 913b9a09e885..000000000000 --- a/arch/sparc/crypto/crc32c_glue.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Glue code for CRC32C optimized for sparc64 crypto opcodes. - * - * This is based largely upon arch/x86/crypto/crc32c-intel.c - * - * Copyright (C) 2008 Intel Corporation - * Authors: Austin Zhang - * Kent Liu - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include "opcodes.h" - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. 
- */ -static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D get_unaligned_le32(key); - return 0; -} - -static int crc32c_sparc64_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D *mctx; - - return 0; -} - -extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); - -static u32 crc32c_compute(u32 crc, const u8 *data, unsigned int len) -{ - unsigned int n =3D -(uintptr_t)data & 7; - - if (n) { - /* Data isn't 8-byte aligned. Align it. */ - n =3D min(n, len); - crc =3D __crc32c_le(crc, data, n); - data +=3D n; - len -=3D n; - } - n =3D len & ~7U; - if (n) { - crc32c_sparc64(&crc, (const u64 *)data, n); - data +=3D n; - len -=3D n; - } - if (len) - crc =3D __crc32c_le(crc, data, len); - return crc; -} - -static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D crc32c_compute(*crcp, data, len); - return 0; -} - -static int __crc32c_sparc64_finup(const u32 *crcp, const u8 *data, - unsigned int len, u8 *out) -{ - put_unaligned_le32(~crc32c_compute(*crcp, data, len), out); - return 0; -} - -static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - put_unaligned_le32(~*crcp, out); - return 0; -} - -static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D ~0; - - return 
0; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -static struct shash_alg alg =3D { - .setkey =3D crc32c_sparc64_setkey, - .init =3D crc32c_sparc64_init, - .update =3D crc32c_sparc64_update, - .final =3D crc32c_sparc64_final, - .finup =3D crc32c_sparc64_finup, - .digest =3D crc32c_sparc64_digest, - .descsize =3D sizeof(u32), - .digestsize =3D CHKSUM_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-sparc64", - .cra_priority =3D SPARC_CR_OPCODE_PRIORITY, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(u32), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32c_sparc64_cra_init, - } -}; - -static bool __init sparc64_has_crc32c_opcode(void) -{ - unsigned long cfr; - - if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) - return false; - - __asm__ __volatile__("rd %%asr26, %0" : "=3Dr" (cfr)); - if (!(cfr & CFR_CRC32C)) - return false; - - return true; -} - -static int __init crc32c_sparc64_mod_init(void) -{ - if (sparc64_has_crc32c_opcode()) { - pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); - return crypto_register_shash(&alg); - } - pr_info("sparc64 crc32c opcode not available.\n"); - return -ENODEV; -} - -static void __exit crc32c_sparc64_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32c_sparc64_mod_init); -module_exit(crc32c_sparc64_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated= "); - -MODULE_ALIAS_CRYPTO("crc32c"); - -#include "crop_devid.c" diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index ee5091dd67ed..5724d0f356eb 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -51,5 +51,7 @@ lib-$(CONFIG_SPARC64) +=3D copy_in_user.o memmove.o lib-$(CONFIG_SPARC64) +=3D mcount.o ipcsum.o xor.o hweight.o ffs.o =20 obj-$(CONFIG_SPARC64) +=3D iomap.o obj-$(CONFIG_SPARC32) +=3D atomic32.o obj-$(CONFIG_SPARC64) +=3D 
PeeCeeI.o +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-sparc.o +crc32-sparc-y :=3D crc32_glue.o crc32c_asm.o diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32_glue.c new file mode 100644 index 000000000000..41076d2b1fd2 --- /dev/null +++ b/arch/sparc/lib/crc32_glue.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Glue code for CRC32C optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/crc32c-intel.c + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang + * Kent Liu + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +static DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode); + +u32 crc32_le_arch(u32 crc, const u8 *data, size_t len) +{ + return crc32_le_base(crc, data, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len); + +u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len) +{ + size_t n =3D -(uintptr_t)data & 7; + + if (!static_branch_likely(&have_crc32c_opcode)) + return crc32c_le_base(crc, data, len); + + if (n) { + /* Data isn't 8-byte aligned. Align it. 
*/ + n =3D min(n, len); + crc =3D crc32c_le_base(crc, data, n); + data +=3D n; + len -=3D n; + } + n =3D len & ~7U; + if (n) { + crc32c_sparc64(&crc, (const u64 *)data, n); + data +=3D n; + len -=3D n; + } + if (len) + crc =3D crc32c_le_base(crc, data, len); + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *data, size_t len) +{ + return crc32_be_base(crc, data, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_sparc_init(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return 0; + + __asm__ __volatile__("rd %%asr26, %0" : "=3Dr" (cfr)); + if (!(cfr & CFR_CRC32C)) + return 0; + + static_branch_enable(&have_crc32c_opcode); + pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); + return 0; +} +arch_initcall(crc32_sparc_init); + +static void __exit crc32_sparc_exit(void) +{ +} +module_exit(crc32_sparc_exit); + +u32 crc32_optimizations(void) +{ + if (static_key_enabled(&have_crc32c_opcode)) + return CRC32C_OPTIMIZATION; + return 0; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated= "); diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/lib/crc32c_asm.S similarity index 92% rename from arch/sparc/crypto/crc32c_asm.S rename to arch/sparc/lib/crc32c_asm.S index b8659a479242..ee454fa6aed6 100644 --- a/arch/sparc/crypto/crc32c_asm.S +++ b/arch/sparc/lib/crc32c_asm.S @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include #include =20 -#include "opcodes.h" +#include "../crypto/opcodes.h" =20 ENTRY(crc32c_sparc64) /* %o0=3Dcrc32p, %o1=3Ddata_ptr, %o2=3Dlen */ VISEntryHalf lda [%o0] ASI_PL, %f1 --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by 
smtp.subspace.kernel.org (Postfix) with ESMTPS id 594F61AF0AB; Sun, 3 Nov 2024 22:32:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673150; cv=none; b=fG8VmSQhAUyF9Uxq580/x2nwy3ycsH21mATLJRNqHFJUNlk56EkGDtQBeZNw6g6h3OT9Ytyrj65fVhzHCM4VFTgJtyj5fAskKwkTQ1dDbSQaN92KaSSmNnOoDxtHja1b8zbRuEK2sysY26ePaEXucQjIGFlvk3iINdhPh5P7Uy8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673150; c=relaxed/simple; bh=C1Eoo6ZdwZ6ma+QPnxpHev6POdiDHK4uQ/Pd5lLsWBY=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=qgAJHgy7I1mzy8hotoN6zGFd5BXA4Md/miNb7db6vJ7k9ZBgouiy21mgjc/DsdU71nhGVPj8QzE2S7vLGz/HQKYitKHSV2FzjfX6GTD85lcrS+XLrWu041IiXbfoaAhbReqPigYZxQpcQsdZiVOC8qwWz+/WbLwdxN7zll9oyw8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=gToxexMc; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="gToxexMc" Received: by smtp.kernel.org (Postfix) with ESMTPSA id CB7F0C4CED4; Sun, 3 Nov 2024 22:32:29 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673150; bh=C1Eoo6ZdwZ6ma+QPnxpHev6POdiDHK4uQ/Pd5lLsWBY=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=gToxexMc0R2jsOQHVnhOGMVVFr06xUilK0QttWgwoq1sW7LHTE2S1hqoWGAPCbQe9 AqMbGuVKbuq3ygr95ypTB2qCgnCI7y8zbyByKsAcr0vt4ZSHiLiz+pLzmFnpvDu+3n qFiRhvg8+CocY9l8w/0pwkNgIepDk+cwIpou20/Jcq7iBpEISzOx1G8H4eJEKsa2mX KMSstl1C8YqoaJ6aQHX3FHhoNn18lRlOY8bN8rTIx/PykY+aCgOc93kDJYMAkx/aaf q2UqumU1tPj72Y4n7uIariHfq88nVAiIq41HAASdynfIyTLCXHOo7IIvrJygE1DCGj S24AifbqDm2bg== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, 
linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 11/18] x86/crc32: update prototype for crc_pcl() Date: Sun, 3 Nov 2024 14:31:47 -0800 Message-ID: <20241103223154.136127-12-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers - Change the len parameter from unsigned int to size_t, so that the library function which takes a size_t can safely use this code. - Rename to crc32c_x86_3way() which is much clearer. - Move the crc parameter to the front, as this is the usual convention. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/x86/crypto/crc32c-intel_glue.c | 7 ++- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 63 ++++++++++++----------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-i= ntel_glue.c index 52c5d47ef5a1..603d159de400 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -39,12 +39,11 @@ * size is >=3D 512 to account * for fpu state save/restore overhead. 
*/ #define CRC32C_PCL_BREAKEVEN 512 =20 -asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len, - unsigned int crc_init); +asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); #endif /* CONFIG_X86_64 */ =20 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, siz= e_t length) { while (length--) { @@ -157,11 +156,11 @@ static int crc32c_pcl_intel_update(struct shash_desc = *desc, const u8 *data, * use faster PCL version if datasize is large enough to * overcome kernel fpu state save/restore overhead */ if (len >=3D CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); - *crcp =3D crc_pcl(data, len, *crcp); + *crcp =3D crc32c_x86_3way(*crcp, data, len); kernel_fpu_end(); } else *crcp =3D crc32c_intel_le_hw(*crcp, data, len); return 0; } @@ -169,11 +168,11 @@ static int crc32c_pcl_intel_update(struct shash_desc = *desc, const u8 *data, static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned in= t len, u8 *out) { if (len >=3D CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); - *(__le32 *)out =3D ~cpu_to_le32(crc_pcl(data, len, *crcp)); + *(__le32 *)out =3D ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len)); kernel_fpu_end(); } else *(__le32 *)out =3D ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); return 0; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/cr= c32c-pcl-intel-asm_64.S index 752812bc4991..9b8770503bbc 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -50,19 +50,20 @@ =20 # Define threshold below which buffers are considered "small" and routed to # regular CRC code that does not interleave the CRC instructions. 
#define SMALL_SIZE 200 =20 -# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int cr= c_init); +# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); =20 .text -SYM_FUNC_START(crc_pcl) -#define bufp %rdi -#define bufp_d %edi -#define len %esi -#define crc_init %edx -#define crc_init_q %rdx +SYM_FUNC_START(crc32c_x86_3way) +#define crc0 %edi +#define crc0_q %rdi +#define bufp %rsi +#define bufp_d %esi +#define len %rdx +#define len_dw %edx #define n_misaligned %ecx /* overlaps chunk_bytes! */ #define n_misaligned_q %rcx #define chunk_bytes %ecx /* overlaps n_misaligned! */ #define chunk_bytes_q %rcx #define crc1 %r8 @@ -83,13 +84,13 @@ SYM_FUNC_START(crc_pcl) # Process 1 <=3D n_misaligned <=3D 7 bytes individually in order to align # the remaining data to an 8-byte boundary. .Ldo_align: movq (bufp), %rax add n_misaligned_q, bufp - sub n_misaligned, len + sub n_misaligned_q, len .Lalign_loop: - crc32b %al, crc_init # compute crc32 of 1-byte + crc32b %al, crc0 # compute crc32 of 1-byte shr $8, %rax # get next byte dec n_misaligned jne .Lalign_loop .Laligned: =20 @@ -100,11 +101,11 @@ SYM_FUNC_START(crc_pcl) cmp $128*24, len jae .Lfull_block =20 .Lpartial_block: # Compute floor(len / 24) to get num qwords to process from each lane. - imul $2731, len, %eax # 2731 =3D ceil(2^16 / 24) + imul $2731, len_dw, %eax # 2731 =3D ceil(2^16 / 24) shr $16, %eax jmp .Lcrc_3lanes =20 .Lfull_block: # Processing 128 qwords from each lane. @@ -123,20 +124,20 @@ SYM_FUNC_START(crc_pcl) jl .Lcrc_3lanes_4x_done =20 # Unroll the loop by a factor of 4 to reduce the overhead of the loop # bookkeeping instructions, which can compete with crc32q for the ALUs. 
.Lcrc_3lanes_4x_loop: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 crc32q (bufp,chunk_bytes_q,2), crc2 - crc32q 8(bufp), crc_init_q + crc32q 8(bufp), crc0_q crc32q 8(bufp,chunk_bytes_q), crc1 crc32q 8(bufp,chunk_bytes_q,2), crc2 - crc32q 16(bufp), crc_init_q + crc32q 16(bufp), crc0_q crc32q 16(bufp,chunk_bytes_q), crc1 crc32q 16(bufp,chunk_bytes_q,2), crc2 - crc32q 24(bufp), crc_init_q + crc32q 24(bufp), crc0_q crc32q 24(bufp,chunk_bytes_q), crc1 crc32q 24(bufp,chunk_bytes_q,2), crc2 add $32, bufp sub $4, %eax jge .Lcrc_3lanes_4x_loop @@ -144,42 +145,42 @@ SYM_FUNC_START(crc_pcl) .Lcrc_3lanes_4x_done: add $4, %eax jz .Lcrc_3lanes_last_qword =20 .Lcrc_3lanes_1x_loop: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 crc32q (bufp,chunk_bytes_q,2), crc2 add $8, bufp dec %eax jnz .Lcrc_3lanes_1x_loop =20 .Lcrc_3lanes_last_qword: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 # SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet =20 ################################################################ ## 4) Combine three results: ################################################################ =20 lea (K_table-8)(%rip), %rax # first entry is for idx 1 pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2 lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3 - sub %eax, len # len -=3D chunk_bytes * 3 + sub %rax, len # len -=3D chunk_bytes * 3 =20 - movq crc_init_q, %xmm1 # CRC for block 1 + movq crc0_q, %xmm1 # CRC for block 1 pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2 =20 movq crc1, %xmm2 # CRC for block 2 pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1 =20 pxor %xmm2,%xmm1 movq %xmm1, %rax xor (bufp,chunk_bytes_q,2), %rax - mov crc2, crc_init_q - crc32 %rax, crc_init_q + mov crc2, crc0_q + crc32 %rax, crc0_q lea 8(bufp,chunk_bytes_q,2), bufp =20 ################################################################ ## 5) If more blocks remain, 
goto (2): ################################################################ @@ -191,38 +192,38 @@ SYM_FUNC_START(crc_pcl) =20 ####################################################################### ## 6) Process any remainder without interleaving: ####################################################################### .Lsmall: - test len, len + test len_dw, len_dw jz .Ldone - mov len, %eax + mov len_dw, %eax shr $3, %eax jz .Ldo_dword .Ldo_qwords: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q add $8, bufp dec %eax jnz .Ldo_qwords .Ldo_dword: - test $4, len + test $4, len_dw jz .Ldo_word - crc32l (bufp), crc_init + crc32l (bufp), crc0 add $4, bufp .Ldo_word: - test $2, len + test $2, len_dw jz .Ldo_byte - crc32w (bufp), crc_init + crc32w (bufp), crc0 add $2, bufp .Ldo_byte: - test $1, len + test $1, len_dw jz .Ldone - crc32b (bufp), crc_init + crc32b (bufp), crc0 .Ldone: - mov crc_init, %eax + mov crc0, %eax RET -SYM_FUNC_END(crc_pcl) +SYM_FUNC_END(crc32c_x86_3way) =20 .section .rodata, "a", @progbits ################################################################ ## PCLMULQDQ tables ## Table is 128 entries x 2 words (8 bytes) each --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A39451B0F23; Sun, 3 Nov 2024 22:32:30 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673151; cv=none; b=XPgtJpkh+WdU1JRcX/UoYVJg7oka5qvGPIK8trhaMbuJiSNRDfj4hFv1G88SgqUueqCyDy5rYHobZ2nryCPB2CHJx92VlhQeADFqyU7DlKjGw20aIwf4Fv10lfu1qk/peezHShIIYUwBVyknsJF7uHwxZZ98bq6tO9t7DYSrHTQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673151; c=relaxed/simple; 
bh=kHWHhodawY/PaSA95ElCxVPVWHZVNPifFLwbM7bCJig=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=puS8ZvU4dSAyfKqXbdgX5PVrjwhDHJcFaDyqwLHiigeBpvXh8rtyXOJ9TtdnVORikOr+UkwqkEylHAF5vT1wkGjjjPDpFkiR0RCp9bxBXjZJI1tvQlfxgrlBY81TuaYERUcpKszPnwkLmPFoV5avyKIsiAgqKHfwMLGNEliGR0M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=gDJYU9Vd; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="gDJYU9Vd" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5F1B1C4CED5; Sun, 3 Nov 2024 22:32:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673150; bh=kHWHhodawY/PaSA95ElCxVPVWHZVNPifFLwbM7bCJig=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=gDJYU9VdynkbrvLTjDKcTxSufZxtp+Z/zMYrGwL5d2/F5cTIiZu1Wiw+ZESK7zmQS +FFVnjuv4vyOUFtNqcdFrGqLx5zsxKvE3A/xalMHiUtHdDZkIjSfdPYQrmB+6ySqN+ ddALTCJazXYVXhJOLaKgL3dfJtSCjV81z+BsXvYwPjGClNzLQ4cQVDO2B3vdW7vngf Ygplb6uFLVhngaCOPYPIZPgRzKkBx/DiAHo4iNxyCSe/0PUKsaXTUY0emAEWvjikbw EnEr47Hb2cCBNuPm9gE5gZTGCE0N5bdYov6knFp5JgN+MZa5XIQpyBQ3L84i8WidC1 QmT6y3lT/YF/w== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 12/18] x86/crc32: update prototype for crc32_pclmul_le_16() Date: Sun, 3 Nov 2024 14:31:48 -0800 Message-ID: <20241103223154.136127-13-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> 
References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers - Change the len parameter from unsigned int to size_t, so that the library function which takes a size_t can safely use this code. - Move the crc parameter to the front, as this is the usual convention. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/x86/crypto/crc32-pclmul_asm.S | 19 +++++++++---------- arch/x86/crypto/crc32-pclmul_glue.c | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pcl= mul_asm.S index 5d31137e2c7d..f9637789cac1 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -56,30 +56,29 @@ .octa 0x00000001F701164100000001DB710641 =20 #define CONSTANT %xmm0 =20 #ifdef __x86_64__ -#define BUF %rdi -#define LEN %rsi -#define CRC %edx +#define CRC %edi +#define BUF %rsi +#define LEN %rdx #else -#define BUF %eax -#define LEN %edx -#define CRC %ecx +#define CRC %eax +#define BUF %edx +#define LEN %ecx #endif =20 =20 =20 .text /** * Calculate crc32 - * BUF - buffer (16 bytes aligned) - * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than = 63 * CRC - initial crc32 + * BUF - buffer (16 bytes aligned) + * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than= 63 * return %eax crc32 - * uint crc32_pclmul_le_16(unsigned char const *buffer, - * size_t len, uint crc32) + * u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); */ =20 SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes = aligned */ movdqa (BUF), %xmm1 movdqa 0x10(BUF), %xmm2 diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pc= lmul_glue.c index 9f5e342b9845..9d14eac51c5b 100644 --- 
a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -44,11 +44,11 @@ #define PCLMUL_MIN_LEN 64L /* minimum size of buffer * for crc32_pclmul_le_16 */ #define SCALE_F 16L /* size of xmm register */ #define SCALE_F_MASK (SCALE_F - 1) =20 -u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); +u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); =20 static u32 __attribute__((pure)) crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) { unsigned int iquotient; @@ -69,11 +69,11 @@ static u32 __attribute__((pure)) } iquotient =3D len & (~SCALE_F_MASK); iremainder =3D len & SCALE_F_MASK; =20 kernel_fpu_begin(); - crc =3D crc32_pclmul_le_16(p, iquotient, crc); + crc =3D crc32_pclmul_le_16(crc, p, iquotient); kernel_fpu_end(); =20 if (iremainder) crc =3D crc32_le(crc, p + iquotient, iremainder); =20 --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A39C61B0F24; Sun, 3 Nov 2024 22:32:31 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673151; cv=none; b=Z0Yq1ctv/26JEt2ax1MsgS4RGb1SN7siva9TBOcrwrqZ/JI81x0/QGoXkCBRpBYsQAV0RX1wTi1mRTL/WXtdrKe9d8UY3FhaH5dpXEGy2TG1tSgtMmHgbR3noMExsYwzdObqAUF8+OBljFrJTnMK6jdlE4ARPIX5r9wbjiieGt4= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673151; c=relaxed/simple; bh=/2QZQ4RDOTqnvOjNlma0paDfRM/4mOTJMMlubi2+61I=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=YEADLNlOJuYa6rvMCA0WCcd2j6O77PN9clf46fZGbpMEl634XQ+P0Ey7vO5lXTSJ3NYB0iT4SJz+GxdYJ5FuyFTlHPSAvs52j1E6SBUpNmFgcrEOD+Kx+ie0CqXMqI1HdR9OiQo3k4H1axUUqr7BZIhO63tprWApg6KOLJv0/ko= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=LAskJnJ6; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="LAskJnJ6" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E605FC4CECD; Sun, 3 Nov 2024 22:32:30 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673151; bh=/2QZQ4RDOTqnvOjNlma0paDfRM/4mOTJMMlubi2+61I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=LAskJnJ6uL6t1DRL7QZFCS2qxVoJdc7T3HUP33SyijWg2yOGc5ImldpzsY2yJ0exP z8rp3bsv7eBoNb2PmhBtpmnxXR9Nsfp/ESuFkHW1EMu5Cz6DxdJzIp3p6ryNOH2KwZ 8tEBsL1KtLgMo7Tn2K4ZlFxoPGJpxNTGchmtDpkoPublhHT5Xp5WHDqcmjqHn9SFus WJkemLM27w6scYBmQMScUlvU17OMSnysFXnOkbJJhuoo2W0HPa9z3TpdRncPI3INpE 4UGtDVXbi/EDLoc8ulWrbvnnqwBhaOXB/Hjbp52Iq2nEFmCIpvBw6/wbFOQtr7QLDJ BRj93+6CNlPBw== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 13/18] x86/crc32: expose CRC32 functions through lib Date: Sun, 3 Nov 2024 14:31:49 -0800 Message-ID: <20241103223154.136127-14-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the x86 CRC32 assembly 
code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- arch/x86/Kconfig | 1 + arch/x86/crypto/Kconfig | 22 -- arch/x86/crypto/Makefile | 7 - arch/x86/crypto/crc32-pclmul_glue.c | 202 -------------- arch/x86/crypto/crc32c-intel_glue.c | 249 ------------------ arch/x86/lib/Makefile | 4 + arch/x86/lib/crc32-glue.c | 124 +++++++++ .../crc32-pclmul_asm.S =3D> lib/crc32-pclmul.S} | 0 .../crc32c-3way.S} | 0 drivers/target/iscsi/Kconfig | 1 - 10 files changed, 129 insertions(+), 481 deletions(-) delete mode 100644 arch/x86/crypto/crc32-pclmul_glue.c delete mode 100644 arch/x86/crypto/crc32c-intel_glue.c create mode 100644 arch/x86/lib/crc32-glue.c rename arch/x86/{crypto/crc32-pclmul_asm.S =3D> lib/crc32-pclmul.S} (100%) rename arch/x86/{crypto/crc32c-pcl-intel-asm_64.S =3D> lib/crc32c-3way.S} = (100%) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 16354dfa6d96..e7470de11cec 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -74,10 +74,11 @@ config X86 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CPU_PASID if IOMMU_SVA + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 7b1bebed879d..1ca53e847966 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -490,32 +490,10 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL GCM GHASH hash function (NIST SP800-38D) =20 Architecture: x86_64 using: - CLMUL-NI 
(carry-less multiplication new instructions) =20 -config CRYPTO_CRC32C_INTEL - tristate "CRC32c (SSE4.2/PCLMULQDQ)" - depends on X86 - select CRYPTO_HASH - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: x86 (32-bit and 64-bit) using: - - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction - - PCLMULQDQ (carry-less multiplication) - -config CRYPTO_CRC32_PCLMUL - tristate "CRC32 (PCLMULQDQ)" - depends on X86 - select CRYPTO_HASH - select CRC32 - help - CRC32 CRC algorithm (IEEE 802.3) - - Architecture: x86 (32-bit and 64-bit) using: - - PCLMULQDQ (carry-less multiplication) - config CRYPTO_CRCT10DIF_PCLMUL tristate "CRCT10DIF (PCLMULQDQ)" depends on X86 && 64BIT && CRC_T10DIF select CRYPTO_HASH help diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 53b4a277809e..030b925ca4e2 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -73,17 +73,10 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) +=3D ghash-cl= mulni-intel.o ghash-clmulni-intel-y :=3D ghash-clmulni-intel_asm.o ghash-clmulni-intel_g= lue.o =20 obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) +=3D polyval-clmulni.o polyval-clmulni-y :=3D polyval-clmulni_asm.o polyval-clmulni_glue.o =20 -obj-$(CONFIG_CRYPTO_CRC32C_INTEL) +=3D crc32c-intel.o -crc32c-intel-y :=3D crc32c-intel_glue.o -crc32c-intel-$(CONFIG_64BIT) +=3D crc32c-pcl-intel-asm_64.o - -obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) +=3D crc32-pclmul.o -crc32-pclmul-y :=3D crc32-pclmul_asm.o crc32-pclmul_glue.o - obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) +=3D crct10dif-pclmul.o crct10dif-pclmul-y :=3D crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o =20 obj-$(CONFIG_CRYPTO_POLY1305_X86_64) +=3D poly1305-x86_64.o poly1305-x86_64-y :=3D poly1305-x86_64-cryptogams.o poly1305_glue.o diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pc= lmul_glue.c deleted file mode 100644 index 9d14eac51c5b..000000000000 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ /dev/null @@ -1,202 +0,0 @@ 
-/* GPL HEADER START - * - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licen= ses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - * - * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation. 
- */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define PCLMUL_MIN_LEN 64L /* minimum size of buffer - * for crc32_pclmul_le_16 */ -#define SCALE_F 16L /* size of xmm register */ -#define SCALE_F_MASK (SCALE_F - 1) - -u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); - -static u32 __attribute__((pure)) - crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) -{ - unsigned int iquotient; - unsigned int iremainder; - unsigned int prealign; - - if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable()) - return crc32_le(crc, p, len); - - if ((long)p & SCALE_F_MASK) { - /* align p to 16 byte */ - prealign =3D SCALE_F - ((long)p & SCALE_F_MASK); - - crc =3D crc32_le(crc, p, prealign); - len -=3D prealign; - p =3D (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & - ~SCALE_F_MASK); - } - iquotient =3D len & (~SCALE_F_MASK); - iremainder =3D len & SCALE_F_MASK; - - kernel_fpu_begin(); - crc =3D crc32_pclmul_le_16(crc, p, iquotient); - kernel_fpu_end(); - - if (iremainder) - crc =3D crc32_le(crc, p + iquotient, iremainder); - - return crc; -} - -static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D 0; - - return 0; -} - -static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32_pclmul_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D *mctx; - - return 0; -} - -static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D crc32_pclmul_le(*crcp, data, len); - return 0; -} - -/* No final XOR 0xFFFFFFFF, 
like crc32_le */ -static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int le= n, - u8 *out) -{ - *(__le32 *)out =3D cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); - return 0; -} - -static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *(__le32 *)out =3D cpu_to_le32p(crcp); - return 0; -} - -static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg =3D { - .setkey =3D crc32_pclmul_setkey, - .init =3D crc32_pclmul_init, - .update =3D crc32_pclmul_update, - .final =3D crc32_pclmul_final, - .finup =3D crc32_pclmul_finup, - .digest =3D crc32_pclmul_digest, - .descsize =3D sizeof(u32), - .digestsize =3D CHKSUM_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32", - .cra_driver_name =3D "crc32-pclmul", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(u32), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32_pclmul_cra_init, - } -}; - -static const struct x86_cpu_id crc32pclmul_cpu_id[] =3D { - X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); - - -static int __init crc32_pclmul_mod_init(void) -{ - - if (!x86_match_cpu(crc32pclmul_cpu_id)) { - pr_info("PCLMULQDQ-NI instructions are not detected.\n"); - return -ENODEV; - } - return crypto_register_shash(&alg); -} - -static void __exit crc32_pclmul_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32_pclmul_mod_init); -module_exit(crc32_pclmul_mod_fini); - -MODULE_AUTHOR("Alexander Boyko "); -MODULE_DESCRIPTION("CRC32 algorithm (IEEE 802.3) accelerated with 
PCLMULQD= Q"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-pclmul"); diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-i= ntel_glue.c deleted file mode 100644 index 603d159de400..000000000000 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ /dev/null @@ -1,249 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Using hardware provided CRC32 instruction to accelerate the CRC32 dispo= sal. - * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) - * CRC32 is a new instruction in Intel SSE4.2, the reference can be found = at: - * http://www.intel.com/products/processor/manuals/ - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual - * Volume 2A: Instruction Set Reference, A-M - * - * Copyright (C) 2008 Intel Corporation - * Authors: Austin Zhang - * Kent Liu - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define SCALE_F sizeof(unsigned long) - -#ifdef CONFIG_X86_64 -#define CRC32_INST "crc32q %1, %q0" -#else -#define CRC32_INST "crc32l %1, %0" -#endif - -#ifdef CONFIG_X86_64 -/* - * use carryless multiply version of crc32c when buffer - * size is >=3D 512 to account - * for fpu state save/restore overhead. 
- */ -#define CRC32C_PCL_BREAKEVEN 512 - -asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); -#endif /* CONFIG_X86_64 */ - -static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, siz= e_t length) -{ - while (length--) { - asm("crc32b %1, %0" - : "+r" (crc) : "rm" (*data)); - data++; - } - - return crc; -} - -static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size= _t len) -{ - unsigned int iquotient =3D len / SCALE_F; - unsigned int iremainder =3D len % SCALE_F; - unsigned long *ptmp =3D (unsigned long *)p; - - while (iquotient--) { - asm(CRC32_INST - : "+r" (crc) : "rm" (*ptmp)); - ptmp++; - } - - if (iremainder) - crc =3D crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, - iremainder); - - return crc; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. - */ -static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32c_intel_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D *mctx; - - return 0; -} - -static int crc32c_intel_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D crc32c_intel_le_hw(*crcp, data, len); - return 0; -} - -static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int le= n, - u8 *out) -{ - *(__le32 *)out =3D ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); - return 0; -} - -static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_intel_final(struct shash_desc *desc, u8 
*out) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *(__le32 *)out =3D ~cpu_to_le32p(crcp); - return 0; -} - -static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static int crc32c_intel_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D ~0; - - return 0; -} - -#ifdef CONFIG_X86_64 -static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - /* - * use faster PCL version if datasize is large enough to - * overcome kernel fpu state save/restore overhead - */ - if (len >=3D CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { - kernel_fpu_begin(); - *crcp =3D crc32c_x86_3way(*crcp, data, len); - kernel_fpu_end(); - } else - *crcp =3D crc32c_intel_le_hw(*crcp, data, len); - return 0; -} - -static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned in= t len, - u8 *out) -{ - if (len >=3D CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { - kernel_fpu_begin(); - *(__le32 *)out =3D ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len)); - kernel_fpu_end(); - } else - *(__le32 *)out =3D - ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); - return 0; -} - -static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} -#endif /* CONFIG_X86_64 */ - -static struct shash_alg alg =3D { - .setkey =3D crc32c_intel_setkey, - .init =3D crc32c_intel_init, - .update =3D crc32c_intel_update, - .final =3D crc32c_intel_final, - .finup =3D crc32c_intel_finup, - .digest =3D crc32c_intel_digest, - .descsize =3D sizeof(u32), - 
.digestsize =3D CHKSUM_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-intel", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(u32), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32c_intel_cra_init, - } -}; - -static const struct x86_cpu_id crc32c_cpu_id[] =3D { - X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); - -static int __init crc32c_intel_mod_init(void) -{ - if (!x86_match_cpu(crc32c_cpu_id)) - return -ENODEV; -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { - alg.update =3D crc32c_pcl_intel_update; - alg.finup =3D crc32c_pcl_intel_finup; - alg.digest =3D crc32c_pcl_intel_digest; - } -#endif - return crypto_register_shash(&alg); -} - -static void __exit crc32c_intel_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32c_intel_mod_init); -module_exit(crc32c_intel_mod_fini); - -MODULE_AUTHOR("Austin Zhang , Kent Liu "); -MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.= "); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-intel"); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 98583a9dbab3..17510da06c9f 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -36,10 +36,14 @@ lib-$(CONFIG_ARCH_HAS_COPY_MC) +=3D copy_mc.o copy_mc_6= 4.o lib-$(CONFIG_INSTRUCTION_DECODER) +=3D insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) +=3D kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o lib-$(CONFIG_MITIGATION_RETPOLINE) +=3D retpoline.o =20 +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-x86.o +crc32-x86-y :=3D crc32-glue.o crc32-pclmul.o +crc32-x86-$(CONFIG_64BIT) +=3D crc32c-3way.o + obj-y +=3D msr.o msr-reg.o msr-reg-export.o hweight.o obj-y +=3D iomem.o =20 ifeq ($(CONFIG_X86_32),y) obj-y +=3D atomic64_32.o diff --git a/arch/x86/lib/crc32-glue.c 
b/arch/x86/lib/crc32-glue.c new file mode 100644 index 000000000000..9fcc65db6cb5 --- /dev/null +++ b/arch/x86/lib/crc32-glue.c @@ -0,0 +1,124 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * x86-optimized CRC32 functions + * + * Copyright (C) 2008 Intel Corporation + * Copyright 2012 Xyratex Technology Limited + * Copyright 2024 Google LLC + */ + +#include <asm/cpufeatures.h> +#include <asm/simd.h> +#include <crypto/internal/simd.h> +#include <linux/crc32.h> +#include <linux/linkage.h> +#include <linux/module.h> + +/* minimum size of buffer for crc32_pclmul_le_16 */ +#define CRC32_PCLMUL_MIN_LEN 64 + +static DEFINE_STATIC_KEY_FALSE(have_crc32); +static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); + +u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >=3D CRC32_PCLMUL_MIN_LEN + 15 && + crypto_simd_usable() && static_branch_likely(&have_pclmulqdq)) { + size_t n =3D -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc =3D crc32_le_base(crc, p, n); + p +=3D n; + len -=3D n; + } + n =3D round_down(len, 16); + kernel_fpu_begin(); + crc =3D crc32_pclmul_le_16(crc, p, n); + kernel_fpu_end(); + p +=3D n; + len -=3D n; + } + if (len) + crc =3D crc32_le_base(crc, p, len); + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +#ifdef CONFIG_X86_64 +#define CRC32_INST "crc32q %1, %q0" +#else +#define CRC32_INST "crc32l %1, %0" +#endif + +/* + * Use carryless multiply version of crc32c when buffer size is >=3D 512 to + * account for FPU state save/restore overhead.
+ */ +#define CRC32C_PCLMUL_BREAKEVEN 512 + +asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + size_t num_longs; + + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_X86_64) && len >=3D CRC32C_PCLMUL_BREAKEVEN && + crypto_simd_usable() && static_branch_likely(&have_pclmulqdq)) { + kernel_fpu_begin(); + crc =3D crc32c_x86_3way(crc, p, len); + kernel_fpu_end(); + return crc; + } + + for (num_longs =3D len / sizeof(unsigned long); + num_longs !=3D 0; num_longs--, p +=3D sizeof(unsigned long)) + asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p)); + + for (len %=3D sizeof(unsigned long); len; len--, p++) + asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p)); + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_x86_init(void) +{ + if (boot_cpu_has(X86_FEATURE_XMM4_2)) + static_branch_enable(&have_crc32); + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) + static_branch_enable(&have_pclmulqdq); + return 0; +} +arch_initcall(crc32_x86_init); + +static void __exit crc32_x86_exit(void) +{ +} +module_exit(crc32_x86_exit); + +u32 crc32_optimizations(void) +{ + u32 optimizations =3D 0; + + if (static_key_enabled(&have_crc32)) + optimizations |=3D CRC32C_OPTIMIZATION; + if (static_key_enabled(&have_pclmulqdq)) + optimizations |=3D CRC32_LE_OPTIMIZATION; + return optimizations; +} +EXPORT_SYMBOL(crc32_optimizations); + +MODULE_DESCRIPTION("x86-optimized CRC32 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/lib/crc32-pclmul= .S similarity index 100% rename from arch/x86/crypto/crc32-pclmul_asm.S rename to arch/x86/lib/crc32-pclmul.S diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/lib/crc32= c-3way.S similarity index 100% rename from 
arch/x86/crypto/crc32c-pcl-intel-asm_64.S rename to arch/x86/lib/crc32c-3way.S diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 922b207bc69d..1c0517a12571 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -2,11 +2,10 @@ config ISCSI_TARGET tristate "SCSI Target Mode Stack" depends on INET select CRYPTO select CRYPTO_CRC32C - select CRYPTO_CRC32C_INTEL if X86 help Say M to enable the SCSI target mode stack. A SCSI target mode stack is software that makes local storage available over a storage network to a SCSI initiator system. The supported storage network technologies include iSCSI, Fibre Channel and the SCSI RDMA Protocol (SRP). --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 5EB201B21A9; Sun, 3 Nov 2024 22:32:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673152; cv=none; b=OJ6GcOr0drASdYZU7knczfm8yVaHas2bEGP+vHl+1E9d0dIDd4xOX2LJ4sz8GwMkPdd8HmV5xVrlaHztfPfr4a1XmbOgilNZ+1jRY/yj7l8xf1e9J4eLt1p6daKcxjFj0Njv1QKScmrzJByKWt7cmcCID0FTwkeSxJaGxgSHpFw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673152; c=relaxed/simple; bh=hmjPSLT23tUvD2mhqpKoKr7fMC5StQtNq+9niGNg1xs=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=BERW5YMhfqRxeGPyzoJMJ3X7e9g2HOB8PnA/IzP5neeIZAXPlpr2Aailj/F79KRjrMIqo9T+aES9Vg/TtnGAne84PJlos5wLbfTNOc3DXZdWuvpNCToVa6l5VGIMWh0MiiVeAlbe/3tL/udQhjb42JQ//6FndNmL8/Q9EnzdihM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Tpo5PFXj; arc=none smtp.client-ip=10.30.226.201 
Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Tpo5PFXj" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 788B7C4CED6; Sun, 3 Nov 2024 22:32:31 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673151; bh=hmjPSLT23tUvD2mhqpKoKr7fMC5StQtNq+9niGNg1xs=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=Tpo5PFXj/m4NE6etEzQOOpPtYejpQ6IyEVf/EeYCb8tkKb0CbNlzB9cqqtXVtKsb4 ZpQGhkvVcl/tUEerW2a8LKa2qXGHFXzplvZXn6uMuBrTjz9ciJlXFNVtrcU/h9frd0 86FQ8VL7p1lQ3XyzZiPYABO0V8f1lYjXdBVP9qRed5zCEAT40rOPTRuj5I7ykuqKUo of8Q3GNyp0aJT4rBPpEy6mhQgfIQpHMeXXQQcIktnre8ZBm3I0BiW3Y3dCPReAUzrl gp02hfbdFL8Jobk4i3Tfl80tx19I0JOVpan1dsfVTjxNCPJFs7wOT50Jd4u1rxnmio IwxnJCXPSY/4g== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 14/18] lib/crc32: make crc32c() go directly to lib Date: Sun, 3 Nov 2024 14:31:50 -0800 Message-ID: <20241103223154.136127-15-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the lower level __crc32c_le() library function is optimized for each architecture, make crc32c() just call that instead of taking an inefficient and error-prone detour through the shash API. 
Note: a future cleanup should make crc32c_le() be the actual library function instead of __crc32c_le(). That will require updating callers of __crc32c_le() to use crc32c_le() instead, and updating callers of crc32c_le() that expect a 'const void *' arg to expect 'const u8 *' instead. Similarly, a future cleanup should remove LIBCRC32C by making everyone who is selecting it just select CRC32 directly instead. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- include/linux/crc32c.h | 7 ++-- lib/Kconfig | 10 ++---- lib/Makefile | 1 - lib/libcrc32c.c | 74 ------------------------------------------ 4 files changed, 8 insertions(+), 84 deletions(-) delete mode 100644 lib/libcrc32c.c diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 357ae4611a45..47eb78003c26 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -1,12 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CRC32C_H #define _LINUX_CRC32C_H =20 -#include +#include =20 -extern u32 crc32c(u32 crc, const void *address, unsigned int length); +static inline u32 crc32c(u32 crc, const void *address, unsigned int length) +{ + return __crc32c_le(crc, address, length); +} =20 /* This macro exists for backwards-compatibility. */ #define crc32c_le crc32c =20 #endif /* _LINUX_CRC32C_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 07afcf214f35..b894ee64ff95 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -296,18 +296,14 @@ config CRC7 the kernel tree does. Such modules that use library CRC7 functions require M here. =20 config LIBCRC32C tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check" - select CRYPTO - select CRYPTO_CRC32C + select CRC32 help - This option is provided for the case where no in-kernel-tree - modules require CRC32c functions, but a module built outside the - kernel tree does. Such modules that use library CRC32c functions - require M here. See Castagnoli93. - Module will be libcrc32c. 
+ This option just selects CRC32 and is provided for compatibility + purposes until the users are updated to select CRC32 directly. =20 config CRC8 tristate "CRC8 function" help This option provides CRC8 function. Drivers may select this diff --git a/lib/Makefile b/lib/Makefile index 773adf88af41..15646679aee2 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -161,11 +161,10 @@ obj-$(CONFIG_CRC_ITU_T) +=3D crc-itu-t.o obj-$(CONFIG_CRC32) +=3D crc32.o obj-$(CONFIG_CRC64) +=3D crc64.o obj-$(CONFIG_CRC32_SELFTEST) +=3D crc32test.o obj-$(CONFIG_CRC4) +=3D crc4.o obj-$(CONFIG_CRC7) +=3D crc7.o -obj-$(CONFIG_LIBCRC32C) +=3D libcrc32c.o obj-$(CONFIG_CRC8) +=3D crc8.o obj-$(CONFIG_CRC64_ROCKSOFT) +=3D crc64-rocksoft.o obj-$(CONFIG_XXHASH) +=3D xxhash.o obj-$(CONFIG_GENERIC_ALLOCATOR) +=3D genalloc.o =20 diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c deleted file mode 100644 index 649e687413a0..000000000000 --- a/lib/libcrc32c.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/*=20 - * CRC32C - *@Article{castagnoli-crc, - * author =3D { Guy Castagnoli and Stefan Braeuer and Martin Herrman= }, - * title =3D {{Optimization of Cyclic Redundancy-Check Codes with 24 - * and 32 Parity Bits}}, - * journal =3D IEEE Transactions on Communication, - * year =3D {1993}, - * volume =3D {41}, - * number =3D {6}, - * pages =3D {}, - * month =3D {June}, - *} - * Used by the iSCSI driver, possibly others, and derived from - * the iscsi-crc.c module of the linux-iscsi driver at - * http://linux-iscsi.sourceforge.net. - * - * Following the example of lib/crc32, this function is intended to be - * flexible and useful for all users. Modules that currently have their - * own crc32c, but hopefully may be able to use this one are: - * net/sctp (please add all your doco to here if you change to - * use this one!) - * - * - * Copyright (c) 2004 Cisco Systems, Inc. 
- */ - -#include -#include -#include -#include -#include -#include - -static struct crypto_shash *tfm; - -u32 crc32c(u32 crc, const void *address, unsigned int length) -{ - SHASH_DESC_ON_STACK(shash, tfm); - u32 ret, *ctx =3D (u32 *)shash_desc_ctx(shash); - int err; - - shash->tfm =3D tfm; - *ctx =3D crc; - - err =3D crypto_shash_update(shash, address, length); - BUG_ON(err); - - ret =3D *ctx; - barrier_data(ctx); - return ret; -} - -EXPORT_SYMBOL(crc32c); - -static int __init libcrc32c_mod_init(void) -{ - tfm =3D crypto_alloc_shash("crc32c", 0, 0); - return PTR_ERR_OR_ZERO(tfm); -} - -static void __exit libcrc32c_mod_fini(void) -{ - crypto_free_shash(tfm); -} - -module_init(libcrc32c_mod_init); -module_exit(libcrc32c_mod_fini); - -MODULE_AUTHOR("Clay Haapala "); -MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); -MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32c"); --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 167771B3934; Sun, 3 Nov 2024 22:32:32 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673153; cv=none; b=Eoj0aZWs5+STa4yXHw1PHpAjr4pc8YdJJyO2XzBj3IT1359Ykbp8mYuaeCz5OsN1AumhGFm5aWTbFFLymQQY3OXvVP4A1lUmdTDb7+l0tfSUO7AuP9e6ZbYbJ3DKNVuYWcWPj5ieBmae43Atb3aXv4vC0d7J/YFQh5wrrAHR2LM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673153; c=relaxed/simple; bh=ltl2dzTFUaa7H2aG4AJgeKEj/9DuYykUjD6SD30TiWA=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=U/rgPxN5zpcYubMDFtYoGG12+m6i/lrdqlTJYkQqcvuLyBQGruTSlZ9Pkz3pUl4KKJDapaDibo/OBpmmN8alvVyCl8B4EjIitVZ+9JnK7XzF8Nn3i+RSgy8LjSnaMSlKqmqo5LwjK9250FlKfX4eZgLegdOJgE1+bqLUlX5PXtU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=rD284UMd; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="rD284UMd" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0AD84C4CED4; Sun, 3 Nov 2024 22:32:32 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673152; bh=ltl2dzTFUaa7H2aG4AJgeKEj/9DuYykUjD6SD30TiWA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=rD284UMdniBNvOArttrl83MnmXpyNh/nlcC9K5Mm8gjSj26HUT3Ffziql0jIddtV1 xYPXQVBEULCOjAKWHI6hxZEby09LiQAqmJQOZ+2SeubZyJt2ePAgcqq22PnnkBfdLl iGO7KlATTICQP9q0iuNiW/jY9amA+V+pVQXLwF5M9Px9C/dR3p1nksEL0y3UncKxCw MWnsyaP5fCmbPQQeasYezXe0FndmKpf/9Xjv4kNUqGvlEda1E0lLqwdglMDaXbGiMz MBBh+epoSvd6NFLssQi/HYX1EI7ZgicEprfte/z93BzVdbXPatTcmXXYI1TbltQc5u 2Gb3LHgTHi+YA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 15/18] ext4: switch to using the crc32c library Date: Sun, 3 Nov 2024 14:31:51 -0800 Message-ID: <20241103223154.136127-16-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: 
List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the crc32c() library function directly takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32c(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- fs/ext4/Kconfig | 3 +-- fs/ext4/ext4.h | 25 +++---------------------- fs/ext4/super.c | 15 --------------- 3 files changed, 4 insertions(+), 39 deletions(-) diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index e20d59221fc0..c9ca41d91a6c 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -29,12 +29,11 @@ config EXT3_FS_SECURITY config EXT4_FS tristate "The Extended 4 (ext4) filesystem" select BUFFER_HEAD select JBD2 select CRC16 - select CRYPTO - select CRYPTO_CRC32C + select CRC32 select FS_IOMAP select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help This is the next generation of the ext3 filesystem. =20 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 44b0d418143c..99aa512a7de1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -31,11 +31,11 @@ #include #include #include #include #include -#include +#include #include #include #include #ifdef __KERNEL__ #include @@ -1660,13 +1660,10 @@ struct ext4_sb_info { struct task_struct *s_mmp_tsk; =20 /* record the last minlen when FITRIM is called. 
*/ unsigned long s_last_trim_minblks; =20 - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; - /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_csum_seed; =20 /* Reclaim extents from extent status tree */ struct shrinker *s_es_shrinker; @@ -2465,23 +2462,11 @@ static inline __le16 ext4_rec_len_to_disk(unsigned = len, unsigned blocksize) #define DX_HASH_LAST DX_HASH_SIPHASH =20 static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[4]; - } desc; - - BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=3Dsizeof(desc.ctx)); - - desc.shash.tfm =3D sbi->s_chksum_driver; - *(u32 *)desc.ctx =3D crc; - - BUG_ON(crypto_shash_update(&desc.shash, address, length)); - - return *(u32 *)desc.ctx; + return crc32c(crc, address, length); } =20 #ifdef __KERNEL__ =20 /* hash info structure used by the directory hash */ @@ -3278,15 +3263,11 @@ extern void ext4_group_desc_csum_set(struct super_b= lock *sb, __u32 group, extern int ext4_register_li_request(struct super_block *sb, ext4_group_t first_not_zeroed); =20 static inline int ext4_has_metadata_csum(struct super_block *sb) { - WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) && - !EXT4_SB(sb)->s_chksum_driver); - - return ext4_has_feature_metadata_csum(sb) && - (EXT4_SB(sb)->s_chksum_driver !=3D NULL); + return ext4_has_feature_metadata_csum(sb); } =20 static inline int ext4_has_group_desc_csum(struct super_block *sb) { return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 16a4ce704460..1a821093cc0d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1371,12 +1371,10 @@ static void ext4_put_super(struct super_block *sb) * Now that we are completely done shutting down the * superblock, we need to actually destroy the kobject. 
*/ kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev, NULL); fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); @@ -4586,19 +4584,10 @@ static int ext4_init_metadata_csum(struct super_blo= ck *sb, struct ext4_super_blo return -EINVAL; } ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, ext4_orphan_file_block_trigger); =20 - /* Load the checksum driver */ - sbi->s_chksum_driver =3D crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - int ret =3D PTR_ERR(sbi->s_chksum_driver); - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); - sbi->s_chksum_driver =3D NULL; - return ret; - } - /* Check superblock checksum */ if (!ext4_superblock_csum_verify(sb, es)) { ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "invalid superblock checksum. Run e2fsck?"); return -EFSBADCRC; @@ -5638,13 +5627,10 @@ failed_mount8: __maybe_unused flush_work(&sbi->s_sb_upd_work); ext4_stop_mmpd(sbi); del_timer_sync(&sbi->s_err_report); ext4_group_desc_free(sbi); failed_mount: - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); - #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif =20 #ifdef CONFIG_QUOTA @@ -7433,8 +7419,7 @@ static void __exit ext4_exit_fs(void) } =20 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, = Theodore Ts'o and others"); MODULE_DESCRIPTION("Fourth Extended Filesystem"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32c"); module_init(ext4_init_fs) module_exit(ext4_exit_fs) --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 
6B5C31B85C4; Sun, 3 Nov 2024 22:32:33 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673155; cv=none; b=UNuLZcudnuVVD/Sra6dCC4Lnh3wDsM1N0W3/8ckv7tOC+FfFd5gsVLHX4twEZDYUhpJKBKyN9zMr1rkhQZIZBB55OGfO/juUZYrsYrw4qTF3WoqMKUJcsRZup+gMA+a9DPWhkOeH9k16v8KwDIQfm6aot9MRKNND4DZfXzUBNhk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673155; c=relaxed/simple; bh=47ZGS4s0ykwzlSaIC0c2LdDMEVdoUJMfULJGERv2xGc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=UMft4xeraaQNc4OhQI5/AGLBj71V2vBt7xpS0ar6DOd2UUlRkFy0RQ9sHVpSzmuMhb4ty4e43P2WAWcnzrNDRd4884yGrphWtPlhdFspp7W1K6Ol2qTLWB9vWWzAqVDSAw8Q7uZWXLZgJxo77XNGAVSQ4SzYh9D1OGjz9Mn8sdQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=q1ykIOa7; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="q1ykIOa7" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 90D6FC4CED5; Sun, 3 Nov 2024 22:32:32 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673153; bh=47ZGS4s0ykwzlSaIC0c2LdDMEVdoUJMfULJGERv2xGc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=q1ykIOa7FhBo5TG7EEGuwlAX6JD+hfMRhamK4oMNyVrZDpMPqHTEGAsVbnERHXS8T +I2XrUziRIKheWu2BlQzXwfZ1l1bjNXguqDK6VXaR/wct879oJyJjpLfr0pqdPRjDc pgJpJHsZKjL5na6IEdo8DdvwhyfhSCkVyu7UUX9dnortqHNcN8xxIOrpJweL/Ycdzc TNs6Kild/tZIS7PV10QKFjAyOPbFF38dNBQncvaP5igKWCs7GauQI5N8BGs/n6dj5r PX3uI/nZ89HFy8Q/OKlj/f+oQiZJDSlGKwqNEM0h+hztwIegm5uD3SmzhUbQinAYA0 pW57GbYyXw9gw== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, 
linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 16/18] jbd2: switch to using the crc32c library Date: Sun, 3 Nov 2024 14:31:52 -0800 Message-ID: <20241103223154.136127-17-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the crc32c() library function directly takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32c(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- fs/jbd2/Kconfig | 2 -- fs/jbd2/journal.c | 25 ++----------------------- include/linux/jbd2.h | 31 +++---------------------------- 3 files changed, 5 insertions(+), 53 deletions(-) diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 4ad2c67f93f1..9c19e1512101 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig @@ -1,11 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only config JBD2 tristate select CRC32 - select CRYPTO - select CRYPTO_CRC32C help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. 
It is currently used by the ext4 and OCFS2 filesystems, but it could also be used to add journal support to other file systems or block devices such diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 97f487c3d8fc..56cea5a738a7 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1373,24 +1373,16 @@ static int journal_check_superblock(journal_t *jour= nal) printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " "at the same time!\n"); return err; } =20 - /* Load the checksum driver */ if (jbd2_journal_has_csum_v2or3_feature(journal)) { if (sb->s_checksum_type !=3D JBD2_CRC32C_CHKSUM) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); return err; } =20 - journal->j_chksum_driver =3D crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); - err =3D PTR_ERR(journal->j_chksum_driver); - journal->j_chksum_driver =3D NULL; - return err; - } /* Check superblock checksum */ if (sb->s_checksum !=3D jbd2_superblock_csum(journal, sb)) { printk(KERN_ERR "JBD2: journal checksum error\n"); err =3D -EFSBADCRC; return err; @@ -1611,12 +1603,10 @@ static journal_t *journal_init_common(struct block_= device *bdev, =20 return journal; =20 err_cleanup: percpu_counter_destroy(&journal->j_checkpoint_jh_count); - if (journal->j_chksum_driver) - crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_wbuf); jbd2_journal_destroy_revoke(journal); journal_fail_superblock(journal); kfree(journal); return ERR_PTR(err); @@ -2194,12 +2184,10 @@ int jbd2_journal_destroy(journal_t *journal) if (journal->j_proc_entry) jbd2_stats_proc_exit(journal); iput(journal->j_inode); if (journal->j_revoke) jbd2_journal_destroy_revoke(journal); - if (journal->j_chksum_driver) - crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_fc_wbuf); kfree(journal->j_wbuf); kfree(journal); =20 return err; @@ -2340,23 +2328,14 @@ int jbd2_journal_set_features(journal_t *journal, u= nsigned long compat, 
pr_err("JBD2: Cannot enable fast commits.\n"); return 0; } } =20 - /* Load the checksum driver if necessary */ - if ((journal->j_chksum_driver =3D=3D NULL) && - INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { - journal->j_chksum_driver =3D crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); - journal->j_chksum_driver =3D NULL; - return 0; - } - /* Precompute checksum seed for all metadata */ + /* Precompute checksum seed for all metadata */ + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) journal->j_csum_seed =3D jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); - } =20 lock_buffer(journal->j_sb_buffer); =20 /* If enabling v3 checksums, update superblock */ if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8aef9bb6ad57..33d25a3d15f1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -26,11 +26,11 @@ #include #include #include #include #include -#include +#include #endif =20 #define journal_oom_retry 1 =20 /* @@ -1239,17 +1239,10 @@ struct journal_s * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here. */ void *j_private; =20 - /** - * @j_chksum_driver: - * - * Reference to checksum algorithm driver via cryptoapi. - */ - struct crypto_shash *j_chksum_driver; - /** * @j_csum_seed: * * Precomputed journal UUID checksum for seeding other checksums. 
*/ @@ -1748,14 +1741,11 @@ static inline bool jbd2_journal_has_csum_v2or3_feat= ure(journal_t *j) return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j); } =20 static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { - WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && - journal->j_chksum_driver =3D=3D NULL); - - return journal->j_chksum_driver !=3D NULL; + return jbd2_journal_has_csum_v2or3_feature(journal); } =20 static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) { int num_fc_blocks =3D be32_to_cpu(jsb->s_num_fc_blks); @@ -1794,26 +1784,11 @@ static inline unsigned long jbd2_log_space_left(jou= rnal_t *journal) #define JBD_MAX_CHECKSUM_SIZE 4 =20 static inline u32 jbd2_chksum(journal_t *journal, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[JBD_MAX_CHECKSUM_SIZE]; - } desc; - int err; - - BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > - JBD_MAX_CHECKSUM_SIZE); - - desc.shash.tfm =3D journal->j_chksum_driver; - *(u32 *)desc.ctx =3D crc; - - err =3D crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; + return crc32c(crc, address, length); } =20 /* Return most recent uncommitted transaction */ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) { --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id A7C0E1B395A; Sun, 3 Nov 2024 22:32:33 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673153; cv=none; 
b=NnI6gE4P3P7SpmTLVakLsmS+GTNqwRCrpqSowUoXvyjUEHbj2SySUlRlEFvYd4R+pmL0x3XwxMRoVz+E/pY5ZpKFNQzxGwYlJcsWq/+xFmjLsUb5VIaIdKzoBIgjkMPwsJjcwP3xC/Uc+wuxkW8uPeawIzZ5RJhmyL5J8GUDPOQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673153; c=relaxed/simple; bh=WfN2SwQ1SfAPzqKDOcxOjX9pk3UC1SwykLtsIrx3B8I=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=IPSuvzU6d30McIUt7YjVJ2w+35YKQRooonpVed6JMk9fF147XNQ608wtoQATnAzjQnK1pyepAfyMYGQNfZFCXhpkj97xKOnoc4oaWTjBK8j6aL9a196sP2XB6sBiBdQ/3xlh6VG8G2YBTImc25BtHjyDSYPcPxf/hrDfEJdIxLM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ncJC7MH9; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ncJC7MH9" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 23D20C4CECD; Sun, 3 Nov 2024 22:32:33 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673153; bh=WfN2SwQ1SfAPzqKDOcxOjX9pk3UC1SwykLtsIrx3B8I=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ncJC7MH9f55fhT2iNp4ZGfgqP7KHuE7wemzzV22Z1RB/6rZX0YywjK4cWT6F4kgQu UCaCPlfmreev/pK63M1sZ7RvNNF5SmFtAQrjpr3KR1BHf9QS7SVy/pj4BVmX3P6VqQ FrGrQcXq1YnOmAmEQyHq9y4lzpkQKxUUcq9jtL67bjHifb7rexexd24s56L7e2ZMi7 SSwDcJ6PFYFrIJdIhdSFsMB9XoaDWjJu+WTNUIolMnvVQZRQQQZO9jjVE99Tv11X3a kkYlUNWGwP5shTWE8Us+pJs0v8sX0+Bq3JRf+wpALI+UnuKai7iVjidTYXu362kAZz dA7qo5/0IeuCA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, 
Ard Biesheuvel Subject: [PATCH v3 17/18] f2fs: switch to using the crc32 library Date: Sun, 3 Nov 2024 14:31:53 -0800 Message-ID: <20241103223154.136127-18-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the crc32() library function takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- fs/f2fs/Kconfig | 3 +-- fs/f2fs/f2fs.h | 19 +------------------ fs/f2fs/super.c | 15 --------------- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 68a1e23e1557..5916a02fb46d 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -2,12 +2,11 @@ config F2FS_FS tristate "F2FS filesystem support" depends on BLOCK select BUFFER_HEAD select NLS - select CRYPTO - select CRYPTO_CRC32 + select CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION select FS_ENCRYPTION_ALGS if FS_ENCRYPTION select FS_IOMAP select LZ4_COMPRESS if F2FS_FS_LZ4 select LZ4_DECOMPRESS if F2FS_FS_LZ4 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 33f5449dc22d..1fc5c2743c8d 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1761,13 +1761,10 @@ struct f2fs_sb_info { =20 /* For write statistics */ u64 sectors_written_start; u64 kbytes_written; =20 - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; - /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; =20 struct workqueue_struct *post_read_wq; /* post read workqueue */ 
=20 @@ -1941,25 +1938,11 @@ static inline unsigned int f2fs_time_to_wait(struct= f2fs_sb_info *sbi, * Inline functions */ static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[4]; - } desc; - int err; - - BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) !=3D sizeof(desc.ctx)); - - desc.shash.tfm =3D sbi->s_chksum_driver; - *(u32 *)desc.ctx =3D crc; - - err =3D crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; + return crc32(crc, address, length); } =20 static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, unsigned int length) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 87ab5696bd48..003d3bcb0caa 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1670,12 +1670,10 @@ static void f2fs_put_super(struct super_block *sb) =20 f2fs_destroy_post_read_wq(sbi); =20 kvfree(sbi->ckpt); =20 - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->raw_super); =20 f2fs_destroy_page_array_cache(sbi); f2fs_destroy_xattr_caches(sbi); #ifdef CONFIG_QUOTA @@ -4419,19 +4417,10 @@ static int f2fs_fill_super(struct super_block *sb, = void *data, int silent) INIT_LIST_HEAD(&sbi->inode_list[i]); spin_lock_init(&sbi->inode_lock[i]); } mutex_init(&sbi->flush_lock); =20 - /* Load the checksum driver */ - sbi->s_chksum_driver =3D crypto_alloc_shash("crc32", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_err(sbi, "Cannot load crc32 driver."); - err =3D PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver =3D NULL; - goto free_sbi; - } - /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { f2fs_err(sbi, "unable to set blocksize"); goto free_sbi; } @@ -4872,12 +4861,10 @@ static int f2fs_fill_super(struct super_block *sb, = void *data, int silent) fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy); kvfree(options); free_sb_buf: kfree(raw_super); 
free_sbi: - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi); sb->s_fs_info =3D NULL; =20 /* give only one another chance */ if (retry_cnt > 0 && skip_recovery) { @@ -5080,7 +5067,5 @@ module_init(init_f2fs_fs) module_exit(exit_f2fs_fs) =20 MODULE_AUTHOR("Samsung Electronics's Praesto Team"); MODULE_DESCRIPTION("Flash Friendly File System"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32"); - --=20 2.47.0 From nobody Sun Nov 24 17:51:10 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 371001B5EA4; Sun, 3 Nov 2024 22:32:34 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673154; cv=none; b=tsGcGOeNNlW67mm6dLIqyDHJ5LQI4f4vr0mq0QLsNPmBgIzDnwAJTaEx8UjNx47YS30z08Ch8Af0LrN4O2l0B//Xdpl3lHB4NNcAmPB7hj7xvzyIF/6LNaQrqaBXPz6Bws7Y9e/+V94zy5/YHx3wGKic/2aQaH28KqKdUQ35oxE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1730673154; c=relaxed/simple; bh=h0v+/+dQC51krLqRK0yf3yojMlp/Lk1g/dsT2vwPhiI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=GC470oVchPnTRHr9S3St4LwX0SfePM2VoIdZv7ykTVflYEu0JQKCUJizxeoKdocfmhm4vTTcknC/D+E94v8dJrXfUL5ZOMcForpIonKZnxubC87+o41r3cm1ms0mUJMIFvYgDwezGkcHCzdu9HV3ofNloqaYmBS/ytyFOeD5R7M= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=SZh8dplT; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="SZh8dplT" Received: by smtp.kernel.org (Postfix) with ESMTPSA id AA8C3C4CECF; Sun, 3 Nov 2024 22:32:33 +0000 (UTC) DKIM-Signature: v=1; 
a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1730673154; bh=h0v+/+dQC51krLqRK0yf3yojMlp/Lk1g/dsT2vwPhiI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=SZh8dplTHlFRi2pkXj5jDw8BwmOYWoRIsaQ1Okcid6WrA5P/skRI4/XY7mVYzbixc 0aZ6RuwL1J5F9KBkaSeHlrV6WB4/OA2lMDSabbAqatcfNCC5zvOV+cekvtre/YTiLI 0SI0a2Cm5gO004FcJfWIEgdlYfSz/yRPBFzZitx5xftA7rLvOhUOuB0IldkNsPxYXk OsYxLmGHV+7ENQ9yDlD6EwGMttgXosMxiBjtcnpA/FWlqR8ihJQ6eZ5UzoXrL7oufo C0N04fZZD4KlYkqiPqYi1YwunMtj9CKjhPfNSMBFCHLA4/nstjP81WyO1sq5lJByTH ZrQLm2DJtUZeQ== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linux-scsi@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org, Ard Biesheuvel Subject: [PATCH v3 18/18] scsi: target: iscsi: switch to using the crc32c library Date: Sun, 3 Nov 2024 14:31:54 -0800 Message-ID: <20241103223154.136127-19-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241103223154.136127-1-ebiggers@kernel.org> References: <20241103223154.136127-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the crc32c() library function directly takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32c(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. 
Reviewed-by: Ard Biesheuvel Signed-off-by: Eric Biggers --- drivers/target/iscsi/Kconfig | 3 +- drivers/target/iscsi/iscsi_target.c | 153 +++++++--------------- drivers/target/iscsi/iscsi_target_login.c | 50 ------- drivers/target/iscsi/iscsi_target_login.h | 1 - drivers/target/iscsi/iscsi_target_nego.c | 21 +-- include/target/iscsi/iscsi_target_core.h | 3 - 6 files changed, 49 insertions(+), 182 deletions(-) diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 1c0517a12571..70d76f3dd693 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -1,11 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only config ISCSI_TARGET tristate "SCSI Target Mode Stack" depends on INET + select CRC32 select CRYPTO - select CRYPTO_CRC32C + select CRYPTO_HASH help Say M to enable the SCSI target mode stack. A SCSI target mode stack is software that makes local storage available over a storage network to a SCSI initiator system. The supported storage network technologies include iSCSI, Fibre Channel and the SCSI RDMA Protocol (SRP). diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/isc= si_target.c index 6002283cbeba..091c1efccfb7 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -6,11 +6,11 @@ * * Author: Nicholas A. 
Bellinger * *************************************************************************= *****/ =20 -#include +#include #include #include #include #include #include @@ -488,12 +488,12 @@ void iscsit_aborted_task(struct iscsit_conn *conn, st= ruct iscsit_cmd *cmd) =20 __iscsit_free_cmd(cmd, true); } EXPORT_SYMBOL(iscsit_aborted_task); =20 -static void iscsit_do_crypto_hash_buf(struct ahash_request *, const void *, - u32, u32, const void *, void *); +static u32 iscsit_crc_buf(const void *buf, u32 payload_length, + u32 padding, const void *pad_bytes); static void iscsit_tx_thread_wait_for_tcp(struct iscsit_conn *); =20 static int iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd, const void *data_buf, u32 data_buf_len) @@ -508,13 +508,11 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, s= truct iscsit_cmd *cmd, iov[niov++].iov_len =3D ISCSI_HDR_LEN; =20 if (conn->conn_ops->HeaderDigest) { u32 *header_digest =3D (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; =20 - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, hdr, - ISCSI_HDR_LEN, 0, NULL, - header_digest); + *header_digest =3D iscsit_crc_buf(hdr, ISCSI_HDR_LEN, 0, NULL); =20 iov[0].iov_len +=3D ISCSI_CRC_LEN; tx_size +=3D ISCSI_CRC_LEN; pr_debug("Attaching CRC32C HeaderDigest" " to opcode 0x%x 0x%08x\n", @@ -535,15 +533,13 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, s= truct iscsit_cmd *cmd, pr_debug("Attaching %u additional" " padding bytes.\n", padding); } =20 if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, - data_buf, data_buf_len, - padding, &cmd->pad_bytes, - &cmd->data_crc); - + cmd->data_crc =3D iscsit_crc_buf(data_buf, data_buf_len, + padding, + &cmd->pad_bytes); iov[niov].iov_base =3D &cmd->data_crc; iov[niov++].iov_len =3D ISCSI_CRC_LEN; tx_size +=3D ISCSI_CRC_LEN; pr_debug("Attached DataDigest for %u" " bytes opcode 0x%x, CRC 0x%08x\n", @@ -564,12 +560,12 @@ iscsit_xmit_nondatain_pdu(struct iscsit_conn *conn, s= truct iscsit_cmd *cmd, } =20 static 
int iscsit_map_iovec(struct iscsit_cmd *cmd, struct kvec *iov, int = nvec, u32 data_offset, u32 data_length); static void iscsit_unmap_iovec(struct iscsit_cmd *); -static u32 iscsit_do_crypto_hash_sg(struct ahash_request *, struct iscsit_= cmd *, - u32, u32, u32, u8 *); +static u32 iscsit_crc_sglist(const struct iscsit_cmd *cmd, u32 data_length, + u32 padding, const u8 *pad_bytes); static int iscsit_xmit_datain_pdu(struct iscsit_conn *conn, struct iscsit_cmd *cmd, const struct iscsi_datain *datain) { struct kvec *iov; @@ -582,14 +578,12 @@ iscsit_xmit_datain_pdu(struct iscsit_conn *conn, stru= ct iscsit_cmd *cmd, tx_size +=3D ISCSI_HDR_LEN; =20 if (conn->conn_ops->HeaderDigest) { u32 *header_digest =3D (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; =20 - iscsit_do_crypto_hash_buf(conn->conn_tx_hash, cmd->pdu, - ISCSI_HDR_LEN, 0, NULL, - header_digest); - + *header_digest =3D iscsit_crc_buf(cmd->pdu, ISCSI_HDR_LEN, 0, + NULL); iov[0].iov_len +=3D ISCSI_CRC_LEN; tx_size +=3D ISCSI_CRC_LEN; =20 pr_debug("Attaching CRC32 HeaderDigest for DataIN PDU 0x%08x\n", *header_digest); @@ -612,16 +606,12 @@ iscsit_xmit_datain_pdu(struct iscsit_conn *conn, stru= ct iscsit_cmd *cmd, =20 pr_debug("Attaching %u padding bytes\n", cmd->padding); } =20 if (conn->conn_ops->DataDigest) { - cmd->data_crc =3D iscsit_do_crypto_hash_sg(conn->conn_tx_hash, - cmd, datain->offset, - datain->length, - cmd->padding, - cmd->pad_bytes); - + cmd->data_crc =3D iscsit_crc_sglist(cmd, datain->length, + cmd->padding, cmd->pad_bytes); iov[iov_count].iov_base =3D &cmd->data_crc; iov[iov_count++].iov_len =3D ISCSI_CRC_LEN; tx_size +=3D ISCSI_CRC_LEN; =20 pr_debug("Attached CRC32C DataDigest %d bytes, crc 0x%08x\n", @@ -1402,81 +1392,49 @@ iscsit_handle_scsi_cmd(struct iscsit_conn *conn, st= ruct iscsit_cmd *cmd, return 0; =20 return iscsit_get_immediate_data(cmd, hdr, dump_payload); } =20 -static u32 iscsit_do_crypto_hash_sg( - struct ahash_request *hash, - struct iscsit_cmd *cmd, - u32 data_offset, - u32 data_length, - 
u32 padding, - u8 *pad_bytes) +static u32 iscsit_crc_sglist(const struct iscsit_cmd *cmd, u32 data_length, + u32 padding, const u8 *pad_bytes) { - u32 data_crc; - struct scatterlist *sg; - unsigned int page_off; - - crypto_ahash_init(hash); - - sg =3D cmd->first_data_sg; - page_off =3D cmd->first_data_sg_off; - - if (data_length && page_off) { - struct scatterlist first_sg; - u32 len =3D min_t(u32, data_length, sg->length - page_off); - - sg_init_table(&first_sg, 1); - sg_set_page(&first_sg, sg_page(sg), len, sg->offset + page_off); - - ahash_request_set_crypt(hash, &first_sg, NULL, len); - crypto_ahash_update(hash); - - data_length -=3D len; - sg =3D sg_next(sg); - } + struct scatterlist *sg =3D cmd->first_data_sg; + unsigned int page_off =3D cmd->first_data_sg_off; + u32 crc =3D ~0; =20 while (data_length) { - u32 cur_len =3D min_t(u32, data_length, sg->length); + u32 cur_len =3D min_t(u32, data_length, sg->length - page_off); + const void *virt; =20 - ahash_request_set_crypt(hash, sg, NULL, cur_len); - crypto_ahash_update(hash); + virt =3D kmap_local_page(sg_page(sg)) + sg->offset + page_off; + crc =3D crc32c(crc, virt, cur_len); + kunmap_local(virt); =20 - data_length -=3D cur_len; /* iscsit_map_iovec has already checked for invalid sg pointers */ sg =3D sg_next(sg); - } =20 - if (padding) { - struct scatterlist pad_sg; - - sg_init_one(&pad_sg, pad_bytes, padding); - ahash_request_set_crypt(hash, &pad_sg, (u8 *)&data_crc, - padding); - crypto_ahash_finup(hash); - } else { - ahash_request_set_crypt(hash, NULL, (u8 *)&data_crc, 0); - crypto_ahash_final(hash); + page_off =3D 0; + data_length -=3D cur_len; } =20 - return data_crc; + if (padding) + crc =3D crc32c(crc, pad_bytes, padding); + + return ~crc; } =20 -static void iscsit_do_crypto_hash_buf(struct ahash_request *hash, - const void *buf, u32 payload_length, u32 padding, - const void *pad_bytes, void *data_crc) +static u32 iscsit_crc_buf(const void *buf, u32 payload_length, + u32 padding, const void 
*pad_bytes) { - struct scatterlist sg[2]; + u32 crc =3D ~0; =20 - sg_init_table(sg, ARRAY_SIZE(sg)); - sg_set_buf(sg, buf, payload_length); - if (padding) - sg_set_buf(sg + 1, pad_bytes, padding); + crc =3D crc32c(crc, buf, payload_length); =20 - ahash_request_set_crypt(hash, sg, data_crc, payload_length + padding); + if (padding) + crc =3D crc32c(crc, pad_bytes, padding); =20 - crypto_ahash_digest(hash); + return ~crc; } =20 int __iscsit_check_dataout_hdr(struct iscsit_conn *conn, void *buf, struct iscsit_cmd *cmd, u32 payload_length, @@ -1660,15 +1618,12 @@ iscsit_get_dataout(struct iscsit_conn *conn, struct= iscsit_cmd *cmd, return -1; =20 if (conn->conn_ops->DataDigest) { u32 data_crc; =20 - data_crc =3D iscsit_do_crypto_hash_sg(conn->conn_rx_hash, cmd, - be32_to_cpu(hdr->offset), - payload_length, padding, - cmd->pad_bytes); - + data_crc =3D iscsit_crc_sglist(cmd, payload_length, padding, + cmd->pad_bytes); if (checksum !=3D data_crc) { pr_err("ITT: 0x%08x, Offset: %u, Length: %u," " DataSN: 0x%08x, CRC32C DataDigest 0x%08x" " does not match computed 0x%08x\n", hdr->itt, hdr->offset, payload_length, @@ -1923,14 +1878,12 @@ static int iscsit_handle_nop_out(struct iscsit_conn= *conn, struct iscsit_cmd *cm ret =3D -1; goto out; } =20 if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_rx_hash, ping_data, - payload_length, padding, - cmd->pad_bytes, &data_crc); - + data_crc =3D iscsit_crc_buf(ping_data, payload_length, + padding, cmd->pad_bytes); if (checksum !=3D data_crc) { pr_err("Ping data CRC32C DataDigest" " 0x%08x does not match computed 0x%08x\n", checksum, data_crc); if (!conn->sess->sess_ops->ErrorRecoveryLevel) { @@ -2326,14 +2279,11 @@ iscsit_handle_text_cmd(struct iscsit_conn *conn, st= ruct iscsit_cmd *cmd, rx_got =3D rx_data(conn, &iov[0], niov, rx_size); if (rx_got !=3D rx_size) goto reject; =20 if (conn->conn_ops->DataDigest) { - iscsit_do_crypto_hash_buf(conn->conn_rx_hash, - text_in, rx_size, 0, NULL, - &data_crc); - + 
data_crc =3D iscsit_crc_buf(text_in, rx_size, 0, NULL); if (checksum !=3D data_crc) { pr_err("Text data CRC32C DataDigest" " 0x%08x does not match computed" " 0x%08x\n", checksum, data_crc); if (!conn->sess->sess_ops->ErrorRecoveryLevel) { @@ -2686,14 +2636,12 @@ static int iscsit_handle_immediate_data( } =20 if (conn->conn_ops->DataDigest) { u32 data_crc; =20 - data_crc =3D iscsit_do_crypto_hash_sg(conn->conn_rx_hash, cmd, - cmd->write_data_done, length, padding, - cmd->pad_bytes); - + data_crc =3D iscsit_crc_sglist(cmd, length, padding, + cmd->pad_bytes); if (checksum !=3D data_crc) { pr_err("ImmediateData CRC32C DataDigest 0x%08x" " does not match computed 0x%08x\n", checksum, data_crc); =20 @@ -4114,14 +4062,12 @@ static void iscsit_get_rx_pdu(struct iscsit_conn *c= onn) if (ret !=3D ISCSI_CRC_LEN) { iscsit_rx_thread_wait_for_tcp(conn); break; } =20 - iscsit_do_crypto_hash_buf(conn->conn_rx_hash, buffer, - ISCSI_HDR_LEN, 0, NULL, - &checksum); - + checksum =3D iscsit_crc_buf(buffer, ISCSI_HDR_LEN, 0, + NULL); if (digest !=3D checksum) { pr_err("HeaderDigest CRC32C failed," " received 0x%08x, computed 0x%08x\n", digest, checksum); /* @@ -4404,19 +4350,10 @@ int iscsit_close_connection( * If any other processes are accessing this connection pointer we * must wait until they have completed. */ iscsit_check_conn_usage_count(conn); =20 - ahash_request_free(conn->conn_tx_hash); - if (conn->conn_rx_hash) { - struct crypto_ahash *tfm; - - tfm =3D crypto_ahash_reqtfm(conn->conn_rx_hash); - ahash_request_free(conn->conn_rx_hash); - crypto_free_ahash(tfm); - } - if (conn->sock) sock_release(conn->sock); =20 if (conn->conn_transport->iscsit_free_conn) conn->conn_transport->iscsit_free_conn(conn); diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/isc= si/iscsi_target_login.c index 90b870f234f0..c2ac9a99ebbb 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -6,11 +6,10 @@ * * Author: Nicholas A. 
Bellinger * *************************************************************************= *****/ =20 -#include #include #include #include #include #include @@ -69,50 +68,10 @@ static struct iscsi_login *iscsi_login_init_conn(struct= iscsit_conn *conn) out_login: kfree(login); return NULL; } =20 -/* - * Used by iscsi_target_nego.c:iscsi_target_locate_portal() to setup - * per struct iscsit_conn libcrypto contexts for crc32c and crc32-intel - */ -int iscsi_login_setup_crypto(struct iscsit_conn *conn) -{ - struct crypto_ahash *tfm; - - /* - * Setup slicing by CRC32C algorithm for RX and TX libcrypto contexts - * which will default to crc32c_intel.ko for cpu_has_xmm4_2, or fallback - * to software 1x8 byte slicing from crc32c.ko - */ - tfm =3D crypto_alloc_ahash("crc32c", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) { - pr_err("crypto_alloc_ahash() failed\n"); - return -ENOMEM; - } - - conn->conn_rx_hash =3D ahash_request_alloc(tfm, GFP_KERNEL); - if (!conn->conn_rx_hash) { - pr_err("ahash_request_alloc() failed for conn_rx_hash\n"); - crypto_free_ahash(tfm); - return -ENOMEM; - } - ahash_request_set_callback(conn->conn_rx_hash, 0, NULL, NULL); - - conn->conn_tx_hash =3D ahash_request_alloc(tfm, GFP_KERNEL); - if (!conn->conn_tx_hash) { - pr_err("ahash_request_alloc() failed for conn_tx_hash\n"); - ahash_request_free(conn->conn_rx_hash); - conn->conn_rx_hash =3D NULL; - crypto_free_ahash(tfm); - return -ENOMEM; - } - ahash_request_set_callback(conn->conn_tx_hash, 0, NULL, NULL); - - return 0; -} - static int iscsi_login_check_initiator_version( struct iscsit_conn *conn, u8 version_max, u8 version_min) { @@ -1163,19 +1122,10 @@ void iscsi_target_login_sess_out(struct iscsit_conn= *conn, } else spin_unlock_bh(&conn->sess->conn_lock); iscsit_dec_session_usage_count(conn->sess); } =20 - ahash_request_free(conn->conn_tx_hash); - if (conn->conn_rx_hash) { - struct crypto_ahash *tfm; - - tfm =3D crypto_ahash_reqtfm(conn->conn_rx_hash); - ahash_request_free(conn->conn_rx_hash); - 
crypto_free_ahash(tfm); - } - if (conn->param_list) { iscsi_release_param_list(conn->param_list); conn->param_list =3D NULL; } iscsi_target_nego_release(conn); diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/isc= si/iscsi_target_login.h index e8760735486b..03c7d695d58f 100644 --- a/drivers/target/iscsi/iscsi_target_login.h +++ b/drivers/target/iscsi/iscsi_target_login.h @@ -7,11 +7,10 @@ struct iscsit_conn; struct iscsi_login; struct iscsi_np; struct sockaddr_storage; =20 -extern int iscsi_login_setup_crypto(struct iscsit_conn *); extern int iscsi_check_for_session_reinstatement(struct iscsit_conn *); extern int iscsi_login_post_auth_non_zero_tsih(struct iscsit_conn *, u16, = u32); extern int iscsit_setup_np(struct iscsi_np *, struct sockaddr_storage *); extern int iscsi_target_setup_login_socket(struct iscsi_np *, diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscs= i/iscsi_target_nego.c index fa3fb5f4e6bc..16e3ded98c32 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -1192,18 +1192,11 @@ int iscsi_target_locate_portal( if (!sessiontype) { if (!login->leading_connection) goto get_target; =20 sess->sess_ops->SessionType =3D 1; - /* - * Setup crc32c modules from libcrypto - */ - if (iscsi_login_setup_crypto(conn) < 0) { - pr_err("iscsi_login_setup_crypto() failed\n"); - ret =3D -1; - goto out; - } + /* * Serialize access across the discovery struct iscsi_portal_group to * process login attempt. 
*/ conn->tpg =3D iscsit_global->discovery_tpg; @@ -1256,21 +1249,11 @@ int iscsi_target_locate_portal( ret =3D -1; goto out; } conn->tpg_np =3D tpg_np; pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt); - /* - * Setup crc32c modules from libcrypto - */ - if (iscsi_login_setup_crypto(conn) < 0) { - pr_err("iscsi_login_setup_crypto() failed\n"); - kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put); - iscsit_put_tiqn_for_login(tiqn); - conn->tpg =3D NULL; - ret =3D -1; - goto out; - } + /* * Serialize access across the struct iscsi_portal_group to * process login attempt. */ if (iscsit_access_np(np, conn->tpg) < 0) { diff --git a/include/target/iscsi/iscsi_target_core.h b/include/target/iscs= i/iscsi_target_core.h index 60af7c63b34e..51ca80abacf7 100644 --- a/include/target/iscsi/iscsi_target_core.h +++ b/include/target/iscsi/iscsi_target_core.h @@ -574,13 +574,10 @@ struct iscsit_conn { spinlock_t nopin_timer_lock; spinlock_t response_queue_lock; spinlock_t state_lock; spinlock_t login_timer_lock; spinlock_t login_worker_lock; - /* libcrypto RX and TX contexts for crc32c */ - struct ahash_request *conn_rx_hash; - struct ahash_request *conn_tx_hash; /* Used for scheduling TX and RX connection kthreads */ cpumask_var_t conn_cpumask; cpumask_var_t allowed_cpumask; unsigned int conn_rx_reset_cpumask:1; unsigned int conn_tx_reset_cpumask:1; --=20 2.47.0