From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CA7267462; Mon, 21 Oct 2024 00:29:51 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470592; cv=none; b=lv6seHhjQ1+p0PhB38VjXE6JU8w0BlaAGa34OFgUMEADNw9cEpfa1bjhmsnEjI9S20F2f/bX4FE+PPyzEZtw/2XI8cKoUIB52ZZ9lS2VBpP8BVhLj3N93fXhANkyj/S9w7emikd6MP2uipAWiWwJMsxgVCpg89mafzZWmJSP/l8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470592; c=relaxed/simple; bh=4oZ1Q3tWbzwSP9RLFVgxu8DMcPqFYU+tdgCV67sxyr0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=okxsHC75vV/VWBegARLldRAoAWBCPHkz32NjmAMQnrmxktRzJXIuT/6gsf+/YgM5D2h5XUPDAwAv5sMoQymJm0qwS9a0a3Gs8AnfoQr/yEcmc0aCJ9E5hWHMqzcf6uDHmTpdo4KZG+gqajcd89GwsxhLa8PoX5V5I9wKlecqw0Q= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=dkF1gI1T; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="dkF1gI1T" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 0AC21C4CEC7; Mon, 21 Oct 2024 00:29:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470591; bh=4oZ1Q3tWbzwSP9RLFVgxu8DMcPqFYU+tdgCV67sxyr0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=dkF1gI1TEowluxz74dX7OKbuaCDeRq+XpeD/bfQlGdd/ID5XHN7W9L1BrNaGFJkmv 7GDJ7X6cdmxo1p5lXFpXwvCTWfOr+PnCZDgUAc3FwfL0WXZKA+Sh3GmXyK76Ow8LNm UPZ360jg78H90tFxQCk3DKP5itwXmJrSwZ6eKkHOQM1TQZcCj18DY9joSClQ4Yvh+S N9xTgDGPTE4Mmt537gJGtS2VTuPEnTM02PpAgSpTZ5ICdbTpqT/RoQvl5N/O0yFdVh 
MBDhvKQm8r63D4IIQ0qc744pF1Idy2e8hKBAf189TJlD3+3WzD/8AlM5FSWTtGJgCg 6vmlxD+6wMUhw== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 01/15] lib/crc32: drop leading underscores from __crc32c_le_base Date: Sun, 20 Oct 2024 17:29:21 -0700 Message-ID: <20241021002935.325878-2-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Remove the leading underscores from __crc32c_le_base(). This is in preparation for adding crc32c_le_arch() and eventually renaming __crc32c_le() to crc32c_le(). 
Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/arm64/lib/crc32-glue.c | 2 +- arch/riscv/lib/crc32.c | 2 +- crypto/crc32c_generic.c | 8 ++++---- include/linux/crc32.h | 2 +- lib/crc32.c | 4 ++-- lib/crc32test.c | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index 295ae3e6b997a..ad015223d15df 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -42,11 +42,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, si= ze_t len) } =20 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) - return __crc32c_le_base(crc, p, len); + return crc32c_le_base(crc, p, len); =20 if (len >=3D min_len && cpu_have_named_feature(PMULL) && crypto_simd_usab= le()) { kernel_neon_begin(); crc =3D crc32c_le_arm64_4way(crc, p, len); kernel_neon_end(); diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32.c index d7dc599af3ef6..333fb7af11922 100644 --- a/arch/riscv/lib/crc32.c +++ b/arch/riscv/lib/crc32.c @@ -224,11 +224,11 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, = size_t len) } =20 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, - CRC32C_POLY_QT_LE, __crc32c_le_base); + CRC32C_POLY_QT_LE, crc32c_le_base); } =20 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, size_t len) { diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 7c2357c30fdf7..635599b255ec0 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -83,11 +83,11 @@ static int chksum_setkey(struct crypto_shash *tfm, cons= t u8 *key, static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned int length) { struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); =20 - ctx->crc =3D __crc32c_le_base(ctx->crc, data, length); + ctx->crc =3D crc32c_le_base(ctx->crc, data, length); return 0; } 
=20 static int chksum_update_arch(struct shash_desc *desc, const u8 *data, unsigned int length) @@ -106,11 +106,11 @@ static int chksum_final(struct shash_desc *desc, u8 *= out) return 0; } =20 static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 = *out) { - put_unaligned_le32(~__crc32c_le_base(*crcp, data, len), out); + put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out); return 0; } =20 static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len, u8 *out) @@ -198,16 +198,16 @@ static struct shash_alg algs[] =3D {{ }}; =20 static int __init crc32c_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&__crc32c_le !=3D &__crc32c_le_= base)); + return crypto_register_shashes(algs, 1 + (&__crc32c_le !=3D &crc32c_le_ba= se)); } =20 static void __exit crc32c_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&__crc32c_le !=3D &__crc32c_le_base)= ); + crypto_unregister_shashes(algs, 1 + (&__crc32c_le !=3D &crc32c_le_base)); } =20 subsys_initcall(crc32c_mod_init); module_exit(crc32c_mod_fini); =20 diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 87f788c0d607b..5b07fc9081c47 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -37,11 +37,11 @@ static inline u32 crc32_le_combine(u32 crc1, u32 crc2, = size_t len2) { return crc32_le_shift(crc1, len2) ^ crc2; } =20 u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure __crc32c_le_base(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len); =20 /** * __crc32c_le_combine - Combine two crc32c check values into one. 
For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, __crc32c_le() check values were calculated diff --git a/lib/crc32.c b/lib/crc32.c index ff587fee3893d..c67059b0082b4 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -205,12 +205,12 @@ EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); =20 u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32= _le); EXPORT_SYMBOL(crc32_le_base); =20 -u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__= crc32c_le); -EXPORT_SYMBOL(__crc32c_le_base); +u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__cr= c32c_le); +EXPORT_SYMBOL(crc32c_le_base); =20 u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32= _be); =20 /* * This multiplies the polynomials x and y modulo the given modulus. diff --git a/lib/crc32test.c b/lib/crc32test.c index 03cf5c1f2f5dc..30b8da4d8be46 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -824,11 +824,11 @@ static void crc32test_regenerate(void) for (i =3D 0; i < ARRAY_SIZE(test); i++) { pr_info("{0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x},\n", test[i].crc, test[i].start, test[i].length, crc32_le_base(test[i].crc, test_buf + test[i].start, test[i].length), crc32_be_base(test[i].crc, test_buf + test[i].start, test[i].length), - __crc32c_le_base(test[i].crc, test_buf + test[i].start, test[i].length)= ); + crc32c_le_base(test[i].crc, test_buf + test[i].start, test[i].length)); } } =20 static int __init crc32test_init(void) { --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 766A0C8FE; Mon, 21 Oct 2024 00:29:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; 
d=subspace.kernel.org; s=arc-20240116; t=1729470592; cv=none; b=RH2BBiX00GksVrgoCWziAa61gTbKgyoJ+A/q6A8fJ1p6w7cYCYz52YFOETBQD0lTSNCeDQUE+M5V9PA/aNWfqmcnabGrySkhhzSmqov23P4QICKQLRyKX1f+nZKDD7m34KMiS7uYHmSrE38PyDYOJDxU55Ff5YAeDiyijPcH27k= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470592; c=relaxed/simple; bh=SVZU2OCirSgQKq4/Sx8TXNmXgxL5UCmv8qRCEstvjB4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=uG1P8DInutA81xw6M6RBnSPVhVEzp7X4FoDTEWL96or8pVcp3D3XlrCaxwC0aBVNn2toirAsodcbASbK1bJFQ/hOd0SYe6t6tYSNggsruZ/Z/fuywHsJR44CBLJzI1vZVgjn/wChYs8/K2jnjqc4UpxdPkhGwREH+bq5LjylapE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=ob1nEJCR; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="ob1nEJCR" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 8C570C4CEE6; Mon, 21 Oct 2024 00:29:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470591; bh=SVZU2OCirSgQKq4/Sx8TXNmXgxL5UCmv8qRCEstvjB4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=ob1nEJCRPHqAkHmkPhcjFsOeuwYGSaB1NpxDuWt0fTQHvHm1KQ2XfXx3I7pKs5LCP DB5MVea9HtPjWgKOgp7yPsllKpfeU9bPxgau+J6DdUTXy90WGV0AiBKNp08RTrA1wn b0Oj/fU6A9OFnz5A6oHbeXWHihjLFEWCWJHYbGGHhpIEhOuIW8AjoxWa8FmUUlXB22 xT1zxjCqnhuXJN/DpmObGW1seEYiUrmftUmxlUpuf9dLY+mULW4a23NunmlxpoEOaz XMI6BQ+JJgPXyL8dSmQ7MxogHiFtgY50Qb0hMLBvqwO0mMvX64oTfC3fzGtLGsRhUM vZ+YzQtKLw3qA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, 
sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 02/15] lib/crc32: improve support for arch-specific overrides Date: Sun, 20 Oct 2024 17:29:22 -0700 Message-ID: <20241021002935.325878-3-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Currently the CRC32 library functions are defined as weak symbols, and the arm64 and riscv architectures override them. This method of arch-specific overrides has the limitation that it only works when both the base and arch code are built-in. Also, it causes the arch-specific code to be silently left unused if it is accidentally built with lib-y instead of obj-y; unfortunately the RISC-V code does this. This commit reorganizes the code to have explicit *_arch() functions that are called when they are enabled, similar to how some of the crypto library code works (e.g. chacha_crypt() calls chacha_crypt_arch()). Make the existing kconfig choice for the CRC32 implementation also control whether the arch-optimized implementation (if one is available) is enabled or not. Make it enabled by default if CRC32 is also enabled. The result is that arch-optimized CRC32 library functions will be included automatically when appropriate, but it is now possible to disable them. They can also now be built as a loadable module if the CRC32 library functions happen to be used only by loadable modules. 
Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/arm64/Kconfig | 1 + arch/arm64/lib/Makefile | 3 +- arch/arm64/lib/crc32-glue.c | 13 ++++- arch/riscv/Kconfig | 1 + arch/riscv/lib/Makefile | 3 +- arch/riscv/lib/{crc32.c =3D> crc32-riscv.c} | 13 ++++- crypto/crc32_generic.c | 4 +- crypto/crc32c_generic.c | 4 +- include/linux/crc32.h | 35 +++++++++--- lib/Kconfig | 70 +++++++++++++++++------ lib/crc32.c | 22 +++---- 11 files changed, 118 insertions(+), 51 deletions(-) rename arch/riscv/lib/{crc32.c =3D> crc32-riscv.c} (94%) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fd9df6dcc5937..1e48f40f654e4 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -19,10 +19,11 @@ config ARM64 select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_OPS if XEN select ARCH_HAS_DMA_PREP_COHERENT diff --git a/arch/arm64/lib/Makefile b/arch/arm64/lib/Makefile index 8e882f479d981..5fbcf0d566655 100644 --- a/arch/arm64/lib/Makefile +++ b/arch/arm64/lib/Makefile @@ -11,11 +11,12 @@ CFLAGS_xor-neon.o +=3D $(CC_FLAGS_FPU) CFLAGS_REMOVE_xor-neon.o +=3D $(CC_FLAGS_NO_FPU) endif =20 lib-$(CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE) +=3D uaccess_flushcache.o =20 -obj-$(CONFIG_CRC32) +=3D crc32.o crc32-glue.o +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-arm64.o +crc32-arm64-y :=3D crc32.o crc32-glue.o =20 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o =20 obj-$(CONFIG_ARM64_MTE) +=3D mte.o =20 diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c index ad015223d15df..d7f6e1cbf0d23 100644 --- a/arch/arm64/lib/crc32-glue.c +++ b/arch/arm64/lib/crc32-glue.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-2.0-only =20 #include #include +#include =20 
#include #include #include #include @@ -19,11 +20,11 @@ asmlinkage u32 crc32_be_arm64(u32 crc, unsigned char co= nst *p, size_t len); =20 asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t= len); asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_= t len); asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t= len); =20 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_le_base(crc, p, len); =20 if (len >=3D min_len && cpu_have_named_feature(PMULL) && crypto_simd_usab= le()) { @@ -38,12 +39,13 @@ u32 __pure crc32_le(u32 crc, unsigned char const *p, si= ze_t len) return crc; } =20 return crc32_le_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32_le_arch); =20 -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32c_le_base(crc, p, len); =20 if (len >=3D min_len && cpu_have_named_feature(PMULL) && crypto_simd_usab= le()) { @@ -58,12 +60,13 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p,= size_t len) return crc; } =20 return crc32c_le_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32c_le_arch); =20 -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) { if (!alternative_has_cap_likely(ARM64_HAS_CRC32)) return crc32_be_base(crc, p, len); =20 if (len >=3D min_len && cpu_have_named_feature(PMULL) && crypto_simd_usab= le()) { @@ -78,5 +81,9 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, size= _t len) return crc; } =20 return crc32_be_arm64(crc, p, len); } +EXPORT_SYMBOL(crc32_be_arch); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("arm64-optimized CRC32 functions"); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 62545946ecf43..1c32e51eb3a4c 
100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -22,10 +22,11 @@ config RISCV select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM_VMEMMAP select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CRC32 if RISCV_ISA_ZBC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_FAST_MULTIPLIER diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 8eec6b69a875f..79368a895feed 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -13,10 +13,9 @@ ifeq ($(CONFIG_MMU), y) lib-$(CONFIG_RISCV_ISA_V) +=3D uaccess_vector.o endif lib-$(CONFIG_MMU) +=3D uaccess.o lib-$(CONFIG_64BIT) +=3D tishift.o lib-$(CONFIG_RISCV_ISA_ZICBOZ) +=3D clear_page.o -lib-$(CONFIG_RISCV_ISA_ZBC) +=3D crc32.o - +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-riscv.o obj-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o lib-$(CONFIG_RISCV_ISA_V) +=3D xor.o lib-$(CONFIG_RISCV_ISA_V) +=3D riscv_v_helpers.o diff --git a/arch/riscv/lib/crc32.c b/arch/riscv/lib/crc32-riscv.c similarity index 94% rename from arch/riscv/lib/crc32.c rename to arch/riscv/lib/crc32-riscv.c index 333fb7af11922..a3ff7db2a1ce2 100644 --- a/arch/riscv/lib/crc32.c +++ b/arch/riscv/lib/crc32-riscv.c @@ -12,10 +12,11 @@ #include #include #include #include #include +#include =20 /* * Refer to https://www.corsix.org/content/barrett-reduction-polynomials f= or * better understanding of how this math works. 
* @@ -215,21 +216,23 @@ static inline u32 __pure crc32_le_generic(u32 crc, un= signed char const *p, =20 legacy: return crc_fb(crc, p, len); } =20 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE, crc32_le_base); } +EXPORT_SYMBOL(crc32_le_arch); =20 -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, CRC32C_POLY_LE, CRC32C_POLY_QT_LE, crc32c_le_base); } +EXPORT_SYMBOL(crc32c_le_arch); =20 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p, size_t len) { size_t bits =3D len * 8; @@ -251,11 +254,11 @@ static inline u32 crc32_be_unaligned(u32 crc, unsigne= d char const *p, crc ^=3D crc_low; =20 return crc; } =20 -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len) { size_t offset, head_len, tail_len; unsigned long const *p_ul; unsigned long s; =20 @@ -290,5 +293,9 @@ u32 __pure crc32_be(u32 crc, unsigned char const *p, si= ze_t len) return crc; =20 legacy: return crc32_be_base(crc, p, len); } +EXPORT_SYMBOL(crc32_be_arch); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Accelerated CRC32 implementation with Zbc extension"); diff --git a/crypto/crc32_generic.c b/crypto/crc32_generic.c index 6a55d206fab31..cc064ea8240e3 100644 --- a/crypto/crc32_generic.c +++ b/crypto/crc32_generic.c @@ -158,16 +158,16 @@ static struct shash_alg algs[] =3D {{ }}; =20 static int __init crc32_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&crc32_le !=3D &crc32_le_base)); + return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } =20 static void __exit crc32_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&crc32_le !=3D 
&crc32_le_base)); + crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } =20 subsys_initcall(crc32_mod_init); module_exit(crc32_mod_fini); =20 diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c index 635599b255ec0..04b03d825cf45 100644 --- a/crypto/crc32c_generic.c +++ b/crypto/crc32c_generic.c @@ -198,16 +198,16 @@ static struct shash_alg algs[] =3D {{ }}; =20 static int __init crc32c_mod_init(void) { /* register the arch flavor only if it differs from the generic one */ - return crypto_register_shashes(algs, 1 + (&__crc32c_le !=3D &crc32c_le_ba= se)); + return crypto_register_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } =20 static void __exit crc32c_mod_fini(void) { - crypto_unregister_shashes(algs, 1 + (&__crc32c_le !=3D &crc32c_le_base)); + crypto_unregister_shashes(algs, 1 + IS_ENABLED(CONFIG_CRC32_ARCH)); } =20 subsys_initcall(crc32c_mod_init); module_exit(crc32c_mod_fini); =20 diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 5b07fc9081c47..58c632533b086 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -6,14 +6,38 @@ #define _LINUX_CRC32_H =20 #include #include =20 -u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_le_base(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32_be_base(u32 crc, unsigned char const *p, size_t len); +u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len); +u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len); +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len); + +static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_le_arch(crc, p, len); + return crc32_le_base(crc, p, len); +} + 
+static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32_be_arch(crc, p, len); + return crc32_be_base(crc, p, len); +} + +/* TODO: leading underscores should be dropped once callers have been upda= ted */ +static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len) +{ + if (IS_ENABLED(CONFIG_CRC32_ARCH)) + return crc32c_le_arch(crc, p, len); + return crc32c_le_base(crc, p, len); +} =20 /** * crc32_le_combine - Combine two crc32 check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, crc32_le() check values were calculated @@ -36,13 +60,10 @@ u32 __attribute_const__ crc32_le_shift(u32 crc, size_t = len); static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2) { return crc32_le_shift(crc1, len2) ^ crc2; } =20 -u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len); -u32 __pure crc32c_le_base(u32 crc, unsigned char const *p, size_t len); - /** * __crc32c_le_combine - Combine two crc32c check values into one. For two * sequences of bytes, seq1 and seq2 with lengths len1 * and len2, __crc32c_le() check values were calculated * for each, crc1 and crc2. diff --git a/lib/Kconfig b/lib/Kconfig index b38849af6f130..07afcf214f353 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -176,10 +176,13 @@ config CRC32 This option is provided for the case where no in-kernel-tree modules require CRC32/CRC32c functions, but a module built outside the kernel tree does. Such modules that use library CRC32/CRC32c functions require M here. =20 +config ARCH_HAS_CRC32 + bool + config CRC32_SELFTEST tristate "CRC32 perform self test on init" depends on CRC32 help This option enables the CRC32 library functions to perform a @@ -188,54 +191,89 @@ config CRC32_SELFTEST and computes the total elapsed time and number of bytes processed. 
=20 choice prompt "CRC32 implementation" depends on CRC32 - default CRC32_SLICEBY8 + default CRC32_IMPL_ARCH_PLUS_SLICEBY8 if ARCH_HAS_CRC32 + default CRC32_IMPL_SLICEBY8 if !ARCH_HAS_CRC32 help - This option allows a kernel builder to override the default choice - of CRC32 algorithm. Choose the default ("slice by 8") unless you - know that you need one of the others. + This option allows you to override the default choice of CRC32 + implementation. Choose the default unless you know that you need one + of the others. =20 -config CRC32_SLICEBY8 +config CRC32_IMPL_ARCH_PLUS_SLICEBY8 + bool "Arch-optimized, with fallback to slice-by-8" if ARCH_HAS_CRC32 + help + Use architecture-optimized implementation of CRC32. Fall back to + slice-by-8 in cases where the arch-optimized implementation cannot be + used, e.g. if the CPU lacks support for the needed instructions. + + This is the default when an arch-optimized implementation exists. + +config CRC32_IMPL_ARCH_PLUS_SLICEBY1 + bool "Arch-optimized, with fallback to slice-by-1" if ARCH_HAS_CRC32 + help + Use architecture-optimized implementation of CRC32, but fall back to + slice-by-1 instead of slice-by-8 in order to reduce the binary size. + +config CRC32_IMPL_SLICEBY8 bool "Slice by 8 bytes" help Calculate checksum 8 bytes at a time with a clever slicing algorithm. - This is the fastest algorithm, but comes with a 8KiB lookup table. - Most modern processors have enough cache to hold this table without - thrashing the cache. - - This is the default implementation choice. Choose this one unless - you have a good reason not to. + This is much slower than the architecture-optimized implementation of + CRC32 (if the selected arch has one), but it is portable and is the + fastest implementation when no arch-optimized implementation is + available. It uses an 8KiB lookup table. Most modern processors have + enough cache to hold this table without thrashing the cache. 
=20 -config CRC32_SLICEBY4 +config CRC32_IMPL_SLICEBY4 bool "Slice by 4 bytes" help Calculate checksum 4 bytes at a time with a clever slicing algorithm. This is a bit slower than slice by 8, but has a smaller 4KiB lookup table. =20 Only choose this option if you know what you are doing. =20 -config CRC32_SARWATE - bool "Sarwate's Algorithm (one byte at a time)" +config CRC32_IMPL_SLICEBY1 + bool "Slice by 1 byte (Sarwate's algorithm)" help Calculate checksum a byte at a time using Sarwate's algorithm. This - is not particularly fast, but has a small 256 byte lookup table. + is not particularly fast, but has a small 1KiB lookup table. =20 Only choose this option if you know what you are doing. =20 -config CRC32_BIT +config CRC32_IMPL_BIT bool "Classic Algorithm (one bit at a time)" help Calculate checksum one bit at a time. This is VERY slow, but has no lookup table. This is provided as a debugging option. =20 Only choose this option if you are debugging crc32. =20 endchoice =20 +config CRC32_ARCH + tristate + default CRC32 if CRC32_IMPL_ARCH_PLUS_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SL= ICEBY1 + +config CRC32_SLICEBY8 + bool + default y if CRC32_IMPL_SLICEBY8 || CRC32_IMPL_ARCH_PLUS_SLICEBY8 + +config CRC32_SLICEBY4 + bool + default y if CRC32_IMPL_SLICEBY4 + +config CRC32_SARWATE + bool + default y if CRC32_IMPL_SLICEBY1 || CRC32_IMPL_ARCH_PLUS_SLICEBY1 + +config CRC32_BIT + bool + default y if CRC32_IMPL_BIT + config CRC64 tristate "CRC64 functions" help This option is provided for the case where no in-kernel-tree modules require CRC64 functions, but a module built outside diff --git a/lib/crc32.c b/lib/crc32.c index c67059b0082b4..47151624332ef 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -181,39 +181,31 @@ static inline u32 __pure crc32_le_generic(u32 crc, un= signed char const *p, #endif return crc; } =20 #if CRC_LE_BITS =3D=3D 1 -u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) { 
return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } -u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, NULL, CRC32C_POLY_LE); } #else -u32 __pure __weak crc32_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, crc32table_le, CRC32_POLY_LE); } -u32 __pure __weak __crc32c_le(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len) { return crc32_le_generic(crc, p, len, crc32ctable_le, CRC32C_POLY_LE); } #endif -EXPORT_SYMBOL(crc32_le); -EXPORT_SYMBOL(__crc32c_le); - -u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32= _le); EXPORT_SYMBOL(crc32_le_base); - -u32 __pure crc32c_le_base(u32, unsigned char const *, size_t) __alias(__cr= c32c_le); EXPORT_SYMBOL(crc32c_le_base); =20 -u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32= _be); - /* * This multiplies the polynomials x and y modulo the given modulus. * This follows the "little-endian" CRC convention that the lsbit * represents the highest power of x, and the msbit represents x^0. 
*/ @@ -333,16 +325,16 @@ static inline u32 __pure crc32_be_generic(u32 crc, un= signed char const *p, # endif return crc; } =20 #if CRC_BE_BITS =3D=3D 1 -u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); } #else -u32 __pure __weak crc32_be(u32 crc, unsigned char const *p, size_t len) +u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len) { return crc32_be_generic(crc, p, len, crc32table_be, CRC32_POLY_BE); } #endif -EXPORT_SYMBOL(crc32_be); +EXPORT_SYMBOL(crc32_be_base); --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D9B92EAE7; Mon, 21 Oct 2024 00:29:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470593; cv=none; b=aIPQ9wq69FyMdJOCIhL0iu8G6Y5WaKU/hzWv3R3T0E1IYz4wYpDPgsn8smiCBScxyovzmBmCqBpJjjLjjn25NdhhhefGSVSMVTQN11vY92EESqJRPAFWh3Qwc3PelW1tUTXccaYyMPOgnwyuzIaHLJVfIhmTUFKUHZPHQrtJJv8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470593; c=relaxed/simple; bh=P29p9aakvL0t5otsAgsRJX6T4PF/lTw5WddUq+05yTc=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=Yiqqr/UJDhlR59ogPDxsipy1sJ48Ldv1OCedCNcbvV7SHlyUiLODw38fSVlfYvNZLEJN4NJKlUpVB7MuYystTrnk3iBfEDnx/ZuF/sRqkL36LFZ1RI4cAr+IQ67E9Q9xIq3afR9XCP2OHEU207GTQVJk2lSGBVe0QQGFp+gLyNM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=HKORaNc+; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) 
header.d=kernel.org header.i=@kernel.org header.b="HKORaNc+" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 1AB16C4CEEF; Mon, 21 Oct 2024 00:29:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470592; bh=P29p9aakvL0t5otsAgsRJX6T4PF/lTw5WddUq+05yTc=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=HKORaNc+UZn0V39uJ+7P69L48cj1LQKBwhJMMdNHdXV+/+Um9dfiVTpMPncHx+dpH TMqbkNu7jnKolMkDhP82b8yUyTLHAdIaJ2imSSFMlzPsINTX39Qp1ccOg7mrYfXxlc TykXSepdgk4mPJIvwCw4yd1OtvzJLwlJT2QcU0MbBkoEERBQDMSLu1r6R6Z/vpaKhs VjANV7/j0i4SH3dji+N+eAYu+ZdeljNgbsuEprQLDq2wbLWcHdtha5rfg/AYHBd9gp qJVvRZYkBnytcF4mhrziWaTwsJjjQQ9dnQJ0RoISTXkdyyg3sNg/nm/mSgszN5LLa2 kafOYpqMVObuA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 03/15] arm/crc32: expose CRC32 functions through lib Date: Sun, 20 Oct 2024 17:29:23 -0700 Message-ID: <20241021002935.325878-4-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the arm CRC32 assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. 
Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/arm/crypto/crc32-ce-glue.c to arch/arm/lib/crc32-glue.c, view this commit with 'git show -M10'. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/arm/Kconfig | 1 + arch/arm/configs/milbeaut_m10v_defconfig | 1 - arch/arm/configs/multi_v7_defconfig | 1 - arch/arm/crypto/Kconfig | 14 - arch/arm/crypto/Makefile | 2 - arch/arm/crypto/crc32-ce-glue.c | 247 ------------------ arch/arm/lib/Makefile | 3 + .../crc32-ce-core.S =3D> lib/crc32-core.S} | 0 arch/arm/lib/crc32-glue.c | 115 ++++++++ 9 files changed, 119 insertions(+), 265 deletions(-) delete mode 100644 arch/arm/crypto/crc32-ce-glue.c rename arch/arm/{crypto/crc32-ce-core.S =3D> lib/crc32-core.S} (100%) create mode 100644 arch/arm/lib/crc32-glue.c diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 749179a1d1629..851260303234c 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,10 +5,11 @@ config ARM select ARCH_32BIT_OFF_T select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_P= OINTER && !ARM_UNWIND select ARCH_HAS_BINFMT_FLAT select ARCH_HAS_CPU_CACHE_ALIASING select ARCH_HAS_CPU_FINALIZE_INIT if MMU + select ARCH_HAS_CRC32 if KERNEL_MODE_NEON select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DMA_ALLOC if MMU select ARCH_HAS_DMA_OPS select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE diff --git a/arch/arm/configs/milbeaut_m10v_defconfig b/arch/arm/configs/mi= lbeaut_m10v_defconfig index f5eeac9c65c32..acd16204f8d7f 100644 --- a/arch/arm/configs/milbeaut_m10v_defconfig +++ b/arch/arm/configs/milbeaut_m10v_defconfig @@ -105,11 +105,10 @@ CONFIG_CRYPTO_SHA2_ARM_CE=3Dm CONFIG_CRYPTO_SHA512_ARM=3Dm CONFIG_CRYPTO_AES_ARM=3Dm CONFIG_CRYPTO_AES_ARM_BS=3Dm CONFIG_CRYPTO_AES_ARM_CE=3Dm CONFIG_CRYPTO_CHACHA20_NEON=3Dm -CONFIG_CRYPTO_CRC32_ARM_CE=3Dm # CONFIG_CRYPTO_HW is not set CONFIG_CRC_CCITT=3Dm CONFIG_CRC_ITU_T=3Dm 
CONFIG_DMA_CMA=3Dy CONFIG_CMA_SIZE_MBYTES=3D64 diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v= 7_defconfig index 9a5f5c439b879..287ca055965f6 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -1304,11 +1304,10 @@ CONFIG_CRYPTO_SHA2_ARM_CE=3Dm CONFIG_CRYPTO_SHA512_ARM=3Dm CONFIG_CRYPTO_AES_ARM=3Dm CONFIG_CRYPTO_AES_ARM_BS=3Dm CONFIG_CRYPTO_AES_ARM_CE=3Dm CONFIG_CRYPTO_CHACHA20_NEON=3Dm -CONFIG_CRYPTO_CRC32_ARM_CE=3Dm CONFIG_CRYPTO_DEV_SUN4I_SS=3Dm CONFIG_CRYPTO_DEV_FSL_CAAM=3Dm CONFIG_CRYPTO_DEV_EXYNOS_RNG=3Dm CONFIG_CRYPTO_DEV_S5P=3Dm CONFIG_CRYPTO_DEV_ATMEL_AES=3Dm diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 5ff49a5e9afc9..ea0ebf336d0de 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -220,24 +220,10 @@ config CRYPTO_CHACHA20_NEON stream cipher algorithms =20 Architecture: arm using: - NEON (Advanced SIMD) extensions =20 -config CRYPTO_CRC32_ARM_CE - tristate "CRC32C and CRC32" - depends on KERNEL_MODE_NEON - depends on CRC32 - select CRYPTO_HASH - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - and CRC32 CRC algorithm (IEEE 802.3) - - Architecture: arm using: - - CRC and/or PMULL instructions - - Drivers: crc32-arm-ce and crc32c-arm-ce - config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF" depends on KERNEL_MODE_NEON depends on CRC_T10DIF select CRYPTO_HASH diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile index 13e62c7c25dca..38ec5cc1e8442 100644 --- a/arch/arm/crypto/Makefile +++ b/arch/arm/crypto/Makefile @@ -19,11 +19,10 @@ obj-$(CONFIG_CRYPTO_CURVE25519_NEON) +=3D curve25519-ne= on.o obj-$(CONFIG_CRYPTO_AES_ARM_CE) +=3D aes-arm-ce.o obj-$(CONFIG_CRYPTO_SHA1_ARM_CE) +=3D sha1-arm-ce.o obj-$(CONFIG_CRYPTO_SHA2_ARM_CE) +=3D sha2-arm-ce.o obj-$(CONFIG_CRYPTO_GHASH_ARM_CE) +=3D ghash-arm-ce.o obj-$(CONFIG_CRYPTO_CRCT10DIF_ARM_CE) +=3D crct10dif-arm-ce.o -obj-$(CONFIG_CRYPTO_CRC32_ARM_CE) +=3D 
crc32-arm-ce.o =20 aes-arm-y :=3D aes-cipher-core.o aes-cipher-glue.o aes-arm-bs-y :=3D aes-neonbs-core.o aes-neonbs-glue.o sha1-arm-y :=3D sha1-armv4-large.o sha1_glue.o sha1-arm-neon-y :=3D sha1-armv7-neon.o sha1_neon_glue.o @@ -36,11 +35,10 @@ blake2b-neon-y :=3D blake2b-neon-core.o blake2b-neon-g= lue.o sha1-arm-ce-y :=3D sha1-ce-core.o sha1-ce-glue.o sha2-arm-ce-y :=3D sha2-ce-core.o sha2-ce-glue.o aes-arm-ce-y :=3D aes-ce-core.o aes-ce-glue.o ghash-arm-ce-y :=3D ghash-ce-core.o ghash-ce-glue.o crct10dif-arm-ce-y :=3D crct10dif-ce-core.o crct10dif-ce-glue.o -crc32-arm-ce-y:=3D crc32-ce-core.o crc32-ce-glue.o chacha-neon-y :=3D chacha-scalar-core.o chacha-glue.o chacha-neon-$(CONFIG_KERNEL_MODE_NEON) +=3D chacha-neon-core.o poly1305-arm-y :=3D poly1305-core.o poly1305-glue.o nhpoly1305-neon-y :=3D nh-neon-core.o nhpoly1305-neon-glue.o curve25519-neon-y :=3D curve25519-core.o curve25519-glue.o diff --git a/arch/arm/crypto/crc32-ce-glue.c b/arch/arm/crypto/crc32-ce-glu= e.c deleted file mode 100644 index 20b4dff13e3a6..0000000000000 --- a/arch/arm/crypto/crc32-ce-glue.c +++ /dev/null @@ -1,247 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instruct= ions - * - * Copyright (C) 2016 Linaro Ltd - */ - -#include -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include - -#define PMULL_MIN_LEN 64L /* minimum size of buffer - * for crc32_pmull_le_16 */ -#define SCALE_F 16L /* size of NEON register */ - -asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); -asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); - -asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); -asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); - -static u32 (*fallback_crc32)(u32 init_crc, const u8 buf[], u32 len); -static u32 (*fallback_crc32c)(u32 init_crc, const u8 buf[], u32 len); - -static int 
crc32_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D 0; - return 0; -} - -static int crc32c_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D ~0; - return 0; -} - -static int crc32_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crc =3D shash_desc_ctx(desc); - - *crc =3D *mctx; - return 0; -} - -static int crc32_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc =3D shash_desc_ctx(desc); - - *crc =3D crc32_armv8_le(*crc, data, length); - return 0; -} - -static int crc32c_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc =3D shash_desc_ctx(desc); - - *crc =3D crc32c_armv8_le(*crc, data, length); - return 0; -} - -static int crc32_final(struct shash_desc *desc, u8 *out) -{ - u32 *crc =3D shash_desc_ctx(desc); - - put_unaligned_le32(*crc, out); - return 0; -} - -static int crc32c_final(struct shash_desc *desc, u8 *out) -{ - u32 *crc =3D shash_desc_ctx(desc); - - put_unaligned_le32(~*crc, out); - return 0; -} - -static int crc32_pmull_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc =3D shash_desc_ctx(desc); - unsigned int l; - - if (crypto_simd_usable()) { - if ((u32)data % SCALE_F) { - l =3D min_t(u32, length, SCALE_F - ((u32)data % SCALE_F)); - - *crc =3D fallback_crc32(*crc, data, l); - - data +=3D l; - length -=3D l; - } - - if (length >=3D PMULL_MIN_LEN) { - l =3D round_down(length, SCALE_F); - - kernel_neon_begin(); - *crc =3D crc32_pmull_le(data, l, *crc); - kernel_neon_end(); - - data +=3D l; - length -=3D l; - } - } - - if (length > 0) - *crc =3D fallback_crc32(*crc, data, length); - - return 0; -} - -static 
int crc32c_pmull_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - u32 *crc =3D shash_desc_ctx(desc); - unsigned int l; - - if (crypto_simd_usable()) { - if ((u32)data % SCALE_F) { - l =3D min_t(u32, length, SCALE_F - ((u32)data % SCALE_F)); - - *crc =3D fallback_crc32c(*crc, data, l); - - data +=3D l; - length -=3D l; - } - - if (length >=3D PMULL_MIN_LEN) { - l =3D round_down(length, SCALE_F); - - kernel_neon_begin(); - *crc =3D crc32c_pmull_le(data, l, *crc); - kernel_neon_end(); - - data +=3D l; - length -=3D l; - } - } - - if (length > 0) - *crc =3D fallback_crc32c(*crc, data, length); - - return 0; -} - -static struct shash_alg crc32_pmull_algs[] =3D { { - .setkey =3D crc32_setkey, - .init =3D crc32_init, - .update =3D crc32_update, - .final =3D crc32_final, - .descsize =3D sizeof(u32), - .digestsize =3D sizeof(u32), - - .base.cra_ctxsize =3D sizeof(u32), - .base.cra_init =3D crc32_cra_init, - .base.cra_name =3D "crc32", - .base.cra_driver_name =3D "crc32-arm-ce", - .base.cra_priority =3D 200, - .base.cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize =3D 1, - .base.cra_module =3D THIS_MODULE, -}, { - .setkey =3D crc32_setkey, - .init =3D crc32_init, - .update =3D crc32c_update, - .final =3D crc32c_final, - .descsize =3D sizeof(u32), - .digestsize =3D sizeof(u32), - - .base.cra_ctxsize =3D sizeof(u32), - .base.cra_init =3D crc32c_cra_init, - .base.cra_name =3D "crc32c", - .base.cra_driver_name =3D "crc32c-arm-ce", - .base.cra_priority =3D 200, - .base.cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .base.cra_blocksize =3D 1, - .base.cra_module =3D THIS_MODULE, -} }; - -static int __init crc32_pmull_mod_init(void) -{ - if (elf_hwcap2 & HWCAP2_PMULL) { - crc32_pmull_algs[0].update =3D crc32_pmull_update; - crc32_pmull_algs[1].update =3D crc32c_pmull_update; - - if (elf_hwcap2 & HWCAP2_CRC32) { - fallback_crc32 =3D crc32_armv8_le; - fallback_crc32c =3D crc32c_armv8_le; - } else { - fallback_crc32 =3D crc32_le; - fallback_crc32c =3D 
__crc32c_le; - } - } else if (!(elf_hwcap2 & HWCAP2_CRC32)) { - return -ENODEV; - } - - return crypto_register_shashes(crc32_pmull_algs, - ARRAY_SIZE(crc32_pmull_algs)); -} - -static void __exit crc32_pmull_mod_exit(void) -{ - crypto_unregister_shashes(crc32_pmull_algs, - ARRAY_SIZE(crc32_pmull_algs)); -} - -static const struct cpu_feature __maybe_unused crc32_cpu_feature[] =3D { - { cpu_feature(CRC32) }, { cpu_feature(PMULL) }, { } -}; -MODULE_DEVICE_TABLE(cpu, crc32_cpu_feature); - -module_init(crc32_pmull_mod_init); -module_exit(crc32_pmull_mod_exit); - -MODULE_AUTHOR("Ard Biesheuvel "); -MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Ex= tensions"); -MODULE_LICENSE("GPL v2"); -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32c"); diff --git a/arch/arm/lib/Makefile b/arch/arm/lib/Makefile index 0ca5aae1bcc3e..01cd4db2ed472 100644 --- a/arch/arm/lib/Makefile +++ b/arch/arm/lib/Makefile @@ -43,5 +43,8 @@ ifeq ($(CONFIG_KERNEL_MODE_NEON),y) CFLAGS_xor-neon.o +=3D $(CC_FLAGS_FPU) obj-$(CONFIG_XOR_BLOCKS) +=3D xor-neon.o endif =20 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o + +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-arm.o +crc32-arm-y :=3D crc32-glue.o crc32-core.o diff --git a/arch/arm/crypto/crc32-ce-core.S b/arch/arm/lib/crc32-core.S similarity index 100% rename from arch/arm/crypto/crc32-ce-core.S rename to arch/arm/lib/crc32-core.S diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32-glue.c new file mode 100644 index 0000000000000..a4f01f7be0767 --- /dev/null +++ b/arch/arm/lib/crc32-glue.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Accelerated CRC32(C) using ARM CRC, NEON and Crypto Extensions instruct= ions + * + * Copyright (C) 2016 Linaro Ltd + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +static DEFINE_STATIC_KEY_FALSE(have_crc32); +static DEFINE_STATIC_KEY_FALSE(have_pmull); + +#define PMULL_MIN_LEN 64 /* min size of 
buffer for pmull functions */ + +asmlinkage u32 crc32_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +asmlinkage u32 crc32c_pmull_le(const u8 buf[], u32 len, u32 init_crc); +asmlinkage u32 crc32c_armv8_le(u32 init_crc, const u8 buf[], u32 len); + +static u32 crc32_le_scalar(u32 crc, const u8 *p, size_t len) +{ + if (static_branch_likely(&have_crc32)) + return crc32_armv8_le(crc, p, len); + return crc32_le_base(crc, p, len); +} + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >=3D PMULL_MIN_LEN + 15 && + crypto_simd_usable() && static_branch_likely(&have_pmull)) { + size_t n =3D -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc =3D crc32_le_scalar(crc, p, n); + p +=3D n; + len -=3D n; + } + n =3D round_down(len, 16); + kernel_neon_begin(); + crc =3D crc32_pmull_le(p, n, crc); + kernel_neon_end(); + p +=3D n; + len -=3D n; + } + return crc32_le_scalar(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len) +{ + if (static_branch_likely(&have_crc32)) + return crc32c_armv8_le(crc, p, len); + return crc32c_le_base(crc, p, len); +} + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >=3D PMULL_MIN_LEN + 15 && + crypto_simd_usable() && static_branch_likely(&have_pmull)) { + size_t n =3D -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc =3D crc32c_le_scalar(crc, p, n); + p +=3D n; + len -=3D n; + } + n =3D round_down(len, 16); + kernel_neon_begin(); + crc =3D crc32c_pmull_le(p, n, crc); + kernel_neon_end(); + p +=3D n; + len -=3D n; + } + return crc32c_le_scalar(crc, p, len); +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_arm_init(void) +{ + if (elf_hwcap2 & HWCAP2_CRC32) + static_branch_enable(&have_crc32); + if 
(elf_hwcap2 & HWCAP2_PMULL) + static_branch_enable(&have_pmull); + return 0; +} +arch_initcall(crc32_arm_init); + +static void __exit crc32_arm_exit(void) +{ +} +module_exit(crc32_arm_exit); + +MODULE_AUTHOR("Ard Biesheuvel "); +MODULE_DESCRIPTION("Accelerated CRC32(C) using ARM CRC, NEON and Crypto Ex= tensions"); +MODULE_LICENSE("GPL v2"); --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 309CB13FEE; Mon, 21 Oct 2024 00:29:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470593; cv=none; b=uyhDrsgVM0SlIFHFEw25UGcAryKLWoy1d+46QPV1mpRYbSCvsVZDB1nMaZHRBwBNT97oOLnJxag/UiTTtQaNiWQhB8VC7lIcMss7jP8YUDRb8HJB9wQueVVhvUDei1ZNqUB+xERnvK7jw8tIOXuFBbc5mNyaOAuJbTbGID/B9Y0= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470593; c=relaxed/simple; bh=DbtNL2z2AejHot2HwaYZBG/fWkGPkHCkAxKCRhDkzwo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=K7EUh0r8DZf71Q1+vLNDorbWFEbHLMmiaoYWlTViDuzx6VSuo3vXoUOpm/JOu2fBZencXOL5XbsVb5bUs2yMRVNYaTOgN/u84mlXuFMLtnMYixorRwnM4ujcxZZ/MgneLJpqIFumo8XT0+syxTYwxgG7BTx1oYXrOj4teElhlCU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=lYrw7aT4; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="lYrw7aT4" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 9DBF5C4CEFA; Mon, 21 Oct 2024 00:29:52 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470593; 
bh=DbtNL2z2AejHot2HwaYZBG/fWkGPkHCkAxKCRhDkzwo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=lYrw7aT4unuZs0iY9fkNDY2K2UHAcCMvB5546zVV3vOqKcV9nXsFBTyFi56jwN5OQ IFrlRxARI+LIy9oN0OaoGxr3fcQIPYLdzF7Ip32WtDP6Zrj4YQZqPpT2j7CQ2ORL+k 6PH+SDHp0Vx72eBUNwBoqt5IP1Bs5Z2otFG7VLEXcCIWIxdBxGOUl2r5r2qXK1iFfe rzjBLfRTYu6KA1wxTu8B4s7RIpKRqfGKwye9sjTOhqvX3ZVB/yq31yEAQQ11T4lANP fvoUXXXqmOhVjTjSpinzQnU5knJmJVqIi2l5K/VSWu/Le2LoRlu0sHunW3KBEqwR0q VUaRclwLHIWTg== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 04/15] loongarch/crc32: expose CRC32 functions through lib Date: Sun, 20 Oct 2024 17:29:24 -0700 Message-ID: <20241021002935.325878-5-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the loongarch CRC32 assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/loongarch/crypto/crc32-loongarch.c to arch/loongarch/lib/crc32-loongarch.c, view this commit with 'git show -M10'. 
Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/loongarch/Kconfig | 1 + arch/loongarch/configs/loongson3_defconfig | 1 - arch/loongarch/crypto/Kconfig | 9 - arch/loongarch/crypto/Makefile | 2 - arch/loongarch/crypto/crc32-loongarch.c | 300 --------------------- arch/loongarch/lib/Makefile | 2 + arch/loongarch/lib/crc32-loongarch.c | 127 +++++++++ 7 files changed, 130 insertions(+), 312 deletions(-) delete mode 100644 arch/loongarch/crypto/crc32-loongarch.c create mode 100644 arch/loongarch/lib/crc32-loongarch.c diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index bb35c34f86d23..455f1af0bf88f 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -13,10 +13,11 @@ config LOONGARCH select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_KCOV diff --git a/arch/loongarch/configs/loongson3_defconfig b/arch/loongarch/co= nfigs/loongson3_defconfig index 75b366407a60a..0487ac21b38bb 100644 --- a/arch/loongarch/configs/loongson3_defconfig +++ b/arch/loongarch/configs/loongson3_defconfig @@ -967,11 +967,10 @@ CONFIG_CRYPTO_LZ4=3Dm CONFIG_CRYPTO_LZ4HC=3Dm CONFIG_CRYPTO_USER_API_HASH=3Dm CONFIG_CRYPTO_USER_API_SKCIPHER=3Dm CONFIG_CRYPTO_USER_API_RNG=3Dm CONFIG_CRYPTO_USER_API_AEAD=3Dm -CONFIG_CRYPTO_CRC32_LOONGARCH=3Dm CONFIG_CRYPTO_DEV_VIRTIO=3Dm CONFIG_DMA_CMA=3Dy CONFIG_DMA_NUMA_CMA=3Dy CONFIG_CMA_SIZE_MBYTES=3D0 CONFIG_PRINTK_TIME=3Dy diff --git a/arch/loongarch/crypto/Kconfig b/arch/loongarch/crypto/Kconfig index 200a6e8b43b1e..a0270b3e5b30a 100644 --- a/arch/loongarch/crypto/Kconfig +++ b/arch/loongarch/crypto/Kconfig @@ -1,14 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 =20 menu "Accelerated Cryptographic 
Algorithms for CPU (loongarch)" =20 -config CRYPTO_CRC32_LOONGARCH - tristate "CRC32c and CRC32" - select CRC32 - select CRYPTO_HASH - help - CRC32c and CRC32 CRC algorithms - - Architecture: LoongArch with CRC32 instructions - endmenu diff --git a/arch/loongarch/crypto/Makefile b/arch/loongarch/crypto/Makefile index d22613d27ce9e..ba83755dde2b4 100644 --- a/arch/loongarch/crypto/Makefile +++ b/arch/loongarch/crypto/Makefile @@ -1,6 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 # # Makefile for LoongArch crypto files.. # - -obj-$(CONFIG_CRYPTO_CRC32_LOONGARCH) +=3D crc32-loongarch.o diff --git a/arch/loongarch/crypto/crc32-loongarch.c b/arch/loongarch/crypt= o/crc32-loongarch.c deleted file mode 100644 index b7d9782827f55..0000000000000 --- a/arch/loongarch/crypto/crc32-loongarch.c +++ /dev/null @@ -1,300 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions - * - * Module based on mips/crypto/crc32-mips.c - * - * Copyright (C) 2014 Linaro Ltd - * Copyright (C) 2018 MIPS Tech, LLC - * Copyright (C) 2020-2023 Loongson Technology Corporation Limited - */ - -#include -#include - -#include -#include - -#define _CRC32(crc, value, size, type) \ -do { \ - __asm__ __volatile__( \ - #type ".w." 
#size ".w" " %0, %1, %0\n\t"\ - : "+r" (crc) \ - : "r" (value) \ - : "memory"); \ -} while (0) - -#define CRC32(crc, value, size) _CRC32(crc, value, size, crc) -#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) - -static u32 crc32_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc =3D crc_; - - while (len >=3D sizeof(u64)) { - u64 value =3D get_unaligned_le64(p); - - CRC32(crc, value, d); - p +=3D sizeof(u64); - len -=3D sizeof(u64); - } - - if (len & sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32(crc, value, w); - p +=3D sizeof(u32); - } - - if (len & sizeof(u16)) { - u16 value =3D get_unaligned_le16(p); - - CRC32(crc, value, h); - p +=3D sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value =3D *p++; - - CRC32(crc, value, b); - } - - return crc; -} - -static u32 crc32c_loongarch_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc =3D crc_; - - while (len >=3D sizeof(u64)) { - u64 value =3D get_unaligned_le64(p); - - CRC32C(crc, value, d); - p +=3D sizeof(u64); - len -=3D sizeof(u64); - } - - if (len & sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32C(crc, value, w); - p +=3D sizeof(u32); - } - - if (len & sizeof(u16)) { - u16 value =3D get_unaligned_le16(p); - - CRC32C(crc, value, h); - p +=3D sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value =3D *p++; - - CRC32C(crc, value, b); - } - - return crc; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -struct chksum_ctx { - u32 key; -}; - -struct chksum_desc_ctx { - u32 crc; -}; - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D mctx->key; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set the s= eed. 
- */ -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, unsigned= int keylen) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(tfm); - - if (keylen !=3D sizeof(mctx->key)) - return -EINVAL; - - mctx->key =3D get_unaligned_le32(key); - - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, unsigned= int length) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D crc32_loongarch_hw(ctx->crc, data, length); - return 0; -} - -static int chksumc_update(struct shash_desc *desc, const u8 *data, unsigne= d int length) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D crc32c_loongarch_hw(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - put_unaligned_le32(ctx->crc, out); - return 0; -} - -static int chksumc_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - put_unaligned_le32(~ctx->crc, out); - return 0; -} - -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *o= ut) -{ - put_unaligned_le32(crc32_loongarch_hw(crc, data, len), out); - return 0; -} - -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *= out) -{ - put_unaligned_le32(~crc32c_loongarch_hw(crc, data, len), out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, unsigned = int len, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksumc_finup(struct shash_desc *desc, const u8 *data, unsigned= int len, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - return __chksumc_finup(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, unsigned= int length, u8 *out) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - - 
return __chksum_finup(mctx->key, data, length, out); -} - -static int chksumc_digest(struct shash_desc *desc, const u8 *data, unsigne= d int length, u8 *out) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - - return __chksumc_finup(mctx->key, data, length, out); -} - -static int chksum_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D 0; - return 0; -} - -static int chksumc_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D ~0; - return 0; -} - -static struct shash_alg crc32_alg =3D { - .digestsize =3D CHKSUM_DIGEST_SIZE, - .setkey =3D chksum_setkey, - .init =3D chksum_init, - .update =3D chksum_update, - .final =3D chksum_final, - .finup =3D chksum_finup, - .digest =3D chksum_digest, - .descsize =3D sizeof(struct chksum_desc_ctx), - .base =3D { - .cra_name =3D "crc32", - .cra_driver_name =3D "crc32-loongarch", - .cra_priority =3D 300, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct chksum_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D chksum_cra_init, - } -}; - -static struct shash_alg crc32c_alg =3D { - .digestsize =3D CHKSUM_DIGEST_SIZE, - .setkey =3D chksum_setkey, - .init =3D chksum_init, - .update =3D chksumc_update, - .final =3D chksumc_final, - .finup =3D chksumc_finup, - .digest =3D chksumc_digest, - .descsize =3D sizeof(struct chksum_desc_ctx), - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-loongarch", - .cra_priority =3D 300, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct chksum_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D chksumc_cra_init, - } -}; - -static int __init crc32_mod_init(void) -{ - int err; - - if (!cpu_has(CPU_FEATURE_CRC32)) - return 0; - - err =3D crypto_register_shash(&crc32_alg); - if (err) - return err; - - err =3D 
crypto_register_shash(&crc32c_alg); - if (err) - return err; - - return 0; -} - -static void __exit crc32_mod_exit(void) -{ - if (!cpu_has(CPU_FEATURE_CRC32)) - return; - - crypto_unregister_shash(&crc32_alg); - crypto_unregister_shash(&crc32c_alg); -} - -module_init(crc32_mod_init); -module_exit(crc32_mod_exit); - -MODULE_AUTHOR("Min Zhou "); -MODULE_AUTHOR("Huacai Chen "); -MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions"); -MODULE_LICENSE("GPL v2"); diff --git a/arch/loongarch/lib/Makefile b/arch/loongarch/lib/Makefile index ccea3bbd43531..fae77809048b8 100644 --- a/arch/loongarch/lib/Makefile +++ b/arch/loongarch/lib/Makefile @@ -9,5 +9,7 @@ lib-y +=3D delay.o memset.o memcpy.o memmove.o \ obj-$(CONFIG_ARCH_SUPPORTS_INT128) +=3D tishift.o =20 obj-$(CONFIG_CPU_HAS_LSX) +=3D xor_simd.o xor_simd_glue.o =20 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o + +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-loongarch.o diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc3= 2-loongarch.c new file mode 100644 index 0000000000000..46eeb23b472bc --- /dev/null +++ b/arch/loongarch/lib/crc32-loongarch.c @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * crc32.c - CRC32 and CRC32C using LoongArch crc* instructions + * + * Module based on mips/crypto/crc32-mips.c + * + * Copyright (C) 2014 Linaro Ltd + * Copyright (C) 2018 MIPS Tech, LLC + * Copyright (C) 2020-2023 Loongson Technology Corporation Limited + */ + +#include +#include +#include +#include + +#define _CRC32(crc, value, size, type) \ +do { \ + __asm__ __volatile__( \ + #type ".w." 
#size ".w" " %0, %1, %0\n\t"\ + : "+r" (crc) \ + : "r" (value) \ + : "memory"); \ +} while (0) + +#define CRC32(crc, value, size) _CRC32(crc, value, size, crc) +#define CRC32C(crc, value, size) _CRC32(crc, value, size, crcc) + +static DEFINE_STATIC_KEY_FALSE(have_crc32); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32_le_base(crc, p, len); + + while (len >=3D sizeof(u64)) { + u64 value =3D get_unaligned_le64(p); + + CRC32(crc, value, d); + p +=3D sizeof(u64); + len -=3D sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32(crc, value, w); + p +=3D sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value =3D get_unaligned_le16(p); + + CRC32(crc, value, h); + p +=3D sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value =3D *p++; + + CRC32(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + while (len >=3D sizeof(u64)) { + u64 value =3D get_unaligned_le64(p); + + CRC32C(crc, value, d); + p +=3D sizeof(u64); + len -=3D sizeof(u64); + } + + if (len & sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32C(crc, value, w); + p +=3D sizeof(u32); + } + + if (len & sizeof(u16)) { + u16 value =3D get_unaligned_le16(p); + + CRC32C(crc, value, h); + p +=3D sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value =3D *p++; + + CRC32C(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_loongarch_init(void) +{ + if (cpu_has(CPU_FEATURE_CRC32)) + static_branch_enable(&have_crc32); + return 0; +} +arch_initcall(crc32_loongarch_init); + +static void __exit crc32_loongarch_exit(void) +{ +} +module_exit(crc32_loongarch_exit); + 
+MODULE_AUTHOR("Min Zhou "); +MODULE_AUTHOR("Huacai Chen "); +MODULE_DESCRIPTION("CRC32 and CRC32C using LoongArch crc* instructions"); +MODULE_LICENSE("GPL v2"); --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0B4081EA90; Mon, 21 Oct 2024 00:29:53 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470594; cv=none; b=NYsfoJPv39CLVUUNhHJsqcMMsFTh7vKJFfkzUhj9c5fRjWvdvU0aULE9A/NsjElZWlOLQpXEu1MC+6gaY6L0OvtCTdB6V7NT51HwX0JliPBGt7PB1nIstoy0vaW5lUBPR3xCKxS89zf4TrYs8MN0DAi2qs1MMz10wilyW1N1cew= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470594; c=relaxed/simple; bh=37CoKYQY/K3vVxK+pZWLWenOAuTcA9gdgPZY9Xy3FH0=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=LFkBzggQbrRstC6T3nZUU1GEGahUyVio1qmTvP7qJ0BmQqKJ22n4iFqf5synh/6XDsHRon7K0ufYs/CjhUCcXTEW+R7J6dZHT5AccBZDrmssrPSzA1t2LvE9orXPPPYUNB/8ZsEli7OQIycKznLDFAV6fdk0bd7ilNZ6RrOi+Ms= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=o/pntV4f; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="o/pntV4f" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 298E6C32781; Mon, 21 Oct 2024 00:29:53 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470593; bh=37CoKYQY/K3vVxK+pZWLWenOAuTcA9gdgPZY9Xy3FH0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=o/pntV4fvxNp74EG7L0BOfz/Y3MKLRRECrwFRjgeDEIXmaA3+rceRzCKbuWLMPE+f 
qtyzibI8U93xIJE79UM9hPanmlBjvSKd1aK5065xqOnPrVw6g2inNhXFW+9vXVPUGG PvoxQwVL/sz5QUuCtfH2cgbO+omhgkUzibTqImLp+epZArwlzvMA0ggiUY3jkymx/V uwDzgQu1GERDcJDSQtQFvjmEi+D3v4y1Z3msRQLPY2IhaOtB4g61KJyGvYzc9RSJMx Wt7NB07EgzhgoYwY4B2KceV69Qm7s/1lAO0DbqYhs1efNPWjjtqmREiV0/9PjhmwQb sJnR3Opzy1Jjg== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 05/15] mips/crc32: expose CRC32 functions through lib Date: Sun, 20 Oct 2024 17:29:25 -0700 Message-ID: <20241021002935.325878-6-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the mips CRC32 assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/mips/crypto/crc32-mips.c to arch/mips/lib/crc32-mips.c, view this commit with 'git show -M10'. 
Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/mips/Kconfig | 5 +- arch/mips/configs/eyeq5_defconfig | 1 - arch/mips/configs/eyeq6_defconfig | 1 - arch/mips/configs/generic/32r6.config | 2 - arch/mips/configs/generic/64r6.config | 1 - arch/mips/crypto/Kconfig | 9 - arch/mips/crypto/Makefile | 2 - arch/mips/crypto/crc32-mips.c | 354 -------------------------- arch/mips/lib/Makefile | 2 + arch/mips/lib/crc32-mips.c | 184 +++++++++++++ 10 files changed, 187 insertions(+), 374 deletions(-) delete mode 100644 arch/mips/crypto/crc32-mips.c create mode 100644 arch/mips/lib/crc32-mips.c diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 397edf05dd722..f80ea80d792f5 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -1993,15 +1993,15 @@ config CPU_MIPSR5 select MIPS_SPRAM =20 config CPU_MIPSR6 bool default y if CPU_MIPS32_R6 || CPU_MIPS64_R6 + select ARCH_HAS_CRC32 select CPU_HAS_RIXI select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select HAVE_ARCH_BITREVERSE select MIPS_ASID_BITS_VARIABLE - select MIPS_CRC_SUPPORT select MIPS_SPRAM =20 config TARGET_ISA_REV int default 1 if CPU_MIPSR1 @@ -2473,13 +2473,10 @@ config MIPS_ASID_BITS default 8 =20 config MIPS_ASID_BITS_VARIABLE bool =20 -config MIPS_CRC_SUPPORT - bool - # R4600 erratum. Due to the lack of errata information the exact # technical details aren't known. I've experimentally found that disabling # interrupts during indexed I-cache flushes seems to be sufficient to deal # with the issue. 
config WAR_R4600_V1_INDEX_ICACHEOP diff --git a/arch/mips/configs/eyeq5_defconfig b/arch/mips/configs/eyeq5_de= fconfig index ae9a09b16e40b..ff7af5dc6d9d3 100644 --- a/arch/mips/configs/eyeq5_defconfig +++ b/arch/mips/configs/eyeq5_defconfig @@ -97,11 +97,10 @@ CONFIG_NFS_FS=3Dy CONFIG_NFS_V3_ACL=3Dy CONFIG_NFS_V4=3Dy CONFIG_NFS_V4_1=3Dy CONFIG_NFS_V4_2=3Dy CONFIG_ROOT_NFS=3Dy -CONFIG_CRYPTO_CRC32_MIPS=3Dy CONFIG_FRAME_WARN=3D1024 CONFIG_DEBUG_FS=3Dy # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set CONFIG_CMDLINE_BOOL=3Dy diff --git a/arch/mips/configs/eyeq6_defconfig b/arch/mips/configs/eyeq6_de= fconfig index 6597d5e88b335..0afbb45a78e8e 100644 --- a/arch/mips/configs/eyeq6_defconfig +++ b/arch/mips/configs/eyeq6_defconfig @@ -100,11 +100,10 @@ CONFIG_NFS_FS=3Dy CONFIG_NFS_V3_ACL=3Dy CONFIG_NFS_V4=3Dy CONFIG_NFS_V4_1=3Dy CONFIG_NFS_V4_2=3Dy CONFIG_ROOT_NFS=3Dy -CONFIG_CRYPTO_CRC32_MIPS=3Dy CONFIG_FRAME_WARN=3D1024 CONFIG_DEBUG_FS=3Dy # CONFIG_RCU_TRACE is not set # CONFIG_FTRACE is not set CONFIG_CMDLINE_BOOL=3Dy diff --git a/arch/mips/configs/generic/32r6.config b/arch/mips/configs/gene= ric/32r6.config index 1a5d5ea4ab2b5..ca606e71f4d02 100644 --- a/arch/mips/configs/generic/32r6.config +++ b/arch/mips/configs/generic/32r6.config @@ -1,4 +1,2 @@ CONFIG_CPU_MIPS32_R6=3Dy CONFIG_HIGHMEM=3Dy - -CONFIG_CRYPTO_CRC32_MIPS=3Dy diff --git a/arch/mips/configs/generic/64r6.config b/arch/mips/configs/gene= ric/64r6.config index 63b4e95f303de..23a3009149570 100644 --- a/arch/mips/configs/generic/64r6.config +++ b/arch/mips/configs/generic/64r6.config @@ -2,7 +2,6 @@ CONFIG_CPU_MIPS64_R6=3Dy CONFIG_64BIT=3Dy CONFIG_MIPS32_O32=3Dy CONFIG_MIPS32_N32=3Dy =20 CONFIG_CPU_HAS_MSA=3Dy -CONFIG_CRYPTO_CRC32_MIPS=3Dy CONFIG_VIRTUALIZATION=3Dy diff --git a/arch/mips/crypto/Kconfig b/arch/mips/crypto/Kconfig index 9003a5c1e879f..7decd40c4e204 100644 --- a/arch/mips/crypto/Kconfig +++ b/arch/mips/crypto/Kconfig @@ -1,18 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 =20 menu 
"Accelerated Cryptographic Algorithms for CPU (mips)" =20 -config CRYPTO_CRC32_MIPS - tristate "CRC32c and CRC32" - depends on MIPS_CRC_SUPPORT - select CRYPTO_HASH - help - CRC32c and CRC32 CRC algorithms - - Architecture: mips - config CRYPTO_POLY1305_MIPS tristate "Hash functions: Poly1305" depends on MIPS select CRYPTO_ARCH_HAVE_LIB_POLY1305 help diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index 5e4105cccf9fa..fddc882814123 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -1,12 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 # # Makefile for MIPS crypto files.. # =20 -obj-$(CONFIG_CRYPTO_CRC32_MIPS) +=3D crc32-mips.o - obj-$(CONFIG_CRYPTO_CHACHA_MIPS) +=3D chacha-mips.o chacha-mips-y :=3D chacha-core.o chacha-glue.o AFLAGS_chacha-core.o +=3D -O2 # needed to fill branch delay slots =20 obj-$(CONFIG_CRYPTO_POLY1305_MIPS) +=3D poly1305-mips.o diff --git a/arch/mips/crypto/crc32-mips.c b/arch/mips/crypto/crc32-mips.c deleted file mode 100644 index 90eacf00cfc31..0000000000000 --- a/arch/mips/crypto/crc32-mips.c +++ /dev/null @@ -1,354 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * crc32-mips.c - CRC32 and CRC32C using optional MIPSr6 instructions - * - * Module based on arm64/crypto/crc32-arm.c - * - * Copyright (C) 2014 Linaro Ltd - * Copyright (C) 2018 MIPS Tech, LLC - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -enum crc_op_size { - b, h, w, d, -}; - -enum crc_type { - crc32, - crc32c, -}; - -#ifndef TOOLCHAIN_SUPPORTS_CRC -#define _ASM_SET_CRC(OP, SZ, TYPE) \ -_ASM_MACRO_3R(OP, rt, rs, rt2, \ - ".ifnc \\rt, \\rt2\n\t" \ - ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ - ".endif\n\t" \ - _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \ - ((SZ) << 6) | ((TYPE) << 8)) \ - _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ - ((SZ) << 14) | ((TYPE) << 3))) -#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" -#else /* 
!TOOLCHAIN_SUPPORTS_CRC */ -#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" -#define _ASM_UNSET_CRC(op, SZ, TYPE) -#endif - -#define __CRC32(crc, value, op, SZ, TYPE) \ -do { \ - __asm__ __volatile__( \ - ".set push\n\t" \ - _ASM_SET_CRC(op, SZ, TYPE) \ - #op " %0, %1, %0\n\t" \ - _ASM_UNSET_CRC(op, SZ, TYPE) \ - ".set pop" \ - : "+r" (crc) \ - : "r" (value)); \ -} while (0) - -#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) -#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) -#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) -#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) -#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) -#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) -#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) -#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) - -#define _CRC32(crc, value, size, op) \ - _CRC32_##op##size(crc, value) - -#define CRC32(crc, value, size) \ - _CRC32(crc, value, size, crc32) - -#define CRC32C(crc, value, size) \ - _CRC32(crc, value, size, crc32c) - -static u32 crc32_mips_le_hw(u32 crc_, const u8 *p, unsigned int len) -{ - u32 crc =3D crc_; - - if (IS_ENABLED(CONFIG_64BIT)) { - for (; len >=3D sizeof(u64); p +=3D sizeof(u64), len -=3D sizeof(u64)) { - u64 value =3D get_unaligned_le64(p); - - CRC32(crc, value, d); - } - - if (len & sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32(crc, value, w); - p +=3D sizeof(u32); - } - } else { - for (; len >=3D sizeof(u32); len -=3D sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32(crc, value, w); - p +=3D sizeof(u32); - } - } - - if (len & sizeof(u16)) { - u16 value =3D get_unaligned_le16(p); - - CRC32(crc, value, h); - p +=3D sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value =3D *p++; - - CRC32(crc, value, b); - } - - return crc; -} - -static u32 crc32c_mips_le_hw(u32 crc_, const u8 *p, unsigned 
int len) -{ - u32 crc =3D crc_; - - if (IS_ENABLED(CONFIG_64BIT)) { - for (; len >=3D sizeof(u64); p +=3D sizeof(u64), len -=3D sizeof(u64)) { - u64 value =3D get_unaligned_le64(p); - - CRC32C(crc, value, d); - } - - if (len & sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32C(crc, value, w); - p +=3D sizeof(u32); - } - } else { - for (; len >=3D sizeof(u32); len -=3D sizeof(u32)) { - u32 value =3D get_unaligned_le32(p); - - CRC32C(crc, value, w); - p +=3D sizeof(u32); - } - } - - if (len & sizeof(u16)) { - u16 value =3D get_unaligned_le16(p); - - CRC32C(crc, value, h); - p +=3D sizeof(u16); - } - - if (len & sizeof(u8)) { - u8 value =3D *p++; - - CRC32C(crc, value, b); - } - return crc; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -struct chksum_ctx { - u32 key; -}; - -struct chksum_desc_ctx { - u32 crc; -}; - -static int chksum_init(struct shash_desc *desc) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D mctx->key; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. 
- */ -static int chksum_setkey(struct crypto_shash *tfm, const u8 *key, - unsigned int keylen) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(tfm); - - if (keylen !=3D sizeof(mctx->key)) - return -EINVAL; - mctx->key =3D get_unaligned_le32(key); - return 0; -} - -static int chksum_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D crc32_mips_le_hw(ctx->crc, data, length); - return 0; -} - -static int chksumc_update(struct shash_desc *desc, const u8 *data, - unsigned int length) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D crc32c_mips_le_hw(ctx->crc, data, length); - return 0; -} - -static int chksum_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - put_unaligned_le32(ctx->crc, out); - return 0; -} - -static int chksumc_final(struct shash_desc *desc, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - put_unaligned_le32(~ctx->crc, out); - return 0; -} - -static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *o= ut) -{ - put_unaligned_le32(crc32_mips_le_hw(crc, data, len), out); - return 0; -} - -static int __chksumc_finup(u32 crc, const u8 *data, unsigned int len, u8 *= out) -{ - put_unaligned_le32(~crc32c_mips_le_hw(crc, data, len), out); - return 0; -} - -static int chksum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - return __chksum_finup(ctx->crc, data, len, out); -} - -static int chksumc_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - struct chksum_desc_ctx *ctx =3D shash_desc_ctx(desc); - - return __chksumc_finup(ctx->crc, data, len, out); -} - -static int chksum_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - - return 
__chksum_finup(mctx->key, data, length, out); -} - -static int chksumc_digest(struct shash_desc *desc, const u8 *data, - unsigned int length, u8 *out) -{ - struct chksum_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - - return __chksumc_finup(mctx->key, data, length, out); -} - -static int chksum_cra_init(struct crypto_tfm *tfm) -{ - struct chksum_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D ~0; - return 0; -} - -static struct shash_alg crc32_alg =3D { - .digestsize =3D CHKSUM_DIGEST_SIZE, - .setkey =3D chksum_setkey, - .init =3D chksum_init, - .update =3D chksum_update, - .final =3D chksum_final, - .finup =3D chksum_finup, - .digest =3D chksum_digest, - .descsize =3D sizeof(struct chksum_desc_ctx), - .base =3D { - .cra_name =3D "crc32", - .cra_driver_name =3D "crc32-mips-hw", - .cra_priority =3D 300, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct chksum_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D chksum_cra_init, - } -}; - -static struct shash_alg crc32c_alg =3D { - .digestsize =3D CHKSUM_DIGEST_SIZE, - .setkey =3D chksum_setkey, - .init =3D chksum_init, - .update =3D chksumc_update, - .final =3D chksumc_final, - .finup =3D chksumc_finup, - .digest =3D chksumc_digest, - .descsize =3D sizeof(struct chksum_desc_ctx), - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-mips-hw", - .cra_priority =3D 300, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct chksum_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D chksum_cra_init, - } -}; - -static int __init crc32_mod_init(void) -{ - int err; - - err =3D crypto_register_shash(&crc32_alg); - - if (err) - return err; - - err =3D crypto_register_shash(&crc32c_alg); - - if (err) { - crypto_unregister_shash(&crc32_alg); - return err; - } - - return 0; -} - -static void __exit crc32_mod_exit(void) -{ - crypto_unregister_shash(&crc32_alg); - 
crypto_unregister_shash(&crc32c_alg); -} - -MODULE_AUTHOR("Marcin Nowakowski + * Copyright (C) 2018 MIPS Tech, LLC + */ + +#include +#include +#include +#include +#include +#include +#include + +enum crc_op_size { + b, h, w, d, +}; + +enum crc_type { + crc32, + crc32c, +}; + +#ifndef TOOLCHAIN_SUPPORTS_CRC +#define _ASM_SET_CRC(OP, SZ, TYPE) \ +_ASM_MACRO_3R(OP, rt, rs, rt2, \ + ".ifnc \\rt, \\rt2\n\t" \ + ".error \"invalid operands \\\"" #OP " \\rt,\\rs,\\rt2\\\"\"\n\t" \ + ".endif\n\t" \ + _ASM_INSN_IF_MIPS(0x7c00000f | (__rt << 16) | (__rs << 21) | \ + ((SZ) << 6) | ((TYPE) << 8)) \ + _ASM_INSN32_IF_MM(0x00000030 | (__rs << 16) | (__rt << 21) | \ + ((SZ) << 14) | ((TYPE) << 3))) +#define _ASM_UNSET_CRC(op, SZ, TYPE) ".purgem " #op "\n\t" +#else /* !TOOLCHAIN_SUPPORTS_CRC */ +#define _ASM_SET_CRC(op, SZ, TYPE) ".set\tcrc\n\t" +#define _ASM_UNSET_CRC(op, SZ, TYPE) +#endif + +#define __CRC32(crc, value, op, SZ, TYPE) \ +do { \ + __asm__ __volatile__( \ + ".set push\n\t" \ + _ASM_SET_CRC(op, SZ, TYPE) \ + #op " %0, %1, %0\n\t" \ + _ASM_UNSET_CRC(op, SZ, TYPE) \ + ".set pop" \ + : "+r" (crc) \ + : "r" (value)); \ +} while (0) + +#define _CRC32_crc32b(crc, value) __CRC32(crc, value, crc32b, 0, 0) +#define _CRC32_crc32h(crc, value) __CRC32(crc, value, crc32h, 1, 0) +#define _CRC32_crc32w(crc, value) __CRC32(crc, value, crc32w, 2, 0) +#define _CRC32_crc32d(crc, value) __CRC32(crc, value, crc32d, 3, 0) +#define _CRC32_crc32cb(crc, value) __CRC32(crc, value, crc32cb, 0, 1) +#define _CRC32_crc32ch(crc, value) __CRC32(crc, value, crc32ch, 1, 1) +#define _CRC32_crc32cw(crc, value) __CRC32(crc, value, crc32cw, 2, 1) +#define _CRC32_crc32cd(crc, value) __CRC32(crc, value, crc32cd, 3, 1) + +#define _CRC32(crc, value, size, op) \ + _CRC32_##op##size(crc, value) + +#define CRC32(crc, value, size) \ + _CRC32(crc, value, size, crc32) + +#define CRC32C(crc, value, size) \ + _CRC32(crc, value, size, crc32c) + +static DEFINE_STATIC_KEY_FALSE(have_crc32); + +u32 crc32_le_arch(u32 crc, 
const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_64BIT)) { + for (; len >=3D sizeof(u64); p +=3D sizeof(u64), len -=3D sizeof(u64)) { + u64 value =3D get_unaligned_le64(p); + + CRC32(crc, value, d); + } + + if (len & sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32(crc, value, w); + p +=3D sizeof(u32); + } + } else { + for (; len >=3D sizeof(u32); len -=3D sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32(crc, value, w); + p +=3D sizeof(u32); + } + } + + if (len & sizeof(u16)) { + u16 value =3D get_unaligned_le16(p); + + CRC32(crc, value, h); + p +=3D sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value =3D *p++; + + CRC32(crc, value, b); + } + + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_64BIT)) { + for (; len >=3D sizeof(u64); p +=3D sizeof(u64), len -=3D sizeof(u64)) { + u64 value =3D get_unaligned_le64(p); + + CRC32C(crc, value, d); + } + + if (len & sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32C(crc, value, w); + p +=3D sizeof(u32); + } + } else { + for (; len >=3D sizeof(u32); len -=3D sizeof(u32)) { + u32 value =3D get_unaligned_le32(p); + + CRC32C(crc, value, w); + p +=3D sizeof(u32); + } + } + + if (len & sizeof(u16)) { + u16 value =3D get_unaligned_le16(p); + + CRC32C(crc, value, h); + p +=3D sizeof(u16); + } + + if (len & sizeof(u8)) { + u8 value =3D *p++; + + CRC32C(crc, value, b); + } + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_mips_init(void) +{ + if (cpu_have_feature(cpu_feature(MIPS_CRC32))) + static_branch_enable(&have_crc32); + return 0; +} +arch_initcall(crc32_mips_init); + +static void 
__exit crc32_mips_exit(void) +{ +} +module_exit(crc32_mips_exit); + +MODULE_AUTHOR("Marcin Nowakowski To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 06/15] powerpc/crc32: expose CRC32 functions through lib Date: Sun, 20 Oct 2024 17:29:26 -0700 Message-ID: <20241021002935.325878-7-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the powerpc CRC32C assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/powerpc/crypto/crc32c-vpmsum_glue.c to arch/powerpc/lib/crc32-glue.c, view this commit with 'git show -M10'. 
Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/powerpc/Kconfig | 1 + arch/powerpc/configs/powernv_defconfig | 1 - arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/crypto/Kconfig | 15 +- arch/powerpc/crypto/Makefile | 2 - arch/powerpc/crypto/crc32c-vpmsum_glue.c | 173 ------------------ arch/powerpc/crypto/crct10dif-vpmsum_asm.S | 2 +- arch/powerpc/lib/Makefile | 3 + arch/powerpc/lib/crc32-glue.c | 84 +++++++++ .../{crypto =3D> lib}/crc32-vpmsum_core.S | 0 .../{crypto =3D> lib}/crc32c-vpmsum_asm.S | 0 11 files changed, 90 insertions(+), 192 deletions(-) delete mode 100644 arch/powerpc/crypto/crc32c-vpmsum_glue.c create mode 100644 arch/powerpc/lib/crc32-glue.c rename arch/powerpc/{crypto =3D> lib}/crc32-vpmsum_core.S (100%) rename arch/powerpc/{crypto =3D> lib}/crc32c-vpmsum_asm.S (100%) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8094a01974cca..b05889400b04d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -125,10 +125,11 @@ config PPC select ARCH_DISABLE_KASAN_INLINE if PPC_RADIX_MMU select ARCH_DMA_DEFAULT_COHERENT if !NOT_COHERENT_CACHE select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_COPY_MC if PPC64 + select ARCH_HAS_CRC32 if PPC64 && ALTIVEC select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/= powernv_defconfig index ee84ade7a0339..4a7ddea05b4db 100644 --- a/arch/powerpc/configs/powernv_defconfig +++ b/arch/powerpc/configs/powernv_defconfig @@ -318,11 +318,10 @@ CONFIG_FTR_FIXUP_SELFTEST=3Dy CONFIG_MSI_BITMAP_SELFTEST=3Dy CONFIG_XMON=3Dy CONFIG_CRYPTO_TEST=3Dm CONFIG_CRYPTO_PCBC=3Dm CONFIG_CRYPTO_HMAC=3Dy -CONFIG_CRYPTO_CRC32C_VPMSUM=3Dm CONFIG_CRYPTO_CRCT10DIF_VPMSUM=3Dm CONFIG_CRYPTO_MD5_PPC=3Dm CONFIG_CRYPTO_MICHAEL_MIC=3Dm CONFIG_CRYPTO_SHA1_PPC=3Dm 
CONFIG_CRYPTO_SHA256=3Dy diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/pp= c64_defconfig index a5e3e7f97f4d7..ea01c0d6705f0 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -388,11 +388,10 @@ CONFIG_CRYPTO_TWOFISH=3Dm CONFIG_CRYPTO_PCBC=3Dm CONFIG_CRYPTO_MICHAEL_MIC=3Dm CONFIG_CRYPTO_SHA256=3Dy CONFIG_CRYPTO_WP512=3Dm CONFIG_CRYPTO_LZO=3Dm -CONFIG_CRYPTO_CRC32C_VPMSUM=3Dm CONFIG_CRYPTO_CRCT10DIF_VPMSUM=3Dm CONFIG_CRYPTO_VPMSUM_TESTER=3Dm CONFIG_CRYPTO_MD5_PPC=3Dm CONFIG_CRYPTO_SHA1_PPC=3Dm CONFIG_CRYPTO_AES_GCM_P10=3Dm diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index 46a4c85e85e24..5c016ec395300 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -11,23 +11,10 @@ config CRYPTO_CURVE25519_PPC64 Curve25519 algorithm =20 Architecture: PowerPC64 - Little-endian =20 -config CRYPTO_CRC32C_VPMSUM - tristate "CRC32c" - depends on PPC64 && ALTIVEC - select CRYPTO_HASH - select CRC32 - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: powerpc64 using - - AltiVec extensions - - Enable on POWER8 and newer processors for improved performance. - config CRYPTO_CRCT10DIF_VPMSUM tristate "CRC32T10DIF" depends on PPC64 && ALTIVEC && CRC_T10DIF select CRYPTO_HASH help @@ -38,11 +25,11 @@ config CRYPTO_CRCT10DIF_VPMSUM =20 Enable on POWER8 and newer processors for improved performance. =20 config CRYPTO_VPMSUM_TESTER tristate "CRC32c and CRC32T10DIF hardware acceleration tester" - depends on CRYPTO_CRCT10DIF_VPMSUM && CRYPTO_CRC32C_VPMSUM + depends on CRYPTO_CRCT10DIF_VPMSUM && CRC32_ARCH help Stress test for CRC32c and CRCT10DIF algorithms implemented with powerpc64 AltiVec extensions (POWER8 vpmsum instructions). Unless you are testing these algorithms, you don't need this. 
=20 diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 59808592f0a1b..54486192273c2 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -8,11 +8,10 @@ obj-$(CONFIG_CRYPTO_AES_PPC_SPE) +=3D aes-ppc-spe.o obj-$(CONFIG_CRYPTO_MD5_PPC) +=3D md5-ppc.o obj-$(CONFIG_CRYPTO_SHA1_PPC) +=3D sha1-powerpc.o obj-$(CONFIG_CRYPTO_SHA1_PPC_SPE) +=3D sha1-ppc-spe.o obj-$(CONFIG_CRYPTO_SHA256_PPC_SPE) +=3D sha256-ppc-spe.o -obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) +=3D crc32c-vpmsum.o obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) +=3D crct10dif-vpmsum.o obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) +=3D crc-vpmsum_test.o obj-$(CONFIG_CRYPTO_AES_GCM_P10) +=3D aes-gcm-p10-crypto.o obj-$(CONFIG_CRYPTO_CHACHA20_P10) +=3D chacha-p10-crypto.o obj-$(CONFIG_CRYPTO_POLY1305_P10) +=3D poly1305-p10-crypto.o @@ -22,11 +21,10 @@ obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) +=3D curve25519-p= pc64le.o aes-ppc-spe-y :=3D aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-mode= s.o aes-spe-glue.o md5-ppc-y :=3D md5-asm.o md5-glue.o sha1-powerpc-y :=3D sha1-powerpc-asm.o sha1.o sha1-ppc-spe-y :=3D sha1-spe-asm.o sha1-spe-glue.o sha256-ppc-spe-y :=3D sha256-spe-asm.o sha256-spe-glue.o -crc32c-vpmsum-y :=3D crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o crct10dif-vpmsum-y :=3D crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o aes-gcm-p10-crypto-y :=3D aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o = aesp10-ppc.o chacha-p10-crypto-y :=3D chacha-p10-glue.o chacha-p10le-8x.o poly1305-p10-crypto-y :=3D poly1305-p10-glue.o poly1305-p10le_64.o vmx-crypto-objs :=3D vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_c= tr.o aes_xts.o ghash.o diff --git a/arch/powerpc/crypto/crc32c-vpmsum_glue.c b/arch/powerpc/crypto= /crc32c-vpmsum_glue.c deleted file mode 100644 index 63760b7dbb760..0000000000000 --- a/arch/powerpc/crypto/crc32c-vpmsum_glue.c +++ /dev/null @@ -1,173 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -#include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define VMX_ALIGN 16 -#define VMX_ALIGN_MASK (VMX_ALIGN-1) - -#define VECTOR_BREAKPOINT 512 - -u32 __crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len); - -static u32 crc32c_vpmsum(u32 crc, unsigned char const *p, size_t len) -{ - unsigned int prealign; - unsigned int tail; - - if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable()) - return __crc32c_le(crc, p, len); - - if ((unsigned long)p & VMX_ALIGN_MASK) { - prealign =3D VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); - crc =3D __crc32c_le(crc, p, prealign); - len -=3D prealign; - p +=3D prealign; - } - - if (len & ~VMX_ALIGN_MASK) { - preempt_disable(); - pagefault_disable(); - enable_kernel_altivec(); - crc =3D __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); - disable_kernel_altivec(); - pagefault_enable(); - preempt_enable(); - } - - tail =3D len & VMX_ALIGN_MASK; - if (tail) { - p +=3D len & ~VMX_ALIGN_MASK; - crc =3D __crc32c_le(crc, p, tail); - } - - return crc; -} - -static int crc32c_vpmsum_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D ~0; - - return 0; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. 
- */ -static int crc32c_vpmsum_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32c_vpmsum_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D *mctx; - - return 0; -} - -static int crc32c_vpmsum_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D crc32c_vpmsum(*crcp, data, len); - - return 0; -} - -static int __crc32c_vpmsum_finup(u32 *crcp, const u8 *data, unsigned int l= en, - u8 *out) -{ - *(__le32 *)out =3D ~cpu_to_le32(crc32c_vpmsum(*crcp, data, len)); - - return 0; -} - -static int crc32c_vpmsum_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_vpmsum_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_vpmsum_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *(__le32 *)out =3D ~cpu_to_le32p(crcp); - - return 0; -} - -static int crc32c_vpmsum_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_vpmsum_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg =3D { - .setkey =3D crc32c_vpmsum_setkey, - .init =3D crc32c_vpmsum_init, - .update =3D crc32c_vpmsum_update, - .final =3D crc32c_vpmsum_final, - .finup =3D crc32c_vpmsum_finup, - .digest =3D crc32c_vpmsum_digest, - .descsize =3D sizeof(u32), - .digestsize =3D CHKSUM_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-vpmsum", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(u32), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32c_vpmsum_cra_init, - } -}; - -static int __init 
crc32c_vpmsum_mod_init(void) -{ - if (!cpu_has_feature(CPU_FTR_ARCH_207S)) - return -ENODEV; - - return crypto_register_shash(&alg); -} - -static void __exit crc32c_vpmsum_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_cpu_feature_match(PPC_MODULE_FEATURE_VEC_CRYPTO, crc32c_vpmsum_mod_= init); -module_exit(crc32c_vpmsum_mod_fini); - -MODULE_AUTHOR("Anton Blanchard "); -MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructio= ns"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vpmsum"); diff --git a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S b/arch/powerpc/cryp= to/crct10dif-vpmsum_asm.S index f0b93a0fe168a..0a52261bf8599 100644 --- a/arch/powerpc/crypto/crct10dif-vpmsum_asm.S +++ b/arch/powerpc/crypto/crct10dif-vpmsum_asm.S @@ -840,6 +840,6 @@ .octa 0x000000000000000000000001f65a57f8 /* x^64 div p(x) */ /* Barrett constant n */ .octa 0x0000000000000000000000018bb70000 =20 #define CRC_FUNCTION_NAME __crct10dif_vpmsum -#include "crc32-vpmsum_core.S" +#include "../lib/crc32-vpmsum_core.S" diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index f14ecab674a34..da9381a1c95b3 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -76,6 +76,9 @@ obj-$(CONFIG_FTR_FIXUP_SELFTEST) +=3D feature-fixups-test= .o obj-$(CONFIG_ALTIVEC) +=3D xor_vmx.o xor_vmx_glue.o CFLAGS_xor_vmx.o +=3D -mhard-float -maltivec $(call cc-option,-mabi=3Dalti= vec) # Enable CFLAGS_xor_vmx.o +=3D -isystem $(shell $(CC) -print-file-name=3Dinclude) =20 +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-powerpc.o +crc32-powerpc-y :=3D crc32-glue.o crc32c-vpmsum_asm.o + obj-$(CONFIG_PPC64) +=3D $(obj64-y) diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c new file mode 100644 index 0000000000000..d33bde6641bfd --- /dev/null +++ b/arch/powerpc/lib/crc32-glue.c @@ -0,0 +1,84 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include +#include +#include +#include +#include +#include 
+#include + +#define VMX_ALIGN 16 +#define VMX_ALIGN_MASK (VMX_ALIGN-1) + +#define VECTOR_BREAKPOINT 512 + +static DEFINE_STATIC_KEY_FALSE(have_vec_crypto); + +u32 __crc32c_vpmsum(u32 crc, const u8 *p, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_le_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + unsigned int prealign; + unsigned int tail; + + if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) || !crypto_simd_usable() || + !static_branch_likely(&have_vec_crypto)) + return crc32c_le_base(crc, p, len); + + if ((unsigned long)p & VMX_ALIGN_MASK) { + prealign =3D VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK); + crc =3D crc32c_le_base(crc, p, prealign); + len -=3D prealign; + p +=3D prealign; + } + + if (len & ~VMX_ALIGN_MASK) { + preempt_disable(); + pagefault_disable(); + enable_kernel_altivec(); + crc =3D __crc32c_vpmsum(crc, p, len & ~VMX_ALIGN_MASK); + disable_kernel_altivec(); + pagefault_enable(); + preempt_enable(); + } + + tail =3D len & VMX_ALIGN_MASK; + if (tail) { + p +=3D len & ~VMX_ALIGN_MASK; + crc =3D crc32c_le_base(crc, p, tail); + } + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_powerpc_init(void) +{ + if (cpu_has_feature(CPU_FTR_ARCH_207S) && + (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_VEC_CRYPTO)) + static_branch_enable(&have_vec_crypto); + return 0; +} +arch_initcall(crc32_powerpc_init); + +static void __exit crc32_powerpc_exit(void) +{ +} +module_exit(crc32_powerpc_exit); + +MODULE_AUTHOR("Anton Blanchard "); +MODULE_DESCRIPTION("CRC32C using vector polynomial multiply-sum instructio= ns"); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/lib/crc= 32-vpmsum_core.S similarity index 100% rename from arch/powerpc/crypto/crc32-vpmsum_core.S rename 
to arch/powerpc/lib/crc32-vpmsum_core.S diff --git a/arch/powerpc/crypto/crc32c-vpmsum_asm.S b/arch/powerpc/lib/crc= 32c-vpmsum_asm.S similarity index 100% rename from arch/powerpc/crypto/crc32c-vpmsum_asm.S rename to arch/powerpc/lib/crc32c-vpmsum_asm.S --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 210DA4437C; Mon, 21 Oct 2024 00:29:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470595; cv=none; b=dyWxEmzwKPGT9hRp8Yys0BLzjOTjHcpTz/qv5bGVy94BRluduFbbuSz3qnrVrAkqu3lBaEdW9vRyu63COZWtJ2FaQtrke34cVDFQ1fxBALGl9+cBNbhr5ykNy1dz/zoI9oCMBrCceG/kZHII7JA3UYtipcFkjFmpvSe96gD/7NE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470595; c=relaxed/simple; bh=oHN4Jm8KAp+40wEWTPzgeuvgj07tFEJ9e7R5m6FX/oQ=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=s+R+/Ze7kzH6SPp2PIIYgfJ6QorLFIRXkUF1MnHS00EVso0Q1mNe8cGzF9dL04dX/m4Lo/A+rZ+r9JfvagHsEILgCv+zMNM3PaA5//Q+/RgdqZ3UT9fGZiL3s16bcXJw9IsqKeA9GhGHRzMJmbCzGqz3asT8AO/9KXSUYuLtDho= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=VTNr9Iy+; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="VTNr9Iy+" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3F9DCC4CEF8; Mon, 21 Oct 2024 00:29:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470594; bh=oHN4Jm8KAp+40wEWTPzgeuvgj07tFEJ9e7R5m6FX/oQ=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=VTNr9Iy+61iOYLM3FA1i9/px61Rl+8yq33ofmvw43Lts03dEvrJF89F5yrpAThjT+ z1OeNMtray6VYJehHbOQ0cgSFExYuI+rAOjwvEoCnupSxkh57l3bVixVpDqFN4mJvW K8rSCD9z8DqO8L+WH38POcwcqya/HVOHn4rDOniVMcIj5/6TThna0w9igpHk8JvZk+ FI2K5BaQDDL3ABLQ89KXTB1h2SszCm6RNS9GYKhPO/WpOr0mvhvee/9XMbJ9xrtSrj Js81xp7GkFh4PlxZB/7jF40VjlLzW4s66eY0dMS2UXHC3mpeW0iwt9xYK2yr26ZZgI yYQpVghFxOGnA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 07/15] s390/crc32: expose CRC32 functions through lib Date: Sun, 20 Oct 2024 17:29:27 -0700 Message-ID: <20241021002935.325878-8-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the s390 CRC32 assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/s390/crypto/crc32-vx.c to arch/s390/lib/crc32-glue.c, view this commit with 'git show -M10'. 
Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/s390/Kconfig | 1 + arch/s390/configs/debug_defconfig | 1 - arch/s390/configs/defconfig | 1 - arch/s390/crypto/Kconfig | 12 - arch/s390/crypto/Makefile | 2 - arch/s390/crypto/crc32-vx.c | 306 ------------------------- arch/s390/lib/Makefile | 3 + arch/s390/lib/crc32-glue.c | 82 +++++++ arch/s390/{crypto =3D> lib}/crc32-vx.h | 0 arch/s390/{crypto =3D> lib}/crc32be-vx.c | 0 arch/s390/{crypto =3D> lib}/crc32le-vx.c | 0 11 files changed, 86 insertions(+), 322 deletions(-) delete mode 100644 arch/s390/crypto/crc32-vx.c create mode 100644 arch/s390/lib/crc32-glue.c rename arch/s390/{crypto =3D> lib}/crc32-vx.h (100%) rename arch/s390/{crypto =3D> lib}/crc32be-vx.c (100%) rename arch/s390/{crypto =3D> lib}/crc32le-vx.c (100%) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index d339fe4fdedf8..d1fde8b941d2f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -63,10 +63,11 @@ config S390 select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_de= fconfig index fb0e9a1d9be25..fd83d8958f0bb 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -792,11 +792,10 @@ CONFIG_CRYPTO_ZSTD=3Dm CONFIG_CRYPTO_ANSI_CPRNG=3Dm CONFIG_CRYPTO_USER_API_HASH=3Dm CONFIG_CRYPTO_USER_API_SKCIPHER=3Dm CONFIG_CRYPTO_USER_API_RNG=3Dm CONFIG_CRYPTO_USER_API_AEAD=3Dm -CONFIG_CRYPTO_CRC32_S390=3Dy CONFIG_CRYPTO_SHA512_S390=3Dm CONFIG_CRYPTO_SHA1_S390=3Dm CONFIG_CRYPTO_SHA256_S390=3Dm CONFIG_CRYPTO_SHA3_256_S390=3Dm CONFIG_CRYPTO_SHA3_512_S390=3Dm diff --git a/arch/s390/configs/defconfig 
b/arch/s390/configs/defconfig index 88be0a734b60f..3bdeb6d5cbd95 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -779,11 +779,10 @@ CONFIG_CRYPTO_ANSI_CPRNG=3Dm CONFIG_CRYPTO_JITTERENTROPY_OSR=3D1 CONFIG_CRYPTO_USER_API_HASH=3Dm CONFIG_CRYPTO_USER_API_SKCIPHER=3Dm CONFIG_CRYPTO_USER_API_RNG=3Dm CONFIG_CRYPTO_USER_API_AEAD=3Dm -CONFIG_CRYPTO_CRC32_S390=3Dy CONFIG_CRYPTO_SHA512_S390=3Dm CONFIG_CRYPTO_SHA1_S390=3Dm CONFIG_CRYPTO_SHA256_S390=3Dm CONFIG_CRYPTO_SHA3_256_S390=3Dm CONFIG_CRYPTO_SHA3_512_S390=3Dm diff --git a/arch/s390/crypto/Kconfig b/arch/s390/crypto/Kconfig index d3eb3a2336932..b760232537f1c 100644 --- a/arch/s390/crypto/Kconfig +++ b/arch/s390/crypto/Kconfig @@ -1,21 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 =20 menu "Accelerated Cryptographic Algorithms for CPU (s390)" =20 -config CRYPTO_CRC32_S390 - tristate "CRC32c and CRC32" - depends on S390 - select CRYPTO_HASH - select CRC32 - help - CRC32c and CRC32 CRC algorithms - - Architecture: s390 - - It is available with IBM z13 or later. 
- config CRYPTO_SHA512_S390 tristate "Hash functions: SHA-384 and SHA-512" depends on S390 select CRYPTO_HASH help diff --git a/arch/s390/crypto/Makefile b/arch/s390/crypto/Makefile index a0cb96937c3de..14dafadbcbed4 100644 --- a/arch/s390/crypto/Makefile +++ b/arch/s390/crypto/Makefile @@ -12,11 +12,9 @@ obj-$(CONFIG_CRYPTO_DES_S390) +=3D des_s390.o obj-$(CONFIG_CRYPTO_AES_S390) +=3D aes_s390.o obj-$(CONFIG_CRYPTO_PAES_S390) +=3D paes_s390.o obj-$(CONFIG_CRYPTO_CHACHA_S390) +=3D chacha_s390.o obj-$(CONFIG_S390_PRNG) +=3D prng.o obj-$(CONFIG_CRYPTO_GHASH_S390) +=3D ghash_s390.o -obj-$(CONFIG_CRYPTO_CRC32_S390) +=3D crc32-vx_s390.o obj-$(CONFIG_CRYPTO_HMAC_S390) +=3D hmac_s390.o obj-y +=3D arch_random.o =20 -crc32-vx_s390-y :=3D crc32-vx.o crc32le-vx.o crc32be-vx.o chacha_s390-y :=3D chacha-glue.o chacha-s390.o diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c deleted file mode 100644 index 89a10337e6ea9..0000000000000 --- a/arch/s390/crypto/crc32-vx.c +++ /dev/null @@ -1,306 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Crypto-API module for CRC-32 algorithms implemented with the - * z/Architecture Vector Extension Facility. - * - * Copyright IBM Corp. 2015 - * Author(s): Hendrik Brueckner - */ -#define KMSG_COMPONENT "crc32-vx" -#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt - -#include -#include -#include -#include -#include -#include "crc32-vx.h" - -#define CRC32_BLOCK_SIZE 1 -#define CRC32_DIGEST_SIZE 4 - -#define VX_MIN_LEN 64 -#define VX_ALIGNMENT 16L -#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) - -struct crc_ctx { - u32 key; -}; - -struct crc_desc_ctx { - u32 crc; -}; - -/* - * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension - * - * Creates a function to perform a particular CRC-32 computation. Depending - * on the message buffer, the hardware-accelerated or software implementat= ion - * is used. Note that the message buffer is aligned to improve fetch - * operations of VECTOR LOAD MULTIPLE instructions. 
- * - */ -#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ - static u32 __pure ___fname(u32 crc, \ - unsigned char const *data, size_t datalen) \ - { \ - unsigned long prealign, aligned, remaining; \ - DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ - \ - if (datalen < VX_MIN_LEN + VX_ALIGN_MASK) \ - return ___crc32_sw(crc, data, datalen); \ - \ - if ((unsigned long)data & VX_ALIGN_MASK) { \ - prealign =3D VX_ALIGNMENT - \ - ((unsigned long)data & VX_ALIGN_MASK); \ - datalen -=3D prealign; \ - crc =3D ___crc32_sw(crc, data, prealign); \ - data =3D (void *)((unsigned long)data + prealign); \ - } \ - \ - aligned =3D datalen & ~VX_ALIGN_MASK; \ - remaining =3D datalen & VX_ALIGN_MASK; \ - \ - kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ - crc =3D ___crc32_vx(crc, data, aligned); \ - kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ - \ - if (remaining) \ - crc =3D ___crc32_sw(crc, data + aligned, remaining); \ - \ - return crc; \ - } - -DEFINE_CRC32_VX(crc32_le_vx, crc32_le_vgfm_16, crc32_le) -DEFINE_CRC32_VX(crc32_be_vx, crc32_be_vgfm_16, crc32_be) -DEFINE_CRC32_VX(crc32c_le_vx, crc32c_le_vgfm_16, __crc32c_le) - - -static int crc32_vx_cra_init_zero(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D 0; - return 0; -} - -static int crc32_vx_cra_init_invert(struct crypto_tfm *tfm) -{ - struct crc_ctx *mctx =3D crypto_tfm_ctx(tfm); - - mctx->key =3D ~0; - return 0; -} - -static int crc32_vx_init(struct shash_desc *desc) -{ - struct crc_ctx *mctx =3D crypto_shash_ctx(desc->tfm); - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); - - ctx->crc =3D mctx->key; - return 0; -} - -static int crc32_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned int newkeylen) -{ - struct crc_ctx *mctx =3D crypto_shash_ctx(tfm); - - if (newkeylen !=3D sizeof(mctx->key)) - return -EINVAL; - mctx->key =3D le32_to_cpu(*(__le32 *)newkey); - return 0; -} - -static int crc32be_vx_setkey(struct crypto_shash *tfm, const u8 *newkey, - unsigned 
int newkeylen) -{ - struct crc_ctx *mctx =3D crypto_shash_ctx(tfm); - - if (newkeylen !=3D sizeof(mctx->key)) - return -EINVAL; - mctx->key =3D be32_to_cpu(*(__be32 *)newkey); - return 0; -} - -static int crc32le_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); - - *(__le32 *)out =3D cpu_to_le32p(&ctx->crc); - return 0; -} - -static int crc32be_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); - - *(__be32 *)out =3D cpu_to_be32p(&ctx->crc); - return 0; -} - -static int crc32c_vx_final(struct shash_desc *desc, u8 *out) -{ - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); - - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. - */ - *(__le32 *)out =3D ~cpu_to_le32p(&ctx->crc); - return 0; -} - -static int __crc32le_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__le32 *)out =3D cpu_to_le32(crc32_le_vx(*crc, data, len)); - return 0; -} - -static int __crc32be_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - *(__be32 *)out =3D cpu_to_be32(crc32_be_vx(*crc, data, len)); - return 0; -} - -static int __crc32c_vx_finup(u32 *crc, const u8 *data, unsigned int len, - u8 *out) -{ - /* - * Perform a final XOR with 0xFFFFFFFF to be in sync - * with the generic crc32c shash implementation. 
- */ - *(__le32 *)out =3D ~cpu_to_le32(crc32c_le_vx(*crc, data, len)); - return 0; -} - - -#define CRC32_VX_FINUP(alg, func) \ - static int alg ## _vx_finup(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(shash_desc_ctx(desc), \ - data, datalen, out); \ - } - -CRC32_VX_FINUP(crc32le, crc32_le_vx) -CRC32_VX_FINUP(crc32be, crc32_be_vx) -CRC32_VX_FINUP(crc32c, crc32c_le_vx) - -#define CRC32_VX_DIGEST(alg, func) \ - static int alg ## _vx_digest(struct shash_desc *desc, const u8 *data, \ - unsigned int len, u8 *out) \ - { \ - return __ ## alg ## _vx_finup(crypto_shash_ctx(desc->tfm), \ - data, len, out); \ - } - -CRC32_VX_DIGEST(crc32le, crc32_le_vx) -CRC32_VX_DIGEST(crc32be, crc32_be_vx) -CRC32_VX_DIGEST(crc32c, crc32c_le_vx) - -#define CRC32_VX_UPDATE(alg, func) \ - static int alg ## _vx_update(struct shash_desc *desc, const u8 *data, \ - unsigned int datalen) \ - { \ - struct crc_desc_ctx *ctx =3D shash_desc_ctx(desc); \ - ctx->crc =3D func(ctx->crc, data, datalen); \ - return 0; \ - } - -CRC32_VX_UPDATE(crc32le, crc32_le_vx) -CRC32_VX_UPDATE(crc32be, crc32_be_vx) -CRC32_VX_UPDATE(crc32c, crc32c_le_vx) - - -static struct shash_alg crc32_vx_algs[] =3D { - /* CRC-32 LE */ - { - .init =3D crc32_vx_init, - .setkey =3D crc32_vx_setkey, - .update =3D crc32le_vx_update, - .final =3D crc32le_vx_final, - .finup =3D crc32le_vx_finup, - .digest =3D crc32le_vx_digest, - .descsize =3D sizeof(struct crc_desc_ctx), - .digestsize =3D CRC32_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32", - .cra_driver_name =3D "crc32-vx", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CRC32_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct crc_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32_vx_cra_init_zero, - }, - }, - /* CRC-32 BE */ - { - .init =3D crc32_vx_init, - .setkey =3D crc32be_vx_setkey, - .update =3D crc32be_vx_update, - .final =3D crc32be_vx_final, - .finup =3D 
crc32be_vx_finup, - .digest =3D crc32be_vx_digest, - .descsize =3D sizeof(struct crc_desc_ctx), - .digestsize =3D CRC32_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32be", - .cra_driver_name =3D "crc32be-vx", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CRC32_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct crc_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32_vx_cra_init_zero, - }, - }, - /* CRC-32C LE */ - { - .init =3D crc32_vx_init, - .setkey =3D crc32_vx_setkey, - .update =3D crc32c_vx_update, - .final =3D crc32c_vx_final, - .finup =3D crc32c_vx_finup, - .digest =3D crc32c_vx_digest, - .descsize =3D sizeof(struct crc_desc_ctx), - .digestsize =3D CRC32_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-vx", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CRC32_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(struct crc_ctx), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32_vx_cra_init_invert, - }, - }, -}; - - -static int __init crc_vx_mod_init(void) -{ - return crypto_register_shashes(crc32_vx_algs, - ARRAY_SIZE(crc32_vx_algs)); -} - -static void __exit crc_vx_mod_exit(void) -{ - crypto_unregister_shashes(crc32_vx_algs, ARRAY_SIZE(crc32_vx_algs)); -} - -module_cpu_feature_match(S390_CPU_FEATURE_VXRS, crc_vx_mod_init); -module_exit(crc_vx_mod_exit); - -MODULE_AUTHOR("Hendrik Brueckner "); -MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extensio= n Facility"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-vx"); -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-vx"); diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index f43f897d3fc02..14bbfe50033c7 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -22,5 +22,8 @@ obj-$(CONFIG_S390_MODULES_SANITY_TEST) +=3D test_modules.o obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) +=3D test_modules_helpers.o =20 
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o =20 obj-$(CONFIG_EXPOLINE_EXTERN) +=3D expoline.o + +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-s390.o +crc32-s390-y :=3D crc32-glue.o crc32le-vx.o crc32be-vx.o diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c new file mode 100644 index 0000000000000..6243132633aab --- /dev/null +++ b/arch/s390/lib/crc32-glue.c @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * CRC-32 implemented with the z/Architecture Vector Extension Facility. + * + * Copyright IBM Corp. 2015 + * Author(s): Hendrik Brueckner + */ +#define KMSG_COMPONENT "crc32-vx" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include "crc32-vx.h" + +#define VX_MIN_LEN 64 +#define VX_ALIGNMENT 16L +#define VX_ALIGN_MASK (VX_ALIGNMENT - 1) + +static DEFINE_STATIC_KEY_FALSE(have_vxrs); + +/* + * DEFINE_CRC32_VX() - Define a CRC-32 function using the vector extension + * + * Creates a function to perform a particular CRC-32 computation. Depending + * on the message buffer, the hardware-accelerated or software implementat= ion + * is used. Note that the message buffer is aligned to improve fetch + * operations of VECTOR LOAD MULTIPLE instructions. 
+ */ +#define DEFINE_CRC32_VX(___fname, ___crc32_vx, ___crc32_sw) \ + u32 ___fname(u32 crc, const u8 *data, size_t datalen) \ + { \ + unsigned long prealign, aligned, remaining; \ + DECLARE_KERNEL_FPU_ONSTACK16(vxstate); \ + \ + if (datalen < VX_MIN_LEN + VX_ALIGN_MASK || \ + !static_branch_likely(&have_vxrs)) \ + return ___crc32_sw(crc, data, datalen); \ + \ + if ((unsigned long)data & VX_ALIGN_MASK) { \ + prealign =3D VX_ALIGNMENT - \ + ((unsigned long)data & VX_ALIGN_MASK); \ + datalen -=3D prealign; \ + crc =3D ___crc32_sw(crc, data, prealign); \ + data =3D (void *)((unsigned long)data + prealign); \ + } \ + \ + aligned =3D datalen & ~VX_ALIGN_MASK; \ + remaining =3D datalen & VX_ALIGN_MASK; \ + \ + kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW); \ + crc =3D ___crc32_vx(crc, data, aligned); \ + kernel_fpu_end(&vxstate, KERNEL_VXR_LOW); \ + \ + if (remaining) \ + crc =3D ___crc32_sw(crc, data + aligned, remaining); \ + \ + return crc; \ + } \ + EXPORT_SYMBOL(___fname); + +DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base) +DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base) +DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base) + +static int __init crc32_s390_init(void) +{ + if (cpu_have_feature(S390_CPU_FEATURE_VXRS)) + static_branch_enable(&have_vxrs); + return 0; +} +arch_initcall(crc32_s390_init); + +static void __exit crc32_s390_exit(void) +{ +} +module_exit(crc32_s390_exit); + +MODULE_AUTHOR("Hendrik Brueckner "); +MODULE_DESCRIPTION("CRC-32 algorithms using z/Architecture Vector Extensio= n Facility"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/crypto/crc32-vx.h b/arch/s390/lib/crc32-vx.h similarity index 100% rename from arch/s390/crypto/crc32-vx.h rename to arch/s390/lib/crc32-vx.h diff --git a/arch/s390/crypto/crc32be-vx.c b/arch/s390/lib/crc32be-vx.c similarity index 100% rename from arch/s390/crypto/crc32be-vx.c rename to arch/s390/lib/crc32be-vx.c diff --git a/arch/s390/crypto/crc32le-vx.c 
b/arch/s390/lib/crc32le-vx.c similarity index 100% rename from arch/s390/crypto/crc32le-vx.c rename to arch/s390/lib/crc32le-vx.c --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id DD8A873451; Mon, 21 Oct 2024 00:29:55 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470596; cv=none; b=n6XOXIuh/51dJhFjQwlVXjvSZdl+Emxf0Day2QAMZA92esORRbs0t+VANMUYvOGYjTklgcHrAxtpSvod7wQgQrTzIG4CvrcFgT5Mp1HYvd/WftjnPZqSU5iod1UKSGF7Ttwsy3Mi+UmbHOK1YoND5rRc1L0i0IrouNGY2gfFIiw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470596; c=relaxed/simple; bh=rHVPCVk2HcUuwbwJdIJiYtVAk/DIw6qPFcDDLFcj+/s=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=TPRp+IuV+YQNzDRLuiWyPC7LO1F8z6c2qmmqXW0jN8piEa5e/TXYCUHGj2x/cClrAJzzJoVakfMNh1hklTR05NPWhru7oDLGDYsg5ucelgL8l7wsLSsR0dIss78wvWQvsImUN+4Iix/+60bP7T4ht4fZd0kr3k3rXxtKaXi/n14= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=oimnqgwb; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="oimnqgwb" Received: by smtp.kernel.org (Postfix) with ESMTPSA id C2F51C4AF52; Mon, 21 Oct 2024 00:29:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470595; bh=rHVPCVk2HcUuwbwJdIJiYtVAk/DIw6qPFcDDLFcj+/s=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=oimnqgwblTl4P7ykphakIn/6ohauWWoNYpiucv9VSHcP0rxQt12ClLANt9RUZgGBo 
ulCKpFBa0z+FKjaRCSOXh8a+aT/TqjBgKHn+2oSmKkLLTtzFV00fbjWic8J9HWHpCn Hc7lOAEAsQn4BT4Lnp7X3PmY722dqJAbqNvRL6cWPHWlpWxpJ/L3XD9Tt7x+q7xX6x PbvKTmcU3kG6rd9F+gdF7duMpk//vuhib47IxODYjKSQFWiC6C62x5GEZ0robLZxGQ 0QCNJvjmTGKo66OG3GQkrQeM6d86VWl4TLI7m9sMEv0sBxCZoLZSelcpwoIYbtotpm VEJTU43g2h2FA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 08/15] sparc/crc32: expose CRC32 functions through lib Date: Sun, 20 Oct 2024 17:29:28 -0700 Message-ID: <20241021002935.325878-9-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the sparc CRC32C assembly code into the lib directory and wire it up to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Note: to see the diff from arch/sparc/crypto/crc32c_glue.c to arch/sparc/lib/crc32_glue.c, view this commit with 'git show -M10'. 
Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/sparc/Kconfig | 1 + arch/sparc/crypto/Kconfig | 10 -- arch/sparc/crypto/Makefile | 4 - arch/sparc/crypto/crc32c_glue.c | 184 ------------------------ arch/sparc/lib/Makefile | 2 + arch/sparc/lib/crc32_glue.c | 85 +++++++++++ arch/sparc/{crypto =3D> lib}/crc32c_asm.S | 2 +- 7 files changed, 89 insertions(+), 199 deletions(-) delete mode 100644 arch/sparc/crypto/crc32c_glue.c create mode 100644 arch/sparc/lib/crc32_glue.c rename arch/sparc/{crypto =3D> lib}/crc32c_asm.S (92%) diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index dcfdb7f1dae97..0f88123925a4f 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -108,10 +108,11 @@ config SPARC64 select ARCH_HAS_GIGANTIC_PAGE select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_SETUP_PER_CPU_AREA select NEED_PER_CPU_EMBED_FIRST_CHUNK select NEED_PER_CPU_PAGE_FIRST_CHUNK + select ARCH_HAS_CRC32 =20 config ARCH_PROC_KCORE_TEXT def_bool y =20 config CPU_BIG_ENDIAN diff --git a/arch/sparc/crypto/Kconfig b/arch/sparc/crypto/Kconfig index cfe5102b1c683..e858597de89db 100644 --- a/arch/sparc/crypto/Kconfig +++ b/arch/sparc/crypto/Kconfig @@ -14,20 +14,10 @@ config CRYPTO_DES_SPARC64 Length-preserving ciphers: DES with ECB and CBC modes Length-preserving ciphers: Tripe DES EDE with ECB and CBC modes =20 Architecture: sparc64 =20 -config CRYPTO_CRC32C_SPARC64 - tristate "CRC32c" - depends on SPARC64 - select CRYPTO_HASH - select CRC32 - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: sparc64 - config CRYPTO_MD5_SPARC64 tristate "Digests: MD5" depends on SPARC64 select CRYPTO_MD5 select CRYPTO_HASH diff --git a/arch/sparc/crypto/Makefile b/arch/sparc/crypto/Makefile index d257186c27d12..a2d7fca40cb4b 100644 --- a/arch/sparc/crypto/Makefile +++ b/arch/sparc/crypto/Makefile @@ -10,17 +10,13 @@ obj-$(CONFIG_CRYPTO_MD5_SPARC64) +=3D md5-sparc64.o =20 obj-$(CONFIG_CRYPTO_AES_SPARC64) +=3D aes-sparc64.o 
obj-$(CONFIG_CRYPTO_DES_SPARC64) +=3D des-sparc64.o obj-$(CONFIG_CRYPTO_CAMELLIA_SPARC64) +=3D camellia-sparc64.o =20 -obj-$(CONFIG_CRYPTO_CRC32C_SPARC64) +=3D crc32c-sparc64.o - sha1-sparc64-y :=3D sha1_asm.o sha1_glue.o sha256-sparc64-y :=3D sha256_asm.o sha256_glue.o sha512-sparc64-y :=3D sha512_asm.o sha512_glue.o md5-sparc64-y :=3D md5_asm.o md5_glue.o =20 aes-sparc64-y :=3D aes_asm.o aes_glue.o des-sparc64-y :=3D des_asm.o des_glue.o camellia-sparc64-y :=3D camellia_asm.o camellia_glue.o - -crc32c-sparc64-y :=3D crc32c_asm.o crc32c_glue.o diff --git a/arch/sparc/crypto/crc32c_glue.c b/arch/sparc/crypto/crc32c_glu= e.c deleted file mode 100644 index 913b9a09e885d..0000000000000 --- a/arch/sparc/crypto/crc32c_glue.c +++ /dev/null @@ -1,184 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* Glue code for CRC32C optimized for sparc64 crypto opcodes. - * - * This is based largely upon arch/x86/crypto/crc32c-intel.c - * - * Copyright (C) 2008 Intel Corporation - * Authors: Austin Zhang - * Kent Liu - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include "opcodes.h" - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. 
- */ -static int crc32c_sparc64_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D get_unaligned_le32(key); - return 0; -} - -static int crc32c_sparc64_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D *mctx; - - return 0; -} - -extern void crc32c_sparc64(u32 *crcp, const u64 *data, unsigned int len); - -static u32 crc32c_compute(u32 crc, const u8 *data, unsigned int len) -{ - unsigned int n =3D -(uintptr_t)data & 7; - - if (n) { - /* Data isn't 8-byte aligned. Align it. */ - n =3D min(n, len); - crc =3D __crc32c_le(crc, data, n); - data +=3D n; - len -=3D n; - } - n =3D len & ~7U; - if (n) { - crc32c_sparc64(&crc, (const u64 *)data, n); - data +=3D n; - len -=3D n; - } - if (len) - crc =3D __crc32c_le(crc, data, len); - return crc; -} - -static int crc32c_sparc64_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D crc32c_compute(*crcp, data, len); - return 0; -} - -static int __crc32c_sparc64_finup(const u32 *crcp, const u8 *data, - unsigned int len, u8 *out) -{ - put_unaligned_le32(~crc32c_compute(*crcp, data, len), out); - return 0; -} - -static int crc32c_sparc64_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_sparc64_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_sparc64_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - put_unaligned_le32(~*crcp, out); - return 0; -} - -static int crc32c_sparc64_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_sparc64_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static int crc32c_sparc64_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D ~0; - - return 
0; -} - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -static struct shash_alg alg =3D { - .setkey =3D crc32c_sparc64_setkey, - .init =3D crc32c_sparc64_init, - .update =3D crc32c_sparc64_update, - .final =3D crc32c_sparc64_final, - .finup =3D crc32c_sparc64_finup, - .digest =3D crc32c_sparc64_digest, - .descsize =3D sizeof(u32), - .digestsize =3D CHKSUM_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-sparc64", - .cra_priority =3D SPARC_CR_OPCODE_PRIORITY, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(u32), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32c_sparc64_cra_init, - } -}; - -static bool __init sparc64_has_crc32c_opcode(void) -{ - unsigned long cfr; - - if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) - return false; - - __asm__ __volatile__("rd %%asr26, %0" : "=3Dr" (cfr)); - if (!(cfr & CFR_CRC32C)) - return false; - - return true; -} - -static int __init crc32c_sparc64_mod_init(void) -{ - if (sparc64_has_crc32c_opcode()) { - pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); - return crypto_register_shash(&alg); - } - pr_info("sparc64 crc32c opcode not available.\n"); - return -ENODEV; -} - -static void __exit crc32c_sparc64_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32c_sparc64_mod_init); -module_exit(crc32c_sparc64_mod_fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated= "); - -MODULE_ALIAS_CRYPTO("crc32c"); - -#include "crop_devid.c" diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index ee5091dd67ed7..5724d0f356eb5 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -51,5 +51,7 @@ lib-$(CONFIG_SPARC64) +=3D copy_in_user.o memmove.o lib-$(CONFIG_SPARC64) +=3D mcount.o ipcsum.o xor.o hweight.o ffs.o =20 obj-$(CONFIG_SPARC64) +=3D iomap.o obj-$(CONFIG_SPARC32) +=3D atomic32.o obj-$(CONFIG_SPARC64) 
+=3D PeeCeeI.o +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-sparc.o +crc32-sparc-y :=3D crc32_glue.o crc32c_asm.o diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32_glue.c new file mode 100644 index 0000000000000..ef2eadf4a303e --- /dev/null +++ b/arch/sparc/lib/crc32_glue.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Glue code for CRC32C optimized for sparc64 crypto opcodes. + * + * This is based largely upon arch/x86/crypto/crc32c-intel.c + * + * Copyright (C) 2008 Intel Corporation + * Authors: Austin Zhang + * Kent Liu + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include + +static DEFINE_STATIC_KEY_FALSE(have_crc32c_opcode); + +u32 crc32_le_arch(u32 crc, const u8 *data, size_t len) +{ + return crc32_le_base(crc, data, len); +} +EXPORT_SYMBOL(crc32_le_arch); + +void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len); + +u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len) +{ + size_t n =3D -(uintptr_t)data & 7; + + if (!static_branch_likely(&have_crc32c_opcode)) + return crc32c_le_base(crc, data, len); + + if (n) { + /* Data isn't 8-byte aligned. Align it. 
*/ + n =3D min(n, len); + crc =3D crc32c_le_base(crc, data, n); + data +=3D n; + len -=3D n; + } + n =3D len & ~7U; + if (n) { + crc32c_sparc64(&crc, (const u64 *)data, n); + data +=3D n; + len -=3D n; + } + if (len) + crc =3D crc32c_le_base(crc, data, len); + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *data, size_t len) +{ + return crc32_be_base(crc, data, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_sparc_init(void) +{ + unsigned long cfr; + + if (!(sparc64_elf_hwcap & HWCAP_SPARC_CRYPTO)) + return 0; + + __asm__ __volatile__("rd %%asr26, %0" : "=3Dr" (cfr)); + if (!(cfr & CFR_CRC32C)) + return 0; + + static_branch_enable(&have_crc32c_opcode); + pr_info("Using sparc64 crc32c opcode optimized CRC32C implementation\n"); + return 0; +} +arch_initcall(crc32_sparc_init); + +static void __exit crc32_sparc_exit(void) +{ +} +module_exit(crc32_sparc_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("CRC32c (Castagnoli), sparc64 crc32c opcode accelerated= "); diff --git a/arch/sparc/crypto/crc32c_asm.S b/arch/sparc/lib/crc32c_asm.S similarity index 92% rename from arch/sparc/crypto/crc32c_asm.S rename to arch/sparc/lib/crc32c_asm.S index b8659a479242d..ee454fa6aed68 100644 --- a/arch/sparc/crypto/crc32c_asm.S +++ b/arch/sparc/lib/crc32c_asm.S @@ -1,11 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include #include #include =20 -#include "opcodes.h" +#include "../crypto/opcodes.h" =20 ENTRY(crc32c_sparc64) /* %o0=3Dcrc32p, %o1=3Ddata_ptr, %o2=3Dlen */ VISEntryHalf lda [%o0] ASI_PL, %f1 --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 28A3878C8B; Mon, 21 Oct 2024 00:29:55 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none 
smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470596; cv=none; b=AbOmW3w8fwi26Qfwn0D0UvCvN6akyuHoltsllsr7uCu1uZDGYQqmSNSi0MzC/IsJ9ubgCSuOHzzh1YzkgoosHg2Rr2ZwlGH9ubd8qEdD7ylNN6fFApQROM7SjIyDZF//VamRysDzPNSLReQhsHHJjlgYXOfm8s1wiOHFAsRn770= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470596; c=relaxed/simple; bh=K7IQXSlseSFWY09dP+sFW8amqlu8VZ4K90+uvPMkb5k=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=EGguSTuY87IQri9LfCZdsNp8R/qJJCvxy7XEOWdangu9JpkXsZpPhfPRVtc0usVO82k3/lwq1b0kOKQS0DIqp/EAw/KW5XDNTKqdujuNAhCn3aq3MiAv/gWyPmJvwEKtZNHVHHzWaAnfCOBcFH4vuDLW3NXEhZPC5UZa8DAc3D8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=kRVIL6wx; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="kRVIL6wx" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 50170C4CEE8; Mon, 21 Oct 2024 00:29:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470595; bh=K7IQXSlseSFWY09dP+sFW8amqlu8VZ4K90+uvPMkb5k=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=kRVIL6wxe6/gpNnilSebEdNmVWWiuy+cK9AnqQvhtiwG7fs8Bfcm5sg/WegtI9I81 2MvpxWVvs4DLmyFb6a2pkKGpMaStFUAU7d5mqM+Sj7bEPIv0UNz6U5LJ/2PariLPce MaH4Fgd8c+PzE9ploabkvBDygZ/sF8wGOPBjSEGmSBR5/09u1Wi463R2RfCusvd9+2 SnQrzfpkQqY7Z133rLcCOnv37OZBsDp89nlGziSOwgvUETzilUKeqLc5M8AXjLS1aq ro0BFRM1A5U+wq+2rOQYIV2n8w2UZR5lEbcFXLMGmPB5lHFZHOADW1yVcPab4MzzAb p+rIKzHF8XgHQ== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, 
linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 09/15] x86/crc32: update prototype for crc_pcl() Date: Sun, 20 Oct 2024 17:29:29 -0700 Message-ID: <20241021002935.325878-10-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers - Change the len parameter from unsigned int to size_t, so that the library function which takes a size_t can safely use this code. - Rename to crc32c_x86_3way() which is much clearer. - Move the crc parameter to the front, as this is the usual convention. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/x86/crypto/crc32c-intel_glue.c | 7 ++- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 63 ++++++++++++----------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-i= ntel_glue.c index 52c5d47ef5a14..603d159de4007 100644 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ b/arch/x86/crypto/crc32c-intel_glue.c @@ -39,12 +39,11 @@ * size is >=3D 512 to account * for fpu state save/restore overhead. 
*/ #define CRC32C_PCL_BREAKEVEN 512 =20 -asmlinkage unsigned int crc_pcl(const u8 *buffer, unsigned int len, - unsigned int crc_init); +asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); #endif /* CONFIG_X86_64 */ =20 static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, siz= e_t length) { while (length--) { @@ -157,11 +156,11 @@ static int crc32c_pcl_intel_update(struct shash_desc = *desc, const u8 *data, * use faster PCL version if datasize is large enough to * overcome kernel fpu state save/restore overhead */ if (len >=3D CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); - *crcp =3D crc_pcl(data, len, *crcp); + *crcp =3D crc32c_x86_3way(*crcp, data, len); kernel_fpu_end(); } else *crcp =3D crc32c_intel_le_hw(*crcp, data, len); return 0; } @@ -169,11 +168,11 @@ static int crc32c_pcl_intel_update(struct shash_desc = *desc, const u8 *data, static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned in= t len, u8 *out) { if (len >=3D CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { kernel_fpu_begin(); - *(__le32 *)out =3D ~cpu_to_le32(crc_pcl(data, len, *crcp)); + *(__le32 *)out =3D ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len)); kernel_fpu_end(); } else *(__le32 *)out =3D ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); return 0; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/cr= c32c-pcl-intel-asm_64.S index 752812bc4991d..9b8770503bbcd 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -50,19 +50,20 @@ =20 # Define threshold below which buffers are considered "small" and routed to # regular CRC code that does not interleave the CRC instructions. 
#define SMALL_SIZE 200 =20 -# unsigned int crc_pcl(const u8 *buffer, unsigned int len, unsigned int cr= c_init); +# u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); =20 .text -SYM_FUNC_START(crc_pcl) -#define bufp %rdi -#define bufp_d %edi -#define len %esi -#define crc_init %edx -#define crc_init_q %rdx +SYM_FUNC_START(crc32c_x86_3way) +#define crc0 %edi +#define crc0_q %rdi +#define bufp %rsi +#define bufp_d %esi +#define len %rdx +#define len_dw %edx #define n_misaligned %ecx /* overlaps chunk_bytes! */ #define n_misaligned_q %rcx #define chunk_bytes %ecx /* overlaps n_misaligned! */ #define chunk_bytes_q %rcx #define crc1 %r8 @@ -83,13 +84,13 @@ SYM_FUNC_START(crc_pcl) # Process 1 <=3D n_misaligned <=3D 7 bytes individually in order to align # the remaining data to an 8-byte boundary. .Ldo_align: movq (bufp), %rax add n_misaligned_q, bufp - sub n_misaligned, len + sub n_misaligned_q, len .Lalign_loop: - crc32b %al, crc_init # compute crc32 of 1-byte + crc32b %al, crc0 # compute crc32 of 1-byte shr $8, %rax # get next byte dec n_misaligned jne .Lalign_loop .Laligned: =20 @@ -100,11 +101,11 @@ SYM_FUNC_START(crc_pcl) cmp $128*24, len jae .Lfull_block =20 .Lpartial_block: # Compute floor(len / 24) to get num qwords to process from each lane. - imul $2731, len, %eax # 2731 =3D ceil(2^16 / 24) + imul $2731, len_dw, %eax # 2731 =3D ceil(2^16 / 24) shr $16, %eax jmp .Lcrc_3lanes =20 .Lfull_block: # Processing 128 qwords from each lane. @@ -123,20 +124,20 @@ SYM_FUNC_START(crc_pcl) jl .Lcrc_3lanes_4x_done =20 # Unroll the loop by a factor of 4 to reduce the overhead of the loop # bookkeeping instructions, which can compete with crc32q for the ALUs. 
.Lcrc_3lanes_4x_loop: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 crc32q (bufp,chunk_bytes_q,2), crc2 - crc32q 8(bufp), crc_init_q + crc32q 8(bufp), crc0_q crc32q 8(bufp,chunk_bytes_q), crc1 crc32q 8(bufp,chunk_bytes_q,2), crc2 - crc32q 16(bufp), crc_init_q + crc32q 16(bufp), crc0_q crc32q 16(bufp,chunk_bytes_q), crc1 crc32q 16(bufp,chunk_bytes_q,2), crc2 - crc32q 24(bufp), crc_init_q + crc32q 24(bufp), crc0_q crc32q 24(bufp,chunk_bytes_q), crc1 crc32q 24(bufp,chunk_bytes_q,2), crc2 add $32, bufp sub $4, %eax jge .Lcrc_3lanes_4x_loop @@ -144,42 +145,42 @@ SYM_FUNC_START(crc_pcl) .Lcrc_3lanes_4x_done: add $4, %eax jz .Lcrc_3lanes_last_qword =20 .Lcrc_3lanes_1x_loop: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 crc32q (bufp,chunk_bytes_q,2), crc2 add $8, bufp dec %eax jnz .Lcrc_3lanes_1x_loop =20 .Lcrc_3lanes_last_qword: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q crc32q (bufp,chunk_bytes_q), crc1 # SKIP crc32q (bufp,chunk_bytes_q,2), crc2 ; Don't do this one yet =20 ################################################################ ## 4) Combine three results: ################################################################ =20 lea (K_table-8)(%rip), %rax # first entry is for idx 1 pmovzxdq (%rax,chunk_bytes_q), %xmm0 # 2 consts: K1:K2 lea (chunk_bytes,chunk_bytes,2), %eax # chunk_bytes * 3 - sub %eax, len # len -=3D chunk_bytes * 3 + sub %rax, len # len -=3D chunk_bytes * 3 =20 - movq crc_init_q, %xmm1 # CRC for block 1 + movq crc0_q, %xmm1 # CRC for block 1 pclmulqdq $0x00, %xmm0, %xmm1 # Multiply by K2 =20 movq crc1, %xmm2 # CRC for block 2 pclmulqdq $0x10, %xmm0, %xmm2 # Multiply by K1 =20 pxor %xmm2,%xmm1 movq %xmm1, %rax xor (bufp,chunk_bytes_q,2), %rax - mov crc2, crc_init_q - crc32 %rax, crc_init_q + mov crc2, crc0_q + crc32 %rax, crc0_q lea 8(bufp,chunk_bytes_q,2), bufp =20 ################################################################ ## 5) If more blocks remain, 
goto (2): ################################################################ @@ -191,38 +192,38 @@ SYM_FUNC_START(crc_pcl) =20 ####################################################################### ## 6) Process any remainder without interleaving: ####################################################################### .Lsmall: - test len, len + test len_dw, len_dw jz .Ldone - mov len, %eax + mov len_dw, %eax shr $3, %eax jz .Ldo_dword .Ldo_qwords: - crc32q (bufp), crc_init_q + crc32q (bufp), crc0_q add $8, bufp dec %eax jnz .Ldo_qwords .Ldo_dword: - test $4, len + test $4, len_dw jz .Ldo_word - crc32l (bufp), crc_init + crc32l (bufp), crc0 add $4, bufp .Ldo_word: - test $2, len + test $2, len_dw jz .Ldo_byte - crc32w (bufp), crc_init + crc32w (bufp), crc0 add $2, bufp .Ldo_byte: - test $1, len + test $1, len_dw jz .Ldone - crc32b (bufp), crc_init + crc32b (bufp), crc0 .Ldone: - mov crc_init, %eax + mov crc0, %eax RET -SYM_FUNC_END(crc_pcl) +SYM_FUNC_END(crc32c_x86_3way) =20 .section .rodata, "a", @progbits ################################################################ ## PCLMULQDQ tables ## Table is 128 entries x 2 words (8 bytes) each --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 60C0081AC6; Mon, 21 Oct 2024 00:29:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470596; cv=none; b=eYYy1fC/qGwjJeJLhy22p2de6f1/m8N2IF0pai9qvlWHBfXFRLMo8636WiBgIzPauoevN+glSe5cr/NWSUcOb6mHyrKmB/nsYESxin6KpS3UN4R2WpynSuVasv4DQnPu/1isETWzdk/onDDAsqx3d825MJ9Y2IfI35WMyqBY17k= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470596; c=relaxed/simple; 
bh=AME2Y9rGG1BidsZz5nyWy4uPywdMlCj30lctBBonXoo=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=mxuFS2ODQarBMopQS3bBkVJwH4g0p78v0VAG8S2QD0Fpq8LijwIdXWsTukpFP3DYKV9K5PvUvTEWC/U7FChK3A/VEvmSMYQAM3giPfee6xSwdNHSGZqONXWUzcgXpQBZ0mdX0x2XPN3SG3zkR3mcZjmpAW9CDYIhQqujdRgAP1Y= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=GZ1FeJTr; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="GZ1FeJTr" Received: by smtp.kernel.org (Postfix) with ESMTPSA id D2FCCC4CEF6; Mon, 21 Oct 2024 00:29:55 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470596; bh=AME2Y9rGG1BidsZz5nyWy4uPywdMlCj30lctBBonXoo=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=GZ1FeJTr79DIGfhSNnnohZQidrpLUDVGtJGQtNO2L6b37sY4w4ee+R5ExaFPh8kvd 6D8uuQHeMOi4LE1rrir8M2SOyLv9IS+w6dNvRPpiCNolOmTIPDYpA0/EjR99DxqGly A1Gtyrzusjzd6qlJj69X2P92pZsd9IlpdaCUNpsNlIEYMpuETvOpGcllZLICy7Jshf 5iPmCj7NhIC1t8UGXPS5Vkc3WlhqBhjSu8MlCHnHFn7c2DbWJGZKCyvbLACn8/18B1 R0mKDvJ5oaKqIlp7l9EK5yI9CMa5fTRTG78L3kKpF5+1zNhwmAZ0mYnicgFWQkhpuD yF/OK73SRunbA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 10/15] x86/crc32: update prototype for crc32_pclmul_le_16() Date: Sun, 20 Oct 2024 17:29:30 -0700 Message-ID: <20241021002935.325878-11-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: 
<20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers - Change the len parameter from unsigned int to size_t, so that the library function which takes a size_t can safely use this code. - Move the crc parameter to the front, as this is the usual convention. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/x86/crypto/crc32-pclmul_asm.S | 19 +++++++++---------- arch/x86/crypto/crc32-pclmul_glue.c | 4 ++-- 2 files changed, 11 insertions(+), 12 deletions(-) diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/crypto/crc32-pcl= mul_asm.S index 5d31137e2c7df..f9637789cac19 100644 --- a/arch/x86/crypto/crc32-pclmul_asm.S +++ b/arch/x86/crypto/crc32-pclmul_asm.S @@ -56,30 +56,29 @@ .octa 0x00000001F701164100000001DB710641 =20 #define CONSTANT %xmm0 =20 #ifdef __x86_64__ -#define BUF %rdi -#define LEN %rsi -#define CRC %edx +#define CRC %edi +#define BUF %rsi +#define LEN %rdx #else -#define BUF %eax -#define LEN %edx -#define CRC %ecx +#define CRC %eax +#define BUF %edx +#define LEN %ecx #endif =20 =20 =20 .text /** * Calculate crc32 - * BUF - buffer (16 bytes aligned) - * LEN - sizeof buffer (16 bytes aligned), LEN should be grater than = 63 * CRC - initial crc32 + * BUF - buffer (16 bytes aligned) + * LEN - sizeof buffer (16 bytes aligned), LEN should be greater than= 63 * return %eax crc32 - * uint crc32_pclmul_le_16(unsigned char const *buffer, - * size_t len, uint crc32) + * u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); */ =20 SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes = aligned */ movdqa (BUF), %xmm1 movdqa 0x10(BUF), %xmm2 diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pc= lmul_glue.c index 9f5e342b9845d..9d14eac51c5bb 100644 --- 
a/arch/x86/crypto/crc32-pclmul_glue.c +++ b/arch/x86/crypto/crc32-pclmul_glue.c @@ -44,11 +44,11 @@ #define PCLMUL_MIN_LEN 64L /* minimum size of buffer * for crc32_pclmul_le_16 */ #define SCALE_F 16L /* size of xmm register */ #define SCALE_F_MASK (SCALE_F - 1) =20 -u32 crc32_pclmul_le_16(unsigned char const *buffer, size_t len, u32 crc32); +u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); =20 static u32 __attribute__((pure)) crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) { unsigned int iquotient; @@ -69,11 +69,11 @@ static u32 __attribute__((pure)) } iquotient =3D len & (~SCALE_F_MASK); iremainder =3D len & SCALE_F_MASK; =20 kernel_fpu_begin(); - crc =3D crc32_pclmul_le_16(p, iquotient, crc); + crc =3D crc32_pclmul_le_16(crc, p, iquotient); kernel_fpu_end(); =20 if (iremainder) crc =3D crc32_le(crc, p + iquotient, iremainder); =20 --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 46765129E9C; Mon, 21 Oct 2024 00:29:56 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470597; cv=none; b=Xprvsh+d5SJ6mKouI/iQpTCKjxNu04kgps67t3hla4+8dzAqVjNk66eMQkV272Uqs/P8cfPtXeX3JBo2PRVFwTVT1uZojpiY6hHUkqFjTlZi6Csr9OVZG2wKArFCMzwX06EUdhaZO7N+NhTE64Adj/cSwH7dLYSqIdxPjWsNRyg= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470597; c=relaxed/simple; bh=DCWGkRYVa149O7Z5ne9ET9M9L94IPJdLXavc0rOVTo4=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=VcDaJkxJA75bHAxD/Vg1fiXTIQpMt7+WV/26elIJbm9/Xg+6/p8HvTAhrnVGANMWxgyNQ0qy5BWl/Q3X1nlkV9Q1KgQ+e54JmrMbq1whZKpXVxKTB+jeE6TgyxCio433Z89Zk46hF+Tc1sXLwAZdrbitF9p18OmnTD68eJsmRsk= 
ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=O2Q1mFzP; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="O2Q1mFzP" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 61BB5C4CEFE; Mon, 21 Oct 2024 00:29:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470596; bh=DCWGkRYVa149O7Z5ne9ET9M9L94IPJdLXavc0rOVTo4=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=O2Q1mFzPmv+HTnHJnLVETI5laj9xOILEJTgPrrH6hAZcwYpdB847cjQjJ4SuktaDw PyxiEQP6CSksPv6WNj7TEGG5dc5nHZZvLm5+GONqs/r6pUmqPYenHECn16DzT+tsQE ASegu6NNrz86q1YL3SYIoeYBsGtIRMcPhOlEqPnb2Dk0iARdkm5+3GHIgSEnOZM8CY fcGyS+Vvq88wTa+06Gv5Wx0tL2t2BKTCT/vh7/xWPgIc0RGSe+vbP+v6TmV42LbiYL 0v+ZW3FAbUlYVZMpa/sxtqTBFN1A2LwQet5LRGurJzouYDQ1Cce1cRcq8lZ6+117CP LWhb8zYclYvLg== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 11/15] x86/crc32: expose CRC32 functions through lib Date: Sun, 20 Oct 2024 17:29:31 -0700 Message-ID: <20241021002935.325878-12-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Move the x86 CRC32 assembly code into the lib directory and wire it up 
to the library interface. This allows it to be used without going through the crypto API. It remains usable via the crypto API too via the shash algorithms that use the library interface. Thus all the arch-specific "shash" code becomes unnecessary and is removed. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- arch/x86/Kconfig | 1 + arch/x86/crypto/Kconfig | 22 -- arch/x86/crypto/Makefile | 7 - arch/x86/crypto/crc32-pclmul_glue.c | 202 -------------- arch/x86/crypto/crc32c-intel_glue.c | 249 ------------------ arch/x86/lib/Makefile | 4 + arch/x86/lib/crc32-glue.c | 112 ++++++++ .../crc32-pclmul_asm.S =3D> lib/crc32-pclmul.S} | 0 .../crc32c-3way.S} | 0 drivers/target/iscsi/Kconfig | 1 - 10 files changed, 117 insertions(+), 481 deletions(-) delete mode 100644 arch/x86/crypto/crc32-pclmul_glue.c delete mode 100644 arch/x86/crypto/crc32c-intel_glue.c create mode 100644 arch/x86/lib/crc32-glue.c rename arch/x86/{crypto/crc32-pclmul_asm.S =3D> lib/crc32-pclmul.S} (100%) rename arch/x86/{crypto/crc32c-pcl-intel-asm_64.S =3D> lib/crc32c-3way.S} = (100%) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2852fcd82cbd8..a7a1a1448d237 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -74,10 +74,11 @@ config X86 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE select ARCH_HAS_CPU_CACHE_INVALIDATE_MEMREGION select ARCH_HAS_CPU_FINALIZE_INIT select ARCH_HAS_CPU_PASID if IOMMU_SVA + select ARCH_HAS_CRC32 select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN diff --git a/arch/x86/crypto/Kconfig b/arch/x86/crypto/Kconfig index 7b1bebed879df..1ca53e847966a 100644 --- a/arch/x86/crypto/Kconfig +++ b/arch/x86/crypto/Kconfig @@ -490,32 +490,10 @@ config CRYPTO_GHASH_CLMUL_NI_INTEL GCM GHASH hash function (NIST SP800-38D) =20 Architecture: x86_64 using: - CLMUL-NI (carry-less multiplication new 
instructions) =20 -config CRYPTO_CRC32C_INTEL - tristate "CRC32c (SSE4.2/PCLMULQDQ)" - depends on X86 - select CRYPTO_HASH - help - CRC32c CRC algorithm with the iSCSI polynomial (RFC 3385 and RFC 3720) - - Architecture: x86 (32-bit and 64-bit) using: - - SSE4.2 (Streaming SIMD Extensions 4.2) CRC32 instruction - - PCLMULQDQ (carry-less multiplication) - -config CRYPTO_CRC32_PCLMUL - tristate "CRC32 (PCLMULQDQ)" - depends on X86 - select CRYPTO_HASH - select CRC32 - help - CRC32 CRC algorithm (IEEE 802.3) - - Architecture: x86 (32-bit and 64-bit) using: - - PCLMULQDQ (carry-less multiplication) - config CRYPTO_CRCT10DIF_PCLMUL tristate "CRCT10DIF (PCLMULQDQ)" depends on X86 && 64BIT && CRC_T10DIF select CRYPTO_HASH help diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 53b4a277809e0..030b925ca4e28 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -73,17 +73,10 @@ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) +=3D ghash-cl= mulni-intel.o ghash-clmulni-intel-y :=3D ghash-clmulni-intel_asm.o ghash-clmulni-intel_g= lue.o =20 obj-$(CONFIG_CRYPTO_POLYVAL_CLMUL_NI) +=3D polyval-clmulni.o polyval-clmulni-y :=3D polyval-clmulni_asm.o polyval-clmulni_glue.o =20 -obj-$(CONFIG_CRYPTO_CRC32C_INTEL) +=3D crc32c-intel.o -crc32c-intel-y :=3D crc32c-intel_glue.o -crc32c-intel-$(CONFIG_64BIT) +=3D crc32c-pcl-intel-asm_64.o - -obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) +=3D crc32-pclmul.o -crc32-pclmul-y :=3D crc32-pclmul_asm.o crc32-pclmul_glue.o - obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) +=3D crct10dif-pclmul.o crct10dif-pclmul-y :=3D crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o =20 obj-$(CONFIG_CRYPTO_POLY1305_X86_64) +=3D poly1305-x86_64.o poly1305-x86_64-y :=3D poly1305-x86_64-cryptogams.o poly1305_glue.o diff --git a/arch/x86/crypto/crc32-pclmul_glue.c b/arch/x86/crypto/crc32-pc= lmul_glue.c deleted file mode 100644 index 9d14eac51c5bb..0000000000000 --- a/arch/x86/crypto/crc32-pclmul_glue.c +++ /dev/null @@ -1,202 +0,0 @@ -/* GPL HEADER START - * - 
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 only, - * as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License version 2 for more details (a copy is included - * in the LICENSE file that accompanied this code). - * - * You should have received a copy of the GNU General Public License - * version 2 along with this program; If not, see http://www.gnu.org/licen= ses - * - * Please visit http://www.xyratex.com/contact if you need additional - * information or have any questions. - * - * GPL HEADER END - */ - -/* - * Copyright 2012 Xyratex Technology Limited - * - * Wrappers for kernel crypto shash api to pclmulqdq crc32 implementation. 
- */ -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define PCLMUL_MIN_LEN 64L /* minimum size of buffer - * for crc32_pclmul_le_16 */ -#define SCALE_F 16L /* size of xmm register */ -#define SCALE_F_MASK (SCALE_F - 1) - -u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); - -static u32 __attribute__((pure)) - crc32_pclmul_le(u32 crc, unsigned char const *p, size_t len) -{ - unsigned int iquotient; - unsigned int iremainder; - unsigned int prealign; - - if (len < PCLMUL_MIN_LEN + SCALE_F_MASK || !crypto_simd_usable()) - return crc32_le(crc, p, len); - - if ((long)p & SCALE_F_MASK) { - /* align p to 16 byte */ - prealign =3D SCALE_F - ((long)p & SCALE_F_MASK); - - crc =3D crc32_le(crc, p, prealign); - len -=3D prealign; - p =3D (unsigned char *)(((unsigned long)p + SCALE_F_MASK) & - ~SCALE_F_MASK); - } - iquotient =3D len & (~SCALE_F_MASK); - iremainder =3D len & SCALE_F_MASK; - - kernel_fpu_begin(); - crc =3D crc32_pclmul_le_16(crc, p, iquotient); - kernel_fpu_end(); - - if (iremainder) - crc =3D crc32_le(crc, p + iquotient, iremainder); - - return crc; -} - -static int crc32_pclmul_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D 0; - - return 0; -} - -static int crc32_pclmul_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32_pclmul_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D *mctx; - - return 0; -} - -static int crc32_pclmul_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D crc32_pclmul_le(*crcp, data, len); - return 0; -} - -/* No final XOR 0xFFFFFFFF, 
like crc32_le */ -static int __crc32_pclmul_finup(u32 *crcp, const u8 *data, unsigned int le= n, - u8 *out) -{ - *(__le32 *)out =3D cpu_to_le32(crc32_pclmul_le(*crcp, data, len)); - return 0; -} - -static int crc32_pclmul_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_pclmul_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32_pclmul_final(struct shash_desc *desc, u8 *out) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *(__le32 *)out =3D cpu_to_le32p(crcp); - return 0; -} - -static int crc32_pclmul_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32_pclmul_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static struct shash_alg alg =3D { - .setkey =3D crc32_pclmul_setkey, - .init =3D crc32_pclmul_init, - .update =3D crc32_pclmul_update, - .final =3D crc32_pclmul_final, - .finup =3D crc32_pclmul_finup, - .digest =3D crc32_pclmul_digest, - .descsize =3D sizeof(u32), - .digestsize =3D CHKSUM_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32", - .cra_driver_name =3D "crc32-pclmul", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(u32), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32_pclmul_cra_init, - } -}; - -static const struct x86_cpu_id crc32pclmul_cpu_id[] =3D { - X86_MATCH_FEATURE(X86_FEATURE_PCLMULQDQ, NULL), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crc32pclmul_cpu_id); - - -static int __init crc32_pclmul_mod_init(void) -{ - - if (!x86_match_cpu(crc32pclmul_cpu_id)) { - pr_info("PCLMULQDQ-NI instructions are not detected.\n"); - return -ENODEV; - } - return crypto_register_shash(&alg); -} - -static void __exit crc32_pclmul_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32_pclmul_mod_init); -module_exit(crc32_pclmul_mod_fini); - -MODULE_AUTHOR("Alexander Boyko "); -MODULE_DESCRIPTION("CRC32 algorithm (IEEE 802.3) accelerated with 
PCLMULQD= Q"); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32"); -MODULE_ALIAS_CRYPTO("crc32-pclmul"); diff --git a/arch/x86/crypto/crc32c-intel_glue.c b/arch/x86/crypto/crc32c-i= ntel_glue.c deleted file mode 100644 index 603d159de4007..0000000000000 --- a/arch/x86/crypto/crc32c-intel_glue.c +++ /dev/null @@ -1,249 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Using hardware provided CRC32 instruction to accelerate the CRC32 dispo= sal. - * CRC32C polynomial:0x1EDC6F41(BE)/0x82F63B78(LE) - * CRC32 is a new instruction in Intel SSE4.2, the reference can be found = at: - * http://www.intel.com/products/processor/manuals/ - * Intel(R) 64 and IA-32 Architectures Software Developer's Manual - * Volume 2A: Instruction Set Reference, A-M - * - * Copyright (C) 2008 Intel Corporation - * Authors: Austin Zhang - * Kent Liu - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#define CHKSUM_BLOCK_SIZE 1 -#define CHKSUM_DIGEST_SIZE 4 - -#define SCALE_F sizeof(unsigned long) - -#ifdef CONFIG_X86_64 -#define CRC32_INST "crc32q %1, %q0" -#else -#define CRC32_INST "crc32l %1, %0" -#endif - -#ifdef CONFIG_X86_64 -/* - * use carryless multiply version of crc32c when buffer - * size is >=3D 512 to account - * for fpu state save/restore overhead. 
- */ -#define CRC32C_PCL_BREAKEVEN 512 - -asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); -#endif /* CONFIG_X86_64 */ - -static u32 crc32c_intel_le_hw_byte(u32 crc, unsigned char const *data, siz= e_t length) -{ - while (length--) { - asm("crc32b %1, %0" - : "+r" (crc) : "rm" (*data)); - data++; - } - - return crc; -} - -static u32 __pure crc32c_intel_le_hw(u32 crc, unsigned char const *p, size= _t len) -{ - unsigned int iquotient =3D len / SCALE_F; - unsigned int iremainder =3D len % SCALE_F; - unsigned long *ptmp =3D (unsigned long *)p; - - while (iquotient--) { - asm(CRC32_INST - : "+r" (crc) : "rm" (*ptmp)); - ptmp++; - } - - if (iremainder) - crc =3D crc32c_intel_le_hw_byte(crc, (unsigned char *)ptmp, - iremainder); - - return crc; -} - -/* - * Setting the seed allows arbitrary accumulators and flexible XOR policy - * If your algorithm starts with ~0, then XOR with ~0 before you set - * the seed. - */ -static int crc32c_intel_setkey(struct crypto_shash *hash, const u8 *key, - unsigned int keylen) -{ - u32 *mctx =3D crypto_shash_ctx(hash); - - if (keylen !=3D sizeof(u32)) - return -EINVAL; - *mctx =3D le32_to_cpup((__le32 *)key); - return 0; -} - -static int crc32c_intel_init(struct shash_desc *desc) -{ - u32 *mctx =3D crypto_shash_ctx(desc->tfm); - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D *mctx; - - return 0; -} - -static int crc32c_intel_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *crcp =3D crc32c_intel_le_hw(*crcp, data, len); - return 0; -} - -static int __crc32c_intel_finup(u32 *crcp, const u8 *data, unsigned int le= n, - u8 *out) -{ - *(__le32 *)out =3D ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); - return 0; -} - -static int crc32c_intel_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_intel_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_intel_final(struct shash_desc *desc, u8 
*out) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - *(__le32 *)out =3D ~cpu_to_le32p(crcp); - return 0; -} - -static int crc32c_intel_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_intel_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} - -static int crc32c_intel_cra_init(struct crypto_tfm *tfm) -{ - u32 *key =3D crypto_tfm_ctx(tfm); - - *key =3D ~0; - - return 0; -} - -#ifdef CONFIG_X86_64 -static int crc32c_pcl_intel_update(struct shash_desc *desc, const u8 *data, - unsigned int len) -{ - u32 *crcp =3D shash_desc_ctx(desc); - - /* - * use faster PCL version if datasize is large enough to - * overcome kernel fpu state save/restore overhead - */ - if (len >=3D CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { - kernel_fpu_begin(); - *crcp =3D crc32c_x86_3way(*crcp, data, len); - kernel_fpu_end(); - } else - *crcp =3D crc32c_intel_le_hw(*crcp, data, len); - return 0; -} - -static int __crc32c_pcl_intel_finup(u32 *crcp, const u8 *data, unsigned in= t len, - u8 *out) -{ - if (len >=3D CRC32C_PCL_BREAKEVEN && crypto_simd_usable()) { - kernel_fpu_begin(); - *(__le32 *)out =3D ~cpu_to_le32(crc32c_x86_3way(*crcp, data, len)); - kernel_fpu_end(); - } else - *(__le32 *)out =3D - ~cpu_to_le32(crc32c_intel_le_hw(*crcp, data, len)); - return 0; -} - -static int crc32c_pcl_intel_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_pcl_intel_finup(shash_desc_ctx(desc), data, len, out); -} - -static int crc32c_pcl_intel_digest(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) -{ - return __crc32c_pcl_intel_finup(crypto_shash_ctx(desc->tfm), data, len, - out); -} -#endif /* CONFIG_X86_64 */ - -static struct shash_alg alg =3D { - .setkey =3D crc32c_intel_setkey, - .init =3D crc32c_intel_init, - .update =3D crc32c_intel_update, - .final =3D crc32c_intel_final, - .finup =3D crc32c_intel_finup, - .digest =3D crc32c_intel_digest, - .descsize =3D sizeof(u32), - 
.digestsize =3D CHKSUM_DIGEST_SIZE, - .base =3D { - .cra_name =3D "crc32c", - .cra_driver_name =3D "crc32c-intel", - .cra_priority =3D 200, - .cra_flags =3D CRYPTO_ALG_OPTIONAL_KEY, - .cra_blocksize =3D CHKSUM_BLOCK_SIZE, - .cra_ctxsize =3D sizeof(u32), - .cra_module =3D THIS_MODULE, - .cra_init =3D crc32c_intel_cra_init, - } -}; - -static const struct x86_cpu_id crc32c_cpu_id[] =3D { - X86_MATCH_FEATURE(X86_FEATURE_XMM4_2, NULL), - {} -}; -MODULE_DEVICE_TABLE(x86cpu, crc32c_cpu_id); - -static int __init crc32c_intel_mod_init(void) -{ - if (!x86_match_cpu(crc32c_cpu_id)) - return -ENODEV; -#ifdef CONFIG_X86_64 - if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) { - alg.update =3D crc32c_pcl_intel_update; - alg.finup =3D crc32c_pcl_intel_finup; - alg.digest =3D crc32c_pcl_intel_digest; - } -#endif - return crypto_register_shash(&alg); -} - -static void __exit crc32c_intel_mod_fini(void) -{ - crypto_unregister_shash(&alg); -} - -module_init(crc32c_intel_mod_init); -module_exit(crc32c_intel_mod_fini); - -MODULE_AUTHOR("Austin Zhang , Kent Liu "); -MODULE_DESCRIPTION("CRC32c (Castagnoli) optimization using Intel Hardware.= "); -MODULE_LICENSE("GPL"); - -MODULE_ALIAS_CRYPTO("crc32c"); -MODULE_ALIAS_CRYPTO("crc32c-intel"); diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 98583a9dbab33..17510da06c9f9 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -36,10 +36,14 @@ lib-$(CONFIG_ARCH_HAS_COPY_MC) +=3D copy_mc.o copy_mc_6= 4.o lib-$(CONFIG_INSTRUCTION_DECODER) +=3D insn.o inat.o insn-eval.o lib-$(CONFIG_RANDOMIZE_BASE) +=3D kaslr.o lib-$(CONFIG_FUNCTION_ERROR_INJECTION) +=3D error-inject.o lib-$(CONFIG_MITIGATION_RETPOLINE) +=3D retpoline.o =20 +obj-$(CONFIG_CRC32_ARCH) +=3D crc32-x86.o +crc32-x86-y :=3D crc32-glue.o crc32-pclmul.o +crc32-x86-$(CONFIG_64BIT) +=3D crc32c-3way.o + obj-y +=3D msr.o msr-reg.o msr-reg-export.o hweight.o obj-y +=3D iomem.o =20 ifeq ($(CONFIG_X86_32),y) obj-y +=3D atomic64_32.o diff --git a/arch/x86/lib/crc32-glue.c 
b/arch/x86/lib/crc32-glue.c new file mode 100644 index 0000000000000..93d86f98cfe30 --- /dev/null +++ b/arch/x86/lib/crc32-glue.c @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * x86-optimized CRC32 functions + * + * Copyright (C) 2008 Intel Corporation + * Copyright 2012 Xyratex Technology Limited + * Copyright 2024 Google LLC + */ + +#include +#include +#include +#include +#include +#include + +/* minimum size of buffer for crc32_pclmul_le_16 */ +#define CRC32_PCLMUL_MIN_LEN 64 + +static DEFINE_STATIC_KEY_FALSE(have_crc32); +static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq); + +u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len); + +u32 crc32_le_arch(u32 crc, const u8 *p, size_t len) +{ + if (len >=3D CRC32_PCLMUL_MIN_LEN + 15 && + crypto_simd_usable() && static_branch_likely(&have_pclmulqdq)) { + size_t n =3D -(uintptr_t)p & 15; + + /* align p to 16-byte boundary */ + if (n) { + crc =3D crc32_le_base(crc, p, n); + p +=3D n; + len -=3D n; + } + n =3D round_down(len, 16); + kernel_fpu_begin(); + crc =3D crc32_pclmul_le_16(crc, p, n); + kernel_fpu_end(); + p +=3D n; + len -=3D n; + } + if (len) + crc =3D crc32_le_base(crc, p, len); + return crc; +} +EXPORT_SYMBOL(crc32_le_arch); + +#ifdef CONFIG_X86_64 +#define CRC32_INST "crc32q %1, %q0" +#else +#define CRC32_INST "crc32l %1, %0" +#endif + +/* + * Use carryless multiply version of crc32c when buffer size is >=3D 512 to + * account for FPU state save/restore overhead. 
+ */ +#define CRC32C_PCLMUL_BREAKEVEN 512 + +asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len); + +u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len) +{ + size_t num_longs; + + if (!static_branch_likely(&have_crc32)) + return crc32c_le_base(crc, p, len); + + if (IS_ENABLED(CONFIG_X86_64) && len >=3D CRC32C_PCLMUL_BREAKEVEN && + crypto_simd_usable() && static_branch_likely(&have_pclmulqdq)) { + kernel_fpu_begin(); + crc =3D crc32c_x86_3way(crc, p, len); + kernel_fpu_end(); + return crc; + } + + for (num_longs =3D len / sizeof(unsigned long); + num_longs !=3D 0; num_longs--, p +=3D sizeof(unsigned long)) + asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p)); + + for (len %=3D sizeof(unsigned long); len; len--, p++) + asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p)); + + return crc; +} +EXPORT_SYMBOL(crc32c_le_arch); + +u32 crc32_be_arch(u32 crc, const u8 *p, size_t len) +{ + return crc32_be_base(crc, p, len); +} +EXPORT_SYMBOL(crc32_be_arch); + +static int __init crc32_x86_init(void) +{ + if (boot_cpu_has(X86_FEATURE_XMM4_2)) + static_branch_enable(&have_crc32); + if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) + static_branch_enable(&have_pclmulqdq); + return 0; +} +arch_initcall(crc32_x86_init); + +static void __exit crc32_x86_exit(void) +{ +} +module_exit(crc32_x86_exit); + +MODULE_DESCRIPTION("x86-optimized CRC32 functions"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/crypto/crc32-pclmul_asm.S b/arch/x86/lib/crc32-pclmul= .S similarity index 100% rename from arch/x86/crypto/crc32-pclmul_asm.S rename to arch/x86/lib/crc32-pclmul.S diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/lib/crc32= c-3way.S similarity index 100% rename from arch/x86/crypto/crc32c-pcl-intel-asm_64.S rename to arch/x86/lib/crc32c-3way.S diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig index 922b207bc69dc..1c0517a125713 100644 --- a/drivers/target/iscsi/Kconfig +++ b/drivers/target/iscsi/Kconfig @@ -2,11 +2,10 @@ config 
ISCSI_TARGET tristate "SCSI Target Mode Stack" depends on INET select CRYPTO select CRYPTO_CRC32C - select CRYPTO_CRC32C_INTEL if X86 help Say M to enable the SCSI target mode stack. A SCSI target mode stack is software that makes local storage available over a storage network to a SCSI initiator system. The supported storage network technologies include iSCSI, Fibre Channel and the SCSI RDMA Protocol (SRP). --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id AAB6112EBE1; Mon, 21 Oct 2024 00:29:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470597; cv=none; b=pNhQYmD4eKzw2IH741pnt0YEg6ERkMuvHbk/9xtBVCrw37U0eiUo0Dx0tTB+UQQFzGsm15MuDILNnTsLLYlOXqFtJ3pp/yLtLOuA3OgcWbCII6ttmidLOump6Z39B7QVjGQC16Z0K9aySg3r46anPjIjCUYGG1L38uePAFbpVk8= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470597; c=relaxed/simple; bh=4c2g2/aBZcJU5enMBJmjzSkc9cpjlgUBMmCuy/77kP8=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=s1kdcCx/n6xSubixXX9incZqO98dB11IZtpF6JpzfHsI0AYGA/PMhWrKrymcn8UVJISGJJH9EOrt9vZf2V6WC7npKQgCOK7OOToGQvXTAz3TBbUJA082FtzzieS6HK+xEDhIrj1vA5xbM8AhXhZISrbDwYbDfX78FH1gcuOocAo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=m8OKhyqD; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="m8OKhyqD" Received: by smtp.kernel.org (Postfix) with ESMTPSA id E4B69C4CEE5; Mon, 21 Oct 2024 00:29:56 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; 
c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470597; bh=4c2g2/aBZcJU5enMBJmjzSkc9cpjlgUBMmCuy/77kP8=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=m8OKhyqDcGs17zHcb6Jt5UVhwFuor56sLp5dULk3zv8sIxETjWHOz+V/XWOF//bH6 jGmc4mj20oXisgOVLQ1YneJQIhD74LNkVdnk9C1bZi/p3GKS2CnpWzPX+j7BUymCpt lYDXsl14clCrOdfkOE1F0ZUPjvPqycmkUWRtfVUAjkwCgMkr63QrYMYxdHEJrgvjbR iFCKhpv1BK1ZZkmKPADL9f/DIlqTHNAyCQ2TH+nVQed+4vEYhvkER5MuWXnFpVTzwJ ztD1BC6CsgZqsYd7mA5RbvuXJDPo701tTn82k2meXAAacdHBASCiusVzqTuYmbIs5d ot9NliUxFaahw== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 12/15] lib/crc32: make crc32c() go directly to lib Date: Sun, 20 Oct 2024 17:29:32 -0700 Message-ID: <20241021002935.325878-13-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the lower level __crc32c_le() library function is optimized for each architecture, make crc32c() just call that instead of taking an inefficient and error-prone detour through the shash API. Note: a future cleanup should make crc32c_le() be the actual library function instead of __crc32c_le(). That will require updating callers of __crc32c_le() to use crc32c_le() instead, and updating callers of crc32c_le() that expect a 'const void *' arg to expect 'const u8 *' instead. 
Similarly, a future cleanup should remove LIBCRC32C by making everyone who is selecting it just select CRC32 directly instead. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- include/linux/crc32c.h | 7 ++-- lib/Kconfig | 10 ++---- lib/Makefile | 1 - lib/libcrc32c.c | 74 ------------------------------------------ 4 files changed, 8 insertions(+), 84 deletions(-) delete mode 100644 lib/libcrc32c.c diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h index 357ae4611a453..47eb78003c265 100644 --- a/include/linux/crc32c.h +++ b/include/linux/crc32c.h @@ -1,12 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_CRC32C_H #define _LINUX_CRC32C_H =20 -#include +#include =20 -extern u32 crc32c(u32 crc, const void *address, unsigned int length); +static inline u32 crc32c(u32 crc, const void *address, unsigned int length) +{ + return __crc32c_le(crc, address, length); +} =20 /* This macro exists for backwards-compatibility. */ #define crc32c_le crc32c =20 #endif /* _LINUX_CRC32C_H */ diff --git a/lib/Kconfig b/lib/Kconfig index 07afcf214f353..b894ee64ff957 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -296,18 +296,14 @@ config CRC7 the kernel tree does. Such modules that use library CRC7 functions require M here. =20 config LIBCRC32C tristate "CRC32c (Castagnoli, et al) Cyclic Redundancy-Check" - select CRYPTO - select CRYPTO_CRC32C + select CRC32 help - This option is provided for the case where no in-kernel-tree - modules require CRC32c functions, but a module built outside the - kernel tree does. Such modules that use library CRC32c functions - require M here. See Castagnoli93. - Module will be libcrc32c. + This option just selects CRC32 and is provided for compatibility + purposes until the users are updated to select CRC32 directly. =20 config CRC8 tristate "CRC8 function" help This option provides CRC8 function. 
Drivers may select this diff --git a/lib/Makefile b/lib/Makefile index 773adf88af416..15646679aee21 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -161,11 +161,10 @@ obj-$(CONFIG_CRC_ITU_T) +=3D crc-itu-t.o obj-$(CONFIG_CRC32) +=3D crc32.o obj-$(CONFIG_CRC64) +=3D crc64.o obj-$(CONFIG_CRC32_SELFTEST) +=3D crc32test.o obj-$(CONFIG_CRC4) +=3D crc4.o obj-$(CONFIG_CRC7) +=3D crc7.o -obj-$(CONFIG_LIBCRC32C) +=3D libcrc32c.o obj-$(CONFIG_CRC8) +=3D crc8.o obj-$(CONFIG_CRC64_ROCKSOFT) +=3D crc64-rocksoft.o obj-$(CONFIG_XXHASH) +=3D xxhash.o obj-$(CONFIG_GENERIC_ALLOCATOR) +=3D genalloc.o =20 diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c deleted file mode 100644 index 649e687413a0c..0000000000000 --- a/lib/libcrc32c.c +++ /dev/null @@ -1,74 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/*=20 - * CRC32C - *@Article{castagnoli-crc, - * author =3D { Guy Castagnoli and Stefan Braeuer and Martin Herrman= }, - * title =3D {{Optimization of Cyclic Redundancy-Check Codes with 24 - * and 32 Parity Bits}}, - * journal =3D IEEE Transactions on Communication, - * year =3D {1993}, - * volume =3D {41}, - * number =3D {6}, - * pages =3D {}, - * month =3D {June}, - *} - * Used by the iSCSI driver, possibly others, and derived from - * the iscsi-crc.c module of the linux-iscsi driver at - * http://linux-iscsi.sourceforge.net. - * - * Following the example of lib/crc32, this function is intended to be - * flexible and useful for all users. Modules that currently have their - * own crc32c, but hopefully may be able to use this one are: - * net/sctp (please add all your doco to here if you change to - * use this one!) - * - * - * Copyright (c) 2004 Cisco Systems, Inc. 
- */ - -#include -#include -#include -#include -#include -#include - -static struct crypto_shash *tfm; - -u32 crc32c(u32 crc, const void *address, unsigned int length) -{ - SHASH_DESC_ON_STACK(shash, tfm); - u32 ret, *ctx =3D (u32 *)shash_desc_ctx(shash); - int err; - - shash->tfm =3D tfm; - *ctx =3D crc; - - err =3D crypto_shash_update(shash, address, length); - BUG_ON(err); - - ret =3D *ctx; - barrier_data(ctx); - return ret; -} - -EXPORT_SYMBOL(crc32c); - -static int __init libcrc32c_mod_init(void) -{ - tfm =3D crypto_alloc_shash("crc32c", 0, 0); - return PTR_ERR_OR_ZERO(tfm); -} - -static void __exit libcrc32c_mod_fini(void) -{ - crypto_free_shash(tfm); -} - -module_init(libcrc32c_mod_init); -module_exit(libcrc32c_mod_fini); - -MODULE_AUTHOR("Clay Haapala "); -MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations"); -MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32c"); --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 0746D136E0E; Mon, 21 Oct 2024 00:29:57 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470598; cv=none; b=rXH5cZXMOP+DM8QMFGjOhmAkxSqHaJMdEhyv6e3uyaGOGiPZVf74Qrt6gNFBLHuFOgRzNSCgypPFMqUra84ICgSYKoBHGSCmTJbQPCxH/vyR1aIndD/LIbsvwR166qGZIROaviF9UTLxFA5lgurc01JNR6V2XVABXoHlXGbeR0g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470598; c=relaxed/simple; bh=WfvdmmPi5Dz450Yg9Si8oodEylG/NIc6y0B34+Qk+Og=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; 
b=pJiKMsAAe+iUc9UyowFG5Y8Iq0cWfjyVhfGsMtwQrOxt+C1Me4IP4A4sDf9JsAuumASxPpuB1MmpvYm083FQjq0wp1qyue1vD6JhyPlKBy17Rgg6s/jzrMhkqfWFhmMSpBayaWUx7zLXiNBx5zl3ELQ93mXUJq1j7vKK6gyXOj0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=VrWTWdrx; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="VrWTWdrx" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 77DFBC4CEE9; Mon, 21 Oct 2024 00:29:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470597; bh=WfvdmmPi5Dz450Yg9Si8oodEylG/NIc6y0B34+Qk+Og=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=VrWTWdrxCbHPXbV/KZ63ZbVxCBnhVQDyIZazR0Xckb8387jY+angqtdYjsmGsHgpD wa3okcksMQsEe4NSUkdxXEzVKXrWnd9H4BCoxc/ixTv8xN4K+sv/T6xp50JdKxgc2l mN6ggYx7ielUd395hBGezhaD4Z7/RPidRksjwQ9CR/J30iH0yY9L3H58XZEJIeC3Ij yCzjjYU8SHhwcWVpJDEvTgijM8oWTCtj5IhptFxE9thf8Exx8HzBXFqweTfBA+5GFC Sr3cyqCADtSGsgk7ktz4z/tvcSQgxbtM+gXHLK4gWVjS9igkqV43duJfhdGqD/0q2/ /CEZ/j8dFxP1g== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 13/15] ext4: switch to using the crc32c library Date: Sun, 20 Oct 2024 17:29:33 -0700 Message-ID: <20241021002935.325878-14-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 
Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the crc32c() library function directly takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32c(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- fs/ext4/Kconfig | 3 +-- fs/ext4/ext4.h | 25 +++---------------------- fs/ext4/super.c | 15 --------------- 3 files changed, 4 insertions(+), 39 deletions(-) diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig index e20d59221fc05..c9ca41d91a6c1 100644 --- a/fs/ext4/Kconfig +++ b/fs/ext4/Kconfig @@ -29,12 +29,11 @@ config EXT3_FS_SECURITY config EXT4_FS tristate "The Extended 4 (ext4) filesystem" select BUFFER_HEAD select JBD2 select CRC16 - select CRYPTO - select CRYPTO_CRC32C + select CRC32 select FS_IOMAP select FS_ENCRYPTION_ALGS if FS_ENCRYPTION help This is the next generation of the ext3 filesystem. =20 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 44b0d418143c2..99aa512a7de12 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -31,11 +31,11 @@ #include #include #include #include #include -#include +#include #include #include #include #ifdef __KERNEL__ #include @@ -1660,13 +1660,10 @@ struct ext4_sb_info { struct task_struct *s_mmp_tsk; =20 /* record the last minlen when FITRIM is called. 
*/ unsigned long s_last_trim_minblks; =20 - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; - /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_csum_seed; =20 /* Reclaim extents from extent status tree */ struct shrinker *s_es_shrinker; @@ -2465,23 +2462,11 @@ static inline __le16 ext4_rec_len_to_disk(unsigned = len, unsigned blocksize) #define DX_HASH_LAST DX_HASH_SIPHASH =20 static inline u32 ext4_chksum(struct ext4_sb_info *sbi, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[4]; - } desc; - - BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver)!=3Dsizeof(desc.ctx)); - - desc.shash.tfm =3D sbi->s_chksum_driver; - *(u32 *)desc.ctx =3D crc; - - BUG_ON(crypto_shash_update(&desc.shash, address, length)); - - return *(u32 *)desc.ctx; + return crc32c(crc, address, length); } =20 #ifdef __KERNEL__ =20 /* hash info structure used by the directory hash */ @@ -3278,15 +3263,11 @@ extern void ext4_group_desc_csum_set(struct super_b= lock *sb, __u32 group, extern int ext4_register_li_request(struct super_block *sb, ext4_group_t first_not_zeroed); =20 static inline int ext4_has_metadata_csum(struct super_block *sb) { - WARN_ON_ONCE(ext4_has_feature_metadata_csum(sb) && - !EXT4_SB(sb)->s_chksum_driver); - - return ext4_has_feature_metadata_csum(sb) && - (EXT4_SB(sb)->s_chksum_driver !=3D NULL); + return ext4_has_feature_metadata_csum(sb); } =20 static inline int ext4_has_group_desc_csum(struct super_block *sb) { return ext4_has_feature_gdt_csum(sb) || ext4_has_metadata_csum(sb); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 16a4ce704460e..1a821093cc0dd 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1371,12 +1371,10 @@ static void ext4_put_super(struct super_block *sb) * Now that we are completely done shutting down the * superblock, we need to actually destroy the kobject. 
*/ kobject_put(&sbi->s_kobj); wait_for_completion(&sbi->s_kobj_unregister); - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->s_blockgroup_lock); fs_put_dax(sbi->s_daxdev, NULL); fscrypt_free_dummy_policy(&sbi->s_dummy_enc_policy); #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); @@ -4586,19 +4584,10 @@ static int ext4_init_metadata_csum(struct super_blo= ck *sb, struct ext4_super_blo return -EINVAL; } ext4_setup_csum_trigger(sb, EXT4_JTR_ORPHAN_FILE, ext4_orphan_file_block_trigger); =20 - /* Load the checksum driver */ - sbi->s_chksum_driver =3D crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - int ret =3D PTR_ERR(sbi->s_chksum_driver); - ext4_msg(sb, KERN_ERR, "Cannot load crc32c driver."); - sbi->s_chksum_driver =3D NULL; - return ret; - } - /* Check superblock checksum */ if (!ext4_superblock_csum_verify(sb, es)) { ext4_msg(sb, KERN_ERR, "VFS: Found ext4 filesystem with " "invalid superblock checksum. Run e2fsck?"); return -EFSBADCRC; @@ -5638,13 +5627,10 @@ failed_mount8: __maybe_unused flush_work(&sbi->s_sb_upd_work); ext4_stop_mmpd(sbi); del_timer_sync(&sbi->s_err_report); ext4_group_desc_free(sbi); failed_mount: - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); - #if IS_ENABLED(CONFIG_UNICODE) utf8_unload(sb->s_encoding); #endif =20 #ifdef CONFIG_QUOTA @@ -7433,8 +7419,7 @@ static void __exit ext4_exit_fs(void) } =20 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, = Theodore Ts'o and others"); MODULE_DESCRIPTION("Fourth Extended Filesystem"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32c"); module_init(ext4_init_fs) module_exit(ext4_exit_fs) --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 
C12E7145A18; Mon, 21 Oct 2024 00:29:58 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470599; cv=none; b=mMkrRnAjMsiGrlHxsfGZlmJht7cZxV82vyzxVjsmX5cExMiOnVLiFDcV9GgPFWBP92MyTPxdREIuPDtyMPmW1sXnnTCudjGpb0xK3sgKfFtCnOAs6azQ6+U0gl2HRjBLFJBwD6XuVleNsfe9fVNQRYsaYygPTuQeZy/inITs5dc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470599; c=relaxed/simple; bh=4QrNTfxk1O5mqgXso3Uq4Yh9fW2+aliDT+yU3NaRHjU=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=HlXwDqVSOQj6KeLS3PClx8VbJx4jg/6Mwvl+b18+FR4/6K8fEFzXj0w4C5pcvutSnu0kfjl0fHX9K4Rwm/FrWhCGFvT/RMO6DdMvlZYLyQpnOzn0UOmqsm7F6sjaRcXY4RQlAJTZW37/Lxo536uT89dmwcP5n6JOz8sxU91wb84= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=FdOAy7Sh; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="FdOAy7Sh" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 061F5C4CEC7; Mon, 21 Oct 2024 00:29:57 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470598; bh=4QrNTfxk1O5mqgXso3Uq4Yh9fW2+aliDT+yU3NaRHjU=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=FdOAy7ShO/AIu2M7e2/271mIPF14WPY1XGHVgfMkByCmqKJXATBSNW69jUcXasSIz 6Oim46eD9B5dTBWt6nmgyPg4UMAVXuJsLkLL+lNyLjytgzlCpr3IPjkMy1DF6VXbar 8EVzZ0DEKZjdRjWlu3y1BaS5OTSPA+DBPd/1Asf81CkRl4d93oeP0JqzLo4nbyKgju kvA4UB7p6kTLItgXAtWMSY3Dx+Sx3wdf7P779SJHNOVnq9iLuJxD3w2UJzHd7JnH/s Vsuq5JUbm9HNa0fHQQMHG2fJnZeTF+dB1XXhRygSUbwAeBQzflon+jFnPmh8rCiWoX mGjddyhl8fmVA== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, 
linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 14/15] jbd2: switch to using the crc32c library Date: Sun, 20 Oct 2024 17:29:34 -0700 Message-ID: <20241021002935.325878-15-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the crc32c() library function directly takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32c(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- fs/jbd2/Kconfig | 2 -- fs/jbd2/journal.c | 25 ++----------------------- include/linux/jbd2.h | 31 +++---------------------------- 3 files changed, 5 insertions(+), 53 deletions(-) diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 4ad2c67f93f15..9c19e1512101e 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig @@ -1,11 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only config JBD2 tristate select CRC32 - select CRYPTO - select CRYPTO_CRC32C help This is a generic journaling layer for block devices that support both 32-bit and 64-bit block numbers. 
It is currently used by the ext4 and OCFS2 filesystems, but it could also be used to add journal support to other file systems or block devices such diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 97f487c3d8fcf..56cea5a738a70 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1373,24 +1373,16 @@ static int journal_check_superblock(journal_t *jour= nal) printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 " "at the same time!\n"); return err; } =20 - /* Load the checksum driver */ if (jbd2_journal_has_csum_v2or3_feature(journal)) { if (sb->s_checksum_type !=3D JBD2_CRC32C_CHKSUM) { printk(KERN_ERR "JBD2: Unknown checksum type\n"); return err; } =20 - journal->j_chksum_driver =3D crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); - err =3D PTR_ERR(journal->j_chksum_driver); - journal->j_chksum_driver =3D NULL; - return err; - } /* Check superblock checksum */ if (sb->s_checksum !=3D jbd2_superblock_csum(journal, sb)) { printk(KERN_ERR "JBD2: journal checksum error\n"); err =3D -EFSBADCRC; return err; @@ -1611,12 +1603,10 @@ static journal_t *journal_init_common(struct block_= device *bdev, =20 return journal; =20 err_cleanup: percpu_counter_destroy(&journal->j_checkpoint_jh_count); - if (journal->j_chksum_driver) - crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_wbuf); jbd2_journal_destroy_revoke(journal); journal_fail_superblock(journal); kfree(journal); return ERR_PTR(err); @@ -2194,12 +2184,10 @@ int jbd2_journal_destroy(journal_t *journal) if (journal->j_proc_entry) jbd2_stats_proc_exit(journal); iput(journal->j_inode); if (journal->j_revoke) jbd2_journal_destroy_revoke(journal); - if (journal->j_chksum_driver) - crypto_free_shash(journal->j_chksum_driver); kfree(journal->j_fc_wbuf); kfree(journal->j_wbuf); kfree(journal); =20 return err; @@ -2340,23 +2328,14 @@ int jbd2_journal_set_features(journal_t *journal, u= nsigned long compat, 
pr_err("JBD2: Cannot enable fast commits.\n"); return 0; } } =20 - /* Load the checksum driver if necessary */ - if ((journal->j_chksum_driver =3D=3D NULL) && - INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { - journal->j_chksum_driver =3D crypto_alloc_shash("crc32c", 0, 0); - if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); - journal->j_chksum_driver =3D NULL; - return 0; - } - /* Precompute checksum seed for all metadata */ + /* Precompute checksum seed for all metadata */ + if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) journal->j_csum_seed =3D jbd2_chksum(journal, ~0, sb->s_uuid, sizeof(sb->s_uuid)); - } =20 lock_buffer(journal->j_sb_buffer); =20 /* If enabling v3 checksums, update superblock */ if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) { diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 8aef9bb6ad573..33d25a3d15f14 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -26,11 +26,11 @@ #include #include #include #include #include -#include +#include #endif =20 #define journal_oom_retry 1 =20 /* @@ -1239,17 +1239,10 @@ struct journal_s * An opaque pointer to fs-private information. ext3 puts its * superblock pointer here. */ void *j_private; =20 - /** - * @j_chksum_driver: - * - * Reference to checksum algorithm driver via cryptoapi. - */ - struct crypto_shash *j_chksum_driver; - /** * @j_csum_seed: * * Precomputed journal UUID checksum for seeding other checksums. 
*/ @@ -1748,14 +1741,11 @@ static inline bool jbd2_journal_has_csum_v2or3_feat= ure(journal_t *j) return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j); } =20 static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { - WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && - journal->j_chksum_driver =3D=3D NULL); - - return journal->j_chksum_driver !=3D NULL; + return jbd2_journal_has_csum_v2or3_feature(journal); } =20 static inline int jbd2_journal_get_num_fc_blks(journal_superblock_t *jsb) { int num_fc_blocks =3D be32_to_cpu(jsb->s_num_fc_blks); @@ -1794,26 +1784,11 @@ static inline unsigned long jbd2_log_space_left(jou= rnal_t *journal) #define JBD_MAX_CHECKSUM_SIZE 4 =20 static inline u32 jbd2_chksum(journal_t *journal, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[JBD_MAX_CHECKSUM_SIZE]; - } desc; - int err; - - BUG_ON(crypto_shash_descsize(journal->j_chksum_driver) > - JBD_MAX_CHECKSUM_SIZE); - - desc.shash.tfm =3D journal->j_chksum_driver; - *(u32 *)desc.ctx =3D crc; - - err =3D crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; + return crc32c(crc, address, length); } =20 /* Return most recent uncommitted transaction */ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) { --=20 2.47.0 From nobody Tue Nov 26 07:23:39 2024 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 4E05A14830F; Mon, 21 Oct 2024 00:29:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470599; cv=none; 
b=ClkoqdmqW/rwpgIF5W1bq7oCsGhwBg9CmmrG/k1jXEBaDvnzsiSs7kDKbxcFrUCapQROWKcVAfZPRuNbaYdc/YNHtElREYspXrD/ZVg0MFtNZKzn/i/NFvtgux/G6XptOS4uMeBZQsLw5nA/xSk5Z75Mjj+qJR2WT+wiluPguZk= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1729470599; c=relaxed/simple; bh=2edVn3yzlgvTIcU4OYne6VaRBmOeBMvpuDRrHbVNdrI=; h=From:To:Cc:Subject:Date:Message-ID:In-Reply-To:References: MIME-Version; b=oGoohfC3AJJKp+I9JdTM7rGeMiA+bimxvlVQmu+SCee0SBRdI8Yx7uMvS6fYNV7/XBxrGbCsaEc5lopM6st+ffv7FFT2U1nJ5KjEoUO8vmvebafRLXt7DO4xuetHDIJa4OCSukXJDopmhxQLLWr0QHCpC1TZ3Km/7I/hH/qWFP0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=NFFk4W0d; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="NFFk4W0d" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 87B75C4CEE5; Mon, 21 Oct 2024 00:29:58 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1729470598; bh=2edVn3yzlgvTIcU4OYne6VaRBmOeBMvpuDRrHbVNdrI=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=NFFk4W0dyvHD7M3v36IIWTb5ySBBZX5uZ/VLRYiArPPR5++D/baWB8d+lyfAaBOhk rztTLk/6ilOUZBWwQxzKVpNgy7hbS+bAqO2cV8DFYmzWDyAaKEezQcPZFBrYh8vShT roh7zUz+tjlNZy+E9vuB5Q59JQnN14hGjhkd5gjyQeUJqlHGIc+Uf3/5EwjEZ4O6Ps TvEpzXEcAb/PHAsselt3G1remC4TFMm//VXM0Ciyn11PZQ9zsIxfN5iARYumUTBA9J 8pJ0BrJoKwcW+JUENnQzEDWoyb2qcx6NpA2gan0ia3pz9LbggvtIVcVcrMMhn+Rn2b w+zTkdQuRqfwg== From: Eric Biggers To: linux-kernel@vger.kernel.org Cc: linux-arch@vger.kernel.org, linux-arm-kernel@lists.infradead.org, linux-crypto@vger.kernel.org, linux-ext4@vger.kernel.org, linux-f2fs-devel@lists.sourceforge.net, linux-mips@vger.kernel.org, linux-riscv@lists.infradead.org, linux-s390@vger.kernel.org, linuxppc-dev@lists.ozlabs.org, loongarch@lists.linux.dev, sparclinux@vger.kernel.org, x86@kernel.org Subject: [PATCH 15/15] f2fs: 
switch to using the crc32 library Date: Sun, 20 Oct 2024 17:29:35 -0700 Message-ID: <20241021002935.325878-16-ebiggers@kernel.org> X-Mailer: git-send-email 2.47.0 In-Reply-To: <20241021002935.325878-1-ebiggers@kernel.org> References: <20241021002935.325878-1-ebiggers@kernel.org> Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Content-Type: text/plain; charset="utf-8" From: Eric Biggers Now that the crc32() library function takes advantage of architecture-specific optimizations, it is unnecessary to go through the crypto API. Just use crc32(). This is much simpler, and it improves performance due to eliminating the crypto API overhead. Signed-off-by: Eric Biggers Reviewed-by: Ard Biesheuvel --- fs/f2fs/Kconfig | 3 +-- fs/f2fs/f2fs.h | 19 +------------------ fs/f2fs/super.c | 15 --------------- 3 files changed, 2 insertions(+), 35 deletions(-) diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig index 68a1e23e1557c..5916a02fb46dd 100644 --- a/fs/f2fs/Kconfig +++ b/fs/f2fs/Kconfig @@ -2,12 +2,11 @@ config F2FS_FS tristate "F2FS filesystem support" depends on BLOCK select BUFFER_HEAD select NLS - select CRYPTO - select CRYPTO_CRC32 + select CRC32 select F2FS_FS_XATTR if FS_ENCRYPTION select FS_ENCRYPTION_ALGS if FS_ENCRYPTION select FS_IOMAP select LZ4_COMPRESS if F2FS_FS_LZ4 select LZ4_DECOMPRESS if F2FS_FS_LZ4 diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 33f5449dc22d5..1fc5c2743c8d4 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -1761,13 +1761,10 @@ struct f2fs_sb_info { =20 /* For write statistics */ u64 sectors_written_start; u64 kbytes_written; =20 - /* Reference to checksum algorithm driver via cryptoapi */ - struct crypto_shash *s_chksum_driver; - /* Precomputed FS UUID checksum for seeding other checksums */ __u32 s_chksum_seed; =20 struct workqueue_struct *post_read_wq; /* post read workqueue */ =20 @@ -1941,25 +1938,11 @@ static inline 
unsigned int f2fs_time_to_wait(struct= f2fs_sb_info *sbi, * Inline functions */ static inline u32 __f2fs_crc32(struct f2fs_sb_info *sbi, u32 crc, const void *address, unsigned int length) { - struct { - struct shash_desc shash; - char ctx[4]; - } desc; - int err; - - BUG_ON(crypto_shash_descsize(sbi->s_chksum_driver) !=3D sizeof(desc.ctx)); - - desc.shash.tfm =3D sbi->s_chksum_driver; - *(u32 *)desc.ctx =3D crc; - - err =3D crypto_shash_update(&desc.shash, address, length); - BUG_ON(err); - - return *(u32 *)desc.ctx; + return crc32(crc, address, length); } =20 static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address, unsigned int length) { diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 87ab5696bd482..003d3bcb0caa2 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -1670,12 +1670,10 @@ static void f2fs_put_super(struct super_block *sb) =20 f2fs_destroy_post_read_wq(sbi); =20 kvfree(sbi->ckpt); =20 - if (sbi->s_chksum_driver) - crypto_free_shash(sbi->s_chksum_driver); kfree(sbi->raw_super); =20 f2fs_destroy_page_array_cache(sbi); f2fs_destroy_xattr_caches(sbi); #ifdef CONFIG_QUOTA @@ -4419,19 +4417,10 @@ static int f2fs_fill_super(struct super_block *sb, = void *data, int silent) INIT_LIST_HEAD(&sbi->inode_list[i]); spin_lock_init(&sbi->inode_lock[i]); } mutex_init(&sbi->flush_lock); =20 - /* Load the checksum driver */ - sbi->s_chksum_driver =3D crypto_alloc_shash("crc32", 0, 0); - if (IS_ERR(sbi->s_chksum_driver)) { - f2fs_err(sbi, "Cannot load crc32 driver."); - err =3D PTR_ERR(sbi->s_chksum_driver); - sbi->s_chksum_driver =3D NULL; - goto free_sbi; - } - /* set a block size */ if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) { f2fs_err(sbi, "unable to set blocksize"); goto free_sbi; } @@ -4872,12 +4861,10 @@ static int f2fs_fill_super(struct super_block *sb, = void *data, int silent) fscrypt_free_dummy_policy(&F2FS_OPTION(sbi).dummy_enc_policy); kvfree(options); free_sb_buf: kfree(raw_super); free_sbi: - if (sbi->s_chksum_driver) - 
crypto_free_shash(sbi->s_chksum_driver); kfree(sbi); sb->s_fs_info =3D NULL; =20 /* give only one another chance */ if (retry_cnt > 0 && skip_recovery) { @@ -5080,7 +5067,5 @@ module_init(init_f2fs_fs) module_exit(exit_f2fs_fs) =20 MODULE_AUTHOR("Samsung Electronics's Praesto Team"); MODULE_DESCRIPTION("Flash Friendly File System"); MODULE_LICENSE("GPL"); -MODULE_SOFTDEP("pre: crc32"); - --=20 2.47.0