MD5 is insecure, is no longer commonly used, and has never been
optimized for the most common architectures in the kernel. Only mips,
powerpc, and sparc have optimized MD5 code in the kernel. Of these,
only the powerpc one is actually testable in QEMU. The mips one works
only on Cavium Octeon SoCs.
Taken together, it's clear that it's time to retire these additional MD5
implementations, and focus maintenance on the MD5 generic C code.
This commit removes the PowerPC optimized MD5 code.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
arch/powerpc/configs/powernv_defconfig | 1 -
arch/powerpc/configs/ppc64_defconfig | 1 -
arch/powerpc/crypto/Kconfig | 8 -
arch/powerpc/crypto/Makefile | 2 -
arch/powerpc/crypto/md5-asm.S | 235 -------------------------
arch/powerpc/crypto/md5-glue.c | 99 -----------
6 files changed, 346 deletions(-)
delete mode 100644 arch/powerpc/crypto/md5-asm.S
delete mode 100644 arch/powerpc/crypto/md5-glue.c
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index d06388b0f66e3..bd4685612de6d 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -318,11 +318,10 @@ CONFIG_FTR_FIXUP_SELFTEST=y
CONFIG_MSI_BITMAP_SELFTEST=y
CONFIG_XMON=y
CONFIG_CRYPTO_BENCHMARK=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_HMAC=y
-CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index ce34597e9f3e1..2d92c11eea7e4 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -385,11 +385,10 @@ CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_LZO=m
-CONFIG_CRYPTO_MD5_PPC=m
CONFIG_CRYPTO_AES_GCM_P10=m
CONFIG_CRYPTO_DEV_NX=y
CONFIG_CRYPTO_DEV_NX_ENCRYPT=m
CONFIG_CRYPTO_DEV_VMX=y
CONFIG_SYSTEM_TRUSTED_KEYRING=y
diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index cfe39fc221cf8..f4b779c7352de 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -13,18 +13,10 @@ config CRYPTO_CURVE25519_PPC64
Curve25519 algorithm
Architecture: PowerPC64
- Little-endian
-config CRYPTO_MD5_PPC
- tristate "Digests: MD5"
- select CRYPTO_HASH
- help
- MD5 message digest algorithm (RFC1321)
-
- Architecture: powerpc
-
config CRYPTO_AES_PPC_SPE
tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)"
depends on SPE
select CRYPTO_SKCIPHER
help
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index bc8fd27344b8b..9eb59dce67f36 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -4,17 +4,15 @@
#
# Arch-specific CryptoAPI modules.
#
obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o
-obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o
obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) += curve25519-ppc64le.o
aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o
-md5-ppc-y := md5-asm.o md5-glue.o
aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o
vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o
curve25519-ppc64le-y := curve25519-ppc64le-core.o curve25519-ppc64le_asm.o
ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S
deleted file mode 100644
index fa6bc440cf4ac..0000000000000
--- a/arch/powerpc/crypto/md5-asm.S
+++ /dev/null
@@ -1,235 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Fast MD5 implementation for PPC
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/asm-compat.h>
-
-#define rHP r3
-#define rWP r4
-
-#define rH0 r0
-#define rH1 r6
-#define rH2 r7
-#define rH3 r5
-
-#define rW00 r8
-#define rW01 r9
-#define rW02 r10
-#define rW03 r11
-#define rW04 r12
-#define rW05 r14
-#define rW06 r15
-#define rW07 r16
-#define rW08 r17
-#define rW09 r18
-#define rW10 r19
-#define rW11 r20
-#define rW12 r21
-#define rW13 r22
-#define rW14 r23
-#define rW15 r24
-
-#define rT0 r25
-#define rT1 r26
-
-#define INITIALIZE \
- PPC_STLU r1,-INT_FRAME_SIZE(r1); \
- SAVE_GPRS(14, 26, r1) /* push registers onto stack */
-
-#define FINALIZE \
- REST_GPRS(14, 26, r1); /* pop registers from stack */ \
- addi r1,r1,INT_FRAME_SIZE
-
-#ifdef __BIG_ENDIAN__
-#define LOAD_DATA(reg, off) \
- lwbrx reg,0,rWP; /* load data */
-#define INC_PTR \
- addi rWP,rWP,4; /* increment per word */
-#define NEXT_BLOCK /* nothing to do */
-#else
-#define LOAD_DATA(reg, off) \
- lwz reg,off(rWP); /* load data */
-#define INC_PTR /* nothing to do */
-#define NEXT_BLOCK \
- addi rWP,rWP,64; /* increment per block */
-#endif
-
-#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \
- LOAD_DATA(w0, off) /* W */ \
- and rT0,b,c; /* 1: f = b and c */ \
- INC_PTR /* ptr++ */ \
- andc rT1,d,b; /* 1: f' = ~b and d */ \
- LOAD_DATA(w1, off+4) /* W */ \
- or rT0,rT0,rT1; /* 1: f = f or f' */ \
- addi w0,w0,k0l; /* 1: wk = w + k */ \
- add a,a,rT0; /* 1: a = a + f */ \
- addis w0,w0,k0h; /* 1: wk = w + k' */ \
- addis w1,w1,k1h; /* 2: wk = w + k */ \
- add a,a,w0; /* 1: a = a + wk */ \
- addi w1,w1,k1l; /* 2: wk = w + k' */ \
- rotrwi a,a,p; /* 1: a = a rotl x */ \
- add d,d,w1; /* 2: a = a + wk */ \
- add a,a,b; /* 1: a = a + b */ \
- and rT0,a,b; /* 2: f = b and c */ \
- andc rT1,c,a; /* 2: f' = ~b and d */ \
- or rT0,rT0,rT1; /* 2: f = f or f' */ \
- add d,d,rT0; /* 2: a = a + f */ \
- INC_PTR /* ptr++ */ \
- rotrwi d,d,q; /* 2: a = a rotl x */ \
- add d,d,a; /* 2: a = a + b */
-
-#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
- andc rT0,c,d; /* 1: f = c and ~d */ \
- and rT1,b,d; /* 1: f' = b and d */ \
- addi w0,w0,k0l; /* 1: wk = w + k */ \
- or rT0,rT0,rT1; /* 1: f = f or f' */ \
- addis w0,w0,k0h; /* 1: wk = w + k' */ \
- add a,a,rT0; /* 1: a = a + f */ \
- addi w1,w1,k1l; /* 2: wk = w + k */ \
- add a,a,w0; /* 1: a = a + wk */ \
- addis w1,w1,k1h; /* 2: wk = w + k' */ \
- andc rT0,b,c; /* 2: f = c and ~d */ \
- rotrwi a,a,p; /* 1: a = a rotl x */ \
- add a,a,b; /* 1: a = a + b */ \
- add d,d,w1; /* 2: a = a + wk */ \
- and rT1,a,c; /* 2: f' = b and d */ \
- or rT0,rT0,rT1; /* 2: f = f or f' */ \
- add d,d,rT0; /* 2: a = a + f */ \
- rotrwi d,d,q; /* 2: a = a rotl x */ \
- add d,d,a; /* 2: a = a +b */
-
-#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
- xor rT0,b,c; /* 1: f' = b xor c */ \
- addi w0,w0,k0l; /* 1: wk = w + k */ \
- xor rT1,rT0,d; /* 1: f = f xor f' */ \
- addis w0,w0,k0h; /* 1: wk = w + k' */ \
- add a,a,rT1; /* 1: a = a + f */ \
- addi w1,w1,k1l; /* 2: wk = w + k */ \
- add a,a,w0; /* 1: a = a + wk */ \
- addis w1,w1,k1h; /* 2: wk = w + k' */ \
- rotrwi a,a,p; /* 1: a = a rotl x */ \
- add d,d,w1; /* 2: a = a + wk */ \
- add a,a,b; /* 1: a = a + b */ \
- xor rT1,rT0,a; /* 2: f = b xor f' */ \
- add d,d,rT1; /* 2: a = a + f */ \
- rotrwi d,d,q; /* 2: a = a rotl x */ \
- add d,d,a; /* 2: a = a + b */
-
-#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \
- addi w0,w0,k0l; /* 1: w = w + k */ \
- orc rT0,b,d; /* 1: f = b or ~d */ \
- addis w0,w0,k0h; /* 1: w = w + k' */ \
- xor rT0,rT0,c; /* 1: f = f xor c */ \
- add a,a,w0; /* 1: a = a + wk */ \
- addi w1,w1,k1l; /* 2: w = w + k */ \
- add a,a,rT0; /* 1: a = a + f */ \
- addis w1,w1,k1h; /* 2: w = w + k' */ \
- rotrwi a,a,p; /* 1: a = a rotl x */ \
- add a,a,b; /* 1: a = a + b */ \
- orc rT0,a,c; /* 2: f = b or ~d */ \
- add d,d,w1; /* 2: a = a + wk */ \
- xor rT0,rT0,b; /* 2: f = f xor c */ \
- add d,d,rT0; /* 2: a = a + f */ \
- rotrwi d,d,q; /* 2: a = a rotl x */ \
- add d,d,a; /* 2: a = a + b */
-
-_GLOBAL(ppc_md5_transform)
- INITIALIZE
-
- mtctr r5
- lwz rH0,0(rHP)
- lwz rH1,4(rHP)
- lwz rH2,8(rHP)
- lwz rH3,12(rHP)
-
-ppc_md5_main:
- R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0,
- 0xd76b, -23432, 0xe8c8, -18602)
- R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8,
- 0x2420, 0x70db, 0xc1be, -12562)
- R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16,
- 0xf57c, 0x0faf, 0x4788, -14806)
- R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24,
- 0xa830, 0x4613, 0xfd47, -27391)
- R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32,
- 0x6981, -26408, 0x8b45, -2129)
- R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40,
- 0xffff, 0x5bb1, 0x895d, -10306)
- R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48,
- 0x6b90, 0x1122, 0xfd98, 0x7193)
- R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56,
- 0xa679, 0x438e, 0x49b4, 0x0821)
-
- R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23,
- 0x0d56, 0x6e0c, 0x1810, 0x6d2d)
- R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12,
- 0x9d02, -32109, 0x124c, 0x2332)
- R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23,
- 0x8ea7, 0x4a33, 0x0245, -18270)
- R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12,
- 0x8eee, -8608, 0xf258, -5095)
- R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23,
- 0x969d, -10697, 0x1cbe, -15288)
- R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12,
- 0x3317, 0x3e99, 0xdbd9, 0x7c15)
- R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23,
- 0xac4b, 0x7772, 0xd8cf, 0x331d)
- R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12,
- 0x6a28, 0x6dd8, 0x219a, 0x3b68)
-
- R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21,
- 0x29cb, 0x28e5, 0x4218, -7788)
- R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9,
- 0x473f, 0x06d1, 0x3aae, 0x3036)
- R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21,
- 0xaea1, -15134, 0x640b, -11295)
- R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9,
- 0x8f4c, 0x4887, 0xbc7c, -22499)
- R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21,
- 0x7eb8, -27199, 0x00ea, 0x6050)
- R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9,
- 0xe01a, 0x22fe, 0x4447, 0x69c5)
- R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21,
- 0xb7f3, 0x0253, 0x59b1, 0x4d5b)
- R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9,
- 0x4701, -27017, 0xc7bd, -19859)
-
- R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22,
- 0x0988, -1462, 0x4c70, -19401)
- R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11,
- 0xadaf, -5221, 0xfc99, 0x66f7)
- R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22,
- 0x7e80, -16418, 0xba1e, -25587)
- R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11,
- 0x4130, 0x380d, 0xe0c5, 0x738d)
- lwz rW00,0(rHP)
- R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22,
- 0xe837, -30770, 0xde8a, 0x69e8)
- lwz rW14,4(rHP)
- R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11,
- 0x9e79, 0x260f, 0x256d, -27941)
- lwz rW12,8(rHP)
- R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22,
- 0xab75, -20775, 0x4f9e, -28397)
- lwz rW10,12(rHP)
- R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11,
- 0x662b, 0x7c56, 0x11b2, 0x0358)
-
- add rH0,rH0,rW00
- stw rH0,0(rHP)
- add rH1,rH1,rW14
- stw rH1,4(rHP)
- add rH2,rH2,rW12
- stw rH2,8(rHP)
- add rH3,rH3,rW10
- stw rH3,12(rHP)
- NEXT_BLOCK
-
- bdnz ppc_md5_main
-
- FINALIZE
- blr
diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c
deleted file mode 100644
index 204440a90cd84..0000000000000
--- a/arch/powerpc/crypto/md5-glue.c
+++ /dev/null
@@ -1,99 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Glue code for MD5 implementation for PPC assembler
- *
- * Based on generic implementation.
- *
- * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de>
- */
-
-#include <crypto/internal/hash.h>
-#include <crypto/md5.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-
-extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks);
-
-static int ppc_md5_init(struct shash_desc *desc)
-{
- struct md5_state *sctx = shash_desc_ctx(desc);
-
- sctx->hash[0] = MD5_H0;
- sctx->hash[1] = MD5_H1;
- sctx->hash[2] = MD5_H2;
- sctx->hash[3] = MD5_H3;
- sctx->byte_count = 0;
-
- return 0;
-}
-
-static int ppc_md5_update(struct shash_desc *desc, const u8 *data,
- unsigned int len)
-{
- struct md5_state *sctx = shash_desc_ctx(desc);
-
- sctx->byte_count += round_down(len, MD5_HMAC_BLOCK_SIZE);
- ppc_md5_transform(sctx->hash, data, len >> 6);
- return len - round_down(len, MD5_HMAC_BLOCK_SIZE);
-}
-
-static int ppc_md5_finup(struct shash_desc *desc, const u8 *src,
- unsigned int offset, u8 *out)
-{
- struct md5_state *sctx = shash_desc_ctx(desc);
- __le64 block[MD5_BLOCK_WORDS] = {};
- u8 *p = memcpy(block, src, offset);
- __le32 *dst = (__le32 *)out;
- __le64 *pbits;
-
- src = p;
- p += offset;
- *p++ = 0x80;
- sctx->byte_count += offset;
- pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1];
- *pbits = cpu_to_le64(sctx->byte_count << 3);
- ppc_md5_transform(sctx->hash, src, (pbits - block + 1) / 8);
- memzero_explicit(block, sizeof(block));
-
- dst[0] = cpu_to_le32(sctx->hash[0]);
- dst[1] = cpu_to_le32(sctx->hash[1]);
- dst[2] = cpu_to_le32(sctx->hash[2]);
- dst[3] = cpu_to_le32(sctx->hash[3]);
- return 0;
-}
-
-static struct shash_alg alg = {
- .digestsize = MD5_DIGEST_SIZE,
- .init = ppc_md5_init,
- .update = ppc_md5_update,
- .finup = ppc_md5_finup,
- .descsize = MD5_STATE_SIZE,
- .base = {
- .cra_name = "md5",
- .cra_driver_name= "md5-ppc",
- .cra_priority = 200,
- .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY,
- .cra_blocksize = MD5_HMAC_BLOCK_SIZE,
- .cra_module = THIS_MODULE,
- }
-};
-
-static int __init ppc_md5_mod_init(void)
-{
- return crypto_register_shash(&alg);
-}
-
-static void __exit ppc_md5_mod_fini(void)
-{
- crypto_unregister_shash(&alg);
-}
-
-module_init(ppc_md5_mod_init);
-module_exit(ppc_md5_mod_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler");
-
-MODULE_ALIAS_CRYPTO("md5");
-MODULE_ALIAS_CRYPTO("md5-ppc");
--
2.50.1
On Sun, Aug 03, 2025 at 01:44:29PM -0700, Eric Biggers wrote: > MD5 is insecure, Really? Have you found an attack? Can you explain it to the rest of the world? MD5 is not recommended for future cryptographic purposes, there have been some collission "attacks" on it (quotes because such a thing is never an attack at all, merely an indication that not all is well with it, somewhere in the future an actual vulnerability might be found). Since there are newer, better, *cheaper* alternatives available, of course you should not use MD5 for anything new anymore. But claiming it is insecure is FUD. > This commit removes the PowerPC optimized MD5 code. Why? It would help to have real arguments for it! If you just want to remove code you have no interest in, there are much bigger targets available... Segher
On Sun, Aug 03, 2025 at 05:07:10PM -0500, Segher Boessenkool wrote: > On Sun, Aug 03, 2025 at 01:44:29PM -0700, Eric Biggers wrote: > > MD5 is insecure, > > Really? Have you found an attack? Can you explain it to the rest of > the world? > > MD5 is not recommended for future cryptographic purposes, there have > been some collission "attacks" on it (quotes because such a thing is > never an attack at all, merely an indication that not all is well with > it, somewhere in the future an actual vulnerability might be found). > > Since there are newer, better, *cheaper* alternatives available, of > course you should not use MD5 for anything new anymore. But claiming it > is insecure is FUD. Many attacks, including practical attacks, have been found on MD5 over the past few decades. Check out https://en.wikipedia.org/wiki/MD5 > > This commit removes the PowerPC optimized MD5 code. > > Why? It would help to have real arguments for it! Sure, check out the commit message which mentioned multiple reasons why maintaining this code is not worthwhile. - Eric
On Sun, Aug 03, 2025 at 03:14:38PM -0700, Eric Biggers wrote: > On Sun, Aug 03, 2025 at 05:07:10PM -0500, Segher Boessenkool wrote: > > On Sun, Aug 03, 2025 at 01:44:29PM -0700, Eric Biggers wrote: > > > MD5 is insecure, > > > > Really? Have you found an attack? Can you explain it to the rest of > > the world? > > > > MD5 is not recommended for future cryptographic purposes, there have > > been some collission "attacks" on it (quotes because such a thing is > > never an attack at all, merely an indication that not all is well with > > it, somewhere in the future an actual vulnerability might be found). > > > > Since there are newer, better, *cheaper* alternatives available, of > > course you should not use MD5 for anything new anymore. But claiming it > > is insecure is FUD. > > Many attacks, including practical attacks, have been found on MD5 over > the past few decades. Check out https://en.wikipedia.org/wiki/MD5 There is no new information on that page. There are no practical attacks mentioned there, either, just some collission things (which never can be practical attacks for most applications). > > > This commit removes the PowerPC optimized MD5 code. > > > > Why? It would help to have real arguments for it! > > Sure, check out the commit message which mentioned multiple reasons why > maintaining this code is not worthwhile. Of course I have read that, but that information went missing, if you intended to provide it :-( You are replacing a known-working target implementation by a lower performance generic implementation. But is that one known-working at all? Does it come with tests? Was it tested to have the same outputs as the existing thing, maybe? Just on a few inputs maybe. We were not told anything like that. Segher
On Sun, Aug 03, 2025 at 05:27:01PM -0500, Segher Boessenkool wrote: > You are replacing a known-working target implementation by a lower > performance generic implementation. That's probably correct, though FWIW there have been quite a few cases where optimized assembly code in the kernel actually turned out to be slower than the C code. (That primarily happens when the assembly code doesn't take advantage of any special CPU features, which was the case for this PowerPC code.) I don't have PowerPC hardware to check the exact performance differential here, but IMO even if there was a slowdown the factors still weigh strongly in favor of retiring this. > But is that one known-working at all? Does it come with tests? Was > it tested to have the same outputs as the existing thing, maybe? Just > on a few inputs maybe. Of course. Patch 7 adds a KUnit test suite for MD5, and there are still the older tests in crypto/testmgr.c. And of course generic code is much easier to test than arch-specific code. So not only is it tested, but the test coverage is much better than it was before. - Eric
Le 03/08/2025 à 22:44, Eric Biggers a écrit : > MD5 is insecure, is no longer commonly used, and has never been > optimized for the most common architectures in the kernel. Only mips, > powerpc, and sparc have optimized MD5 code in the kernel. Of these, > only the powerpc one is actually testable in QEMU. The mips one works > only on Cavium Octeon SoCs. > > Taken together, it's clear that it's time to retire these additional MD5 > implementations, and focus maintenance on the MD5 generic C code. Sorry, for me it is not that clear. Even if MD5 is depracated we still have several applications that use MD5 for various reasons on our boards. I ran the test on kernel v6.16 with following file: # ls -l avion.au -rw------- 1 root root 12130159 Jan 1 1970 avion.au With CONFIG_CRYPTO_MD5_PPC: # time md5sum avion.au 6513851d6109d42477b20cd56bf57f28 avion.au real 0m 1.02s user 0m 0.01s sys 0m 1.01s Without CONFIG_CRYPTO_MD5_PPC: # time md5sum avion.au 6513851d6109d42477b20cd56bf57f28 avion.au real 0m 1.35s user 0m 0.01s sys 0m 1.34s I think the difference is big enough to consider keeping optimised MD5 code. Christophe > > This commit removes the PowerPC optimized MD5 code. > > Signed-off-by: Eric Biggers <ebiggers@kernel.org> > --- > arch/powerpc/configs/powernv_defconfig | 1 - > arch/powerpc/configs/ppc64_defconfig | 1 - > arch/powerpc/crypto/Kconfig | 8 - > arch/powerpc/crypto/Makefile | 2 - > arch/powerpc/crypto/md5-asm.S | 235 ------------------------- > arch/powerpc/crypto/md5-glue.c | 99 ----------- > 6 files changed, 346 deletions(-) > delete mode 100644 arch/powerpc/crypto/md5-asm.S > delete mode 100644 arch/powerpc/crypto/md5-glue.c > > diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig > index d06388b0f66e3..bd4685612de6d 100644 > --- a/arch/powerpc/configs/powernv_defconfig > +++ b/arch/powerpc/configs/powernv_defconfig > @@ -318,11 +318,10 @@ CONFIG_FTR_FIXUP_SELFTEST=y > CONFIG_MSI_BITMAP_SELFTEST=y > CONFIG_XMON=y > CONFIG_CRYPTO_BENCHMARK=m > CONFIG_CRYPTO_PCBC=m > CONFIG_CRYPTO_HMAC=y > -CONFIG_CRYPTO_MD5_PPC=m > CONFIG_CRYPTO_MICHAEL_MIC=m > CONFIG_CRYPTO_SHA256=y > CONFIG_CRYPTO_WP512=m > CONFIG_CRYPTO_ANUBIS=m > CONFIG_CRYPTO_BLOWFISH=m > diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig > index ce34597e9f3e1..2d92c11eea7e4 100644 > --- a/arch/powerpc/configs/ppc64_defconfig > +++ b/arch/powerpc/configs/ppc64_defconfig > @@ -385,11 +385,10 @@ CONFIG_CRYPTO_TWOFISH=m > CONFIG_CRYPTO_PCBC=m > CONFIG_CRYPTO_MICHAEL_MIC=m > CONFIG_CRYPTO_SHA256=y > CONFIG_CRYPTO_WP512=m > CONFIG_CRYPTO_LZO=m > -CONFIG_CRYPTO_MD5_PPC=m > CONFIG_CRYPTO_AES_GCM_P10=m > CONFIG_CRYPTO_DEV_NX=y > CONFIG_CRYPTO_DEV_NX_ENCRYPT=m > CONFIG_CRYPTO_DEV_VMX=y > CONFIG_SYSTEM_TRUSTED_KEYRING=y > diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig > index cfe39fc221cf8..f4b779c7352de 100644 > --- a/arch/powerpc/crypto/Kconfig > +++ b/arch/powerpc/crypto/Kconfig > @@ -13,18 +13,10 @@ config CRYPTO_CURVE25519_PPC64 > Curve25519 algorithm > > Architecture: PowerPC64 > - Little-endian > > -config CRYPTO_MD5_PPC > - tristate "Digests: MD5" > - select CRYPTO_HASH > - help > - MD5 message digest algorithm (RFC1321) > - > - Architecture: powerpc > - > config CRYPTO_AES_PPC_SPE > tristate "Ciphers: AES, modes: ECB/CBC/CTR/XTS (SPE)" > depends on SPE > select CRYPTO_SKCIPHER > help > diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile > index bc8fd27344b8b..9eb59dce67f36 100644 > --- a/arch/powerpc/crypto/Makefile > +++ b/arch/powerpc/crypto/Makefile > @@ -4,17 +4,15 @@ > # > # Arch-specific CryptoAPI modules. > # > > obj-$(CONFIG_CRYPTO_AES_PPC_SPE) += aes-ppc-spe.o > -obj-$(CONFIG_CRYPTO_MD5_PPC) += md5-ppc.o > obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o > obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o > obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) += curve25519-ppc64le.o > > aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o > -md5-ppc-y := md5-asm.o md5-glue.o > aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o > vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o > curve25519-ppc64le-y := curve25519-ppc64le-core.o curve25519-ppc64le_asm.o > > ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) > diff --git a/arch/powerpc/crypto/md5-asm.S b/arch/powerpc/crypto/md5-asm.S > deleted file mode 100644 > index fa6bc440cf4ac..0000000000000 > --- a/arch/powerpc/crypto/md5-asm.S > +++ /dev/null > @@ -1,235 +0,0 @@ > -/* SPDX-License-Identifier: GPL-2.0-or-later */ > -/* > - * Fast MD5 implementation for PPC > - * > - * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> > - */ > -#include <asm/ppc_asm.h> > -#include <asm/asm-offsets.h> > -#include <asm/asm-compat.h> > - > -#define rHP r3 > -#define rWP r4 > - > -#define rH0 r0 > -#define rH1 r6 > -#define rH2 r7 > -#define rH3 r5 > - > -#define rW00 r8 > -#define rW01 r9 > -#define rW02 r10 > -#define rW03 r11 > -#define rW04 r12 > -#define rW05 r14 > -#define rW06 r15 > -#define rW07 r16 > -#define rW08 r17 > -#define rW09 r18 > -#define rW10 r19 > -#define rW11 r20 > -#define rW12 r21 > -#define rW13 r22 > -#define rW14 r23 > -#define rW15 r24 > - > -#define rT0 r25 > -#define rT1 r26 > - > -#define INITIALIZE \ > - PPC_STLU r1,-INT_FRAME_SIZE(r1); \ > - SAVE_GPRS(14, 26, r1) /* push registers onto stack */ > - > -#define FINALIZE \ > - REST_GPRS(14, 26, r1); /* pop registers from stack */ \ > - addi r1,r1,INT_FRAME_SIZE > - > -#ifdef __BIG_ENDIAN__ > -#define LOAD_DATA(reg, off) \ > - lwbrx reg,0,rWP; /* load data */ > -#define INC_PTR \ > - addi rWP,rWP,4; /* increment per word */ > -#define NEXT_BLOCK /* nothing to do */ > -#else > -#define LOAD_DATA(reg, off) \ > - lwz reg,off(rWP); /* load data */ > -#define INC_PTR /* nothing to do */ > -#define NEXT_BLOCK \ > - addi rWP,rWP,64; /* increment per block */ > -#endif > - > -#define R_00_15(a, b, c, d, w0, w1, p, q, off, k0h, k0l, k1h, k1l) \ > - LOAD_DATA(w0, off) /* W */ \ > - and rT0,b,c; /* 1: f = b and c */ \ > - INC_PTR /* ptr++ */ \ > - andc rT1,d,b; /* 1: f' = ~b and d */ \ > - LOAD_DATA(w1, off+4) /* W */ \ > - or rT0,rT0,rT1; /* 1: f = f or f' */ \ > - addi w0,w0,k0l; /* 1: wk = w + k */ \ > - add a,a,rT0; /* 1: a = a + f */ \ > - addis w0,w0,k0h; /* 1: wk = w + k' */ \ > - addis w1,w1,k1h; /* 2: wk = w + k */ \ > - add a,a,w0; /* 1: a = a + wk */ \ > - addi w1,w1,k1l; /* 2: wk = w + k' */ \ > - rotrwi a,a,p; /* 1: a = a rotl x */ \ > - add d,d,w1; /* 2: a = a + wk */ \ > - add a,a,b; /* 1: a = a + b */ \ > - and rT0,a,b; /* 2: f = b and c */ \ > - andc rT1,c,a; /* 2: f' = ~b and d */ \ > - or rT0,rT0,rT1; /* 2: f = f or f' */ \ > - add d,d,rT0; /* 2: a = a + f */ \ > - INC_PTR /* ptr++ */ \ > - rotrwi d,d,q; /* 2: a = a rotl x */ \ > - add d,d,a; /* 2: a = a + b */ > - > -#define R_16_31(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ > - andc rT0,c,d; /* 1: f = c and ~d */ \ > - and rT1,b,d; /* 1: f' = b and d */ \ > - addi w0,w0,k0l; /* 1: wk = w + k */ \ > - or rT0,rT0,rT1; /* 1: f = f or f' */ \ > - addis w0,w0,k0h; /* 1: wk = w + k' */ \ > - add a,a,rT0; /* 1: a = a + f */ \ > - addi w1,w1,k1l; /* 2: wk = w + k */ \ > - add a,a,w0; /* 1: a = a + wk */ \ > - addis w1,w1,k1h; /* 2: wk = w + k' */ \ > - andc rT0,b,c; /* 2: f = c and ~d */ \ > - rotrwi a,a,p; /* 1: a = a rotl x */ \ > - add a,a,b; /* 1: a = a + b */ \ > - add d,d,w1; /* 2: a = a + wk */ \ > - and rT1,a,c; /* 2: f' = b and d */ \ > - or rT0,rT0,rT1; /* 2: f = f or f' */ \ > - add d,d,rT0; /* 2: a = a + f */ \ > - rotrwi d,d,q; /* 2: a = a rotl x */ \ > - add d,d,a; /* 2: a = a +b */ > - > -#define R_32_47(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ > - xor rT0,b,c; /* 1: f' = b xor c */ \ > - addi w0,w0,k0l; /* 1: wk = w + k */ \ > - xor rT1,rT0,d; /* 1: f = f xor f' */ \ > - addis w0,w0,k0h; /* 1: wk = w + k' */ \ > - add a,a,rT1; /* 1: a = a + f */ \ > - addi w1,w1,k1l; /* 2: wk = w + k */ \ > - add a,a,w0; /* 1: a = a + wk */ \ > - addis w1,w1,k1h; /* 2: wk = w + k' */ \ > - rotrwi a,a,p; /* 1: a = a rotl x */ \ > - add d,d,w1; /* 2: a = a + wk */ \ > - add a,a,b; /* 1: a = a + b */ \ > - xor rT1,rT0,a; /* 2: f = b xor f' */ \ > - add d,d,rT1; /* 2: a = a + f */ \ > - rotrwi d,d,q; /* 2: a = a rotl x */ \ > - add d,d,a; /* 2: a = a + b */ > - > -#define R_48_63(a, b, c, d, w0, w1, p, q, k0h, k0l, k1h, k1l) \ > - addi w0,w0,k0l; /* 1: w = w + k */ \ > - orc rT0,b,d; /* 1: f = b or ~d */ \ > - addis w0,w0,k0h; /* 1: w = w + k' */ \ > - xor rT0,rT0,c; /* 1: f = f xor c */ \ > - add a,a,w0; /* 1: a = a + wk */ \ > - addi w1,w1,k1l; /* 2: w = w + k */ \ > - add a,a,rT0; /* 1: a = a + f */ \ > - addis w1,w1,k1h; /* 2: w = w + k' */ \ > - rotrwi a,a,p; /* 1: a = a rotl x */ \ > - add a,a,b; /* 1: a = a + b */ \ > - orc rT0,a,c; /* 2: f = b or ~d */ \ > - add d,d,w1; /* 2: a = a + wk */ \ > - xor rT0,rT0,b; /* 2: f = f xor c */ \ > - add d,d,rT0; /* 2: a = a + f */ \ > - rotrwi d,d,q; /* 2: a = a rotl x */ \ > - add d,d,a; /* 2: a = a + b */ > - > -_GLOBAL(ppc_md5_transform) > - INITIALIZE > - > - mtctr r5 > - lwz rH0,0(rHP) > - lwz rH1,4(rHP) > - lwz rH2,8(rHP) > - lwz rH3,12(rHP) > - > -ppc_md5_main: > - R_00_15(rH0, rH1, rH2, rH3, rW00, rW01, 25, 20, 0, > - 0xd76b, -23432, 0xe8c8, -18602) > - R_00_15(rH2, rH3, rH0, rH1, rW02, rW03, 15, 10, 8, > - 0x2420, 0x70db, 0xc1be, -12562) > - R_00_15(rH0, rH1, rH2, rH3, rW04, rW05, 25, 20, 16, > - 0xf57c, 0x0faf, 0x4788, -14806) > - R_00_15(rH2, rH3, rH0, rH1, rW06, rW07, 15, 10, 24, > - 0xa830, 0x4613, 0xfd47, -27391) > - R_00_15(rH0, rH1, rH2, rH3, rW08, rW09, 25, 20, 32, > - 0x6981, -26408, 0x8b45, -2129) > - R_00_15(rH2, rH3, rH0, rH1, rW10, rW11, 15, 10, 40, > - 0xffff, 0x5bb1, 0x895d, -10306) > - R_00_15(rH0, rH1, rH2, rH3, rW12, rW13, 25, 20, 48, > - 0x6b90, 0x1122, 0xfd98, 0x7193) > - R_00_15(rH2, rH3, rH0, rH1, rW14, rW15, 15, 10, 56, > - 0xa679, 0x438e, 0x49b4, 0x0821) > - > - R_16_31(rH0, rH1, rH2, rH3, rW01, rW06, 27, 23, > - 0x0d56, 0x6e0c, 0x1810, 0x6d2d) > - R_16_31(rH2, rH3, rH0, rH1, rW11, rW00, 18, 12, > - 0x9d02, -32109, 0x124c, 0x2332) > - R_16_31(rH0, rH1, rH2, rH3, rW05, rW10, 27, 23, > - 0x8ea7, 0x4a33, 0x0245, -18270) > - R_16_31(rH2, rH3, rH0, rH1, rW15, rW04, 18, 12, > - 0x8eee, -8608, 0xf258, -5095) > - R_16_31(rH0, rH1, rH2, rH3, rW09, rW14, 27, 23, > - 0x969d, -10697, 0x1cbe, -15288) > - R_16_31(rH2, rH3, rH0, rH1, rW03, rW08, 18, 12, > - 0x3317, 0x3e99, 0xdbd9, 0x7c15) > - R_16_31(rH0, rH1, rH2, rH3, rW13, rW02, 27, 23, > - 0xac4b, 0x7772, 0xd8cf, 0x331d) > - R_16_31(rH2, rH3, rH0, rH1, rW07, rW12, 18, 12, > - 0x6a28, 0x6dd8, 0x219a, 0x3b68) > - > - R_32_47(rH0, rH1, rH2, rH3, rW05, rW08, 28, 21, > - 0x29cb, 0x28e5, 0x4218, -7788) > - R_32_47(rH2, rH3, rH0, rH1, rW11, rW14, 16, 9, > - 0x473f, 0x06d1, 0x3aae, 0x3036) > - R_32_47(rH0, rH1, rH2, rH3, rW01, rW04, 28, 21, > - 0xaea1, -15134, 0x640b, -11295) > - R_32_47(rH2, rH3, rH0, rH1, rW07, rW10, 16, 9, > - 0x8f4c, 0x4887, 0xbc7c, -22499) > - R_32_47(rH0, rH1, rH2, rH3, rW13, rW00, 28, 21, > - 0x7eb8, -27199, 0x00ea, 0x6050) > - R_32_47(rH2, rH3, rH0, rH1, rW03, rW06, 16, 9, > - 0xe01a, 0x22fe, 0x4447, 0x69c5) > - R_32_47(rH0, rH1, rH2, rH3, rW09, rW12, 28, 21, > - 0xb7f3, 0x0253, 0x59b1, 0x4d5b) > - R_32_47(rH2, rH3, rH0, rH1, rW15, rW02, 16, 9, > - 0x4701, -27017, 0xc7bd, -19859) > - > - R_48_63(rH0, rH1, rH2, rH3, rW00, rW07, 26, 22, > - 0x0988, -1462, 0x4c70, -19401) > - R_48_63(rH2, rH3, rH0, rH1, rW14, rW05, 17, 11, > - 0xadaf, -5221, 0xfc99, 0x66f7) > - R_48_63(rH0, rH1, rH2, rH3, rW12, rW03, 26, 22, > - 0x7e80, -16418, 0xba1e, -25587) > - R_48_63(rH2, rH3, rH0, rH1, rW10, rW01, 17, 11, > - 0x4130, 0x380d, 0xe0c5, 0x738d) > - lwz rW00,0(rHP) > - R_48_63(rH0, rH1, rH2, rH3, rW08, rW15, 26, 22, > - 0xe837, -30770, 0xde8a, 0x69e8) > - lwz rW14,4(rHP) > - R_48_63(rH2, rH3, rH0, rH1, rW06, rW13, 17, 11, > - 0x9e79, 0x260f, 0x256d, -27941) > - lwz rW12,8(rHP) > - R_48_63(rH0, rH1, rH2, rH3, rW04, rW11, 26, 22, > - 0xab75, -20775, 0x4f9e, -28397) > - lwz rW10,12(rHP) > - R_48_63(rH2, rH3, rH0, rH1, rW02, rW09, 17, 11, > - 0x662b, 0x7c56, 0x11b2, 0x0358) > - > - add rH0,rH0,rW00 > - stw rH0,0(rHP) > - add rH1,rH1,rW14 > - stw rH1,4(rHP) > - add rH2,rH2,rW12 > - stw rH2,8(rHP) > - add rH3,rH3,rW10 > - stw rH3,12(rHP) > - NEXT_BLOCK > - > - bdnz ppc_md5_main > - > - FINALIZE > - blr > diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c > deleted file mode 100644 > index 204440a90cd84..0000000000000 > --- a/arch/powerpc/crypto/md5-glue.c > +++ /dev/null > @@ -1,99 +0,0 @@ > -// SPDX-License-Identifier: GPL-2.0-or-later > -/* > - * Glue code for MD5 implementation for PPC assembler > - * > - * Based on generic implementation. > - * > - * Copyright (c) 2015 Markus Stockhausen <stockhausen@collogia.de> > - */ > - > -#include <crypto/internal/hash.h> > -#include <crypto/md5.h> > -#include <linux/kernel.h> > -#include <linux/module.h> > -#include <linux/string.h> > - > -extern void ppc_md5_transform(u32 *state, const u8 *src, u32 blocks); > - > -static int ppc_md5_init(struct shash_desc *desc) > -{ > - struct md5_state *sctx = shash_desc_ctx(desc); > - > - sctx->hash[0] = MD5_H0; > - sctx->hash[1] = MD5_H1; > - sctx->hash[2] = MD5_H2; > - sctx->hash[3] = MD5_H3; > - sctx->byte_count = 0; > - > - return 0; > -} > - > -static int ppc_md5_update(struct shash_desc *desc, const u8 *data, > - unsigned int len) > -{ > - struct md5_state *sctx = shash_desc_ctx(desc); > - > - sctx->byte_count += round_down(len, MD5_HMAC_BLOCK_SIZE); > - ppc_md5_transform(sctx->hash, data, len >> 6); > - return len - round_down(len, MD5_HMAC_BLOCK_SIZE); > -} > - > -static int ppc_md5_finup(struct shash_desc *desc, const u8 *src, > - unsigned int offset, u8 *out) > -{ > - struct md5_state *sctx = shash_desc_ctx(desc); > - __le64 block[MD5_BLOCK_WORDS] = {}; > - u8 *p = memcpy(block, src, offset); > - __le32 *dst = (__le32 *)out; > - __le64 *pbits; > - > - src = p; > - p += offset; > - *p++ = 0x80; > - sctx->byte_count += offset; > - pbits = &block[(MD5_BLOCK_WORDS / (offset > 55 ? 1 : 2)) - 1]; > - *pbits = cpu_to_le64(sctx->byte_count << 3); > - ppc_md5_transform(sctx->hash, src, (pbits - block + 1) / 8); > - memzero_explicit(block, sizeof(block)); > - > - dst[0] = cpu_to_le32(sctx->hash[0]); > - dst[1] = cpu_to_le32(sctx->hash[1]); > - dst[2] = cpu_to_le32(sctx->hash[2]); > - dst[3] = cpu_to_le32(sctx->hash[3]); > - return 0; > -} > - > -static struct shash_alg alg = { > - .digestsize = MD5_DIGEST_SIZE, > - .init = ppc_md5_init, > - .update = ppc_md5_update, > - .finup = ppc_md5_finup, > - .descsize = MD5_STATE_SIZE, > - .base = { > - .cra_name = "md5", > - .cra_driver_name= "md5-ppc", > - .cra_priority = 200, > - .cra_flags = CRYPTO_AHASH_ALG_BLOCK_ONLY, > - .cra_blocksize = MD5_HMAC_BLOCK_SIZE, > - .cra_module = THIS_MODULE, > - } > -}; > - > -static int __init ppc_md5_mod_init(void) > -{ > - return crypto_register_shash(&alg); > -} > - > -static void __exit ppc_md5_mod_fini(void) > -{ > - crypto_unregister_shash(&alg); > -} > - > -module_init(ppc_md5_mod_init); > -module_exit(ppc_md5_mod_fini); > - > -MODULE_LICENSE("GPL"); > -MODULE_DESCRIPTION("MD5 Secure Hash Algorithm, PPC assembler"); > - > -MODULE_ALIAS_CRYPTO("md5"); > -MODULE_ALIAS_CRYPTO("md5-ppc");
On Mon, Aug 04, 2025 at 07:42:15PM +0200, Christophe Leroy wrote: > > > Le 03/08/2025 à 22:44, Eric Biggers a écrit : > > MD5 is insecure, is no longer commonly used, and has never been > > optimized for the most common architectures in the kernel. Only mips, > > powerpc, and sparc have optimized MD5 code in the kernel. Of these, > > only the powerpc one is actually testable in QEMU. The mips one works > > only on Cavium Octeon SoCs. > > > > Taken together, it's clear that it's time to retire these additional MD5 > > implementations, and focus maintenance on the MD5 generic C code. > > Sorry, for me it is not that clear. Even if MD5 is depracated we still have > several applications that use MD5 for various reasons on our boards. > > I ran the test on kernel v6.16 with following file: > > # ls -l avion.au > -rw------- 1 root root 12130159 Jan 1 1970 avion.au > > With CONFIG_CRYPTO_MD5_PPC: > > # time md5sum avion.au > 6513851d6109d42477b20cd56bf57f28 avion.au > real 0m 1.02s > user 0m 0.01s > sys 0m 1.01s > > Without CONFIG_CRYPTO_MD5_PPC: > > # time md5sum avion.au > 6513851d6109d42477b20cd56bf57f28 avion.au > real 0m 1.35s > user 0m 0.01s > sys 0m 1.34s > > I think the difference is big enough to consider keeping optimised MD5 code. But md5sum doesn't use the kernel's MD5 code. So it's implausible that it has any effect on md5sum. The difference you saw must be due to an unrelated reason like I/O caching, CPU frequency, etc. Try running your test multiple times to eliminate other sources of variation. - Eric
Le 04/08/2025 à 20:09, Eric Biggers a écrit : > On Mon, Aug 04, 2025 at 07:42:15PM +0200, Christophe Leroy wrote: >> >> >> Le 03/08/2025 à 22:44, Eric Biggers a écrit : >>> MD5 is insecure, is no longer commonly used, and has never been >>> optimized for the most common architectures in the kernel. Only mips, >>> powerpc, and sparc have optimized MD5 code in the kernel. Of these, >>> only the powerpc one is actually testable in QEMU. The mips one works >>> only on Cavium Octeon SoCs. >>> >>> Taken together, it's clear that it's time to retire these additional MD5 >>> implementations, and focus maintenance on the MD5 generic C code. >> >> Sorry, for me it is not that clear. Even if MD5 is depracated we still have >> several applications that use MD5 for various reasons on our boards. >> >> I ran the test on kernel v6.16 with following file: >> >> # ls -l avion.au >> -rw------- 1 root root 12130159 Jan 1 1970 avion.au >> >> With CONFIG_CRYPTO_MD5_PPC: >> >> # time md5sum avion.au >> 6513851d6109d42477b20cd56bf57f28 avion.au >> real 0m 1.02s >> user 0m 0.01s >> sys 0m 1.01s >> >> Without CONFIG_CRYPTO_MD5_PPC: >> >> # time md5sum avion.au >> 6513851d6109d42477b20cd56bf57f28 avion.au >> real 0m 1.35s >> user 0m 0.01s >> sys 0m 1.34s >> >> I think the difference is big enough to consider keeping optimised MD5 code. > > But md5sum doesn't use the kernel's MD5 code. So it's implausible that > it has any effect on md5sum. The difference you saw must be due to an > unrelated reason like I/O caching, CPU frequency, etc. Try running your > test multiple times to eliminate other sources of variation. md5sum uses the kernel's MD5 code: # ldd `which md5sum` linux-vdso32.so.1 (0x77b90000) libkcapi.so.1 => /usr/lib/libkcapi.so.1 (0x6ffa0000) <== libcrypt.so.1 => /lib/libcrypt.so.1 (0x6ff50000) libc.so.6 => /lib/libc.so.6 (0x6fd10000) /lib/ld.so.1 => //lib/ld.so.1 (0x77ba0000) Previous test was on an mpc8xx. I now did the test on mpc832x and the difference is even bigger: With CONFIG_CRYPTO_MD5_PPC: # time md5sum avion.au 6513851d6109d42477b20cd56bf57f28 avion.au real 0m 0.41s user 0m 0.00s sys 0m 0.34s Without CONFIG_CRYPTO_MD5_PPC: # time md5sum avion.au 6513851d6109d42477b20cd56bf57f28 avion.au real 0m 0.58s user 0m 0.00s sys 0m 0.47s Christophe
On Mon, Aug 04, 2025 at 09:02:27PM +0200, Christophe Leroy wrote: > > > Le 04/08/2025 à 20:09, Eric Biggers a écrit : > > On Mon, Aug 04, 2025 at 07:42:15PM +0200, Christophe Leroy wrote: > > > > > > > > > Le 03/08/2025 à 22:44, Eric Biggers a écrit : > > > > MD5 is insecure, is no longer commonly used, and has never been > > > > optimized for the most common architectures in the kernel. Only mips, > > > > powerpc, and sparc have optimized MD5 code in the kernel. Of these, > > > > only the powerpc one is actually testable in QEMU. The mips one works > > > > only on Cavium Octeon SoCs. > > > > > > > > Taken together, it's clear that it's time to retire these additional MD5 > > > > implementations, and focus maintenance on the MD5 generic C code. > > > > > > Sorry, for me it is not that clear. Even if MD5 is depracated we still have > > > several applications that use MD5 for various reasons on our boards. > > > > > > I ran the test on kernel v6.16 with following file: > > > > > > # ls -l avion.au > > > -rw------- 1 root root 12130159 Jan 1 1970 avion.au > > > > > > With CONFIG_CRYPTO_MD5_PPC: > > > > > > # time md5sum avion.au > > > 6513851d6109d42477b20cd56bf57f28 avion.au > > > real 0m 1.02s > > > user 0m 0.01s > > > sys 0m 1.01s > > > > > > Without CONFIG_CRYPTO_MD5_PPC: > > > > > > # time md5sum avion.au > > > 6513851d6109d42477b20cd56bf57f28 avion.au > > > real 0m 1.35s > > > user 0m 0.01s > > > sys 0m 1.34s > > > > > > I think the difference is big enough to consider keeping optimised MD5 code. > > > > But md5sum doesn't use the kernel's MD5 code. So it's implausible that > > it has any effect on md5sum. The difference you saw must be due to an > > unrelated reason like I/O caching, CPU frequency, etc. Try running your > > test multiple times to eliminate other sources of variation. > > md5sum uses the kernel's MD5 code: What? That's crazy. Userspace MD5 code would be faster and more reliable. No need to make syscalls, transfer data to and from the kernel, have an external dependency, etc. Is this the coreutils md5sum? We need to get this reported and fixed. - Eric
On Mon, Aug 04, 2025 at 10:59:01PM +0000, Eric Biggers wrote: > On Mon, Aug 04, 2025 at 09:02:27PM +0200, Christophe Leroy wrote: > > > > > > Le 04/08/2025 à 20:09, Eric Biggers a écrit : > > > On Mon, Aug 04, 2025 at 07:42:15PM +0200, Christophe Leroy wrote: > > > > > > > > > > > > Le 03/08/2025 à 22:44, Eric Biggers a écrit : > > > > > MD5 is insecure, is no longer commonly used, and has never been > > > > > optimized for the most common architectures in the kernel. Only mips, > > > > > powerpc, and sparc have optimized MD5 code in the kernel. Of these, > > > > > only the powerpc one is actually testable in QEMU. The mips one works > > > > > only on Cavium Octeon SoCs. > > > > > > > > > > Taken together, it's clear that it's time to retire these additional MD5 > > > > > implementations, and focus maintenance on the MD5 generic C code. > > > > > > > > Sorry, for me it is not that clear. Even if MD5 is depracated we still have > > > > several applications that use MD5 for various reasons on our boards. > > > > > > > > I ran the test on kernel v6.16 with following file: > > > > > > > > # ls -l avion.au > > > > -rw------- 1 root root 12130159 Jan 1 1970 avion.au > > > > > > > > With CONFIG_CRYPTO_MD5_PPC: > > > > > > > > # time md5sum avion.au > > > > 6513851d6109d42477b20cd56bf57f28 avion.au > > > > real 0m 1.02s > > > > user 0m 0.01s > > > > sys 0m 1.01s > > > > > > > > Without CONFIG_CRYPTO_MD5_PPC: > > > > > > > > # time md5sum avion.au > > > > 6513851d6109d42477b20cd56bf57f28 avion.au > > > > real 0m 1.35s > > > > user 0m 0.01s > > > > sys 0m 1.34s > > > > > > > > I think the difference is big enough to consider keeping optimised MD5 code. > > > > > > But md5sum doesn't use the kernel's MD5 code. So it's implausible that > > > it has any effect on md5sum. The difference you saw must be due to an > > > unrelated reason like I/O caching, CPU frequency, etc. Try running your > > > test multiple times to eliminate other sources of variation. > > > > md5sum uses the kernel's MD5 code: > > > > libkcapi.so.1 => /usr/lib/libkcapi.so.1 (0x6ffa0000) <== Oh, I think you used the obscure implementation of md5sum from libkcapi-tools, instead of the normal md5sum. Why? Did you check how the normal md5sum performs too? Just doing the calculation in userspace is much more efficient. - Eric
Le 05/08/2025 à 01:09, Eric Biggers a écrit : > On Mon, Aug 04, 2025 at 10:59:01PM +0000, Eric Biggers wrote: >> On Mon, Aug 04, 2025 at 09:02:27PM +0200, Christophe Leroy wrote: >>> >>> >>> Le 04/08/2025 à 20:09, Eric Biggers a écrit : >>>> On Mon, Aug 04, 2025 at 07:42:15PM +0200, Christophe Leroy wrote: >>>>> >>>>> >>>>> Le 03/08/2025 à 22:44, Eric Biggers a écrit : >>>>>> MD5 is insecure, is no longer commonly used, and has never been >>>>>> optimized for the most common architectures in the kernel. Only mips, >>>>>> powerpc, and sparc have optimized MD5 code in the kernel. Of these, >>>>>> only the powerpc one is actually testable in QEMU. The mips one works >>>>>> only on Cavium Octeon SoCs. >>>>>> >>>>>> Taken together, it's clear that it's time to retire these additional MD5 >>>>>> implementations, and focus maintenance on the MD5 generic C code. >>>>> >>>>> Sorry, for me it is not that clear. Even if MD5 is depracated we still have >>>>> several applications that use MD5 for various reasons on our boards. >>>>> >>>>> I ran the test on kernel v6.16 with following file: >>>>> >>>>> # ls -l avion.au >>>>> -rw------- 1 root root 12130159 Jan 1 1970 avion.au >>>>> >>>>> With CONFIG_CRYPTO_MD5_PPC: >>>>> >>>>> # time md5sum avion.au >>>>> 6513851d6109d42477b20cd56bf57f28 avion.au >>>>> real 0m 1.02s >>>>> user 0m 0.01s >>>>> sys 0m 1.01s >>>>> >>>>> Without CONFIG_CRYPTO_MD5_PPC: >>>>> >>>>> # time md5sum avion.au >>>>> 6513851d6109d42477b20cd56bf57f28 avion.au >>>>> real 0m 1.35s >>>>> user 0m 0.01s >>>>> sys 0m 1.34s >>>>> >>>>> I think the difference is big enough to consider keeping optimised MD5 code. >>>> >>>> But md5sum doesn't use the kernel's MD5 code. So it's implausible that >>>> it has any effect on md5sum. The difference you saw must be due to an >>>> unrelated reason like I/O caching, CPU frequency, etc. Try running your >>>> test multiple times to eliminate other sources of variation. >>> >>> md5sum uses the kernel's MD5 code: >>> >>> libkcapi.so.1 => /usr/lib/libkcapi.so.1 (0x6ffa0000) <== > > Oh, I think you used the obscure implementation of md5sum from > libkcapi-tools, instead of the normal md5sum. Why? Did you check how > the normal md5sum performs too? Just doing the calculation in userspace > is much more efficient. Calculation in userspace is less efficient on my board: # time md5sum avion.au 6513851d6109d42477b20cd56bf57f28 avion.au real 0m 1.87s user 0m 1.51s sys 0m 0.35s Christophe
Le 05/08/2025 à 00:59, Eric Biggers a écrit : > On Mon, Aug 04, 2025 at 09:02:27PM +0200, Christophe Leroy wrote: >> >> >> Le 04/08/2025 à 20:09, Eric Biggers a écrit : >>> On Mon, Aug 04, 2025 at 07:42:15PM +0200, Christophe Leroy wrote: >>>> >>>> >>>> Le 03/08/2025 à 22:44, Eric Biggers a écrit : >>>>> MD5 is insecure, is no longer commonly used, and has never been >>>>> optimized for the most common architectures in the kernel. Only mips, >>>>> powerpc, and sparc have optimized MD5 code in the kernel. Of these, >>>>> only the powerpc one is actually testable in QEMU. The mips one works >>>>> only on Cavium Octeon SoCs. >>>>> >>>>> Taken together, it's clear that it's time to retire these additional MD5 >>>>> implementations, and focus maintenance on the MD5 generic C code. >>>> >>>> Sorry, for me it is not that clear. Even if MD5 is depracated we still have >>>> several applications that use MD5 for various reasons on our boards. >>>> >>>> I ran the test on kernel v6.16 with following file: >>>> >>>> # ls -l avion.au >>>> -rw------- 1 root root 12130159 Jan 1 1970 avion.au >>>> >>>> With CONFIG_CRYPTO_MD5_PPC: >>>> >>>> # time md5sum avion.au >>>> 6513851d6109d42477b20cd56bf57f28 avion.au >>>> real 0m 1.02s >>>> user 0m 0.01s >>>> sys 0m 1.01s >>>> >>>> Without CONFIG_CRYPTO_MD5_PPC: >>>> >>>> # time md5sum avion.au >>>> 6513851d6109d42477b20cd56bf57f28 avion.au >>>> real 0m 1.35s >>>> user 0m 0.01s >>>> sys 0m 1.34s >>>> >>>> I think the difference is big enough to consider keeping optimised MD5 code. >>> >>> But md5sum doesn't use the kernel's MD5 code. So it's implausible that >>> it has any effect on md5sum. The difference you saw must be due to an >>> unrelated reason like I/O caching, CPU frequency, etc. Try running your >>> test multiple times to eliminate other sources of variation. >> >> md5sum uses the kernel's MD5 code: > > What? That's crazy. Userspace MD5 code would be faster and more > reliable. No need to make syscalls, transfer data to and from the > kernel, have an external dependency, etc. Is this the coreutils md5sum? > We need to get this reported and fixed. Content of files is already buffered inside the kernel. likcapi doesn't tranfer data, it uses splice(). As far as I know, coreutil is not able to use the TALITOS Security engine we have on the mpc885 and mpc8321 microcontroleurs. We primarily use libkcapi for that. In order to keep things consistant, we use the same userspace on boards which don't have a security engine, ie the mpc866, we rely on the kernel providing an optimised software implementation fallback. Christophe
On Tue, Aug 05, 2025 at 08:27:14AM +0200, Christophe Leroy wrote: > > What? That's crazy. Userspace MD5 code would be faster and more > > reliable. No need to make syscalls, transfer data to and from the > > kernel, have an external dependency, etc. Is this the coreutils md5sum? > > We need to get this reported and fixed. > > Content of files is already buffered inside the kernel. likcapi doesn't > tranfer data, it uses splice(). > > As far as I know, coreutil is not able to use the TALITOS Security engine we > have on the mpc885 and mpc8321 microcontroleurs. We primarily use libkcapi > for that. > > In order to keep things consistant, we use the same userspace on boards > which don't have a security engine, ie the mpc866, we rely on the kernel > providing an optimised software implementation fallback. Even if the data is transferred in a zero-copy manner, the AF_ALG version still has a lot of overhead from system calls. Running the exact same PPC optimized code in userspace would of course be faster. Now, it sounds like you don't have the PPC optimized MD5 code in userspace, and that's why you're measuring the AF_ALG based md5sum to be faster. But that's just the wrong design, and it does not give optimal performance. The fact that the kernel allows access to software code via AF_ALG is basically a bug. For now we'll keep the PPC optimized MD5 around in the kernel to accomodate userspace code that is depending on this bug, but for future reference we don't add new software implementations purely for use by userspace. (And BTW this is nothing new; this has already been the policy for many years.) Please work to bring the optimized code you need into userspace. For example contribute it to OpenSSL. - Eric
Hi, On 8/5/25 07:59, Eric Biggers wrote: >> md5sum uses the kernel's MD5 code: > What? That's crazy. Userspace MD5 code would be faster and more > reliable. No need to make syscalls, transfer data to and from the > kernel, have an external dependency, etc. Is this the coreutils md5sum? > We need to get this reported and fixed. The userspace API allows zero-copy transfers from userspace, and AFAIK also directly operating on files without ever transferring the data to userspace (so we save one copy). Userspace requests are also where the asynchronous hardware offload units get to chomp on large blocks of data while the CPU is doing something else: $ time dd if=test.bin of=/dev/zero bs=1G # warm up caches real 0m1.541s user 0m0.000s sys 0m0.732s $ time gzip -9 <test.bin >test.bin.gz # compress with the CPU real 2m57.789s user 2m55.986s sys 0m1.508s $ time ./gzfht_test test.bin # compress with NEST unit real 0m3.207s user 0m0.584s sys 0m2.487s $ time gzip -d <test.bin.nx.gz >test.bin.nx # decompress with CPU real 1m0.103s user 0m57.990s sys 0m1.878s $ time ./gunz_test test.bin.gz # decompress with NEST unit real 0m2.722s user 0m0.200s sys 0m1.872s That's why I'm objecting to measuring the general usefulness of hardware crypto units by the standards of fscrypt, which has an artificial limitation of never submitting blocks larger than 4kB: there are other use cases that don't have that limitation, and where the overhead is negligible because it is incurred only once for a few gigabytes of data. That's why I suggested changing from a priority field to "speed" and "overhead" fields, and calculate priority for each application as (size/speed+overhead) -- smallest number wins, size is what the application expects to use as the typical request size (which for fscrypt and IPsec is on the small side, so it would always select the CPU unless there was a low-overhead offload engine available) This probably needs some adjustment to allow selecting a low-power implementation (e.g. on mobile, I'd want to use offloading for fscrypt even if it is slower), and model request batching which reduces the overhead in a busy system, but it should be a good start. Simon
On Tue, Aug 05, 2025 at 01:49:31PM +0900, Simon Richter wrote: > Hi, > > On 8/5/25 07:59, Eric Biggers wrote: > > > > md5sum uses the kernel's MD5 code: > > > What? That's crazy. Userspace MD5 code would be faster and more > > reliable. No need to make syscalls, transfer data to and from the > > kernel, have an external dependency, etc. Is this the coreutils md5sum? > > We need to get this reported and fixed. > > The userspace API allows zero-copy transfers from userspace, and AFAIK also > directly operating on files without ever transferring the data to userspace > (so we save one copy). > > Userspace requests are also where the asynchronous hardware offload units > get to chomp on large blocks of data while the CPU is doing something else: > > $ time dd if=test.bin of=/dev/zero bs=1G # warm up caches > real 0m1.541s > user 0m0.000s > sys 0m0.732s > > $ time gzip -9 <test.bin >test.bin.gz # compress with the CPU > real 2m57.789s > user 2m55.986s > sys 0m1.508s > > $ time ./gzfht_test test.bin # compress with NEST unit > real 0m3.207s > user 0m0.584s > sys 0m2.487s > > $ time gzip -d <test.bin.nx.gz >test.bin.nx # decompress with CPU > real 1m0.103s > user 0m57.990s > sys 0m1.878s > > $ time ./gunz_test test.bin.gz # decompress with NEST unit > real 0m2.722s > user 0m0.200s > sys 0m1.872s > > That's why I'm objecting to measuring the general usefulness of hardware > crypto units by the standards of fscrypt, which has an artificial limitation > of never submitting blocks larger than 4kB: there are other use cases that > don't have that limitation, and where the overhead is negligible because it > is incurred only once for a few gigabytes of data. > > That's why I suggested changing from a priority field to "speed" and > "overhead" fields, and calculate priority for each application as > (size/speed+overhead) -- smallest number wins, size is what the application > expects to use as the typical request size (which for fscrypt and IPsec is > on the small side, so it would always select the CPU unless there was a > low-overhead offload engine available) > > This probably needs some adjustment to allow selecting a low-power > implementation (e.g. on mobile, I'd want to use offloading for fscrypt even > if it is slower), and model request batching which reduces the overhead in a > busy system, but it should be a good start. What does this have to do with this thread, which is about the PowerPC optimized MD5 code? - Eric
Hi, On 8/5/25 13:58, Eric Biggers wrote: > What does this have to do with this thread, which is about the PowerPC > optimized MD5 code? Hence the new subject. It is still related to removal of code, but asks about the bigger picture. The code removal changes you've been pushing lately absolutely make sense in the context of "the crypto subsystem is for internal use by the kernel, and all known users will only ever submit small work items." However, there is also the userspace API, and hardware accelerators also register with the crypto subsystem, so it is *also* the way for userspace to use specialized hardware. If these are separate, then it makes sense to acknowledge that the kernel will never use asynchronous transforms for anything, simplify the internal API, and move all the hardware support to a separate registry that is for userspace applications only. However if we don't want separate registries, then it makes no sense for the kernel to set policy for userspace here; it should offer all the alternatives. The kernel can, and should, define policy for itself, and I wholeheartedly agree that offloading small requests does not make sense, unless we're on battery power. I'm also not convinced that fscrypt cannot ever learn to submit a single large request or a large batch of small requests if it is asked to decrypt a large file, so in my opinion the common registry makes more sense. Making sure that hardware support actually works and is tested is the responsibility of the driver and port maintainers. We understand that subsystem maintainers do not have all the hardware available, but the same goes for all the other subsystems -- the DRM maintainers routinely merge drivers for hardware they do not own, because the changes come from people who *do* own the hardware, and have tested the changes. The latter is a project management issue, mostly: if there is a lack of working relationships with driver and port maintainers, then that needs to be fixed, not assumed to be an unchangeable part of the environment that technical decisions are made in. Simon
On Tue, Aug 05, 2025 at 04:17:49PM +0900, Simon Richter wrote: > Hi, > > On 8/5/25 13:58, Eric Biggers wrote: > > > What does this have to do with this thread, which is about the PowerPC > > optimized MD5 code? > > Hence the new subject. It is still related to removal of code, but asks > about the bigger picture. > > The code removal changes you've been pushing lately absolutely make sense in > the context of "the crypto subsystem is for internal use by the kernel, and > all known users will only ever submit small work items." > > However, there is also the userspace API, and hardware accelerators also > register with the crypto subsystem, so it is *also* the way for userspace to > use specialized hardware. > > If these are separate, then it makes sense to acknowledge that the kernel > will never use asynchronous transforms for anything, simplify the internal > API, and move all the hardware support to a separate registry that is for > userspace applications only. I think you're grouping together different things that aren't actually very related. For example this patch series proposed removing some software code, not async drivers. The only async driver I removed recently is one of the crc32c ones. Doesn't lib/crypto/ largely accomplish what you're thinking of as "separate registries"? As we migrate more in-kernel users to lib/crypto/, the old-school crypto API becomes more focused just on accomodating AF_ALG users. However, that is, and will continue to be, a long process. In the mean time we still have many in-kernel users of the old-school crypto API to decide what to do with. > I'm also not convinced that fscrypt cannot ever learn to submit a single > large request or a large batch of small requests if it is asked to decrypt a > large file, so in my opinion the common registry makes more sense. It's certainly never going to be the entire file or a single batch. That's just not how filesystems work. As for offloading entire I/O requests, fscrypt already supports that, just for inline crypto engines instead of the old-school separate ones. Inline encryption is fundamentally much more efficient. The same efficiency can't be achieved with a separate engine, even with a large batch. If people would like to continue to explore that approach anyway, they're free to do so, but it's not a promising approach, at least not on any of the platforms I examined. (And pointing to irrelevant data, like for compression, is not helpful.) > Making sure that hardware support actually works and is tested is the > responsibility of the driver and port maintainers. We understand that > subsystem maintainers do not have all the hardware available, but the same > goes for all the other subsystems -- the DRM maintainers routinely merge > drivers for hardware they do not own, because the changes come from people > who *do* own the hardware, and have tested the changes. > > The latter is a project management issue, mostly: if there is a lack of > working relationships with driver and port maintainers, then that needs to > be fixed, not assumed to be an unchangeable part of the environment that > technical decisions are made in. This significantly understates the challenge that exists with a large number of drivers, including orphaned drivers and architectures, as well as the high standard of correctness that cryptography code needs to have vs. more everyday device drivers. And also the fact that async offload drivers are fundamentally much harder to get correct than software code. The odds are really stacked against everyone here, and I think calling it a "project management issue" largely misses the point. - Eric
© 2016 - 2025 Red Hat, Inc.