Just before returning, blake2s_compress_ssse3() and
blake2s_compress_avx512() store updated values to the 'h', 't', and 'f'
fields of struct blake2s_ctx. But 'f' is always unchanged (which is
correct; only the C code changes it). So, there's no need to write to
'f'. Use 64-bit stores (movq and vmovq) instead of 128-bit stores
(movdqu and vmovdqu) so that only 't' is written.
Signed-off-by: Eric Biggers <ebiggers@kernel.org>
---
lib/crypto/x86/blake2s-core.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/crypto/x86/blake2s-core.S b/lib/crypto/x86/blake2s-core.S
index f805a49c590d..869064f6ac16 100644
--- a/lib/crypto/x86/blake2s-core.S
+++ b/lib/crypto/x86/blake2s-core.S
@@ -191,11 +191,11 @@ SYM_FUNC_START(blake2s_compress_ssse3)
decq NBLOCKS
jnz .Lssse3_mainloop
movdqu %xmm0,(CTX) // Store new h[0..3]
movdqu %xmm1,16(CTX) // Store new h[4..7]
- movdqu %xmm14,32(CTX) // Store new t and f
+ movq %xmm14,32(CTX) // Store new t (f is unchanged)
RET
SYM_FUNC_END(blake2s_compress_ssse3)
//
// void blake2s_compress_avx512(struct blake2s_ctx *ctx,
@@ -285,9 +285,9 @@ SYM_FUNC_START(blake2s_compress_avx512)
decq NBLOCKS
jne .Lavx512_mainloop
vmovdqu %xmm0,(CTX) // Store new h[0..3]
vmovdqu %xmm1,16(CTX) // Store new h[4..7]
- vmovdqu %xmm4,32(CTX) // Store new t and f
+ vmovq %xmm4,32(CTX) // Store new t (f is unchanged)
vzeroupper
RET
SYM_FUNC_END(blake2s_compress_avx512)
--
2.51.2