[v2] lib/crypto: x86/sha: Add PHE Extensions support

[PATCH v2 1/2] lib/crypto: x86/sha1: PHE Extensions optimized SHA1 transform function

Posted by AlanSong-oc 1 month, 3 weeks ago

Zhaoxin CPUs implement the SHA (Secure Hash Algorithm) family as CPU
instructions through the PHE (Padlock Hash Engine) Extensions, which
include the XSHA1, XSHA256, XSHA384 and XSHA512 instructions.

Because SHA is implemented in hardware rather than in software,
applications can be developed with higher performance, better security
and more flexibility.

This patch adds an optimized implementation of the SHA-1 transform
function based on the XSHA1 instruction.

Signed-off-by: AlanSong-oc <AlanSong-oc@zhaoxin.com>
---
 lib/crypto/Makefile           |  3 +-
 lib/crypto/x86/sha1-phe-asm.S | 71 +++++++++++++++++++++++++++++++++++
 lib/crypto/x86/sha1.h         | 20 ++++++++++
 3 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 lib/crypto/x86/sha1-phe-asm.S

diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index d2845b214..069069377 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -205,7 +205,8 @@ endif
 libsha1-$(CONFIG_SPARC) += sparc/sha1_asm.o
 libsha1-$(CONFIG_X86) += x86/sha1-ssse3-and-avx.o \
 			 x86/sha1-avx2-asm.o \
-			 x86/sha1-ni-asm.o
+			 x86/sha1-ni-asm.o \
+			 x86/sha1-phe-asm.o
 endif # CONFIG_CRYPTO_LIB_SHA1_ARCH
 
 ################################################################################
diff --git a/lib/crypto/x86/sha1-phe-asm.S b/lib/crypto/x86/sha1-phe-asm.S
new file mode 100644
index 000000000..eff086104
--- /dev/null
+++ b/lib/crypto/x86/sha1-phe-asm.S
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * PHE Extensions optimized implementation of a SHA-1 update function
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 	* Redistributions of source code must retain the above copyright
+ * 	  notice, this list of conditions and the following disclaimer.
+ * 	* Redistributions in binary form must reproduce the above copyright
+ * 	  notice, this list of conditions and the following disclaimer in
+ * 	  the documentation and/or other materials provided with the
+ * 	  distribution.
+ * 	* Neither the name of Intel Corporation nor the names of its
+ * 	  contributors may be used to endorse or promote products derived
+ * 	  from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#include <linux/linkage.h>
+
+/*
+ * PHE Extensions optimized implementation of a SHA-1 block function
+ *
+ * This function takes a pointer to the current SHA-1 state, a pointer to the
+ * input data, and the number of 64-byte blocks to process.  The number of
+ * blocks to process is assumed to be nonzero.  Once all blocks have been
+ * processed, the state is updated with the new state.  This function only
+ * processes complete blocks.  State initialization, buffering of partial
+ * blocks, and digest finalization are expected to be handled elsewhere.
+ *
+ * void sha1_transform_phe(u8 *state, const u8 *data, size_t nblocks)
+ */
+.text
+SYM_FUNC_START(sha1_transform_phe)
+	mov		$-1, %rax
+	mov		%rdx, %rcx
+
+	.byte	0xf3,0x0f,0xa6,0xc8
+
+	RET
+SYM_FUNC_END(sha1_transform_phe)
diff --git a/lib/crypto/x86/sha1.h b/lib/crypto/x86/sha1.h
index c48a0131f..670109c79 100644
--- a/lib/crypto/x86/sha1.h
+++ b/lib/crypto/x86/sha1.h
@@ -48,6 +48,23 @@ static void sha1_blocks_avx2(struct sha1_block_state *state,
 	}
 }
 
+#define PHE_ALIGNMENT 16
+asmlinkage void sha1_transform_phe(u8 *state, const u8 *data, size_t nblocks);
+static void sha1_blocks_phe(struct sha1_block_state *state,
+			     const u8 *data, size_t nblocks)
+{
+	/*
+	 * XSHA1 requires %edi to point to a 32-byte, 16-byte-aligned
+	 * buffer on Zhaoxin processors.
+	 */
+	u8 buf[32 + PHE_ALIGNMENT - 1];
+	u8 *dst = PTR_ALIGN(&buf[0], PHE_ALIGNMENT);
+
+	memcpy(dst, (u8 *)(state), SHA1_DIGEST_SIZE);
+	sha1_transform_phe(dst, data, nblocks);
+	memcpy((u8 *)(state), dst, SHA1_DIGEST_SIZE);
+}
+
 static void sha1_blocks(struct sha1_block_state *state,
 			const u8 *data, size_t nblocks)
 {
@@ -59,6 +76,9 @@ static void sha1_mod_init_arch(void)
 {
 	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
 		static_call_update(sha1_blocks_x86, sha1_blocks_ni);
+	} else if (boot_cpu_has(X86_FEATURE_PHE) && boot_cpu_has(X86_FEATURE_PHE_EN)) {
+		if (cpu_data(0).x86 >= 0x07)
+			static_call_update(sha1_blocks_x86, sha1_blocks_phe);
 	} else if (cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
 				     NULL) &&
 		   boot_cpu_has(X86_FEATURE_AVX)) {
-- 
2.34.1

Re: [PATCH v2 1/2] lib/crypto: x86/sha1: PHE Extensions optimized SHA1 transform function

Posted by Eric Biggers 1 month, 3 weeks ago

[+Cc x86@kernel.org]

On Fri, Dec 19, 2025 at 04:03:05PM +0800, AlanSong-oc wrote:
> diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
> index d2845b214..069069377 100644
> --- a/lib/crypto/Makefile
> +++ b/lib/crypto/Makefile
> @@ -205,7 +205,8 @@ endif
>  libsha1-$(CONFIG_SPARC) += sparc/sha1_asm.o
>  libsha1-$(CONFIG_X86) += x86/sha1-ssse3-and-avx.o \
>  			 x86/sha1-avx2-asm.o \
> -			 x86/sha1-ni-asm.o
> +			 x86/sha1-ni-asm.o \
> +			 x86/sha1-phe-asm.o
>  endif # CONFIG_CRYPTO_LIB_SHA1_ARCH
>  
>  ################################################################################
> diff --git a/lib/crypto/x86/sha1-phe-asm.S b/lib/crypto/x86/sha1-phe-asm.S
> new file mode 100644
> index 000000000..eff086104
> --- /dev/null
> +++ b/lib/crypto/x86/sha1-phe-asm.S
> @@ -0,0 +1,71 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * PHE Extensions optimized implementation of a SHA-1 update function
> + *
> + * This file is provided under a dual BSD/GPLv2 license.  When using or
> + * redistributing this file, you may do so under either license.
> + *
> + * GPL LICENSE SUMMARY
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of version 2 of the GNU General Public License as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + *
> + * BSD LICENSE
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + *
> + * 	* Redistributions of source code must retain the above copyright
> + * 	  notice, this list of conditions and the following disclaimer.
> + * 	* Redistributions in binary form must reproduce the above copyright
> + * 	  notice, this list of conditions and the following disclaimer in
> + * 	  the documentation and/or other materials provided with the
> + * 	  distribution.
> + * 	* Neither the name of Intel Corporation nor the names of its
> + * 	  contributors may be used to endorse or promote products derived
> + * 	  from this software without specific prior written permission.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + *
> + */
> +
> +#include <linux/linkage.h>
> +
> +/*
> + * PHE Extensions optimized implementation of a SHA-1 block function
> + *
> + * This function takes a pointer to the current SHA-1 state, a pointer to the
> + * input data, and the number of 64-byte blocks to process.  The number of
> + * blocks to process is assumed to be nonzero.  Once all blocks have been
> + * processed, the state is updated with the new state.  This function only
> + * processes complete blocks.  State initialization, buffering of partial
> + * blocks, and digest finalization are expected to be handled elsewhere.
> + *
> + * void sha1_transform_phe(u8 *state, const u8 *data, size_t nblocks)
> + */
> +.text
> +SYM_FUNC_START(sha1_transform_phe)
> +	mov		$-1, %rax
> +	mov		%rdx, %rcx
> +
> +	.byte	0xf3,0x0f,0xa6,0xc8
> +
> +	RET
> +SYM_FUNC_END(sha1_transform_phe)

Please make this an inline asm statement instead of using a .S file.
It's just one instruction.

> +#define PHE_ALIGNMENT 16
> +asmlinkage void sha1_transform_phe(u8 *state, const u8 *data, size_t nblocks);
> +static void sha1_blocks_phe(struct sha1_block_state *state,
> +			     const u8 *data, size_t nblocks)
> +{
> +	/*
> +	 * XSHA1 requires %edi to point to a 32-byte, 16-byte-aligned
> +	 * buffer on Zhaoxin processors.
> +	 */

What is the largest 'nblocks' that the instruction supports?

What happens if the instruction is interrupted partway through?  Does
the CPU correctly resume it in all cases?

Is it supported in both 32-bit and 64-bit modes?  Your patch doesn't
check for CONFIG_64BIT.  Should it?  New optimized assembly code
generally should be 64-bit only.

Where is this instruction specified?  Please add a comment that links to
the specification.

> +	u8 buf[32 + PHE_ALIGNMENT - 1];
> +	u8 *dst = PTR_ALIGN(&buf[0], PHE_ALIGNMENT);
> +
> +	memcpy(dst, (u8 *)(state), SHA1_DIGEST_SIZE);
> +	sha1_transform_phe(dst, data, nblocks);
> +	memcpy((u8 *)(state), dst, SHA1_DIGEST_SIZE);
> +}

The casts to 'u8 *' are unnecessary.

> +
>  static void sha1_blocks(struct sha1_block_state *state,
>  			const u8 *data, size_t nblocks)
>  {
> @@ -59,6 +76,9 @@ static void sha1_mod_init_arch(void)
>  {
>  	if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
>  		static_call_update(sha1_blocks_x86, sha1_blocks_ni);
> +	} else if (boot_cpu_has(X86_FEATURE_PHE) && boot_cpu_has(X86_FEATURE_PHE_EN)) {
> +		if (cpu_data(0).x86 >= 0x07)
> +			static_call_update(sha1_blocks_x86, sha1_blocks_phe);

Check IS_ENABLED(CONFIG_CPU_SUP_ZHAOXIN) first, so that the code gets
compiled out when support for Zhaoxin CPUs isn't included in the kernel.

There are hardly any mentions of 'cpu_data(0).x86' in the kernel.  I
think you mean 'boot_cpu_data.x86', which is used much more frequently.

What is the difference between X86_FEATURE_PHE and X86_FEATURE_PHE_EN,
and why are both needed?

All these comments apply to the SHA-256 patch too.

- Eric

Re: [PATCH v2 1/2] lib/crypto: x86/sha1: PHE Extensions optimized SHA1 transform function

Posted by AlanSong-oc 3 weeks, 6 days ago

On 12/20/2025 2:18 AM, Eric Biggers wrote:
> 
> [+Cc x86@kernel.org]
> 
> On Fri, Dec 19, 2025 at 04:03:05PM +0800, AlanSong-oc wrote:
>> diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
>> index d2845b214..069069377 100644
>> --- a/lib/crypto/Makefile
>> +++ b/lib/crypto/Makefile
>> @@ -205,7 +205,8 @@ endif
>>  libsha1-$(CONFIG_SPARC) += sparc/sha1_asm.o
>>  libsha1-$(CONFIG_X86) += x86/sha1-ssse3-and-avx.o \
>>                        x86/sha1-avx2-asm.o \
>> -                      x86/sha1-ni-asm.o
>> +                      x86/sha1-ni-asm.o \
>> +                      x86/sha1-phe-asm.o
>>  endif # CONFIG_CRYPTO_LIB_SHA1_ARCH
>>
>>  ################################################################################
>> diff --git a/lib/crypto/x86/sha1-phe-asm.S b/lib/crypto/x86/sha1-phe-asm.S
>> new file mode 100644
>> index 000000000..eff086104
>> --- /dev/null
>> +++ b/lib/crypto/x86/sha1-phe-asm.S
>> @@ -0,0 +1,71 @@
>> +/* SPDX-License-Identifier: GPL-2.0 */
>> +/*
>> + * PHE Extensions optimized implementation of a SHA-1 update function
>> + *
>> + * This file is provided under a dual BSD/GPLv2 license.  When using or
>> + * redistributing this file, you may do so under either license.
>> + *
>> + * GPL LICENSE SUMMARY
>> + *
>> + * This program is free software; you can redistribute it and/or modify
>> + * it under the terms of version 2 of the GNU General Public License as
>> + * published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope that it will be useful, but
>> + * WITHOUT ANY WARRANTY; without even the implied warranty of
>> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
>> + * General Public License for more details.
>> + *
>> + * BSD LICENSE
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + *
>> + *   * Redistributions of source code must retain the above copyright
>> + *     notice, this list of conditions and the following disclaimer.
>> + *   * Redistributions in binary form must reproduce the above copyright
>> + *     notice, this list of conditions and the following disclaimer in
>> + *     the documentation and/or other materials provided with the
>> + *     distribution.
>> + *   * Neither the name of Intel Corporation nor the names of its
>> + *     contributors may be used to endorse or promote products derived
>> + *     from this software without specific prior written permission.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
>> + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
>> + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
>> + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
>> + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
>> + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
>> + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
>> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
>> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
>> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
>> + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
>> + *
>> + */
>> +
>> +#include <linux/linkage.h>
>> +
>> +/*
>> + * PHE Extensions optimized implementation of a SHA-1 block function
>> + *
>> + * This function takes a pointer to the current SHA-1 state, a pointer to the
>> + * input data, and the number of 64-byte blocks to process.  The number of
>> + * blocks to process is assumed to be nonzero.  Once all blocks have been
>> + * processed, the state is updated with the new state.  This function only
>> + * processes complete blocks.  State initialization, buffering of partial
>> + * blocks, and digest finalization are expected to be handled elsewhere.
>> + *
>> + * void sha1_transform_phe(u8 *state, const u8 *data, size_t nblocks)
>> + */
>> +.text
>> +SYM_FUNC_START(sha1_transform_phe)
>> +     mov             $-1, %rax
>> +     mov             %rdx, %rcx
>> +
>> +     .byte   0xf3,0x0f,0xa6,0xc8
>> +
>> +     RET
>> +SYM_FUNC_END(sha1_transform_phe)
> 
> Please make this an inline asm statement instead of using a .S file.
> It's just one instruction.

I will implement support for the XSHA1 and XSHA256 instructions using
inline asm statements in the next version of the patch.

> 
>> +#define PHE_ALIGNMENT 16
>> +asmlinkage void sha1_transform_phe(u8 *state, const u8 *data, size_t nblocks);
>> +static void sha1_blocks_phe(struct sha1_block_state *state,
>> +                          const u8 *data, size_t nblocks)
>> +{
>> +     /*
>> +      * XSHA1 requires %edi to point to a 32-byte, 16-byte-aligned
>> +      * buffer on Zhaoxin processors.
>> +      */
> 
> What is the largest 'nblocks' that the instruction supports?

According to the instruction specification, the maximum input data size
for the XSHA1 and XSHA256 instructions is limited by the maximum value
of CX, ECX, or RCX, depending on the operation mode. Accordingly, the
maximum value of 'nblocks' is subject to the same limitation.

> What happens if the instruction is interrupted partway through?  Does
> the CPU correctly resume it in all cases?

The specification states that XSHA1 and XSHA256 instructions are
interruptible. If an interrupt or exception occurs during execution,
the instruction can be correctly resumed after the interrupt or
exception has been handled.

> Is it supported in both 32-bit and 64-bit modes?  Your patch doesn't
> check for CONFIG_64BIT.  Should it?  New optimized assembly code
> generally should be 64-bit only.

The XSHA1 and XSHA256 instructions are supported in both 32-bit and
64-bit modes. Since newly optimized assembly code is typically 64-bit
only, and XSHA1 and XSHA256 fully support 64-bit mode, an explicit
CONFIG_64BIT check should not be required.

> Where is this instruction specified?  Please add a comment that links to
> the specification.

The instruction specification is available at the following link:
https://gitee.com/openzhaoxin/zhaoxin_specifications/blob/20260112/ZX_Padlock_Reference.pdf

> 
>> +     u8 buf[32 + PHE_ALIGNMENT - 1];
>> +     u8 *dst = PTR_ALIGN(&buf[0], PHE_ALIGNMENT);
>> +
>> +     memcpy(dst, (u8 *)(state), SHA1_DIGEST_SIZE);
>> +     sha1_transform_phe(dst, data, nblocks);
>> +     memcpy((u8 *)(state), dst, SHA1_DIGEST_SIZE);
>> +}
> 
> The casts to 'u8 *' are unnecessary.

I will eliminate the unnecessary cast in the next version of the patch.

>> +
>>  static void sha1_blocks(struct sha1_block_state *state,
>>                       const u8 *data, size_t nblocks)
>>  {
>> @@ -59,6 +76,9 @@ static void sha1_mod_init_arch(void)
>>  {
>>       if (boot_cpu_has(X86_FEATURE_SHA_NI)) {
>>               static_call_update(sha1_blocks_x86, sha1_blocks_ni);
>> +     } else if (boot_cpu_has(X86_FEATURE_PHE) && boot_cpu_has(X86_FEATURE_PHE_EN)) {
>> +             if (cpu_data(0).x86 >= 0x07)
>> +                     static_call_update(sha1_blocks_x86, sha1_blocks_phe);
> 
> Check IS_ENABLED(CONFIG_CPU_SUP_ZHAOXIN) first, so that the code gets
> compiled out when support for Zhaoxin CPUs isn't included in the kernel.
> 
> There are hardly any mentions of 'cpu_data(0).x86' in the kernel.  I
> think you mean 'boot_cpu_data.x86', which is used much more frequently.

I will add a CONFIG_CPU_SUP_ZHAOXIN check in the relevant code paths and
use 'boot_cpu_data.x86' to identify the CPU family instead of
'cpu_data(0).x86'.

> What is the difference between X86_FEATURE_PHE and X86_FEATURE_PHE_EN,
> and why are both needed?
> 
> All these comments apply to the SHA-256 patch too.

X86_FEATURE_PHE indicates the presence of the XSHA1 and XSHA256
instructions, whereas X86_FEATURE_PHE_EN indicates that these
instructions are enabled for normal use. All of the suggestions
mentioned above will also be applied to the SHA-256 patch.

Please accept my apologies for the delayed response due to
administrative procedures. Thank you for your review and valuable
suggestions.

Best Regards
AlanSong-oc

Re: [PATCH v2 1/2] lib/crypto: x86/sha1: PHE Extensions optimized SHA1 transform function

Posted by Eric Biggers 3 weeks, 6 days ago

On Mon, Jan 12, 2026 at 05:12:01PM +0800, AlanSong-oc wrote:
> > Is it supported in both 32-bit and 64-bit modes?  Your patch doesn't
> > check for CONFIG_64BIT.  Should it?  New optimized assembly code
> > generally should be 64-bit only.
> 
> The XSHA1 and XSHA256 are supported in both 32-bit and 64-bit modes.
> Since newly optimized assembly code is typically 64-bit only, and XSHA1
> and XSHA256 fully support 64-bit mode, an explicit CONFIG_64BIT check
> should not required.

Right, all the x86-optimized SHA-1 and SHA-256 code is already 64-bit
specific, due to CONFIG_CRYPTO_LIB_SHA1_ARCH and
CONFIG_CRYPTO_LIB_SHA256_ARCH being enabled only when CONFIG_X86_64=y.
So there's no need to check for 64-bit again.

> > What is the difference between X86_FEATURE_PHE and X86_FEATURE_PHE_EN,
> > and why are both needed?
> 
> The X86_FEATURE_PHE indicates the presence of the XSHA1 and XSHA256
> instructions, whereas the X86_FEATURE_PHE_EN indicates that these
> instructions are enabled for normal use.

I still don't understand the difference.

If you look at the other CPU feature flags, like X86_FEATURE_SHA_NI for
example, there's just a single flag for the feature.  We don't have
X86_FEATURE_SHA_NI and X86_FEATURE_SHA_NI_EN.  If the CPU supports the
feature but the kernel decides it can't or shouldn't be used for
whatever reason, the kernel just doesn't set the flag.  There's no
separate flag that tracks the CPU support independently.

Why can't the PHE flag work the same way?

- Eric

Re: [PATCH v2 1/2] lib/crypto: x86/sha1: PHE Extensions optimized SHA1 transform function

Posted by AlanSong-oc 3 weeks, 3 days ago

On 1/13/2026 3:34 AM, Eric Biggers wrote:
> 
> On Mon, Jan 12, 2026 at 05:12:01PM +0800, AlanSong-oc wrote:
>>> Is it supported in both 32-bit and 64-bit modes?  Your patch doesn't
>>> check for CONFIG_64BIT.  Should it?  New optimized assembly code
>>> generally should be 64-bit only.
>>
>> The XSHA1 and XSHA256 are supported in both 32-bit and 64-bit modes.
>> Since newly optimized assembly code is typically 64-bit only, and XSHA1
>> and XSHA256 fully support 64-bit mode, an explicit CONFIG_64BIT check
>> should not required.
> 
> Right, all the x86-optimized SHA-1 and SHA-256 code is already 64-bit
> specific, due to CONFIG_CRYPTO_LIB_SHA1_ARCH and
> CONFIG_CRYPTO_LIB_SHA256_ARCH being enabled only when CONFIG_x86_64=y.
> So there's no need to check for 64-bit again.
> 
>>> What is the difference between X86_FEATURE_PHE and X86_FEATURE_PHE_EN,
>>> and why are both needed?
>>
>> The X86_FEATURE_PHE indicates the presence of the XSHA1 and XSHA256
>> instructions, whereas the X86_FEATURE_PHE_EN indicates that these
>> instructions are enabled for normal use.
> 
> I still don't understand the difference.
> 
> If you look at the other CPU feature flags, like X86_FEATURE_SHA_NI for
> example, there's just a single flag for the feature.  We don't have
> X86_FEATURE_SHA_NI and X86_FEATURE_SHA_NI_EN.  If the CPU supports the
> feature but the kernel decides it can't or shouldn't be used for
> whatever reason, the kernel just doesn't set the flag.  There's no
> separate flag that tracks the CPU support independently.
> 
> Why can't the PHE flag work the same way?

Unlike most x86 CPU features, the PHE extension uses two bits to
describe a single feature: a present bit and an enable bit. On Zhaoxin
processors, these two bits are always identical. Therefore, in the next
revision of this patch, I will only check X86_FEATURE_PHE_EN, as with
other CPU support checks. Thanks for the suggestion.

Best Regards
AlanSong-oc

[PATCH v2 1/2] lib/crypto: x86/sha1: PHE Extensions optimized SHA1 transform function
[PATCH v2 2/2] lib/crypto: x86/sha256: PHE Extensions optimized SHA256 transform function