[RFC PATCH v2 2/7] arm64/runtime-const: Introduce runtime_const_mask_32()

K Prateek Nayak posted 7 patches 3 weeks ago
There is a newer version of this series
[RFC PATCH v2 2/7] arm64/runtime-const: Introduce runtime_const_mask_32()
Posted by K Prateek Nayak 3 weeks ago
Futex hash computation requires a mask operation with read-only after
init data that will be converted to a runtime constant in the subsequent
commit.

Introduce runtime_const_mask_32 to further optimize the mask operation
in the futex hash computation hot path. GCC generates a:

  movz  w1, #lo16, lsl #0     // w1 = bits [15:0]
  movk  w1, #hi16, lsl #16    // w1 = full 32-bit value
  and   w0, w0, w1	      // w0 = w0 & w1

pattern to tackle arbitrary 32-bit masks and the same was also suggested
by Claude which is implemented here. __runtime_fixup_ptr() already
patches a "movz, + movk lsl #16" sequence which has been reused to patch
the same sequence for __runtime_fixup_mask().

Assisted-by: Claude:claude-sonnet-4-5
Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
---
 arch/arm64/include/asm/runtime-const.h | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
index c3dbd3ae68f6..4c3f0b9aad98 100644
--- a/arch/arm64/include/asm/runtime-const.h
+++ b/arch/arm64/include/asm/runtime-const.h
@@ -35,6 +35,19 @@
 		:"r" (0u+(val)));				\
 	__ret; })
 
+#define runtime_const_mask_32(val, sym) ({			\
+	unsigned long __ret;					\
+	asm_inline("1:\t"					\
+		"movz %w0, #0xcdef\n\t"				\
+		"movk %w0, #0x89ab, lsl #16\n\t"			\
+		"and %w0,%w0,%w1\n\t"				\
+		".pushsection runtime_mask_" #sym ",\"a\"\n\t"	\
+		".long 1b - .\n\t"				\
+		".popsection"					\
+		:"=r" (__ret)					\
+		:"r" (0u+(val)));				\
+	__ret; })
+
 #define runtime_const_init(type, sym) do {		\
 	extern s32 __start_runtime_##type##_##sym[];	\
 	extern s32 __stop_runtime_##type##_##sym[];	\
@@ -80,6 +93,15 @@ static inline void __runtime_fixup_shift(void *where, unsigned long val)
 	__runtime_fixup_caches(where, 1);
 }
 
+/* Mask is patched as two 16-bit immediates: movz (bits [15:0]), movk (bits [31:16]) */
+static inline void __runtime_fixup_mask(void *where, unsigned long val)
+{
+	__le32 *p = lm_alias(where);
+	__runtime_fixup_16(p, val);
+	__runtime_fixup_16(p+1, val >> 16);
+	__runtime_fixup_caches(where, 2);
+}
+
 static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
 	unsigned long val, s32 *start, s32 *end)
 {
-- 
2.43.0
Re: [RFC PATCH v2 2/7] arm64/runtime-const: Introduce runtime_const_mask_32()
Posted by David Laight 3 weeks ago
On Mon, 16 Mar 2026 05:23:56 +0000
K Prateek Nayak <kprateek.nayak@amd.com> wrote:

> Futex hash computation requires a mask operation with read-only after
> init data that will be converted to a runtime constant in the subsequent
> commit.
> 
> Introduce runtime_const_mask_32 to further optimize the mask operation
> in the futex hash computation hot path. GCC generates a:
> 
>   movz  w1, #lo16, lsl #0     // w1 = bits [15:0]
>   movk  w1, #hi16, lsl #16    // w1 = full 32-bit value
>   and   w0, w0, w1	      // w0 = w0 & w1

I don't think the '&' needs to be part of the asm block.
Just generate the 32bit constant and do the mask in C.
That will let the compiler schedule the instructions.
It also makes the code patching more generally useful.

	David


> 
> pattern to tackle arbitrary 32-bit masks and the same was also suggested
> by Claude which is implemented here. __runtime_fixup_ptr() already
> patches a "movz, + movk lsl #16" sequence which has been reused to patch
> the same sequence for __runtime_fixup_mask().
> 
> Assisted-by: Claude:claude-sonnet-4-5
> Signed-off-by: K Prateek Nayak <kprateek.nayak@amd.com>
> ---
>  arch/arm64/include/asm/runtime-const.h | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/runtime-const.h b/arch/arm64/include/asm/runtime-const.h
> index c3dbd3ae68f6..4c3f0b9aad98 100644
> --- a/arch/arm64/include/asm/runtime-const.h
> +++ b/arch/arm64/include/asm/runtime-const.h
> @@ -35,6 +35,19 @@
>  		:"r" (0u+(val)));				\
>  	__ret; })
>  
> +#define runtime_const_mask_32(val, sym) ({			\
> +	unsigned long __ret;					\
> +	asm_inline("1:\t"					\
> +		"movz %w0, #0xcdef\n\t"				\
> +		"movk %w0, #0x89ab, lsl #16\n\t"			\
> +		"and %w0,%w0,%w1\n\t"				\
> +		".pushsection runtime_mask_" #sym ",\"a\"\n\t"	\
> +		".long 1b - .\n\t"				\
> +		".popsection"					\
> +		:"=r" (__ret)					\
> +		:"r" (0u+(val)));				\
> +	__ret; })
> +
>  #define runtime_const_init(type, sym) do {		\
>  	extern s32 __start_runtime_##type##_##sym[];	\
>  	extern s32 __stop_runtime_##type##_##sym[];	\
> @@ -80,6 +93,15 @@ static inline void __runtime_fixup_shift(void *where, unsigned long val)
>  	__runtime_fixup_caches(where, 1);
>  }
>  
> +/* Mask is patched as two 16-bit immediates: movz (bits [15:0]), movk (bits [31:16]) */
> +static inline void __runtime_fixup_mask(void *where, unsigned long val)
> +{
> +	__le32 *p = lm_alias(where);
> +	__runtime_fixup_16(p, val);
> +	__runtime_fixup_16(p+1, val >> 16);
> +	__runtime_fixup_caches(where, 2);
> +}
> +
>  static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
>  	unsigned long val, s32 *start, s32 *end)
>  {
Re: [RFC PATCH v2 2/7] arm64/runtime-const: Introduce runtime_const_mask_32()
Posted by K Prateek Nayak 3 weeks ago
Hello David,

On 3/16/2026 5:20 PM, David Laight wrote:
>> Introduce runtime_const_mask_32 to further optimize the mask operation
>> in the futex hash computation hot path. GCC generates a:
>>
>>   movz  w1, #lo16, lsl #0     // w1 = bits [15:0]
>>   movk  w1, #hi16, lsl #16    // w1 = full 32-bit value
>>   and   w0, w0, w1          // w0 = w0 & w1
> 
> I don't think the '&' needs to be part of the asm block.
> Just generate the 32bit constant and do the mask in C.
> That will let the compiler schedule the instructions.
> It also makes the code patching more generally useful.

Ack! That makes sense. I'll update it in the next version.
Thank you for taking a look at the series.

-- 
Thanks and Regards,
Prateek