[PATCH 4/4] arm64: barrier: Add smp_cond_load_acquire_timewait()

Ankur Arora posted 4 patches 1 year ago
[PATCH 4/4] arm64: barrier: Add smp_cond_load_acquire_timewait()
Posted by Ankur Arora 1 year ago
Add smp_cond_load_acquire_timewait(). This is substantially similar
to smp_cond_load_acquire() in that we use a load-acquire in the loop
and so can avoid an smp_rmb() later.

To handle the unlikely case of the event-stream being unavailable,
keep the implementation simple by falling back to the generic
__smp_cond_load_relaxed_spinwait(), followed by an smp_rmb()
(via smp_acquire__after_ctrl_dep()).

Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/include/asm/barrier.h | 36 ++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 25721275a5a2..22d9291aee8d 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -232,6 +232,22 @@ do {									\
 	(typeof(*ptr))VAL;						\
 })
 
+#define __smp_cond_load_acquire_timewait(ptr, cond_expr,		\
+					 time_expr_ns, time_limit_ns)	\
+({									\
+	typeof(ptr) __PTR = (ptr);					\
+	__unqual_scalar_typeof(*ptr) VAL;				\
+	for (;;) {							\
+		VAL = smp_load_acquire(__PTR);				\
+		if (cond_expr)						\
+			break;						\
+		__cmpwait_relaxed(__PTR, VAL);				\
+		if ((time_expr_ns) >= (time_limit_ns))			\
+			break;						\
+	}								\
+	(typeof(*ptr))VAL;						\
+})
+
 /*
  * For the unlikely case that the event-stream is unavailable,
  * ward off the possibility of waiting forever by falling back
@@ -254,6 +270,26 @@ do {									\
 	(typeof(*ptr))_val;						\
 })
 
+#define smp_cond_load_acquire_timewait(ptr, cond_expr,			\
+				      time_expr_ns, time_limit_ns)	\
+({									\
+	__unqual_scalar_typeof(*ptr) _val;				\
+	int __wfe = arch_timer_evtstrm_available();			\
+									\
+	if (likely(__wfe)) {						\
+		_val = __smp_cond_load_acquire_timewait(ptr, cond_expr,	\
+							time_expr_ns,	\
+							time_limit_ns);	\
+	} else {							\
+		_val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr,	\
+							time_expr_ns,	\
+							time_limit_ns);	\
+		smp_acquire__after_ctrl_dep();				\
+	}								\
+	(typeof(*ptr))_val;						\
+})
+
+
 #include <asm-generic/barrier.h>
 
 #endif	/* __ASSEMBLY__ */
-- 
2.43.5
Re: [PATCH 4/4] arm64: barrier: Add smp_cond_load_acquire_timewait()
Posted by Okanovic, Haris 11 months, 4 weeks ago
On Mon, 2025-02-03 at 13:49 -0800, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
> 
> 
> 
> Add smp_cond_load_acquire_timewait(). This is substantially similar
> to smp_cond_load_acquire() where we use a load-acquire in the loop
> and avoid an smp_rmb() later.
> 
> To handle the unlikely case of the event-stream being unavailable,
> keep the implementation simple by falling back to the generic
> __smp_cond_load_relaxed_spinwait() with an smp_rmb() to follow
> (via smp_acquire__after_ctrl_dep().)
> 
> Cc: Will Deacon <will@kernel.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
>  arch/arm64/include/asm/barrier.h | 36 ++++++++++++++++++++++++++++++++
>  1 file changed, 36 insertions(+)
> 
> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
> index 25721275a5a2..22d9291aee8d 100644
> --- a/arch/arm64/include/asm/barrier.h
> +++ b/arch/arm64/include/asm/barrier.h
> @@ -232,6 +232,22 @@ do {                                                                       \
>         (typeof(*ptr))VAL;                                              \
>  })
> 
> +#define __smp_cond_load_acquire_timewait(ptr, cond_expr,               \
> +                                        time_expr_ns, time_limit_ns)   \
> +({                                                                     \
> +       typeof(ptr) __PTR = (ptr);                                      \
> +       __unqual_scalar_typeof(*ptr) VAL;                               \
> +       for (;;) {                                                      \
> +               VAL = smp_load_acquire(__PTR);                          \
> +               if (cond_expr)                                          \
> +                       break;                                          \
> +               __cmpwait_relaxed(__PTR, VAL);                          \
> +               if ((time_expr_ns) >= (time_limit_ns))                  \
> +                       break;                                          \
> +       }                                                               \
> +       (typeof(*ptr))VAL;                                              \
> +})
> +
>  /*
>   * For the unlikely case that the event-stream is unavailable,
>   * ward off the possibility of waiting forever by falling back
> @@ -254,6 +270,26 @@ do {                                                                       \
>         (typeof(*ptr))_val;                                             \
>  })
> 
> +#define smp_cond_load_acquire_timewait(ptr, cond_expr,                 \
> +                                     time_expr_ns, time_limit_ns)      \
> +({                                                                     \
> +       __unqual_scalar_typeof(*ptr) _val;                              \
> +       int __wfe = arch_timer_evtstrm_available();                     \
> +                                                                       \
> +       if (likely(__wfe)) {                                            \
> +               _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
> +                                                       time_expr_ns,   \
> +                                                       time_limit_ns); \
> +       } else {                                                        \
> +               _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
> +                                                       time_expr_ns,   \
> +                                                       time_limit_ns); \
> +               smp_acquire__after_ctrl_dep();                          \
> +       }                                                               \
> +       (typeof(*ptr))_val;                                             \
> +})
> +
> +
>  #include <asm-generic/barrier.h>
> 
>  #endif /* __ASSEMBLY__ */
> --
> 2.43.5

Tested both relaxed and acquire variants on AWS Graviton (ARM64
Neoverse V1) with your V9 haltpoll changes, atop master 128c8f96eb.

Reviewed-by: Haris Okanovic <harisokn@amazon.com>
Tested-by: Haris Okanovic <harisokn@amazon.com>

Re: [PATCH 4/4] arm64: barrier: Add smp_cond_load_acquire_timewait()
Posted by Ankur Arora 11 months, 3 weeks ago
Okanovic, Haris <harisokn@amazon.com> writes:

> On Mon, 2025-02-03 at 13:49 -0800, Ankur Arora wrote:
>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>
>>
>>
>> Add smp_cond_load_acquire_timewait(). This is substantially similar
>> to smp_cond_load_acquire() where we use a load-acquire in the loop
>> and avoid an smp_rmb() later.
>>
>> To handle the unlikely case of the event-stream being unavailable,
>> keep the implementation simple by falling back to the generic
>> __smp_cond_load_relaxed_spinwait() with an smp_rmb() to follow
>> (via smp_acquire__after_ctrl_dep().)
>>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: linux-arm-kernel@lists.infradead.org
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>>  arch/arm64/include/asm/barrier.h | 36 ++++++++++++++++++++++++++++++++
>>  1 file changed, 36 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
>> index 25721275a5a2..22d9291aee8d 100644
>> --- a/arch/arm64/include/asm/barrier.h
>> +++ b/arch/arm64/include/asm/barrier.h
>> @@ -232,6 +232,22 @@ do {                                                                       \
>>         (typeof(*ptr))VAL;                                              \
>>  })
>>
>> +#define __smp_cond_load_acquire_timewait(ptr, cond_expr,               \
>> +                                        time_expr_ns, time_limit_ns)   \
>> +({                                                                     \
>> +       typeof(ptr) __PTR = (ptr);                                      \
>> +       __unqual_scalar_typeof(*ptr) VAL;                               \
>> +       for (;;) {                                                      \
>> +               VAL = smp_load_acquire(__PTR);                          \
>> +               if (cond_expr)                                          \
>> +                       break;                                          \
>> +               __cmpwait_relaxed(__PTR, VAL);                          \
>> +               if ((time_expr_ns) >= (time_limit_ns))                  \
>> +                       break;                                          \
>> +       }                                                               \
>> +       (typeof(*ptr))VAL;                                              \
>> +})
>> +
>>  /*
>>   * For the unlikely case that the event-stream is unavailable,
>>   * ward off the possibility of waiting forever by falling back
>> @@ -254,6 +270,26 @@ do {                                                                       \
>>         (typeof(*ptr))_val;                                             \
>>  })
>>
>> +#define smp_cond_load_acquire_timewait(ptr, cond_expr,                 \
>> +                                     time_expr_ns, time_limit_ns)      \
>> +({                                                                     \
>> +       __unqual_scalar_typeof(*ptr) _val;                              \
>> +       int __wfe = arch_timer_evtstrm_available();                     \
>> +                                                                       \
>> +       if (likely(__wfe)) {                                            \
>> +               _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
>> +                                                       time_expr_ns,   \
>> +                                                       time_limit_ns); \
>> +       } else {                                                        \
>> +               _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
>> +                                                       time_expr_ns,   \
>> +                                                       time_limit_ns); \
>> +               smp_acquire__after_ctrl_dep();                          \
>> +       }                                                               \
>> +       (typeof(*ptr))_val;                                             \
>> +})
>> +
>> +
>>  #include <asm-generic/barrier.h>
>>
>>  #endif /* __ASSEMBLY__ */
>> --
>> 2.43.5
>
> Tested both relaxed and acquire variants on AWS Graviton (ARM64
> Neoverse V1) with your V9 haltpoll changes, atop master 128c8f96eb.
>
> Reviewed-by: Haris Okanovic <harisokn@amazon.com>
> Tested-by: Haris Okanovic <harisokn@amazon.com>

That's great. Thanks Haris.

--
ankur