Add smp_cond_load_acquire_timewait(). This is substantially similar
to smp_cond_load_acquire() where we use a load-acquire in the loop
and avoid an smp_rmb() later.
To handle the unlikely case of the event-stream being unavailable,
keep the implementation simple by falling back to the generic
__smp_cond_load_relaxed_spinwait() with an smp_rmb() to follow
(via smp_acquire__after_ctrl_dep()).
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
arch/arm64/include/asm/barrier.h | 36 ++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 25721275a5a2..22d9291aee8d 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -232,6 +232,22 @@ do { \
(typeof(*ptr))VAL; \
})
+/*
+ * __smp_cond_load_acquire_timewait() - wait on *ptr with acquire ordering
+ * until cond_expr becomes true or time_expr_ns reaches time_limit_ns.
+ *
+ * Each iteration re-reads *ptr via smp_load_acquire(), so callers need no
+ * trailing smp_rmb(); the wait itself is __cmpwait_relaxed(), which blocks
+ * until *ptr changes or an event wakes the CPU (NOTE(review): relies on the
+ * event stream being enabled to bound the wait — callers must check
+ * arch_timer_evtstrm_available() first, as the wrapper below does).
+ *
+ * The deadline is only evaluated after returning from __cmpwait_relaxed(),
+ * so on timeout the result is the last value observed, which may not
+ * satisfy cond_expr — the caller must distinguish timeout from success by
+ * re-checking the condition.
+ */
+#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = smp_load_acquire(__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ if ((time_expr_ns) >= (time_limit_ns)) \
+ break; \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
/*
* For the unlikely case that the event-stream is unavailable,
* ward off the possibility of waiting forever by falling back
@@ -254,6 +270,26 @@ do { \
(typeof(*ptr))_val; \
})
+/*
+ * smp_cond_load_acquire_timewait() - public entry point.
+ *
+ * When the timer event stream is available, use the WFE-based
+ * load-acquire loop above (acquire semantics come from the loads
+ * themselves). Otherwise fall back to the generic relaxed spin-wait and
+ * upgrade its final relaxed load to acquire ordering with
+ * smp_acquire__after_ctrl_dep(), matching the acquire variant's contract
+ * on both paths.
+ */
+#define smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ __unqual_scalar_typeof(*ptr) _val; \
+ int __wfe = arch_timer_evtstrm_available(); \
+ \
+ if (likely(__wfe)) { \
+ _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ } else { \
+ _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ smp_acquire__after_ctrl_dep(); \
+ } \
+ (typeof(*ptr))_val; \
+})
+
+
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
--
2.43.5
On Mon, 2025-02-03 at 13:49 -0800, Ankur Arora wrote:
> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>
>
>
> Add smp_cond_load_acquire_timewait(). This is substantially similar
> to smp_cond_load_acquire() where we use a load-acquire in the loop
> and avoid an smp_rmb() later.
>
> To handle the unlikely case of the event-stream being unavailable,
> keep the implementation simple by falling back to the generic
> __smp_cond_load_relaxed_spinwait() with an smp_rmb() to follow
> (via smp_acquire__after_ctrl_dep().)
>
> Cc: Will Deacon <will@kernel.org>
> Cc: Catalin Marinas <catalin.marinas@arm.com>
> Cc: linux-arm-kernel@lists.infradead.org
> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
> ---
> arch/arm64/include/asm/barrier.h | 36 ++++++++++++++++++++++++++++++++
> 1 file changed, 36 insertions(+)
>
> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
> index 25721275a5a2..22d9291aee8d 100644
> --- a/arch/arm64/include/asm/barrier.h
> +++ b/arch/arm64/include/asm/barrier.h
> @@ -232,6 +232,22 @@ do { \
> (typeof(*ptr))VAL; \
> })
>
> +#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
> + time_expr_ns, time_limit_ns) \
> +({ \
> + typeof(ptr) __PTR = (ptr); \
> + __unqual_scalar_typeof(*ptr) VAL; \
> + for (;;) { \
> + VAL = smp_load_acquire(__PTR); \
> + if (cond_expr) \
> + break; \
> + __cmpwait_relaxed(__PTR, VAL); \
> + if ((time_expr_ns) >= (time_limit_ns)) \
> + break; \
> + } \
> + (typeof(*ptr))VAL; \
> +})
> +
> /*
> * For the unlikely case that the event-stream is unavailable,
> * ward off the possibility of waiting forever by falling back
> @@ -254,6 +270,26 @@ do { \
> (typeof(*ptr))_val; \
> })
>
> +#define smp_cond_load_acquire_timewait(ptr, cond_expr, \
> + time_expr_ns, time_limit_ns) \
> +({ \
> + __unqual_scalar_typeof(*ptr) _val; \
> + int __wfe = arch_timer_evtstrm_available(); \
> + \
> + if (likely(__wfe)) { \
> + _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
> + time_expr_ns, \
> + time_limit_ns); \
> + } else { \
> + _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
> + time_expr_ns, \
> + time_limit_ns); \
> + smp_acquire__after_ctrl_dep(); \
> + } \
> + (typeof(*ptr))_val; \
> +})
> +
> +
> #include <asm-generic/barrier.h>
>
> #endif /* __ASSEMBLY__ */
> --
> 2.43.5
Tested both relaxed and acquire variants on AWS Graviton (ARM64
Neoverse V1) with your V9 haltpoll changes, atop master 128c8f96eb.
Reviewed-by: Haris Okanovic <harisokn@amazon.com>
Tested-by: Haris Okanovic <harisokn@amazon.com>
Okanovic, Haris <harisokn@amazon.com> writes:
> On Mon, 2025-02-03 at 13:49 -0800, Ankur Arora wrote:
>> CAUTION: This email originated from outside of the organization. Do not click links or open attachments unless you can confirm the sender and know the content is safe.
>>
>>
>>
>> Add smp_cond_load_acquire_timewait(). This is substantially similar
>> to smp_cond_load_acquire() where we use a load-acquire in the loop
>> and avoid an smp_rmb() later.
>>
>> To handle the unlikely case of the event-stream being unavailable,
>> keep the implementation simple by falling back to the generic
>> __smp_cond_load_relaxed_spinwait() with an smp_rmb() to follow
>> (via smp_acquire__after_ctrl_dep().)
>>
>> Cc: Will Deacon <will@kernel.org>
>> Cc: Catalin Marinas <catalin.marinas@arm.com>
>> Cc: linux-arm-kernel@lists.infradead.org
>> Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
>> ---
>> arch/arm64/include/asm/barrier.h | 36 ++++++++++++++++++++++++++++++++
>> 1 file changed, 36 insertions(+)
>>
>> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
>> index 25721275a5a2..22d9291aee8d 100644
>> --- a/arch/arm64/include/asm/barrier.h
>> +++ b/arch/arm64/include/asm/barrier.h
>> @@ -232,6 +232,22 @@ do { \
>> (typeof(*ptr))VAL; \
>> })
>>
>> +#define __smp_cond_load_acquire_timewait(ptr, cond_expr, \
>> + time_expr_ns, time_limit_ns) \
>> +({ \
>> + typeof(ptr) __PTR = (ptr); \
>> + __unqual_scalar_typeof(*ptr) VAL; \
>> + for (;;) { \
>> + VAL = smp_load_acquire(__PTR); \
>> + if (cond_expr) \
>> + break; \
>> + __cmpwait_relaxed(__PTR, VAL); \
>> + if ((time_expr_ns) >= (time_limit_ns)) \
>> + break; \
>> + } \
>> + (typeof(*ptr))VAL; \
>> +})
>> +
>> /*
>> * For the unlikely case that the event-stream is unavailable,
>> * ward off the possibility of waiting forever by falling back
>> @@ -254,6 +270,26 @@ do { \
>> (typeof(*ptr))_val; \
>> })
>>
>> +#define smp_cond_load_acquire_timewait(ptr, cond_expr, \
>> + time_expr_ns, time_limit_ns) \
>> +({ \
>> + __unqual_scalar_typeof(*ptr) _val; \
>> + int __wfe = arch_timer_evtstrm_available(); \
>> + \
>> + if (likely(__wfe)) { \
>> + _val = __smp_cond_load_acquire_timewait(ptr, cond_expr, \
>> + time_expr_ns, \
>> + time_limit_ns); \
>> + } else { \
>> + _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
>> + time_expr_ns, \
>> + time_limit_ns); \
>> + smp_acquire__after_ctrl_dep(); \
>> + } \
>> + (typeof(*ptr))_val; \
>> +})
>> +
>> +
>> #include <asm-generic/barrier.h>
>>
>> #endif /* __ASSEMBLY__ */
>> --
>> 2.43.5
>
> Tested both relaxed and acquire variants on AWS Graviton (ARM64
> Neoverse V1) with your V9 haltpoll changes, atop master 128c8f96eb.
>
> Reviewed-by: Haris Okanovic <harisokn@amazon.com>
> Tested-by: Haris Okanovic <harisokn@amazon.com>
That's great. Thanks Haris.
--
ankur
© 2016 - 2026 Red Hat, Inc.