Add smp_cond_load_relaxed_timewait(), a timed variant of
smp_cond_load_relaxed().

This uses __cmpwait_relaxed() to do the actual waiting, with the
event-stream guaranteeing that we wake up from WFE periodically
instead of blocking forever when there are no stores to the
cacheline.

For cases where the event-stream is unavailable, fall back to the
generic spin-wait implementation.
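
A caller might use it along these lines (illustrative sketch only;
the time_expr_ns/time_limit_ns interface is the one introduced in
patch 1, and the 100us budget and the flag variable are arbitrary
example values):

	/* Arbitrary example deadline: now plus 100us. */
	u64 deadline = ktime_get_mono_fast_ns() + 100 * NSEC_PER_USEC;

	/* Wait for the flag to become non-zero, or for the deadline. */
	smp_cond_load_relaxed_timewait(&flag, VAL != 0,
				       ktime_get_mono_fast_ns(), deadline);
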
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
arch/arm64/include/asm/barrier.h | 38 ++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 1ca947d5c939..25721275a5a2 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -216,6 +216,44 @@ do { \
(typeof(*ptr))VAL; \
})
+#define __smp_cond_load_relaxed_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ typeof(ptr) __PTR = (ptr); \
+ __unqual_scalar_typeof(*ptr) VAL; \
+ for (;;) { \
+ VAL = READ_ONCE(*__PTR); \
+ if (cond_expr) \
+ break; \
+ __cmpwait_relaxed(__PTR, VAL); \
+ if ((time_expr_ns) >= (time_limit_ns)) \
+ break; \
+ } \
+ (typeof(*ptr))VAL; \
+})
+
+/*
+ * For the unlikely case that the event-stream is unavailable,
+ * ward off the possibility of waiting forever by falling back
+ * to the generic spin-wait.
+ */
+#define smp_cond_load_relaxed_timewait(ptr, cond_expr, \
+ time_expr_ns, time_limit_ns) \
+({ \
+ __unqual_scalar_typeof(*ptr) _val; \
+ int __wfe = arch_timer_evtstrm_available(); \
+ \
+ if (likely(__wfe)) \
+ _val = __smp_cond_load_relaxed_timewait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ else \
+ _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
+ time_expr_ns, \
+ time_limit_ns); \
+ (typeof(*ptr))_val; \
+})
+
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
--
2.43.5
On Mon, Feb 03, 2025 at 01:49:10PM -0800, Ankur Arora wrote:
> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
> index 1ca947d5c939..25721275a5a2 100644
> --- a/arch/arm64/include/asm/barrier.h
> +++ b/arch/arm64/include/asm/barrier.h
> @@ -216,6 +216,44 @@ do { \
> (typeof(*ptr))VAL; \
> })
>
> +#define __smp_cond_load_relaxed_timewait(ptr, cond_expr, \
> + time_expr_ns, time_limit_ns) \
> +({ \
> + typeof(ptr) __PTR = (ptr); \
> + __unqual_scalar_typeof(*ptr) VAL; \
> + for (;;) { \
> + VAL = READ_ONCE(*__PTR); \
> + if (cond_expr) \
> + break; \
> + __cmpwait_relaxed(__PTR, VAL); \
> + if ((time_expr_ns) >= (time_limit_ns)) \
> + break; \
> + } \
> + (typeof(*ptr))VAL; \
> +})
Rename this to something like *_evtstrm, as this doesn't really work
unless we have the event stream. Another one would be *_wfet.
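(Sketch of the suggested rename, name change only:

	#define __smp_cond_load_relaxed_timewait_evtstrm(ptr, cond_expr, \
					time_expr_ns, time_limit_ns)

with smp_cond_load_relaxed_timewait() dispatching to it when
arch_timer_evtstrm_available() is true.)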
> +
> +/*
> + * For the unlikely case that the event-stream is unavailable,
> + * ward off the possibility of waiting forever by falling back
> + * to the generic spin-wait.
> + */
> +#define smp_cond_load_relaxed_timewait(ptr, cond_expr, \
> + time_expr_ns, time_limit_ns) \
> +({ \
> + __unqual_scalar_typeof(*ptr) _val; \
> + int __wfe = arch_timer_evtstrm_available(); \
This should be a bool.
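I.e.:

	bool __wfe = arch_timer_evtstrm_available();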
> + \
> + if (likely(__wfe)) \
> + _val = __smp_cond_load_relaxed_timewait(ptr, cond_expr, \
> + time_expr_ns, \
> + time_limit_ns); \
> + else \
> + _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
> + time_expr_ns, \
> + time_limit_ns); \
> + (typeof(*ptr))_val; \
> +})
Not sure there's much to say here; this depends on the actual
interface introduced by patch 1. If we make some statements about the
granularity of the time_cond_expr check, we'll have to take that into
account.
--
Catalin
Catalin Marinas <catalin.marinas@arm.com> writes:
> On Mon, Feb 03, 2025 at 01:49:10PM -0800, Ankur Arora wrote:
>> diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
>> index 1ca947d5c939..25721275a5a2 100644
>> --- a/arch/arm64/include/asm/barrier.h
>> +++ b/arch/arm64/include/asm/barrier.h
>> @@ -216,6 +216,44 @@ do { \
>> (typeof(*ptr))VAL; \
>> })
>>
>> +#define __smp_cond_load_relaxed_timewait(ptr, cond_expr, \
>> + time_expr_ns, time_limit_ns) \
>> +({ \
>> + typeof(ptr) __PTR = (ptr); \
>> + __unqual_scalar_typeof(*ptr) VAL; \
>> + for (;;) { \
>> + VAL = READ_ONCE(*__PTR); \
>> + if (cond_expr) \
>> + break; \
>> + __cmpwait_relaxed(__PTR, VAL); \
>> + if ((time_expr_ns) >= (time_limit_ns)) \
>> + break; \
>> + } \
>> + (typeof(*ptr))VAL; \
>> +})
>
> Rename this to something like *_evtstrm, as this doesn't really work
> unless we have the event stream.
Ack.
> Another one would be *_wfet.
Hadn't sent out the WFET version yet.
Did you mean that this should be *_evtstrm or *_wfet?
>> +
>> +/*
>> + * For the unlikely case that the event-stream is unavailable,
>> + * ward off the possibility of waiting forever by falling back
>> + * to the generic spin-wait.
>> + */
>> +#define smp_cond_load_relaxed_timewait(ptr, cond_expr, \
>> + time_expr_ns, time_limit_ns) \
>> +({ \
>> + __unqual_scalar_typeof(*ptr) _val; \
>> + int __wfe = arch_timer_evtstrm_available(); \
>
> This should be a bool.
Yeah. Will fix.
>> + \
>> + if (likely(__wfe)) \
>> + _val = __smp_cond_load_relaxed_timewait(ptr, cond_expr, \
>> + time_expr_ns, \
>> + time_limit_ns); \
>> + else \
>> + _val = __smp_cond_load_relaxed_spinwait(ptr, cond_expr, \
>> + time_expr_ns, \
>> + time_limit_ns); \
>> + (typeof(*ptr))_val; \
>> +})
>
> Not sure there's much to say here; this depends on the actual
> interface introduced by patch 1. If we make some statements about the
> granularity of the time_cond_expr check, we'll have to take that into
> account.
Agreed.
Thanks for the review!
--
ankur