smp_cond_load_{relaxed,acquire}_timewait() wait on a condition variable
until a timeout expires. This waiting is some mix of spinning while
dereferencing an address, and waiting in a WFE for a store event or
periodic events from the event-stream.
Handle the waiting part of the policy in ___smp_cond_timewait() while
offloading the spinning to the generic ___smp_cond_spinwait().
To minimize time spent spinning when the user can tolerate a large
overshoot, choose SMP_TIMEWAIT_DEFAULT_US to be the event-stream
period.
This would result in a worst case delay of ARCH_TIMER_EVT_STREAM_PERIOD_US.
Cc: Will Deacon <will@kernel.org>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
arch/arm64/include/asm/barrier.h | 48 ++++++++++++++++++++++++++++++++
1 file changed, 48 insertions(+)
diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 7c56e2621c7d..a1367f2901f0 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -10,6 +10,7 @@
#ifndef __ASSEMBLY__
#include <linux/kasan-checks.h>
+#include <linux/minmax.h>
#include <asm/alternative-macros.h>
@@ -222,6 +223,53 @@ do { \
#define __smp_timewait_store(ptr, val) \
__cmpwait_relaxed(ptr, val)
+/*
+ * Redefine ARCH_TIMER_EVT_STREAM_PERIOD_US locally to avoid include hell.
+ */
+#define __ARCH_TIMER_EVT_STREAM_PERIOD_US 100UL
+extern bool arch_timer_evtstrm_available(void);
+
+static inline u64 ___smp_cond_spinwait(u64 now, u64 prev, u64 end,
+ u32 *spin, bool *wait, u64 slack);
+/*
+ * To minimize time spent spinning, we want to allow a large overshoot.
+ * So, choose a default slack value of the event-stream period.
+ */
+#define SMP_TIMEWAIT_DEFAULT_US __ARCH_TIMER_EVT_STREAM_PERIOD_US
+
+static inline u64 ___smp_cond_timewait(u64 now, u64 prev, u64 end,
+ u32 *spin, bool *wait, u64 slack)
+{
+ bool wfet = alternative_has_cap_unlikely(ARM64_HAS_WFXT);
+ bool wfe, ev = arch_timer_evtstrm_available();
+ u64 evt_period = __ARCH_TIMER_EVT_STREAM_PERIOD_US;
+ u64 remaining = end - now;
+
+ if (now >= end)
+ return 0;
+ /*
+ * Use WFE if there's enough slack to get an event-stream wakeup even
+ * if we don't come out of the WFE due to natural causes.
+ */
+ wfe = ev && ((remaining + slack) > evt_period);
+
+ if (wfe || wfet) {
+ *wait = true;
+ *spin = 0;
+ return now;
+ }
+
+ /*
+ * The time remaining is shorter than our wait granularity. Let
+ * the generic spinwait policy determine how to spin.
+ */
+ return ___smp_cond_spinwait(now, prev, end, spin, wait, slack);
+}
+
+#ifndef __smp_cond_policy
+#define __smp_cond_policy ___smp_cond_timewait
+#endif
+
#include <asm-generic/barrier.h>
#endif /* __ASSEMBLY__ */
--
2.43.5
On Thu, 26 Jun 2025, Ankur Arora wrote: > @@ -222,6 +223,53 @@ do { \ > #define __smp_timewait_store(ptr, val) \ > __cmpwait_relaxed(ptr, val) > > +/* > + * Redefine ARCH_TIMER_EVT_STREAM_PERIOD_US locally to avoid include hell. > + */ > +#define __ARCH_TIMER_EVT_STREAM_PERIOD_US 100UL > +extern bool arch_timer_evtstrm_available(void); > + > +static inline u64 ___smp_cond_spinwait(u64 now, u64 prev, u64 end, > + u32 *spin, bool *wait, u64 slack); > +/* > + * To minimize time spent spinning, we want to allow a large overshoot. > + * So, choose a default slack value of the event-stream period. > + */ > +#define SMP_TIMEWAIT_DEFAULT_US __ARCH_TIMER_EVT_STREAM_PERIOD_US > + > +static inline u64 ___smp_cond_timewait(u64 now, u64 prev, u64 end, > + u32 *spin, bool *wait, u64 slack) > +{ > + bool wfet = alternative_has_cap_unlikely(ARM64_HAS_WFXT); > + bool wfe, ev = arch_timer_evtstrm_available(); An uninitialized and initialized variable on the same line. Maybe separate that. Looks confusing and unusual to me. > + u64 evt_period = __ARCH_TIMER_EVT_STREAM_PERIOD_US; > + u64 remaining = end - now; > + > + if (now >= end) > + return 0; > + /* > + * Use WFE if there's enough slack to get an event-stream wakeup even > + * if we don't come out of the WFE due to natural causes. > + */ > + wfe = ev && ((remaining + slack) > evt_period); The line above does not matter for the wfet case and the calculation is ignored. We hope that in the future wfet will be the default case.
Christoph Lameter (Ampere) <cl@gentwo.org> writes: > On Thu, 26 Jun 2025, Ankur Arora wrote: > >> @@ -222,6 +223,53 @@ do { \ >> #define __smp_timewait_store(ptr, val) \ >> __cmpwait_relaxed(ptr, val) >> >> +/* >> + * Redefine ARCH_TIMER_EVT_STREAM_PERIOD_US locally to avoid include hell. >> + */ >> +#define __ARCH_TIMER_EVT_STREAM_PERIOD_US 100UL >> +extern bool arch_timer_evtstrm_available(void); >> + >> +static inline u64 ___smp_cond_spinwait(u64 now, u64 prev, u64 end, >> + u32 *spin, bool *wait, u64 slack); >> +/* >> + * To minimize time spent spinning, we want to allow a large overshoot. >> + * So, choose a default slack value of the event-stream period. >> + */ >> +#define SMP_TIMEWAIT_DEFAULT_US __ARCH_TIMER_EVT_STREAM_PERIOD_US >> + >> +static inline u64 ___smp_cond_timewait(u64 now, u64 prev, u64 end, >> + u32 *spin, bool *wait, u64 slack) >> +{ >> + bool wfet = alternative_has_cap_unlikely(ARM64_HAS_WFXT); >> + bool wfe, ev = arch_timer_evtstrm_available(); > > An unitialized and initialized variable on the same line. Maybe separate > that. Looks confusing and unusual to me. Yeah, that makes sense. Will fix. >> + u64 evt_period = __ARCH_TIMER_EVT_STREAM_PERIOD_US; >> + u64 remaining = end - now; >> + >> + if (now >= end) >> + return 0; >> + /* >> + * Use WFE if there's enough slack to get an event-stream wakeup even >> + * if we don't come out of the WFE due to natural causes. >> + */ >> + wfe = ev && ((remaining + slack) > evt_period); > > The line above does not matter for the wfet case and the calculation is > ignored. We hope that in the future wfet will be the default case. My assumption was that the compiler would only evaluate the evt_period comparison if the wfet check evaluates false and it does need to check if wfe is true or not. But let me look at the generated code. Thanks for the comments. -- ankur
Ankur Arora <ankur.a.arora@oracle.com> writes: > Christoph Lameter (Ampere) <cl@gentwo.org> writes: > >> On Thu, 26 Jun 2025, Ankur Arora wrote: >> >>> @@ -222,6 +223,53 @@ do { \ >>> #define __smp_timewait_store(ptr, val) \ >>> __cmpwait_relaxed(ptr, val) >>> >>> +/* >>> + * Redefine ARCH_TIMER_EVT_STREAM_PERIOD_US locally to avoid include hell. >>> + */ >>> +#define __ARCH_TIMER_EVT_STREAM_PERIOD_US 100UL >>> +extern bool arch_timer_evtstrm_available(void); >>> + >>> +static inline u64 ___smp_cond_spinwait(u64 now, u64 prev, u64 end, >>> + u32 *spin, bool *wait, u64 slack); >>> +/* >>> + * To minimize time spent spinning, we want to allow a large overshoot. >>> + * So, choose a default slack value of the event-stream period. >>> + */ >>> +#define SMP_TIMEWAIT_DEFAULT_US __ARCH_TIMER_EVT_STREAM_PERIOD_US >>> + >>> +static inline u64 ___smp_cond_timewait(u64 now, u64 prev, u64 end, >>> + u32 *spin, bool *wait, u64 slack) >>> +{ >>> + bool wfet = alternative_has_cap_unlikely(ARM64_HAS_WFXT); >>> + bool wfe, ev = arch_timer_evtstrm_available(); >> >> An unitialized and initialized variable on the same line. Maybe separate >> that. Looks confusing and unusual to me. > > Yeah, that makes sense. Will fix. > >>> + u64 evt_period = __ARCH_TIMER_EVT_STREAM_PERIOD_US; >>> + u64 remaining = end - now; >>> + >>> + if (now >= end) >>> + return 0; >>> + /* >>> + * Use WFE if there's enough slack to get an event-stream wakeup even >>> + * if we don't come out of the WFE due to natural causes. >>> + */ >>> + wfe = ev && ((remaining + slack) > evt_period); >> >> The line above does not matter for the wfet case and the calculation is >> ignored. We hope that in the future wfet will be the default case. > > My assumption was that the compiler would only evaluate the evt_period > comparison if the wfet check evaluates false and it does need to check > if wfe is true or not. > > But let me look at the generated code. So, I was too optimistic. 
The compiler doesn't do this optimization at all. I'm guessing that's because of at least two reasons: The wfet case is unlikely: bool wfet = alternative_has_cap_unlikely(ARM64_HAS_WFXT); And, I'm testing for: if (wfe || wfet) This last check should have been "if (wfet || wfe)" since the first clause is pretty much free. But even with that fixed, I don't think the compiler will do the right thing. I could condition the computation on arch_timer_evtstrm_available(), but I would prefer to keep this code straightforward since it will only be evaluated infrequently. But, let me see what I can do. -- ankur
© 2016 - 2025 Red Hat, Inc.