[PATCH v8 04/12] arm64: support WFET in smp_cond_load_relaxed_timeout()

Extend __cmpwait_relaxed() to __cmpwait_relaxed_timeout(), which takes
an additional timeout value in ns.

Lacking WFET, or with a zero or negative timeout value, we fall back
to WFE.
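
For illustration, a caller typically reaches this path through
smp_cond_load_relaxed_timeout(). A minimal sketch (the flag and the
~1ms timeout are made up, and this assumes the generic helper takes
the timeout in ns and, like smp_cond_load_relaxed(), returns the last
observed value):

	u32 done = 0;	/* shared; set to 1 by another CPU */
	u32 seen;

	/*
	 * Spin until done becomes 1 or ~1ms elapses. On arm64 this ends
	 * up in cpu_poll_relax(): with WFXT it arms a timed wait via
	 * __cmpwait_relaxed_timeout(), otherwise it falls back to WFE
	 * (event stream) or cpu_relax().
	 */
	seen = smp_cond_load_relaxed_timeout(&done, VAL == 1, NSEC_PER_MSEC);
	if (!seen)
		pr_debug("timed out waiting for done\n");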

Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: linux-arm-kernel@lists.infradead.org
Signed-off-by: Ankur Arora <ankur.a.arora@oracle.com>
---
 arch/arm64/include/asm/barrier.h |  8 ++--
 arch/arm64/include/asm/cmpxchg.h | 72 ++++++++++++++++++++++----------
 2 files changed, 55 insertions(+), 25 deletions(-)
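
For reference, the timed path below arms WFET with an absolute deadline
expressed in arch timer cycles, along the lines of this sketch (using
the wfet() helper from asm/barrier.h, where the patch open-codes the
instruction):

	u64 ecycles = arch_timer_read_counter() +
			NSECS_TO_CYCLES(timeout_ns);	/* asm/delay-const.h */
	wfet(ecycles);	/* "msr s0_3_c1_c0_0, Xt" below is the sysreg
			 * encoding of WFET, usable with assemblers that
			 * lack the mnemonic */

If the event stream fires or a store hits the cacheline armed by the
LDXR, the wait ends early; otherwise WFET returns once the counter
reaches the deadline.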

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index 6190e178db51..fbd71cd4ef4e 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -224,8 +224,8 @@ do {									\
 extern bool arch_timer_evtstrm_available(void);
 
 /*
- * In the common case, cpu_poll_relax() sits waiting in __cmpwait_relaxed()
- * for the ptr value to change.
+ * In the common case, cpu_poll_relax() sits waiting in __cmpwait_relaxed()/
+ * __cmpwait_relaxed_timeout() for the ptr value to change.
  *
  * Since this period is reasonably long, choose SMP_TIMEOUT_POLL_COUNT
  * to be 1, so smp_cond_load_{relaxed,acquire}_timeout() does a
@@ -234,7 +234,9 @@ extern bool arch_timer_evtstrm_available(void);
 #define SMP_TIMEOUT_POLL_COUNT	1
 
 #define cpu_poll_relax(ptr, val, timeout_ns) do {			\
-	if (arch_timer_evtstrm_available())				\
+	if (alternative_has_cap_unlikely(ARM64_HAS_WFXT))		\
+		__cmpwait_relaxed_timeout(ptr, val, timeout_ns);	\
+	else if (arch_timer_evtstrm_available())			\
 		__cmpwait_relaxed(ptr, val);				\
 	else								\
 		cpu_relax();						\
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index d7a540736741..acd01a203b62 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -12,6 +12,7 @@
 
 #include <asm/barrier.h>
 #include <asm/lse.h>
+#include <asm/delay-const.h>
 
 /*
  * We need separate acquire parameters for ll/sc and lse, since the full
@@ -208,22 +209,41 @@ __CMPXCHG_GEN(_mb)
 	__cmpxchg128((ptr), (o), (n));						\
 })
 
-#define __CMPWAIT_CASE(w, sfx, sz)					\
-static inline void __cmpwait_case_##sz(volatile void *ptr,		\
-				       unsigned long val)		\
-{									\
-	unsigned long tmp;						\
-									\
-	asm volatile(							\
-	"	sevl\n"							\
-	"	wfe\n"							\
-	"	ldxr" #sfx "\t%" #w "[tmp], %[v]\n"			\
-	"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
-	"	cbnz	%" #w "[tmp], 1f\n"				\
-	"	wfe\n"							\
-	"1:"								\
-	: [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr)			\
-	: [val] "r" (val));						\
+/* Re-declared here to avoid include dependency. */
+extern u64 (*arch_timer_read_counter)(void);
+
+#define __CMPWAIT_CASE(w, sfx, sz)						\
+static inline void __cmpwait_case_##sz(volatile void *ptr,			\
+				       unsigned long val,			\
+				       s64 timeout_ns)				\
+{										\
+	unsigned long tmp;							\
+										\
+	if (!alternative_has_cap_unlikely(ARM64_HAS_WFXT) || timeout_ns <= 0) {	\
+		asm volatile(							\
+		"	sevl\n"							\
+		"	wfe\n"							\
+		"	ldxr" #sfx "\t%" #w "[tmp], %[v]\n"			\
+		"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
+		"	cbnz	%" #w "[tmp], 1f\n"				\
+		"	wfe\n"							\
+		"1:"								\
+		: [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr)			\
+		: [val] "r" (val));						\
+	} else {								\
+		u64 ecycles = arch_timer_read_counter() +			\
+				NSECS_TO_CYCLES(timeout_ns);			\
+		asm volatile(							\
+		"	sevl\n"							\
+		"	wfe\n"							\
+		"	ldxr" #sfx "\t%" #w "[tmp], %[v]\n"			\
+		"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
+		"	cbnz	%" #w "[tmp], 2f\n"				\
+		"	msr s0_3_c1_c0_0, %[ecycles]\n"				\
+		"2:"								\
+		: [tmp] "=&r" (tmp), [v] "+Q" (*(u##sz *)ptr)			\
+		: [val] "r" (val), [ecycles] "r" (ecycles));			\
+	}									\
 }
 
 __CMPWAIT_CASE(w, b, 8);
@@ -236,17 +256,22 @@ __CMPWAIT_CASE( ,  , 64);
 #define __CMPWAIT_GEN(sfx)						\
 static __always_inline void __cmpwait##sfx(volatile void *ptr,		\
 				  unsigned long val,			\
+				  s64 timeout_ns,			\
 				  int size)				\
 {									\
 	switch (size) {							\
 	case 1:								\
-		return __cmpwait_case##sfx##_8(ptr, (u8)val);		\
+		return __cmpwait_case##sfx##_8(ptr, (u8)val,		\
+					       timeout_ns);		\
 	case 2:								\
-		return __cmpwait_case##sfx##_16(ptr, (u16)val);		\
+		return __cmpwait_case##sfx##_16(ptr, (u16)val,		\
+					       timeout_ns);		\
 	case 4:								\
-		return __cmpwait_case##sfx##_32(ptr, val);		\
+		return __cmpwait_case##sfx##_32(ptr, val,		\
+					       timeout_ns);		\
 	case 8:								\
-		return __cmpwait_case##sfx##_64(ptr, val);		\
+		return __cmpwait_case##sfx##_64(ptr, val,		\
+					       timeout_ns);		\
 	default:							\
 		BUILD_BUG();						\
 	}								\
@@ -258,7 +283,10 @@ __CMPWAIT_GEN()
 
 #undef __CMPWAIT_GEN
 
-#define __cmpwait_relaxed(ptr, val) \
-	__cmpwait((ptr), (unsigned long)(val), sizeof(*(ptr)))
+#define __cmpwait_relaxed_timeout(ptr, val, timeout_ns)			\
+	__cmpwait((ptr), (unsigned long)(val), timeout_ns, sizeof(*(ptr)))
+
+#define __cmpwait_relaxed(ptr, val)					\
+	__cmpwait_relaxed_timeout(ptr, val, 0)
 
 #endif	/* __ASM_CMPXCHG_H */
-- 
2.31.1