[PATCH 3/3] locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64

Posted by Uros Bizjak 1 year, 10 months ago
Commit:

  6d12c8d308e68 ("percpu: Wire up cmpxchg128")

improved the emulated cmpxchg8b_emu() library function to return
success/failure in the ZF flag.

Define arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64 targets
to override the generic arch_try_cmpxchg() with an optimized,
target-specific implementation that uses the ZF flag directly.
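
For context, arch_try_cmpxchg64() follows the usual try_cmpxchg()
contract: it returns true on success and, on failure, updates *oldp
with the value actually found in memory. A minimal, hypothetical
caller (inc64_return() is an illustration, not part of this patch):

   static u64 inc64_return(u64 *p)
   {
           u64 old = *p;

           /* On failure, try_cmpxchg64() refreshes 'old' from memory. */
           while (!try_cmpxchg64(p, &old, old + 1))
                   ;

           return old + 1;
   }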

The assembly code at the call sites improves from:

   bf56d:	e8 fc ff ff ff       	call   cmpxchg8b_emu
   bf572:	8b 74 24 28          	mov    0x28(%esp),%esi
   bf576:	89 c3                	mov    %eax,%ebx
   bf578:	89 d1                	mov    %edx,%ecx
   bf57a:	8b 7c 24 2c          	mov    0x2c(%esp),%edi
   bf57e:	89 f0                	mov    %esi,%eax
   bf580:	89 fa                	mov    %edi,%edx
   bf582:	31 d8                	xor    %ebx,%eax
   bf584:	31 ca                	xor    %ecx,%edx
   bf586:	09 d0                	or     %edx,%eax
   bf588:	0f 84 e3 01 00 00    	je     bf771 <...>

to:

   bf572:	e8 fc ff ff ff       	call   cmpxchg8b_emu
   bf577:	0f 84 b6 01 00 00    	je     bf733 <...>
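
The savings come from the generic try_cmpxchg() fallback, which cannot
observe the ZF result and therefore re-compares the 64-bit values in C,
roughly like this sketch (a model, not the actual generic code):

   static inline bool generic_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
   {
           u64 expected = *oldp;
           u64 actual = arch_cmpxchg64(ptr, expected, new);

           if (likely(actual == expected))
                   return true;

           *oldp = actual; /* failure: report the fresh value */
           return false;
   }

On 32-bit x86 that 64-bit comparison is the xor/xor/or sequence visible
above; the new implementation instead branches directly on the ZF flag
set by CMPXCHG8B (or by cmpxchg8b_emu).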

No functional changes intended.

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 arch/x86/include/asm/cmpxchg_32.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fe40d0681ea8..9e0d330dd5d0 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -122,6 +122,34 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64
 }
 #define arch_cmpxchg64_local arch_cmpxchg64_local
 
+#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new)			\
+({									\
+	union __u64_halves o = { .full = *(_oldp), },			\
+			   n = { .full = (_new), };			\
+	bool ret;							\
+									\
+	asm volatile(ALTERNATIVE(LOCK_PREFIX_HERE			\
+				 "call cmpxchg8b_emu",			\
+				 "lock; cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
+		     CC_SET(e)						\
+		     : CC_OUT(e) (ret),					\
+		       [ptr] "+m" (*(_ptr)),				\
+		       "+a" (o.low), "+d" (o.high)			\
+		     : "b" (n.low), "c" (n.high), "S" (_ptr)		\
+		     : "memory");					\
+									\
+	if (unlikely(!ret))						\
+		*(_oldp) = o.full;					\
+									\
+	likely(ret);							\
+})
+
+static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+	return __arch_try_cmpxchg64_emu(ptr, oldp, new);
+}
+#define arch_try_cmpxchg64 arch_try_cmpxchg64
+
 #endif
 
 #define system_has_cmpxchg64()		boot_cpu_has(X86_FEATURE_CX8)
-- 
2.44.0
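
A note on the asm constraints in the new macro, for readers unfamiliar
with the CMPXCHG8B conventions (my reading of the documented
cmpxchg8b_emu interface, not text from the patch itself):

   /*
    * %edx:%eax - expected value in, actual memory value out ("+a", "+d")
    * %ecx:%ebx - new value to store ("b", "c")
    * %esi      - pointer to the u64, used by cmpxchg8b_emu ("S")
    * ZF        - set on success, consumed via CC_SET(e)/CC_OUT(e)
    *
    * The [ptr] "+m" operand covers the inline CMPXCHG8B path and tells
    * the compiler that the location may be modified.
    */
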
Re: [PATCH 3/3] locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64
Posted by Ingo Molnar 1 year, 10 months ago
* Uros Bizjak <ubizjak@gmail.com> wrote:

> Commit:
> 
>   6d12c8d308e68 ("percpu: Wire up cmpxchg128")
> 
> improved the emulated cmpxchg8b_emu() library function to return
> success/failure in the ZF flag.
> 
> Define arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64 targets
> to override the generic arch_try_cmpxchg() with an optimized,
> target-specific implementation that uses the ZF flag directly.
> 
> The assembly code at the call sites improves from:
> 
>    bf56d:	e8 fc ff ff ff       	call   cmpxchg8b_emu
>    bf572:	8b 74 24 28          	mov    0x28(%esp),%esi
>    bf576:	89 c3                	mov    %eax,%ebx
>    bf578:	89 d1                	mov    %edx,%ecx
>    bf57a:	8b 7c 24 2c          	mov    0x2c(%esp),%edi
>    bf57e:	89 f0                	mov    %esi,%eax
>    bf580:	89 fa                	mov    %edi,%edx
>    bf582:	31 d8                	xor    %ebx,%eax
>    bf584:	31 ca                	xor    %ecx,%edx
>    bf586:	09 d0                	or     %edx,%eax
>    bf588:	0f 84 e3 01 00 00    	je     bf771 <...>
> 
> to:
> 
>    bf572:	e8 fc ff ff ff       	call   cmpxchg8b_emu
>    bf577:	0f 84 b6 01 00 00    	je     bf733 <...>
> 
> No functional changes intended.

Side note: while there's no hard-written rule for it, I tend to use the 'no 
functional changes intended' line for pure identity transformations - which 
this one isn't, as it changes code generation materially.

So I removed that line - the explanation of the patch is clear enough IMO.

Thanks,

	Ingo
[tip: locking/core] locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64
Posted by tip-bot2 for Uros Bizjak 1 year, 10 months ago
The following commit has been merged into the locking/core branch of tip:

Commit-ID:     aef95dac9ce4f271cc43195ffc175114ed934cbe
Gitweb:        https://git.kernel.org/tip/aef95dac9ce4f271cc43195ffc175114ed934cbe
Author:        Uros Bizjak <ubizjak@gmail.com>
AuthorDate:    Mon, 08 Apr 2024 11:13:58 +02:00
Committer:     Ingo Molnar <mingo@kernel.org>
CommitterDate: Tue, 09 Apr 2024 09:51:03 +02:00

locking/atomic/x86: Introduce arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64

Commit:

  6d12c8d308e68 ("percpu: Wire up cmpxchg128")

improved the emulated cmpxchg8b_emu() library function to return
success/failure in the ZF flag.

Define arch_try_cmpxchg64() for !CONFIG_X86_CMPXCHG64 targets
to override the generic arch_try_cmpxchg() with an optimized,
target-specific implementation that uses the ZF flag directly.

The assembly code at the call sites improves from:

   bf56d:	e8 fc ff ff ff       	call   cmpxchg8b_emu
   bf572:	8b 74 24 28          	mov    0x28(%esp),%esi
   bf576:	89 c3                	mov    %eax,%ebx
   bf578:	89 d1                	mov    %edx,%ecx
   bf57a:	8b 7c 24 2c          	mov    0x2c(%esp),%edi
   bf57e:	89 f0                	mov    %esi,%eax
   bf580:	89 fa                	mov    %edi,%edx
   bf582:	31 d8                	xor    %ebx,%eax
   bf584:	31 ca                	xor    %ecx,%edx
   bf586:	09 d0                	or     %edx,%eax
   bf588:	0f 84 e3 01 00 00    	je     bf771 <...>

to:

   bf572:	e8 fc ff ff ff       	call   cmpxchg8b_emu
   bf577:	0f 84 b6 01 00 00    	je     bf733 <...>

Signed-off-by: Uros Bizjak <ubizjak@gmail.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Link: https://lore.kernel.org/r/20240408091547.90111-4-ubizjak@gmail.com
---
 arch/x86/include/asm/cmpxchg_32.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
index fe40d06..9e0d330 100644
--- a/arch/x86/include/asm/cmpxchg_32.h
+++ b/arch/x86/include/asm/cmpxchg_32.h
@@ -122,6 +122,34 @@ static __always_inline u64 arch_cmpxchg64_local(volatile u64 *ptr, u64 old, u64 
 }
 #define arch_cmpxchg64_local arch_cmpxchg64_local
 
+#define __arch_try_cmpxchg64_emu(_ptr, _oldp, _new)			\
+({									\
+	union __u64_halves o = { .full = *(_oldp), },			\
+			   n = { .full = (_new), };			\
+	bool ret;							\
+									\
+	asm volatile(ALTERNATIVE(LOCK_PREFIX_HERE			\
+				 "call cmpxchg8b_emu",			\
+				 "lock; cmpxchg8b %[ptr]", X86_FEATURE_CX8) \
+		     CC_SET(e)						\
+		     : CC_OUT(e) (ret),					\
+		       [ptr] "+m" (*(_ptr)),				\
+		       "+a" (o.low), "+d" (o.high)			\
+		     : "b" (n.low), "c" (n.high), "S" (_ptr)		\
+		     : "memory");					\
+									\
+	if (unlikely(!ret))						\
+		*(_oldp) = o.full;					\
+									\
+	likely(ret);							\
+})
+
+static __always_inline bool arch_try_cmpxchg64(volatile u64 *ptr, u64 *oldp, u64 new)
+{
+	return __arch_try_cmpxchg64_emu(ptr, oldp, new);
+}
+#define arch_try_cmpxchg64 arch_try_cmpxchg64
+
 #endif
 
 #define system_has_cmpxchg64()		boot_cpu_has(X86_FEATURE_CX8)