Current futex atomic operations are implemented with ll/sc instructions
and by clearing PSTATE.PAN.

Since Armv9.6, FEAT_LSUI supplies not only load/store instructions but
also atomic operations for user memory access from the kernel, so the
kernel no longer needs to clear the PSTATE.PAN bit.

With these instructions, some futex atomic operations no longer need to
be implemented with an ldxr/stlxr pair and can instead be implemented
with a single atomic operation supplied by FEAT_LSUI.

However, some futex atomic operations still need to use the ll/sc
approach via the ldtxr/stltxr instructions supplied by FEAT_LSUI,
because there is either no corresponding atomic instruction (eor) or no
word-size variant of it (cas{mb}t).

Either way, all of them now work without clearing the PSTATE.PAN bit.
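For example, an atomic add on user memory collapses from an ll/sc retry
loop into a single unprivileged atomic instruction (illustrative
mnemonics only; see the diff below for the real implementation):

	/* before: ll/sc pair, PSTATE.PAN cleared around the access */
	1:	ldxr	w2, [x1]	// load futex word
		add	w3, w2, w0	// apply oparg
		stlxr	w4, w3, [x1]	// w4 != 0 on contention
		cbnz	w4, 1b		// retry

	/* after: FEAT_LSUI, PSTATE.PAN left set */
		ldtaddal	w0, w2, [x1]	// old value returned in w2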
Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
---
arch/arm64/include/asm/futex.h | 130 ++++++++++++++++++++++++++++++++-
1 file changed, 129 insertions(+), 1 deletion(-)
diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 22a6301a9f3d..ece35ca9b5d9 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -9,6 +9,8 @@
 #include <linux/uaccess.h>
 #include <linux/stringify.h>
 
+#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
 #include <asm/errno.h>
 
 #define LLSC_MAX_LOOPS	128 /* What's the largest number you can think of? */
@@ -115,11 +117,137 @@ __llsc_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
 	return ret;
 }
 
+#ifdef CONFIG_AS_HAS_LSUI
+
+#define __LSUI_PREAMBLE	".arch_extension lsui\n"
+
+#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)				\
+static __always_inline int						\
+__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
+{									\
+	int ret = 0;							\
+	int oldval;							\
+									\
+	uaccess_ttbr0_enable();						\
+	asm volatile("// __lsui_futex_atomic_" #op "\n"			\
+	__LSUI_PREAMBLE							\
+"1:	" #asm_op #mb "	%w3, %w2, %1\n"					\
+"2:\n"									\
+	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)				\
+	: "+r" (ret), "+Q" (*uaddr), "=r" (oldval)			\
+	: "r" (oparg)							\
+	: "memory");							\
+	uaccess_ttbr0_disable();					\
+									\
+	if (!ret)							\
+		*oval = oldval;						\
+									\
+	return ret;							\
+}
+
+LSUI_FUTEX_ATOMIC_OP(add, ldtadd, al)
+LSUI_FUTEX_ATOMIC_OP(or, ldtset, al)
+LSUI_FUTEX_ATOMIC_OP(andnot, ldtclr, al)
+LSUI_FUTEX_ATOMIC_OP(set, swpt, al)
+
+static __always_inline int
+__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
+{
+	return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);
+}
+
+static __always_inline int
+__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
+{
+	unsigned int loops = LLSC_MAX_LOOPS;
+	int ret, oldval, tmp;
+
+	uaccess_ttbr0_enable();
+	/*
+	 * there are no ldteor/stteor instructions...
+	 */
+	asm volatile("// __lsui_futex_atomic_eor\n"
+	__LSUI_PREAMBLE
+"	prfm	pstl1strm, %2\n"
+"1:	ldtxr	%w1, %2\n"
+"	eor	%w3, %w1, %w5\n"
+"2:	stltxr	%w0, %w3, %2\n"
+"	cbz	%w0, 3f\n"
+"	sub	%w4, %w4, %w0\n"
+"	cbnz	%w4, 1b\n"
+"	mov	%w0, %w6\n"
+"3:\n"
+"	dmb	ish\n"
+	_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0)
+	_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0)
+	: "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp),
+	  "+r" (loops)
+	: "r" (oparg), "Ir" (-EAGAIN)
+	: "memory");
+	uaccess_ttbr0_disable();
+
+	if (!ret)
+		*oval = oldval;
+
+	return ret;
+}
+
+static __always_inline int
+__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
+{
+	int ret = 0;
+	unsigned int loops = LLSC_MAX_LOOPS;
+	u32 val, tmp;
+
+	uaccess_ttbr0_enable();
+	/*
+	 * cas{al}t doesn't support word size...
+	 */
+	asm volatile("//__lsui_futex_cmpxchg\n"
+	__LSUI_PREAMBLE
+"	prfm	pstl1strm, %2\n"
+"1:	ldtxr	%w1, %2\n"
+"	eor	%w3, %w1, %w5\n"
+"	cbnz	%w3, 4f\n"
+"2:	stltxr	%w3, %w6, %2\n"
+"	cbz	%w3, 3f\n"
+"	sub	%w4, %w4, %w3\n"
+"	cbnz	%w4, 1b\n"
+"	mov	%w0, %w7\n"
+"3:\n"
+"	dmb	ish\n"
+"4:\n"
+	_ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0)
+	_ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0)
+	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
+	: "r" (oldval), "r" (newval), "Ir" (-EAGAIN)
+	: "memory");
+	uaccess_ttbr0_disable();
+
+	if (!ret)
+		*oval = oldval;
+
+	return ret;
+}
+
+#define __lsui_llsc_body(op, ...)					\
+({									\
+	alternative_has_cap_likely(ARM64_HAS_LSUI) ?			\
+		__lsui_##op(__VA_ARGS__) : __llsc_##op(__VA_ARGS__);	\
+})
+
+#else	/* CONFIG_AS_HAS_LSUI */
+
+#define __lsui_llsc_body(op, ...)	__llsc_##op(__VA_ARGS__)
+
+#endif	/* CONFIG_AS_HAS_LSUI */
+
+
 #define FUTEX_ATOMIC_OP(op)						\
 static __always_inline int						\
 __futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)		\
 {									\
-	return __llsc_futex_atomic_##op(oparg, uaddr, oval);		\
+	return __lsui_llsc_body(futex_atomic_##op, oparg, uaddr, oval);	\
 }
 
 FUTEX_ATOMIC_OP(add)
--
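For reference, hand-expanding FUTEX_ATOMIC_OP(add) with
CONFIG_AS_HAS_LSUI=y shows how the LSUI/ll-sc dispatch ends up looking
(a sketch derived from the macros above, not compiler output):

	static __always_inline int
	__futex_atomic_add(int oparg, u32 __user *uaddr, int *oval)
	{
		return alternative_has_cap_likely(ARM64_HAS_LSUI) ?
			__lsui_futex_atomic_add(oparg, uaddr, oval) :
			__llsc_futex_atomic_add(oparg, uaddr, oval);
	}

The capability check is patched via the alternatives framework at boot,
so CPUs without FEAT_LSUI fall back to the existing ll/sc routines.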
On Sat, Aug 16, 2025 at 04:19:29PM +0100, Yeoreum Yun wrote:
> @@ -115,11 +117,137 @@ __llsc_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> 	return ret;
> }
>
> +#ifdef CONFIG_AS_HAS_LSUI
> +
> +#define __LSUI_PREAMBLE	".arch_extension lsui\n"
> +
> +#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)				\
> +static __always_inline int						\
> +__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
> +{									\
> +	int ret = 0;							\
> +	int oldval;							\
> +									\
> +	uaccess_ttbr0_enable();						\

I think we can drop uaccess_ttbr0_*() from these functions. At the
kconfig level, TTBR0_PAN selects PAN. Hardware with LSUI will also
have PAN (since 8.1), so the above is an unnecessary branch or nop,
depending on how the alternatives play out. But add a comment instead.

> +static __always_inline int
> +__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> +{
> +	unsigned int loops = LLSC_MAX_LOOPS;
> +	int ret, oldval, tmp;
> +
> +	uaccess_ttbr0_enable();
> +	/*
> +	 * there are no ldteor/stteor instructions...
> +	 */
[...]
> +	if (!ret)
> +		*oval = oldval;
> +
> +	return ret;
> +}

That's an unfortunate omission from the architecture.

> +#define __lsui_llsc_body(op, ...)					\
> +({									\
> +	alternative_has_cap_likely(ARM64_HAS_LSUI) ?			\
> +		__lsui_##op(__VA_ARGS__) : __llsc_##op(__VA_ARGS__);	\
> +})
> +
> +#else	/* CONFIG_AS_HAS_LSUI */
> +
> +#define __lsui_llsc_body(op, ...)	__llsc_##op(__VA_ARGS__)
> +
> +#endif	/* CONFIG_AS_HAS_LSUI */
> +
> +
> #define FUTEX_ATOMIC_OP(op)						\
> static __always_inline int						\
> __futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)		\
> {									\
> -	return __llsc_futex_atomic_##op(oparg, uaddr, oval);		\
> +	return __lsui_llsc_body(futex_atomic_##op, oparg, uaddr, oval);	\
> }

That's what I got confused about. It looks fine:

Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>
Hi Catalin,

> On Sat, Aug 16, 2025 at 04:19:29PM +0100, Yeoreum Yun wrote:
> > @@ -115,11 +117,137 @@ __llsc_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
[...]
> > +	uaccess_ttbr0_enable();						\
>
> I think we can drop uaccess_ttbr0_*() from these functions. At the
> kconfig level, TTBR0_PAN selects PAN. Hardware with LSUI will also
> have PAN (since 8.1), so the above is an unnecessary branch or nop,
> depending on how the alternatives play out. But add a comment instead.

Thanks for pointing this out. I'll change it.

[...]
> That's what I got confused about. It looks fine:
>
> Reviewed-by: Catalin Marinas <catalin.marinas@arm.com>

Thanks!

--
Sincerely,
Yeoreum Yun
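A minimal sketch of how the suggested change could look, with the
uaccess_ttbr0_*() calls dropped in favour of a comment (illustrative
only, assuming the macro shape from the patch above; not the final
code):

	#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)			\
	static __always_inline int					\
	__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval) \
	{								\
		int ret = 0;						\
		int oldval;						\
									\
		/*							\
		 * FEAT_LSUI implies FEAT_PAN (Armv8.1+), so SW TTBR0	\
		 * PAN is never in use here and uaccess_ttbr0_enable()/	\
		 * disable() would only add a redundant nop or branch.	\
		 */							\
		asm volatile("// __lsui_futex_atomic_" #op "\n"		\
		__LSUI_PREAMBLE						\
	"1:	" #asm_op #mb "	%w3, %w2, %1\n"				\
	"2:\n"								\
		_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)			\
		: "+r" (ret), "+Q" (*uaddr), "=r" (oldval)		\
		: "r" (oparg)						\
		: "memory");						\
									\
		if (!ret)						\
			*oval = oldval;					\
									\
		return ret;						\
	}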
On Sat, Aug 16, 2025 at 04:19:29PM +0100, Yeoreum Yun wrote:
[...]
> +static __always_inline int
> +__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> +{
> +	unsigned int loops = LLSC_MAX_LOOPS;
> +	int ret, oldval, tmp;
> +
> +	uaccess_ttbr0_enable();
> +	/*
> +	 * there are no ldteor/stteor instructions...
> +	 */

*sigh*

Were these new instructions not added with futex in mind?

I wonder whether CAS would be better than exclusives for xor...

> +static __always_inline int
> +__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> +{
> +	int ret = 0;
> +	unsigned int loops = LLSC_MAX_LOOPS;
> +	u32 val, tmp;
> +
> +	uaccess_ttbr0_enable();
> +	/*
> +	 * cas{al}t doesn't support word size...
> +	 */

What about just aligning down and doing a 64-bit cas in that case?

Will
On Thu, Sep 11, 2025 at 04:22:24PM +0100, Will Deacon wrote:
> On Sat, Aug 16, 2025 at 04:19:29PM +0100, Yeoreum Yun wrote:
> > +static __always_inline int
> > +__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> > +{
> > +	unsigned int loops = LLSC_MAX_LOOPS;
> > +	int ret, oldval, tmp;
> > +
> > +	uaccess_ttbr0_enable();
> > +	/*
> > +	 * there are no ldteor/stteor instructions...
> > +	 */
>
> *sigh*
>
> Were these new instructions not added with futex in mind?

I guess it was _most_ of the futex.

> I wonder whether CAS would be better than exclusives for xor...

I was first thinking we could share some of the code with
__futex_cmpxchg() but...

> > +static __always_inline int
> > +__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> > +{
> > +	int ret = 0;
> > +	unsigned int loops = LLSC_MAX_LOOPS;
> > +	u32 val, tmp;
> > +
> > +	uaccess_ttbr0_enable();
> > +	/*
> > +	 * cas{al}t doesn't support word size...
> > +	 */
>
> What about just aligning down and doing a 64-bit cas in that case?

I think it gets more complicated. Here we get the oldval from the
caller, so no need to do a read. With CAS, we'd need to read the full
64 bits, replace half of them with oldval and newval just to be able
to do the operation. On top of this, we'd need to check which half of
the 64-bit value to use. I think it's too hairy for little benefit.

--
Catalin
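To make the hairiness concrete, here is a hypothetical sketch of the
align-down 64-bit CAS approach (cas64() stands in for a casalt-based
helper that returns the observed old value; little-endian layout is
assumed and retry bounding is omitted; none of this is proposed kernel
code):

	static int cmpxchg32_via_cas64(u32 __user *uaddr, u32 oldval,
				       u32 newval, u32 *oval)
	{
		u64 __user *addr64 = (u64 __user *)((unsigned long)uaddr & ~7UL);
		bool high = (unsigned long)uaddr & 4;	/* which half is the futex? */
		u64 cur64, old64, new64, obs;
		u32 cur;

		for (;;) {
			if (get_user(cur64, addr64))	/* the extra 64-bit read */
				return -EFAULT;

			/* splice oldval/newval into the right half */
			if (high) {
				old64 = (cur64 & 0xffffffffULL) | ((u64)oldval << 32);
				new64 = (cur64 & 0xffffffffULL) | ((u64)newval << 32);
			} else {
				old64 = (cur64 & ~0xffffffffULL) | oldval;
				new64 = (cur64 & ~0xffffffffULL) | newval;
			}

			obs = cas64(addr64, old64, new64);	/* hypothetical */
			if (obs == old64)
				break;			/* newval stored */

			cur = high ? (u32)(obs >> 32) : (u32)obs;
			if (cur != oldval) {
				*oval = cur;		/* genuine compare failure */
				return 0;
			}
			/* only the unrelated half changed: must retry */
		}

		*oval = oldval;
		return 0;
	}

Note the final branch: a store to the *other* 32 bits forces a retry,
which is exactly the unrelated-failure problem raised in the reply
below.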
Hi,

> On Thu, Sep 11, 2025 at 04:22:24PM +0100, Will Deacon wrote:
> > On Sat, Aug 16, 2025 at 04:19:29PM +0100, Yeoreum Yun wrote:
[...]
> > What about just aligning down and doing a 64-bit cas in that case?
>
> I think it gets more complicated. Here we get the oldval from the
> caller, so no need to do a read. With CAS, we'd need to read the full
> 64 bits, replace half of them with oldval and newval just to be able
> to do the operation. On top of this, we'd need to check which half of
> the 64-bit value to use. I think it's too hairy for little benefit.

Agreed. Also, an unrelated change to the other 32 bits could make the
futex atomic operation fail. So I'll keep the ll/sc method for cmpxchg
and eor, even when using LSUI.

Thanks!

--
Sincerely,
Yeoreum Yun
Hi Will,

> > Current futex atomic operations are implemented with ll/sc instructions
> > and by clearing PSTATE.PAN.
[...]
> > +static __always_inline int
> > +__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> > +{
> > +	unsigned int loops = LLSC_MAX_LOOPS;
> > +	int ret, oldval, tmp;
> > +
> > +	uaccess_ttbr0_enable();
> > +	/*
> > +	 * there are no ldteor/stteor instructions...
> > +	 */
>
> *sigh*
>
> Were these new instructions not added with futex in mind?

Rather than futex, these seem to have been designed for atomic_op()
(like a user-memory version of LSE)... that's why there appears to be
no "eor" among them.

> I wonder whether CAS would be better than exclusives for xor...
>
> > +static __always_inline int
> > +__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> > +{
> > +	int ret = 0;
> > +	unsigned int loops = LLSC_MAX_LOOPS;
> > +	u32 val, tmp;
> > +
> > +	uaccess_ttbr0_enable();
> > +	/*
> > +	 * cas{al}t doesn't support word size...
> > +	 */
>
> What about just aligning down and doing a 64-bit cas in that case?

Even if cas{al}t were applied to futex_eor() and futex_cmpxchg(), we
would still need to check that the old value is unchanged at the time
of the load, which means the routine would end up the same as the
ll/sc way:

again:
	oldval = *uaddr;
	oldval2 = oldval;
	cas(uaddr, oldval2, newval);
	if (oldval != oldval2)
		goto again;

With CAS, trying the cmpxchg and returning -EAGAIN immediately when
the old value differs would not match the behaviour of the former
__llsc_futex_atomic_op(). This patch's intention is to keep the former
behaviour and only remove the PSTATE change. If this behaviour change
is acceptable, I'll gladly replace them with the CAS version.

Thanks!

--
Sincerely,
Yeoreum Yun