[PATCH v15 5/8] arm64: futex: support futex with FEAT_LSUI

Yeoreum Yun posted 8 patches 1 month, 1 week ago
There is a newer version of this series
[PATCH v15 5/8] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 1 month, 1 week ago
Current futex atomic operations are implemented with ll/sc instructions
and clearing PSTATE.PAN.

Since Armv9.6, FEAT_LSUI supplies not only load/store instructions but
also atomic operations for user memory access in the kernel, so it no
longer needs to clear the PSTATE.PAN bit.

With these instructions, some of the futex atomic operations no longer
need to be implemented with an ldxr/stlxr pair; instead they can be
implemented with a single atomic operation supplied by FEAT_LSUI,
without requiring the PAN toggling that plain ldtr*/sttr* instruction
usage would.

However, some of the futex atomic operations don't have a matching
instruction, e.g. eor, or cmpxchg with word size.
For those operations, use cas{al}t to implement them.

Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
---
 arch/arm64/include/asm/futex.h | 166 ++++++++++++++++++++++++++++++++-
 arch/arm64/include/asm/lsui.h  |  27 ++++++
 2 files changed, 190 insertions(+), 3 deletions(-)
 create mode 100644 arch/arm64/include/asm/lsui.h

diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index 9a0efed50743..c89b45319c49 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -7,9 +7,9 @@
 
 #include <linux/futex.h>
 #include <linux/uaccess.h>
-#include <linux/stringify.h>
 
 #include <asm/errno.h>
+#include <asm/lsui.h>
 
 #define FUTEX_MAX_LOOPS	128 /* What's the largest number you can think of? */
 
@@ -87,11 +87,171 @@ __llsc_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
 	return ret;
 }
 
+#ifdef CONFIG_ARM64_LSUI
+
+/*
+ * FEAT_LSUI is supported since Armv9.6, where FEAT_PAN is mandatory.
+ * However, this assumption may not always hold:
+ *
+ *   - Some CPUs advertise FEAT_LSUI but lack FEAT_PAN.
+ *   - Virtualisation or ID register overrides may expose invalid
+ *     feature combinations.
+ *
+ * Rather than disabling FEAT_LSUI when FEAT_PAN is absent, wrap LSUI
+ * instructions with uaccess_ttbr0_enable()/disable() when
+ * ARM64_SW_TTBR0_PAN is enabled.
+ */
+
+#define LSUI_FUTEX_ATOMIC_OP(op, asm_op)				\
+static __always_inline int						\
+__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
+{									\
+	int ret = 0;							\
+	int oldval;							\
+									\
+	uaccess_ttbr0_enable();						\
+									\
+	asm volatile("// __lsui_futex_atomic_" #op "\n"			\
+	__LSUI_PREAMBLE							\
+"1:	" #asm_op "al	%w3, %w2, %1\n"					\
+"2:\n"									\
+	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)				\
+	: "+r" (ret), "+Q" (*uaddr), "=r" (oldval)			\
+	: "r" (oparg)							\
+	: "memory");							\
+									\
+	uaccess_ttbr0_disable();					\
+									\
+	if (!ret)							\
+		*oval = oldval;						\
+	return ret;							\
+}
+
+LSUI_FUTEX_ATOMIC_OP(add, ldtadd)
+LSUI_FUTEX_ATOMIC_OP(or, ldtset)
+LSUI_FUTEX_ATOMIC_OP(andnot, ldtclr)
+LSUI_FUTEX_ATOMIC_OP(set, swpt)
+
+static __always_inline int
+__lsui_cmpxchg64(u64 __user *uaddr, u64 *oldval, u64 newval)
+{
+	int ret = 0;
+
+	uaccess_ttbr0_enable();
+
+	asm volatile("// __lsui_cmpxchg64\n"
+	__LSUI_PREAMBLE
+"1:	casalt	%2, %3, %1\n"
+"2:\n"
+	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)
+	: "+r" (ret), "+Q" (*uaddr), "+r" (*oldval)
+	: "r" (newval)
+	: "memory");
+
+	uaccess_ttbr0_disable();
+
+	return ret;
+}
+
+static __always_inline int
+__lsui_cmpxchg32(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
+{
+	u64 __user *uaddr64;
+	bool futex_pos, other_pos;
+	int ret, i;
+	u32 other, orig_other;
+	union {
+		u32 futex[2];
+		u64 raw;
+	} oval64, orig64, nval64;
+
+	uaddr64 = (u64 __user *) PTR_ALIGN_DOWN(uaddr, sizeof(u64));
+	futex_pos = !IS_ALIGNED((unsigned long)uaddr, sizeof(u64));
+	other_pos = !futex_pos;
+
+	oval64.futex[futex_pos] = oldval;
+	ret = get_user(oval64.futex[other_pos], (u32 __user *)uaddr64 + other_pos);
+	if (ret)
+		return -EFAULT;
+
+	ret = -EAGAIN;
+	for (i = 0; i < FUTEX_MAX_LOOPS; i++) {
+		orig64.raw = nval64.raw = oval64.raw;
+
+		nval64.futex[futex_pos] = newval;
+
+		if (__lsui_cmpxchg64(uaddr64, &oval64.raw, nval64.raw))
+			return -EFAULT;
+
+		oldval = oval64.futex[futex_pos];
+		other = oval64.futex[other_pos];
+		orig_other = orig64.futex[other_pos];
+
+		if (other == orig_other) {
+			ret = 0;
+			break;
+		}
+	}
+
+	if (!ret)
+		*oval = oldval;
+
+	return ret;
+}
+
+static __always_inline int
+__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
+{
+	/*
+	 * Undo the bitwise negation applied to the oparg passed from
+	 * arch_futex_atomic_op_inuser() with FUTEX_OP_ANDN.
+	 */
+	return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);
+}
+
+static __always_inline int
+__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
+{
+	u32 oldval, newval, val;
+	int ret, i;
+
+	if (get_user(oldval, uaddr))
+		return -EFAULT;
+
+	/*
+	 * there are no ldteor/stteor instructions...
+	 */
+	for (i = 0; i < FUTEX_MAX_LOOPS; i++) {
+		newval = oldval ^ oparg;
+
+		ret = __lsui_cmpxchg32(uaddr, oldval, newval, &val);
+		if (ret)
+			return ret;
+
+		if (val == oldval) {
+			*oval = val;
+			return 0;
+		}
+
+		oldval = val;
+	}
+
+	return -EAGAIN;
+}
+
+static __always_inline int
+__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
+{
+	return __lsui_cmpxchg32(uaddr, oldval, newval, oval);
+}
+#endif	/* CONFIG_ARM64_LSUI */
+
+
 #define FUTEX_ATOMIC_OP(op)						\
 static __always_inline int						\
 __futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)		\
 {									\
-	return __llsc_futex_atomic_##op(oparg, uaddr, oval);		\
+	return __lsui_llsc_body(futex_atomic_##op, oparg, uaddr, oval);	\
 }
 
 FUTEX_ATOMIC_OP(add)
@@ -103,7 +263,7 @@ FUTEX_ATOMIC_OP(set)
 static __always_inline int
 __futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
 {
-	return __llsc_futex_cmpxchg(uaddr, oldval, newval, oval);
+	return __lsui_llsc_body(futex_cmpxchg, uaddr, oldval, newval, oval);
 }
 
 static inline int
diff --git a/arch/arm64/include/asm/lsui.h b/arch/arm64/include/asm/lsui.h
new file mode 100644
index 000000000000..8f0d81953eb6
--- /dev/null
+++ b/arch/arm64/include/asm/lsui.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_LSUI_H
+#define __ASM_LSUI_H
+
+#include <linux/compiler_types.h>
+#include <linux/stringify.h>
+#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
+#include <asm/cpucaps.h>
+
+#define __LSUI_PREAMBLE	".arch_extension lsui\n"
+
+#ifdef CONFIG_ARM64_LSUI
+
+#define __lsui_llsc_body(op, ...)					\
+({									\
+	alternative_has_cap_unlikely(ARM64_HAS_LSUI) ?			\
+		__lsui_##op(__VA_ARGS__) : __llsc_##op(__VA_ARGS__);	\
+})
+
+#else	/* CONFIG_ARM64_LSUI */
+
+#define __lsui_llsc_body(op, ...)	__llsc_##op(__VA_ARGS__)
+
+#endif	/* CONFIG_ARM64_LSUI */
+
+#endif	/* __ASM_LSUI_H */
-- 
LEVI:{C3F47F37-75D8-414A-A8BA-3980EC8A46D7}
Re: [PATCH v15 5/8] arm64: futex: support futex with FEAT_LSUI
Posted by Catalin Marinas 3 weeks, 5 days ago
On Fri, Feb 27, 2026 at 03:17:02PM +0000, Yeoreum Yun wrote:
> +#ifdef CONFIG_ARM64_LSUI
> +
> +/*
> + * FEAT_LSUI is supported since Armv9.6, where FEAT_PAN is mandatory.
> + * However, this assumption may not always hold:
> + *
> + *   - Some CPUs advertise FEAT_LSUI but lack FEAT_PAN.
> + *   - Virtualisation or ID register overrides may expose invalid
> + *     feature combinations.
> + *
> + * Rather than disabling FEAT_LSUI when FEAT_PAN is absent, wrap LSUI
> + * instructions with uaccess_ttbr0_enable()/disable() when
> + * ARM64_SW_TTBR0_PAN is enabled.
> + */

I'd just keep this comment in the commit log. Here you could simply say
that user access instructions don't require (hardware) PAN toggling. It
should be obvious why we use ttbr0 toggling like for other uaccess
routines.

> +#define LSUI_FUTEX_ATOMIC_OP(op, asm_op)				\
> +static __always_inline int						\
> +__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
> +{									\
> +	int ret = 0;							\
> +	int oldval;							\
> +									\
> +	uaccess_ttbr0_enable();						\
> +									\
> +	asm volatile("// __lsui_futex_atomic_" #op "\n"			\
> +	__LSUI_PREAMBLE							\
> +"1:	" #asm_op "al	%w3, %w2, %1\n"					\

As I mentioned on a previous patch, can we not use named operators here?

> +"2:\n"									\
> +	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)				\
> +	: "+r" (ret), "+Q" (*uaddr), "=r" (oldval)			\
> +	: "r" (oparg)							\
> +	: "memory");							\
> +									\
> +	uaccess_ttbr0_disable();					\
> +									\
> +	if (!ret)							\
> +		*oval = oldval;						\
> +	return ret;							\
> +}
> +
> +LSUI_FUTEX_ATOMIC_OP(add, ldtadd)
> +LSUI_FUTEX_ATOMIC_OP(or, ldtset)
> +LSUI_FUTEX_ATOMIC_OP(andnot, ldtclr)
> +LSUI_FUTEX_ATOMIC_OP(set, swpt)
> +
> +static __always_inline int
> +__lsui_cmpxchg64(u64 __user *uaddr, u64 *oldval, u64 newval)
> +{
> +	int ret = 0;
> +
> +	uaccess_ttbr0_enable();
> +
> +	asm volatile("// __lsui_cmpxchg64\n"
> +	__LSUI_PREAMBLE
> +"1:	casalt	%2, %3, %1\n"
> +"2:\n"
> +	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)
> +	: "+r" (ret), "+Q" (*uaddr), "+r" (*oldval)
> +	: "r" (newval)
> +	: "memory");
> +
> +	uaccess_ttbr0_disable();
> +
> +	return ret;

So here it returns EFAULT only if the check failed. The EAGAIN is only
possible in cmpxchg32. That's fine but I have more comments below.

> +}
> +
> +static __always_inline int
> +__lsui_cmpxchg32(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> +{
> +	u64 __user *uaddr64;
> +	bool futex_pos, other_pos;
> +	int ret, i;
> +	u32 other, orig_other;
> +	union {
> +		u32 futex[2];
> +		u64 raw;
> +	} oval64, orig64, nval64;
> +
> +	uaddr64 = (u64 __user *) PTR_ALIGN_DOWN(uaddr, sizeof(u64));

Nit: we don't use space after the type cast.

> +	futex_pos = !IS_ALIGNED((unsigned long)uaddr, sizeof(u64));
> +	other_pos = !futex_pos;
> +
> +	oval64.futex[futex_pos] = oldval;
> +	ret = get_user(oval64.futex[other_pos], (u32 __user *)uaddr64 + other_pos);
> +	if (ret)
> +		return -EFAULT;
> +
> +	ret = -EAGAIN;
> +	for (i = 0; i < FUTEX_MAX_LOOPS; i++) {

I was wondering if we still need the FUTEX_MAX_LOOPS bound with LSUI. I
guess with CAS we can have some malicious user that keeps updating the
futex location or the adjacent one on another CPU. However, I think we'd
need to differentiate between futex_atomic_cmpxchg_inatomic() use and
the eor case.

> +		orig64.raw = nval64.raw = oval64.raw;
> +
> +		nval64.futex[futex_pos] = newval;

I'd keep orig64.raw = oval64.raw and set the nval64 separately (I find
it clearer, not sure the compiler cares much):

		nval64.futex[futex_pos] = newval;
		nval64.futex[other_pos] = oval64.futex[other_pos];

> +
> +		if (__lsui_cmpxchg64(uaddr64, &oval64.raw, nval64.raw))
> +			return -EFAULT;
> +
> +		oldval = oval64.futex[futex_pos];
> +		other = oval64.futex[other_pos];
> +		orig_other = orig64.futex[other_pos];
> +
> +		if (other == orig_other) {
> +			ret = 0;
> +			break;
> +		}

Is this check correct? What if the cmpxchg64 failed because futex_pos
was changed but other_pos remained the same, it will just report success
here. You need to compare the full 64-bit value to ensure the cmpxchg64
succeeded.

> +	}
> +
> +	if (!ret)
> +		*oval = oldval;
> +
> +	return ret;
> +}
> +
> +static __always_inline int
> +__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
> +{
> +	/*
> +	 * Undo the bitwise negation applied to the oparg passed from
> +	 * arch_futex_atomic_op_inuser() with FUTEX_OP_ANDN.
> +	 */
> +	return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);
> +}
> +
> +static __always_inline int
> +__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> +{
> +	u32 oldval, newval, val;
> +	int ret, i;
> +
> +	if (get_user(oldval, uaddr))
> +		return -EFAULT;
> +
> +	/*
> +	 * there are no ldteor/stteor instructions...
> +	 */
> +	for (i = 0; i < FUTEX_MAX_LOOPS; i++) {
> +		newval = oldval ^ oparg;
> +
> +		ret = __lsui_cmpxchg32(uaddr, oldval, newval, &val);

Since we have a FUTEX_MAX_LOOPS here, do we need it in cmpxchg32 as
well?

For eor, we need a loop irrespective of whether futex_pos or other_pos
have changed. For cmpxchg, we need the loop only if other_pos has
changed and return -EAGAIN if futex_pos has changed since the caller
needs to update oldval and call again.

So try to differentiate these cases, maybe only keep the loop outside
cmpxchg32 (I haven't put much though into it).

> +		if (ret)
> +			return ret;
> +
> +		if (val == oldval) {
> +			*oval = val;
> +			return 0;
> +		}

I can see you are adding another check here for the actual value which
solves the other_pos comparison earlier but that's only for eor and not
the __lsui_futex_cmpxchg() case.

> +
> +		oldval = val;
> +	}
> +
> +	return -EAGAIN;
> +}
> +
> +static __always_inline int
> +__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> +{
> +	return __lsui_cmpxchg32(uaddr, oldval, newval, oval);
> +}
> +#endif	/* CONFIG_ARM64_LSUI */

-- 
Catalin
Re: [PATCH v15 5/8] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 3 weeks, 4 days ago
Hi Catalin,

Thanks for your review :D

> On Fri, Feb 27, 2026 at 03:17:02PM +0000, Yeoreum Yun wrote:
> > +#ifdef CONFIG_ARM64_LSUI
> > +
> > +/*
> > + * FEAT_LSUI is supported since Armv9.6, where FEAT_PAN is mandatory.
> > + * However, this assumption may not always hold:
> > + *
> > + *   - Some CPUs advertise FEAT_LSUI but lack FEAT_PAN.
> > + *   - Virtualisation or ID register overrides may expose invalid
> > + *     feature combinations.
> > + *
> > + * Rather than disabling FEAT_LSUI when FEAT_PAN is absent, wrap LSUI
> > + * instructions with uaccess_ttbr0_enable()/disable() when
> > + * ARM64_SW_TTBR0_PAN is enabled.
> > + */
>
> I'd just keep this comment in the commit log. Here you could simply say
> that user access instructions don't require (hardware) PAN toggling. It
> should be obvious why we use ttbr0 toggling like for other uaccess
> routines.

Okay. I'll move this into commit log. Thanks!

>
> > +#define LSUI_FUTEX_ATOMIC_OP(op, asm_op)				\
> > +static __always_inline int						\
> > +__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
> > +{									\
> > +	int ret = 0;							\
> > +	int oldval;							\
> > +									\
> > +	uaccess_ttbr0_enable();						\
> > +									\
> > +	asm volatile("// __lsui_futex_atomic_" #op "\n"			\
> > +	__LSUI_PREAMBLE							\
> > +"1:	" #asm_op "al	%w3, %w2, %1\n"					\
>
> As I mentioned on a previous patch, can we not use named operators here?

I missed your message before I sent to v16, But v16 already make them
with named operands. Thanks!

[...]

> > +}
> > +
> > +static __always_inline int
> > +__lsui_cmpxchg32(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> > +{
> > +	u64 __user *uaddr64;
> > +	bool futex_pos, other_pos;
> > +	int ret, i;
> > +	u32 other, orig_other;
> > +	union {
> > +		u32 futex[2];
> > +		u64 raw;
> > +	} oval64, orig64, nval64;
> > +
> > +	uaddr64 = (u64 __user *) PTR_ALIGN_DOWN(uaddr, sizeof(u64));
>
> Nit: we don't use space after the type cast.

Oops. I'll remove space.

>
> > +	futex_pos = !IS_ALIGNED((unsigned long)uaddr, sizeof(u64));
> > +	other_pos = !futex_pos;
> > +
> > +	oval64.futex[futex_pos] = oldval;
> > +	ret = get_user(oval64.futex[other_pos], (u32 __user *)uaddr64 + other_pos);
> > +	if (ret)
> > +		return -EFAULT;
> > +
> > +	ret = -EAGAIN;
> > +	for (i = 0; i < FUTEX_MAX_LOOPS; i++) {
>
> I was wondering if we still need the FUTEX_MAX_LOOPS bound with LSUI. I
> guess with CAS we can have some malicious user that keeps updating the
> futex location or the adjacent one on another CPU. However, I think we'd
> need to differentiate between futex_atomic_cmpxchg_inatomic() use and
> the eor case.

Hmm. I'll comment below together in eor..

>
> > +		orig64.raw = nval64.raw = oval64.raw;
> > +
> > +		nval64.futex[futex_pos] = newval;
>
> I'd keep orig64.raw = oval64.raw and set the nval64 separately (I find
> it clearer, not sure the compiler cares much):
>
> 		nval64.futex[futex_pos] = newval;
> 		nval64.futex[other_pos] = oval64.futex[other_pos];
>
> > +
> > +		if (__lsui_cmpxchg64(uaddr64, &oval64.raw, nval64.raw))
> > +			return -EFAULT;
> > +
> > +		oldval = oval64.futex[futex_pos];
> > +		other = oval64.futex[other_pos];
> > +		orig_other = orig64.futex[other_pos];
> > +
> > +		if (other == orig_other) {
> > +			ret = 0;
> > +			break;
> > +		}
>
> Is this check correct? What if the cmpxchg64 failed because futex_pos
> was changed but other_pos remained the same, it will just report success
> here. You need to compare the full 64-bit value to ensure the cmpxchg64
> succeeded.

This is not matter since "futex_cmpxchg_value_locked()" checks
the "curval" and "oldval" IOW, though it returns success,
caller of this function always checks the "curval" and "oldval"
and when it's different, It handles to change return as -EAGAIN.

>
> > +	}
> > +
> > +	if (!ret)
> > +		*oval = oldval;
> > +
> > +	return ret;
> > +}
> > +
> > +static __always_inline int
> > +__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
> > +{
> > +	/*
> > +	 * Undo the bitwise negation applied to the oparg passed from
> > +	 * arch_futex_atomic_op_inuser() with FUTEX_OP_ANDN.
> > +	 */
> > +	return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);
> > +}
> > +
> > +static __always_inline int
> > +__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> > +{
> > +	u32 oldval, newval, val;
> > +	int ret, i;
> > +
> > +	if (get_user(oldval, uaddr))
> > +		return -EFAULT;
> > +
> > +	/*
> > +	 * there are no ldteor/stteor instructions...
> > +	 */
> > +	for (i = 0; i < FUTEX_MAX_LOOPS; i++) {
> > +		newval = oldval ^ oparg;
> > +
> > +		ret = __lsui_cmpxchg32(uaddr, oldval, newval, &val);
>
> Since we have a FUTEX_MAX_LOOPS here, do we need it in cmpxchg32 as
> well?
>
> For eor, we need a loop irrespective of whether futex_pos or other_pos
> have changed. For cmpxchg, we need the loop only if other_pos has
> changed and return -EAGAIN if futex_pos has changed since the caller
> needs to update oldval and call again.
>
> So try to differentiate these cases, maybe only keep the loop outside
> cmpxchg32 (I haven't put much though into it).

I think we can remove loops on __lsui_cmpxchg32() and return -EAGAIN
when other_pos is different. the __lsui_cmpxchg32() will be called
"futex_cmpxchg_value_locked()" and as I said, this always checks
whether curval & oldval when it successed.

But in "eor" when it receive "-EAGAIN" from __lsui_cmxchg32()
we can simply continue the loop.

>
> > +		if (ret)
> > +			return ret;
> > +
> > +		if (val == oldval) {
> > +			*oval = val;
> > +			return 0;
> > +		}
>
> I can see you are adding another check here for the actual value which
> solves the other_pos comparison earlier but that's only for eor and not
> the __lsui_futex_cmpxchg() case.

As I mention above, though it success, caller of futex who calls
__lsui_futex_cmpxchg() via "futex_cmpxchg_value_locked()" checks
curval and oldval is the same even on the success.

So it's not a matter.

>
> > +
> > +		oldval = val;
> > +	}
> > +
> > +	return -EAGAIN;
> > +}
> > +
> > +static __always_inline int
> > +__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
> > +{
> > +	return __lsui_cmpxchg32(uaddr, oldval, newval, oval);
> > +}
> > +#endif	/* CONFIG_ARM64_LSUI */
>
> --
> Catalin

Thanks!

--
Sincerely,
Yeoreum Yun
Re: [PATCH v15 5/8] arm64: futex: support futex with FEAT_LSUI
Posted by Catalin Marinas 3 weeks, 4 days ago
On Fri, Mar 13, 2026 at 09:23:58AM +0000, Yeoreum Yun wrote:
> > On Fri, Feb 27, 2026 at 03:17:02PM +0000, Yeoreum Yun wrote:
> > > +
> > > +		if (__lsui_cmpxchg64(uaddr64, &oval64.raw, nval64.raw))
> > > +			return -EFAULT;
> > > +
> > > +		oldval = oval64.futex[futex_pos];
> > > +		other = oval64.futex[other_pos];
> > > +		orig_other = orig64.futex[other_pos];
> > > +
> > > +		if (other == orig_other) {
> > > +			ret = 0;
> > > +			break;
> > > +		}
> >
> > Is this check correct? What if the cmpxchg64 failed because futex_pos
> > was changed but other_pos remained the same, it will just report success
> > here. You need to compare the full 64-bit value to ensure the cmpxchg64
> > succeeded.
> 
> This is not matter since "futex_cmpxchg_value_locked()" checks
> the "curval" and "oldval" IOW, though it returns success,
> caller of this function always checks the "curval" and "oldval"
> and when it's different, It handles to change return as -EAGAIN.

Ah, ok, it makes sense (I did not check the callers).

> > > +	}
> > > +
> > > +	if (!ret)
> > > +		*oval = oldval;
> > > +
> > > +	return ret;
> > > +}
> > > +
> > > +static __always_inline int
> > > +__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
> > > +{
> > > +	/*
> > > +	 * Undo the bitwise negation applied to the oparg passed from
> > > +	 * arch_futex_atomic_op_inuser() with FUTEX_OP_ANDN.
> > > +	 */
> > > +	return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);
> > > +}
> > > +
> > > +static __always_inline int
> > > +__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> > > +{
> > > +	u32 oldval, newval, val;
> > > +	int ret, i;
> > > +
> > > +	if (get_user(oldval, uaddr))
> > > +		return -EFAULT;
> > > +
> > > +	/*
> > > +	 * there are no ldteor/stteor instructions...
> > > +	 */
> > > +	for (i = 0; i < FUTEX_MAX_LOOPS; i++) {
> > > +		newval = oldval ^ oparg;
> > > +
> > > +		ret = __lsui_cmpxchg32(uaddr, oldval, newval, &val);
> >
> > Since we have a FUTEX_MAX_LOOPS here, do we need it in cmpxchg32 as
> > well?
> >
> > For eor, we need a loop irrespective of whether futex_pos or other_pos
> > have changed. For cmpxchg, we need the loop only if other_pos has
> > changed and return -EAGAIN if futex_pos has changed since the caller
> > needs to update oldval and call again.
> >
> > So try to differentiate these cases, maybe only keep the loop outside
> > cmpxchg32 (I haven't put much though into it).
> 
> I think we can remove loops on __lsui_cmpxchg32() and return -EAGAIN
> when other_pos is different. the __lsui_cmpxchg32() will be called
> "futex_cmpxchg_value_locked()" and as I said, this always checks
> whether curval & oldval when it successed.

Yes, I think for the futex_cmpxchg_value_locked(), the bounded loop
doesn't matter since the core would invoke it back on -EAGAIN. It's nice
not to fail if the actual futex did not change but in practice it
doesn't make any difference and I'd rather keep the code simple.

> But in "eor" when it receive "-EAGAIN" from __lsui_cmxchg32()
> we can simply continue the loop.

Yes, for eor we need the bounded loop. Only return -EAGAIN to the user
if we finished the loop and either __lsui_cmpxchg32() returned -EAGAIN
or the updated on futex_pos failed.

Thanks.

-- 
Catalin
Re: [PATCH v15 5/8] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 3 weeks, 4 days ago
Hi Catalin,

> On Fri, Mar 13, 2026 at 09:23:58AM +0000, Yeoreum Yun wrote:
> > > On Fri, Feb 27, 2026 at 03:17:02PM +0000, Yeoreum Yun wrote:
> > > > +
> > > > +		if (__lsui_cmpxchg64(uaddr64, &oval64.raw, nval64.raw))
> > > > +			return -EFAULT;
> > > > +
> > > > +		oldval = oval64.futex[futex_pos];
> > > > +		other = oval64.futex[other_pos];
> > > > +		orig_other = orig64.futex[other_pos];
> > > > +
> > > > +		if (other == orig_other) {
> > > > +			ret = 0;
> > > > +			break;
> > > > +		}
> > >
> > > Is this check correct? What if the cmpxchg64 failed because futex_pos
> > > was changed but other_pos remained the same, it will just report success
> > > here. You need to compare the full 64-bit value to ensure the cmpxchg64
> > > succeeded.
> >
> > This is not matter since "futex_cmpxchg_value_locked()" checks
> > the "curval" and "oldval" IOW, though it returns success,
> > caller of this function always checks the "curval" and "oldval"
> > and when it's different, It handles to change return as -EAGAIN.
>
> Ah, ok, it makes sense (I did not check the callers).
>
> > > > +	}
> > > > +
> > > > +	if (!ret)
> > > > +		*oval = oldval;
> > > > +
> > > > +	return ret;
> > > > +}
> > > > +
> > > > +static __always_inline int
> > > > +__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
> > > > +{
> > > > +	/*
> > > > +	 * Undo the bitwise negation applied to the oparg passed from
> > > > +	 * arch_futex_atomic_op_inuser() with FUTEX_OP_ANDN.
> > > > +	 */
> > > > +	return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);
> > > > +}
> > > > +
> > > > +static __always_inline int
> > > > +__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
> > > > +{
> > > > +	u32 oldval, newval, val;
> > > > +	int ret, i;
> > > > +
> > > > +	if (get_user(oldval, uaddr))
> > > > +		return -EFAULT;
> > > > +
> > > > +	/*
> > > > +	 * there are no ldteor/stteor instructions...
> > > > +	 */
> > > > +	for (i = 0; i < FUTEX_MAX_LOOPS; i++) {
> > > > +		newval = oldval ^ oparg;
> > > > +
> > > > +		ret = __lsui_cmpxchg32(uaddr, oldval, newval, &val);
> > >
> > > Since we have a FUTEX_MAX_LOOPS here, do we need it in cmpxchg32 as
> > > well?
> > >
> > > For eor, we need a loop irrespective of whether futex_pos or other_pos
> > > have changed. For cmpxchg, we need the loop only if other_pos has
> > > changed and return -EAGAIN if futex_pos has changed since the caller
> > > needs to update oldval and call again.
> > >
> > > So try to differentiate these cases, maybe only keep the loop outside
> > > cmpxchg32 (I haven't put much though into it).
> >
> > I think we can remove loops on __lsui_cmpxchg32() and return -EAGAIN
> > when other_pos is different. the __lsui_cmpxchg32() will be called
> > "futex_cmpxchg_value_locked()" and as I said, this always checks
> > whether curval & oldval when it successed.
>
> Yes, I think for the futex_cmpxchg_value_locked(), the bounded loop
> doesn't matter since the core would invoke it back on -EAGAIN. It's nice
> not to fail if the actual futex did not change but in practice it
> doesn't make any difference and I'd rather keep the code simple.
>
> > But in "eor" when it receive "-EAGAIN" from __lsui_cmxchg32()
> > we can simply continue the loop.
>
> Yes, for eor we need the bounded loop. Only return -EAGAIN to the user
> if we finished the loop and either __lsui_cmpxchg32() returned -EAGAIN
> or the updated on futex_pos failed.
>
> Thanks.
>

Thanks for confirmation, I've changed them as we discussed.
But let me sent it on v7.0-rc4 based with v17.
(Unfortunately, I sent v16 yesterday with missing of those review in
this thread).

--
Sincerely,
Yeoreum Yun
Re: [PATCH v15 5/8] arm64: futex: support futex with FEAT_LSUI
Posted by Catalin Marinas 3 weeks, 4 days ago
On Fri, Mar 13, 2026 at 02:56:06PM +0000, Yeoreum Yun wrote:
> Thanks for confirmation, I've changed them as we discussed.
> But let me sent it on v7.0-rc4 based with v17.

No need to rebase to -rc4 unless there are some features you depend on.
The arm64 for-next/core branch is based on -rc3.

> (Unfortunately, I sent v16 yesterday with missing of those review in
> this thread).

No worries.

Thanks.

-- 
Catalin
Re: [PATCH v15 5/8] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 3 weeks, 4 days ago
> On Fri, Mar 13, 2026 at 02:56:06PM +0000, Yeoreum Yun wrote:
> > Thanks for confirmation, I've changed them as we discussed.
> > But let me sent it on v7.0-rc4 based with v17.
>
> No need to rebase to -rc4 unless there are some features you depend on.
> The arm64 for-next/core branch is based on -rc3.

Okay. then I'll send v17 based on -rc3.

Thanks!

--
Sincerely,
Yeoreum Yun