[PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI

Yeoreum Yun posted 5 patches 1 month, 3 weeks ago
There is a newer version of this series
[PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 1 month, 3 weeks ago
Current futex atomic operations are implemented with ll/sc instructions
and clearing PSTATE.PAN.

Since Armv9.6, FEAT_LSUI supplies not only load/store instructions but
also atomic operations for user memory access from the kernel, so the
kernel no longer needs to clear the PSTATE.PAN bit for them.

With these instructions, some of the futex atomic operations no longer
need to be implemented with an ldxr/stlxr pair; instead they can be
implemented with a single atomic operation supplied by FEAT_LSUI.

However, some of the futex atomic operations still need to use the ll/sc
approach via ldtxr/stltxr supplied by FEAT_LSUI, since there is either no
corresponding atomic instruction or it doesn't support word-size operation
(i.e. eor and cas{mb}t).

Even so, it is still beneficial to operate without clearing the PSTATE.PAN bit.

Signed-off-by: Yeoreum Yun <yeoreum.yun@arm.com>
---
 arch/arm64/include/asm/futex.h | 142 ++++++++++++++++++++++++++++++++-
 1 file changed, 141 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h
index fdec4f3f2b15..38fc98f4af46 100644
--- a/arch/arm64/include/asm/futex.h
+++ b/arch/arm64/include/asm/futex.h
@@ -9,6 +9,8 @@
 #include <linux/uaccess.h>
 #include <linux/stringify.h>
 
+#include <asm/alternative.h>
+#include <asm/alternative-macros.h>
 #include <asm/errno.h>
 
 #define LLSC_MAX_LOOPS	128 /* What's the largest number you can think of? */
@@ -115,11 +117,149 @@ __llsc_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
 	return ret;
 }
 
+#ifdef CONFIG_AS_HAS_LSUI
+
+#define __LSUI_PREAMBLE	".arch_extension lsui\n"
+
+#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)				\
+static __always_inline int						\
+__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
+{									\
+	int ret = 0;							\
+	int val;							\
+									\
+	mte_enable_tco();						\
+	uaccess_ttbr0_enable();						\
+									\
+	asm volatile("// __lsui_futex_atomic_" #op "\n"			\
+	__LSUI_PREAMBLE							\
+	"1:	" #asm_op #mb "	%w3, %w2, %1\n"				\
+	"2:\n"								\
+	_ASM_EXTABLE_UACCESS_ERR(1b, 2b, %w0)				\
+	: "+r" (ret), "+Q" (*uaddr), "=r" (val)				\
+	: "r" (oparg)							\
+	: "memory");							\
+									\
+	mte_disable_tco();						\
+	uaccess_ttbr0_disable();					\
+									\
+	if (!ret)							\
+		*oval = val;						\
+									\
+	return ret;							\
+}
+
+LSUI_FUTEX_ATOMIC_OP(add, ldtadd, al)
+LSUI_FUTEX_ATOMIC_OP(or, ldtset, al)
+LSUI_FUTEX_ATOMIC_OP(andnot, ldtclr, al)
+LSUI_FUTEX_ATOMIC_OP(set, swpt, al)
+
+static __always_inline int
+__lsui_futex_atomic_and(int oparg, u32 __user *uaddr, int *oval)
+{
+	return __lsui_futex_atomic_andnot(~oparg, uaddr, oval);	/* and(x) == andnot(~x) */
+}
+
+static __always_inline int
+__lsui_futex_atomic_eor(int oparg, u32 __user *uaddr, int *oval)
+{
+	unsigned int loops = LLSC_MAX_LOOPS;	/* retry budget for the ll/sc loop */
+	int ret, val, tmp;
+
+	mte_enable_tco();	/* NOTE(review): may be unneeded with LSUI insns - confirm */
+	uaccess_ttbr0_enable();
+
+	/*
+	 * There are no ldteor/stteor instructions, so use an ldtxr/stltxr loop.
+	 */
+	asm volatile("// __lsui_futex_atomic_eor\n"
+	__LSUI_PREAMBLE
+	"	prfm	pstl1strm, %2\n"
+	"1:	ldtxr	%w1, %2\n"
+	"	eor	%w3, %w1, %w5\n"
+	"2:	stltxr	%w0, %w3, %2\n"
+	"	cbz	%w0, 3f\n"
+	"	sub	%w4, %w4, %w0\n"
+	"	cbnz	%w4, 1b\n"
+	"	mov	%w0, %w6\n"
+	"3:\n"
+	"	dmb	ish\n"
+	_ASM_EXTABLE_UACCESS_ERR(1b, 3b, %w0)
+	_ASM_EXTABLE_UACCESS_ERR(2b, 3b, %w0)
+	: "=&r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp),
+	  "+r" (loops)
+	: "r" (oparg), "Ir" (-EAGAIN)	/* %w6: stored to ret once loops reaches 0 */
+	: "memory");
+
+	mte_disable_tco();
+	uaccess_ttbr0_disable();
+
+	if (!ret)
+		*oval = val;	/* value loaded by ldtxr, i.e. before the eor */
+
+	return ret;
+}
+
+static __always_inline int
+__lsui_futex_cmpxchg(u32 __user *uaddr, u32 oldval, u32 newval, u32 *oval)
+{
+	int ret = 0;
+	unsigned int loops = LLSC_MAX_LOOPS;	/* retry budget for the ll/sc loop */
+	u32 val, tmp;
+
+	mte_enable_tco();	/* NOTE(review): may be unneeded with LSUI insns - confirm */
+	uaccess_ttbr0_enable();
+
+	/* cas{al}t has no word-size form, so loop on ldtxr/stltxr instead.
+	 * A mismatch with oldval branches to 4 with ret still 0; val then
+	 * holds the value actually read from *uaddr. */
+	asm volatile("//__lsui_futex_cmpxchg\n"
+	__LSUI_PREAMBLE
+	"	prfm	pstl1strm, %2\n"
+	"1:	ldtxr	%w1, %2\n"
+	"	eor	%w3, %w1, %w5\n"
+	"	cbnz	%w3, 4f\n"
+	"2:	stltxr	%w3, %w6, %2\n"
+	"	cbz	%w3, 3f\n"
+	"	sub	%w4, %w4, %w3\n"
+	"	cbnz	%w4, 1b\n"
+	"	mov	%w0, %w7\n"
+	"3:\n"
+	"	dmb	ish\n"
+	"4:\n"
+	_ASM_EXTABLE_UACCESS_ERR(1b, 4b, %w0)
+	_ASM_EXTABLE_UACCESS_ERR(2b, 4b, %w0)
+	: "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp), "+r" (loops)
+	: "r" (oldval), "r" (newval), "Ir" (-EAGAIN)
+	: "memory");
+
+	mte_disable_tco();
+	uaccess_ttbr0_disable();
+
+	if (!ret)
+		*oval = val;	/* value actually read, so callers can detect a mismatch */
+
+	return ret;
+}
+
+#define __lsui_llsc_body(op, ...)					\
+({									\
+	alternative_has_cap_likely(ARM64_HAS_LSUI) ?			\
+		__lsui_##op(__VA_ARGS__) : __llsc_##op(__VA_ARGS__);	\
+})
+
+#else	/* CONFIG_AS_HAS_LSUI */
+
+#define __lsui_llsc_body(op, ...)	__llsc_##op(__VA_ARGS__)
+
+#endif	/* CONFIG_AS_HAS_LSUI */
+
+
 #define FUTEX_ATOMIC_OP(op)						\
 static __always_inline int						\
 __futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)		\
 {									\
-	return __llsc_futex_atomic_##op(oparg, uaddr, oval);		\
+	return __lsui_llsc_body(futex_atomic_##op, oparg, uaddr, oval);	\
 }
 
 FUTEX_ATOMIC_OP(add)
-- 
LEVI:{C3F47F37-75D8-414A-A8BA-3980EC8A46D7}
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Catalin Marinas 1 month, 2 weeks ago
On Mon, Aug 11, 2025 at 05:36:35PM +0100, Yeoreum Yun wrote:
> +#ifdef CONFIG_AS_HAS_LSUI
> +
> +#define __LSUI_PREAMBLE	".arch_extension lsui\n"
> +
> +#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)				\
> +static __always_inline int						\
> +__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
> +{									\
> +	int ret = 0;							\
> +	int val;							\
> +									\
> +	mte_enable_tco();						\

The reason uaccess_disable_privileged() sets the MTE TCO (tag check
override) is because the user and the kernel may have different settings
for tag checking. If we use the user instructions provided by FEAT_LSUI,
we leave the MTE checking as is.

The same comment for all the other functions here.

-- 
Catalin
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 1 month, 2 weeks ago
Hi Catalin,
> On Mon, Aug 11, 2025 at 05:36:35PM +0100, Yeoreum Yun wrote:
> > +#ifdef CONFIG_AS_HAS_LSUI
> > +
> > +#define __LSUI_PREAMBLE	".arch_extension lsui\n"
> > +
> > +#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)				\
> > +static __always_inline int						\
> > +__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
> > +{									\
> > +	int ret = 0;							\
> > +	int val;							\
> > +									\
> > +	mte_enable_tco();						\
>

> The reason uaccess_disable_privileged() sets the MTE TCO (tag check
> override) is because the user and the kernel may have different settings
> for tag checking. If we use the user instructions provided by FEAT_LSUI,
> we leave the MTE checking as is.
>
> The same comment for all the other functions here.

You're right. Thanks for catching this :)

--
Sincerely,
Yeoreum Yun
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 1 month, 2 weeks ago
Hi Catalin,

[...]
> > > +#ifdef CONFIG_AS_HAS_LSUI
> > > +
> > > +#define __LSUI_PREAMBLE	".arch_extension lsui\n"
> > > +
> > > +#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)				\
> > > +static __always_inline int						\
> > > +__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
> > > +{									\
> > > +	int ret = 0;							\
> > > +	int val;							\
> > > +									\
> > > +	mte_enable_tco();						\
> >
>
> > The reason uaccess_disable_privileged() sets the MTE TCO (tag check
> > override) is because the user and the kernel may have different settings
> > for tag checking. If we use the user instructions provided by FEAT_LSUI,
> > we leave the MTE checking as is.
> >
> > The same comment for all the other functions here.
>
> You're right. Thanks for catching this :)

But one bikeshedding question:
why do we need to care about the different settings for tag checking when
we use uaccess_disable_privileged()?

IIUC, we use uaccess_disable_privileged() in order to access
user memory with copy_from/to_user() etc.
But even if a tag check fault happens on the kernel side,
it seems to be handled by the fixup code if the user address is wrong.

Am I missing something?

> --
> Sincerely,
> Yeoreum Yun
>

--
Sincerely,
Yeoreum Yun
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Catalin Marinas 1 month, 2 weeks ago
On Sat, Aug 16, 2025 at 03:57:49PM +0100, Yeoreum Yun wrote:
> > > > +#ifdef CONFIG_AS_HAS_LSUI
> > > > +
> > > > +#define __LSUI_PREAMBLE	".arch_extension lsui\n"
> > > > +
> > > > +#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)				\
> > > > +static __always_inline int						\
> > > > +__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
> > > > +{									\
> > > > +	int ret = 0;							\
> > > > +	int val;							\
> > > > +									\
> > > > +	mte_enable_tco();						\
> > >
> >
> > > The reason uaccess_disable_privileged() sets the MTE TCO (tag check
> > > override) is because the user and the kernel may have different settings
> > > for tag checking. If we use the user instructions provided by FEAT_LSUI,
> > > we leave the MTE checking as is.
> > >
> > > The same comment for all the other functions here.
> >
> > You're right. Thanks for catching this :)
> 
> But one bikeshedding question.
> why we need to care about the different settings for tag checking when
> we use uaccess_disable_privileged()?

Because, for example, the user may not be interested in any tag check
faults (has checking disabled) but the kernel uses KASAN with
synchronous tag check faults. If it uses the privileged instructions as
in the futex API, it either won't make progress or report errors to the
user which it does not expect.

> IIUC, the reason we uses to uaccess_disaable_privileged() to access
> user memory with copy_from/to_user() and etc.

We don't use uaccess_disable_privileged() with copy_from_user() since
those use the unprivileged instructions already.

> But, although tag check fault happens in kernel side,
> It seems to be handled by fixup code if user address is wrong.

The user may know it is wrong and not care (e.g. one wants to keep using
a buggy application).

-- 
Catalin
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 1 month, 2 weeks ago
Hi Catalin,

> On Sat, Aug 16, 2025 at 03:57:49PM +0100, Yeoreum Yun wrote:
> > > > > +#ifdef CONFIG_AS_HAS_LSUI
> > > > > +
> > > > > +#define __LSUI_PREAMBLE	".arch_extension lsui\n"
> > > > > +
> > > > > +#define LSUI_FUTEX_ATOMIC_OP(op, asm_op, mb)				\
> > > > > +static __always_inline int						\
> > > > > +__lsui_futex_atomic_##op(int oparg, u32 __user *uaddr, int *oval)	\
> > > > > +{									\
> > > > > +	int ret = 0;							\
> > > > > +	int val;							\
> > > > > +									\
> > > > > +	mte_enable_tco();						\
> > > >
> > >
> > > > The reason uaccess_disable_privileged() sets the MTE TCO (tag check
> > > > override) is because the user and the kernel may have different settings
> > > > for tag checking. If we use the user instructions provided by FEAT_LSUI,
> > > > we leave the MTE checking as is.
> > > >
> > > > The same comment for all the other functions here.
> > >
> > > You're right. Thanks for catching this :)
> >
> > But one bikeshedding question.
> > why we need to care about the different settings for tag checking when
> > we use uaccess_disable_privileged()?
>
> Because, for example, the user may not be interested in any tag check
> faults (has checking disabled) but the kernel uses KASAN with
> synchronous tag check faults. If it uses the privileged instructions as
> in the futex API, it either won't make progress or report errors to the
> user which it does not expect.
>
> > IIUC, the reason we uses to uaccess_disaable_privileged() to access
> > user memory with copy_from/to_user() and etc.
>
> We don't use uaccess_disable_privileged() with copy_from_user() since
> those use the unprivileged instructions already.

Thanks for your explanation :)

>
> > But, although tag check fault happens in kernel side,
> > It seems to be handled by fixup code if user address is wrong.
>
> The user may know it is wrong and not care (e.g. one wants to keep using
> a buggy application).

Then does this example -- ignoring the error and continuing to use a buggy
application -- show us that we need to enable TCO when
we run the LSUI instructions?

AFAIK, LSUI instructions also check memory tags -- e.g. ldtadd.
If a user address with a mismatched tag is passed and the user isn't
interested in tag checks, it can hit an unexpected report from KASAN.

Am I missing something?

--
Sincerely,
Yeoreum Yun
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Catalin Marinas 1 month, 2 weeks ago
On Mon, Aug 18, 2025 at 08:53:57PM +0100, Yeoreum Yun wrote:
> > On Sat, Aug 16, 2025 at 03:57:49PM +0100, Yeoreum Yun wrote:
> > > why we need to care about the different settings for tag checking when
> > > we use uaccess_disable_privileged()?
[...]
> > > But, although tag check fault happens in kernel side,
> > > It seems to be handled by fixup code if user address is wrong.
> >
> > The user may know it is wrong and not care (e.g. one wants to keep using
> > a buggy application).
> 
> Then Does this example -- ignoring wrong and keep using a buggy
> application shows us that we need to enable TCO when
> we runs the LSUI instruction?
> 
> AFAIK, LSUI instruction also check memory tag -- i.e) ldtadd.
> if passed user address which has unmatched tag and if user isn't
> interested in tah check, It can meet the unexpected report from KASAN.

That's a valid point w.r.t. PSTATE.TCO that applies to copy_to/from_user
as well. I don't think we documented it but we don't expect the user
PSTATE.TCO state to be taken into account while doing uaccess from the
kernel. We do, however, expect SCTLR_EL1.TCF0 to be honoured and that's
what the user normally tweaks via a prctl(). The TCO is meant to
disable tag checking briefly when TCF enabled the tag check faults.

-- 
Catalin
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 1 month, 2 weeks ago
On Tue, Aug 19, 2025 at 09:38:54AM +0100, Catalin Marinas wrote:
> On Mon, Aug 18, 2025 at 08:53:57PM +0100, Yeoreum Yun wrote:
> > > On Sat, Aug 16, 2025 at 03:57:49PM +0100, Yeoreum Yun wrote:
> > > > why we need to care about the different settings for tag checking when
> > > > we use uaccess_disable_privileged()?
> [...]
> > > > But, although tag check fault happens in kernel side,
> > > > It seems to be handled by fixup code if user address is wrong.
> > >
> > > The user may know it is wrong and not care (e.g. one wants to keep using
> > > a buggy application).
> >
> > Then Does this example -- ignoring wrong and keep using a buggy
> > application shows us that we need to enable TCO when
> > we runs the LSUI instruction?
> >
> > AFAIK, LSUI instruction also check memory tag -- i.e) ldtadd.
> > if passed user address which has unmatched tag and if user isn't
> > interested in tah check, It can meet the unexpected report from KASAN.
>
> That's a valid point w.r.t. PSTATE.TCO that applies to copy_to/from_user
> as well. I don't think we documented it but we don't expect the user
> PSTATE.TCO state to be taken into account while doing uaccess from the
> kernel. We do, however, expect SCTLR_EL1.TCF0 to be honoured and that's
> what the user normally tweaks via a prctl(). The TCO is meant to
> disable tag checking briefly when TCF enabled the tag check faults.

So, IMHO, as copy_to/from_user (ldtr/sttr) enable TCO before they operate,
I think futex using LSUI should enable the TCO bit
before it executes the LSUI instructions.
Otherwise, this seems inconsistent: tag check faults would be allowed according to
SCTLR_EL1.TCF's (not TCF0's) configuration while the kernel accesses user memory.

Am I on right way?

Thanks.

> --
> Catalin

--
Sincerely,
Yeoreum Yun
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Catalin Marinas 1 month, 2 weeks ago
On Tue, Aug 19, 2025 at 10:11:02AM +0100, Yeoreum Yun wrote:
> On Tue, Aug 19, 2025 at 09:38:54AM +0100, Catalin Marinas wrote:
> > On Mon, Aug 18, 2025 at 08:53:57PM +0100, Yeoreum Yun wrote:
> > > > On Sat, Aug 16, 2025 at 03:57:49PM +0100, Yeoreum Yun wrote:
> > > > > why we need to care about the different settings for tag checking when
> > > > > we use uaccess_disable_privileged()?
> > [...]
> > > > > But, although tag check fault happens in kernel side,
> > > > > It seems to be handled by fixup code if user address is wrong.
> > > >
> > > > The user may know it is wrong and not care (e.g. one wants to keep using
> > > > a buggy application).
> > >
> > > Then Does this example -- ignoring wrong and keep using a buggy
> > > application shows us that we need to enable TCO when
> > > we runs the LSUI instruction?
> > >
> > > AFAIK, LSUI instruction also check memory tag -- i.e) ldtadd.
> > > if passed user address which has unmatched tag and if user isn't
> > > interested in tah check, It can meet the unexpected report from KASAN.
> >
> > That's a valid point w.r.t. PSTATE.TCO that applies to copy_to/from_user
> > as well. I don't think we documented it but we don't expect the user
> > PSTATE.TCO state to be taken into account while doing uaccess from the
> > kernel. We do, however, expect SCTLR_EL1.TCF0 to be honoured and that's
> > what the user normally tweaks via a prctl(). The TCO is meant to
> > disable tag checking briefly when TCF enabled the tag check faults.
> 
> So, IMHO, as copy_to/from_user (ldt/sttr) enable tco before it operates,

They don't enable TCO.

-- 
Catalin
Re: [PATCH v6 5/5] arm64: futex: support futex with FEAT_LSUI
Posted by Yeoreum Yun 1 month, 2 weeks ago
> > > > > > why we need to care about the different settings for tag checking when
> > > > > > we use uaccess_disable_privileged()?
> > > [...]
> > > > > > But, although tag check fault happens in kernel side,
> > > > > > It seems to be handled by fixup code if user address is wrong.
> > > > >
> > > > > The user may know it is wrong and not care (e.g. one wants to keep using
> > > > > a buggy application).
> > > >
> > > > Then Does this example -- ignoring wrong and keep using a buggy
> > > > application shows us that we need to enable TCO when
> > > > we runs the LSUI instruction?
> > > >
> > > > AFAIK, LSUI instruction also check memory tag -- i.e) ldtadd.
> > > > if passed user address which has unmatched tag and if user isn't
> > > > interested in tah check, It can meet the unexpected report from KASAN.
> > >
> > > That's a valid point w.r.t. PSTATE.TCO that applies to copy_to/from_user
> > > as well. I don't think we documented it but we don't expect the user
> > > PSTATE.TCO state to be taken into account while doing uaccess from the
> > > kernel. We do, however, expect SCTLR_EL1.TCF0 to be honoured and that's
> > > what the user normally tweaks via a prctl(). The TCO is meant to
> > > disable tag checking briefly when TCF enabled the tag check faults.
> >
> > So, IMHO, as copy_to/from_user (ldt/sttr) enable tco before it operates,
>
> They don't enable TCO.

Ah right, I was confused. Thanks for the answer!

>
> --
> Catalin

--
Sincerely,
Yeoreum Yun