[PATCH 6/7] arm64: start using 'asm goto' for put_user() when available

Linus Torvalds posted 7 patches 1 year, 8 months ago
[PATCH 6/7] arm64: start using 'asm goto' for put_user() when available
Posted by Linus Torvalds 1 year, 8 months ago
This generates noticeably better code with compilers that support it,
since we don't need to test the error register etc, the exception just
jumps to the error handling directly.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/arm64/include/asm/uaccess.h | 77 +++++++++++++++++++-------------
 1 file changed, 46 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 23c2edf517ed..4ab3938290ab 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -294,29 +294,41 @@ do {									\
 	} while (0);							\
 } while (0)
 
-#define __put_mem_asm(store, reg, x, addr, err, type)			\
+#ifdef CONFIG_CC_HAS_ASM_GOTO
+#define __put_mem_asm(store, reg, x, addr, label, type)			\
+	asm goto(							\
+	"1:	" store "	" reg "0, [%1]\n"			\
+	"2:\n"								\
+	_ASM_EXTABLE_##type##ACCESS_ZERO(1b, %l2)			\
+	: : "rZ" (x), "r" (addr) : : label)
+#else
+#define __put_mem_asm(store, reg, x, addr, label, type) do {		\
+	int __pma_err = 0;						\
 	asm volatile(							\
 	"1:	" store "	" reg "1, [%2]\n"			\
 	"2:\n"								\
 	_ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0)			\
-	: "+r" (err)							\
-	: "rZ" (x), "r" (addr))
+	: "+r" (__pma_err)						\
+	: "rZ" (x), "r" (addr));					\
+	if (__pma_err) goto label;					\
+} while (0)
+#endif
 
-#define __raw_put_mem(str, x, ptr, err, type)					\
+#define __raw_put_mem(str, x, ptr, label, type)					\
 do {										\
 	__typeof__(*(ptr)) __pu_val = (x);					\
 	switch (sizeof(*(ptr))) {						\
 	case 1:									\
-		__put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type);	\
+		__put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type);	\
 		break;								\
 	case 2:									\
-		__put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type);	\
+		__put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type);	\
 		break;								\
 	case 4:									\
-		__put_mem_asm(str, "%w", __pu_val, (ptr), (err), type);		\
+		__put_mem_asm(str, "%w", __pu_val, (ptr), label, type);		\
 		break;								\
 	case 8:									\
-		__put_mem_asm(str, "%x", __pu_val, (ptr), (err), type);		\
+		__put_mem_asm(str, "%x", __pu_val, (ptr), label, type);		\
 		break;								\
 	default:								\
 		BUILD_BUG();							\
@@ -328,25 +340,34 @@ do {										\
  * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
  * we must evaluate these outside of the critical section.
  */
-#define __raw_put_user(x, ptr, err)					\
+#define __raw_put_user(x, ptr, label)					\
 do {									\
+	__label__ __rpu_failed;						\
 	__typeof__(*(ptr)) __user *__rpu_ptr = (ptr);			\
 	__typeof__(*(ptr)) __rpu_val = (x);				\
 	__chk_user_ptr(__rpu_ptr);					\
 									\
-	uaccess_ttbr0_enable();						\
-	__raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U);		\
-	uaccess_ttbr0_disable();					\
+	do {								\
+		uaccess_ttbr0_enable();					\
+		__raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U);	\
+		uaccess_ttbr0_disable();				\
+		break;							\
+	__rpu_failed:							\
+		uaccess_ttbr0_disable();				\
+		goto label;						\
+	} while (0);							\
 } while (0)
 
 #define __put_user_error(x, ptr, err)					\
 do {									\
+	__label__ __pu_failed;						\
 	__typeof__(*(ptr)) __user *__p = (ptr);				\
 	might_fault();							\
 	if (access_ok(__p, sizeof(*__p))) {				\
 		__p = uaccess_mask_ptr(__p);				\
-		__raw_put_user((x), __p, (err));			\
+		__raw_put_user((x), __p, __pu_failed);			\
 	} else	{							\
+	__pu_failed:							\
 		(err) = -EFAULT;					\
 	}								\
 } while (0)
@@ -369,15 +390,18 @@ do {									\
 do {									\
 	__typeof__(dst) __pkn_dst = (dst);				\
 	__typeof__(src) __pkn_src = (src);				\
-	int __pkn_err = 0;						\
 									\
-	__mte_enable_tco_async();					\
-	__raw_put_mem("str", *((type *)(__pkn_src)),			\
-		      (__force type *)(__pkn_dst), __pkn_err, K);	\
-	__mte_disable_tco_async();					\
-									\
-	if (unlikely(__pkn_err))					\
+	do {								\
+		__label__ __pkn_err;					\
+		__mte_enable_tco_async();				\
+		__raw_put_mem("str", *((type *)(__pkn_src)),		\
+			      (__force type *)(__pkn_dst), __pkn_err, K);	\
+		__mte_disable_tco_async();				\
+		break;							\
+	__pkn_err:							\
+		__mte_disable_tco_async();				\
 		goto err_label;						\
+	} while (0);							\
 } while(0)
 
 extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
@@ -411,17 +435,8 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt
 }
 #define user_access_begin(a,b)	user_access_begin(a,b)
 #define user_access_end()	uaccess_ttbr0_disable()
-
-/*
- * The arm64 inline asms should learn abut asm goto, and we should
- * teach user_access_begin() about address masking.
- */
-#define unsafe_put_user(x, ptr, label)	do {				\
-	int __upu_err = 0;						\
-	__raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), __upu_err, U);	\
-	if (__upu_err) goto label;				\
-} while (0)
-
+#define unsafe_put_user(x, ptr, label) \
+	__raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U)
 #define unsafe_get_user(x, ptr, label) \
 	__raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U)
 
-- 
2.45.1.209.gc6f12300df
Re: [PATCH 6/7] arm64: start using 'asm goto' for put_user() when available
Posted by Nathan Chancellor 1 year, 8 months ago
Hi Linus,

On Mon, Jun 10, 2024 at 01:48:20PM -0700, Linus Torvalds wrote:
> This generates noticeably better code with compilers that support it,
> since we don't need to test the error register etc, the exception just
> jumps to the error handling directly.
> 
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
> ---
>  arch/arm64/include/asm/uaccess.h | 77 +++++++++++++++++++-------------
>  1 file changed, 46 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
> index 23c2edf517ed..4ab3938290ab 100644
> --- a/arch/arm64/include/asm/uaccess.h
> +++ b/arch/arm64/include/asm/uaccess.h
> @@ -294,29 +294,41 @@ do {									\
>  	} while (0);							\
>  } while (0)
>  
> -#define __put_mem_asm(store, reg, x, addr, err, type)			\
> +#ifdef CONFIG_CC_HAS_ASM_GOTO

This symbol was eliminated two years ago with commit a0a12c3ed057 ("asm
goto: eradicate CC_HAS_ASM_GOTO") since all supported compilers have
support for it.

> +#define __put_mem_asm(store, reg, x, addr, label, type)			\
> +	asm goto(							\
> +	"1:	" store "	" reg "0, [%1]\n"			\
> +	"2:\n"								\
> +	_ASM_EXTABLE_##type##ACCESS_ZERO(1b, %l2)			\
> +	: : "rZ" (x), "r" (addr) : : label)
> +#else
> +#define __put_mem_asm(store, reg, x, addr, label, type) do {		\
> +	int __pma_err = 0;						\
>  	asm volatile(							\
>  	"1:	" store "	" reg "1, [%2]\n"			\
>  	"2:\n"								\
>  	_ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0)			\
> -	: "+r" (err)							\
> -	: "rZ" (x), "r" (addr))
> +	: "+r" (__pma_err)						\
> +	: "rZ" (x), "r" (addr));					\
> +	if (__pma_err) goto label;					\
> +} while (0)
> +#endif

Cheers,
Nathan
Re: [PATCH 6/7] arm64: start using 'asm goto' for put_user() when available
Posted by Linus Torvalds 1 year, 8 months ago
On Tue, 11 Jun 2024 at 14:56, Nathan Chancellor <nathan@kernel.org> wrote:
>
> > +#ifdef CONFIG_CC_HAS_ASM_GOTO
>
> This symbol was eliminated two years ago with commit a0a12c3ed057 ("asm
> goto: eradicate CC_HAS_ASM_GOTO") since all supported compilers have
> support for it.

Hah. And I was trying to be a good boy and keep the old setup working.

Instead - because the HAS_ASM_GOTO config variable no longer exists -
I didn't actually test the new case at all, and it only worked because
the old case did in fact work.

Because fixing the broken #ifdef also showed that the

  +       _ASM_EXTABLE_##type##ACCESS_ZERO(1b, %l2)

line was wrong and was a copy-and-paste error from the get_user case
(that zeroes the result register on error).

It should be just

  +       _ASM_EXTABLE_##type##ACCESS(1b, %l2)

and to make the nasty copy_to_kernel_nofault_loop() build I also need
to do the proper _ASM_EXTABLE_KACCESS macro without the zeroing that
didn't exist.

Oops.

It would be nice to get rid of the CC_HAS_ASM_GOTO_OUTPUT thing too,
but that's probably a decade away ;(

But at least  this made me now go and actually test the _actual_ old
compiler case (no asm goto output). Perhaps ironically, I did get
*that* one right. That's the case where I had actually checked the new
code for get_user().

             Linus
[PATCH 6/7 v2] arm64: start using 'asm goto' for put_user() when available
Posted by Linus Torvalds 1 year, 8 months ago
This generates noticeably better code with compilers that support it,
since we don't need to test the error register etc, the exception just
jumps to the error handling directly.

Unlike get_user(), there's no need to worry about old compilers.  All
supported compilers support the regular non-output 'asm goto', as
pointed out by Nathan Chancellor.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---

This is the fixed version that actually uses "asm goto" for put_user()
because it doesn't accidentally disable it by using the old CONFIG
option that no longer exists. 

 arch/arm64/include/asm/asm-extable.h |  3 ++
 arch/arm64/include/asm/uaccess.h     | 70 ++++++++++++++--------------
 2 files changed, 39 insertions(+), 34 deletions(-)

diff --git a/arch/arm64/include/asm/asm-extable.h b/arch/arm64/include/asm/asm-extable.h
index 980d1dd8e1a3..b8a5861dc7b7 100644
--- a/arch/arm64/include/asm/asm-extable.h
+++ b/arch/arm64/include/asm/asm-extable.h
@@ -112,6 +112,9 @@
 #define _ASM_EXTABLE_KACCESS_ERR(insn, fixup, err)			\
 	_ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, err, wzr)
 
+#define _ASM_EXTABLE_KACCESS(insn, fixup)				\
+	_ASM_EXTABLE_KACCESS_ERR_ZERO(insn, fixup, wzr, wzr)
+
 #define _ASM_EXTABLE_LOAD_UNALIGNED_ZEROPAD(insn, fixup, data, addr)		\
 	__DEFINE_ASM_GPR_NUMS							\
 	__ASM_EXTABLE_RAW(#insn, #fixup,					\
diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h
index 23c2edf517ed..6d4b16acc880 100644
--- a/arch/arm64/include/asm/uaccess.h
+++ b/arch/arm64/include/asm/uaccess.h
@@ -294,29 +294,28 @@ do {									\
 	} while (0);							\
 } while (0)
 
-#define __put_mem_asm(store, reg, x, addr, err, type)			\
-	asm volatile(							\
-	"1:	" store "	" reg "1, [%2]\n"			\
+#define __put_mem_asm(store, reg, x, addr, label, type)			\
+	asm goto(							\
+	"1:	" store "	" reg "0, [%1]\n"			\
 	"2:\n"								\
-	_ASM_EXTABLE_##type##ACCESS_ERR(1b, 2b, %w0)			\
-	: "+r" (err)							\
-	: "rZ" (x), "r" (addr))
+	_ASM_EXTABLE_##type##ACCESS(1b, %l2)				\
+	: : "rZ" (x), "r" (addr) : : label)
 
-#define __raw_put_mem(str, x, ptr, err, type)					\
+#define __raw_put_mem(str, x, ptr, label, type)					\
 do {										\
 	__typeof__(*(ptr)) __pu_val = (x);					\
 	switch (sizeof(*(ptr))) {						\
 	case 1:									\
-		__put_mem_asm(str "b", "%w", __pu_val, (ptr), (err), type);	\
+		__put_mem_asm(str "b", "%w", __pu_val, (ptr), label, type);	\
 		break;								\
 	case 2:									\
-		__put_mem_asm(str "h", "%w", __pu_val, (ptr), (err), type);	\
+		__put_mem_asm(str "h", "%w", __pu_val, (ptr), label, type);	\
 		break;								\
 	case 4:									\
-		__put_mem_asm(str, "%w", __pu_val, (ptr), (err), type);		\
+		__put_mem_asm(str, "%w", __pu_val, (ptr), label, type);		\
 		break;								\
 	case 8:									\
-		__put_mem_asm(str, "%x", __pu_val, (ptr), (err), type);		\
+		__put_mem_asm(str, "%x", __pu_val, (ptr), label, type);		\
 		break;								\
 	default:								\
 		BUILD_BUG();							\
@@ -328,25 +327,34 @@ do {										\
  * uaccess_ttbr0_disable(). As `x` and `ptr` could contain blocking functions,
  * we must evaluate these outside of the critical section.
  */
-#define __raw_put_user(x, ptr, err)					\
+#define __raw_put_user(x, ptr, label)					\
 do {									\
+	__label__ __rpu_failed;						\
 	__typeof__(*(ptr)) __user *__rpu_ptr = (ptr);			\
 	__typeof__(*(ptr)) __rpu_val = (x);				\
 	__chk_user_ptr(__rpu_ptr);					\
 									\
-	uaccess_ttbr0_enable();						\
-	__raw_put_mem("sttr", __rpu_val, __rpu_ptr, err, U);		\
-	uaccess_ttbr0_disable();					\
+	do {								\
+		uaccess_ttbr0_enable();					\
+		__raw_put_mem("sttr", __rpu_val, __rpu_ptr, __rpu_failed, U);	\
+		uaccess_ttbr0_disable();				\
+		break;							\
+	__rpu_failed:							\
+		uaccess_ttbr0_disable();				\
+		goto label;						\
+	} while (0);							\
 } while (0)
 
 #define __put_user_error(x, ptr, err)					\
 do {									\
+	__label__ __pu_failed;						\
 	__typeof__(*(ptr)) __user *__p = (ptr);				\
 	might_fault();							\
 	if (access_ok(__p, sizeof(*__p))) {				\
 		__p = uaccess_mask_ptr(__p);				\
-		__raw_put_user((x), __p, (err));			\
+		__raw_put_user((x), __p, __pu_failed);			\
 	} else	{							\
+	__pu_failed:							\
 		(err) = -EFAULT;					\
 	}								\
 } while (0)
@@ -369,15 +377,18 @@ do {									\
 do {									\
 	__typeof__(dst) __pkn_dst = (dst);				\
 	__typeof__(src) __pkn_src = (src);				\
-	int __pkn_err = 0;						\
 									\
-	__mte_enable_tco_async();					\
-	__raw_put_mem("str", *((type *)(__pkn_src)),			\
-		      (__force type *)(__pkn_dst), __pkn_err, K);	\
-	__mte_disable_tco_async();					\
-									\
-	if (unlikely(__pkn_err))					\
+	do {								\
+		__label__ __pkn_err;					\
+		__mte_enable_tco_async();				\
+		__raw_put_mem("str", *((type *)(__pkn_src)),		\
+			      (__force type *)(__pkn_dst), __pkn_err, K);	\
+		__mte_disable_tco_async();				\
+		break;							\
+	__pkn_err:							\
+		__mte_disable_tco_async();				\
 		goto err_label;						\
+	} while (0);							\
 } while(0)
 
 extern unsigned long __must_check __arch_copy_from_user(void *to, const void __user *from, unsigned long n);
@@ -411,17 +422,8 @@ static __must_check __always_inline bool user_access_begin(const void __user *pt
 }
 #define user_access_begin(a,b)	user_access_begin(a,b)
 #define user_access_end()	uaccess_ttbr0_disable()
-
-/*
- * The arm64 inline asms should learn abut asm goto, and we should
- * teach user_access_begin() about address masking.
- */
-#define unsafe_put_user(x, ptr, label)	do {				\
-	int __upu_err = 0;						\
-	__raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), __upu_err, U);	\
-	if (__upu_err) goto label;				\
-} while (0)
-
+#define unsafe_put_user(x, ptr, label) \
+	__raw_put_mem("sttr", x, uaccess_mask_ptr(ptr), label, U)
 #define unsafe_get_user(x, ptr, label) \
 	__raw_get_mem("ldtr", x, uaccess_mask_ptr(ptr), label, U)
 
-- 
2.45.1.209.gc6f12300df