[PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Linus Torvalds 1 month ago
It turns out that AMD has a "Meltdown Lite(tm)" issue with non-canonical
accesses in kernel space.  And so using just the high bit to decide
whether an access is in user space or kernel space ends up with the good
old "leak speculative data" if you have the right gadget using the
result:

  CVE-2020-12965 "Transient Execution of Non-Canonical Accesses"

Now, the kernel surrounds the access with a STAC/CLAC pair, and those
instructions end up serializing execution on older Zen architectures,
which closes the speculation window.

But that was true only up until Zen 5, which renames the AC bit [1].
That improves performance of STAC/CLAC a lot, but also means that the
speculation window is now open.

Note that this affects not just the new address masking, but also the
regular valid_user_address() check used by access_ok(), and the asm
version of the sign bit check in the get_user() helpers.

It does not affect put_user() or clear_user() variants, since there's no
speculative result to be used in a gadget for those operations.

Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
Link: https://lore.kernel.org/all/80d94591-1297-4afb-b510-c665efd37f10@citrix.com/
Link: https://lore.kernel.org/all/20241023094448.GAZxjFkEOOF_DM83TQ@fat_crate.local/ [1]
Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-1010.html
Link: https://arxiv.org/pdf/2108.10771
Cc: Josh Poimboeuf <jpoimboe@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Fixes: 2865baf54077 ("x86: support user address masking instead of non-speculative conditional")
Fixes: 6014bc27561f ("x86-64: make access_ok() independent of LAM")
Fixes: b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()")
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/include/asm/uaccess_64.h | 25 ++++++++++++++++++-------
 arch/x86/kernel/cpu/common.c      | 10 ++++++++++
 arch/x86/kernel/vmlinux.lds.S     |  1 +
 arch/x86/lib/getuser.S            |  9 +++++++--
 4 files changed, 36 insertions(+), 9 deletions(-)
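
[ Illustration, not part of the patch: the old and new masking written
  out as plain C.  A non-canonical pointer such as 0x7fffffffffff0000
  has bit 63 clear, so the old sign-bit mask passed it through
  unchanged; the new cmp/sbb/or sequence in the patch below folds
  anything above USER_PTR_MAX to all-ones instead. ]

	/* old: only bit 63 is looked at */
	static inline unsigned long old_mask(unsigned long p)
	{
		return p | ((long)p >> 63);	/* kernel pointers become ~0UL */
	}

	/* new: C equivalent of the cmp/sbb/or below */
	static inline unsigned long new_mask(unsigned long p, unsigned long user_ptr_max)
	{
		unsigned long mask = (p > user_ptr_max) ? ~0UL : 0;
		return p | mask;
	}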

diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index afce8ee5d7b7..c03d49c4fe54 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -12,6 +12,13 @@
 #include <asm/cpufeatures.h>
 #include <asm/page.h>
 #include <asm/percpu.h>
+#include <asm/runtime-const.h>
+
+/*
+ * Virtual variable: there's no actual backing store for this,
+ * it can purely be used as 'runtime_const_ptr(USER_PTR_MAX)'
+ */
+extern unsigned long USER_PTR_MAX;
 
 #ifdef CONFIG_ADDRESS_MASKING
 /*
@@ -46,19 +53,23 @@ static inline unsigned long __untagged_addr_remote(struct mm_struct *mm,
 
 #endif
 
-/*
- * The virtual address space space is logically divided into a kernel
- * half and a user half.  When cast to a signed type, user pointers
- * are positive and kernel pointers are negative.
- */
-#define valid_user_address(x) ((__force long)(x) >= 0)
+#define valid_user_address(x) \
+	((__force unsigned long)(x) < runtime_const_ptr(USER_PTR_MAX))
 
 /*
  * Masking the user address is an alternative to a conditional
  * user_access_begin that can avoid the fencing. This only works
  * for dense accesses starting at the address.
  */
-#define mask_user_address(x) ((typeof(x))((long)(x)|((long)(x)>>63)))
+static inline void __user *mask_user_address(const void __user *ptr)
+{
+	void __user *ret;
+	asm("cmp %1,%0; sbb %0,%0; or %1,%0"
+		:"=r" (ret)
+		:"r" (ptr),
+		 "0" (runtime_const_ptr(USER_PTR_MAX)));
+	return ret;
+}
 #define masked_user_access_begin(x) ({				\
 	__auto_type __masked_ptr = (x);				\
 	__masked_ptr = mask_user_address(__masked_ptr);		\
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f1040cb64841..d671f78f658e 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -69,6 +69,7 @@
 #include <asm/sev.h>
 #include <asm/tdx.h>
 #include <asm/posted_intr.h>
+#include <asm/runtime-const.h>
 
 #include "cpu.h"
 
@@ -2389,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
 	alternative_instructions();
 
 	if (IS_ENABLED(CONFIG_X86_64)) {
+		unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
+
+		/*
+		 * Enable this when LAM is gated on LASS support
+		if (cpu_feature_enabled(X86_FEATURE_LAM))
+			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
+		 */
+		runtime_const_init(ptr, USER_PTR_MAX);
+
 		/*
 		 * Make sure the first 2MB area is not mapped by huge pages
 		 * There are typically fixed size MTRRs in there and overlapping
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 6726be89b7a6..b8c5741d2fb4 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -358,6 +358,7 @@ SECTIONS
 #endif
 
 	RUNTIME_CONST_VARIABLES
+	RUNTIME_CONST(ptr, USER_PTR_MAX)
 
 	. = ALIGN(PAGE_SIZE);
 
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index d066aecf8aeb..4357ec2a0bfc 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -39,8 +39,13 @@
 
 .macro check_range size:req
 .if IS_ENABLED(CONFIG_X86_64)
-	mov %rax, %rdx
-	sar $63, %rdx
+	movq $0x0123456789abcdef,%rdx
+  1:
+  .pushsection runtime_ptr_USER_PTR_MAX,"a"
+	.long 1b - 8 - .
+  .popsection
+	cmp %rax, %rdx
+	sbb %rdx, %rdx
 	or %rdx, %rax
 .else
 	cmp $TASK_SIZE_MAX-\size+1, %eax
-- 
2.46.1.608.gc56f2c11c8

Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Andrew Cooper 1 month ago
On 24/10/2024 2:31 am, Linus Torvalds wrote:
> It turns out that AMD has a "Meltdown Lite(tm)" issue with non-canonical
> accesses in kernel space.  And so using just the high bit to decide
> whether an access is in user space or kernel space ends up with the good
> old "leak speculative data" if you have the right gadget using the
> result:
>
>   CVE-2020-12965 "Transient Execution of Non-Canonical Accesses"
>
> Now, the kernel surrounds the access with a STAC/CLAC pair, and those
> instructions end up serializing execution on older Zen architectures,
> which closes the speculation window.
>
> But that was true only up until Zen 5, which renames the AC bit [1].
> That improves performance of STAC/CLAC a lot, but also means that the
> speculation window is now open.
>
> Note that this affects not just the new address masking, but also the
> regular valid_user_address() check used by access_ok(), and the asm
> version of the sign bit check in the get_user() helpers.
>
> It does not affect put_user() or clear_user() variants, since there's no
> speculative result to be used in a gadget for those operations.
>
> Reported-by: Andrew Cooper <andrew.cooper3@citrix.com>
> Link: https://lore.kernel.org/all/80d94591-1297-4afb-b510-c665efd37f10@citrix.com/
> Link: https://lore.kernel.org/all/20241023094448.GAZxjFkEOOF_DM83TQ@fat_crate.local/ [1]
> Link: https://www.amd.com/en/resources/product-security/bulletin/amd-sb-1010.html
> Link: https://arxiv.org/pdf/2108.10771
> Cc: Josh Poimboeuf <jpoimboe@kernel.org>
> Cc: Borislav Petkov <bp@alien8.de>
> Fixes: 2865baf54077 ("x86: support user address masking instead of non-speculative conditional")
> Fixes: 6014bc27561f ("x86-64: make access_ok() independent of LAM")
> Fixes: b19b74bc99b1 ("x86/mm: Rework address range check in get_user() and put_user()")
> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Thank you.  This looks a whole lot safer than the prior options.

Tentatively Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com>, but
it's probably worth trying to get AMD to rubber stamp it too.


Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Maciej Wieczor-Retman 1 month ago
Hello!

On 2024-10-23 at 18:31:59 -0700, Linus Torvalds wrote:
> ...
> 	if (IS_ENABLED(CONFIG_X86_64)) {
>+		unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
>+
>+		/*
>+		 * Enable this when LAM is gated on LASS support
>+		if (cpu_feature_enabled(X86_FEATURE_LAM))
>+			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
>+		 */
>+		runtime_const_init(ptr, USER_PTR_MAX);
> ...

I tested the patch on a Sierra Forest Xeon that is LAM-capable and I didn't
find any issues.

To test the LAM related portion I uncommented the above if statement
and enabled LAM_SUP during the boot process as well as enabled LAM for the user
process with a syscall.

I did the tests by writing an ioctl that calls get_user() on demand and prints
whatever the user passes. Then, before calling the ioctl, I modified the pointer
to the variable in a few different ways (a rough sketch of such a harness is
included after the output below):

- 5 level pointer on 4 level system
[root@srf1 ~]# ./test_get_user
[  603.531778] Running get_user on address : 0xff007ffdfce791bc
[  603.538225] Value received from user is : 0
Enabling LAM...
LAM enable syscall ret = 0, errno = 0
Int pointer before tagging : 0x7ffdfce791bc
Int pointer after tagging : 0xff007ffdfce791bc
IOCTL ret : -1, errno : 14

- Cleared top bit
[root@srf1 ~]# ./test_get_user
[  625.543365] Running get_user on address : 0x7fffffff4ddb3d4c
[  625.549828] Value received from user is : 0
Enabling LAM...
LAM enable syscall ret = 0, errno =  0
Int pointer before tagging : 0x7fff4ddb3d4c
Int pointer after tagging : 0x7fffffff4ddb3d4c
IOCTL ret : -1, errno : 14

- Cleared bit 48
[root@srf1 ~]# ./test_get_user
[  686.801259] Running get_user on address : 0xffff7ffcdc2691ac
[  686.807724] Value received from user is : 0
Enabling LAM...
LAM enable syscall ret = 0, errno =  0
Int pointer before tagging : 0x7ffcdc2691ac
Int pointer after tagging : 0xffff7ffcdc2691ac
IOCTL ret : -1, errno : 14

- Normal kernel address
[root@srf1 ~]# ./test_get_user
[ 1047.074342] Running get_user on address : 0xffffffff17bfed1c
[ 1047.080801] Value received from user is : 0
Enabling LAM...
LAM enable syscall ret = 0, errno =  0
Int pointer before tagging : 0x7fff17bfed1c
Int pointer after tagging : 0xffffffff17bfed1c
IOCTL ret : -1, errno : 14

- Control group with properly tagged pointer
[root@srf1 ~]# ./test_get_user
[  767.464666] Running get_user on address : 0x7a007ffe6c9dac3c
[  767.472644] Value received from user is : 12345
Enabling LAM...
LAM enable syscall ret = 0, errno =  0
Int pointer before tagging : 0x7ffe6c9dac3c
Int pointer after tagging : 0x7a007ffe6c9dac3c
IOCTL ret : 0, errno : 0
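
[ Not from the mail above: a rough, hypothetical sketch of the user-space
  side of such a harness.  The device node and ioctl number are made up;
  the kernel side would just get_user() the passed pointer and print the
  result.  ARCH_ENABLE_TAGGED_ADDR is the real arch_prctl used to enable
  LAM_U57 (6 tag bits). ]

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <stdint.h>
	#include <errno.h>
	#include <fcntl.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <sys/syscall.h>

	#ifndef ARCH_ENABLE_TAGGED_ADDR
	#define ARCH_ENABLE_TAGGED_ADDR 0x4002	/* arch/x86/include/uapi/asm/prctl.h */
	#endif

	int main(void)
	{
		int value = 12345;
		long ret = syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, 6);
		printf("LAM enable syscall ret = %ld, errno = %d\n", ret, errno);

		uintptr_t ptr = (uintptr_t)&value;
		uintptr_t tagged = ptr | (0x3dUL << 57);	/* arbitrary LAM_U57 tag */
		printf("Int pointer before tagging : %#lx\n", (unsigned long)ptr);
		printf("Int pointer after tagging : %#lx\n", (unsigned long)tagged);

		int fd = open("/dev/test_get_user", O_RDWR);	/* hypothetical node */
		int rc = fd < 0 ? -1 : ioctl(fd, 0 /* hypothetical cmd */, (void *)tagged);
		printf("IOCTL ret : %d, errno : %d\n", rc, errno);
		return 0;
	}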

Tested-by: Maciej Wieczor-Retman <maciej.wieczor-retman@intel.com>

-- 
Kind regards
Maciej Wieczór-Retman
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Linus Torvalds 1 month ago
On Fri, 25 Oct 2024 at 07:16, Maciej Wieczor-Retman
<maciej.wieczor-retman@intel.com> wrote:
>
> I tested the patch on a Sierra Forest Xeon that is LAM capable and I didn't find
> any issues.
>
> To test the LAM related portion I uncommented the above if statement
> and enabled LAM_SUP during the boot process as well as enabled LAM for the user
> process with a syscall.

Thanks. I've applied the patch (with the LAM case still commented
out), and let's see if something comes up.

I guess that backporting it might be slightly painful due to the
runtime-const use, but I'm not even convinced it's needed for stable.

I'd expect that even on Zen 5 it's not actually a _practical_
information leak, since the whole GP exception thing is going to mess
with any leaking signal.

          Linus
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Kirill A. Shutemov 1 month ago
On Wed, Oct 23, 2024 at 06:31:59PM -0700, Linus Torvalds wrote:
> diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
> index f1040cb64841..d671f78f658e 100644
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -69,6 +69,7 @@
>  #include <asm/sev.h>
>  #include <asm/tdx.h>
>  #include <asm/posted_intr.h>
> +#include <asm/runtime-const.h>
>  
>  #include "cpu.h"
>  
> @@ -2389,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
>  	alternative_instructions();
>  
>  	if (IS_ENABLED(CONFIG_X86_64)) {
> +		unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
> +
> +		/*
> +		 * Enable this when LAM is gated on LASS support
> +		if (cpu_feature_enabled(X86_FEATURE_LAM))
> +			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
> +		 */

It will be safe to uncomment it once the patch in tip/x86/urgent gets
merged [1]. We clear X86_FEATURE_LAM if the kernel is compiled without LAM
support.

[1] https://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git/commit/?h=x86/urgent&id=3267cb6d3a174ff83d6287dcd5b0047bbd912452

> +		runtime_const_init(ptr, USER_PTR_MAX);
> +
>  		/*
>  		 * Make sure the first 2MB area is not mapped by huge pages
>  		 * There are typically fixed size MTRRs in there and overlapping
> diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
> index d066aecf8aeb..4357ec2a0bfc 100644
> --- a/arch/x86/lib/getuser.S
> +++ b/arch/x86/lib/getuser.S
> @@ -39,8 +39,13 @@
>  
>  .macro check_range size:req
>  .if IS_ENABLED(CONFIG_X86_64)
> -	mov %rax, %rdx
> -	sar $63, %rdx
> +	movq $0x0123456789abcdef,%rdx
> +  1:
> +  .pushsection runtime_ptr_USER_PTR_MAX,"a"
> +	.long 1b - 8 - .
> +  .popsection
> +	cmp %rax, %rdx
> +	sbb %rdx, %rdx
>  	or %rdx, %rax
>  .else
>  	cmp $TASK_SIZE_MAX-\size+1, %eax

Maybe it's worth adding an assembly macro for the runtime_ptr?

diff --git a/arch/x86/include/asm/runtime-const.h b/arch/x86/include/asm/runtime-const.h
index 24e3a53ca255..563566f361f7 100644
--- a/arch/x86/include/asm/runtime-const.h
+++ b/arch/x86/include/asm/runtime-const.h
@@ -2,6 +2,18 @@
 #ifndef _ASM_RUNTIME_CONST_H
 #define _ASM_RUNTIME_CONST_H
 
+#ifdef __ASSEMBLY__
+
+.macro RUNTIME_CONST_PTR sym reg
+	movq	$0x0123456789abcdef, %\reg
+	1:
+	.pushsection runtime_ptr_\sym, "a"
+	.long	1b - 8 - .
+	.popsection
+.endm
+
+#else /* __ASSEMBLY__ */
+
 #define runtime_const_ptr(sym) ({				\
 	typeof(sym) __ret;					\
 	asm_inline("mov %1,%0\n1:\n"				\
@@ -58,4 +70,5 @@ static inline void runtime_const_fixup(void (*fn)(void *, unsigned long),
 	}
 }
 
+#endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index 4357ec2a0bfc..f7589d4142cd 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -32,6 +32,7 @@
 #include <asm/errno.h>
 #include <asm/asm-offsets.h>
 #include <asm/thread_info.h>
+#include <asm/runtime-const.h>
 #include <asm/asm.h>
 #include <asm/smap.h>
 
@@ -39,11 +40,7 @@
 
 .macro check_range size:req
 .if IS_ENABLED(CONFIG_X86_64)
-	movq $0x0123456789abcdef,%rdx
-  1:
-  .pushsection runtime_ptr_USER_PTR_MAX,"a"
-	.long 1b - 8 - .
-  .popsection
+	RUNTIME_CONST_PTR sym=USER_PTR_MAX reg=rdx
 	cmp %rax, %rdx
 	sbb %rdx, %rdx
 	or %rdx, %rax
-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Borislav Petkov 1 month ago
On Wed, Oct 23, 2024 at 06:31:59PM -0700, Linus Torvalds wrote:
> +static inline void __user *mask_user_address(const void __user *ptr)
> +{
> +	void __user *ret;
> +	asm("cmp %1,%0; sbb %0,%0; or %1,%0"
> +		:"=r" (ret)
> +		:"r" (ptr),
> +		 "0" (runtime_const_ptr(USER_PTR_MAX)));
> +	return ret;
> +}

Can we make this more readable pls?

Something like this:

static inline void __user *mask_user_address(const void __user *ptr)
{       
        void __user *ret;

        asm("cmp %[ptr],%[ret]\n\t"
            "sbb %[ret],%[ret]\n\t"
            "or  %[ptr],%[ret]"
                : [ret] "=r" (ret)
                : [ptr] "r" (ptr),
                  "0" (runtime_const_ptr(USER_PTR_MAX)));

        return ret;
}

This way the compiler-generated asm is more readable too due to the newlines
and tabs.

In any case, it looks good. I single-stepped strnlen_user() and I got:

# move the constant
movabs $0x7ffffffff000,%rdi

# the user pointer: rax = 0x7ffcb6839fdf
cmp    %rax,%rdi

sbb    %rdi,%rdi
# rdi = 0x0

or     %rax,%rdi
# rdi = 0x7ffcb6839fdf

stac

and user pointer is in %rdi.

Then, on the next breakpoint, I modified the user pointer:

gdb> set $rax = 0xfffcb6839fd9
cmp    %rax,%rdi

sbb    %rdi,%rdi
# rdi = 0xffffffffffffffff  -1

# flags are set
eflags         0x297               [ IOPL=0 IF SF AF PF CF ]

or     %rax,%rdi

# user pointer is -1, do_strnlen_user() will try to work with -1 and fault...
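
[ For reference, roughly how strnlen_user()/strncpy_from_user() reach
  mask_user_address() -- patterned on the existing masked-access callers,
  so names and details here are approximate rather than quoted: ]

	if (can_do_masked_user_access()) {
		long retval;
		/* no access_ok()/fence: out-of-range pointers already fold to ~0 */
		str = masked_user_access_begin(str);
		retval = do_strnlen_user(str, count, count);
		user_read_access_end();
		return retval;
	}
	/* otherwise: access_ok() + user_read_access_begin() fallback */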

> @@ -2389,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
>  	alternative_instructions();
>  
>  	if (IS_ENABLED(CONFIG_X86_64)) {
> +		unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
> +
> +		/*
> +		 * Enable this when LAM is gated on LASS support
> +		if (cpu_feature_enabled(X86_FEATURE_LAM))
> +			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
> +		 */
> +		runtime_const_init(ptr, USER_PTR_MAX);

Looking at Documentation/arch/x86/x86_64/mm.rst, 5-level page tables define
USER_PTR_MAX as 0x00ffffffffffffff, i.e., bits [55:0].

So I guess that USER_PTR_MAX needs to look at X86_FEATURE_LA57, no?

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Linus Torvalds 1 month ago
On Thu, 24 Oct 2024 at 04:08, Borislav Petkov <bp@alien8.de> wrote:
>
> Can we make this more readable pls?
>
> Something like this:
>
> static inline void __user *mask_user_address(const void __user *ptr)
> {
>         void __user *ret;
>
>         asm("cmp %[ptr],%[ret]\n\t"
>             "sbb %[ret],%[ret]\n\t"
>             "or  %[ptr],%[ret]"
>                 : [ret] "=r" (ret)
>                 : [ptr] "r" (ptr),
>                   "0" (runtime_const_ptr(USER_PTR_MAX)));

Will do at least for the newlines.

I actually find the '%[ret]' kind of verbosity *less* readable than
using the numbers that are needed anyway to disambiguate the
input/output thing.

Yes, named arguments are good when there's enough of them to be a big
deal, but in this case the variable naming ends up just becoming line
noise and overwhelming the actual code in question.

> This way the compiler-generated asm is more readable too due to the newlines
> and tabs.

Ack. I don't find the compiler-generated asm very readable due to all
the magic section tricks, but certainly that can be avoided for the
cmp/sbb/or sequence.

> In any case, it looks good, I single-stepped strnlen_user() and I got:

Thanks, looks good.

> Looking at Documentation/arch/x86/x86_64/mm.rst, 5 level page tables define
> USR_PTR_MAX as 0x00ffffffffffffff, i.e., bits [55:0].
>
> So I guess that USER_PTR_MAX needs to look at X86_FEATURE_LA57, no?

The *page tables* only cover bits [55:0] of user space, yes.

But the user *pointers* are bigger. That's the point of LAM. The upper
bits are masked off by hardware.

So a valid user pointer with LAM57 looks like:

 [63: MBZ] [62-57: masked off] [56: MBZ] [55-0: used for TLB lookup]

LAM48 is the same, except obviously [62-48] are masked off, and bit 47
is the one that must-be-zero.

So as far as the kernel is concerned, any address with the top bit
clear is a user address.

So with LAM, you basically end up with the sign bit test again (except
that we still don't allow the last page of user space to be mapped).

We could also take the whole "bit 56 must be zero too" thing into
account and further tighten it, but it really doesn't change the
situation in any material ways, since we have that page-sized hole at
the end of the user address space anyway even without it (and with
LAM, the upper bit masking just means that special guard page is
repeated in the linear address space).
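
[ A small sketch of the commented-out LAM case, not from the mail: with
  USER_PTR_MAX set to (1ul << 63) - PAGE_SIZE, anything with bit 63 clear
  passes except the topmost (guard) page, so LAM-tagged user pointers
  survive mask_user_address() unchanged while kernel pointers still fold
  to ~0. ]

	#define PAGE_SIZE 4096ul

	/* the check the commented-out LAM branch would effectively install */
	static inline int lam_valid_user_address(unsigned long ptr)
	{
		const unsigned long user_ptr_max = (1ul << 63) - PAGE_SIZE;
		return ptr < user_ptr_max;
	}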

                   Linus
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Linus Torvalds 1 month ago
On Thu, 24 Oct 2024 at 10:53, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> >
> >         asm("cmp %[ptr],%[ret]\n\t"
> >             "sbb %[ret],%[ret]\n\t"
> >             "or  %[ptr],%[ret]"
> >                 : [ret] "=r" (ret)
> >                 : [ptr] "r" (ptr),
> >                   "0" (runtime_const_ptr(USER_PTR_MAX)));
>
> Will do at least for the newlines.

Actually, I should also just remove the "or" and move that into C
code. It will allow the compiler to decide which way it wants to do
the bitwise 'or', which means that the compiler can pick whichever
output register is a better choice.

Probably never matters in practice, but leaving decisions like that to
the compiler and avoiding one more fixed asm instruction is a good
thing.

It does result in a few more casts on the C side, since you can't just
do bitwise 'or' on a pointer, but I think it's still the right thing
to do. So that thing becomes

  static inline void __user *mask_user_address(const void __user *ptr)
  {
        unsigned long mask;
        asm("cmp %1,%0\n\t"
            "sbb %0,%0"
                :"=r" (mask)
                :"r" (ptr),
                 "0" (runtime_const_ptr(USER_PTR_MAX)));
        return (__force void __user *)(mask | (__force unsigned long)ptr);
  }

which I'm certainly not claiming is a thing of beauty, but the
generated code looks ok if you just ignore the #APP/#NOAPP noise:

  # ./arch/x86/include/asm/uaccess_64.h:71:                "0"
(runtime_const_ptr(USER_PTR_MAX)));
  #APP
  # 71 "./arch/x86/include/asm/uaccess_64.h" 1
        mov $81985529216486895,%rax     #, __ret
  1:
  .pushsection runtime_ptr_USER_PTR_MAX,"a"
        .long 1b - 8 - .        #
        .popsection
  # 0 "" 2
  # lib/strncpy_from_user.c:114: {
  #NO_APP
        pushq   %rbx    #
        movq    %rdi, %r9       # tmp149, dst
        movq    %rdx, %r11      # tmp151, count
  # ./arch/x86/include/asm/uaccess_64.h:67:       asm("cmp %1,%0\n\t"
  #APP
  # 67 "./arch/x86/include/asm/uaccess_64.h" 1
        cmp %rsi,%rax   # src, mask
        sbb %rax,%rax   # mask
  # 0 "" 2
  # ./arch/x86/include/asm/uaccess_64.h:72:       return (__force void
__user *)(mask | (__force unsigned long)ptr);
  #NO_APP
        orq     %rax, %rsi      # mask, _44

so you actually see gcc filling in variable names etc (well "variable
names" may be a bit generous: "_44" is a pseudo for the new value of
src, but that's just how compilers are with SSA - assignments create a
whole new temporary).

So legibility is very much in the eye of the beholder. You have to be
pretty damn used to looking at the generated asm to find any of this
even remotely legible.

                Linus
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Borislav Petkov 1 month ago
On Thu, Oct 24, 2024 at 11:13:35AM -0700, Linus Torvalds wrote:
> Actually, I should also just remove the "or" and move that into C
> code. It will allow the compiler to decide which way it wants to do
> the bitwise 'or', which means that the compiler can pick whichever
> output register is a better choice.
>
> Probably never matters in practice, but leaving decisions like that to
> the compiler and avoiding one more fixed asm instruction is a good
> thing.

Yap, that's the impression I've got too from talking to compiler folks over
the years.

> It does result in a few more casts on the C side, since you can't just
> do bitwise 'or' on a pointer, but I think it's still the right thing
> to do. So that thing becomes
> 
>   static inline void __user *mask_user_address(const void __user *ptr)
>   {
>         unsigned long mask;
>         asm("cmp %1,%0\n\t"
>             "sbb %0,%0"
>                 :"=r" (mask)
>                 :"r" (ptr),
>                  "0" (runtime_const_ptr(USER_PTR_MAX)));
>         return (__force void __user *)(mask | (__force unsigned long)ptr);
>   }
> 
> which I'm certainly not claiming is a thing of beauty,

Bah, 2 insns in asm and an OR in C code? That's fine.

> but the generated code looks ok if you just ignore the #APP/#NOAPP noise:
> 
>   # ./arch/x86/include/asm/uaccess_64.h:71:                "0"
> (runtime_const_ptr(USER_PTR_MAX)));
>   #APP
>   # 71 "./arch/x86/include/asm/uaccess_64.h" 1
>         mov $81985529216486895,%rax     #, __ret
>   1:
>   .pushsection runtime_ptr_USER_PTR_MAX,"a"
>         .long 1b - 8 - .        #
>         .popsection
>   # 0 "" 2
>   # lib/strncpy_from_user.c:114: {
>   #NO_APP
>         pushq   %rbx    #
>         movq    %rdi, %r9       # tmp149, dst
>         movq    %rdx, %r11      # tmp151, count
>   # ./arch/x86/include/asm/uaccess_64.h:67:       asm("cmp %1,%0\n\t"
>   #APP
>   # 67 "./arch/x86/include/asm/uaccess_64.h" 1
>         cmp %rsi,%rax   # src, mask
>         sbb %rax,%rax   # mask
>   # 0 "" 2
>   # ./arch/x86/include/asm/uaccess_64.h:72:       return (__force void
> __user *)(mask | (__force unsigned long)ptr);
>   #NO_APP
>         orq     %rax, %rsi      # mask, _44
> 
> so you actually see gcc filling in variable names etc (well "variable
> names" may be a bit generous: "_44" is a pseudo for the new value of
> src,

Hmm, how did it come up with "src"?

Oh, strncpy_from_user(). That's actually nice.

> but that's just how compilers are with SSA - assignments create a
> whole new temporary).

Right.

> So legibility is very much in the eye of the beholder. You have to be
> pretty damn used to looking at the generated asm to find any of this
> even remotely legible.

Haha, yeah. Especially if one is curious to see what the compiler spits out.

In any case, yeah, that's readable enough.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Josh Poimboeuf 1 month ago
On Thu, Oct 24, 2024 at 11:13:35AM -0700, Linus Torvalds wrote:
> It does result in a few more casts on the C side, since you can't just
> do bitwise 'or' on a pointer, but I think it's still the right thing
> to do. So that thing becomes
> 
>   static inline void __user *mask_user_address(const void __user *ptr)
>   {
>         unsigned long mask;
>         asm("cmp %1,%0\n\t"
>             "sbb %0,%0"
>                 :"=r" (mask)
>                 :"r" (ptr),
>                  "0" (runtime_const_ptr(USER_PTR_MAX)));
>         return (__force void __user *)(mask | (__force unsigned long)ptr);
>   }

On a non-related note, doesn't the inline asm need a "cc" clobber?

-- 
Josh
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Linus Torvalds 1 month ago
On Thu, 24 Oct 2024 at 12:09, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> On a non-related note, doesn't the inline asm need a "cc" clobber?

All inline asm is always a cc clobber on x86, probably because the
bulk of x86 instructions clobber the flags.

Having one is certainly not _wrong_, but we typically don't have it.

(See for example all of the atomics, all bitops, and pretty much
everything else).

We do seem to have added the "cc" clobber to a handful of cases - it
looks like the virtualization people do it, because it's mostly
<asm/vmware.h> and <asm/mshyperv.h>, and the ones I found in
<asm/barrier.h> were added by Michael Tsirkin, who is mostly also on
the virt side.

I'm not sure what triggered the virt people to do it, but it's
certainly not wrong, it's just neither needed nor traditional.

                  Linus
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Kirill A. Shutemov 1 month ago
On Thu, Oct 24, 2024 at 01:08:22PM +0200, Borislav Petkov wrote:
> > @@ -2389,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
> >  	alternative_instructions();
> >  
> >  	if (IS_ENABLED(CONFIG_X86_64)) {
> > +		unsigned long USER_PTR_MAX = TASK_SIZE_MAX;
> > +
> > +		/*
> > +		 * Enable this when LAM is gated on LASS support
> > +		if (cpu_feature_enabled(X86_FEATURE_LAM))
> > +			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
> > +		 */
> > +		runtime_const_init(ptr, USER_PTR_MAX);
> 
> Looking at Documentation/arch/x86/x86_64/mm.rst, 5 level page tables define
> USR_PTR_MAX as 0x00ffffffffffffff, i.e., bits [55:0].
> 
> So I guess that USER_PTR_MAX needs to look at X86_FEATURE_LA57, no?

X86_FEATURE_LA57 is already handled inside the TASK_SIZE_MAX definition.

Although, it might be worth updating the end of the userspace VA in mm.rst
with -PAGE_SIZE.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Borislav Petkov 1 month ago
On Thu, Oct 24, 2024 at 03:32:32PM +0300, Kirill A. Shutemov wrote:
> Although, it might be worth updating end of userspace VA in mm.rst with
> -PAGE_SIZE.

Sure, we try to keep that VA map file up-to-date as it comes in very handy
oftentimes.

Thx.

-- 
Regards/Gruss,
    Boris.

https://people.kernel.org/tglx/notes-about-netiquette
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Josh Poimboeuf 1 month ago
On Wed, Oct 23, 2024 at 06:31:59PM -0700, Linus Torvalds wrote:
> @@ -2389,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
> +		/*
> +		 * Enable this when LAM is gated on LASS support
> +		if (cpu_feature_enabled(X86_FEATURE_LAM))
> +			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
> +		 */

I'm probably missing something but once LAM is enabled, how wouldn't
this allow non-canonical address speculation?  i.e. when bit 47/56 is
set and 63 is cleared, would it not go untouched by mask_user_address()
and thus be speculatively interpreted by AMD as a kernel address?

Also, the comment above __access_ok() now seems obsolete:

/*
 * User pointers can have tag bits on x86-64.  This scheme tolerates
 * arbitrary values in those bits rather then masking them off.
 *
 * Enforce two rules:
 * 1. 'ptr' must be in the user half of the address space
 * 2. 'ptr+size' must not overflow into kernel addresses
 *
 * Note that addresses around the sign change are not valid addresses,
 * and will GP-fault even with LAM enabled if the sign bit is set (see
 * "CR3.LAM_SUP" that can narrow the canonicality check if we ever
 * enable it, but not remove it entirely).
 *
 * So the "overflow into kernel addresses" does not imply some sudden
 * exact boundary at the sign bit, and we can allow a lot of slop on the
 * size check.
 *
 * In fact, we could probably remove the size check entirely, since
 * any kernel accesses will be in increasing address order starting
 * at 'ptr', and even if the end might be in kernel space, we'll
 * hit the GP faults for non-canonical accesses before we ever get
 * there.
 *
 * That's a separate optimization, for now just handle the small
 * constant case.
 */

Other than the LAM question, it looks good to me and the code
generation+patching works as expected on a live system.

-- 
Josh
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Linus Torvalds 1 month ago
On Wed, 23 Oct 2024 at 23:13, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
>
> I'm probably missing something but once LAM is enabled, how wouldn't
> this allow non-canonical address speculation?

Once LAM is enabled, together with LASS, non-canonical addresses
basically don't exist.

>  i.e. when bit 47/56 is
> set and 63 is cleared, would it not go untouched by mask_user_address()
> and thus be speculatively interpreted by AMD as a kernel address?

AMD doesn't _have_ LAM. When they do, they had better not
speculatively mis-interpret addresses.

IOW, notice how the LAM enablement is *dynamic* based on cpu_feature_enabled()?

(Well, right now it's commented out because Intel needs LASS for it to
work right, but that's a separate issue).

> Also, the comment above __access_ok() now seems obsolete:
>
> /*
>  * User pointers can have tag bits on x86-64.  This scheme tolerates
>  * arbitrary values in those bits rather then masking them off.

No. The comment is still correct. The scheme tolerates exactly the LAM
kind of hardware-based address masking.

The difference is just that the constant is now boot-time dynamic, so
if LAM isn't there, the whole tag bit issue goes away, of course.

>  * Enforce two rules:
>  * 1. 'ptr' must be in the user half of the address space
>  * 2. 'ptr+size' must not overflow into kernel addresses

.. except the "user half" should probably be "user part".

                    Linus
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Kirill A. Shutemov 1 month ago
On Thu, Oct 24, 2024 at 10:35:33AM -0700, Linus Torvalds wrote:
> On Wed, 23 Oct 2024 at 23:13, Josh Poimboeuf <jpoimboe@kernel.org> wrote:
> >
> > I'm probably missing something but once LAM is enabled, how wouldn't
> > this allow non-canonical address speculation?
> 
> Once LAM is enabled, together with LASS, non-canonical addresses
> basically don't exit.

That's not true.

With LAM, the canonicality check is relaxed to bit 63 == bit 47/56.

I'm trying to confirm internally that we don't speculate past this relaxed
canonicality check. I believe we don't, but I want to double-check.
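
[ In code form, the relaxed rule being described -- a sketch, not kernel
  source: ]

	#include <stdbool.h>
	#include <stdint.h>

	/* LAM canonicality: bit 63 must equal the paging top bit (47 or 56) */
	static bool lam_canonical(uint64_t addr, int top_bit)
	{
		return ((addr >> 63) & 1) == ((addr >> top_bit) & 1);
	}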

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Josh Poimboeuf 1 month ago
On Thu, Oct 24, 2024 at 10:35:33AM -0700, Linus Torvalds wrote:
> >  i.e. when bit 47/56 is
> > set and 63 is cleared, would it not go untouched by mask_user_address()
> > and thus be speculatively interpreted by AMD as a kernel address?
> 
> AMD doesn't _have_ LAM. When they do, they had better not
> speculatively mis-interpret addresses.

Ok.  I was thinking AMD had its own version of LAM, though all I can
find is UAI which is actually quite different since it ignores bit 63
altogether (and isn't supported in Linux anyway).

> > Also, the comment above __access_ok() now seems obsolete:
> >
> > /*
> >  * User pointers can have tag bits on x86-64.  This scheme tolerates
> >  * arbitrary values in those bits rather then masking them off.
> 
> No. The comment is still correct. The scheme tolerates exactly the LAM
> kind of hardware-based address masking.

The comment doesn't seem right to me at all.

With LAM enabled, USER_PTR_MAX is PAGE_SIZE away from the sign change
boundary.  So __access_ok() no longer has size check slop and that whole
discussion about the sign change boundary can go away.

AFAICT #GP can only happen when LAM is enabled and bit 47 (or 56 for
LA57) doesn't match bit 63.

-- 
Josh
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Andrew Cooper 1 month ago
On 24/10/2024 7:47 pm, Josh Poimboeuf wrote:
> On Thu, Oct 24, 2024 at 10:35:33AM -0700, Linus Torvalds wrote:
>>>  i.e. when bit 47/56 is
>>> set and 63 is cleared, would it not go untouched by mask_user_address()
>>> and thus be speculatively interpreted by AMD as a kernel address?
>> AMD doesn't _have_ LAM. When they do, they had better not
>> speculatively mis-interpret addresses.
> Ok.  I was thinking AMD had its own version of LAM, though all I can
> find is UAI which is actually quite different since it ignores bit 63
> altogether (and isn't supported in Linux anyway).

Yes, UAI is AMD's offering to parallel LAM.

Perhaps the funniest aspect of UAI is that it intentionally doesn't
apply (i.e. no ignoring) for stack-relative accesses.

Now consider what happens when frame pointers are turned off and the
compiler is free to use %rbp for other things...

And then realise that you can fix this mess in 32bit with an explicit
%ds segment override, but you can't in 64bit.

In 64bit mode, most segment prefixes are ignored and %ds does not
override the default-%ss-ness of %rbp/%rsp-relative accesses.

~Andrew
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Kirill A. Shutemov 1 month ago
On Wed, Oct 23, 2024 at 11:13:00PM -0700, Josh Poimboeuf wrote:
> On Wed, Oct 23, 2024 at 06:31:59PM -0700, Linus Torvalds wrote:
> > @@ -2389,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
> > +		/*
> > +		 * Enable this when LAM is gated on LASS support
> > +		if (cpu_feature_enabled(X86_FEATURE_LAM))
> > +			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
> > +		 */
> 
> I'm probably missing something but once LAM is enabled, how wouldn't
> this allow non-canonical address speculation?  i.e. when bit 47/56 is
> set and 63 is cleared, would it not go untouched by mask_user_address()
> and thus be speculatively interpreted by AMD as a kernel address?

A CPU with LAM enabled enforces that bit 63 equals bit 47/56 and raises #GP
otherwise.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov
Re: [PATCH] x86: fix user address masking non-canonical speculation issue
Posted by Josh Poimboeuf 1 month ago
On Thu, Oct 24, 2024 at 05:02:42PM +0300, Kirill A. Shutemov wrote:
> On Wed, Oct 23, 2024 at 11:13:00PM -0700, Josh Poimboeuf wrote:
> > On Wed, Oct 23, 2024 at 06:31:59PM -0700, Linus Torvalds wrote:
> > > @@ -2389,6 +2390,15 @@ void __init arch_cpu_finalize_init(void)
> > > +		/*
> > > +		 * Enable this when LAM is gated on LASS support
> > > +		if (cpu_feature_enabled(X86_FEATURE_LAM))
> > > +			USER_PTR_MAX = (1ul << 63) - PAGE_SIZE;
> > > +		 */
> > 
> > I'm probably missing something but once LAM is enabled, how wouldn't
> > this allow non-canonical address speculation?  i.e. when bit 47/56 is
> > set and 63 is cleared, would it not go untouched by mask_user_address()
> > and thus be speculatively interpreted by AMD as a kernel address?
> 
> CPU with LAM enabled enforces bit 63 to be equal bit 47/56 and raises #GP
> otherwise.

Right, but I'm asking about the speculation bug which happens before the
exception (and which prompted this patch in the first place).

-- 
Josh