[PATCH V2] x86/sev: Mark the code returning to user space as syscall gap

Lai Jiangshan posted 1 patch 4 years ago
arch/x86/entry/entry_64.S        | 2 ++
arch/x86/entry/entry_64_compat.S | 2 ++
arch/x86/include/asm/proto.h     | 4 ++++
arch/x86/include/asm/ptrace.h    | 4 ++++
4 files changed, 12 insertions(+)
[PATCH V2] x86/sev: Mark the code returning to user space as syscall gap
Posted by Lai Jiangshan 4 years ago
From: Lai Jiangshan <jiangshan.ljs@antgroup.com>

When returning to user space, the %rsp is user controlled value.

If it is SNP-guest and the hypervisor decides to mess with the code-page
for this path while a CPU is executing it.  This will cause a #VC on
that CPU and that could hit in the syscall return path and mislead
the #VC handler.

So make ip_within_syscall_gap() return true in this case.

Cc: Joerg Roedel <jroedel@suse.de>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
---
[V1]: https://lore.kernel.org/lkml/20211213042215.3096-4-jiangshanlai@gmail.com/

Changed from V1:
	Update changelog.

 arch/x86/entry/entry_64.S        | 2 ++
 arch/x86/entry/entry_64_compat.S | 2 ++
 arch/x86/include/asm/proto.h     | 4 ++++
 arch/x86/include/asm/ptrace.h    | 4 ++++
 4 files changed, 12 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 4faac48ebec5..4f678b6045cd 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -215,8 +215,10 @@ syscall_return_via_sysret:
 
 	popq	%rdi
 	popq	%rsp
+SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
 	swapgs
 	sysretq
+SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
 SYM_CODE_END(entry_SYSCALL_64)
 
 /*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 4fdb007cddbd..3c0e14960e2b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -297,6 +297,7 @@ sysret32_from_system_call:
 	 * code.  We zero R8-R10 to avoid info leaks.
          */
 	movq	RSP-ORIG_RAX(%rsp), %rsp
+SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
 
 	/*
 	 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
@@ -314,6 +315,7 @@ sysret32_from_system_call:
 	xorl	%r10d, %r10d
 	swapgs
 	sysretl
+SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
 SYM_CODE_END(entry_SYSCALL_compat)
 
 /*
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 0f899c8d7a4e..647d71535ce3 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -13,6 +13,8 @@ void syscall_init(void);
 #ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
 void entry_SYSCALL_64_safe_stack(void);
+void entry_SYSRETQ_unsafe_stack(void);
+void entry_SYSRETQ_end(void);
 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
 #endif
 
@@ -28,6 +30,8 @@ void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
 void entry_SYSCALL_compat_safe_stack(void);
+void entry_SYSRETL_compat_unsafe_stack(void);
+void entry_SYSRETL_compat_end(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 4357e0f2cd5f..f4db78b09c8f 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -186,9 +186,13 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
 	bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
 		    regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack);
 
+	ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack &&
+		      regs->ip <  (unsigned long)entry_SYSRETQ_end);
 #ifdef CONFIG_IA32_EMULATION
 	ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
 		      regs->ip <  (unsigned long)entry_SYSCALL_compat_safe_stack);
+	ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack &&
+		      regs->ip <  (unsigned long)entry_SYSRETL_compat_end);
 #endif
 
 	return ret;
-- 
2.19.1.6.gb485710b
Re: [PATCH V2] x86/sev: Mark the code returning to user space as syscall gap
Posted by Joerg Roedel 3 years, 12 months ago
Hi,

On Tue, Apr 12, 2022 at 08:49:08PM +0800, Lai Jiangshan wrote:
> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
> 
> When returning to user space, the %rsp is user controlled value.
> 
> If it is SNP-guest and the hypervisor decides to mess with the code-page
> for this path while a CPU is executing it.  This will cause a #VC on
> that CPU and that could hit in the syscall return path and mislead
> the #VC handler.
> 
> So make ip_within_syscall_gap() return true in this case.

With the SNP guest patches in tip-tree I think it actually becomes
possible that a #VC exception hits in these parts of the execution
stream. It requires good timing by the attacker, but it is not
impossible. Therefore:

Acked-by: Joerg Roedel <jroedel@suse.de>

-- 
Jörg Rödel
jroedel@suse.de

SUSE Software Solutions Germany GmbH
Maxfeldstr. 5
90409 Nürnberg
Germany
 
(HRB 36809, AG Nürnberg)
Geschäftsführer: Ivo Totev
Re: [PATCH V2] x86/sev: Mark the code returning to user space as syscall gap
Posted by Lai Jiangshan 4 years ago
On Tue, Apr 12, 2022 at 8:48 PM Lai Jiangshan <jiangshanlai@gmail.com> wrote:
>
> From: Lai Jiangshan <jiangshan.ljs@antgroup.com>
>
> When returning to user space, the %rsp is user controlled value.
>
> If it is SNP-guest and the hypervisor decides to mess with the code-page
> for this path while a CPU is executing it.  This will cause a #VC on
> that CPU and that could hit in the syscall return path and mislead
> the #VC handler.
>
> So make ip_within_syscall_gap() return true in this case.
>
> Cc: Joerg Roedel <jroedel@suse.de>
> Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>



Hello

Ping.

Thanks
Lai
[tip: x86/sev] x86/sev: Mark the code returning to user space as syscall gap
Posted by tip-bot2 for Lai Jiangshan 3 years, 11 months ago
The following commit has been merged into the x86/sev branch of tip:

Commit-ID:     47f33de4aafb2f5e43d480d590a939d0f1d566a9
Gitweb:        https://git.kernel.org/tip/47f33de4aafb2f5e43d480d590a939d0f1d566a9
Author:        Lai Jiangshan <jiangshan.ljs@antgroup.com>
AuthorDate:    Tue, 12 Apr 2022 20:49:08 +08:00
Committer:     Borislav Petkov <bp@suse.de>
CommitterDate: Thu, 19 May 2022 10:56:46 +02:00

x86/sev: Mark the code returning to user space as syscall gap

When returning to user space, %rsp is user-controlled value.

If it is a SNP-guest and the hypervisor decides to mess with the
code-page for this path while a CPU is executing it, a potential #VC
could hit in the syscall return path and mislead the #VC handler.

So make ip_within_syscall_gap() return true in this case.

Signed-off-by: Lai Jiangshan <jiangshan.ljs@antgroup.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Joerg Roedel <jroedel@suse.de>
Link: https://lore.kernel.org/r/20220412124909.10467-1-jiangshanlai@gmail.com
---
 arch/x86/entry/entry_64.S        | 2 ++
 arch/x86/entry/entry_64_compat.S | 2 ++
 arch/x86/include/asm/proto.h     | 4 ++++
 arch/x86/include/asm/ptrace.h    | 4 ++++
 4 files changed, 12 insertions(+)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index f7bd800..2fd8a5c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -215,8 +215,10 @@ syscall_return_via_sysret:
 
 	popq	%rdi
 	popq	%rsp
+SYM_INNER_LABEL(entry_SYSRETQ_unsafe_stack, SYM_L_GLOBAL)
 	swapgs
 	sysretq
+SYM_INNER_LABEL(entry_SYSRETQ_end, SYM_L_GLOBAL)
 SYM_CODE_END(entry_SYSCALL_64)
 
 /*
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index 4fdb007..3c0e149 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -297,6 +297,7 @@ sysret32_from_system_call:
 	 * code.  We zero R8-R10 to avoid info leaks.
          */
 	movq	RSP-ORIG_RAX(%rsp), %rsp
+SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL)
 
 	/*
 	 * The original userspace %rsp (RSP-ORIG_RAX(%rsp)) is stored
@@ -314,6 +315,7 @@ sysret32_from_system_call:
 	xorl	%r10d, %r10d
 	swapgs
 	sysretl
+SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL)
 SYM_CODE_END(entry_SYSCALL_compat)
 
 /*
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index feed36d..f042cfc 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -13,6 +13,8 @@ void syscall_init(void);
 #ifdef CONFIG_X86_64
 void entry_SYSCALL_64(void);
 void entry_SYSCALL_64_safe_stack(void);
+void entry_SYSRETQ_unsafe_stack(void);
+void entry_SYSRETQ_end(void);
 long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
 #endif
 
@@ -28,6 +30,8 @@ void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
 void entry_SYSCALL_compat_safe_stack(void);
+void entry_SYSRETL_compat_unsafe_stack(void);
+void entry_SYSRETL_compat_end(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 4357e0f..f4db78b 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -186,9 +186,13 @@ static __always_inline bool ip_within_syscall_gap(struct pt_regs *regs)
 	bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
 		    regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack);
 
+	ret = ret || (regs->ip >= (unsigned long)entry_SYSRETQ_unsafe_stack &&
+		      regs->ip <  (unsigned long)entry_SYSRETQ_end);
 #ifdef CONFIG_IA32_EMULATION
 	ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
 		      regs->ip <  (unsigned long)entry_SYSCALL_compat_safe_stack);
+	ret = ret || (regs->ip >= (unsigned long)entry_SYSRETL_compat_unsafe_stack &&
+		      regs->ip <  (unsigned long)entry_SYSRETL_compat_end);
 #endif
 
 	return ret;