[v1] Support userspace hypercalls for TDX

[PATCH 1/2] x86/tdx: Add prctl to allow userlevel TDX hypercalls

Posted by Tim Merrifield 1 year, 7 months ago

Add a new prctl option to enable/disable user-level hypercalls when
running in a confidential VM. Add support for checking this flag on
VMCALL #VE for TDX and transfer control to a hypervisor
vendor-specific handler.

Signed-off-by: Tim Merrifield <tim.merrifield@broadcom.com>
---
 arch/x86/coco/tdx/tdx.c            | 18 ++++++++++++++++++
 arch/x86/include/asm/thread_info.h |  2 ++
 arch/x86/include/asm/x86_init.h    |  1 +
 arch/x86/include/uapi/asm/prctl.h  |  3 +++
 arch/x86/kernel/process.c          | 20 ++++++++++++++++++++
 5 files changed, 44 insertions(+)

diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index ef8ec2425998..23111e4c1f91 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -239,6 +239,7 @@ static int ve_instr_len(struct ve_info *ve)
 	case EXIT_REASON_MSR_WRITE:
 	case EXIT_REASON_CPUID:
 	case EXIT_REASON_IO_INSTRUCTION:
+	case EXIT_REASON_VMCALL:
 		/* It is safe to use ve->instr_len for #VE due instructions */
 		return ve->instr_len;
 	case EXIT_REASON_EPT_VIOLATION:
@@ -635,6 +636,21 @@ void tdx_get_ve_info(struct ve_info *ve)
 	ve->instr_info  = upper_32_bits(args.r10);
 }
 
+/*
+ * Forward a user-initiated VMCALL to the hypervisor-specific tdx_hcall hook.
+ * Returns ve_instr_len(ve) on success, -EOPNOTSUPP when no hook is set or
+ * TIF_COCO_USER_HCALL is clear, and -EIO when the hook returns false.
+ */
+static int handle_user_vmcall(struct pt_regs *regs, struct ve_info *ve)
+{
+	if (!x86_platform.hyper.tdx_hcall ||
+	    !test_thread_flag(TIF_COCO_USER_HCALL))
+		return -EOPNOTSUPP;
+	if (!x86_platform.hyper.tdx_hcall(regs))
+		return -EIO;
+	return ve_instr_len(ve);
+}
+
 /*
  * Handle the user initiated #VE.
  *
@@ -646,6 +662,8 @@ static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
 	switch (ve->exit_reason) {
 	case EXIT_REASON_CPUID:
 		return handle_cpuid(regs, ve);
+	case EXIT_REASON_VMCALL:
+		return handle_user_vmcall(regs, ve);
 	default:
 		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
 		return -EIO;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 12da7dfd5ef1..9f69a26a5e68 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -106,6 +106,7 @@ struct thread_info {
 #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
+#define TIF_COCO_USER_HCALL	30	/* Userland hypercalls allowed in CoCo */
 
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
 #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
@@ -128,6 +129,7 @@ struct thread_info {
 #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
+#define _TIF_COCO_USER_HCALL	(1 << TIF_COCO_USER_HCALL)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE					\
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 213cf5379a5a..52975bedd33e 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -282,6 +282,7 @@ struct x86_hyper_runtime {
 	void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs);
 	bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs);
 	bool (*is_private_mmio)(u64 addr);
+	bool (*tdx_hcall)(struct pt_regs *regs);
 };
 
 /**
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 384e2cc6ac19..7fa289a1815b 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -16,6 +16,9 @@
 #define ARCH_GET_XCOMP_GUEST_PERM	0x1024
 #define ARCH_REQ_XCOMP_GUEST_PERM	0x1025
 
+#define ARCH_GET_COCO_USER_HCALL	0x1030
+#define ARCH_SET_COCO_USER_HCALL	0x1031
+
 #define ARCH_XCOMP_TILECFG		17
 #define ARCH_XCOMP_TILEDATA		18
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1b3d417cd6c4..16f8ab6cde2e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1039,6 +1039,21 @@ unsigned long __get_wchan(struct task_struct *p)
 	return addr;
 }
 
+static int get_coco_user_hcall_mode(void)
+{
+	return !!test_thread_flag(TIF_COCO_USER_HCALL);	/* 1 if enabled, else 0 */
+}
+
+static int set_coco_user_hcall_mode(unsigned long enabled)
+{
+	/* Zero clears the thread flag; any other value sets it. */
+	if (!enabled)
+		clear_thread_flag(TIF_COCO_USER_HCALL);
+	else
+		set_thread_flag(TIF_COCO_USER_HCALL);
+	return 0;
+}
+
 long do_arch_prctl_common(int option, unsigned long arg2)
 {
 	switch (option) {
@@ -1052,6 +1067,11 @@ long do_arch_prctl_common(int option, unsigned long arg2)
 	case ARCH_GET_XCOMP_GUEST_PERM:
 	case ARCH_REQ_XCOMP_GUEST_PERM:
 		return fpu_xstate_prctl(option, arg2);
+	case ARCH_GET_COCO_USER_HCALL:
+		return get_coco_user_hcall_mode();
+	case ARCH_SET_COCO_USER_HCALL:
+		return set_coco_user_hcall_mode(arg2);
+
 	}
 
 	return -EINVAL;
-- 
2.40.1

Re: [PATCH 1/2] x86/tdx: Add prctl to allow userlevel TDX hypercalls

Posted by Kirill A . Shutemov 1 year, 7 months ago

On Wed, Jul 03, 2024 at 11:36:00PM +0000, Tim Merrifield wrote:
> Add a new prctl option to enable/disable user-level hypercalls when
> running in a confidential VM. Add support for checking this flag on
> VMCALL #VE for TDX and transfer control to a hypervisor
> vendor-specific handler.
> 
> Signed-off-by: Tim Merrifield <tim.merrifield@broadcom.com>
> ---
>  arch/x86/coco/tdx/tdx.c            | 18 ++++++++++++++++++
>  arch/x86/include/asm/thread_info.h |  2 ++
>  arch/x86/include/asm/x86_init.h    |  1 +
>  arch/x86/include/uapi/asm/prctl.h  |  3 +++
>  arch/x86/kernel/process.c          | 20 ++++++++++++++++++++
>  5 files changed, 44 insertions(+)
> 
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index ef8ec2425998..23111e4c1f91 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -239,6 +239,7 @@ static int ve_instr_len(struct ve_info *ve)
>  	case EXIT_REASON_MSR_WRITE:
>  	case EXIT_REASON_CPUID:
>  	case EXIT_REASON_IO_INSTRUCTION:
> +	case EXIT_REASON_VMCALL:
>  		/* It is safe to use ve->instr_len for #VE due instructions */
>  		return ve->instr_len;
>  	case EXIT_REASON_EPT_VIOLATION:
> @@ -635,6 +636,21 @@ void tdx_get_ve_info(struct ve_info *ve)
>  	ve->instr_info  = upper_32_bits(args.r10);
>  }
>  
> +/*
> + * Handle user-initiated, hypervisor-specific VMCALLs.
> + */
> +static int handle_user_vmcall(struct pt_regs *regs, struct ve_info *ve)
> +{
> +	if (x86_platform.hyper.tdx_hcall &&
> +	    test_thread_flag(TIF_COCO_USER_HCALL)) {
> +		if (!x86_platform.hyper.tdx_hcall(regs))
> +			return -EIO;
> +		return ve_instr_len(ve);
> +	} else {
> +		return -EOPNOTSUPP;
> +	}

Maybe something like this would be more readable:

	if (!x86_platform.hyper.tdx_hcall)
		return -EOPNOTSUPP;

	if (!test_thread_flag(TIF_COCO_USER_HCALL))
		return -EOPNOTSUPP;

	if (!x86_platform.hyper.tdx_hcall(regs))
		return -EIO;

	return ve_instr_len(ve);

BTW, do we want tdx_hcall() to return errno instead of bool?

> +}
> +
>  /*
>   * Handle the user initiated #VE.
>   *
> @@ -646,6 +662,8 @@ static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
>  	switch (ve->exit_reason) {
>  	case EXIT_REASON_CPUID:
>  		return handle_cpuid(regs, ve);
> +	case EXIT_REASON_VMCALL:
> +		return handle_user_vmcall(regs, ve);
>  	default:
>  		pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
>  		return -EIO;
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 12da7dfd5ef1..9f69a26a5e68 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -106,6 +106,7 @@ struct thread_info {
>  #define TIF_BLOCKSTEP		25	/* set when we want DEBUGCTLMSR_BTF */
>  #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
>  #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
> +#define TIF_COCO_USER_HCALL     30      /* Userland hypercalls allowed in CoCo */

Tabs instead of spaces for alignment, please.

>  #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
>  #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
> @@ -128,6 +129,7 @@ struct thread_info {
>  #define _TIF_BLOCKSTEP		(1 << TIF_BLOCKSTEP)
>  #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
>  #define _TIF_ADDR32		(1 << TIF_ADDR32)
> +#define _TIF_COCO_USER_HCALL    (1 << TIF_COCO_USER_HCALL)

Ditto.

>  
>  /* flags to check in __switch_to() */
>  #define _TIF_WORK_CTXSW_BASE					\
> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
> index 213cf5379a5a..52975bedd33e 100644
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -282,6 +282,7 @@ struct x86_hyper_runtime {
>  	void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs);
>  	bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs);
>  	bool (*is_private_mmio)(u64 addr);
> +	bool (*tdx_hcall)(struct pt_regs *regs);
>  };
>  
>  /**
> diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
> index 384e2cc6ac19..7fa289a1815b 100644
> --- a/arch/x86/include/uapi/asm/prctl.h
> +++ b/arch/x86/include/uapi/asm/prctl.h
> @@ -16,6 +16,9 @@
>  #define ARCH_GET_XCOMP_GUEST_PERM	0x1024
>  #define ARCH_REQ_XCOMP_GUEST_PERM	0x1025
>  
> +#define ARCH_GET_COCO_USER_HCALL        0x1030
> +#define ARCH_SET_COCO_USER_HCALL        0x1031
> +

Ditto.

>  #define ARCH_XCOMP_TILECFG		17
>  #define ARCH_XCOMP_TILEDATA		18
>  
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 1b3d417cd6c4..16f8ab6cde2e 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -1039,6 +1039,21 @@ unsigned long __get_wchan(struct task_struct *p)
>  	return addr;
>  }
>  
> +static int get_coco_user_hcall_mode(void)
> +{
> +	return !test_thread_flag(TIF_COCO_USER_HCALL);
> +}
> +
> +static int set_coco_user_hcall_mode(unsigned long enabled)
> +{
> +	if (enabled)
> +		set_thread_flag(TIF_COCO_USER_HCALL);
> +	else
> +		clear_thread_flag(TIF_COCO_USER_HCALL);
> +
> +	return 0;
> +}
> +

Hm. Per-thread flag is odd. I think it should be per-process.

>  long do_arch_prctl_common(int option, unsigned long arg2)
>  {
>  	switch (option) {
> @@ -1052,6 +1067,11 @@ long do_arch_prctl_common(int option, unsigned long arg2)
>  	case ARCH_GET_XCOMP_GUEST_PERM:
>  	case ARCH_REQ_XCOMP_GUEST_PERM:
>  		return fpu_xstate_prctl(option, arg2);
> +	case ARCH_GET_COCO_USER_HCALL:
> +		return get_coco_user_hcall_mode();
> +	case ARCH_SET_COCO_USER_HCALL:
> +		return set_coco_user_hcall_mode(arg2);
> +
>  	}
>  
>  	return -EINVAL;
> -- 
> 2.40.1
> 

-- 
  Kiryl Shutsemau / Kirill A. Shutemov

Re: [PATCH 1/2] x86/tdx: Add prctl to allow userlevel TDX hypercalls

Posted by Tim Merrifield 1 year, 6 months ago

Thanks for the review, Kirill.

On Mon, Jul 08, 2024 at 03:19:54PM +0300, Kirill A . Shutemov wrote:
> Hm. Per-thread flag is odd. I think it should be per-process.

This is the only point I might need some clarification on. I agree
there doesn't seem to be much value in allowing per-thread control,
but I don't see any precedent for setting per-process flags through
arch_prctl or similar interfaces. Am I missing something?

Re: [PATCH 1/2] x86/tdx: Add prctl to allow userlevel TDX hypercalls

Posted by Kirill A . Shutemov 1 year, 6 months ago

On Mon, Jul 22, 2024 at 10:04:40PM -0700, Tim Merrifield wrote:
> 
> Thanks for the review, Kirill.
> 
> On Mon, Jul 08, 2024 at 03:19:54PM +0300, Kirill A . Shutemov wrote:
> > Hm. Per-thread flag is odd. I think it should be per-process.
> 
> This is the only point I might need some clarification on. I agree
> there doesn't seem to be much value in allowing per-thread control,
> but I don't see any precedent for setting per-process flags through
> arch_prctl or similar interfaces. Am I missing something?

LAM is per-process. But it can only be enabled while the process has only
one thread and locks on second thread spawn. See MM_CONTEXT_LOCK_LAM.

-- 
  Kiryl Shutsemau / Kirill A. Shutemov