Add a new prctl option to enable/disable user-level hypercalls when
running in a confidential VM. Add support for checking this flag on
VMCALL #VE for TDX and transfer control to a hypervisor
vendor-specific handler.
Signed-off-by: Tim Merrifield <tim.merrifield@broadcom.com>
---
arch/x86/coco/tdx/tdx.c | 18 ++++++++++++++++++
arch/x86/include/asm/thread_info.h | 2 ++
arch/x86/include/asm/x86_init.h | 1 +
arch/x86/include/uapi/asm/prctl.h | 3 +++
arch/x86/kernel/process.c | 20 ++++++++++++++++++++
5 files changed, 44 insertions(+)
diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
index ef8ec2425998..23111e4c1f91 100644
--- a/arch/x86/coco/tdx/tdx.c
+++ b/arch/x86/coco/tdx/tdx.c
@@ -239,6 +239,7 @@ static int ve_instr_len(struct ve_info *ve)
case EXIT_REASON_MSR_WRITE:
case EXIT_REASON_CPUID:
case EXIT_REASON_IO_INSTRUCTION:
+ case EXIT_REASON_VMCALL:
/* It is safe to use ve->instr_len for #VE due instructions */
return ve->instr_len;
case EXIT_REASON_EPT_VIOLATION:
@@ -635,6 +636,21 @@ void tdx_get_ve_info(struct ve_info *ve)
ve->instr_info = upper_32_bits(args.r10);
}
+/*
+ * Handle user-initiated, hypervisor-specific VMCALLs.
+ */
+static int handle_user_vmcall(struct pt_regs *regs, struct ve_info *ve)
+{
+ if (x86_platform.hyper.tdx_hcall &&
+ test_thread_flag(TIF_COCO_USER_HCALL)) {
+ if (!x86_platform.hyper.tdx_hcall(regs))
+ return -EIO;
+ return ve_instr_len(ve);
+ } else {
+ return -EOPNOTSUPP;
+ }
+}
+
/*
* Handle the user initiated #VE.
*
@@ -646,6 +662,8 @@ static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
switch (ve->exit_reason) {
case EXIT_REASON_CPUID:
return handle_cpuid(regs, ve);
+ case EXIT_REASON_VMCALL:
+ return handle_user_vmcall(regs, ve);
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
return -EIO;
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 12da7dfd5ef1..9f69a26a5e68 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -106,6 +106,7 @@ struct thread_info {
#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
#define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
#define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
+#define TIF_COCO_USER_HCALL 30 /* Userland hypercalls allowed in CoCo */
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
@@ -128,6 +129,7 @@ struct thread_info {
#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
#define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
#define _TIF_ADDR32 (1 << TIF_ADDR32)
+#define _TIF_COCO_USER_HCALL (1 << TIF_COCO_USER_HCALL)
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW_BASE \
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 213cf5379a5a..52975bedd33e 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -282,6 +282,7 @@ struct x86_hyper_runtime {
void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs);
bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs);
bool (*is_private_mmio)(u64 addr);
+ bool (*tdx_hcall)(struct pt_regs *regs);
};
/**
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 384e2cc6ac19..7fa289a1815b 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -16,6 +16,9 @@
#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
+#define ARCH_GET_COCO_USER_HCALL 0x1030
+#define ARCH_SET_COCO_USER_HCALL 0x1031
+
#define ARCH_XCOMP_TILECFG 17
#define ARCH_XCOMP_TILEDATA 18
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 1b3d417cd6c4..16f8ab6cde2e 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -1039,6 +1039,21 @@ unsigned long __get_wchan(struct task_struct *p)
return addr;
}
+static int get_coco_user_hcall_mode(void)
+{
+ return !test_thread_flag(TIF_COCO_USER_HCALL);
+}
+
+static int set_coco_user_hcall_mode(unsigned long enabled)
+{
+ if (enabled)
+ set_thread_flag(TIF_COCO_USER_HCALL);
+ else
+ clear_thread_flag(TIF_COCO_USER_HCALL);
+
+ return 0;
+}
+
long do_arch_prctl_common(int option, unsigned long arg2)
{
switch (option) {
@@ -1052,6 +1067,11 @@ long do_arch_prctl_common(int option, unsigned long arg2)
case ARCH_GET_XCOMP_GUEST_PERM:
case ARCH_REQ_XCOMP_GUEST_PERM:
return fpu_xstate_prctl(option, arg2);
+ case ARCH_GET_COCO_USER_HCALL:
+ return get_coco_user_hcall_mode();
+ case ARCH_SET_COCO_USER_HCALL:
+ return set_coco_user_hcall_mode(arg2);
+
}
return -EINVAL;
--
2.40.1
On Wed, Jul 03, 2024 at 11:36:00PM +0000, Tim Merrifield wrote:
> Add a new prctl option to enable/disable user-level hypercalls when
> running in a confidential VM. Add support for checking this flag on
> VMCALL #VE for TDX and transfer control to a hypervisor
> vendor-specific handler.
>
> Signed-off-by: Tim Merrifield <tim.merrifield@broadcom.com>
> ---
> arch/x86/coco/tdx/tdx.c | 18 ++++++++++++++++++
> arch/x86/include/asm/thread_info.h | 2 ++
> arch/x86/include/asm/x86_init.h | 1 +
> arch/x86/include/uapi/asm/prctl.h | 3 +++
> arch/x86/kernel/process.c | 20 ++++++++++++++++++++
> 5 files changed, 44 insertions(+)
>
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index ef8ec2425998..23111e4c1f91 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -239,6 +239,7 @@ static int ve_instr_len(struct ve_info *ve)
> case EXIT_REASON_MSR_WRITE:
> case EXIT_REASON_CPUID:
> case EXIT_REASON_IO_INSTRUCTION:
> + case EXIT_REASON_VMCALL:
> /* It is safe to use ve->instr_len for #VE due instructions */
> return ve->instr_len;
> case EXIT_REASON_EPT_VIOLATION:
> @@ -635,6 +636,21 @@ void tdx_get_ve_info(struct ve_info *ve)
> ve->instr_info = upper_32_bits(args.r10);
> }
>
> +/*
> + * Handle user-initiated, hypervisor-specific VMCALLs.
> + */
> +static int handle_user_vmcall(struct pt_regs *regs, struct ve_info *ve)
> +{
> + if (x86_platform.hyper.tdx_hcall &&
> + test_thread_flag(TIF_COCO_USER_HCALL)) {
> + if (!x86_platform.hyper.tdx_hcall(regs))
> + return -EIO;
> + return ve_instr_len(ve);
> + } else {
> + return -EOPNOTSUPP;
> + }

Maybe something like this would be more readable:

	if (!x86_platform.hyper.tdx_hcall)
		return -EOPNOTSUPP;

	if (!test_thread_flag(TIF_COCO_USER_HCALL))
		return -EOPNOTSUPP;

	if (!x86_platform.hyper.tdx_hcall(regs))
		return -EIO;

	return ve_instr_len(ve);

BTW, do we want tdx_hcall() to return an errno instead of bool?

> +}
> +
> /*
> * Handle the user initiated #VE.
> *
> @@ -646,6 +662,8 @@ static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
> switch (ve->exit_reason) {
> case EXIT_REASON_CPUID:
> return handle_cpuid(regs, ve);
> + case EXIT_REASON_VMCALL:
> + return handle_user_vmcall(regs, ve);
> default:
> pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
> return -EIO;
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 12da7dfd5ef1..9f69a26a5e68 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -106,6 +106,7 @@ struct thread_info {
> #define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */
> #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */
> #define TIF_ADDR32 29 /* 32-bit address space on 64 bits */
> +#define TIF_COCO_USER_HCALL 30 /* Userland hypercalls allowed in CoCo */
Tabs instead of spaces for alignment, please.
> #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
> #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
> @@ -128,6 +129,7 @@ struct thread_info {
> #define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP)
> #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES)
> #define _TIF_ADDR32 (1 << TIF_ADDR32)
> +#define _TIF_COCO_USER_HCALL (1 << TIF_COCO_USER_HCALL)
Ditto.
>
> /* flags to check in __switch_to() */
> #define _TIF_WORK_CTXSW_BASE \
> diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
> index 213cf5379a5a..52975bedd33e 100644
> --- a/arch/x86/include/asm/x86_init.h
> +++ b/arch/x86/include/asm/x86_init.h
> @@ -282,6 +282,7 @@ struct x86_hyper_runtime {
> void (*sev_es_hcall_prepare)(struct ghcb *ghcb, struct pt_regs *regs);
> bool (*sev_es_hcall_finish)(struct ghcb *ghcb, struct pt_regs *regs);
> bool (*is_private_mmio)(u64 addr);
> + bool (*tdx_hcall)(struct pt_regs *regs);
> };
>
> /**
> diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
> index 384e2cc6ac19..7fa289a1815b 100644
> --- a/arch/x86/include/uapi/asm/prctl.h
> +++ b/arch/x86/include/uapi/asm/prctl.h
> @@ -16,6 +16,9 @@
> #define ARCH_GET_XCOMP_GUEST_PERM 0x1024
> #define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
>
> +#define ARCH_GET_COCO_USER_HCALL 0x1030
> +#define ARCH_SET_COCO_USER_HCALL 0x1031
> +
Ditto.
> #define ARCH_XCOMP_TILECFG 17
> #define ARCH_XCOMP_TILEDATA 18
>
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 1b3d417cd6c4..16f8ab6cde2e 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -1039,6 +1039,21 @@ unsigned long __get_wchan(struct task_struct *p)
> return addr;
> }
>
> +static int get_coco_user_hcall_mode(void)
> +{
> + return !test_thread_flag(TIF_COCO_USER_HCALL);
> +}
> +
> +static int set_coco_user_hcall_mode(unsigned long enabled)
> +{
> + if (enabled)
> + set_thread_flag(TIF_COCO_USER_HCALL);
> + else
> + clear_thread_flag(TIF_COCO_USER_HCALL);
> +
> + return 0;
> +}
> +
Hm. Per-thread flag is odd. I think it should be per-process.
> long do_arch_prctl_common(int option, unsigned long arg2)
> {
> switch (option) {
> @@ -1052,6 +1067,11 @@ long do_arch_prctl_common(int option, unsigned long arg2)
> case ARCH_GET_XCOMP_GUEST_PERM:
> case ARCH_REQ_XCOMP_GUEST_PERM:
> return fpu_xstate_prctl(option, arg2);
> + case ARCH_GET_COCO_USER_HCALL:
> + return get_coco_user_hcall_mode();
> + case ARCH_SET_COCO_USER_HCALL:
> + return set_coco_user_hcall_mode(arg2);
> +
> }
>
> return -EINVAL;
> --
> 2.40.1
>
--
Kiryl Shutsemau / Kirill A. Shutemov
Thanks for the review, Kirill.

On Mon, Jul 08, 2024 at 03:19:54PM +0300, Kirill A . Shutemov wrote:
> Hm. Per-thread flag is odd. I think it should be per-process.

This is the only point I might need some clarification on. I agree
there doesn't seem to be much value in allowing per-thread control,
but I don't see any precedent for setting per-process flags through
arch_prctl or similar interfaces. Am I missing something?
On Mon, Jul 22, 2024 at 10:04:40PM -0700, Tim Merrifield wrote:
>
> Thanks for the review, Kirill.
>
> On Mon, Jul 08, 2024 at 03:19:54PM +0300, Kirill A . Shutemov wrote:
> > Hm. Per-thread flag is odd. I think it should be per-process.
>
> This is the only point I might need some clarification on. I agree
> there doesn't seem to be much value in allowing per-thread control,
> but I don't see any precedent for setting per-process flags through
> arch_prctl or similar interfaces. Am I missing something?

LAM is per-process. But it can only be enabled while the process has
only one thread, and the setting is locked when a second thread is
spawned. See MM_CONTEXT_LOCK_LAM.

-- 
Kiryl Shutsemau / Kirill A. Shutemov
© 2016 - 2026 Red Hat, Inc.