From: Sean Christopherson <sean.j.christopherson@intel.com>
Implement a system-scoped ioctl to get system-wide parameters for TDX.
Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
arch/x86/include/asm/kvm-x86-ops.h | 1 +
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/include/uapi/asm/kvm.h | 48 +++++++++++++++++++++++++
arch/x86/kvm/vmx/main.c | 2 ++
arch/x86/kvm/vmx/tdx.c | 51 +++++++++++++++++++++++++++
arch/x86/kvm/vmx/x86_ops.h | 2 ++
arch/x86/kvm/x86.c | 6 ++++
tools/arch/x86/include/uapi/asm/kvm.h | 48 +++++++++++++++++++++++++
8 files changed, 159 insertions(+)
diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index eac4b65d1b01..b46dcac078b2 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -117,6 +117,7 @@ KVM_X86_OP(enter_smm)
KVM_X86_OP(leave_smm)
KVM_X86_OP(enable_smi_window)
#endif
+KVM_X86_OP_OPTIONAL(dev_mem_enc_ioctl)
KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
KVM_X86_OP_OPTIONAL(mem_enc_register_region)
KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 00c25f6ab871..49e3ca89aced 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1719,6 +1719,7 @@ struct kvm_x86_ops {
void (*enable_smi_window)(struct kvm_vcpu *vcpu);
#endif
+ int (*dev_mem_enc_ioctl)(void __user *argp);
int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 6afbfbb32d56..af4c5bd0af1c 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -562,4 +562,52 @@ struct kvm_pmu_event_filter {
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_PROTECTED_VM 1
+/* Trust Domain eXtension sub-ioctl() commands. */
+enum kvm_tdx_cmd_id {
+ KVM_TDX_CAPABILITIES = 0,
+
+ KVM_TDX_CMD_NR_MAX,
+};
+
+struct kvm_tdx_cmd {
+ /* enum kvm_tdx_cmd_id */
+ __u32 id;
+ /* flags for sub-commend. If sub-command doesn't use this, set zero. */
+ __u32 flags;
+ /*
+ * data for each sub-command. An immediate or a pointer to the actual
+ * data in process virtual address. If sub-command doesn't use it,
+ * set zero.
+ */
+ __u64 data;
+ /*
+ * Auxiliary error code. The sub-command may return TDX SEAMCALL
+ * status code in addition to -Exxx.
+ * Defined for consistency with struct kvm_sev_cmd.
+ */
+ __u64 error;
+ /* Reserved: Defined for consistency with struct kvm_sev_cmd. */
+ __u64 unused;
+};
+
+struct kvm_tdx_cpuid_config {
+ __u32 leaf;
+ __u32 sub_leaf;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+};
+
+struct kvm_tdx_capabilities {
+ __u64 attrs_fixed0;
+ __u64 attrs_fixed1;
+ __u64 xfam_fixed0;
+ __u64 xfam_fixed1;
+
+ __u32 nr_cpuid_configs;
+ __u32 padding;
+ struct kvm_tdx_cpuid_config cpuid_configs[0];
+};
+
#endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index c8548004802a..6a5d0c7a2950 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -201,6 +201,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
.complete_emulated_msr = kvm_complete_insn_gp,
.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
+
+ .dev_mem_enc_ioctl = tdx_dev_ioctl,
};
struct kvm_x86_init_ops vt_init_ops __initdata = {
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index e9b7aa5654e9..b59d3081d061 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -21,6 +21,57 @@ int tdx_hardware_enable(void)
return tdx_cpu_enable();
}
+int tdx_dev_ioctl(void __user *argp)
+{
+ struct kvm_tdx_capabilities __user *user_caps;
+ const struct tdsysinfo_struct *tdsysinfo;
+ struct kvm_tdx_capabilities caps;
+ struct kvm_tdx_cmd cmd;
+
+ BUILD_BUG_ON(sizeof(struct kvm_tdx_cpuid_config) !=
+ sizeof(struct tdx_cpuid_config));
+
+ if (copy_from_user(&cmd, argp, sizeof(cmd)))
+ return -EFAULT;
+ if (cmd.flags || cmd.error || cmd.unused)
+ return -EINVAL;
+ /*
+ * Currently only KVM_TDX_CAPABILITIES is defined for system-scoped
+ * mem_enc_ioctl().
+ */
+ if (cmd.id != KVM_TDX_CAPABILITIES)
+ return -EINVAL;
+
+ tdsysinfo = tdx_get_sysinfo();
+ if (!tdsysinfo)
+ return -ENOTSUPP;
+
+ user_caps = (void __user *)cmd.data;
+ if (copy_from_user(&caps, user_caps, sizeof(caps)))
+ return -EFAULT;
+
+ if (caps.nr_cpuid_configs < tdsysinfo->num_cpuid_config)
+ return -E2BIG;
+
+ caps = (struct kvm_tdx_capabilities) {
+ .attrs_fixed0 = tdsysinfo->attributes_fixed0,
+ .attrs_fixed1 = tdsysinfo->attributes_fixed1,
+ .xfam_fixed0 = tdsysinfo->xfam_fixed0,
+ .xfam_fixed1 = tdsysinfo->xfam_fixed1,
+ .nr_cpuid_configs = tdsysinfo->num_cpuid_config,
+ .padding = 0,
+ };
+
+ if (copy_to_user(user_caps, &caps, sizeof(caps)))
+ return -EFAULT;
+ if (copy_to_user(user_caps->cpuid_configs, &tdsysinfo->cpuid_configs,
+ tdsysinfo->num_cpuid_config *
+ sizeof(struct tdx_cpuid_config)))
+ return -EFAULT;
+
+ return 0;
+}
+
static int __init tdx_module_setup(void)
{
const struct tdsysinfo_struct *tdsysinfo;
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index b2c74c1b5bbd..78c5537e23a1 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -141,10 +141,12 @@ void vmx_setup_mce(struct kvm_vcpu *vcpu);
int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
int tdx_hardware_enable(void);
bool tdx_is_vm_type_supported(unsigned long type);
+int tdx_dev_ioctl(void __user *argp);
#else
static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -ENOSYS; }
static inline int tdx_hardware_enable(void) { return -EOPNOTSUPP; }
static inline bool tdx_is_vm_type_supported(unsigned long type) { return false; }
+static inline int tdx_dev_ioctl(void __user *argp) { return -EOPNOTSUPP; };
#endif
#endif /* __KVM_X86_VMX_X86_OPS_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 27ab684f8374..a3dc32e33aca 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -4718,6 +4718,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
r = kvm_x86_dev_has_attr(&attr);
break;
}
+ case KVM_MEMORY_ENCRYPT_OP:
+ r = -EINVAL;
+ if (!kvm_x86_ops.dev_mem_enc_ioctl)
+ goto out;
+ r = static_call(kvm_x86_dev_mem_enc_ioctl)(argp);
+ break;
default:
r = -EINVAL;
break;
diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
index 6afbfbb32d56..af4c5bd0af1c 100644
--- a/tools/arch/x86/include/uapi/asm/kvm.h
+++ b/tools/arch/x86/include/uapi/asm/kvm.h
@@ -562,4 +562,52 @@ struct kvm_pmu_event_filter {
#define KVM_X86_DEFAULT_VM 0
#define KVM_X86_PROTECTED_VM 1
+/* Trust Domain eXtension sub-ioctl() commands. */
+enum kvm_tdx_cmd_id {
+ KVM_TDX_CAPABILITIES = 0,
+
+ KVM_TDX_CMD_NR_MAX,
+};
+
+struct kvm_tdx_cmd {
+ /* enum kvm_tdx_cmd_id */
+ __u32 id;
+ /* flags for sub-commend. If sub-command doesn't use this, set zero. */
+ __u32 flags;
+ /*
+ * data for each sub-command. An immediate or a pointer to the actual
+ * data in process virtual address. If sub-command doesn't use it,
+ * set zero.
+ */
+ __u64 data;
+ /*
+ * Auxiliary error code. The sub-command may return TDX SEAMCALL
+ * status code in addition to -Exxx.
+ * Defined for consistency with struct kvm_sev_cmd.
+ */
+ __u64 error;
+ /* Reserved: Defined for consistency with struct kvm_sev_cmd. */
+ __u64 unused;
+};
+
+struct kvm_tdx_cpuid_config {
+ __u32 leaf;
+ __u32 sub_leaf;
+ __u32 eax;
+ __u32 ebx;
+ __u32 ecx;
+ __u32 edx;
+};
+
+struct kvm_tdx_capabilities {
+ __u64 attrs_fixed0;
+ __u64 attrs_fixed1;
+ __u64 xfam_fixed0;
+ __u64 xfam_fixed1;
+
+ __u32 nr_cpuid_configs;
+ __u32 padding;
+ struct kvm_tdx_cpuid_config cpuid_configs[0];
+};
+
#endif /* _ASM_X86_KVM_H */
--
2.25.1
On Sun, 12 Mar 2023 10:55:40 -0700
isaku.yamahata@intel.com wrote:
Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP
does not use it at all and all the system-scoped ioctl of SNP going through
the CCP driver. So getting system-scope information of TDX/SNP will end up
differently.
Any thought, Sean? Moving getting SNP system-wide information to
KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like
CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP?
> From: Sean Christopherson <sean.j.christopherson@intel.com>
>
> Implement a system-scoped ioctl to get system-wide parameters for TDX.
>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> arch/x86/include/asm/kvm-x86-ops.h | 1 +
> arch/x86/include/asm/kvm_host.h | 1 +
> arch/x86/include/uapi/asm/kvm.h | 48 +++++++++++++++++++++++++
> arch/x86/kvm/vmx/main.c | 2 ++
> arch/x86/kvm/vmx/tdx.c | 51 +++++++++++++++++++++++++++
> arch/x86/kvm/vmx/x86_ops.h | 2 ++
> arch/x86/kvm/x86.c | 6 ++++
> tools/arch/x86/include/uapi/asm/kvm.h | 48 +++++++++++++++++++++++++
> 8 files changed, 159 insertions(+)
>
> diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> index eac4b65d1b01..b46dcac078b2 100644
> --- a/arch/x86/include/asm/kvm-x86-ops.h
> +++ b/arch/x86/include/asm/kvm-x86-ops.h
> @@ -117,6 +117,7 @@ KVM_X86_OP(enter_smm)
> KVM_X86_OP(leave_smm)
> KVM_X86_OP(enable_smi_window)
> #endif
> +KVM_X86_OP_OPTIONAL(dev_mem_enc_ioctl)
> KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
> KVM_X86_OP_OPTIONAL(mem_enc_register_region)
> KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> index 00c25f6ab871..49e3ca89aced 100644
> --- a/arch/x86/include/asm/kvm_host.h
> +++ b/arch/x86/include/asm/kvm_host.h
> @@ -1719,6 +1719,7 @@ struct kvm_x86_ops {
> void (*enable_smi_window)(struct kvm_vcpu *vcpu);
> #endif
>
> + int (*dev_mem_enc_ioctl)(void __user *argp);
> int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
> int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
> int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index 6afbfbb32d56..af4c5bd0af1c 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -562,4 +562,52 @@ struct kvm_pmu_event_filter {
> #define KVM_X86_DEFAULT_VM 0
> #define KVM_X86_PROTECTED_VM 1
>
> +/* Trust Domain eXtension sub-ioctl() commands. */
> +enum kvm_tdx_cmd_id {
> + KVM_TDX_CAPABILITIES = 0,
> +
> + KVM_TDX_CMD_NR_MAX,
> +};
> +
> +struct kvm_tdx_cmd {
> + /* enum kvm_tdx_cmd_id */
> + __u32 id;
> + /* flags for sub-commend. If sub-command doesn't use this, set zero. */
> + __u32 flags;
> + /*
> + * data for each sub-command. An immediate or a pointer to the actual
> + * data in process virtual address. If sub-command doesn't use it,
> + * set zero.
> + */
> + __u64 data;
> + /*
> + * Auxiliary error code. The sub-command may return TDX SEAMCALL
> + * status code in addition to -Exxx.
> + * Defined for consistency with struct kvm_sev_cmd.
> + */
> + __u64 error;
> + /* Reserved: Defined for consistency with struct kvm_sev_cmd. */
> + __u64 unused;
> +};
> +
> +struct kvm_tdx_cpuid_config {
> + __u32 leaf;
> + __u32 sub_leaf;
> + __u32 eax;
> + __u32 ebx;
> + __u32 ecx;
> + __u32 edx;
> +};
> +
> +struct kvm_tdx_capabilities {
> + __u64 attrs_fixed0;
> + __u64 attrs_fixed1;
> + __u64 xfam_fixed0;
> + __u64 xfam_fixed1;
> +
> + __u32 nr_cpuid_configs;
> + __u32 padding;
> + struct kvm_tdx_cpuid_config cpuid_configs[0];
> +};
> +
> #endif /* _ASM_X86_KVM_H */
> diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> index c8548004802a..6a5d0c7a2950 100644
> --- a/arch/x86/kvm/vmx/main.c
> +++ b/arch/x86/kvm/vmx/main.c
> @@ -201,6 +201,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> .complete_emulated_msr = kvm_complete_insn_gp,
>
> .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
> +
> + .dev_mem_enc_ioctl = tdx_dev_ioctl,
> };
>
> struct kvm_x86_init_ops vt_init_ops __initdata = {
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> index e9b7aa5654e9..b59d3081d061 100644
> --- a/arch/x86/kvm/vmx/tdx.c
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -21,6 +21,57 @@ int tdx_hardware_enable(void)
> return tdx_cpu_enable();
> }
>
> +int tdx_dev_ioctl(void __user *argp)
> +{
> + struct kvm_tdx_capabilities __user *user_caps;
> + const struct tdsysinfo_struct *tdsysinfo;
> + struct kvm_tdx_capabilities caps;
> + struct kvm_tdx_cmd cmd;
> +
> + BUILD_BUG_ON(sizeof(struct kvm_tdx_cpuid_config) !=
> + sizeof(struct tdx_cpuid_config));
> +
> + if (copy_from_user(&cmd, argp, sizeof(cmd)))
> + return -EFAULT;
> + if (cmd.flags || cmd.error || cmd.unused)
> + return -EINVAL;
> + /*
> + * Currently only KVM_TDX_CAPABILITIES is defined for system-scoped
> + * mem_enc_ioctl().
> + */
> + if (cmd.id != KVM_TDX_CAPABILITIES)
> + return -EINVAL;
> +
> + tdsysinfo = tdx_get_sysinfo();
> + if (!tdsysinfo)
> + return -ENOTSUPP;
> +
> + user_caps = (void __user *)cmd.data;
> + if (copy_from_user(&caps, user_caps, sizeof(caps)))
> + return -EFAULT;
> +
> + if (caps.nr_cpuid_configs < tdsysinfo->num_cpuid_config)
> + return -E2BIG;
> +
> + caps = (struct kvm_tdx_capabilities) {
> + .attrs_fixed0 = tdsysinfo->attributes_fixed0,
> + .attrs_fixed1 = tdsysinfo->attributes_fixed1,
> + .xfam_fixed0 = tdsysinfo->xfam_fixed0,
> + .xfam_fixed1 = tdsysinfo->xfam_fixed1,
> + .nr_cpuid_configs = tdsysinfo->num_cpuid_config,
> + .padding = 0,
> + };
> +
> + if (copy_to_user(user_caps, &caps, sizeof(caps)))
> + return -EFAULT;
> + if (copy_to_user(user_caps->cpuid_configs, &tdsysinfo->cpuid_configs,
> + tdsysinfo->num_cpuid_config *
> + sizeof(struct tdx_cpuid_config)))
> + return -EFAULT;
> +
> + return 0;
> +}
> +
> static int __init tdx_module_setup(void)
> {
> const struct tdsysinfo_struct *tdsysinfo;
> diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> index b2c74c1b5bbd..78c5537e23a1 100644
> --- a/arch/x86/kvm/vmx/x86_ops.h
> +++ b/arch/x86/kvm/vmx/x86_ops.h
> @@ -141,10 +141,12 @@ void vmx_setup_mce(struct kvm_vcpu *vcpu);
> int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
> int tdx_hardware_enable(void);
> bool tdx_is_vm_type_supported(unsigned long type);
> +int tdx_dev_ioctl(void __user *argp);
> #else
> static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -ENOSYS; }
> static inline int tdx_hardware_enable(void) { return -EOPNOTSUPP; }
> static inline bool tdx_is_vm_type_supported(unsigned long type) { return false; }
> +static inline int tdx_dev_ioctl(void __user *argp) { return -EOPNOTSUPP; };
> #endif
>
> #endif /* __KVM_X86_VMX_X86_OPS_H */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 27ab684f8374..a3dc32e33aca 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4718,6 +4718,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
> r = kvm_x86_dev_has_attr(&attr);
> break;
> }
> + case KVM_MEMORY_ENCRYPT_OP:
> + r = -EINVAL;
> + if (!kvm_x86_ops.dev_mem_enc_ioctl)
> + goto out;
> + r = static_call(kvm_x86_dev_mem_enc_ioctl)(argp);
> + break;
> default:
> r = -EINVAL;
> break;
> diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> index 6afbfbb32d56..af4c5bd0af1c 100644
> --- a/tools/arch/x86/include/uapi/asm/kvm.h
> +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> @@ -562,4 +562,52 @@ struct kvm_pmu_event_filter {
> #define KVM_X86_DEFAULT_VM 0
> #define KVM_X86_PROTECTED_VM 1
>
> +/* Trust Domain eXtension sub-ioctl() commands. */
> +enum kvm_tdx_cmd_id {
> + KVM_TDX_CAPABILITIES = 0,
> +
> + KVM_TDX_CMD_NR_MAX,
> +};
> +
> +struct kvm_tdx_cmd {
> + /* enum kvm_tdx_cmd_id */
> + __u32 id;
> + /* flags for sub-commend. If sub-command doesn't use this, set zero. */
> + __u32 flags;
> + /*
> + * data for each sub-command. An immediate or a pointer to the actual
> + * data in process virtual address. If sub-command doesn't use it,
> + * set zero.
> + */
> + __u64 data;
> + /*
> + * Auxiliary error code. The sub-command may return TDX SEAMCALL
> + * status code in addition to -Exxx.
> + * Defined for consistency with struct kvm_sev_cmd.
> + */
> + __u64 error;
> + /* Reserved: Defined for consistency with struct kvm_sev_cmd. */
> + __u64 unused;
> +};
> +
> +struct kvm_tdx_cpuid_config {
> + __u32 leaf;
> + __u32 sub_leaf;
> + __u32 eax;
> + __u32 ebx;
> + __u32 ecx;
> + __u32 edx;
> +};
> +
> +struct kvm_tdx_capabilities {
> + __u64 attrs_fixed0;
> + __u64 attrs_fixed1;
> + __u64 xfam_fixed0;
> + __u64 xfam_fixed1;
> +
> + __u32 nr_cpuid_configs;
> + __u32 padding;
> + struct kvm_tdx_cpuid_config cpuid_configs[0];
> +};
> +
> #endif /* _ASM_X86_KVM_H */
On 3/25/2023 4:43 PM, Zhi Wang wrote: > On Sun, 12 Mar 2023 10:55:40 -0700 > isaku.yamahata@intel.com wrote: > > Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP > does not use it at all and all the system-scoped ioctl of SNP going through > the CCP driver. So getting system-scope information of TDX/SNP will end up > differently. > > Any thought, Sean? Moving getting SNP system-wide information to > KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like > CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP? What's the real different of it? For me, it's just renaming KVM_MEMORY_ENCRYPT_OP to KVM_TDX_DEV_OP and maybe add some error message if the IOCTL is issued for AMD plaform.
On Fri, 31 Mar 2023 14:59:18 +0800 Xiaoyao Li <xiaoyao.li@intel.com> wrote: > On 3/25/2023 4:43 PM, Zhi Wang wrote: > > On Sun, 12 Mar 2023 10:55:40 -0700 > > isaku.yamahata@intel.com wrote: > > > > Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP > > does not use it at all and all the system-scoped ioctl of SNP going through > > the CCP driver. So getting system-scope information of TDX/SNP will end up > > differently. > > > > Any thought, Sean? Moving getting SNP system-wide information to > > KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like > > CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP? > > What's the real different of it? For me, it's just renaming > KVM_MEMORY_ENCRYPT_OP to KVM_TDX_DEV_OP and maybe add some error message > if the IOCTL is issued for AMD plaform. > Hi: The ioctl is the API for the userspace. The purpose is to be orthogonal, avoid confusion and reflect its nature. A "generic" name with only one implementation is fine in the early design. But if the other implementation at the same level is pretty sure not going to use it, then the abstraction, which is only abstracted for one implementation, is just confusing. The possible strategies are: 1) Re-factor the other implementation to fit the current abstraction. 2) Give up the abstraction. Go "specific". For 1), it seems not realistic due to the efforts of re-factoring the SEV driver. For 2), there can be several ways: a. renaming it, let the name reflect its nature. IMO, KVM_TDX_DEV_OP is not ideal as well, but I don't have a better one. b. moving it to a proper layer of the implementation. But it is also not realistic to have a "TDX" driver because of it. That's why I am torn here.
On Sat, Mar 25, 2023 at 10:43:06AM +0200,
Zhi Wang <zhi.wang.linux@gmail.com> wrote:
> On Sun, 12 Mar 2023 10:55:40 -0700
> isaku.yamahata@intel.com wrote:
>
> Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP
> does not use it at all and all the system-scoped ioctl of SNP going through
> the CCP driver. So getting system-scope information of TDX/SNP will end up
> differently.
>
> Any thought, Sean? Moving getting SNP system-wide information to
> KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like
> CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP?
We only need global parameters of the TDX module, and we don't interact with TDX
module at this point. One alternative is to export those parameters via sysfs.
Also the existence of the sysfs node indicates that the TDX module is
loaded(initialized?) or not in addition to boot log. Thus we can drop system
scope one.
What do you think?
Regarding to other TDX KVM specific ioctls (KVM_TDX_INIT_VM, KVM_TDX_INIT_VCPU,
KVM_TDX_INIT_MEM_REGION, and KVM_TDX_FINALIZE_VM), they are specific to KVM. So
I don't think it can be split out to independent driver.
Thanks,
> > From: Sean Christopherson <sean.j.christopherson@intel.com>
> >
> > Implement a system-scoped ioctl to get system-wide parameters for TDX.
> >
> > Signed-off-by: Sean Christopherson <sean.j.christopherson@intel.com>
> > Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> > ---
> > arch/x86/include/asm/kvm-x86-ops.h | 1 +
> > arch/x86/include/asm/kvm_host.h | 1 +
> > arch/x86/include/uapi/asm/kvm.h | 48 +++++++++++++++++++++++++
> > arch/x86/kvm/vmx/main.c | 2 ++
> > arch/x86/kvm/vmx/tdx.c | 51 +++++++++++++++++++++++++++
> > arch/x86/kvm/vmx/x86_ops.h | 2 ++
> > arch/x86/kvm/x86.c | 6 ++++
> > tools/arch/x86/include/uapi/asm/kvm.h | 48 +++++++++++++++++++++++++
> > 8 files changed, 159 insertions(+)
> >
> > diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
> > index eac4b65d1b01..b46dcac078b2 100644
> > --- a/arch/x86/include/asm/kvm-x86-ops.h
> > +++ b/arch/x86/include/asm/kvm-x86-ops.h
> > @@ -117,6 +117,7 @@ KVM_X86_OP(enter_smm)
> > KVM_X86_OP(leave_smm)
> > KVM_X86_OP(enable_smi_window)
> > #endif
> > +KVM_X86_OP_OPTIONAL(dev_mem_enc_ioctl)
> > KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
> > KVM_X86_OP_OPTIONAL(mem_enc_register_region)
> > KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
> > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
> > index 00c25f6ab871..49e3ca89aced 100644
> > --- a/arch/x86/include/asm/kvm_host.h
> > +++ b/arch/x86/include/asm/kvm_host.h
> > @@ -1719,6 +1719,7 @@ struct kvm_x86_ops {
> > void (*enable_smi_window)(struct kvm_vcpu *vcpu);
> > #endif
> >
> > + int (*dev_mem_enc_ioctl)(void __user *argp);
> > int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp);
> > int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp);
> > int (*mem_enc_unregister_region)(struct kvm *kvm, struct kvm_enc_region *argp);
> > diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> > index 6afbfbb32d56..af4c5bd0af1c 100644
> > --- a/arch/x86/include/uapi/asm/kvm.h
> > +++ b/arch/x86/include/uapi/asm/kvm.h
> > @@ -562,4 +562,52 @@ struct kvm_pmu_event_filter {
> > #define KVM_X86_DEFAULT_VM 0
> > #define KVM_X86_PROTECTED_VM 1
> >
> > +/* Trust Domain eXtension sub-ioctl() commands. */
> > +enum kvm_tdx_cmd_id {
> > + KVM_TDX_CAPABILITIES = 0,
> > +
> > + KVM_TDX_CMD_NR_MAX,
> > +};
> > +
> > +struct kvm_tdx_cmd {
> > + /* enum kvm_tdx_cmd_id */
> > + __u32 id;
> > + /* flags for sub-commend. If sub-command doesn't use this, set zero. */
> > + __u32 flags;
> > + /*
> > + * data for each sub-command. An immediate or a pointer to the actual
> > + * data in process virtual address. If sub-command doesn't use it,
> > + * set zero.
> > + */
> > + __u64 data;
> > + /*
> > + * Auxiliary error code. The sub-command may return TDX SEAMCALL
> > + * status code in addition to -Exxx.
> > + * Defined for consistency with struct kvm_sev_cmd.
> > + */
> > + __u64 error;
> > + /* Reserved: Defined for consistency with struct kvm_sev_cmd. */
> > + __u64 unused;
> > +};
> > +
> > +struct kvm_tdx_cpuid_config {
> > + __u32 leaf;
> > + __u32 sub_leaf;
> > + __u32 eax;
> > + __u32 ebx;
> > + __u32 ecx;
> > + __u32 edx;
> > +};
> > +
> > +struct kvm_tdx_capabilities {
> > + __u64 attrs_fixed0;
> > + __u64 attrs_fixed1;
> > + __u64 xfam_fixed0;
> > + __u64 xfam_fixed1;
> > +
> > + __u32 nr_cpuid_configs;
> > + __u32 padding;
> > + struct kvm_tdx_cpuid_config cpuid_configs[0];
> > +};
> > +
> > #endif /* _ASM_X86_KVM_H */
> > diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
> > index c8548004802a..6a5d0c7a2950 100644
> > --- a/arch/x86/kvm/vmx/main.c
> > +++ b/arch/x86/kvm/vmx/main.c
> > @@ -201,6 +201,8 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
> > .complete_emulated_msr = kvm_complete_insn_gp,
> >
> > .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
> > +
> > + .dev_mem_enc_ioctl = tdx_dev_ioctl,
> > };
> >
> > struct kvm_x86_init_ops vt_init_ops __initdata = {
> > diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> > index e9b7aa5654e9..b59d3081d061 100644
> > --- a/arch/x86/kvm/vmx/tdx.c
> > +++ b/arch/x86/kvm/vmx/tdx.c
> > @@ -21,6 +21,57 @@ int tdx_hardware_enable(void)
> > return tdx_cpu_enable();
> > }
> >
> > +int tdx_dev_ioctl(void __user *argp)
> > +{
> > + struct kvm_tdx_capabilities __user *user_caps;
> > + const struct tdsysinfo_struct *tdsysinfo;
> > + struct kvm_tdx_capabilities caps;
> > + struct kvm_tdx_cmd cmd;
> > +
> > + BUILD_BUG_ON(sizeof(struct kvm_tdx_cpuid_config) !=
> > + sizeof(struct tdx_cpuid_config));
> > +
> > + if (copy_from_user(&cmd, argp, sizeof(cmd)))
> > + return -EFAULT;
> > + if (cmd.flags || cmd.error || cmd.unused)
> > + return -EINVAL;
> > + /*
> > + * Currently only KVM_TDX_CAPABILITIES is defined for system-scoped
> > + * mem_enc_ioctl().
> > + */
> > + if (cmd.id != KVM_TDX_CAPABILITIES)
> > + return -EINVAL;
> > +
> > + tdsysinfo = tdx_get_sysinfo();
> > + if (!tdsysinfo)
> > + return -ENOTSUPP;
> > +
> > + user_caps = (void __user *)cmd.data;
> > + if (copy_from_user(&caps, user_caps, sizeof(caps)))
> > + return -EFAULT;
> > +
> > + if (caps.nr_cpuid_configs < tdsysinfo->num_cpuid_config)
> > + return -E2BIG;
> > +
> > + caps = (struct kvm_tdx_capabilities) {
> > + .attrs_fixed0 = tdsysinfo->attributes_fixed0,
> > + .attrs_fixed1 = tdsysinfo->attributes_fixed1,
> > + .xfam_fixed0 = tdsysinfo->xfam_fixed0,
> > + .xfam_fixed1 = tdsysinfo->xfam_fixed1,
> > + .nr_cpuid_configs = tdsysinfo->num_cpuid_config,
> > + .padding = 0,
> > + };
> > +
> > + if (copy_to_user(user_caps, &caps, sizeof(caps)))
> > + return -EFAULT;
> > + if (copy_to_user(user_caps->cpuid_configs, &tdsysinfo->cpuid_configs,
> > + tdsysinfo->num_cpuid_config *
> > + sizeof(struct tdx_cpuid_config)))
> > + return -EFAULT;
> > +
> > + return 0;
> > +}
> > +
> > static int __init tdx_module_setup(void)
> > {
> > const struct tdsysinfo_struct *tdsysinfo;
> > diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
> > index b2c74c1b5bbd..78c5537e23a1 100644
> > --- a/arch/x86/kvm/vmx/x86_ops.h
> > +++ b/arch/x86/kvm/vmx/x86_ops.h
> > @@ -141,10 +141,12 @@ void vmx_setup_mce(struct kvm_vcpu *vcpu);
> > int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
> > int tdx_hardware_enable(void);
> > bool tdx_is_vm_type_supported(unsigned long type);
> > +int tdx_dev_ioctl(void __user *argp);
> > #else
> > static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -ENOSYS; }
> > static inline int tdx_hardware_enable(void) { return -EOPNOTSUPP; }
> > static inline bool tdx_is_vm_type_supported(unsigned long type) { return false; }
> > +static inline int tdx_dev_ioctl(void __user *argp) { return -EOPNOTSUPP; };
> > #endif
> >
> > #endif /* __KVM_X86_VMX_X86_OPS_H */
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 27ab684f8374..a3dc32e33aca 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -4718,6 +4718,12 @@ long kvm_arch_dev_ioctl(struct file *filp,
> > r = kvm_x86_dev_has_attr(&attr);
> > break;
> > }
> > + case KVM_MEMORY_ENCRYPT_OP:
> > + r = -EINVAL;
> > + if (!kvm_x86_ops.dev_mem_enc_ioctl)
> > + goto out;
> > + r = static_call(kvm_x86_dev_mem_enc_ioctl)(argp);
> > + break;
> > default:
> > r = -EINVAL;
> > break;
> > diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h
> > index 6afbfbb32d56..af4c5bd0af1c 100644
> > --- a/tools/arch/x86/include/uapi/asm/kvm.h
> > +++ b/tools/arch/x86/include/uapi/asm/kvm.h
> > @@ -562,4 +562,52 @@ struct kvm_pmu_event_filter {
> > #define KVM_X86_DEFAULT_VM 0
> > #define KVM_X86_PROTECTED_VM 1
> >
> > +/* Trust Domain eXtension sub-ioctl() commands. */
> > +enum kvm_tdx_cmd_id {
> > + KVM_TDX_CAPABILITIES = 0,
> > +
> > + KVM_TDX_CMD_NR_MAX,
> > +};
> > +
> > +struct kvm_tdx_cmd {
> > + /* enum kvm_tdx_cmd_id */
> > + __u32 id;
> > + /* flags for sub-commend. If sub-command doesn't use this, set zero. */
> > + __u32 flags;
> > + /*
> > + * data for each sub-command. An immediate or a pointer to the actual
> > + * data in process virtual address. If sub-command doesn't use it,
> > + * set zero.
> > + */
> > + __u64 data;
> > + /*
> > + * Auxiliary error code. The sub-command may return TDX SEAMCALL
> > + * status code in addition to -Exxx.
> > + * Defined for consistency with struct kvm_sev_cmd.
> > + */
> > + __u64 error;
> > + /* Reserved: Defined for consistency with struct kvm_sev_cmd. */
> > + __u64 unused;
> > +};
> > +
> > +struct kvm_tdx_cpuid_config {
> > + __u32 leaf;
> > + __u32 sub_leaf;
> > + __u32 eax;
> > + __u32 ebx;
> > + __u32 ecx;
> > + __u32 edx;
> > +};
> > +
> > +struct kvm_tdx_capabilities {
> > + __u64 attrs_fixed0;
> > + __u64 attrs_fixed1;
> > + __u64 xfam_fixed0;
> > + __u64 xfam_fixed1;
> > +
> > + __u32 nr_cpuid_configs;
> > + __u32 padding;
> > + struct kvm_tdx_cpuid_config cpuid_configs[0];
> > +};
> > +
> > #endif /* _ASM_X86_KVM_H */
>
--
Isaku Yamahata <isaku.yamahata@gmail.com>
On Wed, Mar 29, 2023 at 04:17:22PM -0700,
Isaku Yamahata <isaku.yamahata@gmail.com> wrote:
> On Sat, Mar 25, 2023 at 10:43:06AM +0200,
> Zhi Wang <zhi.wang.linux@gmail.com> wrote:
>
> > On Sun, 12 Mar 2023 10:55:40 -0700
> > isaku.yamahata@intel.com wrote:
> >
> > Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP
> > does not use it at all and all the system-scoped ioctl of SNP going through
> > the CCP driver. So getting system-scope information of TDX/SNP will end up
> > differently.
> >
> > Any thought, Sean? Moving getting SNP system-wide information to
> > KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like
> > CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP?
>
> We only need global parameters of the TDX module, and we don't interact with TDX
> module at this point. One alternative is to export those parameters via sysfs.
> Also the existence of the sysfs node indicates that the TDX module is
> loaded(initialized?) or not in addition to boot log. Thus we can drop system
> scope one.
> What do you think?
>
> Regarding to other TDX KVM specific ioctls (KVM_TDX_INIT_VM, KVM_TDX_INIT_VCPU,
> KVM_TDX_INIT_MEM_REGION, and KVM_TDX_FINALIZE_VM), they are specific to KVM. So
> I don't think it can be split out to independent driver.
Here is the patch to export those info via sysfs.
From e0744e506eb92e47d8317e489945a3ba804edfa7 Mon Sep 17 00:00:00 2001
Message-Id: <e0744e506eb92e47d8317e489945a3ba804edfa7.1680221730.git.isaku.yamahata@intel.com>
In-Reply-To: <8e0bc0e8e5d435f54f10c7642a862629ef2acb89.1680221729.git.isaku.yamahata@intel.com>
References: <8e0bc0e8e5d435f54f10c7642a862629ef2acb89.1680221729.git.isaku.yamahata@intel.com>
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Thu, 30 Mar 2023 00:05:03 -0700
Subject: [PATCH] x86/virt/tdx: Export TD config params of TDX module via sysfs
TDX module has parameters for VMM to configure TD. User space VMM, e.g.
qemu, needs to know it. Export them to user space via sysfs.
TDX 1.0 provides TDH.SYS.INFO to provide system information in
TDSYSINFO_STRUCT. Its future extensibility is limited because of its
struct. From TDX 1.5, TDH.SYS.RD(metadata field_id) to read the info
specified by field id. So instead of exporting TDSYSINFO_STRUCT, adapt
metadata way to export those system information.
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
Documentation/ABI/testing/sysfs-firmware-tdx | 23 +++
arch/x86/include/asm/tdx.h | 33 ++++
arch/x86/virt/vmx/tdx/tdx.c | 164 +++++++++++++++++++
arch/x86/virt/vmx/tdx/tdx.h | 18 ++
4 files changed, 238 insertions(+)
create mode 100644 Documentation/ABI/testing/sysfs-firmware-tdx
diff --git a/Documentation/ABI/testing/sysfs-firmware-tdx b/Documentation/ABI/testing/sysfs-firmware-tdx
new file mode 100644
index 000000000000..1f26fb178144
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-firmware-tdx
@@ -0,0 +1,23 @@
+What: /sys/firmware/tdx/tdx_module/metadata
+Date: March 2023
+KernelVersion: 6.3
+Contact: Isaku Yamahata <isaku.yamahata@intel.com>, kvm@vger.kernel.org
+Users: qemu, libvirt
+Description:
+ The TDX feature requires a firmware that is known as the TDX
+ module. The TDX module exposes its metadata in the following
+ read-only files. The information corresponds to the TDX global
+ metadata specified by 64bit field id. The file name is hex
+ string in lower case. The value is binary.
+ User space VMM like qemu needs refer to them to determine what
+ parameters are needed or allowed to configure guest TDs.
+
+ ================== ============================================
+ 1900000300000000 ATTRIBUTES_FIXED0
+ 1900000300000001 ATTRIBUTES_FIXED1
+ 1900000300000002 XFAM_FIXED0
+ 1900000300000003 XFAM_FIXED1
+ 9900000100000004 NUM_CPUID_CONFIG
+ 9900000300000400 CPUID_LEAVES
+ 9900000300000500 CPUID_VALUES
+ ================== ============================================
\ No newline at end of file
diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
index 05870e5ed131..c650ac22a916 100644
--- a/arch/x86/include/asm/tdx.h
+++ b/arch/x86/include/asm/tdx.h
@@ -110,6 +110,39 @@ struct tdx_cpuid_config {
u32 edx;
} __packed;
+struct tdx_cpuid_config_leaf {
+ u32 leaf;
+ u32 sub_leaf;
+} __packed;
+static_assert(offsetof(struct tdx_cpuid_config, leaf) ==
+ offsetof(struct tdx_cpuid_config_leaf, leaf));
+static_assert(offsetof(struct tdx_cpuid_config, sub_leaf) ==
+ offsetof(struct tdx_cpuid_config_leaf, sub_leaf));
+static_assert(offsetofend(struct tdx_cpuid_config, sub_leaf) ==
+ sizeof(struct tdx_cpuid_config_leaf));
+
+struct tdx_cpuid_config_value {
+ u32 eax;
+ u32 ebx;
+ u32 ecx;
+ u32 edx;
+} __packed;
+static_assert(offsetof(struct tdx_cpuid_config, eax) -
+ offsetof(struct tdx_cpuid_config, eax) ==
+ offsetof(struct tdx_cpuid_config_value, eax));
+static_assert(offsetof(struct tdx_cpuid_config, ebx) -
+ offsetof(struct tdx_cpuid_config, eax) ==
+ offsetof(struct tdx_cpuid_config_value, ebx));
+static_assert(offsetof(struct tdx_cpuid_config, ecx) -
+ offsetof(struct tdx_cpuid_config, eax) ==
+ offsetof(struct tdx_cpuid_config_value, ecx));
+static_assert(offsetof(struct tdx_cpuid_config, edx) -
+ offsetof(struct tdx_cpuid_config, eax) ==
+ offsetof(struct tdx_cpuid_config_value, edx));
+static_assert(offsetofend(struct tdx_cpuid_config, edx) -
+ offsetof(struct tdx_cpuid_config, eax) ==
+ sizeof(struct tdx_cpuid_config_value));
+
#define TDSYSINFO_STRUCT_SIZE 1024
#define TDSYSINFO_STRUCT_ALIGNMENT 1024
diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
index f9f9c1b76501..56ca520d67d6 100644
--- a/arch/x86/virt/vmx/tdx/tdx.c
+++ b/arch/x86/virt/vmx/tdx/tdx.c
@@ -33,6 +33,12 @@
#include <asm/tdx.h>
#include "tdx.h"
+#ifdef CONFIG_SYSFS
+static int tdx_sysfs_init(void);
+#else
+static inline int tdx_sysfs_init(void) { return 0;}
+#endif
+
u32 tdx_global_keyid __ro_after_init;
EXPORT_SYMBOL_GPL(tdx_global_keyid);
static u32 tdx_guest_keyid_start __ro_after_init;
@@ -399,6 +405,10 @@ static int __tdx_get_sysinfo(struct tdsysinfo_struct *sysinfo,
if (ret)
return ret;
+ ret = tdx_sysfs_init();
+ if (ret)
+ return ret;
+
pr_info("TDX module: atributes 0x%x, vendor_id 0x%x, major_version %u, minor_version %u, build_date %u, build_num %u",
sysinfo->attributes, sysinfo->vendor_id,
sysinfo->major_version, sysinfo->minor_version,
@@ -1367,3 +1377,157 @@ int tdx_enable(void)
return ret;
}
EXPORT_SYMBOL_GPL(tdx_enable);
+
+#ifdef CONFIG_SYSFS
+
+static struct kobject *tdx_kobj;
+static struct kobject *tdx_module_kobj;
+static struct kobject *tdx_metadata_kobj;
+
+#define TDX_METADATA_ATTR(_name, field_id_name, _size) \
+static struct bin_attribute tdx_metadata_ ## _name = { \
+ .attr = { \
+ .name = field_id_name, \
+ .mode = 0444, \
+ }, \
+ .size = _size, \
+ .read = tdx_metadata_ ## _name ## _show, \
+}
+
+#define TDX_METADATA_ATTR_SHOW(_name, field_id_name) \
+static ssize_t tdx_metadata_ ## _name ## _show(struct file *filp, struct kobject *kobj, \
+ struct bin_attribute *bin_attr, \
+ char *buf, loff_t offset, size_t count) \
+{ \
+ struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo); \
+ \
+ return memory_read_from_buffer(buf, count, &offset, \
+ &sysinfo->_name, \
+ sizeof(sysinfo->_name)); \
+} \
+TDX_METADATA_ATTR(_name, field_id_name, sizeof_field(struct tdsysinfo_struct, _name))
+
+TDX_METADATA_ATTR_SHOW(attributes_fixed0, TDX_METADATA_ATTRIBUTES_FIXED0_NAME);
+TDX_METADATA_ATTR_SHOW(attributes_fixed1, TDX_METADATA_ATTRIBUTES_FIXED1_NAME);
+TDX_METADATA_ATTR_SHOW(xfam_fixed0, TDX_METADATA_XFAM_FIXED0_NAME);
+TDX_METADATA_ATTR_SHOW(xfam_fixed1, TDX_METADATA_XFAM_FIXED1_NAME);
+
+static ssize_t tdx_metadata_num_cpuid_config_show(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t offset, size_t count)
+{
+ struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
+ /*
+ * Although tdsysinfo_struct.num_cpuid_config is defined as u32 for
+ * alignment, TDX 1.5 defines metadata NUM_CONFIG_CPUID as u16.
+ */
+ u16 tmp = (u16)sysinfo->num_cpuid_config;
+
+ WARN_ON_ONCE(tmp != sysinfo->num_cpuid_config);
+ return memory_read_from_buffer(buf, count, &offset, &tmp, sizeof(tmp));
+}
+TDX_METADATA_ATTR(num_cpuid_config, TDX_METADATA_NUM_CPUID_CONFIG_NAME, sizeof(u16));
+
+static ssize_t tdx_metadata_cpuid_leaves_show(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t offset, size_t count)
+{
+ struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
+ ssize_t r;
+ struct tdx_cpuid_config_leaf *tmp;
+ u32 i;
+
+ tmp = kmalloc(bin_attr->size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ for (i = 0; i < sysinfo->num_cpuid_config; i++) {
+ struct tdx_cpuid_config *c = &sysinfo->cpuid_configs[i];
+ struct tdx_cpuid_config_leaf *leaf = (struct tdx_cpuid_config_leaf *)c;
+
+ memcpy(tmp + i, leaf, sizeof(*leaf));
+ }
+
+ r = memory_read_from_buffer(buf, count, &offset, tmp, bin_attr->size);
+ kfree(tmp);
+ return r;
+}
+
+TDX_METADATA_ATTR(cpuid_leaves, TDX_METADATA_CPUID_LEAVES_NAME, 0);
+
+static ssize_t tdx_metadata_cpuid_values_show(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf,
+ loff_t offset, size_t count)
+{
+ struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
+ struct tdx_cpuid_config_value *tmp;
+ ssize_t r;
+ u32 i;
+
+ tmp = kmalloc(bin_attr->size, GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ for (i = 0; i < sysinfo->num_cpuid_config; i++) {
+ struct tdx_cpuid_config *c = &sysinfo->cpuid_configs[i];
+ struct tdx_cpuid_config_value *value = (struct tdx_cpuid_config_value *)&c->eax;
+
+ memcpy(tmp + i, value, sizeof(*value));
+ }
+
+ r = memory_read_from_buffer(buf, count, &offset, tmp, bin_attr->size);
+ kfree(tmp);
+ return r;
+}
+
+TDX_METADATA_ATTR(cpuid_values, TDX_METADATA_CPUID_VALUES_NAME, 0);
+
+static struct bin_attribute *tdx_metadata_attrs[] = {
+ &tdx_metadata_attributes_fixed0,
+ &tdx_metadata_attributes_fixed1,
+ &tdx_metadata_xfam_fixed0,
+ &tdx_metadata_xfam_fixed1,
+ &tdx_metadata_num_cpuid_config,
+ &tdx_metadata_cpuid_leaves,
+ &tdx_metadata_cpuid_values,
+ NULL,
+};
+
+static const struct attribute_group tdx_metadata_attr_group = {
+ .bin_attrs = tdx_metadata_attrs,
+};
+
+static int tdx_sysfs_init(void)
+{
+ struct tdsysinfo_struct *sysinfo;
+ int ret;
+
+ tdx_kobj = kobject_create_and_add("tdx", firmware_kobj);
+ if (!tdx_kobj) {
+ pr_err("kobject_create_and_add tdx failed\n");
+ return -EINVAL;
+ }
+
+ tdx_module_kobj = kobject_create_and_add("tdx_module", tdx_kobj);
+ if (!tdx_module_kobj) {
+ pr_err("kobject_create_and_add tdx_module failed\n");
+ return -EINVAL;
+ }
+ tdx_metadata_kobj = kobject_create_and_add("metadata", tdx_module_kobj);
+ if (!tdx_metadata_kobj) {
+ pr_err("Sysfs exporting tdx global metadata failed %d\n", ret);
+ return -EINVAL;
+ }
+
+ sysinfo = &PADDED_STRUCT(tdsysinfo);
+ tdx_metadata_cpuid_leaves.size = sysinfo->num_cpuid_config *
+ sizeof(struct tdx_cpuid_config_leaf);
+ tdx_metadata_cpuid_values.size = sysinfo->num_cpuid_config *
+ sizeof(struct tdx_cpuid_config_value);
+ ret = sysfs_create_group(tdx_metadata_kobj, &tdx_metadata_attr_group);
+ if (ret)
+ pr_err("Sysfs exporting tdx module attributes failed %d\n", ret);
+
+ return ret;
+}
+#endif
diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
index db0cbcceb5b3..a48f38fe6cc4 100644
--- a/arch/x86/virt/vmx/tdx/tdx.h
+++ b/arch/x86/virt/vmx/tdx/tdx.h
@@ -93,4 +93,22 @@ struct tdmr_info_list {
int max_tdmrs; /* How many 'tdmr_info's are allocated */
};
+/* TDX metadata base field id. */
+#define TDX_METADATA_ATTRIBUTES_FIXED0 0x1900000300000000ULL
+#define TDX_METADATA_ATTRIBUTES_FIXED1 0x1900000300000001ULL
+#define TDX_METADATA_XFAM_FIXED0 0x1900000300000002ULL
+#define TDX_METADATA_XFAM_FIXED1 0x1900000300000003ULL
+#define TDX_METADATA_NUM_CPUID_CONFIG 0x9900000100000004ULL
+#define TDX_METADATA_CPUID_LEAVES 0x9900000300000400ULL
+#define TDX_METADATA_CPUID_VALUES 0x9900000300000500ULL
+
+/* File name for sysfs: hex with lower case. */
+#define TDX_METADATA_ATTRIBUTES_FIXED0_NAME "1900000300000000"
+#define TDX_METADATA_ATTRIBUTES_FIXED1_NAME "1900000300000001"
+#define TDX_METADATA_XFAM_FIXED0_NAME "1900000300000002"
+#define TDX_METADATA_XFAM_FIXED1_NAME "1900000300000003"
+#define TDX_METADATA_NUM_CPUID_CONFIG_NAME "9900000100000004"
+#define TDX_METADATA_CPUID_LEAVES_NAME "9900000300000400"
+#define TDX_METADATA_CPUID_VALUES_NAME "9900000300000500"
+
#endif
--
2.25.1
--
Isaku Yamahata <isaku.yamahata@gmail.com>
Isaku Yamahata wrote:
> On Wed, Mar 29, 2023 at 04:17:22PM -0700,
> Isaku Yamahata <isaku.yamahata@gmail.com> wrote:
>
> > On Sat, Mar 25, 2023 at 10:43:06AM +0200,
> > Zhi Wang <zhi.wang.linux@gmail.com> wrote:
> >
> > > On Sun, 12 Mar 2023 10:55:40 -0700
> > > isaku.yamahata@intel.com wrote:
> > >
> > > Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP
> > > does not use it at all and all the system-scoped ioctl of SNP going through
> > > the CCP driver. So getting system-scope information of TDX/SNP will end up
> > > differently.
> > >
> > > Any thought, Sean? Moving getting SNP system-wide information to
> > > KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like
> > > CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP?
> >
> > We only need global parameters of the TDX module, and we don't interact with TDX
> > module at this point. One alternative is to export those parameters via sysfs.
> > Also the existence of the sysfs node indicates that the TDX module is
> > loaded(initialized?) or not in addition to boot log. Thus we can drop system
> > scope one.
> > What do you think?
> >
> > Regarding to other TDX KVM specific ioctls (KVM_TDX_INIT_VM, KVM_TDX_INIT_VCPU,
> > KVM_TDX_INIT_MEM_REGION, and KVM_TDX_FINALIZE_VM), they are specific to KVM. So
> > I don't think it can be split out to independent driver.
>
> Here is the patch to export those info via sysfs.
>
> From e0744e506eb92e47d8317e489945a3ba804edfa7 Mon Sep 17 00:00:00 2001
> Message-Id: <e0744e506eb92e47d8317e489945a3ba804edfa7.1680221730.git.isaku.yamahata@intel.com>
> In-Reply-To: <8e0bc0e8e5d435f54f10c7642a862629ef2acb89.1680221729.git.isaku.yamahata@intel.com>
> References: <8e0bc0e8e5d435f54f10c7642a862629ef2acb89.1680221729.git.isaku.yamahata@intel.com>
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> Date: Thu, 30 Mar 2023 00:05:03 -0700
> Subject: [PATCH] x86/virt/tdx: Export TD config params of TDX module via sysfs
>
> TDX module has parameters for VMM to configure TD. User space VMM, e.g.
> qemu, needs to know it. Export them to user space via sysfs.
>
> TDX 1.0 provides TDH.SYS.INFO to provide system information in
> TDSYSINFO_STRUCT. Its future extensibility is limited because of its
> struct. From TDX 1.5, TDH.SYS.RD(metadata field_id) to read the info
> specified by field id. So instead of exporting TDSYSINFO_STRUCT, adapt
> metadata way to export those system information.
Hi, I came across tdx_sysfs_init() recently and had some comments if
this proposal is going to move forward:
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> Documentation/ABI/testing/sysfs-firmware-tdx | 23 +++
> arch/x86/include/asm/tdx.h | 33 ++++
> arch/x86/virt/vmx/tdx/tdx.c | 164 +++++++++++++++++++
> arch/x86/virt/vmx/tdx/tdx.h | 18 ++
> 4 files changed, 238 insertions(+)
> create mode 100644 Documentation/ABI/testing/sysfs-firmware-tdx
>
> diff --git a/Documentation/ABI/testing/sysfs-firmware-tdx b/Documentation/ABI/testing/sysfs-firmware-tdx
> new file mode 100644
> index 000000000000..1f26fb178144
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-firmware-tdx
> @@ -0,0 +1,23 @@
> +What: /sys/firmware/tdx/tdx_module/metadata
The TDX module is not "platform firmware" in comparison to the other EFI
and ACPI inhabitants in /sys/firmware. It is especially not static
platform firmware given it needs to be dynamically activated via KVM
module initialization.
Instead, sysfs already has a location for pure software construct
objects to host a sysfs ABI and that is /sys/bus/virtual. I propose a
common "TSM" class device here [1] and TDX can simply publish a named
attribute group, "host", to extend that class device with TDX specifics.
For cross-vendor consistency "host" is a symlink to the CCP device on
AMD.
[1]: http://lore.kernel.org/r/170660662589.224441.11503798303914595072.stgit@dwillia2-xfh.jf.intel.com
> +Date: March 2023
> +KernelVersion: 6.3
> +Contact: Isaku Yamahata <isaku.yamahata@intel.com>, kvm@vger.kernel.org
> +Users: qemu, libvirt
> +Description:
> + The TDX feature requires a firmware that is known as the TDX
> + module. The TDX module exposes its metadata in the following
> + read-only files. The information corresponds to the TDX global
> + metadata specified by 64bit field id.
> + string in lower case. The value is binary.
> + User space VMM like qemu needs refer to them to determine what
> + parameters are needed or allowed to configure guest TDs.
> +
> + ================== ============================================
> + 1900000300000000 ATTRIBUTES_FIXED0
> + 1900000300000001 ATTRIBUTES_FIXED1
> + 1900000300000002 XFAM_FIXED0
> + 1900000300000003 XFAM_FIXED1
> + 9900000100000004 NUM_CPUID_CONFIG
> + 9900000300000400 CPUID_LEAVES
> + 9900000300000500 CPUID_VALUES
> + ================== ============================================
This documentation needs to be per file. With an explanation of how each
file is expected to be used. Someone should reasonably be able to read
this documentation and go write a tool, I don't get that from this
documentation.
> \ No newline at end of file
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 05870e5ed131..c650ac22a916 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -110,6 +110,39 @@ struct tdx_cpuid_config {
> u32 edx;
> } __packed;
>
> +struct tdx_cpuid_config_leaf {
> + u32 leaf;
> + u32 sub_leaf;
> +} __packed;
> +static_assert(offsetof(struct tdx_cpuid_config, leaf) ==
> + offsetof(struct tdx_cpuid_config_leaf, leaf));
> +static_assert(offsetof(struct tdx_cpuid_config, sub_leaf) ==
> + offsetof(struct tdx_cpuid_config_leaf, sub_leaf));
> +static_assert(offsetofend(struct tdx_cpuid_config, sub_leaf) ==
> + sizeof(struct tdx_cpuid_config_leaf));
> +
> +struct tdx_cpuid_config_value {
> + u32 eax;
> + u32 ebx;
> + u32 ecx;
> + u32 edx;
> +} __packed;
> +static_assert(offsetof(struct tdx_cpuid_config, eax) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + offsetof(struct tdx_cpuid_config_value, eax));
> +static_assert(offsetof(struct tdx_cpuid_config, ebx) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + offsetof(struct tdx_cpuid_config_value, ebx));
> +static_assert(offsetof(struct tdx_cpuid_config, ecx) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + offsetof(struct tdx_cpuid_config_value, ecx));
> +static_assert(offsetof(struct tdx_cpuid_config, edx) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + offsetof(struct tdx_cpuid_config_value, edx));
> +static_assert(offsetofend(struct tdx_cpuid_config, edx) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + sizeof(struct tdx_cpuid_config_value));
> +
> #define TDSYSINFO_STRUCT_SIZE 1024
> #define TDSYSINFO_STRUCT_ALIGNMENT 1024
>
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index f9f9c1b76501..56ca520d67d6 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -33,6 +33,12 @@
> #include <asm/tdx.h>
> #include "tdx.h"
>
> +#ifdef CONFIG_SYSFS
> +static int tdx_sysfs_init(void);
> +#else
> +static inline int tdx_sysfs_init(void) { return 0;}
> +#endif
> +
> u32 tdx_global_keyid __ro_after_init;
> EXPORT_SYMBOL_GPL(tdx_global_keyid);
> static u32 tdx_guest_keyid_start __ro_after_init;
> @@ -399,6 +405,10 @@ static int __tdx_get_sysinfo(struct tdsysinfo_struct *sysinfo,
> if (ret)
> return ret;
>
> + ret = tdx_sysfs_init();
> + if (ret)
> + return ret;
> +
> pr_info("TDX module: atributes 0x%x, vendor_id 0x%x, major_version %u, minor_version %u, build_date %u, build_num %u",
> sysinfo->attributes, sysinfo->vendor_id,
> sysinfo->major_version, sysinfo->minor_version,
> @@ -1367,3 +1377,157 @@ int tdx_enable(void)
> return ret;
> }
> EXPORT_SYMBOL_GPL(tdx_enable);
> +
> +#ifdef CONFIG_SYSFS
> +
> +static struct kobject *tdx_kobj;
> +static struct kobject *tdx_module_kobj;
> +static struct kobject *tdx_metadata_kobj;
> +
> +#define TDX_METADATA_ATTR(_name, field_id_name, _size) \
> +static struct bin_attribute tdx_metadata_ ## _name = { \
> + .attr = { \
> + .name = field_id_name, \
> + .mode = 0444, \
> + }, \
> + .size = _size, \
> + .read = tdx_metadata_ ## _name ## _show, \
> +}
> +
> +#define TDX_METADATA_ATTR_SHOW(_name, field_id_name) \
> +static ssize_t tdx_metadata_ ## _name ## _show(struct file *filp, struct kobject *kobj, \
> + struct bin_attribute *bin_attr, \
> + char *buf, loff_t offset, size_t count) \
> +{ \
> + struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo); \
> + \
> + return memory_read_from_buffer(buf, count, &offset, \
> + &sysinfo->_name, \
> + sizeof(sysinfo->_name)); \
> +} \
> +TDX_METADATA_ATTR(_name, field_id_name, sizeof_field(struct tdsysinfo_struct, _name))
> +
> +TDX_METADATA_ATTR_SHOW(attributes_fixed0, TDX_METADATA_ATTRIBUTES_FIXED0_NAME);
> +TDX_METADATA_ATTR_SHOW(attributes_fixed1, TDX_METADATA_ATTRIBUTES_FIXED1_NAME);
> +TDX_METADATA_ATTR_SHOW(xfam_fixed0, TDX_METADATA_XFAM_FIXED0_NAME);
> +TDX_METADATA_ATTR_SHOW(xfam_fixed1, TDX_METADATA_XFAM_FIXED1_NAME);
> +
> +static ssize_t tdx_metadata_num_cpuid_config_show(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *bin_attr,
> + char *buf, loff_t offset, size_t count)
> +{
> + struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
> + /*
> + * Although tdsysinfo_struct.num_cpuid_config is defined as u32 for
> + * alignment, TDX 1.5 defines metadata NUM_CONFIG_CPUID as u16.
> + */
> + u16 tmp = (u16)sysinfo->num_cpuid_config;
> +
> + WARN_ON_ONCE(tmp != sysinfo->num_cpuid_config);
Why crash the kernel here?
> + return memory_read_from_buffer(buf, count, &offset, &tmp, sizeof(tmp));
> +}
> +TDX_METADATA_ATTR(num_cpuid_config, TDX_METADATA_NUM_CPUID_CONFIG_NAME, sizeof(u16));
> +
> +static ssize_t tdx_metadata_cpuid_leaves_show(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *bin_attr, char *buf,
> + loff_t offset, size_t count)
> +{
> + struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
> + ssize_t r;
> + struct tdx_cpuid_config_leaf *tmp;
> + u32 i;
> +
> + tmp = kmalloc(bin_attr->size, GFP_KERNEL);
> + if (!tmp)
> + return -ENOMEM;
Why is this allocating and then blindly copying bin_attr->size into
@buf? It it either knows that @buf is big enough, no need to allocate,
or if it does not know if @buf is big enough then the copy into @tmp
offers no protection.
> +
> + for (i = 0; i < sysinfo->num_cpuid_config; i++) {
> + struct tdx_cpuid_config *c = &sysinfo->cpuid_configs[i];
> + struct tdx_cpuid_config_leaf *leaf = (struct tdx_cpuid_config_leaf *)c;
> +
> + memcpy(tmp + i, leaf, sizeof(*leaf));
> + }
> +
> + r = memory_read_from_buffer(buf, count, &offset, tmp, bin_attr->size);
> + kfree(tmp);
> + return r;
> +}
> +
> +TDX_METADATA_ATTR(cpuid_leaves, TDX_METADATA_CPUID_LEAVES_NAME, 0);
> +
> +static ssize_t tdx_metadata_cpuid_values_show(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *bin_attr, char *buf,
> + loff_t offset, size_t count)
> +{
> + struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
> + struct tdx_cpuid_config_value *tmp;
> + ssize_t r;
> + u32 i;
> +
> + tmp = kmalloc(bin_attr->size, GFP_KERNEL);
> + if (!tmp)
> + return -ENOMEM;
> +
> + for (i = 0; i < sysinfo->num_cpuid_config; i++) {
> + struct tdx_cpuid_config *c = &sysinfo->cpuid_configs[i];
> + struct tdx_cpuid_config_value *value = (struct tdx_cpuid_config_value *)&c->eax;
> +
> + memcpy(tmp + i, value, sizeof(*value));
> + }
> +
> + r = memory_read_from_buffer(buf, count, &offset, tmp, bin_attr->size);
> + kfree(tmp);
> + return r;
> +}
> +
> +TDX_METADATA_ATTR(cpuid_values, TDX_METADATA_CPUID_VALUES_NAME, 0);
> +
> +static struct bin_attribute *tdx_metadata_attrs[] = {
> + &tdx_metadata_attributes_fixed0,
> + &tdx_metadata_attributes_fixed1,
> + &tdx_metadata_xfam_fixed0,
> + &tdx_metadata_xfam_fixed1,
> + &tdx_metadata_num_cpuid_config,
> + &tdx_metadata_cpuid_leaves,
> + &tdx_metadata_cpuid_values,
> + NULL,
> +};
> +
> +static const struct attribute_group tdx_metadata_attr_group = {
> + .bin_attrs = tdx_metadata_attrs,
> +};
> +
> +static int tdx_sysfs_init(void)
> +{
> + struct tdsysinfo_struct *sysinfo;
> + int ret;
> +
> + tdx_kobj = kobject_create_and_add("tdx", firmware_kobj);
> + if (!tdx_kobj) {
> + pr_err("kobject_create_and_add tdx failed\n");
> + return -EINVAL;
> + }
Subsystems, PCI for example [2], are slowly unwinding their usage of dynamic
sysfs_create_*() APIs in favor of static attribute groups. Dynamic
kobject_create_*() usage is even more of an anti-pattern for new code.
This goes away with static attribute group registration.
[2]: https://lore.kernel.org/linux-pci/20231019200110.GA1410324@bhelgaas/
On Thu, 30 Mar 2023 17:18:03 -0700
Isaku Yamahata <isaku.yamahata@gmail.com> wrote:
> On Wed, Mar 29, 2023 at 04:17:22PM -0700,
> Isaku Yamahata <isaku.yamahata@gmail.com> wrote:
>
> > On Sat, Mar 25, 2023 at 10:43:06AM +0200,
> > Zhi Wang <zhi.wang.linux@gmail.com> wrote:
> >
> > > On Sun, 12 Mar 2023 10:55:40 -0700
> > > isaku.yamahata@intel.com wrote:
> > >
> > > Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP
> > > does not use it at all and all the system-scoped ioctl of SNP going through
> > > the CCP driver. So getting system-scope information of TDX/SNP will end up
> > > differently.
> > >
> > > Any thought, Sean? Moving getting SNP system-wide information to
> > > KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like
> > > CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP?
> >
> > We only need global parameters of the TDX module, and we don't interact with TDX
> > module at this point. One alternative is to export those parameters via sysfs.
> > Also the existence of the sysfs node indicates that the TDX module is
> > loaded(initialized?) or not in addition to boot log. Thus we can drop system
> > scope one.
> > What do you think?
> >
I like this idea and the patch below, it feels right for me now. It would be nice
if more folks can chime in and comment.
> > Regarding to other TDX KVM specific ioctls (KVM_TDX_INIT_VM, KVM_TDX_INIT_VCPU,
> > KVM_TDX_INIT_MEM_REGION, and KVM_TDX_FINALIZE_VM), they are specific to KVM. So
> > I don't think it can be split out to independent driver.
>
They can stay in KVM as they are KVM-specific. SNP also has KVM-specific ioctls
which wraps the SEV driver calls. At this level, both TDX and SNP go their specific
implementation without more abstraction other than KVM_ENCRYPT_MEMORY_OP. Their
strategies are aligned.
The problem of the previous approach was the abstraction that no other implementation
is using it. It is like, TDX wants a higher abstraction to cover both TDX and SNP,
but SNP is not using it, which makes the abstraction looks strange.
> Here is the patch to export those info via sysfs.
>
> From e0744e506eb92e47d8317e489945a3ba804edfa7 Mon Sep 17 00:00:00 2001
> Message-Id: <e0744e506eb92e47d8317e489945a3ba804edfa7.1680221730.git.isaku.yamahata@intel.com>
> In-Reply-To: <8e0bc0e8e5d435f54f10c7642a862629ef2acb89.1680221729.git.isaku.yamahata@intel.com>
> References: <8e0bc0e8e5d435f54f10c7642a862629ef2acb89.1680221729.git.isaku.yamahata@intel.com>
> From: Isaku Yamahata <isaku.yamahata@intel.com>
> Date: Thu, 30 Mar 2023 00:05:03 -0700
> Subject: [PATCH] x86/virt/tdx: Export TD config params of TDX module via sysfs
>
> TDX module has parameters for VMM to configure TD. User space VMM, e.g.
> qemu, needs to know it. Export them to user space via sysfs.
>
> TDX 1.0 provides TDH.SYS.INFO to provide system information in
> TDSYSINFO_STRUCT. Its future extensibility is limited because of its
> struct. From TDX 1.5, TDH.SYS.RD(metadata field_id) to read the info
> specified by field id. So instead of exporting TDSYSINFO_STRUCT, adapt
> metadata way to export those system information.
>
> Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
> ---
> Documentation/ABI/testing/sysfs-firmware-tdx | 23 +++
> arch/x86/include/asm/tdx.h | 33 ++++
> arch/x86/virt/vmx/tdx/tdx.c | 164 +++++++++++++++++++
> arch/x86/virt/vmx/tdx/tdx.h | 18 ++
> 4 files changed, 238 insertions(+)
> create mode 100644 Documentation/ABI/testing/sysfs-firmware-tdx
>
> diff --git a/Documentation/ABI/testing/sysfs-firmware-tdx b/Documentation/ABI/testing/sysfs-firmware-tdx
> new file mode 100644
> index 000000000000..1f26fb178144
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-firmware-tdx
> @@ -0,0 +1,23 @@
> +What: /sys/firmware/tdx/tdx_module/metadata
> +Date: March 2023
> +KernelVersion: 6.3
> +Contact: Isaku Yamahata <isaku.yamahata@intel.com>, kvm@vger.kernel.org
> +Users: qemu, libvirt
> +Description:
> + The TDX feature requires a firmware that is known as the TDX
> + module. The TDX module exposes its metadata in the following
> + read-only files. The information corresponds to the TDX global
> + metadata specified by 64bit field id. The file name is hex
> + string in lower case. The value is binary.
> + User space VMM like qemu needs refer to them to determine what
> + parameters are needed or allowed to configure guest TDs.
> +
> + ================== ============================================
> + 1900000300000000 ATTRIBUTES_FIXED0
> + 1900000300000001 ATTRIBUTES_FIXED1
> + 1900000300000002 XFAM_FIXED0
> + 1900000300000003 XFAM_FIXED1
> + 9900000100000004 NUM_CPUID_CONFIG
> + 9900000300000400 CPUID_LEAVES
> + 9900000300000500 CPUID_VALUES
> + ================== ============================================
> \ No newline at end of file
> diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h
> index 05870e5ed131..c650ac22a916 100644
> --- a/arch/x86/include/asm/tdx.h
> +++ b/arch/x86/include/asm/tdx.h
> @@ -110,6 +110,39 @@ struct tdx_cpuid_config {
> u32 edx;
> } __packed;
>
> +struct tdx_cpuid_config_leaf {
> + u32 leaf;
> + u32 sub_leaf;
> +} __packed;
> +static_assert(offsetof(struct tdx_cpuid_config, leaf) ==
> + offsetof(struct tdx_cpuid_config_leaf, leaf));
> +static_assert(offsetof(struct tdx_cpuid_config, sub_leaf) ==
> + offsetof(struct tdx_cpuid_config_leaf, sub_leaf));
> +static_assert(offsetofend(struct tdx_cpuid_config, sub_leaf) ==
> + sizeof(struct tdx_cpuid_config_leaf));
> +
> +struct tdx_cpuid_config_value {
> + u32 eax;
> + u32 ebx;
> + u32 ecx;
> + u32 edx;
> +} __packed;
> +static_assert(offsetof(struct tdx_cpuid_config, eax) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + offsetof(struct tdx_cpuid_config_value, eax));
> +static_assert(offsetof(struct tdx_cpuid_config, ebx) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + offsetof(struct tdx_cpuid_config_value, ebx));
> +static_assert(offsetof(struct tdx_cpuid_config, ecx) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + offsetof(struct tdx_cpuid_config_value, ecx));
> +static_assert(offsetof(struct tdx_cpuid_config, edx) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + offsetof(struct tdx_cpuid_config_value, edx));
> +static_assert(offsetofend(struct tdx_cpuid_config, edx) -
> + offsetof(struct tdx_cpuid_config, eax) ==
> + sizeof(struct tdx_cpuid_config_value));
> +
> #define TDSYSINFO_STRUCT_SIZE 1024
> #define TDSYSINFO_STRUCT_ALIGNMENT 1024
>
> diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c
> index f9f9c1b76501..56ca520d67d6 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.c
> +++ b/arch/x86/virt/vmx/tdx/tdx.c
> @@ -33,6 +33,12 @@
> #include <asm/tdx.h>
> #include "tdx.h"
>
> +#ifdef CONFIG_SYSFS
> +static int tdx_sysfs_init(void);
> +#else
> +static inline int tdx_sysfs_init(void) { return 0;}
> +#endif
> +
> u32 tdx_global_keyid __ro_after_init;
> EXPORT_SYMBOL_GPL(tdx_global_keyid);
> static u32 tdx_guest_keyid_start __ro_after_init;
> @@ -399,6 +405,10 @@ static int __tdx_get_sysinfo(struct tdsysinfo_struct *sysinfo,
> if (ret)
> return ret;
>
> + ret = tdx_sysfs_init();
> + if (ret)
> + return ret;
> +
> pr_info("TDX module: atributes 0x%x, vendor_id 0x%x, major_version %u, minor_version %u, build_date %u, build_num %u",
> sysinfo->attributes, sysinfo->vendor_id,
> sysinfo->major_version, sysinfo->minor_version,
> @@ -1367,3 +1377,157 @@ int tdx_enable(void)
> return ret;
> }
> EXPORT_SYMBOL_GPL(tdx_enable);
> +
> +#ifdef CONFIG_SYSFS
> +
> +static struct kobject *tdx_kobj;
> +static struct kobject *tdx_module_kobj;
> +static struct kobject *tdx_metadata_kobj;
> +
> +#define TDX_METADATA_ATTR(_name, field_id_name, _size) \
> +static struct bin_attribute tdx_metadata_ ## _name = { \
> + .attr = { \
> + .name = field_id_name, \
> + .mode = 0444, \
> + }, \
> + .size = _size, \
> + .read = tdx_metadata_ ## _name ## _show, \
> +}
> +
> +#define TDX_METADATA_ATTR_SHOW(_name, field_id_name) \
> +static ssize_t tdx_metadata_ ## _name ## _show(struct file *filp, struct kobject *kobj, \
> + struct bin_attribute *bin_attr, \
> + char *buf, loff_t offset, size_t count) \
> +{ \
> + struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo); \
> + \
> + return memory_read_from_buffer(buf, count, &offset, \
> + &sysinfo->_name, \
> + sizeof(sysinfo->_name)); \
> +} \
> +TDX_METADATA_ATTR(_name, field_id_name, sizeof_field(struct tdsysinfo_struct, _name))
> +
> +TDX_METADATA_ATTR_SHOW(attributes_fixed0, TDX_METADATA_ATTRIBUTES_FIXED0_NAME);
> +TDX_METADATA_ATTR_SHOW(attributes_fixed1, TDX_METADATA_ATTRIBUTES_FIXED1_NAME);
> +TDX_METADATA_ATTR_SHOW(xfam_fixed0, TDX_METADATA_XFAM_FIXED0_NAME);
> +TDX_METADATA_ATTR_SHOW(xfam_fixed1, TDX_METADATA_XFAM_FIXED1_NAME);
> +
> +static ssize_t tdx_metadata_num_cpuid_config_show(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *bin_attr,
> + char *buf, loff_t offset, size_t count)
> +{
> + struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
> + /*
> + * Although tdsysinfo_struct.num_cpuid_config is defined as u32 for
> + * alignment, TDX 1.5 defines metadata NUM_CONFIG_CPUID as u16.
> + */
> + u16 tmp = (u16)sysinfo->num_cpuid_config;
> +
> + WARN_ON_ONCE(tmp != sysinfo->num_cpuid_config);
> + return memory_read_from_buffer(buf, count, &offset, &tmp, sizeof(tmp));
> +}
> +TDX_METADATA_ATTR(num_cpuid_config, TDX_METADATA_NUM_CPUID_CONFIG_NAME, sizeof(u16));
> +
> +static ssize_t tdx_metadata_cpuid_leaves_show(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *bin_attr, char *buf,
> + loff_t offset, size_t count)
> +{
> + struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
> + ssize_t r;
> + struct tdx_cpuid_config_leaf *tmp;
> + u32 i;
> +
> + tmp = kmalloc(bin_attr->size, GFP_KERNEL);
> + if (!tmp)
> + return -ENOMEM;
> +
> + for (i = 0; i < sysinfo->num_cpuid_config; i++) {
> + struct tdx_cpuid_config *c = &sysinfo->cpuid_configs[i];
> + struct tdx_cpuid_config_leaf *leaf = (struct tdx_cpuid_config_leaf *)c;
> +
> + memcpy(tmp + i, leaf, sizeof(*leaf));
> + }
> +
> + r = memory_read_from_buffer(buf, count, &offset, tmp, bin_attr->size);
> + kfree(tmp);
> + return r;
> +}
> +
> +TDX_METADATA_ATTR(cpuid_leaves, TDX_METADATA_CPUID_LEAVES_NAME, 0);
> +
> +static ssize_t tdx_metadata_cpuid_values_show(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *bin_attr, char *buf,
> + loff_t offset, size_t count)
> +{
> + struct tdsysinfo_struct *sysinfo = &PADDED_STRUCT(tdsysinfo);
> + struct tdx_cpuid_config_value *tmp;
> + ssize_t r;
> + u32 i;
> +
> + tmp = kmalloc(bin_attr->size, GFP_KERNEL);
> + if (!tmp)
> + return -ENOMEM;
> +
> + for (i = 0; i < sysinfo->num_cpuid_config; i++) {
> + struct tdx_cpuid_config *c = &sysinfo->cpuid_configs[i];
> + struct tdx_cpuid_config_value *value = (struct tdx_cpuid_config_value *)&c->eax;
> +
> + memcpy(tmp + i, value, sizeof(*value));
> + }
> +
> + r = memory_read_from_buffer(buf, count, &offset, tmp, bin_attr->size);
> + kfree(tmp);
> + return r;
> +}
> +
> +TDX_METADATA_ATTR(cpuid_values, TDX_METADATA_CPUID_VALUES_NAME, 0);
> +
> +static struct bin_attribute *tdx_metadata_attrs[] = {
> + &tdx_metadata_attributes_fixed0,
> + &tdx_metadata_attributes_fixed1,
> + &tdx_metadata_xfam_fixed0,
> + &tdx_metadata_xfam_fixed1,
> + &tdx_metadata_num_cpuid_config,
> + &tdx_metadata_cpuid_leaves,
> + &tdx_metadata_cpuid_values,
> + NULL,
> +};
> +
> +static const struct attribute_group tdx_metadata_attr_group = {
> + .bin_attrs = tdx_metadata_attrs,
> +};
> +
> +static int tdx_sysfs_init(void)
> +{
> + struct tdsysinfo_struct *sysinfo;
> + int ret;
> +
> + tdx_kobj = kobject_create_and_add("tdx", firmware_kobj);
> + if (!tdx_kobj) {
> + pr_err("kobject_create_and_add tdx failed\n");
> + return -EINVAL;
> + }
> +
> + tdx_module_kobj = kobject_create_and_add("tdx_module", tdx_kobj);
> + if (!tdx_module_kobj) {
> + pr_err("kobject_create_and_add tdx_module failed\n");
> + return -EINVAL;
> + }
> + tdx_metadata_kobj = kobject_create_and_add("metadata", tdx_module_kobj);
> + if (!tdx_metadata_kobj) {
> + pr_err("Sysfs exporting tdx global metadata failed %d\n", ret);
> + return -EINVAL;
> + }
> +
> + sysinfo = &PADDED_STRUCT(tdsysinfo);
> + tdx_metadata_cpuid_leaves.size = sysinfo->num_cpuid_config *
> + sizeof(struct tdx_cpuid_config_leaf);
> + tdx_metadata_cpuid_values.size = sysinfo->num_cpuid_config *
> + sizeof(struct tdx_cpuid_config_value);
> + ret = sysfs_create_group(tdx_metadata_kobj, &tdx_metadata_attr_group);
> + if (ret)
> + pr_err("Sysfs exporting tdx module attributes failed %d\n", ret);
> +
> + return ret;
> +}
> +#endif
> diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h
> index db0cbcceb5b3..a48f38fe6cc4 100644
> --- a/arch/x86/virt/vmx/tdx/tdx.h
> +++ b/arch/x86/virt/vmx/tdx/tdx.h
> @@ -93,4 +93,22 @@ struct tdmr_info_list {
> int max_tdmrs; /* How many 'tdmr_info's are allocated */
> };
>
> +/* TDX metadata base field id. */
> +#define TDX_METADATA_ATTRIBUTES_FIXED0 0x1900000300000000ULL
> +#define TDX_METADATA_ATTRIBUTES_FIXED1 0x1900000300000001ULL
> +#define TDX_METADATA_XFAM_FIXED0 0x1900000300000002ULL
> +#define TDX_METADATA_XFAM_FIXED1 0x1900000300000003ULL
> +#define TDX_METADATA_NUM_CPUID_CONFIG 0x9900000100000004ULL
> +#define TDX_METADATA_CPUID_LEAVES 0x9900000300000400ULL
> +#define TDX_METADATA_CPUID_VALUES 0x9900000300000500ULL
> +
> +/* File name for sysfs: hex with lower case. */
> +#define TDX_METADATA_ATTRIBUTES_FIXED0_NAME "1900000300000000"
> +#define TDX_METADATA_ATTRIBUTES_FIXED1_NAME "1900000300000001"
> +#define TDX_METADATA_XFAM_FIXED0_NAME "1900000300000002"
> +#define TDX_METADATA_XFAM_FIXED1_NAME "1900000300000003"
> +#define TDX_METADATA_NUM_CPUID_CONFIG_NAME "9900000100000004"
> +#define TDX_METADATA_CPUID_LEAVES_NAME "9900000300000400"
> +#define TDX_METADATA_CPUID_VALUES_NAME "9900000300000500"
> +
> #endif
On 3/31/2023 8:44 PM, Zhi Wang wrote: > On Thu, 30 Mar 2023 17:18:03 -0700 > Isaku Yamahata <isaku.yamahata@gmail.com> wrote: > >> On Wed, Mar 29, 2023 at 04:17:22PM -0700, >> Isaku Yamahata <isaku.yamahata@gmail.com> wrote: >> >>> On Sat, Mar 25, 2023 at 10:43:06AM +0200, >>> Zhi Wang <zhi.wang.linux@gmail.com> wrote: >>> >>>> On Sun, 12 Mar 2023 10:55:40 -0700 >>>> isaku.yamahata@intel.com wrote: >>>> >>>> Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP >>>> does not use it at all and all the system-scoped ioctl of SNP going through >>>> the CCP driver. So getting system-scope information of TDX/SNP will end up >>>> differently. >>>> >>>> Any thought, Sean? Moving getting SNP system-wide information to >>>> KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like >>>> CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP? >>> >>> We only need global parameters of the TDX module, and we don't interact with TDX >>> module at this point. One alternative is to export those parameters via sysfs. >>> Also the existence of the sysfs node indicates that the TDX module is >>> loaded(initialized?) or not in addition to boot log. Thus we can drop system >>> scope one. >>> What do you think? >>> > > I like this idea and the patch below, it feels right for me now. It would be nice > if more folks can chime in and comment. SYSFS option requires CONFIG_SYSFS, which reqiures CONFIG_KVM_TDX to select CONFIG_SYSFS. >>> Regarding to other TDX KVM specific ioctls (KVM_TDX_INIT_VM, KVM_TDX_INIT_VCPU, >>> KVM_TDX_INIT_MEM_REGION, and KVM_TDX_FINALIZE_VM), they are specific to KVM. So >>> I don't think it can be split out to independent driver. >> > > They can stay in KVM as they are KVM-specific. SNP also has KVM-specific ioctls > which wraps the SEV driver calls. At this level, both TDX and SNP go their specific > implementation without more abstraction other than KVM_ENCRYPT_MEMORY_OP. Their > strategies are aligned. > > The problem of the previous approach was the abstraction that no other implementation > is using it. It is like, TDX wants a higher abstraction to cover both TDX and SNP, > but SNP is not using it, which makes the abstraction looks strange. Note, before this TDX enabling series, KVM_MEMORY_ENCRYPT_OP is a VM scope ioctl, that only serves for SEV and no other implementation uses it. I see no reason why cannot introduce a new IOCTL in x86 KVM that serves only one vendor. We choose KVM_MEMORY_ENCRYPT_OP for TDX platform scope, just because we reuse KVM_MEMORY_ENCRYPT_OP for TDX VM-scope and extend it to TDX vcpu scope. It's just to avoid defining a new IOCTL number. We can rename it to KVM_GET_CC_CAPABILITIES, and even return different capabilities based on VM type. And even, if SNP wants to use it, I think it can wrap SNP driver calls inside this IOCTL? kvm.ko is special that it needs to serve two vendors. Sometime it's unaviodable that an interface is only used by one vendor.
On Mon, 3 Apr 2023 11:46:15 +0800 Xiaoyao Li <xiaoyao.li@intel.com> wrote: > On 3/31/2023 8:44 PM, Zhi Wang wrote: > > On Thu, 30 Mar 2023 17:18:03 -0700 > > Isaku Yamahata <isaku.yamahata@gmail.com> wrote: > > > >> On Wed, Mar 29, 2023 at 04:17:22PM -0700, > >> Isaku Yamahata <isaku.yamahata@gmail.com> wrote: > >> > >>> On Sat, Mar 25, 2023 at 10:43:06AM +0200, > >>> Zhi Wang <zhi.wang.linux@gmail.com> wrote: > >>> > >>>> On Sun, 12 Mar 2023 10:55:40 -0700 > >>>> isaku.yamahata@intel.com wrote: > >>>> > >>>> Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP > >>>> does not use it at all and all the system-scoped ioctl of SNP going through > >>>> the CCP driver. So getting system-scope information of TDX/SNP will end up > >>>> differently. > >>>> > >>>> Any thought, Sean? Moving getting SNP system-wide information to > >>>> KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like > >>>> CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP? > >>> > >>> We only need global parameters of the TDX module, and we don't interact with TDX > >>> module at this point. One alternative is to export those parameters via sysfs. > >>> Also the existence of the sysfs node indicates that the TDX module is > >>> loaded(initialized?) or not in addition to boot log. Thus we can drop system > >>> scope one. > >>> What do you think? > >>> > > > > I like this idea and the patch below, it feels right for me now. It would be nice > > if more folks can chime in and comment. > > SYSFS option requires CONFIG_SYSFS, which reqiures CONFIG_KVM_TDX to > select CONFIG_SYSFS. > > >>> Regarding to other TDX KVM specific ioctls (KVM_TDX_INIT_VM, KVM_TDX_INIT_VCPU, > >>> KVM_TDX_INIT_MEM_REGION, and KVM_TDX_FINALIZE_VM), they are specific to KVM. So > >>> I don't think it can be split out to independent driver. > >> > > > > They can stay in KVM as they are KVM-specific. SNP also has KVM-specific ioctls > > which wraps the SEV driver calls. At this level, both TDX and SNP go their specific > > implementation without more abstraction other than KVM_ENCRYPT_MEMORY_OP. Their > > strategies are aligned. > > > > The problem of the previous approach was the abstraction that no other implementation > > is using it. It is like, TDX wants a higher abstraction to cover both TDX and SNP, > > but SNP is not using it, which makes the abstraction looks strange. > > Note, before this TDX enabling series, KVM_MEMORY_ENCRYPT_OP is a VM > scope ioctl, that only serves for SEV and no other implementation uses > it. I see no reason why cannot introduce a new IOCTL in x86 KVM that > serves only one vendor. > My point is: time is different. When KVM_MEMORY_ENCRYPT_OP is there, there was *only* one vendor and SEV/SNP didn't know how the future vendor is going to use the ioctl. That is a reasonable case an generic ioctl can have one vendor to back up. The background here is: now another vendor is coming and there are going to be two vendors. The two vendors' flows are much clearer than early stage. Like, they know which flow is going to be used by each other. With these kept in mind, IMHO, it is not appropriate to introduce an generic ioctl that only one vendor is going to use, meanwhile we have already known another vendor is not going to use it. Defining a new userspace ABI is a serious thing and it is not an early stage anymore. Actually I think it is the best time to see how the code infrastructure should be re-purposed at this time. > We choose KVM_MEMORY_ENCRYPT_OP for TDX platform scope, just because we > reuse KVM_MEMORY_ENCRYPT_OP for TDX VM-scope and extend it to TDX vcpu > scope. It's just to avoid defining a new IOCTL number. > > We can rename it to KVM_GET_CC_CAPABILITIES, and even return different > capabilities based on VM type. And even, if SNP wants to use it, I think > it can wrap SNP driver calls inside this IOCTL? > I am not opposed to this option as it shows effort to improve it and it is constructive. But this needs to be figured out with AMD folks and maintainers. E.g. what should be the best CC ioctl scheme for KVM? vendor-specific or generic, which brings better benefit for the userspace, and less maintenance burden. Back to the reason why I think a vendor-specific sysinfo interface for TDX is necessary: 1) SEV driver has been there for quite some time. Unless people thinks an generic CC ioctl scheme is a way to go, then there will be motivation and efforts putting on it. The efforts is not only about wrapping SEV ioctls, it needs a systematic spec of generic CC ioctl scheme. 2) TDX doesn't have a driver like SEV and possibly not going to have one in the future. For those non-KVM related control flow of TDX in future, they can re-use this and stay away from KVM interface. (If vendor-specific scheme is the future direction.) > kvm.ko is special that it needs to serve two vendors. Sometime it's > unaviodable that an interface is only used by one vendor. I am afraid that in this case it is avoidable right?
On Mon, Apr 03, 2023 at 05:28:35PM +0300, Zhi Wang <zhi.wang.linux@gmail.com> wrote: > On Mon, 3 Apr 2023 11:46:15 +0800 > Xiaoyao Li <xiaoyao.li@intel.com> wrote: > > > On 3/31/2023 8:44 PM, Zhi Wang wrote: > > > On Thu, 30 Mar 2023 17:18:03 -0700 > > > Isaku Yamahata <isaku.yamahata@gmail.com> wrote: > > > > > >> On Wed, Mar 29, 2023 at 04:17:22PM -0700, > > >> Isaku Yamahata <isaku.yamahata@gmail.com> wrote: > > >> > > >>> On Sat, Mar 25, 2023 at 10:43:06AM +0200, > > >>> Zhi Wang <zhi.wang.linux@gmail.com> wrote: > > >>> > > >>>> On Sun, 12 Mar 2023 10:55:40 -0700 > > >>>> isaku.yamahata@intel.com wrote: > > >>>> > > >>>> Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP > > >>>> does not use it at all and all the system-scoped ioctl of SNP going through > > >>>> the CCP driver. So getting system-scope information of TDX/SNP will end up > > >>>> differently. > > >>>> > > >>>> Any thought, Sean? Moving getting SNP system-wide information to > > >>>> KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like > > >>>> CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP? > > >>> > > >>> We only need global parameters of the TDX module, and we don't interact with TDX > > >>> module at this point. One alternative is to export those parameters via sysfs. > > >>> Also the existence of the sysfs node indicates that the TDX module is > > >>> loaded(initialized?) or not in addition to boot log. Thus we can drop system > > >>> scope one. > > >>> What do you think? > > >>> > > > > > > I like this idea and the patch below, it feels right for me now. It would be nice > > > if more folks can chime in and comment. > > > > SYSFS option requires CONFIG_SYSFS, which reqiures CONFIG_KVM_TDX to > > select CONFIG_SYSFS. > > > > >>> Regarding to other TDX KVM specific ioctls (KVM_TDX_INIT_VM, KVM_TDX_INIT_VCPU, > > >>> KVM_TDX_INIT_MEM_REGION, and KVM_TDX_FINALIZE_VM), they are specific to KVM. So > > >>> I don't think it can be split out to independent driver. > > >> > > > > > > They can stay in KVM as they are KVM-specific. SNP also has KVM-specific ioctls > > > which wraps the SEV driver calls. At this level, both TDX and SNP go their specific > > > implementation without more abstraction other than KVM_ENCRYPT_MEMORY_OP. Their > > > strategies are aligned. > > > > > > The problem of the previous approach was the abstraction that no other implementation > > > is using it. It is like, TDX wants a higher abstraction to cover both TDX and SNP, > > > but SNP is not using it, which makes the abstraction looks strange. > > > > Note, before this TDX enabling series, KVM_MEMORY_ENCRYPT_OP is a VM > > scope ioctl, that only serves for SEV and no other implementation uses > > it. I see no reason why cannot introduce a new IOCTL in x86 KVM that > > serves only one vendor. > > > > My point is: time is different. When KVM_MEMORY_ENCRYPT_OP is there, > there was *only* one vendor and SEV/SNP didn't know how the future vendor > is going to use the ioctl. That is a reasonable case an generic ioctl can > have one vendor to back up. > > The background here is: now another vendor is coming and there are going to > be two vendors. The two vendors' flows are much clearer than early stage. > Like, they know which flow is going to be used by each other. > > With these kept in mind, IMHO, it is not appropriate to introduce > an generic ioctl that only one vendor is going to use, meanwhile > we have already known another vendor is not going to use it. > > Defining a new userspace ABI is a serious thing and it is not an early > stage anymore. Actually I think it is the best time to see how the > code infrastructure should be re-purposed at this time. > > > We choose KVM_MEMORY_ENCRYPT_OP for TDX platform scope, just because we > > reuse KVM_MEMORY_ENCRYPT_OP for TDX VM-scope and extend it to TDX vcpu > > scope. It's just to avoid defining a new IOCTL number. > > > > We can rename it to KVM_GET_CC_CAPABILITIES, and even return different > > capabilities based on VM type. And even, if SNP wants to use it, I think > > it can wrap SNP driver calls inside this IOCTL? > > > > I am not opposed to this option as it shows effort to improve it and it > is constructive. But this needs to be figured out with AMD folks and > maintainers. E.g. what should be the best CC ioctl scheme for KVM? > vendor-specific or generic, which brings better benefit for the userspace, > and less maintenance burden. > > Back to the reason why I think a vendor-specific sysinfo interface for TDX > is necessary: > > 1) SEV driver has been there for quite some time. Unless people thinks an > generic CC ioctl scheme is a way to go, then there will be motivation and > efforts putting on it. The efforts is not only about wrapping SEV ioctls, > it needs a systematic spec of generic CC ioctl scheme. > > 2) TDX doesn't have a driver like SEV and possibly not going to have one in > the future. For those non-KVM related control flow of TDX in future, they > can re-use this and stay away from KVM interface. (If vendor-specific > scheme is the future direction.) > > > kvm.ko is special that it needs to serve two vendors. Sometime it's > > unaviodable that an interface is only used by one vendor. > > I am afraid that in this case it is avoidable right? We can make KVM_TDX_CAPABILITIES vm-scoped one so that devoce-scoped KVM_EMORY_ENCRYPT_OP isn't needed. At least qemu is fine. Do you think vm-scoped KVM_TDX_CAPABILITIES is fine? -- Isaku Yamahata <isaku.yamahata@gmail.com>
On Wed, 5 Apr 2023 11:07:20 -0700 Isaku Yamahata <isaku.yamahata@gmail.com> wrote: > On Mon, Apr 03, 2023 at 05:28:35PM +0300, > Zhi Wang <zhi.wang.linux@gmail.com> wrote: > > > On Mon, 3 Apr 2023 11:46:15 +0800 > > Xiaoyao Li <xiaoyao.li@intel.com> wrote: > > > > > On 3/31/2023 8:44 PM, Zhi Wang wrote: > > > > On Thu, 30 Mar 2023 17:18:03 -0700 > > > > Isaku Yamahata <isaku.yamahata@gmail.com> wrote: > > > > > > > >> On Wed, Mar 29, 2023 at 04:17:22PM -0700, > > > >> Isaku Yamahata <isaku.yamahata@gmail.com> wrote: > > > >> > > > >>> On Sat, Mar 25, 2023 at 10:43:06AM +0200, > > > >>> Zhi Wang <zhi.wang.linux@gmail.com> wrote: > > > >>> > > > >>>> On Sun, 12 Mar 2023 10:55:40 -0700 > > > >>>> isaku.yamahata@intel.com wrote: > > > >>>> > > > >>>> Does this have to be a new generic ioctl with a dedicated new x86_ops? SNP > > > >>>> does not use it at all and all the system-scoped ioctl of SNP going through > > > >>>> the CCP driver. So getting system-scope information of TDX/SNP will end up > > > >>>> differently. > > > >>>> > > > >>>> Any thought, Sean? Moving getting SNP system-wide information to > > > >>>> KVM dev ioctl seems not ideal and TDX does not have a dedicated driver like > > > >>>> CCP. Maybe make this ioctl TDX-specific? KVM_TDX_DEV_OP? > > > >>> > > > >>> We only need global parameters of the TDX module, and we don't interact with TDX > > > >>> module at this point. One alternative is to export those parameters via sysfs. > > > >>> Also the existence of the sysfs node indicates that the TDX module is > > > >>> loaded(initialized?) or not in addition to boot log. Thus we can drop system > > > >>> scope one. > > > >>> What do you think? > > > >>> > > > > > > > > I like this idea and the patch below, it feels right for me now. It would be nice > > > > if more folks can chime in and comment. > > > > > > SYSFS option requires CONFIG_SYSFS, which reqiures CONFIG_KVM_TDX to > > > select CONFIG_SYSFS. > > > > > > >>> Regarding to other TDX KVM specific ioctls (KVM_TDX_INIT_VM, KVM_TDX_INIT_VCPU, > > > >>> KVM_TDX_INIT_MEM_REGION, and KVM_TDX_FINALIZE_VM), they are specific to KVM. So > > > >>> I don't think it can be split out to independent driver. > > > >> > > > > > > > > They can stay in KVM as they are KVM-specific. SNP also has KVM-specific ioctls > > > > which wraps the SEV driver calls. At this level, both TDX and SNP go their specific > > > > implementation without more abstraction other than KVM_ENCRYPT_MEMORY_OP. Their > > > > strategies are aligned. > > > > > > > > The problem of the previous approach was the abstraction that no other implementation > > > > is using it. It is like, TDX wants a higher abstraction to cover both TDX and SNP, > > > > but SNP is not using it, which makes the abstraction looks strange. > > > > > > Note, before this TDX enabling series, KVM_MEMORY_ENCRYPT_OP is a VM > > > scope ioctl, that only serves for SEV and no other implementation uses > > > it. I see no reason why cannot introduce a new IOCTL in x86 KVM that > > > serves only one vendor. > > > > > > > My point is: time is different. When KVM_MEMORY_ENCRYPT_OP is there, > > there was *only* one vendor and SEV/SNP didn't know how the future vendor > > is going to use the ioctl. That is a reasonable case an generic ioctl can > > have one vendor to back up. > > > > The background here is: now another vendor is coming and there are going to > > be two vendors. The two vendors' flows are much clearer than early stage. > > Like, they know which flow is going to be used by each other. > > > > With these kept in mind, IMHO, it is not appropriate to introduce > > an generic ioctl that only one vendor is going to use, meanwhile > > we have already known another vendor is not going to use it. > > > > Defining a new userspace ABI is a serious thing and it is not an early > > stage anymore. Actually I think it is the best time to see how the > > code infrastructure should be re-purposed at this time. > > > > > We choose KVM_MEMORY_ENCRYPT_OP for TDX platform scope, just because we > > > reuse KVM_MEMORY_ENCRYPT_OP for TDX VM-scope and extend it to TDX vcpu > > > scope. It's just to avoid defining a new IOCTL number. > > > > > > We can rename it to KVM_GET_CC_CAPABILITIES, and even return different > > > capabilities based on VM type. And even, if SNP wants to use it, I think > > > it can wrap SNP driver calls inside this IOCTL? > > > > > > > I am not opposed to this option as it shows effort to improve it and it > > is constructive. But this needs to be figured out with AMD folks and > > maintainers. E.g. what should be the best CC ioctl scheme for KVM? > > vendor-specific or generic, which brings better benefit for the userspace, > > and less maintenance burden. > > > > Back to the reason why I think a vendor-specific sysinfo interface for TDX > > is necessary: > > > > 1) SEV driver has been there for quite some time. Unless people thinks an > > generic CC ioctl scheme is a way to go, then there will be motivation and > > efforts putting on it. The efforts is not only about wrapping SEV ioctls, > > it needs a systematic spec of generic CC ioctl scheme. > > > > 2) TDX doesn't have a driver like SEV and possibly not going to have one in > > the future. For those non-KVM related control flow of TDX in future, they > > can re-use this and stay away from KVM interface. (If vendor-specific > > scheme is the future direction.) > > > > > kvm.ko is special that it needs to serve two vendors. Sometime it's > > > unaviodable that an interface is only used by one vendor. > > > > I am afraid that in this case it is avoidable right? > > We can make KVM_TDX_CAPABILITIES vm-scoped one so that devoce-scoped > KVM_EMORY_ENCRYPT_OP isn't needed. At least qemu is fine. > > Do you think vm-scoped KVM_TDX_CAPABILITIES is fine? That looks better to me as it will stay with other KVM_TDX* ioctls then.
© 2016 - 2026 Red Hat, Inc.