[PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module

isaku.yamahata@intel.com posted 113 patches 2 years, 11 months ago
There is a newer version of this series
[PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by isaku.yamahata@intel.com 2 years, 11 months ago
From: Isaku Yamahata <isaku.yamahata@intel.com>

TDX requires several initialization steps for KVM to create guest TDs.
Detect CPU feature, enable VMX (TDX is based on VMX), detect the TDX module
availability, and initialize it.

There are several options on when to initialize the TDX module.  A.) kernel
module loading time, B.) the first guest TD creation time.  A.) was chosen.
With B.), a user may hit an error of the TDX initialization when trying to
create the first guest TD.  The machine that fails to initialize the TDX
module can't boot any guest TD further.  Such failure is undesirable and a
surprise because the user expects that the machine can accommodate guest
TD, but actually not.  So A.) is better than B.).

Introduce a module parameter, enable_tdx, to explicitly enable TDX KVM
support.  It's off by default to keep same behavior for those who don't use
TDX.  Implement hardware_setup method to detect TDX feature of CPU.
Because TDX requires all present CPUs to enable VMX (VMXON), the x86
specific kvm_arch_post_hardware_enable_setup overrides the existing weak
symbol of kvm_arch_post_hardware_enable_setup which is called at the KVM
module initialization.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/Makefile      |  1 +
 arch/x86/kvm/vmx/main.c    | 18 ++++++++++-
 arch/x86/kvm/vmx/tdx.c     | 63 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     | 39 +++++++++++++++++++++++
 arch/x86/kvm/vmx/x86_ops.h | 10 ++++++
 5 files changed, 130 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kvm/vmx/tdx.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 0e894ae23cbc..4b01ab842ab7 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -25,6 +25,7 @@ kvm-$(CONFIG_KVM_SMM)	+= smm.o
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
 			   vmx/hyperv.o vmx/nested.o vmx/posted_intr.o vmx/main.o
 kvm-intel-$(CONFIG_X86_SGX_KVM)	+= vmx/sgx.o
+kvm-intel-$(CONFIG_INTEL_TDX_HOST)	+= vmx/tdx.o
 
 kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \
 			   svm/sev.o svm/hyperv.o
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 3f49e8e38b6b..5c9f5e00b3c4 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -6,6 +6,22 @@
 #include "nested.h"
 #include "pmu.h"
 
+static bool enable_tdx __ro_after_init;
+module_param_named(tdx, enable_tdx, bool, 0444);
+
+static __init int vt_hardware_setup(void)
+{
+	int ret;
+
+	ret = vmx_hardware_setup();
+	if (ret)
+		return ret;
+
+	enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);
+
+	return 0;
+}
+
 #define VMX_REQUIRED_APICV_INHIBITS		       \
 (						       \
        BIT(APICV_INHIBIT_REASON_DISABLE)|	       \
@@ -159,7 +175,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 };
 
 struct kvm_x86_init_ops vt_init_ops __initdata = {
-	.hardware_setup = vmx_hardware_setup,
+	.hardware_setup = vt_hardware_setup,
 	.handle_intel_pt_intr = NULL,
 
 	.runtime_ops = &vt_x86_ops,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
new file mode 100644
index 000000000000..f3eb0138b60b
--- /dev/null
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/tdx.h>
+
+#include "capabilities.h"
+#include "x86_ops.h"
+#include "x86.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+static int __init tdx_module_setup(void)
+{
+	int ret;
+
+	ret = tdx_enable();
+	if (ret) {
+		pr_info("Failed to initialize TDX module.\n");
+		return ret;
+	}
+
+	pr_info("TDX is supported.\n");
+	return 0;
+}
+
+static int __init tdx_cpu_enable_cpu(void *unused)
+{
+	return tdx_cpu_enable();
+}
+
+int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
+{
+	int r;
+
+	if (!enable_ept) {
+		pr_warn("Cannot enable TDX with EPT disabled\n");
+		return -EINVAL;
+	}
+
+	/* tdx_enable() in tdx_module_setup() requires cpus lock. */
+	cpus_read_lock();
+	/* TDX requires VMX. */
+	r = vmxon_all();
+	if (!r) {
+		int cpu;
+
+		/*
+		 * Because tdx_cpu_enabel() acquire spin locks, on_each_cpu()
+		 * can't be used.
+		 */
+		for_each_online_cpu(cpu) {
+			if (smp_call_on_cpu(cpu, tdx_cpu_enable_cpu, NULL, false))
+				r = -EIO;
+		}
+		if (!r)
+			r = tdx_module_setup();
+	}
+	vmxoff_all();
+	cpus_read_unlock();
+
+	return r;
+}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3bbd07412f00..d23830d92f61 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8123,6 +8123,45 @@ static unsigned int vmx_handle_intel_pt_intr(void)
 	return 1;
 }
 
+static __init void vmxon(void *arg)
+{
+	int cpu = raw_smp_processor_id();
+	u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
+	atomic_t *failed = arg;
+	int r;
+
+	if (cr4_read_shadow() & X86_CR4_VMXE) {
+		r = -EBUSY;
+		goto out;
+	}
+
+	r = kvm_cpu_vmxon(phys_addr);
+out:
+	if (r)
+		atomic_inc(failed);
+}
+
+__init int vmxon_all(void)
+{
+	atomic_t failed = ATOMIC_INIT(0);
+
+	on_each_cpu(vmxon, &failed, 1);
+
+	if (atomic_read(&failed))
+		return -EBUSY;
+	return 0;
+}
+
+static __init void vmxoff(void *junk)
+{
+	cpu_vmxoff();
+}
+
+__init void vmxoff_all(void)
+{
+	on_each_cpu(vmxoff, NULL, 1);
+}
+
 static __init void vmx_setup_user_return_msrs(void)
 {
 
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 051b5c4b5c2f..0f200aead411 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -20,6 +20,10 @@ bool kvm_is_vmx_supported(void);
 int __init vmx_init(void);
 void vmx_exit(void);
 
+__init int vmxon_all(void);
+__init void vmxoff_all(void);
+__init int vmx_hardware_setup(void);
+
 extern struct kvm_x86_ops vt_x86_ops __initdata;
 extern struct kvm_x86_init_ops vt_init_ops __initdata;
 
@@ -133,4 +137,10 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
 #endif
 void vmx_setup_mce(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_INTEL_TDX_HOST
+int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
+#else
+static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -ENOSYS; }
+#endif
+
 #endif /* __KVM_X86_VMX_X86_OPS_H */
-- 
2.25.1
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Huang, Kai 2 years, 11 months ago
On Sun, 2023-03-12 at 10:55 -0700, isaku.yamahata@intel.com wrote:
> +int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> +	int r;
> +
> +	if (!enable_ept) {
> +		pr_warn("Cannot enable TDX with EPT disabled\n");
> +		return -EINVAL;
> +	}
> +
> +	/* tdx_enable() in tdx_module_setup() requires cpus lock. */
> +	cpus_read_lock();
> +	/* TDX requires VMX. */
> +	r = vmxon_all();

Why not using hardware_enable_all()?

> +	if (!r) {
> +		int cpu;
> +
> +		/*
> +		 * Because tdx_cpu_enabel() acquire spin locks, on_each_cpu()
> +		 * can't be used.
> +		 */
> +		for_each_online_cpu(cpu) {
> +			if (smp_call_on_cpu(cpu, tdx_cpu_enable_cpu, NULL, false))
> +				r = -EIO;
> +		}
> +		if (!r)
> +			r = tdx_module_setup();
> +	}
> +	vmxoff_all();
> +	cpus_read_unlock();
> +
> +	return r;
> +}

I think you should use hardware_enable_all(), and just do something similar to
below in vmx_hardware_enable():

diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 7eec0226d56a..b7b3f99c0095 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -2504,6 +2504,15 @@ static int vmx_hardware_enable(void)
                return r;
        }
 
+       if (enable_tdx) {
+               r = tdx_cpu_enable();
+               if (r) {
+                       cpu_vmxoff();
+                       intel_pt_handle_vmx(0);
+                       return r;
+               }
+       }
+
        if (enable_ept)
                ept_sync_global();

It handles two cases automatically:

1) When user wants to use TDX while loading KVM module, if above fails, then
hardware_enable_all() returns error, and you just give up initializing TDX
module and mark enable_tdx as false.

2) When a new cpu becomes online, and when TDX is being used by KVM, then if
above fails, it automatically rejects the CPU which isn't TDX-runnable although
it is VMX-runnable.
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Isaku Yamahata 2 years, 11 months ago
On Tue, Mar 14, 2023 at 02:38:06AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> On Sun, 2023-03-12 at 10:55 -0700, isaku.yamahata@intel.com wrote:
> > +int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> > +{
> > +	int r;
> > +
> > +	if (!enable_ept) {
> > +		pr_warn("Cannot enable TDX with EPT disabled\n");
> > +		return -EINVAL;
> > +	}
> > +
> > +	/* tdx_enable() in tdx_module_setup() requires cpus lock. */
> > +	cpus_read_lock();
> > +	/* TDX requires VMX. */
> > +	r = vmxon_all();
> 
> Why not using hardware_enable_all()?
> 
> > +	if (!r) {
> > +		int cpu;
> > +
> > +		/*
> > +		 * Because tdx_cpu_enabel() acquire spin locks, on_each_cpu()
> > +		 * can't be used.
> > +		 */
> > +		for_each_online_cpu(cpu) {
> > +			if (smp_call_on_cpu(cpu, tdx_cpu_enable_cpu, NULL, false))
> > +				r = -EIO;
> > +		}
> > +		if (!r)
> > +			r = tdx_module_setup();
> > +	}
> > +	vmxoff_all();
> > +	cpus_read_unlock();
> > +
> > +	return r;
> > +}
> 
> I think you should use hardware_enable_all(), and just do something similar to
> below in vmx_hardware_enable():

The use of hardware_enable_all() make us circle back to refactoring KVM
hardware initialization topic.  I'd like to stay away from it for now for TDX.
I find that vmxon_all() is unnecessary and we can move VMXON to
tdx_cpu_enable_cpu().
Here is the version of dropping vmxon_all().

From f8fa8fe9786f1c4d4a3b0af0d0228d2842984cba Mon Sep 17 00:00:00 2001
Message-Id: <f8fa8fe9786f1c4d4a3b0af0d0228d2842984cba.1678864879.git.isaku.yamahata@intel.com>
In-Reply-To: <d2aa2142665b8204b628232ab615c98090371c99.1678864879.git.isaku.yamahata@intel.com>
References: <d2aa2142665b8204b628232ab615c98090371c99.1678864879.git.isaku.yamahata@intel.com>
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Tue, 22 Feb 2022 14:44:15 -0800
Subject: [PATCH] KVM: TDX: Initialize the TDX module when loading the
 KVM intel kernel module

TDX requires several initialization steps for KVM to create guest TDs.
Detect CPU feature, enable VMX (TDX is based on VMX), detect the TDX module
availability, and initialize it.

There are several options on when to initialize the TDX module.  A.) kernel
module loading time, B.) the first guest TD creation time.  A.) was chosen.
With B.), a user may hit an error of the TDX initialization when trying to
create the first guest TD.  The machine that fails to initialize the TDX
module can't boot any guest TD further.  Such failure is undesirable and a
surprise because the user expects that the machine can accommodate guest
TD, but actually not.  So A.) is better than B.).

Introduce a module parameter, kvm_intel.tdx, to explicitly enable TDX KVM
support.  It's off by default to keep same behavior for those who don't use
TDX.  Implement hardware_setup method to detect TDX feature of CPU and
initialize TDX module.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/Makefile      |  1 +
 arch/x86/kvm/vmx/main.c    | 18 +++++++++-
 arch/x86/kvm/vmx/tdx.c     | 74 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/vmx.c     |  7 ++++
 arch/x86/kvm/vmx/x86_ops.h |  9 +++++
 5 files changed, 108 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/kvm/vmx/tdx.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 0e894ae23cbc..4b01ab842ab7 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -25,6 +25,7 @@ kvm-$(CONFIG_KVM_SMM)	+= smm.o
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
 			   vmx/hyperv.o vmx/nested.o vmx/posted_intr.o vmx/main.o
 kvm-intel-$(CONFIG_X86_SGX_KVM)	+= vmx/sgx.o
+kvm-intel-$(CONFIG_INTEL_TDX_HOST)	+= vmx/tdx.o
 
 kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \
 			   svm/sev.o svm/hyperv.o
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 3f49e8e38b6b..5c9f5e00b3c4 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -6,6 +6,22 @@
 #include "nested.h"
 #include "pmu.h"
 
+static bool enable_tdx __ro_after_init;
+module_param_named(tdx, enable_tdx, bool, 0444);
+
+static __init int vt_hardware_setup(void)
+{
+	int ret;
+
+	ret = vmx_hardware_setup();
+	if (ret)
+		return ret;
+
+	enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);
+
+	return 0;
+}
+
 #define VMX_REQUIRED_APICV_INHIBITS		       \
 (						       \
        BIT(APICV_INHIBIT_REASON_DISABLE)|	       \
@@ -159,7 +175,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 };
 
 struct kvm_x86_init_ops vt_init_ops __initdata = {
-	.hardware_setup = vmx_hardware_setup,
+	.hardware_setup = vt_hardware_setup,
 	.handle_intel_pt_intr = NULL,
 
 	.runtime_ops = &vt_x86_ops,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
new file mode 100644
index 000000000000..8d265d7ae6fb
--- /dev/null
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/tdx.h>
+
+#include "capabilities.h"
+#include "x86_ops.h"
+#include "x86.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+static int __init tdx_module_setup(void)
+{
+	int ret;
+
+	ret = tdx_enable();
+	if (ret) {
+		pr_info("Failed to initialize TDX module.\n");
+		return ret;
+	}
+
+	pr_info("TDX is supported.\n");
+	return 0;
+}
+
+static int __init tdx_cpu_enable_cpu(void *unused)
+{
+	int r;
+
+	/*
+	 * TDX requires VMX. Because thread can be migrated, keep VMXON on
+	 * all online cpus until all TDX module initialization is done.
+	 */
+	r = vmxon();
+	if (r)
+		return r;
+	return tdx_cpu_enable();
+}
+
+static void __init tdx_vmxoff_cpu(void *unused)
+{
+	WARN_ON_ONCE(cpu_vmxoff());
+}
+
+int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
+{
+	int cpu;
+	int r = 0;
+
+	if (!enable_ept) {
+		pr_warn("Cannot enable TDX with EPT disabled\n");
+		return -EINVAL;
+	}
+
+	/* tdx_enable() in tdx_module_setup() requires cpus lock. */
+	cpus_read_lock();
+	/*
+	 * Because tdx_cpu_enable() acquires spin locks, on_each_cpu()
+	 * can't be used.
+	 */
+	for_each_online_cpu(cpu) {
+		if (smp_call_on_cpu(cpu, tdx_cpu_enable_cpu, NULL, false)) {
+			r = -EIO;
+			break;
+		}
+	}
+	if (!r)
+		r = tdx_module_setup();
+	on_each_cpu(tdx_vmxoff_cpu, NULL, 1);
+	cpus_read_unlock();
+
+	return r;
+}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 3bbd07412f00..ce48e0bc9e00 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8123,6 +8123,13 @@ static unsigned int vmx_handle_intel_pt_intr(void)
 	return 1;
 }
 
+__init int vmxon(void)
+{
+	if (cr4_read_shadow() & X86_CR4_VMXE)
+		return -EBUSY;
+	return kvm_cpu_vmxon(__pa(this_cpu_read(vmxarea)));
+}
+
 static __init void vmx_setup_user_return_msrs(void)
 {
 
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 051b5c4b5c2f..892c39f7d771 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -20,6 +20,9 @@ bool kvm_is_vmx_supported(void);
 int __init vmx_init(void);
 void vmx_exit(void);
 
+__init int vmxon(void);
+__init int vmx_hardware_setup(void);
+
 extern struct kvm_x86_ops vt_x86_ops __initdata;
 extern struct kvm_x86_init_ops vt_init_ops __initdata;
 
@@ -133,4 +136,10 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
 #endif
 void vmx_setup_mce(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_INTEL_TDX_HOST
+int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
+#else
+static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -ENOSYS; }
+#endif
+
 #endif /* __KVM_X86_VMX_X86_OPS_H */
-- 
2.25.1



-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Huang, Kai 2 years, 11 months ago
> > 
> > I think you should use hardware_enable_all(), and just do something similar to
> > below in vmx_hardware_enable():
> 
> The use of hardware_enable_all() make us circle back to refactoring KVM
> hardware initialization topic.  I'd like to stay away from it for now for TDX.

Sean's series to improve hardware enable has been merged to upstream already.  

Can you elaborate what's the issue here?

[...]

> +static bool enable_tdx __ro_after_init;
> +module_param_named(tdx, enable_tdx, bool, 0444);
> +
> +static __init int vt_hardware_setup(void)
> +{
> +	int ret;
> +
> +	ret = vmx_hardware_setup();
> +	if (ret)
> +		return ret;
> +
> +	enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);

Unfortunately, the enable_tdx should also be protected by the cpus_read_lock(),
because CPU hotplug code path checks it too (as seen in your next patch).

> +
> +	return 0;
> +}
> +
>  #define VMX_REQUIRED_APICV_INHIBITS		       \
>  (						       \
>         BIT(APICV_INHIBIT_REASON_DISABLE)|	       \
> @@ -159,7 +175,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>  };
>  
>  struct kvm_x86_init_ops vt_init_ops __initdata = {
> -	.hardware_setup = vmx_hardware_setup,
> +	.hardware_setup = vt_hardware_setup,
>  	.handle_intel_pt_intr = NULL,
>  
>  	.runtime_ops = &vt_x86_ops,
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> new file mode 100644
> index 000000000000..8d265d7ae6fb
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -0,0 +1,74 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cpu.h>
> +
> +#include <asm/tdx.h>
> +
> +#include "capabilities.h"
> +#include "x86_ops.h"
> +#include "x86.h"
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +static int __init tdx_module_setup(void)
> +{
> +	int ret;
> +
> +	ret = tdx_enable();
> +	if (ret) {
> +		pr_info("Failed to initialize TDX module.\n");
> +		return ret;
> +	}
> +
> +	pr_info("TDX is supported.\n");
> +	return 0;
> +}
> +
> +static int __init tdx_cpu_enable_cpu(void *unused)
> +{
> +	int r;
> +
> +	/*
> +	 * TDX requires VMX. Because thread can be migrated, keep VMXON on
> +	 * all online cpus until all TDX module initialization is done.
> +	 */

The second sentence in this comment should be somewhere else, but not here.

> +	r = vmxon();
> +	if (r)
> +		return r;
> +	return tdx_cpu_enable();
> +}
> +
> +static void __init tdx_vmxoff_cpu(void *unused)
> +{
> +	WARN_ON_ONCE(cpu_vmxoff());
> +}
> +
> +int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> +	int cpu;
> +	int r = 0;
> +
> +	if (!enable_ept) {
> +		pr_warn("Cannot enable TDX with EPT disabled\n");
> +		return -EINVAL;
> +	}
> +
> +	/* tdx_enable() in tdx_module_setup() requires cpus lock. */
> +	cpus_read_lock();
> +	/*
> +	 * Because tdx_cpu_enable() acquires spin locks, on_each_cpu()
> +	 * can't be used.
> +	 */

Here you have cpus_read_lock() protection, so tdx_cpu_enable() cannot be called
from CPU hotplug code path when you are doing here.

So, using on_each_cpu() to do tdx_cpu_enable() is OK here, because on one
particular cpu, when it already has taken the spinlock, it cannot receive IPI
anymore which can try to take the spinlock again.

> +	for_each_online_cpu(cpu) {
> +		if (smp_call_on_cpu(cpu, tdx_cpu_enable_cpu, NULL, false)) {
> +			r = -EIO;
> +			break;
> +		}
> +	}
> +	if (!r)
> +		r = tdx_module_setup();
> +	on_each_cpu(tdx_vmxoff_cpu, NULL, 1);
> +	cpus_read_unlock();
> +
> +	return r;
> +}

I think you can merge next patch with this one because they are kinda related.  

Putting them together allows people to review more easily.
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Isaku Yamahata 2 years, 11 months ago
On Wed, Mar 15, 2023 at 09:46:00AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> 
> > > 
> > > I think you should use hardware_enable_all(), and just do something similar to
> > > below in vmx_hardware_enable():
> > 
> > The use of hardware_enable_all() make us circle back to refactoring KVM
> > hardware initialization topic.  I'd like to stay away from it for now for TDX.
> 
> Sean's series to improve hardware enable has been merged to upstream already.  

I revised the patch and I can use hardware_enable_all().

> > +	for_each_online_cpu(cpu) {
> > +		if (smp_call_on_cpu(cpu, tdx_cpu_enable_cpu, NULL, false)) {
> > +			r = -EIO;
> > +			break;
> > +		}
> > +	}
> > +	if (!r)
> > +		r = tdx_module_setup();
> > +	on_each_cpu(tdx_vmxoff_cpu, NULL, 1);
> > +	cpus_read_unlock();
> > +
> > +	return r;
> > +}
> 
> I think you can merge next patch with this one because they are kinda related.  
> 
> Putting them together allows people to review more easily.

Finally I come up with the version that uses kvm_hardware_enable_all().
that doesn't issue cpus_read_lock().
I had to patch tdx_cpu_enable(). I'll reply to the patch itself.


From 38774fc6e2bc5f0eddd0a0ab035ba8e712ee5ff2 Mon Sep 17 00:00:00 2001
Message-Id: <38774fc6e2bc5f0eddd0a0ab035ba8e712ee5ff2.1678926122.git.isaku.yamahata@intel.com>
In-Reply-To: <d2aa2142665b8204b628232ab615c98090371c99.1678926122.git.isaku.yamahata@intel.com>
References: <d2aa2142665b8204b628232ab615c98090371c99.1678926122.git.isaku.yamahata@intel.com>
From: Isaku Yamahata <isaku.yamahata@intel.com>
Date: Tue, 22 Feb 2022 14:44:15 -0800
Subject: [PATCH] KVM: TDX: Initialize the TDX module when loading the
 KVM intel kernel module

TDX requires several initialization steps for KVM to create guest TDs.
Detect CPU feature, enable VMX (TDX is based on VMX) on all online CPUs,
detect the TDX module availability, initialize it and disable VMX.

To enable VMX on all online CPUs, utilize kvm_hardware_enable_all() that
calls hardware_enable() methods.  The method also initialize each CPU for
TDX.  TDX requires to call a TDX initialization function per logical
processor (LP) before the LP uses TDX.  When CPU is onlined, call the TDX
LP initialization API when cpu is onlined.  If it failed refuse onlininig
of the cpu for simplicity instead of TDX avoiding the LP.

There are several options on when to initialize the TDX module.  A.) kernel
module loading time, B.) the first guest TD creation time.  A.) was chosen.
With B.), a user may hit an error of the TDX initialization when trying to
create the first guest TD.  The machine that fails to initialize the TDX
module can't boot any guest TD further.  Such failure is undesirable and a
surprise because the user expects that the machine can accommodate guest
TD, but actually not.  So A.) is better than B.).

Introduce a module parameter, kvm_intel.tdx, to explicitly enable TDX KVM
support.  It's off by default to keep same behavior for those who don't use
TDX.  Implement hardware_setup method to detect TDX feature of CPU and
initialize TDX module.

Suggested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Isaku Yamahata <isaku.yamahata@intel.com>
---
 arch/x86/kvm/Makefile      |  1 +
 arch/x86/kvm/vmx/main.c    | 34 ++++++++++++++++++++++++++--
 arch/x86/kvm/vmx/tdx.c     | 46 ++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/vmx/x86_ops.h |  8 +++++++
 arch/x86/kvm/x86.c         | 10 +++++++++
 5 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 arch/x86/kvm/vmx/tdx.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 0e894ae23cbc..4b01ab842ab7 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -25,6 +25,7 @@ kvm-$(CONFIG_KVM_SMM)	+= smm.o
 kvm-intel-y		+= vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
 			   vmx/hyperv.o vmx/nested.o vmx/posted_intr.o vmx/main.o
 kvm-intel-$(CONFIG_X86_SGX_KVM)	+= vmx/sgx.o
+kvm-intel-$(CONFIG_INTEL_TDX_HOST)	+= vmx/tdx.o
 
 kvm-amd-y		+= svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o \
 			   svm/sev.o svm/hyperv.o
diff --git a/arch/x86/kvm/vmx/main.c b/arch/x86/kvm/vmx/main.c
index 3f49e8e38b6b..f7bfa9888d9e 100644
--- a/arch/x86/kvm/vmx/main.c
+++ b/arch/x86/kvm/vmx/main.c
@@ -6,6 +6,36 @@
 #include "nested.h"
 #include "pmu.h"
 
+static bool enable_tdx __ro_after_init;
+module_param_named(tdx, enable_tdx, bool, 0444);
+
+static int vt_hardware_enable(void)
+{
+	int ret;
+
+	ret = vmx_hardware_enable();
+	if (ret || !enable_tdx)
+		return ret;
+
+	ret = tdx_cpu_enable();
+	if (ret)
+		vmx_hardware_disable();
+	return ret;
+}
+
+static __init int vt_hardware_setup(void)
+{
+	int ret;
+
+	ret = vmx_hardware_setup();
+	if (ret)
+		return ret;
+
+	enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);
+
+	return 0;
+}
+
 #define VMX_REQUIRED_APICV_INHIBITS		       \
 (						       \
        BIT(APICV_INHIBIT_REASON_DISABLE)|	       \
@@ -24,7 +54,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 
 	.hardware_unsetup = vmx_hardware_unsetup,
 
-	.hardware_enable = vmx_hardware_enable,
+	.hardware_enable = vt_hardware_enable,
 	.hardware_disable = vmx_hardware_disable,
 	.has_emulated_msr = vmx_has_emulated_msr,
 
@@ -159,7 +189,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 };
 
 struct kvm_x86_init_ops vt_init_ops __initdata = {
-	.hardware_setup = vmx_hardware_setup,
+	.hardware_setup = vt_hardware_setup,
 	.handle_intel_pt_intr = NULL,
 
 	.runtime_ops = &vt_x86_ops,
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
new file mode 100644
index 000000000000..f13fdf71430b
--- /dev/null
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/cpu.h>
+
+#include <asm/tdx.h>
+
+#include "capabilities.h"
+#include "x86_ops.h"
+#include "x86.h"
+
+#undef pr_fmt
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+static int __init tdx_module_setup(void)
+{
+	int ret;
+
+	ret = tdx_enable();
+	if (ret) {
+		pr_info("Failed to initialize TDX module.\n");
+		return ret;
+	}
+
+	pr_info("TDX is supported.\n");
+	return 0;
+}
+
+int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
+{
+	int r = 0;
+
+	if (!enable_ept) {
+		pr_warn("Cannot enable TDX with EPT disabled\n");
+		return -EINVAL;
+	}
+
+	/* tdx_enable() in tdx_module_setup() requires cpus lock. */
+	cpus_read_lock();
+	r = kvm_hardware_enable_all();
+	if (!r) {
+		r = tdx_module_setup();
+		kvm_hardware_disable_all();
+	}
+	cpus_read_unlock();
+
+	return r;
+}
diff --git a/arch/x86/kvm/vmx/x86_ops.h b/arch/x86/kvm/vmx/x86_ops.h
index 051b5c4b5c2f..f59e5197836a 100644
--- a/arch/x86/kvm/vmx/x86_ops.h
+++ b/arch/x86/kvm/vmx/x86_ops.h
@@ -20,6 +20,8 @@ bool kvm_is_vmx_supported(void);
 int __init vmx_init(void);
 void vmx_exit(void);
 
+__init int vmx_hardware_setup(void);
+
 extern struct kvm_x86_ops vt_x86_ops __initdata;
 extern struct kvm_x86_init_ops vt_init_ops __initdata;
 
@@ -133,4 +135,10 @@ void vmx_cancel_hv_timer(struct kvm_vcpu *vcpu);
 #endif
 void vmx_setup_mce(struct kvm_vcpu *vcpu);
 
+#ifdef CONFIG_INTEL_TDX_HOST
+int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops);
+#else
+static inline int tdx_hardware_setup(struct kvm_x86_ops *x86_ops) { return -ENOSYS; }
+#endif
+
 #endif /* __KVM_X86_VMX_X86_OPS_H */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2125fcaa3973..b264012a8478 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9435,6 +9435,16 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 
 	kvm_init_pmu_capability(ops->pmu_ops);
 
+	/*
+	 * TDX requires those methods to enable VMXON by
+	 * kvm_hardware_enable/disable_all()
+	 */
+	static_call_update(kvm_x86_check_processor_compatibility,
+			   ops->runtime_ops->check_processor_compatibility);
+	static_call_update(kvm_x86_hardware_enable,
+			   ops->runtime_ops->hardware_enable);
+	static_call_update(kvm_x86_hardware_disable,
+			   ops->runtime_ops->hardware_disable);
 	r = ops->hardware_setup();
 	if (r != 0)
 		goto out_mmu_exit;
-- 
2.25.1



-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Huang, Kai 2 years, 10 months ago
Sorry for late reply.

>  
> +static bool enable_tdx __ro_after_init;
> +module_param_named(tdx, enable_tdx, bool, 0444);
> +
> +static int vt_hardware_enable(void)
> +{
> +	int ret;
> +
> +	ret = vmx_hardware_enable();
> +	if (ret || !enable_tdx)
> +		return ret;
> +
> +	ret = tdx_cpu_enable();
> +	if (ret)
> +		vmx_hardware_disable();
> +	return ret;
> +}
> +
> +static __init int vt_hardware_setup(void)
> +{
> +	int ret;
> +
> +	ret = vmx_hardware_setup();
> +	if (ret)
> +		return ret;
> +
> +	enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);
> +
> +	return 0;
> +}
> +
>  #define VMX_REQUIRED_APICV_INHIBITS		       \
>  (						       \
>         BIT(APICV_INHIBIT_REASON_DISABLE)|	       \
> @@ -24,7 +54,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>  
>  	.hardware_unsetup = vmx_hardware_unsetup,
>  
> -	.hardware_enable = vmx_hardware_enable,
> +	.hardware_enable = vt_hardware_enable,
>  	.hardware_disable = vmx_hardware_disable,
>  	.has_emulated_msr = vmx_has_emulated_msr,
>  
> @@ -159,7 +189,7 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
>  };
>  
>  struct kvm_x86_init_ops vt_init_ops __initdata = {
> -	.hardware_setup = vmx_hardware_setup,
> +	.hardware_setup = vt_hardware_setup,
>  	.handle_intel_pt_intr = NULL,
>  
>  	.runtime_ops = &vt_x86_ops,
> diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
> new file mode 100644
> index 000000000000..f13fdf71430b
> --- /dev/null
> +++ b/arch/x86/kvm/vmx/tdx.c
> @@ -0,0 +1,46 @@
> +// SPDX-License-Identifier: GPL-2.0
> +#include <linux/cpu.h>
> +
> +#include <asm/tdx.h>
> +
> +#include "capabilities.h"
> +#include "x86_ops.h"
> +#include "x86.h"
> +
> +#undef pr_fmt
> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> +
> +static int __init tdx_module_setup(void)
> +{
> +	int ret;
> +
> +	ret = tdx_enable();
> +	if (ret) {
> +		pr_info("Failed to initialize TDX module.\n");
> +		return ret;
> +	}
> +
> +	pr_info("TDX is supported.\n");

Both pr_info()s are not required, because tdx_enable() internally prints them.

> +	return 0;
> +}
> +
> +int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> +{
> +	int r = 0;
> +
> +	if (!enable_ept) {
> +		pr_warn("Cannot enable TDX with EPT disabled\n");
> +		return -EINVAL;
> +	}
> +
> +	/* tdx_enable() in tdx_module_setup() requires cpus lock. */
> +	cpus_read_lock();
> +	r = kvm_hardware_enable_all();
> +	if (!r) {
> +		r = tdx_module_setup();
> +		kvm_hardware_disable_all();
> +	}
> +	cpus_read_unlock();
> +
> +	return r;
> +}
> 

[...]


>  #endif /* __KVM_X86_VMX_X86_OPS_H */
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 2125fcaa3973..b264012a8478 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -9435,6 +9435,16 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
>  
>  	kvm_init_pmu_capability(ops->pmu_ops);
>  
> +	/*
> +	 * TDX requires those methods to enable VMXON by
> +	 * kvm_hardware_enable/disable_all()
> +	 */
> +	static_call_update(kvm_x86_check_processor_compatibility,
> +			   ops->runtime_ops->check_processor_compatibility);
> +	static_call_update(kvm_x86_hardware_enable,
> +			   ops->runtime_ops->hardware_enable);
> +	static_call_update(kvm_x86_hardware_disable,
> +			   ops->runtime_ops->hardware_disable);
>  	r = ops->hardware_setup();
>  	if (r != 0)
>  		goto out_mmu_exit;

Hmm.. I think this is ugly.  Perhaps we should never do any
static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
called before kvm_ops_update() and may update vendor's kvm_x86_ops.

So probably use hardware_enable_all() in hardware_setup() is a bad idea.

I think we have below options on how to handle:

1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup().  For instance,
something like below:

int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
{
	...

	cpus_read_lock();
	r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
	if (!r)
		r = tdx_module_setup();
	on_each_cpu(vt_x86_ops.hardware_disable, ...);
	cpus_read_unlock();

	...
}

But this doesn't clean up nicely when there's some particular cpus fail to do
hardware_enable().  To clean up nicely, we do need additional things similar to
the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
hardware_enable() successfully.

2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
code self-contained.  But this would require exposing kvm_x86_ops as symbol,
which isn't nice either.

3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
after kvm_ops_update().

Personally, I think 3) perhaps is the most elegant one, but not sure whether
Sean/Paolo has any opinion.




Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Isaku Yamahata 2 years, 10 months ago
On Fri, Mar 24, 2023 at 10:41:56AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> > +static int __init tdx_module_setup(void)
> > +{
> > +	int ret;
> > +
> > +	ret = tdx_enable();
> > +	if (ret) {
> > +		pr_info("Failed to initialize TDX module.\n");
> > +		return ret;
> > +	}
> > +
> > +	pr_info("TDX is supported.\n");
> 
> Both pr_info()s are not required, because tdx_enable() internally prints them.

Ok, will drop this line.


> >  #endif /* __KVM_X86_VMX_X86_OPS_H */
> > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> > index 2125fcaa3973..b264012a8478 100644
> > --- a/arch/x86/kvm/x86.c
> > +++ b/arch/x86/kvm/x86.c
> > @@ -9435,6 +9435,16 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
> >  
> >  	kvm_init_pmu_capability(ops->pmu_ops);
> >  
> > +	/*
> > +	 * TDX requires those methods to enable VMXON by
> > +	 * kvm_hardware_enable/disable_all()
> > +	 */
> > +	static_call_update(kvm_x86_check_processor_compatibility,
> > +			   ops->runtime_ops->check_processor_compatibility);
> > +	static_call_update(kvm_x86_hardware_enable,
> > +			   ops->runtime_ops->hardware_enable);
> > +	static_call_update(kvm_x86_hardware_disable,
> > +			   ops->runtime_ops->hardware_disable);
> >  	r = ops->hardware_setup();
> >  	if (r != 0)
> >  		goto out_mmu_exit;
> 
> Hmm.. I think this is ugly.  Perhaps we should never do any
> static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
> called before kvm_ops_update() and may update vendor's kvm_x86_ops.
> 
> So probably use hardware_enable_all() in hardware_setup() is a bad idea.
> 
> I think we have below options on how to handle:
> 
> 1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup().  For instance,
> something like below:
> 
> int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> {
> 	...
> 
> 	cpus_read_lock();
> 	r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
> 	if (!r)
> 		r = tdx_module_setup();
> 	on_each_cpu(vt_x86_ops.hardware_disable, ...);
> 	cpus_read_unlock();
> 
> 	...
> }
> 
> But this doesn't clean up nicely when there's some particular cpus fail to do
> hardware_enable().  To clean up nicely, we do need additional things similar to
> the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
> wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
> hardware_enable() successfully.
> 
> 2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
> code self-contained.  But this would require exposing kvm_x86_ops as symbol,
> which isn't nice either.
> 
> 3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
> after kvm_ops_update().
> 
> Personally, I think 3) perhaps is the most elegant one, but not sure whether
> Sean/Paolo has any opinion.

I think we can simply update the ops before calling hardware_enable() and
clean up ops on failure.


diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 709134e7c12e..42c9b58fd1ef 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9436,20 +9436,15 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	kvm_init_pmu_capability(ops->pmu_ops);
 
 	/*
-	 * TDX requires those methods to enable VMXON by
-	 * kvm_hardware_enable/disable_all_nolock()
+	 * Because TDX hardware_setup uses x86_ops, update ops before calling
+	 * ops->hardware_setup().
 	 */
-	static_call_update(kvm_x86_check_processor_compatibility,
-			   ops->runtime_ops->check_processor_compatibility);
-	static_call_update(kvm_x86_hardware_enable,
-			   ops->runtime_ops->hardware_enable);
-	static_call_update(kvm_x86_hardware_disable,
-			   ops->runtime_ops->hardware_disable);
+	kvm_ops_update(ops);
 	r = ops->hardware_setup();
-	if (r != 0)
+	if (r != 0) {
+		kvm_x86_ops.hardware_enable = NULL;
 		goto out_mmu_exit;
-
-	kvm_ops_update(ops);
+	}
 
 	for_each_online_cpu(cpu) {
 		smp_call_function_single(cpu, kvm_x86_check_cpu_compat, &r, 1);


-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Huang, Kai 2 years, 10 months ago
> 
> > >  
> > > +	/*
> > > +	 * TDX requires those methods to enable VMXON by
> > > +	 * kvm_hardware_enable/disable_all()
> > > +	 */
> > > +	static_call_update(kvm_x86_check_processor_compatibility,
> > > +			   ops->runtime_ops->check_processor_compatibility);
> > > +	static_call_update(kvm_x86_hardware_enable,
> > > +			   ops->runtime_ops->hardware_enable);
> > > +	static_call_update(kvm_x86_hardware_disable,
> > > +			   ops->runtime_ops->hardware_disable);
> > >  	r = ops->hardware_setup();
> > >  	if (r != 0)
> > >  		goto out_mmu_exit;
> > 
> > Hmm.. I think this is ugly.  Perhaps we should never do any
> > static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
> > called before kvm_ops_update() and may update vendor's kvm_x86_ops.
> > 
> > So probably use hardware_enable_all() in hardware_setup() is a bad idea.
> > 
> > I think we have below options on how to handle:
> > 
> > 1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup().  For instance,
> > something like below:
> > 
> > int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> > {
> > 	...
> > 
> > 	cpus_read_lock();
> > 	r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
> > 	if (!r)
> > 		r = tdx_module_setup();
> > 	on_each_cpu(vt_x86_ops.hardware_disable, ...);
> > 	cpus_read_unlock();
> > 
> > 	...
> > }
> > 
> > But this doesn't clean up nicely when there's some particular cpus fail to do
> > hardware_enable().  To clean up nicely, we do need additional things similar to
> > the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
> > wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
> > hardware_enable() successfully.
> > 
> > 2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
> > code self-contained.  But this would require exposing kvm_x86_ops as symbol,
> > which isn't nice either.
> > 
> > 3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
> > after kvm_ops_update().
> > 
> > Personally, I think 3) perhaps is the most elegant one, but not sure whether
> > Sean/Paolo has any opinion.
> 
> I think we can simply update the ops before calling hardware_enable() and
> clean up ops on failure.
> 
> 

This doesn't work because hardware_setup() may update vendor's kvm_x86_ops.

If you do kvm_ops_update() before hardware_setup(), you need to manually update
those updated (in hardware_setup()) callbacks again after. 
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Isaku Yamahata 2 years, 10 months ago
On Wed, Mar 29, 2023 at 01:13:45AM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> > 
> > > >  
> > > > +	/*
> > > > +	 * TDX requires those methods to enable VMXON by
> > > > +	 * kvm_hardware_enable/disable_all()
> > > > +	 */
> > > > +	static_call_update(kvm_x86_check_processor_compatibility,
> > > > +			   ops->runtime_ops->check_processor_compatibility);
> > > > +	static_call_update(kvm_x86_hardware_enable,
> > > > +			   ops->runtime_ops->hardware_enable);
> > > > +	static_call_update(kvm_x86_hardware_disable,
> > > > +			   ops->runtime_ops->hardware_disable);
> > > >  	r = ops->hardware_setup();
> > > >  	if (r != 0)
> > > >  		goto out_mmu_exit;
> > > 
> > > Hmm.. I think this is ugly.  Perhaps we should never do any
> > > static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
> > > called before kvm_ops_update() and may update vendor's kvm_x86_ops.
> > > 
> > > So probably use hardware_enable_all() in hardware_setup() is a bad idea.
> > > 
> > > I think we have below options on how to handle:
> > > 
> > > 1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup().  For instance,
> > > something like below:
> > > 
> > > int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> > > {
> > > 	...
> > > 
> > > 	cpus_read_lock();
> > > 	r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
> > > 	if (!r)
> > > 		r = tdx_module_setup();
> > > 	on_each_cpu(vt_x86_ops.hardware_disable, ...);
> > > 	cpus_read_unlock();
> > > 
> > > 	...
> > > }
> > > 
> > > But this doesn't clean up nicely when there's some particular cpus fail to do
> > > hardware_enable().  To clean up nicely, we do need additional things similar to
> > > the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
> > > wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
> > > hardware_enable() successfully.
> > > 
> > > 2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
> > > code self-contained.  But this would require exposing kvm_x86_ops as symbol,
> > > which isn't nice either.
> > > 
> > > 3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
> > > after kvm_ops_update().
> > > 
> > > Personally, I think 3) perhaps is the most elegant one, but not sure whether
> > > Sean/Paolo has any opinion.
> > 
> > I think we can simply update the ops before calling hardware_enable() and
> > clean up ops on failure.
> > 
> > 
> 
> This doesn't work because hardware_setup() may update vendor's kvm_x86_ops.
> 
> If you do kvm_ops_update() before hardware_setup(), you need to manually update
> those updated (in hardware_setup()) callbacks again after. 

We can call kvm_ops_update() twice before and after hardware_setup().
-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Huang, Kai 2 years, 10 months ago
On Wed, 2023-03-29 at 14:56 -0700, Isaku Yamahata wrote:
> On Wed, Mar 29, 2023 at 01:13:45AM +0000,
> "Huang, Kai" <kai.huang@intel.com> wrote:
> 
> > > 
> > > > >  
> > > > > +	/*
> > > > > +	 * TDX requires those methods to enable VMXON by
> > > > > +	 * kvm_hardware_enable/disable_all()
> > > > > +	 */
> > > > > +	static_call_update(kvm_x86_check_processor_compatibility,
> > > > > +			   ops->runtime_ops->check_processor_compatibility);
> > > > > +	static_call_update(kvm_x86_hardware_enable,
> > > > > +			   ops->runtime_ops->hardware_enable);
> > > > > +	static_call_update(kvm_x86_hardware_disable,
> > > > > +			   ops->runtime_ops->hardware_disable);
> > > > >  	r = ops->hardware_setup();
> > > > >  	if (r != 0)
> > > > >  		goto out_mmu_exit;
> > > > 
> > > > Hmm.. I think this is ugly.  Perhaps we should never do any
> > > > static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
> > > > called before kvm_ops_update() and may update vendor's kvm_x86_ops.
> > > > 
> > > > So probably use hardware_enable_all() in hardware_setup() is a bad idea.
> > > > 
> > > > I think we have below options on how to handle:
> > > > 
> > > > 1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup().  For instance,
> > > > something like below:
> > > > 
> > > > int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> > > > {
> > > > 	...
> > > > 
> > > > 	cpus_read_lock();
> > > > 	r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
> > > > 	if (!r)
> > > > 		r = tdx_module_setup();
> > > > 	on_each_cpu(vt_x86_ops.hardware_disable, ...);
> > > > 	cpus_read_unlock();
> > > > 
> > > > 	...
> > > > }
> > > > 
> > > > But this doesn't clean up nicely when there's some particular cpus fail to do
> > > > hardware_enable().  To clean up nicely, we do need additional things similar to
> > > > the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
> > > > wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
> > > > hardware_enable() successfully.
> > > > 
> > > > 2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
> > > > code self-contained.  But this would require exposing kvm_x86_ops as symbol,
> > > > which isn't nice either.
> > > > 
> > > > 3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
> > > > after kvm_ops_update().
> > > > 
> > > > Personally, I think 3) perhaps is the most elegant one, but not sure whether
> > > > Sean/Paolo has any opinion.
> > > 
> > > I think we can simply update the ops before calling hardware_enable() and
> > > clean up ops on failure.
> > > 
> > > 
> > 
> > This doesn't work because hardware_setup() may update vendor's kvm_x86_ops.
> > 
> > If you do kvm_ops_update() before hardware_setup(), you need to manually update
> > those updated (in hardware_setup()) callbacks again after. 
> 
> We can call kvm_ops_update() twice before and after hardware_setup().
> 

Personally I think it's too ugly.  
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Isaku Yamahata 2 years, 10 months ago
On Wed, Mar 29, 2023 at 11:17:31PM +0000,
"Huang, Kai" <kai.huang@intel.com> wrote:

> On Wed, 2023-03-29 at 14:56 -0700, Isaku Yamahata wrote:
> > On Wed, Mar 29, 2023 at 01:13:45AM +0000,
> > "Huang, Kai" <kai.huang@intel.com> wrote:
> > 
> > > > 
> > > > > >  
> > > > > > +	/*
> > > > > > +	 * TDX requires those methods to enable VMXON by
> > > > > > +	 * kvm_hardware_enable/disable_all()
> > > > > > +	 */
> > > > > > +	static_call_update(kvm_x86_check_processor_compatibility,
> > > > > > +			   ops->runtime_ops->check_processor_compatibility);
> > > > > > +	static_call_update(kvm_x86_hardware_enable,
> > > > > > +			   ops->runtime_ops->hardware_enable);
> > > > > > +	static_call_update(kvm_x86_hardware_disable,
> > > > > > +			   ops->runtime_ops->hardware_disable);
> > > > > >  	r = ops->hardware_setup();
> > > > > >  	if (r != 0)
> > > > > >  		goto out_mmu_exit;
> > > > > 
> > > > > Hmm.. I think this is ugly.  Perhaps we should never do any
> > > > > static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
> > > > > called before kvm_ops_update() and may update vendor's kvm_x86_ops.
> > > > > 
> > > > > So probably use hardware_enable_all() in hardware_setup() is a bad idea.
> > > > > 
> > > > > I think we have below options on how to handle:
> > > > > 
> > > > > 1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup().  For instance,
> > > > > something like below:
> > > > > 
> > > > > int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> > > > > {
> > > > > 	...
> > > > > 
> > > > > 	cpus_read_lock();
> > > > > 	r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
> > > > > 	if (!r)
> > > > > 		r = tdx_module_setup();
> > > > > 	on_each_cpu(vt_x86_ops.hardware_disable, ...);
> > > > > 	cpus_read_unlock();
> > > > > 
> > > > > 	...
> > > > > }
> > > > > 
> > > > > But this doesn't clean up nicely when there's some particular cpus fail to do
> > > > > hardware_enable().  To clean up nicely, we do need additional things similar to
> > > > > the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
> > > > > wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
> > > > > hardware_enable() successfully.
> > > > > 
> > > > > 2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
> > > > > code self-contained.  But this would require exposing kvm_x86_ops as symbol,
> > > > > which isn't nice either.
> > > > > 
> > > > > 3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
> > > > > after kvm_ops_update().
> > > > > 
> > > > > Personally, I think 3) perhaps is the most elegant one, but not sure whether
> > > > > Sean/Paolo has any opinion.
> > > > 
> > > > I think we can simply update the ops before calling hardware_enable() and
> > > > clean up ops on failure.
> > > > 
> > > > 
> > > 
> > > This doesn't work because hardware_setup() may update vendor's kvm_x86_ops.
> > > 
> > > If you do kvm_ops_update() before hardware_setup(), you need to manually update
> > > those updated (in hardware_setup()) callbacks again after. 
> > 
> > We can call kvm_ops_update() twice before and after hardware_setup().
> > 
> 
> Personally I think it's too ugly.  

So you prefer the option 3 to calling kvm_ops_update() twice. Okay, I'll update
the patch.
-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Isaku Yamahata 2 years, 10 months ago
On Wed, Mar 29, 2023 at 06:04:38PM -0700,
Isaku Yamahata <isaku.yamahata@gmail.com> wrote:

> On Wed, Mar 29, 2023 at 11:17:31PM +0000,
> "Huang, Kai" <kai.huang@intel.com> wrote:
> 
> > On Wed, 2023-03-29 at 14:56 -0700, Isaku Yamahata wrote:
> > > On Wed, Mar 29, 2023 at 01:13:45AM +0000,
> > > "Huang, Kai" <kai.huang@intel.com> wrote:
> > > 
> > > > > 
> > > > > > >  
> > > > > > > +	/*
> > > > > > > +	 * TDX requires those methods to enable VMXON by
> > > > > > > +	 * kvm_hardware_enable/disable_all()
> > > > > > > +	 */
> > > > > > > +	static_call_update(kvm_x86_check_processor_compatibility,
> > > > > > > +			   ops->runtime_ops->check_processor_compatibility);
> > > > > > > +	static_call_update(kvm_x86_hardware_enable,
> > > > > > > +			   ops->runtime_ops->hardware_enable);
> > > > > > > +	static_call_update(kvm_x86_hardware_disable,
> > > > > > > +			   ops->runtime_ops->hardware_disable);
> > > > > > >  	r = ops->hardware_setup();
> > > > > > >  	if (r != 0)
> > > > > > >  		goto out_mmu_exit;
> > > > > > 
> > > > > > Hmm.. I think this is ugly.  Perhaps we should never do any
> > > > > > static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
> > > > > > called before kvm_ops_update() and may update vendor's kvm_x86_ops.
> > > > > > 
> > > > > > So probably use hardware_enable_all() in hardware_setup() is a bad idea.
> > > > > > 
> > > > > > I think we have below options on how to handle:
> > > > > > 
> > > > > > 1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup().  For instance,
> > > > > > something like below:
> > > > > > 
> > > > > > int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> > > > > > {
> > > > > > 	...
> > > > > > 
> > > > > > 	cpus_read_lock();
> > > > > > 	r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
> > > > > > 	if (!r)
> > > > > > 		r = tdx_module_setup();
> > > > > > 	on_each_cpu(vt_x86_ops.hardware_disable, ...);
> > > > > > 	cpus_read_unlock();
> > > > > > 
> > > > > > 	...
> > > > > > }
> > > > > > 
> > > > > > But this doesn't clean up nicely when there's some particular cpus fail to do
> > > > > > hardware_enable().  To clean up nicely, we do need additional things similar to
> > > > > > the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
> > > > > > wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
> > > > > > hardware_enable() successfully.
> > > > > > 
> > > > > > 2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
> > > > > > code self-contained.  But this would require exposing kvm_x86_ops as symbol,
> > > > > > which isn't nice either.
> > > > > > 
> > > > > > 3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
> > > > > > after kvm_ops_update().
> > > > > > 
> > > > > > Personally, I think 3) perhaps is the most elegant one, but not sure whether
> > > > > > Sean/Paolo has any opinion.
> > > > > 
> > > > > I think we can simply update the ops before calling hardware_enable() and
> > > > > clean up ops on failure.
> > > > > 
> > > > > 
> > > > 
> > > > This doesn't work because hardware_setup() may update vendor's kvm_x86_ops.
> > > > 
> > > > If you do kvm_ops_update() before hardware_setup(), you need to manually update
> > > > those updated (in hardware_setup()) callbacks again after. 
> > > 
> > > We can call kvm_ops_update() twice before and after hardware_setup().
> > > 
> > 
> > Personally I think it's too ugly.  
> 
> So you prefer the option 3 to calling kvm_ops_update() twice. Okay, I'll update
> the patch.

After playing with hardware_post_setup(), it's inevitable to call
kvm_ops_update() twice.
When VMX initialization succeeded with hardware_setup(), but TDX initialization
with hardware_post_setup() failed, we'd like to support only VMX with warning
message.  In such case, we need to revert x86_ops to VMX only.
It doesn't make sense to introduce hardware_post_setup() to avoid calling
kvm_update_ops twice.
-- 
Isaku Yamahata <isaku.yamahata@gmail.com>
Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Huang, Kai 2 years, 10 months ago
On Wed, 2023-04-05 at 13:07 -0700, Isaku Yamahata wrote:
> On Wed, Mar 29, 2023 at 06:04:38PM -0700,
> Isaku Yamahata <isaku.yamahata@gmail.com> wrote:
> 
> > On Wed, Mar 29, 2023 at 11:17:31PM +0000,
> > "Huang, Kai" <kai.huang@intel.com> wrote:
> > 
> > > On Wed, 2023-03-29 at 14:56 -0700, Isaku Yamahata wrote:
> > > > On Wed, Mar 29, 2023 at 01:13:45AM +0000,
> > > > "Huang, Kai" <kai.huang@intel.com> wrote:
> > > > 
> > > > > > 
> > > > > > > >  
> > > > > > > > +	/*
> > > > > > > > +	 * TDX requires those methods to enable VMXON by
> > > > > > > > +	 * kvm_hardware_enable/disable_all()
> > > > > > > > +	 */
> > > > > > > > +	static_call_update(kvm_x86_check_processor_compatibility,
> > > > > > > > +			   ops->runtime_ops->check_processor_compatibility);
> > > > > > > > +	static_call_update(kvm_x86_hardware_enable,
> > > > > > > > +			   ops->runtime_ops->hardware_enable);
> > > > > > > > +	static_call_update(kvm_x86_hardware_disable,
> > > > > > > > +			   ops->runtime_ops->hardware_disable);
> > > > > > > >  	r = ops->hardware_setup();
> > > > > > > >  	if (r != 0)
> > > > > > > >  		goto out_mmu_exit;
> > > > > > > 
> > > > > > > Hmm.. I think this is ugly.  Perhaps we should never do any
> > > > > > > static_call(kvm_x86_xxx)() in hardware_setup(), because hardware_setup() is
> > > > > > > called before kvm_ops_update() and may update vendor's kvm_x86_ops.
> > > > > > > 
> > > > > > > So probably use hardware_enable_all() in hardware_setup() is a bad idea.
> > > > > > > 
> > > > > > > I think we have below options on how to handle:
> > > > > > > 
> > > > > > > 1) Use VMX's kvm_x86_ops directly in tdx_hardware_setup().  For instance,
> > > > > > > something like below:
> > > > > > > 
> > > > > > > int __init tdx_hardware_setup(struct kvm_x86_ops *x86_ops)
> > > > > > > {
> > > > > > > 	...
> > > > > > > 
> > > > > > > 	cpus_read_lock();
> > > > > > > 	r = on_each_cpu(vt_x86_ops.hardware_enable, ...);
> > > > > > > 	if (!r)
> > > > > > > 		r = tdx_module_setup();
> > > > > > > 	on_each_cpu(vt_x86_ops.hardware_disable, ...);
> > > > > > > 	cpus_read_unlock();
> > > > > > > 
> > > > > > > 	...
> > > > > > > }
> > > > > > > 
> > > > > > > But this doesn't clean up nicely when there's some particular cpus fail to do
> > > > > > > hardware_enable().  To clean up nicely, we do need additional things similar to
> > > > > > > the hardware_enable_all() code path: a per-cpu variable or a cpumask_t + a
> > > > > > > wrapper of vt_x86_ops->hardware_enable() to track which cpus have done
> > > > > > > hardware_enable() successfully.
> > > > > > > 
> > > > > > > 2) Move those static_call_update() into tdx_hardware_setup() so they are TDX
> > > > > > > code self-contained.  But this would require exposing kvm_x86_ops as symbol,
> > > > > > > which isn't nice either.
> > > > > > > 
> > > > > > > 3) Introduce another kvm_x86_init_ops->hardware_post_setup(), which is called
> > > > > > > after kvm_ops_update().
> > > > > > > 
> > > > > > > Personally, I think 3) perhaps is the most elegant one, but not sure whether
> > > > > > > Sean/Paolo has any opinion.
> > > > > > 
> > > > > > I think we can simply update the ops before calling hardware_enable() and
> > > > > > clean up ops on failure.
> > > > > > 
> > > > > > 
> > > > > 
> > > > > This doesn't work because hardware_setup() may update vendor's kvm_x86_ops.
> > > > > 
> > > > > If you do kvm_ops_update() before hardware_setup(), you need to manually update
> > > > > those updated (in hardware_setup()) callbacks again after. 
> > > > 
> > > > We can call kvm_ops_update() twice before and after hardware_setup().
> > > > 
> > > 
> > > Personally I think it's too ugly.  
> > 
> > So you prefer the option 3 to calling kvm_ops_update() twice. Okay, I'll update
> > the patch.
> 
> After playing with hardware_post_setup(), it's inevitable to call
> kvm_ops_update() twice.
> When VMX initialization succeeded with hardware_setup(), but TDX initialization
> with hardware_post_setup() failed, we'd like to support only VMX with warning
> message.  In such case, we need to revert x86_ops to VMX only.
> It doesn't make sense to introduce hardware_post_setup() to avoid calling
> kvm_update_ops twice.
> 

OK.  Then how about option 1) ?

We just need another wrapper around  vt_x86_ops.hardware_{enable|disable}() and
use a VT's own per-cpu variable to track which has cpu has done the
vmx_hardware_enable().

We can even put the per-cpu variable inside the vt_hardware_enable() itself w/o
introducing the wrapper.

But again it's better if Sean can input here.

Re: [PATCH v13 003/113] KVM: TDX: Initialize the TDX module when loading the KVM intel kernel module
Posted by Huang, Kai 2 years, 11 months ago
> 
> > +static bool enable_tdx __ro_after_init;
> > +module_param_named(tdx, enable_tdx, bool, 0444);
> > +
> > +static __init int vt_hardware_setup(void)
> > +{
> > +	int ret;
> > +
> > +	ret = vmx_hardware_setup();
> > +	if (ret)
> > +		return ret;
> > +
> > +	enable_tdx = enable_tdx && !tdx_hardware_setup(&vt_x86_ops);
> 
> Unfortunately, the enable_tdx should also be protected by the
> cpus_read_lock(),
> because CPU hotplug code path checks it too (as seen in your next patch).

Sorry I was wrong.  I forgot the CPU hotplug callbacks are registered in
kvm_init(), which happens after hardware_setup(), so the CPU hotplug code cannot
race with this hardware_setup() code here.

[...]

> 
> I think you can merge next patch with this one because they are kinda related.
>  
> 
> Putting them together allows people to review more easily.

And perhaps this isn't necessary either.