Relocate the VMCLEAR of VMCSs from KVM to cpu_disable_virtualization() in
x86. This eliminates the need to call cpu_emergency_disable_virtualization()
before cpu_disable_virtualization() and prepares for removing the emergency
reboot callback that calls into KVM from the CPU reboot path.
Signed-off-by: Xin Li (Intel) <xin@zytor.com>
---
arch/x86/include/asm/processor.h | 1 +
arch/x86/kernel/cpu/common.c | 34 ++++++++++++++++++++++++++++++++
arch/x86/kernel/crash.c | 3 ---
arch/x86/kernel/reboot.c | 7 +++----
arch/x86/kernel/smp.c | 6 ------
arch/x86/kvm/vmx/vmcs.h | 5 ++++-
arch/x86/kvm/vmx/vmx.c | 34 +++-----------------------------
7 files changed, 45 insertions(+), 45 deletions(-)
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 0bfd4eb1e9e2..d8a28c57248d 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -230,6 +230,7 @@ void init_cpu_devs(void);
void get_cpu_vendor(struct cpuinfo_x86 *c);
extern void early_cpu_init(void);
extern void identify_secondary_cpu(unsigned int cpu);
+extern struct list_head* get_loaded_vmcss_on_cpu(int cpu);
extern void cpu_enable_virtualization(void);
extern void cpu_disable_virtualization(void);
extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 39b9be9a2fb1..73abacf57ed4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1950,6 +1950,18 @@ union vmxon_vmcs {
};
static DEFINE_PER_CPU_PAGE_ALIGNED(union vmxon_vmcs, vmxon_vmcs);
+/*
+ * We maintain a per-CPU linked list of VMCSs loaded on that CPU. This is needed
+ * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
+ */
+static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
+
+/* Export an accessor rather than the raw per-CPU data, keeping the list private */
+struct list_head* get_loaded_vmcss_on_cpu(int cpu)
+{
+ return &per_cpu(loaded_vmcss_on_cpu, cpu);
+}
+EXPORT_SYMBOL_GPL(get_loaded_vmcss_on_cpu);
/*
* Executed during the CPU startup phase to execute VMXON to enable VMX. This
@@ -1975,6 +1987,8 @@ void cpu_enable_virtualization(void)
return;
}
+ INIT_LIST_HEAD(get_loaded_vmcss_on_cpu(cpu));
+
memset(this_cpu_ptr(&vmxon_vmcs), 0, PAGE_SIZE);
/*
@@ -2002,6 +2016,18 @@ void cpu_enable_virtualization(void)
intel_pt_handle_vmx(0);
}
+static __always_inline void vmclear(void *p)
+{
+ u64 pa = __pa(p);
+ asm volatile ("vmclear %0" : : "m"(pa) : "cc");
+}
+
+struct loaded_vmcs_basic {
+ struct list_head loaded_vmcss_on_cpu_link;
+ struct vmcs_hdr *vmcs;
+ struct vmcs_hdr *shadow_vmcs;
+};
+
/*
* Because INIT interrupts are blocked during VMX operation, this function
* must be called just before a CPU shuts down to ensure it can be brought
@@ -2016,6 +2042,7 @@ void cpu_enable_virtualization(void)
void cpu_disable_virtualization(void)
{
int cpu = raw_smp_processor_id();
+ struct loaded_vmcs_basic *v;
if (!is_vmx_supported())
return;
@@ -2025,6 +2052,13 @@ void cpu_disable_virtualization(void)
return;
}
+ list_for_each_entry(v, get_loaded_vmcss_on_cpu(cpu),
+ loaded_vmcss_on_cpu_link) {
+ vmclear(v->vmcs);
+ if (v->shadow_vmcs)
+ vmclear(v->shadow_vmcs);
+ }
+
asm goto("1: vmxoff\n\t"
_ASM_EXTABLE(1b, %l[fault])
::: "cc", "memory" : fault);
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 772c6d350b50..e5b374587be2 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -111,9 +111,6 @@ void native_machine_crash_shutdown(struct pt_regs *regs)
crash_smp_send_stop();
- /* Kept to VMCLEAR loaded VMCSs */
- cpu_emergency_disable_virtualization();
-
/*
* Disable Intel PT to stop its logging
*/
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 7433e634018f..d8c3e2d8481f 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -633,7 +633,7 @@ static void native_machine_emergency_restart(void)
unsigned short mode;
if (reboot_emergency)
- emergency_reboot_disable_virtualization();
+ nmi_shootdown_cpus_on_restart();
tboot_shutdown(TB_SHUTDOWN_REBOOT);
@@ -876,9 +876,6 @@ static int crash_nmi_callback(unsigned int val, struct pt_regs *regs)
if (shootdown_callback)
shootdown_callback(cpu, regs);
- /* Kept to VMCLEAR loaded VMCSs */
- cpu_emergency_disable_virtualization();
-
atomic_dec(&waiting_for_crash_ipi);
/* Disable virtualization, usually this is an AP */
@@ -955,6 +952,8 @@ void nmi_shootdown_cpus(nmi_shootdown_cb callback)
static inline void nmi_shootdown_cpus_on_restart(void)
{
+ local_irq_disable();
+
if (!crash_ipi_issued)
nmi_shootdown_cpus(NULL);
}
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index eb6a389ba1a9..b4f50c88e7e2 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -124,9 +124,6 @@ static int smp_stop_nmi_callback(unsigned int val, struct pt_regs *regs)
if (raw_smp_processor_id() == atomic_read(&stopping_cpu))
return NMI_HANDLED;
- /* Kept to VMCLEAR loaded VMCSs */
- cpu_emergency_disable_virtualization();
-
stop_this_cpu(NULL);
return NMI_HANDLED;
@@ -139,9 +136,6 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_reboot)
{
apic_eoi();
- /* Kept to VMCLEAR loaded VMCSs */
- cpu_emergency_disable_virtualization();
-
stop_this_cpu(NULL);
}
diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h
index da5631924432..10cbfd567dec 100644
--- a/arch/x86/kvm/vmx/vmcs.h
+++ b/arch/x86/kvm/vmx/vmcs.h
@@ -52,8 +52,12 @@ struct vmcs_controls_shadow {
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
* loaded on this CPU (so we can clear them if the CPU goes down).
+ *
+ * Note: the layout of the first three members (a list_head followed by two
+ * VMCS pointers) is load-bearing — it must stay in sync with the mirror
*/
struct loaded_vmcs {
+ struct list_head loaded_vmcss_on_cpu_link;
struct vmcs *vmcs;
struct vmcs *shadow_vmcs;
int cpu;
@@ -65,7 +69,6 @@ struct loaded_vmcs {
ktime_t entry_time;
s64 vnmi_blocked_time;
unsigned long *msr_bitmap;
- struct list_head loaded_vmcss_on_cpu_link;
struct vmcs_host_state host_state;
struct vmcs_controls_shadow controls_shadow;
};
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 26af0a8ae08f..b033288e645a 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -469,11 +469,6 @@ noinline void invept_error(unsigned long ext, u64 eptp)
}
DEFINE_PER_CPU(struct vmcs *, current_vmcs);
-/*
- * We maintain a per-CPU linked-list of VMCS loaded on that CPU. This is needed
- * when a CPU is brought down, and we need to VMCLEAR all VMCSs loaded on it.
- */
-static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
static DECLARE_BITMAP(vmx_vpid_bitmap, VMX_NR_VPIDS);
static DEFINE_SPINLOCK(vmx_vpid_lock);
@@ -676,26 +671,6 @@ static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
void vmx_emergency_disable_virtualization_cpu(void)
{
- int cpu = raw_smp_processor_id();
- struct loaded_vmcs *v;
-
- kvm_rebooting = true;
-
- /*
- * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
- * set in task context. If this races with VMX is disabled by an NMI,
- * VMCLEAR and VMXOFF may #UD, but KVM will eat those faults due to
- * kvm_rebooting set.
- */
- if (!(__read_cr4() & X86_CR4_VMXE))
- return;
-
- list_for_each_entry(v, &per_cpu(loaded_vmcss_on_cpu, cpu),
- loaded_vmcss_on_cpu_link) {
- vmcs_clear(v->vmcs);
- if (v->shadow_vmcs)
- vmcs_clear(v->shadow_vmcs);
- }
}
static void __loaded_vmcs_clear(void *arg)
@@ -1388,7 +1363,7 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
smp_rmb();
list_add(&vmx->loaded_vmcs->loaded_vmcss_on_cpu_link,
- &per_cpu(loaded_vmcss_on_cpu, cpu));
+ get_loaded_vmcss_on_cpu(cpu));
local_irq_enable();
}
@@ -2754,7 +2729,7 @@ static void vmclear_local_loaded_vmcss(void)
int cpu = raw_smp_processor_id();
struct loaded_vmcs *v, *n;
- list_for_each_entry_safe(v, n, &per_cpu(loaded_vmcss_on_cpu, cpu),
+ list_for_each_entry_safe(v, n, get_loaded_vmcss_on_cpu(cpu),
loaded_vmcss_on_cpu_link)
__loaded_vmcs_clear(v);
}
@@ -8441,11 +8416,8 @@ int __init vmx_init(void)
if (r)
goto err_l1d_flush;
- for_each_possible_cpu(cpu) {
- INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
-
+ for_each_possible_cpu(cpu)
pi_init_cpu(cpu);
- }
vmx_check_vmcs12_offsets();
--
2.51.0
© 2016 - 2025 Red Hat, Inc.