Date: Tue, 7 Nov 2023 20:20:01 +0000
In-Reply-To: <20231107202002.667900-1-aghulati@google.com>
References: <20231107202002.667900-1-aghulati@google.com>
Message-ID: <20231107202002.667900-14-aghulati@google.com>
Subject: [RFC PATCH 13/14] KVM: x86: VAC: Move all hardware enable/disable code into VAC
From: Anish Ghulati
To: kvm@vger.kernel.org, linux-kernel@vger.kernel.org, Sean Christopherson, Paolo Bonzini, Thomas Gleixner, Ingo Molnar, Borislav Petkov, Dave Hansen, x86@kernel.org, hpa@zytor.com, Vitaly Kuznetsov, peterz@infradead.org, paulmck@kernel.org, Mark Rutland
Cc: Anish Ghulati

De-indirect hardware enable and disable. Now that all of these
functions are in the VAC, they don't need to be called via an indirect
ops table and static call.

Signed-off-by: Anish Ghulati
---
 arch/x86/include/asm/kvm-x86-ops.h |   2 -
 arch/x86/kvm/svm/svm.c             |   2 -
 arch/x86/kvm/svm/svm_ops.h         |   1 +
 arch/x86/kvm/vac.c                 |  46 +++++++++++-
 arch/x86/kvm/vac.h                 |   9 ++-
 arch/x86/kvm/vmx/vmx.c             |   2 -
 arch/x86/kvm/vmx/vmx_ops.h         |   1 +
 arch/x86/kvm/x86.c                 | 117 -----------------------------
 arch/x86/kvm/x86.h                 |   2 -
 include/linux/kvm_host.h           |   2 +
 virt/kvm/vac.h                     |   5 ++
 11 files changed, 58 insertions(+), 131 deletions(-)

diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h
index 764be4a26a0c..340dcae9dd32 100644
--- a/arch/x86/include/asm/kvm-x86-ops.h
+++ b/arch/x86/include/asm/kvm-x86-ops.h
@@ -16,8 +16,6 @@ BUILD_BUG_ON(1)
  */
 KVM_X86_OP(vendor_exit)
 KVM_X86_OP(check_processor_compatibility)
-KVM_X86_OP(hardware_enable)
-KVM_X86_OP(hardware_disable)
 KVM_X86_OP(hardware_unsetup)
 KVM_X86_OP(has_emulated_msr)
 KVM_X86_OP(vcpu_after_set_cpuid)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index df5673c98e7b..fb2c72430c7a 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4739,8 +4739,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.check_processor_compatibility = svm_check_processor_compat,
 
 	.hardware_unsetup = svm_hardware_unsetup,
-	.hardware_enable = svm_hardware_enable,
-	.hardware_disable = svm_hardware_disable,
 	.has_emulated_msr = svm_has_emulated_msr,
 
 	.vcpu_create = svm_vcpu_create,
diff --git a/arch/x86/kvm/svm/svm_ops.h b/arch/x86/kvm/svm/svm_ops.h
index 36c8af87a707..5e89c06b5147 100644
--- a/arch/x86/kvm/svm/svm_ops.h
+++ b/arch/x86/kvm/svm/svm_ops.h
@@ -4,6 +4,7 @@
 
 #include
 
+#include "../vac.h"
 #include "x86.h"
 
 #define svm_asm(insn, clobber...)				\
diff --git a/arch/x86/kvm/vac.c b/arch/x86/kvm/vac.c
index ab77aee4e1fa..79f5c2ac159a 100644
--- a/arch/x86/kvm/vac.c
+++ b/arch/x86/kvm/vac.c
@@ -3,6 +3,8 @@
 #include "vac.h"
 #include
 
+extern bool kvm_rebooting;
+
 u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
 struct kvm_user_return_msrs __percpu *user_return_msrs;
 
@@ -35,7 +37,7 @@ void kvm_on_user_return(struct user_return_notifier *urn)
 	}
 }
 
-void kvm_user_return_msr_cpu_online(void)
+static void kvm_user_return_msr_cpu_online(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
@@ -49,7 +51,7 @@ void kvm_user_return_msr_cpu_online(void)
 	}
 }
 
-void drop_user_return_notifiers(void)
+static void drop_user_return_notifiers(void)
 {
 	unsigned int cpu = smp_processor_id();
 	struct kvm_user_return_msrs *msrs = per_cpu_ptr(user_return_msrs, cpu);
@@ -117,6 +119,46 @@ int kvm_set_user_return_msr(unsigned int slot, u64 value, u64 mask)
 	return 0;
 }
 
+int kvm_arch_hardware_enable(void)
+{
+	int ret = -EIO;
+
+	kvm_user_return_msr_cpu_online();
+
+	if (kvm_is_vmx_supported())
+		ret = vmx_hardware_enable();
+	else if (kvm_is_svm_supported())
+		ret = svm_hardware_enable();
+	if (ret != 0)
+		return ret;
+
+	// TODO: Handle unstable TSC
+
+	return 0;
+}
+
+void kvm_arch_hardware_disable(void)
+{
+	if (kvm_is_vmx_supported())
+		vmx_hardware_disable();
+	else if (kvm_is_svm_supported())
+		svm_hardware_disable();
+	drop_user_return_notifiers();
+}
+
+/*
+ * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
+ *
+ * Hardware virtualization extension instructions may fault if a reboot turns
+ * off virtualization while processes are running. Usually after catching the
+ * fault we just panic; during reboot instead the instruction is ignored.
+ */
+noinstr void kvm_spurious_fault(void)
+{
+	/* Fault while not rebooting.  We want the trace. */
+	BUG_ON(!kvm_rebooting);
+}
+
 int kvm_alloc_user_return_msrs(void)
 {
 	user_return_msrs = alloc_percpu(struct kvm_user_return_msrs);
diff --git a/arch/x86/kvm/vac.h b/arch/x86/kvm/vac.h
index 5be30cce5a1c..daf1f137d196 100644
--- a/arch/x86/kvm/vac.h
+++ b/arch/x86/kvm/vac.h
@@ -5,13 +5,14 @@
 
 #include
 
-int __init vac_init(void);
-void vac_exit(void);
+void kvm_spurious_fault(void);
 
 #ifdef CONFIG_KVM_INTEL
 bool kvm_is_vmx_supported(void);
 int __init vac_vmx_init(void);
 void vac_vmx_exit(void);
+int vmx_hardware_enable(void);
+void vmx_hardware_disable(void);
 #else
 bool kvm_is_vmx_supported(void) { return false }
 int __init vac_vmx_init(void)
@@ -25,6 +26,8 @@ void vac_vmx_exit(void) {}
 bool kvm_is_svm_supported(void);
 int __init vac_svm_init(void);
 void vac_svm_exit(void);
+int svm_hardware_enable(void);
+void svm_hardware_disable(void);
 #else
 bool kvm_is_svm_supported(void) { return false }
 int __init vac_svm_init(void)
@@ -59,8 +62,6 @@ int kvm_add_user_return_msr(u32 msr);
 int kvm_find_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned int slot, u64 value, u64 mask);
 void kvm_on_user_return(struct user_return_notifier *urn);
-void kvm_user_return_msr_cpu_online(void);
-void drop_user_return_notifiers(void);
 
 static inline bool kvm_is_supported_user_return_msr(u32 msr)
 {
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 6301b49e0e80..69a6a8591996 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -8013,8 +8013,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
 	.hardware_unsetup = vmx_hardware_unsetup,
 
-	.hardware_enable = vmx_hardware_enable,
-	.hardware_disable = vmx_hardware_disable,
 	.has_emulated_msr = vmx_has_emulated_msr,
 
 	.vm_size = sizeof(struct kvm_vmx),
diff --git a/arch/x86/kvm/vmx/vmx_ops.h b/arch/x86/kvm/vmx/vmx_ops.h
index 33af7b4c6eb4..60325fd39120 100644
--- a/arch/x86/kvm/vmx/vmx_ops.h
+++ b/arch/x86/kvm/vmx/vmx_ops.h
@@ -8,6 +8,7 @@
 
 #include "hyperv.h"
 #include "vmcs.h"
+#include "../vac.h"
 #include "../x86.h"
 
 void vmread_error(unsigned long field);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7466a5945147..a74139061e4d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -370,19 +370,6 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	return 0;
 }
 
-/*
- * Handle a fault on a hardware virtualization (VMX or SVM) instruction.
- *
- * Hardware virtualization extension instructions may fault if a reboot turns
- * off virtualization while processes are running. Usually after catching the
- * fault we just panic; during reboot instead the instruction is ignored.
- */
-noinstr void kvm_spurious_fault(void)
-{
-	/* Fault while not rebooting.  We want the trace. */
-	BUG_ON(!kvm_rebooting);
-}
-
 #define EXCPT_BENIGN		0
 #define EXCPT_CONTRIBUTORY	1
 #define EXCPT_PF		2
@@ -9363,7 +9350,6 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops)
 	return 0;
 
 out_unwind_ops:
-	kvm_x86_ops.hardware_enable = NULL;
 	static_call(kvm_x86_hardware_unsetup)();
 out_mmu_exit:
 	kvm_mmu_vendor_module_exit();
@@ -9414,7 +9400,6 @@ void kvm_x86_vendor_exit(void)
 	WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
 #endif
 	mutex_lock(&vendor_module_lock);
-	kvm_x86_ops.hardware_enable = NULL;
 	mutex_unlock(&vendor_module_lock);
 }
 
@@ -11952,108 +11937,6 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
 	kvm_rip_write(vcpu, 0);
 }
 
-int kvm_arch_hardware_enable(void)
-{
-	struct kvm *kvm;
-	struct kvm_vcpu *vcpu;
-	unsigned long i;
-	int ret;
-	u64 local_tsc;
-	u64 max_tsc = 0;
-	bool stable, backwards_tsc = false;
-
-	kvm_user_return_msr_cpu_online();
-
-	ret = kvm_x86_check_processor_compatibility();
-	if (ret)
-		return ret;
-
-	ret = static_call(kvm_x86_hardware_enable)();
-	if (ret != 0)
-		return ret;
-
-	local_tsc = rdtsc();
-	stable = !kvm_check_tsc_unstable();
-	list_for_each_entry(kvm, &vm_list, vm_list) {
-		kvm_for_each_vcpu(i, vcpu, kvm) {
-			if (!stable && vcpu->cpu == smp_processor_id())
-				kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
-			if (stable && vcpu->arch.last_host_tsc > local_tsc) {
-				backwards_tsc = true;
-				if (vcpu->arch.last_host_tsc > max_tsc)
-					max_tsc = vcpu->arch.last_host_tsc;
-			}
-		}
-	}
-
-	/*
-	 * Sometimes, even reliable TSCs go backwards. This happens on
-	 * platforms that reset TSC during suspend or hibernate actions, but
-	 * maintain synchronization. We must compensate. Fortunately, we can
-	 * detect that condition here, which happens early in CPU bringup,
-	 * before any KVM threads can be running. Unfortunately, we can't
-	 * bring the TSCs fully up to date with real time, as we aren't yet far
-	 * enough into CPU bringup that we know how much real time has actually
-	 * elapsed; our helper function, ktime_get_boottime_ns() will be using boot
-	 * variables that haven't been updated yet.
-	 *
-	 * So we simply find the maximum observed TSC above, then record the
-	 * adjustment to TSC in each VCPU. When the VCPU later gets loaded,
-	 * the adjustment will be applied. Note that we accumulate
-	 * adjustments, in case multiple suspend cycles happen before some VCPU
-	 * gets a chance to run again. In the event that no KVM threads get a
-	 * chance to run, we will miss the entire elapsed period, as we'll have
-	 * reset last_host_tsc, so VCPUs will not have the TSC adjusted and may
-	 * loose cycle time. This isn't too big a deal, since the loss will be
-	 * uniform across all VCPUs (not to mention the scenario is extremely
-	 * unlikely). It is possible that a second hibernate recovery happens
-	 * much faster than a first, causing the observed TSC here to be
-	 * smaller; this would require additional padding adjustment, which is
-	 * why we set last_host_tsc to the local tsc observed here.
-	 *
-	 * N.B. - this code below runs only on platforms with reliable TSC,
-	 * as that is the only way backwards_tsc is set above. Also note
-	 * that this runs for ALL vcpus, which is not a bug; all VCPUs should
-	 * have the same delta_cyc adjustment applied if backwards_tsc
-	 * is detected. Note further, this adjustment is only done once,
-	 * as we reset last_host_tsc on all VCPUs to stop this from being
-	 * called multiple times (one for each physical CPU bringup).
-	 *
-	 * Platforms with unreliable TSCs don't have to deal with this, they
-	 * will be compensated by the logic in vcpu_load, which sets the TSC to
-	 * catchup mode. This will catchup all VCPUs to real time, but cannot
-	 * guarantee that they stay in perfect synchronization.
-	 */
-	if (backwards_tsc) {
-		u64 delta_cyc = max_tsc - local_tsc;
-		list_for_each_entry(kvm, &vm_list, vm_list) {
-			kvm->arch.backwards_tsc_observed = true;
-			kvm_for_each_vcpu(i, vcpu, kvm) {
-				vcpu->arch.tsc_offset_adjustment += delta_cyc;
-				vcpu->arch.last_host_tsc = local_tsc;
-				kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
-			}
-
-			/*
-			 * We have to disable TSC offset matching.. if you were
-			 * booting a VM while issuing an S4 host suspend....
-			 * you may have some problem. Solving this issue is
-			 * left as an exercise to the reader.
-			 */
-			kvm->arch.last_tsc_nsec = 0;
-			kvm->arch.last_tsc_write = 0;
-		}
-
-	}
-	return 0;
-}
-
-void kvm_arch_hardware_disable(void)
-{
-	static_call(kvm_x86_hardware_disable)();
-	drop_user_return_notifiers();
-}
-
 bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
 {
 	return vcpu->kvm->arch.bsp_vcpu_id == vcpu->vcpu_id;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 1da8efcd3e9c..17ff3917b9a8 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -40,8 +40,6 @@ struct kvm_caps {
 	u64 supported_perf_cap;
 };
 
-void kvm_spurious_fault(void);
-
 #define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check)	\
 ({									\
 	bool failed = (consistency_check);				\
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index f0afe549c0d6..d26671682764 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -1467,9 +1467,11 @@ static inline void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) {}
 #endif
 
 #ifdef CONFIG_KVM_GENERIC_HARDWARE_ENABLING
+#ifndef CONFIG_X86
 int kvm_arch_hardware_enable(void);
 void kvm_arch_hardware_disable(void);
 #endif
+#endif
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
 bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
 int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
diff --git a/virt/kvm/vac.h b/virt/kvm/vac.h
index f3e7b08168df..b5159fa3f18d 100644
--- a/virt/kvm/vac.h
+++ b/virt/kvm/vac.h
@@ -13,6 +13,11 @@ int kvm_offline_cpu(unsigned int cpu);
 void hardware_disable_all(void);
 int hardware_enable_all(void);
 
+#ifdef CONFIG_X86
+int kvm_arch_hardware_enable(void);
+void kvm_arch_hardware_disable(void);
+#endif
+
 extern struct notifier_block kvm_reboot_notifier;
 
 extern struct syscore_ops kvm_syscore_ops;
-- 
2.42.0.869.gea05f2083d-goog
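
Not part of the patch, but for readers skimming the series, a minimal sketch of the call-path change the commit message describes. The helper names (kvm_is_vmx_supported(), vmx_hardware_enable(), svm_hardware_enable()) and the static_call() form are taken from the hunks above; the two wrapper functions are hypothetical and exist only to contrast the before/after dispatch.

/*
 * Before: kvm_arch_hardware_enable() bounced through the kvm_x86_ops
 * table via a static call installed by the vendor module.
 */
static int hardware_enable_via_ops_table(void)
{
	return static_call(kvm_x86_hardware_enable)();
}

/*
 * After: the VAC resolves the vendor path directly at the call site,
 * with no ops-table hop.
 */
static int hardware_enable_direct(void)
{
	int ret = -EIO;

	if (kvm_is_vmx_supported())
		ret = vmx_hardware_enable();
	else if (kvm_is_svm_supported())
		ret = svm_hardware_enable();

	return ret;
}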