From nobody Thu Sep 18 21:38:21 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id EBC06C43217 for ; Thu, 1 Dec 2022 11:04:49 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S230192AbiLALEs (ORCPT ); Thu, 1 Dec 2022 06:04:48 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44368 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S229974AbiLALEC (ORCPT ); Thu, 1 Dec 2022 06:04:02 -0500 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 22F29A9CC3; Thu, 1 Dec 2022 03:03:52 -0800 (PST) Received: from jinankjain-dranzer.zrrkmle5drku1h0apvxbr2u2ee.ix.internal.cloudapp.net (unknown [20.188.121.5]) by linux.microsoft.com (Postfix) with ESMTPSA id EA86720B83C5; Thu, 1 Dec 2022 03:03:46 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com EA86720B83C5 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1669892631; bh=wqo8SKjqAvAPfB0JD2Zy4udGVJ0BdIhQjoIMI1f64Wg=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=pYkEj71o+8YSGOwx2H+WwoM1aJLHHM1c4qq8Mb68MP5EmrZZV88jXio965DtQkE8l BAoPrSsubSQ/IILsisHz1aLgon01UsZBsQ2Re21ykd2oJ+npmrpZKzmrnU3n4xJzkb HJ0P0WlxJYBUHuOWRNEVmy5cI+TplNzLI0/63NvE= From: Jinank Jain To: jinankjain@microsoft.com Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org, decui@microsoft.com, tglx@linutronix.de, mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com, arnd@arndb.de, peterz@infradead.org, jpoimboe@kernel.org, jinankjain@linux.microsoft.com, seanjc@google.com, kirill.shutemov@linux.intel.com, ak@linux.intel.com, sathyanarayanan.kuppuswamy@linux.intel.com, linux-hyperv@vger.kernel.org, 
linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, anrayabh@linux.microsoft.com, mikelley@microsoft.com Subject: [PATCH v7 1/5] x86/hyperv: Add support for detecting nested hypervisor Date: Thu, 1 Dec 2022 11:03:35 +0000 Message-Id: X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Detect if Linux is running as a nested hypervisor in the root partition for Microsoft Hypervisor, using flags provided by MSHV. Expose a new variable hv_nested that is used later for decisions specific to the nested use case. Signed-off-by: Jinank Jain --- arch/x86/include/asm/hyperv-tlfs.h | 3 +++ arch/x86/kernel/cpu/mshyperv.c | 7 +++++++ drivers/hv/hv_common.c | 9 ++++++--- include/asm-generic/mshyperv.h | 1 + 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hype= rv-tlfs.h index 6d9368ea3701..58c03d18c235 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -114,6 +114,9 @@ /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11) =20 +/* Indicates that the hypervisor is nested within a Hyper-V partition. */ +#define HV_X64_HYPERV_NESTED BIT(12) + /* Recommend using enlightened VMCS */ #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14) =20 diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 46668e255421..f9b78d4829e3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -37,6 +37,8 @@ =20 /* Is Linux running as the root partition? 
*/ bool hv_root_partition; +/* Is Linux running on nested Microsoft Hypervisor */ +bool hv_nested; struct ms_hyperv_info ms_hyperv; =20 #if IS_ENABLED(CONFIG_HYPERV) @@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void) pr_info("Hyper-V: running as root partition\n"); } =20 + if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) { + hv_nested =3D true; + pr_info("Hyper-V: running on a nested hypervisor\n"); + } + /* * Extract host information. */ diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index ae68298c0dca..52a6f89ccdbd 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -25,17 +25,20 @@ #include =20 /* - * hv_root_partition and ms_hyperv are defined here with other Hyper-V - * specific globals so they are shared across all architectures and are + * hv_root_partition, ms_hyperv and hv_nested are defined here with other + * Hyper-V specific globals so they are shared across all architectures an= d are * built only when CONFIG_HYPERV is defined. But on x86, * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not - * defined, and it uses these two variables. So mark them as __weak + * defined, and it uses these three variables. So mark them as __weak * here, allowing for an overriding definition in the module containing * ms_hyperv_init_platform(). 
*/ bool __weak hv_root_partition; EXPORT_SYMBOL_GPL(hv_root_partition); =20 +bool __weak hv_nested; +EXPORT_SYMBOL_GPL(hv_nested); + struct ms_hyperv_info __weak ms_hyperv; EXPORT_SYMBOL_GPL(ms_hyperv); =20 diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index bfb9eb9d7215..f131027830c3 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -48,6 +48,7 @@ struct ms_hyperv_info { u64 shared_gpa_boundary; }; extern struct ms_hyperv_info ms_hyperv; +extern bool hv_nested; =20 extern void * __percpu *hyperv_pcpu_input_arg; extern void * __percpu *hyperv_pcpu_output_arg; --=20 2.25.1 From nobody Thu Sep 18 21:38:21 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id C4F58C4708E for ; Thu, 1 Dec 2022 11:05:06 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231251AbiLALFE (ORCPT ); Thu, 1 Dec 2022 06:05:04 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:45046 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230473AbiLALEJ (ORCPT ); Thu, 1 Dec 2022 06:04:09 -0500 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 7BB275C0C5; Thu, 1 Dec 2022 03:03:57 -0800 (PST) Received: from jinankjain-dranzer.zrrkmle5drku1h0apvxbr2u2ee.ix.internal.cloudapp.net (unknown [20.188.121.5]) by linux.microsoft.com (Postfix) with ESMTPSA id 589DD20B83CB; Thu, 1 Dec 2022 03:03:52 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 589DD20B83CB DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1669892637; bh=0+KAr3P31K1yqWvx8AGnhGsn4fOeV9YS2a/eXiz7wcM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; 
b=HDYrzKlW4YL2Fie6AkQZkIMiNTPfIKBiFUoK+nEYaLksUDK+PXtue+ByyVqhIovIa Pese7jM7yQolBBKioItSoEKlclMdXS5o6yJMXOG8Pl/PlGi2ZxI+Mb8Q4l0F3BD8ag eqamzDZWE+Rh362Tdai1yF9JXWldbKaOGpDSAlv4= From: Jinank Jain To: jinankjain@microsoft.com Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org, decui@microsoft.com, tglx@linutronix.de, mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com, arnd@arndb.de, peterz@infradead.org, jpoimboe@kernel.org, jinankjain@linux.microsoft.com, seanjc@google.com, kirill.shutemov@linux.intel.com, ak@linux.intel.com, sathyanarayanan.kuppuswamy@linux.intel.com, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, anrayabh@linux.microsoft.com, mikelley@microsoft.com Subject: [PATCH v7 2/5] Drivers: hv: Setup synic registers in case of nested root partition Date: Thu, 1 Dec 2022 11:03:36 +0000 Message-Id: X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Child partitions are free to allocate SynIC message and event page but in case of root partition it must use the pages allocated by Microsoft Hypervisor (MSHV). Base address for these pages can be found using synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs for nested vs non-nested root partition. 
Signed-off-by: Jinank Jain --- arch/x86/include/asm/hyperv-tlfs.h | 11 ++++ arch/x86/include/asm/mshyperv.h | 30 ++------- arch/x86/kernel/cpu/mshyperv.c | 69 +++++++++++++++++++++ drivers/hv/hv.c | 99 ++++++++++++++++++++++-------- include/asm-generic/mshyperv.h | 5 +- 5 files changed, 165 insertions(+), 49 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hype= rv-tlfs.h index 58c03d18c235..b5019becb618 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -225,6 +225,17 @@ enum hv_isolation_type { #define HV_REGISTER_SINT14 0x4000009E #define HV_REGISTER_SINT15 0x4000009F =20 +/* + * Define synthetic interrupt controller model specific registers for + * nested hypervisor. + */ +#define HV_REGISTER_NESTED_SCONTROL 0x40001080 +#define HV_REGISTER_NESTED_SVERSION 0x40001081 +#define HV_REGISTER_NESTED_SIEFP 0x40001082 +#define HV_REGISTER_NESTED_SIMP 0x40001083 +#define HV_REGISTER_NESTED_EOM 0x40001084 +#define HV_REGISTER_NESTED_SINT0 0x40001090 + /* * Synthetic Timer MSRs. Four timers per vcpu. 
*/ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyper= v.h index 61f0c206bff0..3197d49c888c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg) return false; } =20 -static inline u64 hv_get_register(unsigned int reg) -{ - u64 value; - - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) - hv_ghcb_msr_read(reg, &value); - else - rdmsrl(reg, value); - return value; -} - -static inline void hv_set_register(unsigned int reg, u64 value) -{ - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { - hv_ghcb_msr_write(reg, value); - - /* Write proxy bit via wrmsl instruction */ - if (reg >=3D HV_REGISTER_SINT0 && - reg <=3D HV_REGISTER_SINT15) - wrmsrl(reg, value | 1 << 20); - } else { - wrmsrl(reg, value); - } -} +u64 hv_get_register(unsigned int reg); +void hv_set_register(unsigned int reg, u64 value); +u64 hv_get_nested_register(unsigned int reg); +void hv_set_nested_register(unsigned int reg, u64 value); =20 #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 = as, } static inline void hv_set_register(unsigned int reg, u64 value) { } static inline u64 hv_get_register(unsigned int reg) { return 0; } +static inline void hv_set_nested_register(unsigned int reg, u64 value) { } +static inline u64 hv_get_nested_register(unsigned int reg) { return 0; } static inline int hv_set_mem_host_visibility(unsigned long addr, int numpa= ges, bool visible) { diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f9b78d4829e3..f2f6e10301a8 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -41,7 +41,76 @@ bool hv_root_partition; bool hv_nested; struct ms_hyperv_info ms_hyperv; =20 +static inline unsigned int hv_get_nested_reg(unsigned int reg) +{ + switch (reg) { + case HV_REGISTER_SIMP: + return 
HV_REGISTER_NESTED_SIMP; + case HV_REGISTER_NESTED_SIEFP: + return HV_REGISTER_SIEFP; + case HV_REGISTER_SCONTROL: + return HV_REGISTER_NESTED_SCONTROL; + case HV_REGISTER_SINT0: + return HV_REGISTER_NESTED_SINT0; + case HV_REGISTER_EOM: + return HV_REGISTER_NESTED_EOM; + default: + return reg; + } +} + #if IS_ENABLED(CONFIG_HYPERV) +static u64 _hv_get_register(unsigned int reg, bool nested) +{ + u64 value; + + if (nested) + reg =3D hv_get_nested_reg(reg); + + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) + hv_ghcb_msr_read(reg, &value); + else + rdmsrl(reg, value); + return value; +} + +static void _hv_set_register(unsigned int reg, u64 value, bool nested) +{ + if (nested) + reg =3D hv_get_nested_reg(reg); + + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { + hv_ghcb_msr_write(reg, value); + + /* Write proxy bit via wrmsl instruction */ + if (reg >=3D HV_REGISTER_SINT0 && + reg <=3D HV_REGISTER_SINT15) + wrmsrl(reg, value | 1 << 20); + } else { + wrmsrl(reg, value); + } +} + +u64 hv_get_register(unsigned int reg) +{ + return _hv_get_register(reg, false); +} + +void hv_set_register(unsigned int reg, u64 value) +{ + _hv_set_register(reg, value, false); +} + +u64 hv_get_nested_register(unsigned int reg) +{ + return _hv_get_register(reg, true); +} + +void hv_set_nested_register(unsigned int reg, u64 value) +{ + _hv_set_register(reg, value, true); +} + static void (*vmbus_handler)(void); static void (*hv_stimer0_handler)(void); static void (*hv_kexec_handler)(void); diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 4d6480d57546..0ed052f2423e 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -147,7 +147,7 @@ int hv_synic_alloc(void) * Synic message and event pages are allocated by paravisor. * Skip these pages allocation here. 
*/ - if (!hv_isolation_type_snp()) { + if (!hv_isolation_type_snp() && !hv_root_partition) { hv_cpu->synic_message_page =3D (void *)get_zeroed_page(GFP_ATOMIC); if (hv_cpu->synic_message_page =3D=3D NULL) { @@ -188,8 +188,16 @@ void hv_synic_free(void) struct hv_per_cpu_context *hv_cpu =3D per_cpu_ptr(hv_context.cpu_context, cpu); =20 - free_page((unsigned long)hv_cpu->synic_event_page); - free_page((unsigned long)hv_cpu->synic_message_page); + if (hv_root_partition) { + if (hv_cpu->synic_event_page !=3D NULL) + memunmap(hv_cpu->synic_event_page); + + if (hv_cpu->synic_message_page !=3D NULL) + memunmap(hv_cpu->synic_message_page); + } else { + free_page((unsigned long)hv_cpu->synic_event_page); + free_page((unsigned long)hv_cpu->synic_message_page); + } free_page((unsigned long)hv_cpu->post_msg_page); } =20 @@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu) union hv_synic_scontrol sctrl; =20 /* Setup the Synic's message page */ - simp.as_uint64 =3D hv_get_register(HV_REGISTER_SIMP); + simp.as_uint64 =3D hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) : + hv_get_register(HV_REGISTER_SIMP); + simp.simp_enabled =3D 1; =20 - if (hv_isolation_type_snp()) { + if (hv_isolation_type_snp() || hv_root_partition) { hv_cpu->synic_message_page =3D memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, MEMREMAP_WB); @@ -227,13 +237,18 @@ void hv_synic_enable_regs(unsigned int cpu) >> HV_HYP_PAGE_SHIFT; } =20 - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); + if (hv_nested) + hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64); + else + hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); =20 /* Setup the Synic's event page */ - siefp.as_uint64 =3D hv_get_register(HV_REGISTER_SIEFP); + siefp.as_uint64 =3D hv_nested ? 
+ hv_get_nested_register(HV_REGISTER_SIEFP) : + hv_get_register(HV_REGISTER_SIEFP); siefp.siefp_enabled =3D 1; =20 - if (hv_isolation_type_snp()) { + if (hv_isolation_type_snp() || hv_root_partition) { hv_cpu->synic_event_page =3D memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, HV_HYP_PAGE_SIZE, MEMREMAP_WB); @@ -245,13 +260,19 @@ void hv_synic_enable_regs(unsigned int cpu) >> HV_HYP_PAGE_SHIFT; } =20 - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); + if (hv_nested) + hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64); + else + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); =20 /* Setup the shared SINT. */ if (vmbus_irq !=3D -1) enable_percpu_irq(vmbus_irq, 0); - shared_sint.as_uint64 =3D hv_get_register(HV_REGISTER_SINT0 + - VMBUS_MESSAGE_SINT); + shared_sint.as_uint64 =3D + hv_nested ? + hv_get_nested_register(HV_REGISTER_SINT0 + + VMBUS_MESSAGE_SINT) : + hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT); =20 shared_sint.vector =3D vmbus_interrupt; shared_sint.masked =3D false; @@ -266,14 +287,22 @@ void hv_synic_enable_regs(unsigned int cpu) #else shared_sint.auto_eoi =3D 0; #endif - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, + if (hv_nested) + hv_set_nested_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); + else + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); - /* Enable the global synic bit */ - sctrl.as_uint64 =3D hv_get_register(HV_REGISTER_SCONTROL); + sctrl.as_uint64 =3D hv_nested ? 
+ hv_get_nested_register(HV_REGISTER_SCONTROL) : + hv_get_register(HV_REGISTER_SCONTROL); sctrl.enable =3D 1; =20 - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); + if (hv_nested) + hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); + else + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); } =20 int hv_synic_init(unsigned int cpu) @@ -297,17 +326,25 @@ void hv_synic_disable_regs(unsigned int cpu) union hv_synic_siefp siefp; union hv_synic_scontrol sctrl; =20 - shared_sint.as_uint64 =3D hv_get_register(HV_REGISTER_SINT0 + - VMBUS_MESSAGE_SINT); + shared_sint.as_uint64 =3D + hv_nested ? + hv_get_nested_register(HV_REGISTER_SINT0 + + VMBUS_MESSAGE_SINT) : + hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT); =20 shared_sint.masked =3D 1; =20 /* Need to correctly cleanup in the case of SMP!!! */ /* Disable the interrupt */ - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, + if (hv_nested) + hv_set_nested_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); + else + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); =20 - simp.as_uint64 =3D hv_get_register(HV_REGISTER_SIMP); + simp.as_uint64 =3D hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) : + hv_get_register(HV_REGISTER_SIMP); /* * In Isolation VM, sim and sief pages are allocated by * paravisor. These pages also will be used by kdump @@ -320,9 +357,14 @@ void hv_synic_disable_regs(unsigned int cpu) else simp.base_simp_gpa =3D 0; =20 - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); + if (hv_nested) + hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64); + else + hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); =20 - siefp.as_uint64 =3D hv_get_register(HV_REGISTER_SIEFP); + siefp.as_uint64 =3D hv_nested ? 
+ hv_get_nested_register(HV_REGISTER_SIEFP) : + hv_get_register(HV_REGISTER_SIEFP); siefp.siefp_enabled =3D 0; =20 if (hv_isolation_type_snp()) @@ -330,12 +372,21 @@ void hv_synic_disable_regs(unsigned int cpu) else siefp.base_siefp_gpa =3D 0; =20 - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); + if (hv_nested) + hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64); + else + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); =20 /* Disable the global synic bit */ - sctrl.as_uint64 =3D hv_get_register(HV_REGISTER_SCONTROL); + sctrl.as_uint64 =3D hv_nested ? + hv_get_nested_register(HV_REGISTER_SCONTROL) : + hv_get_register(HV_REGISTER_SCONTROL); sctrl.enable =3D 0; - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); + + if (hv_nested) + hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); + else + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); =20 if (vmbus_irq !=3D -1) disable_percpu_irq(vmbus_irq); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index f131027830c3..db0b5be1e087 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -147,7 +147,10 @@ static inline void vmbus_signal_eom(struct hv_message = *msg, u32 old_msg_type) * possibly deliver another msg from the * hypervisor */ - hv_set_register(HV_REGISTER_EOM, 0); + if (hv_nested) + hv_set_nested_register(HV_REGISTER_EOM, 0); + else + hv_set_register(HV_REGISTER_EOM, 0); } } =20 --=20 2.25.1 From nobody Thu Sep 18 21:38:21 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 3565DC43217 for ; Thu, 1 Dec 2022 11:05:11 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231256AbiLALFJ (ORCPT ); Thu, 1 Dec 2022 06:05:09 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44534 "EHLO 
lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S230508AbiLALEM (ORCPT ); Thu, 1 Dec 2022 06:04:12 -0500 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id D78CF13D20; Thu, 1 Dec 2022 03:04:02 -0800 (PST) Received: from jinankjain-dranzer.zrrkmle5drku1h0apvxbr2u2ee.ix.internal.cloudapp.net (unknown [20.188.121.5]) by linux.microsoft.com (Postfix) with ESMTPSA id B82CD20B83DC; Thu, 1 Dec 2022 03:03:57 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com B82CD20B83DC DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1669892642; bh=0XXpwjMaWgOBfkprXGejLOu/IxffncbvZikzfYyKED0=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=AC4W4BPZx8Nia0TS5K7t6HTAeL8KJJyOoQo2ob88pdXSz8ZB8tk1tMe20QNqutma1 ALAaPF+MUaMhukj0un16TJEzxvEAtnttZT9NbmqOhzvZkAuXL01twIpkFDE2M2StMf X08L3FxRR2MlKH9YbKzZdraj71Yjl8QIqu4hs740= From: Jinank Jain To: jinankjain@microsoft.com Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org, decui@microsoft.com, tglx@linutronix.de, mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com, arnd@arndb.de, peterz@infradead.org, jpoimboe@kernel.org, jinankjain@linux.microsoft.com, seanjc@google.com, kirill.shutemov@linux.intel.com, ak@linux.intel.com, sathyanarayanan.kuppuswamy@linux.intel.com, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, anrayabh@linux.microsoft.com, mikelley@microsoft.com Subject: [PATCH v7 3/5] x86/hyperv: Add an interface to do nested hypercalls Date: Thu, 1 Dec 2022 11:03:37 +0000 Message-Id: <0714327373829ec0fc372b78ac3f55c23b1417af.1669788587.git.jinankjain@linux.microsoft.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: 
text/plain; charset="utf-8" According to TLFS, in order to communicate to L0 hypervisor there needs to be an additional bit set in the control register. This communication is required to perform privileged instructions which can only be performed by L0 hypervisor. An example of that could be setting up the VMBus infrastructure. Signed-off-by: Jinank Jain --- arch/x86/include/asm/hyperv-tlfs.h | 3 ++- arch/x86/include/asm/mshyperv.h | 42 +++++++++++++++++++++++++++--- include/asm-generic/hyperv-tlfs.h | 1 + 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hype= rv-tlfs.h index b5019becb618..7758c495541d 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -380,7 +380,8 @@ struct hv_nested_enlightenments_control { __u32 reserved:31; } features; struct { - __u32 reserved; + __u32 inter_partition_comm:1; + __u32 reserved:31; } hypercallControls; } __packed; =20 diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyper= v.h index 3197d49c888c..fbd7a9589b0d 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -74,10 +74,16 @@ static inline u64 hv_do_hypercall(u64 control, void *in= put, void *output) return hv_status; } =20 +/* Hypercall to the L0 hypervisor */ +static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *o= utput) +{ + return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output); +} + /* Fast hypercall with 8 bytes of input and no output */ -static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) +static inline u64 _hv_do_fast_hypercall8(u64 control, u16 code, u64 input1) { - u64 hv_status, control =3D (u64)code | HV_HYPERCALL_FAST_BIT; + u64 hv_status; =20 #ifdef CONFIG_X86_64 { @@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64= input1) return hv_status; } =20 +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) +{ + u64 control =3D 
(u64)code | HV_HYPERCALL_FAST_BIT; + + return _hv_do_fast_hypercall8(control, code, input1); +} + +static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1) +{ + u64 control =3D (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; + + return _hv_do_fast_hypercall8(control, code, input1); +} + /* Fast hypercall with 16 bytes of input */ -static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +static inline u64 _hv_do_fast_hypercall16(u64 control, u16 code, u64 input= 1, u64 input2) { - u64 hv_status, control =3D (u64)code | HV_HYPERCALL_FAST_BIT; + u64 hv_status; =20 #ifdef CONFIG_X86_64 { @@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64= input1, u64 input2) return hv_status; } =20 +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +{ + u64 control =3D (u64)code | HV_HYPERCALL_FAST_BIT; + + return _hv_do_fast_hypercall16(control, code, input1, input2); +} + +static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 = input2) +{ + u64 control =3D (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED; + + return _hv_do_fast_hypercall16(control, code, input1, input2); +} + extern struct hv_vp_assist_page **hv_vp_assist_page; =20 static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int= cpu) diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv= -tlfs.h index b17c6eeb9afa..e61ee461c4fc 100644 --- a/include/asm-generic/hyperv-tlfs.h +++ b/include/asm-generic/hyperv-tlfs.h @@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT { #define HV_HYPERCALL_VARHEAD_OFFSET 17 #define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17) #define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27) +#define HV_HYPERCALL_NESTED BIT_ULL(31) #define HV_HYPERCALL_REP_COMP_OFFSET 32 #define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32) #define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) --=20 2.25.1 From nobody Thu Sep 18 21:38:21 2025 Return-Path: X-Spam-Checker-Version: 
SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 65E91C43217 for ; Thu, 1 Dec 2022 11:05:29 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231267AbiLALF0 (ORCPT ); Thu, 1 Dec 2022 06:05:26 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44634 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231166AbiLALEU (ORCPT ); Thu, 1 Dec 2022 06:04:20 -0500 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id 45271E4E; Thu, 1 Dec 2022 03:04:08 -0800 (PST) Received: from jinankjain-dranzer.zrrkmle5drku1h0apvxbr2u2ee.ix.internal.cloudapp.net (unknown [20.188.121.5]) by linux.microsoft.com (Postfix) with ESMTPSA id 23E1E20B83E2; Thu, 1 Dec 2022 03:04:02 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 23E1E20B83E2 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1669892648; bh=WwgTdzq933yo46nX8RcCPBXut+LFdh0yPYBgTMkkcFM=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=XvhAqjVqWrB5AyTgqwZzssPR0l+0kJSN5vfKMA1fbRetW5+K9t5NWU0E7w2QLUDrW 5fNjsYHA8IT3LHXiQg7ASwFWlHf/1fa1C+0dE0ACjGO3Tx3HDvg6ZMSZCbpO1Fm3iA KX1xaxKEQzfQYJxp9snY7snOPerQvIhMbqTWWuU4= From: Jinank Jain To: jinankjain@microsoft.com Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org, decui@microsoft.com, tglx@linutronix.de, mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com, arnd@arndb.de, peterz@infradead.org, jpoimboe@kernel.org, jinankjain@linux.microsoft.com, seanjc@google.com, kirill.shutemov@linux.intel.com, ak@linux.intel.com, sathyanarayanan.kuppuswamy@linux.intel.com, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, anrayabh@linux.microsoft.com, 
mikelley@microsoft.com Subject: [PATCH v7 4/5] Drivers: hv: Enable vmbus driver for nested root partition Date: Thu, 1 Dec 2022 11:03:38 +0000 Message-Id: <85c3aa671e52fe0f58bcd30c37bd4f60b8bcd14a.1669788587.git.jinankjain@linux.microsoft.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Currently VMBus driver is not initialized for root partition but we need to enable the VMBus driver for nested root partition. This is required, so that L2 root can use the VMBus devices. Signed-off-by: Jinank Jain --- drivers/hv/vmbus_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 0f00d57b7c25..6324e01d5eec 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2745,7 +2745,7 @@ static int __init hv_acpi_init(void) if (!hv_is_hyperv_initialized()) return -ENODEV; =20 - if (hv_root_partition) + if (hv_root_partition && !hv_nested) return 0; =20 /* --=20 2.25.1 From nobody Thu Sep 18 21:38:21 2025 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org Received: from vger.kernel.org (vger.kernel.org [23.128.96.18]) by smtp.lore.kernel.org (Postfix) with ESMTP id 5E6AAC43217 for ; Thu, 1 Dec 2022 11:05:32 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S231273AbiLALFa (ORCPT ); Thu, 1 Dec 2022 06:05:30 -0500 Received: from lindbergh.monkeyblade.net ([23.128.96.19]:44212 "EHLO lindbergh.monkeyblade.net" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S231194AbiLALEZ (ORCPT ); Thu, 1 Dec 2022 06:04:25 -0500 Received: from linux.microsoft.com (linux.microsoft.com [13.77.154.182]) by lindbergh.monkeyblade.net (Postfix) with ESMTP id A2811167EB; Thu, 1 Dec 2022 03:04:13 -0800 (PST) Received: from 
jinankjain-dranzer.zrrkmle5drku1h0apvxbr2u2ee.ix.internal.cloudapp.net (unknown [20.188.121.5]) by linux.microsoft.com (Postfix) with ESMTPSA id 840BE20B83E5; Thu, 1 Dec 2022 03:04:08 -0800 (PST) DKIM-Filter: OpenDKIM Filter v2.11.0 linux.microsoft.com 840BE20B83E5 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=linux.microsoft.com; s=default; t=1669892653; bh=Qb+/xMIUURF2qw1ybF3Cm9sldgz9mUzBUTrN37WRKvA=; h=From:To:Cc:Subject:Date:In-Reply-To:References:From; b=tOHjliGqx/XyZF7N/1VP5/oZmWVnyvR2npDlg71bDbFug1cemQlbrwI5gIjiisVse 3I6uXrF7USDHpl/7sWBjcYUmBUhXTp6f1sUoF08g5NA6XabsuDItuRksi/uRVLFGgX aiFjkuNSAfB7PaSrg2P3/t5Sb95qxfZ3wTqQhKnc= From: Jinank Jain To: jinankjain@microsoft.com Cc: kys@microsoft.com, haiyangz@microsoft.com, wei.liu@kernel.org, decui@microsoft.com, tglx@linutronix.de, mingo@redhat.com, bp@alien8.de, dave.hansen@linux.intel.com, x86@kernel.org, hpa@zytor.com, arnd@arndb.de, peterz@infradead.org, jpoimboe@kernel.org, jinankjain@linux.microsoft.com, seanjc@google.com, kirill.shutemov@linux.intel.com, ak@linux.intel.com, sathyanarayanan.kuppuswamy@linux.intel.com, linux-hyperv@vger.kernel.org, linux-kernel@vger.kernel.org, linux-arch@vger.kernel.org, anrayabh@linux.microsoft.com, mikelley@microsoft.com Subject: [PATCH v7 5/5] x86/hyperv: Change interrupt vector for nested root partition Date: Thu, 1 Dec 2022 11:03:39 +0000 Message-Id: <4a36d47f50aca7a7de8e89dbfb0cd407de549bed.1669788587.git.jinankjain@linux.microsoft.com> X-Mailer: git-send-email 2.25.1 In-Reply-To: References: MIME-Version: 1.0 Content-Transfer-Encoding: quoted-printable Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Type: text/plain; charset="utf-8" Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay the VMBus interrupt. But this does not work in case of nested hypervisor. 
Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt vector range for VMBus, and thus we have to use one of the vectors from that range and set up the IDT accordingly. Signed-off-by: Jinank Jain --- arch/x86/include/asm/idtentry.h | 2 ++ arch/x86/include/asm/irq_vectors.h | 6 ++++++ arch/x86/kernel/cpu/mshyperv.c | 15 +++++++++++++++ arch/x86/kernel/idt.c | 9 +++++++++ drivers/hv/vmbus_drv.c | 3 ++- 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentr= y.h index 72184b0b2219..c0648e3e4d4a 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -686,6 +686,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysv= ec_kvm_posted_intr_nested DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback= ); DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenl= ightenment); DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0); +DECLARE_IDTENTRY_SYSVEC(HYPERV_INTR_NESTED_VMBUS_VECTOR, + sysvec_hyperv_nested_vmbus_intr); #endif =20 #if IS_ENABLED(CONFIG_ACRN_GUEST) diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_= vectors.h index 43dcb9284208..729d19eab7f5 100644 --- a/arch/x86/include/asm/irq_vectors.h +++ b/arch/x86/include/asm/irq_vectors.h @@ -102,6 +102,12 @@ #if IS_ENABLED(CONFIG_HYPERV) #define HYPERV_REENLIGHTENMENT_VECTOR 0xee #define HYPERV_STIMER0_VECTOR 0xed +/* + * FIXME: Change this, once Microsoft Hypervisor changes its assumption + * around VMBus interrupt vector allocation for nested root partition. + * Or provides a better interface to detect this instead of hardcoding.
+ */ +#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31 #endif =20 #define LOCAL_TIMER_VECTOR 0xec diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index f2f6e10301a8..9f31c7704715 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -130,6 +130,21 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback) set_irq_regs(old_regs); } =20 +DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr) +{ + struct pt_regs *old_regs =3D set_irq_regs(regs); + + inc_irq_stat(irq_hv_callback_count); + + if (vmbus_handler) + vmbus_handler(); + + if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED) + ack_APIC_irq(); + + set_irq_regs(old_regs); +} + void hv_setup_vmbus_handler(void (*handler)(void)) { vmbus_handler =3D handler; diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index a58c6bc1cd68..ace648856a0b 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -160,6 +160,15 @@ static const __initconst struct idt_data apic_idts[] = =3D { # endif INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt), INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt), +#ifdef CONFIG_HYPERV + /* + * This is a hack because we cannot install this interrupt handler via al= loc_intr_gate + * as it does not allow interrupt vector less than FIRST_SYSTEM_VECTORS. = And hyperv + * does not want anything other than 0x31-0x34 as the interrupt vector fo= r vmbus + * interrupt in case of nested setup. + */ + INTG(HYPERV_INTR_NESTED_VMBUS_VECTOR, asm_sysvec_hyperv_nested_vmbus_intr= ), +#endif #endif }; =20 diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 6324e01d5eec..740878367426 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -2768,7 +2768,8 @@ static int __init hv_acpi_init(void) * normal Linux IRQ mechanism is not used in this case. */ #ifdef HYPERVISOR_CALLBACK_VECTOR - vmbus_interrupt =3D HYPERVISOR_CALLBACK_VECTOR; + vmbus_interrupt =3D hv_nested ? 
HYPERV_INTR_NESTED_VMBUS_VECTOR : + HYPERVISOR_CALLBACK_VECTOR; vmbus_irq =3D -1; #endif =20 --=20 2.25.1