arch/x86/include/asm/hyperv-tlfs.h | 17 +++++++- arch/x86/include/asm/idtentry.h | 2 + arch/x86/include/asm/irq_vectors.h | 6 +++ arch/x86/include/asm/mshyperv.h | 68 ++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/mshyperv.c | 22 ++++++++++ arch/x86/kernel/idt.c | 9 ++++ drivers/hv/hv.c | 18 +++++--- drivers/hv/vmbus_drv.c | 5 ++- include/asm-generic/hyperv-tlfs.h | 1 + include/asm-generic/mshyperv.h | 1 + 10 files changed, 137 insertions(+), 12 deletions(-)
This patch series adds support for running a nested Microsoft Hypervisor. With a nested Microsoft Hypervisor there are a few privileged hypercalls which need to go to the L0 hypervisor instead of the L1 hypervisor. This patch series identifies such hypercalls and replaces them with nested hypercalls. Jinank Jain (5): mshv: Add support for detecting nested hypervisor hv: Setup synic registers in case of nested root partition hv: Add an interface to do nested hypercalls hv: Enable vmbus driver for nested root partition hv, mshv : Change interrupt vector for nested root partition arch/x86/include/asm/hyperv-tlfs.h | 17 +++++++- arch/x86/include/asm/idtentry.h | 2 + arch/x86/include/asm/irq_vectors.h | 6 +++ arch/x86/include/asm/mshyperv.h | 68 ++++++++++++++++++++++++++++-- arch/x86/kernel/cpu/mshyperv.c | 22 ++++++++++ arch/x86/kernel/idt.c | 9 ++++ drivers/hv/hv.c | 18 +++++--- drivers/hv/vmbus_drv.c | 5 ++- include/asm-generic/hyperv-tlfs.h | 1 + include/asm-generic/mshyperv.h | 1 + 10 files changed, 137 insertions(+), 12 deletions(-) -- 2.25.1
This patch series adds support for running a nested Microsoft Hypervisor. With a nested Microsoft Hypervisor there are a few privileged hypercalls which need to go to the L0 hypervisor instead of the L1 hypervisor. This patch series identifies such hypercalls and replaces them with nested hypercalls. Jinank Jain (5): x86/hyperv: Add support for detecting nested hypervisor Drivers: hv: Setup synic registers in case of nested root partition x86/hyperv: Add an interface to do nested hypercalls Drivers: hv: Enable vmbus driver for nested root partition x86/hyperv: Change interrupt vector for nested root partition [v4] - Fix ARM64 compilation [v5] - Fix comments from Michael Kelly [v6] - Send the correct patches from the right folder Jinank Jain (5): x86/hyperv: Add support for detecting nested hypervisor Drivers: hv: Setup synic registers in case of nested root partition x86/hyperv: Add an interface to do nested hypercalls Drivers: hv: Enable vmbus driver for nested root partition x86/hyperv: Change interrupt vector for nested root partition arch/x86/include/asm/hyperv-tlfs.h | 17 ++++++- arch/x86/include/asm/idtentry.h | 2 + arch/x86/include/asm/irq_vectors.h | 6 +++ arch/x86/include/asm/mshyperv.h | 68 ++++++++++++++++------------ arch/x86/kernel/cpu/mshyperv.c | 71 ++++++++++++++++++++++++++++++ arch/x86/kernel/idt.c | 9 ++++ drivers/hv/hv.c | 18 +++++--- drivers/hv/hv_common.c | 9 ++-- drivers/hv/vmbus_drv.c | 5 ++- include/asm-generic/hyperv-tlfs.h | 1 + include/asm-generic/mshyperv.h | 1 + 11 files changed, 168 insertions(+), 39 deletions(-) -- 2.25.1
Detect if Linux is running as a nested hypervisor in the root
partition for Microsoft Hypervisor, using flags provided by MSHV.
Expose a new variable hv_nested that is used later for decisions
specific to the nested use case.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 +++
arch/x86/kernel/cpu/mshyperv.c | 7 +++++++
drivers/hv/hv_common.c | 9 ++++++---
include/asm-generic/mshyperv.h | 1 +
4 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6d9368ea3701..58c03d18c235 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -114,6 +114,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+/* Indicates that the hypervisor is nested within a Hyper-V partition. */
+#define HV_X64_HYPERV_NESTED BIT(12)
+
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 831613959a92..9a4204139490 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,6 +37,8 @@
/* Is Linux running as the root partition? */
bool hv_root_partition;
+/* Is Linux running on nested Microsoft Hypervisor */
+bool hv_nested;
struct ms_hyperv_info ms_hyperv;
#if IS_ENABLED(CONFIG_HYPERV)
@@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: running as root partition\n");
}
+ if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
+ hv_nested = true;
+ pr_info("Hyper-V: running on a nested hypervisor\n");
+ }
+
/*
* Extract host information.
*/
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index ae68298c0dca..52a6f89ccdbd 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -25,17 +25,20 @@
#include <asm/mshyperv.h>
/*
- * hv_root_partition and ms_hyperv are defined here with other Hyper-V
- * specific globals so they are shared across all architectures and are
+ * hv_root_partition, ms_hyperv and hv_nested are defined here with other
+ * Hyper-V specific globals so they are shared across all architectures and are
* built only when CONFIG_HYPERV is defined. But on x86,
* ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
- * defined, and it uses these two variables. So mark them as __weak
+ * defined, and it uses these three variables. So mark them as __weak
* here, allowing for an overriding definition in the module containing
* ms_hyperv_init_platform().
*/
bool __weak hv_root_partition;
EXPORT_SYMBOL_GPL(hv_root_partition);
+bool __weak hv_nested;
+EXPORT_SYMBOL_GPL(hv_nested);
+
struct ms_hyperv_info __weak ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..5df6e944e6a9 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -164,6 +164,7 @@ extern int vmbus_interrupt;
extern int vmbus_irq;
extern bool hv_root_partition;
+extern bool hv_nested;
#if IS_ENABLED(CONFIG_HYPERV)
/*
--
2.25.1
Child partitions are free to allocate their own SynIC message and event
pages, but the root partition must use the pages allocated by the Microsoft
Hypervisor (MSHV). The base addresses of these pages can be read from
synthetic MSRs exposed by MSHV. These MSRs differ slightly between nested
and non-nested root partitions.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 11 +++++++
arch/x86/include/asm/mshyperv.h | 26 ++--------------
arch/x86/kernel/cpu/mshyperv.c | 49 ++++++++++++++++++++++++++++++
drivers/hv/hv.c | 18 ++++++++---
4 files changed, 75 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 58c03d18c235..b5019becb618 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -225,6 +225,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT14 0x4000009E
#define HV_REGISTER_SINT15 0x4000009F
+/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..326d699b30d5 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -198,30 +198,8 @@ static inline bool hv_is_synic_reg(unsigned int reg)
return false;
}
-static inline u64 hv_get_register(unsigned int reg)
-{
- u64 value;
-
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
- else
- rdmsrl(reg, value);
- return value;
-}
-
-static inline void hv_set_register(unsigned int reg, u64 value)
-{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
-
- /* Write proxy bit via wrmsl instruction */
- if (reg >= HV_REGISTER_SINT0 &&
- reg <= HV_REGISTER_SINT15)
- wrmsrl(reg, value | 1 << 20);
- } else {
- wrmsrl(reg, value);
- }
-}
+u64 hv_get_register(unsigned int reg);
+void hv_set_register(unsigned int reg, u64 value);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9a4204139490..97d8ce744e47 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -41,6 +41,55 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
+static inline unsigned int hv_get_nested_reg(unsigned int reg)
+{
+ switch (reg) {
+ case HV_REGISTER_SIMP:
+ return HV_REGISTER_NESTED_SIMP;
+ case HV_REGISTER_SIEFP:
+ return HV_REGISTER_NESTED_SIEFP;
+ case HV_REGISTER_SCONTROL:
+ return HV_REGISTER_NESTED_SCONTROL;
+ case HV_REGISTER_SINT0:
+ return HV_REGISTER_NESTED_SINT0;
+ case HV_REGISTER_EOM:
+ return HV_REGISTER_NESTED_EOM;
+ default: /* no nested mapping listed here: use the register unchanged */
+ return reg;
+ }
+}
+
+u64 hv_get_register(unsigned int reg)
+{
+ u64 value;
+
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
+ hv_ghcb_msr_read(reg, &value);
+ else
+ rdmsrl(reg, value);
+ return value;
+}
+
+void hv_set_register(unsigned int reg, u64 value)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
+ hv_ghcb_msr_write(reg, value);
+
+ /* Write proxy bit via wrmsrl instruction */
+ if (reg >= HV_REGISTER_SINT0 &&
+ reg <= HV_REGISTER_SINT15)
+ wrmsrl(reg, value | 1 << 20);
+ } else {
+ wrmsrl(reg, value);
+ }
+}
+
#if IS_ENABLED(CONFIG_HYPERV)
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..9e1eb50cc76f 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -147,7 +147,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp()) {
+ if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -188,8 +188,16 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
+ if (hv_root_partition) {
+ if (hv_cpu->synic_event_page != NULL)
+ memunmap(hv_cpu->synic_event_page);
+
+ if (hv_cpu->synic_message_page != NULL)
+ memunmap(hv_cpu->synic_message_page);
+ } else {
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ }
free_page((unsigned long)hv_cpu->post_msg_page);
}
@@ -216,7 +224,7 @@ void hv_synic_enable_regs(unsigned int cpu)
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
simp.simp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -233,7 +241,7 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
--
2.25.1
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Wednesday, November 23, 2022 10:02 PM
>
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 11 +++++++
> arch/x86/include/asm/mshyperv.h | 26 ++--------------
> arch/x86/kernel/cpu/mshyperv.c | 49 ++++++++++++++++++++++++++++++
> drivers/hv/hv.c | 18 ++++++++---
> 4 files changed, 75 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 58c03d18c235..b5019becb618 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> #define HV_REGISTER_SINT14 0x4000009E
> #define HV_REGISTER_SINT15 0x4000009F
>
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> +#define HV_REGISTER_NESTED_EOM 0x40001084
> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> +
> /*
> * Synthetic Timer MSRs. Four timers per vcpu.
> */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 61f0c206bff0..326d699b30d5 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -198,30 +198,8 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> return false;
> }
>
> -static inline u64 hv_get_register(unsigned int reg)
> -{
> - u64 value;
> -
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> - hv_ghcb_msr_read(reg, &value);
> - else
> - rdmsrl(reg, value);
> - return value;
> -}
> -
> -static inline void hv_set_register(unsigned int reg, u64 value)
> -{
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> - hv_ghcb_msr_write(reg, value);
> -
> - /* Write proxy bit via wrmsl instruction */
> - if (reg >= HV_REGISTER_SINT0 &&
> - reg <= HV_REGISTER_SINT15)
> - wrmsrl(reg, value | 1 << 20);
> - } else {
> - wrmsrl(reg, value);
> - }
> -}
> +u64 hv_get_register(unsigned int reg);
> +void hv_set_register(unsigned int reg, u64 value);
>
> #else /* CONFIG_HYPERV */
> static inline void hyperv_init(void) {}
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 9a4204139490..97d8ce744e47 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -41,6 +41,55 @@ bool hv_root_partition;
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> +{
> + switch (reg) {
> + case HV_REGISTER_SIMP:
> + return HV_REGISTER_NESTED_SIMP;
> + case HV_REGISTER_NESTED_SIEFP:
> + return HV_REGISTER_SIEFP;
> + case HV_REGISTER_SCONTROL:
> + return HV_REGISTER_NESTED_SCONTROL;
> + case HV_REGISTER_SINT0:
> + return HV_REGISTER_NESTED_SINT0;
> + case HV_REGISTER_EOM:
> + return HV_REGISTER_NESTED_EOM;
> + default:
> + return reg;
> + }
> +}
> +
> +u64 hv_get_register(unsigned int reg)
> +{
> + u64 value;
> +
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> + hv_ghcb_msr_read(reg, &value);
> + else
> + rdmsrl(reg, value);
> + return value;
> +}
> +
> +void hv_set_register(unsigned int reg, u64 value)
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> + hv_ghcb_msr_write(reg, value);
> +
> + /* Write proxy bit via wrmsl instruction */
> + if (reg >= HV_REGISTER_SINT0 &&
> + reg <= HV_REGISTER_SINT15)
> + wrmsrl(reg, value | 1 << 20);
> + } else {
> + wrmsrl(reg, value);
> + }
> +}
> +
With hv_get_register(), hv_set_register(), and hv_get_nested_reg()
moved here, they need to be under the #if IS_ENABLED(CONFIG_HYPERV).
If CONFIG_HYPERV=n, this module is still compiled, and you end up
with the implementation of hv_get_register() that's here and the no-op
implementation in arch/x86/include/asm/mshyperv.h. The linker
will rightfully complain.
There's also the issue of hv_ghcb_msr_read() and hv_ghcb_msr_write().
With CONFIG_AMD_MEM_ENCRYPT=y, there needs to be a non-stub
implementation. But nothing in arch/x86/hyperv is built if
CONFIG_HYPERV=n, so you'll get a linker error because of missing
an implementation of those functions.
Putting the code under #if IS_ENABLED(CONFIG_HYPERV) should
solve both problems.
You should specifically test with CONFIG_HYPERV=n after any changes
to arch/x86/kernel/cpu/mshyperv.c.
Michael
> #if IS_ENABLED(CONFIG_HYPERV)
> static void (*vmbus_handler)(void);
> static void (*hv_stimer0_handler)(void);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..9e1eb50cc76f 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp()) {
> + if (!hv_isolation_type_snp() && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -188,8 +188,16 @@ void hv_synic_free(void)
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> - free_page((unsigned long)hv_cpu->synic_event_page);
> - free_page((unsigned long)hv_cpu->synic_message_page);
> + if (hv_root_partition) {
> + if (hv_cpu->synic_event_page != NULL)
> + memunmap(hv_cpu->synic_event_page);
> +
> + if (hv_cpu->synic_message_page != NULL)
> + memunmap(hv_cpu->synic_message_page);
> + } else {
> + free_page((unsigned long)hv_cpu->synic_event_page);
> + free_page((unsigned long)hv_cpu->synic_message_page);
> + }
> free_page((unsigned long)hv_cpu->post_msg_page);
> }
>
> @@ -216,7 +224,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_message_page
> = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -233,7 +241,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_event_page =
> memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> --
> 2.25.1
According to TLFS, in order to communicate to L0 hypervisor there needs
to be an additional bit set in the control register. This communication
is required to perform privileged instructions which can only be
performed by L0 hypervisor. An example of that could be setting up the
VMBus infrastructure.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
arch/x86/include/asm/mshyperv.h | 42 +++++++++++++++++++++++++++---
include/asm-generic/hyperv-tlfs.h | 1 +
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b5019becb618..7758c495541d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -380,7 +380,8 @@ struct hv_nested_enlightenments_control {
__u32 reserved:31;
} features;
struct {
- __u32 reserved;
+ __u32 inter_partition_comm:1;
+ __u32 reserved:31;
} hypercallControls;
} __packed;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 326d699b30d5..42e42cea0384 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -74,10 +74,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
+/* Hypercall to the L0 hypervisor */
+static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
+{
+ return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
+}
+
/* Fast hypercall with 8 bytes of input and no output */
-static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+static inline u64 _hv_do_fast_hypercall8(u64 control, u16 code, u64 input1)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall8(control, code, input1);
+}
+
+static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall8(control, code, input1);
+}
+
/* Fast hypercall with 16 bytes of input */
-static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+static inline u64 _hv_do_fast_hypercall16(u64 control, u16 code, u64 input1, u64 input2)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall16(control, code, input1, input2);
+}
+
+static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall16(control, code, input1, input2);
+}
+
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index b17c6eeb9afa..e61ee461c4fc 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_HYPERCALL_VARHEAD_OFFSET 17
#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17)
#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27)
+#define HV_HYPERCALL_NESTED BIT_ULL(31)
#define HV_HYPERCALL_REP_COMP_OFFSET 32
#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
--
2.25.1
Currently the VMBus driver is not initialized for the root partition, but it
needs to be enabled for a nested root partition. This is required so that
the L2 root can use the VMBus devices.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
drivers/hv/vmbus_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index db00d20c726d..0937877eade9 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2744,7 +2744,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition)
+ if (hv_root_partition && !hv_nested)
return 0;
/*
--
2.25.1
Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay
the VMBus interrupt. But this does not work in the case of a nested
hypervisor. Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt
vector range for VMBus, and thus we have to use one of the vectors from
that range and set up the IDT accordingly.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/idtentry.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 6 ++++++
arch/x86/kernel/cpu/mshyperv.c | 15 +++++++++++++++
arch/x86/kernel/idt.c | 9 +++++++++
drivers/hv/vmbus_drv.c | 3 ++-
5 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 72184b0b2219..c0648e3e4d4a 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -686,6 +686,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_INTR_NESTED_VMBUS_VECTOR,
+ sysvec_hyperv_nested_vmbus_intr);
#endif
#if IS_ENABLED(CONFIG_ACRN_GUEST)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 43dcb9284208..729d19eab7f5 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,12 @@
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
#define HYPERV_STIMER0_VECTOR 0xed
+/*
+ * FIXME: Change this, once Microsoft Hypervisor changes its assumption
+ * around VMBus interrupt vector allocation for nested root partition.
+ * Or provides a better interface to detect this instead of hardcoding.
+ */
+#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31
#endif
#define LOCAL_TIMER_VECTOR 0xec
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 97d8ce744e47..8a692dd6d789 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -110,6 +110,21 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (vmbus_handler)
+ vmbus_handler();
+
+ if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
+ ack_APIC_irq();
+
+ set_irq_regs(old_regs);
+}
+
void hv_setup_vmbus_handler(void (*handler)(void))
{
vmbus_handler = handler;
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index a58c6bc1cd68..ace648856a0b 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -160,6 +160,15 @@ static const __initconst struct idt_data apic_idts[] = {
# endif
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
+#ifdef CONFIG_HYPERV
+ /*
+ * This is a hack because we cannot install this interrupt handler via alloc_intr_gate
+ * as it does not allow interrupt vector less than FIRST_SYSTEM_VECTORS. And hyperv
+ * does not want anything other than 0x31-0x34 as the interrupt vector for vmbus
+ * interrupt in case of nested setup.
+ */
+ INTG(HYPERV_INTR_NESTED_VMBUS_VECTOR, asm_sysvec_hyperv_nested_vmbus_intr),
+#endif
#endif
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0937877eade9..c1477f3a08dd 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2767,7 +2767,8 @@ static int __init hv_acpi_init(void)
* normal Linux IRQ mechanism is not used in this case.
*/
#ifdef HYPERVISOR_CALLBACK_VECTOR
- vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
+ vmbus_interrupt = hv_nested ? HYPERV_INTR_NESTED_VMBUS_VECTOR :
+ HYPERVISOR_CALLBACK_VECTOR;
vmbus_irq = -1;
#endif
--
2.25.1
This patch series adds support for running a nested Microsoft Hypervisor. With a nested Microsoft Hypervisor there are a few privileged hypercalls which need to go to the L0 hypervisor instead of the L1 hypervisor. This patch series identifies such hypercalls and replaces them with nested hypercalls. Jinank Jain (5): x86/hyperv: Add support for detecting nested hypervisor Drivers: hv: Setup synic registers in case of nested root partition x86/hyperv: Add an interface to do nested hypercalls Drivers: hv: Enable vmbus driver for nested root partition x86/hyperv: Change interrupt vector for nested root partition [v4] - Fix ARM64 compilation [v5] - Fix comments from Michael Kelly [v6] - Send the correct patches from the right folder [v7] - Fix linker issues for CONFIG_HYPERV=n pointed out by Michael - Fix comments from Nuno: created two separate functions for fetching nested vs non-nested registers. [v8] - Refactor as per the recommendation from Michael Kelly arch/x86/include/asm/hyperv-tlfs.h | 17 +++++- arch/x86/include/asm/idtentry.h | 2 + arch/x86/include/asm/irq_vectors.h | 6 +++ arch/x86/include/asm/mshyperv.h | 72 ++++++++++++++++---------- arch/x86/kernel/cpu/mshyperv.c | 83 ++++++++++++++++++++++++++++++ arch/x86/kernel/idt.c | 10 ++++ drivers/hv/hv.c | 32 ++++++++---- drivers/hv/hv_common.c | 9 ++-- drivers/hv/vmbus_drv.c | 5 +- include/asm-generic/hyperv-tlfs.h | 1 + include/asm-generic/mshyperv.h | 1 + 11 files changed, 193 insertions(+), 45 deletions(-) -- 2.25.1
Detect if Linux is running as a nested hypervisor in the root
partition for Microsoft Hypervisor, using flags provided by MSHV.
Expose a new variable hv_nested that is used later for decisions
specific to the nested use case.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 +++
arch/x86/kernel/cpu/mshyperv.c | 7 +++++++
drivers/hv/hv_common.c | 9 ++++++---
include/asm-generic/mshyperv.h | 1 +
4 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6d9368ea3701..58c03d18c235 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -114,6 +114,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+/* Indicates that the hypervisor is nested within a Hyper-V partition. */
+#define HV_X64_HYPERV_NESTED BIT(12)
+
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 46668e255421..f9b78d4829e3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,6 +37,8 @@
/* Is Linux running as the root partition? */
bool hv_root_partition;
+/* Is Linux running on nested Microsoft Hypervisor */
+bool hv_nested;
struct ms_hyperv_info ms_hyperv;
#if IS_ENABLED(CONFIG_HYPERV)
@@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: running as root partition\n");
}
+ if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
+ hv_nested = true;
+ pr_info("Hyper-V: running on a nested hypervisor\n");
+ }
+
/*
* Extract host information.
*/
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index ae68298c0dca..52a6f89ccdbd 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -25,17 +25,20 @@
#include <asm/mshyperv.h>
/*
- * hv_root_partition and ms_hyperv are defined here with other Hyper-V
- * specific globals so they are shared across all architectures and are
+ * hv_root_partition, ms_hyperv and hv_nested are defined here with other
+ * Hyper-V specific globals so they are shared across all architectures and are
* built only when CONFIG_HYPERV is defined. But on x86,
* ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
- * defined, and it uses these two variables. So mark them as __weak
+ * defined, and it uses these three variables. So mark them as __weak
* here, allowing for an overriding definition in the module containing
* ms_hyperv_init_platform().
*/
bool __weak hv_root_partition;
EXPORT_SYMBOL_GPL(hv_root_partition);
+bool __weak hv_nested;
+EXPORT_SYMBOL_GPL(hv_nested);
+
struct ms_hyperv_info __weak ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..f131027830c3 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -48,6 +48,7 @@ struct ms_hyperv_info {
u64 shared_gpa_boundary;
};
extern struct ms_hyperv_info ms_hyperv;
+extern bool hv_nested;
extern void * __percpu *hyperv_pcpu_input_arg;
extern void * __percpu *hyperv_pcpu_output_arg;
--
2.25.1
On 12/8/2022 9:32 PM, Jinank Jain wrote:
> Detect if Linux is running as a nested hypervisor in the root
> partition for Microsoft Hypervisor, using flags provided by MSHV.
> Expose a new variable hv_nested that is used later for decisions
> specific to the nested use case.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 3 +++
> arch/x86/kernel/cpu/mshyperv.c | 7 +++++++
> drivers/hv/hv_common.c | 9 ++++++---
> include/asm-generic/mshyperv.h | 1 +
> 4 files changed, 17 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 6d9368ea3701..58c03d18c235 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -114,6 +114,9 @@
> /* Recommend using the newer ExProcessorMasks interface */
> #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
>
> +/* Indicates that the hypervisor is nested within a Hyper-V partition. */
> +#define HV_X64_HYPERV_NESTED BIT(12)
> +
> /* Recommend using enlightened VMCS */
> #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
>
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 46668e255421..f9b78d4829e3 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -37,6 +37,8 @@
>
> /* Is Linux running as the root partition? */
> bool hv_root_partition;
> +/* Is Linux running on nested Microsoft Hypervisor */
> +bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> #if IS_ENABLED(CONFIG_HYPERV)
> @@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void)
> pr_info("Hyper-V: running as root partition\n");
> }
>
> + if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
> + hv_nested = true;
> + pr_info("Hyper-V: running on a nested hypervisor\n");
> + }
> +
> /*
> * Extract host information.
> */
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index ae68298c0dca..52a6f89ccdbd 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -25,17 +25,20 @@
> #include <asm/mshyperv.h>
>
> /*
> - * hv_root_partition and ms_hyperv are defined here with other Hyper-V
> - * specific globals so they are shared across all architectures and are
> + * hv_root_partition, ms_hyperv and hv_nested are defined here with other
> + * Hyper-V specific globals so they are shared across all architectures and are
> * built only when CONFIG_HYPERV is defined. But on x86,
> * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
> - * defined, and it uses these two variables. So mark them as __weak
> + * defined, and it uses these three variables. So mark them as __weak
> * here, allowing for an overriding definition in the module containing
> * ms_hyperv_init_platform().
> */
> bool __weak hv_root_partition;
> EXPORT_SYMBOL_GPL(hv_root_partition);
>
> +bool __weak hv_nested;
> +EXPORT_SYMBOL_GPL(hv_nested);
> +
> struct ms_hyperv_info __weak ms_hyperv;
> EXPORT_SYMBOL_GPL(ms_hyperv);
>
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index bfb9eb9d7215..f131027830c3 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -48,6 +48,7 @@ struct ms_hyperv_info {
> u64 shared_gpa_boundary;
> };
> extern struct ms_hyperv_info ms_hyperv;
> +extern bool hv_nested;
>
> extern void * __percpu *hyperv_pcpu_input_arg;
> extern void * __percpu *hyperv_pcpu_output_arg;
>
Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Child partitions are free to allocate SynIC message and event pages, but
the root partition must use the pages allocated by the Microsoft
Hypervisor (MSHV). The base addresses of these pages can be found using
synthetic MSRs exposed by MSHV. These MSRs differ slightly between
nested and non-nested root partitions.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 11 ++++++
arch/x86/include/asm/mshyperv.h | 30 +++------------
arch/x86/kernel/cpu/mshyperv.c | 61 ++++++++++++++++++++++++++++++
drivers/hv/hv.c | 32 ++++++++++------
4 files changed, 99 insertions(+), 35 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 58c03d18c235..b5019becb618 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -225,6 +225,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT14 0x4000009E
#define HV_REGISTER_SINT15 0x4000009F
+/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..c38e4c66a3ac 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
return false;
}
-static inline u64 hv_get_register(unsigned int reg)
-{
- u64 value;
-
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
- else
- rdmsrl(reg, value);
- return value;
-}
-
-static inline void hv_set_register(unsigned int reg, u64 value)
-{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
-
- /* Write proxy bit via wrmsl instruction */
- if (reg >= HV_REGISTER_SINT0 &&
- reg <= HV_REGISTER_SINT15)
- wrmsrl(reg, value | 1 << 20);
- } else {
- wrmsrl(reg, value);
- }
-}
+u64 hv_get_register(unsigned int reg);
+void hv_set_register(unsigned int reg, u64 value);
+u64 hv_get_non_nested_register(unsigned int reg);
+void hv_set_non_nested_register(unsigned int reg, u64 value);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
@@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
static inline void hv_set_register(unsigned int reg, u64 value) { }
static inline u64 hv_get_register(unsigned int reg) { return 0; }
+static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
+static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
bool visible)
{
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f9b78d4829e3..47ffec5de9b8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -41,7 +41,68 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
+static inline unsigned int hv_get_nested_reg(unsigned int reg)
+{
+ switch (reg) {
+ case HV_REGISTER_SIMP:
+ return HV_REGISTER_NESTED_SIMP;
+ case HV_REGISTER_SIEFP:
+ return HV_REGISTER_NESTED_SIEFP;
+ case HV_REGISTER_SVERSION:
+ return HV_REGISTER_NESTED_SVERSION;
+ case HV_REGISTER_SCONTROL:
+ return HV_REGISTER_NESTED_SCONTROL;
+ case HV_REGISTER_SINT0:
+ return HV_REGISTER_NESTED_SINT0;
+ case HV_REGISTER_EOM:
+ return HV_REGISTER_NESTED_EOM;
+ default:
+ return reg;
+ }
+}
+
#if IS_ENABLED(CONFIG_HYPERV)
+u64 hv_get_non_nested_register(unsigned int reg)
+{
+ u64 value;
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
+ hv_ghcb_msr_read(reg, &value);
+ else
+ rdmsrl(reg, value);
+ return value;
+}
+
+void hv_set_non_nested_register(unsigned int reg, u64 value)
+{
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
+ hv_ghcb_msr_write(reg, value);
+
+ /* Write proxy bit via wrmsl instruction */
+ if (reg >= HV_REGISTER_SINT0 &&
+ reg <= HV_REGISTER_SINT15)
+ wrmsrl(reg, value | 1 << 20);
+ } else {
+ wrmsrl(reg, value);
+ }
+}
+
+u64 hv_get_register(unsigned int reg)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ return hv_get_non_nested_register(reg);
+}
+
+void hv_set_register(unsigned int reg, u64 value)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ hv_set_non_nested_register(reg, value);
+}
+
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..a422cb7b18d3 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -147,7 +147,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp()) {
+ if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -188,8 +188,16 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
+ if (hv_root_partition) {
+ if (hv_cpu->synic_event_page != NULL)
+ memunmap(hv_cpu->synic_event_page);
+
+ if (hv_cpu->synic_message_page != NULL)
+ memunmap(hv_cpu->synic_message_page);
+ } else {
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ }
free_page((unsigned long)hv_cpu->post_msg_page);
}
@@ -214,9 +222,10 @@ void hv_synic_enable_regs(unsigned int cpu)
/* Setup the Synic's message page */
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+
simp.simp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -233,7 +242,7 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -250,8 +259,8 @@ void hv_synic_enable_regs(unsigned int cpu)
/* Setup the shared SINT. */
if (vmbus_irq != -1)
enable_percpu_irq(vmbus_irq, 0);
- shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
- VMBUS_MESSAGE_SINT);
+ shared_sint.as_uint64 =
+ hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT);
shared_sint.vector = vmbus_interrupt;
shared_sint.masked = false;
@@ -267,7 +276,7 @@ void hv_synic_enable_regs(unsigned int cpu)
shared_sint.auto_eoi = 0;
#endif
hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
- shared_sint.as_uint64);
+ shared_sint.as_uint64);
/* Enable the global synic bit */
sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
@@ -297,15 +306,15 @@ void hv_synic_disable_regs(unsigned int cpu)
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;
- shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
- VMBUS_MESSAGE_SINT);
+ shared_sint.as_uint64 =
+ hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT);
shared_sint.masked = 1;
/* Need to correctly cleanup in the case of SMP!!! */
/* Disable the interrupt */
hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
- shared_sint.as_uint64);
+ shared_sint.as_uint64);
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
/*
@@ -335,6 +344,7 @@ void hv_synic_disable_regs(unsigned int cpu)
/* Disable the global synic bit */
sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
sctrl.enable = 0;
+
hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
if (vmbus_irq != -1)
--
2.25.1
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Thursday, December 8, 2022 9:32 PM
>
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 11 ++++++
> arch/x86/include/asm/mshyperv.h | 30 +++------------
> arch/x86/kernel/cpu/mshyperv.c | 61 ++++++++++++++++++++++++++++++
> drivers/hv/hv.c | 32 ++++++++++------
> 4 files changed, 99 insertions(+), 35 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 58c03d18c235..b5019becb618 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> #define HV_REGISTER_SINT14 0x4000009E
> #define HV_REGISTER_SINT15 0x4000009F
>
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> +#define HV_REGISTER_NESTED_EOM 0x40001084
> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> +
> /*
> * Synthetic Timer MSRs. Four timers per vcpu.
> */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 61f0c206bff0..c38e4c66a3ac 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> return false;
> }
>
> -static inline u64 hv_get_register(unsigned int reg)
> -{
> - u64 value;
> -
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> - hv_ghcb_msr_read(reg, &value);
> - else
> - rdmsrl(reg, value);
> - return value;
> -}
> -
> -static inline void hv_set_register(unsigned int reg, u64 value)
> -{
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> - hv_ghcb_msr_write(reg, value);
> -
> - /* Write proxy bit via wrmsl instruction */
> - if (reg >= HV_REGISTER_SINT0 &&
> - reg <= HV_REGISTER_SINT15)
> - wrmsrl(reg, value | 1 << 20);
> - } else {
> - wrmsrl(reg, value);
> - }
> -}
> +u64 hv_get_register(unsigned int reg);
> +void hv_set_register(unsigned int reg, u64 value);
> +u64 hv_get_non_nested_register(unsigned int reg);
> +void hv_set_non_nested_register(unsigned int reg, u64 value);
>
> #else /* CONFIG_HYPERV */
> static inline void hyperv_init(void) {}
> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
> }
> static inline void hv_set_register(unsigned int reg, u64 value) { }
> static inline u64 hv_get_register(unsigned int reg) { return 0; }
> +static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
> +static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
> static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
> bool visible)
> {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index f9b78d4829e3..47ffec5de9b8 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -41,7 +41,68 @@ bool hv_root_partition;
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> +{
> + switch (reg) {
> + case HV_REGISTER_SIMP:
> + return HV_REGISTER_NESTED_SIMP;
> + case HV_REGISTER_SIEFP:
> + return HV_REGISTER_NESTED_SIEFP;
> + case HV_REGISTER_SVERSION:
> + return HV_REGISTER_NESTED_SVERSION;
> + case HV_REGISTER_SCONTROL:
> + return HV_REGISTER_NESTED_SCONTROL;
> + case HV_REGISTER_SINT0:
> + return HV_REGISTER_NESTED_SINT0;
> + case HV_REGISTER_EOM:
> + return HV_REGISTER_NESTED_EOM;
> + default:
> + return reg;
> + }
> +}
> +
> #if IS_ENABLED(CONFIG_HYPERV)
> +u64 hv_get_non_nested_register(unsigned int reg)
> +{
> + u64 value;
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> + hv_ghcb_msr_read(reg, &value);
> + else
> + rdmsrl(reg, value);
> + return value;
> +}
> +
> +void hv_set_non_nested_register(unsigned int reg, u64 value)
> +{
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> + hv_ghcb_msr_write(reg, value);
> +
> + /* Write proxy bit via wrmsl instruction */
> + if (reg >= HV_REGISTER_SINT0 &&
> + reg <= HV_REGISTER_SINT15)
> + wrmsrl(reg, value | 1 << 20);
> + } else {
> + wrmsrl(reg, value);
> + }
> +}
> +
> +u64 hv_get_register(unsigned int reg)
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + return hv_get_non_nested_register(reg);
> +}
> +
> +void hv_set_register(unsigned int reg, u64 value)
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + hv_set_non_nested_register(reg, value);
> +}
> +
This refactoring looks good. But there's still one tweak needed.
These four functions must be marked as exported because they
are used in code in drivers/hv that is part of the Hyper-V module.
If CONFIG_HYPERV=m, you'll get a link error if these functions
aren't exported. By "exported", I mean adding
EXPORT_SYMBOL_GPL(<func_name>);
after each of the above four functions. A good test is to build
with CONFIG_HYPERV=m instead of CONFIG_HYPERV=y.
> static void (*vmbus_handler)(void);
> static void (*hv_stimer0_handler)(void);
> static void (*hv_kexec_handler)(void);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..a422cb7b18d3 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp()) {
> + if (!hv_isolation_type_snp() && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -188,8 +188,16 @@ void hv_synic_free(void)
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> - free_page((unsigned long)hv_cpu->synic_event_page);
> - free_page((unsigned long)hv_cpu->synic_message_page);
> + if (hv_root_partition) {
> + if (hv_cpu->synic_event_page != NULL)
> + memunmap(hv_cpu->synic_event_page);
> +
> + if (hv_cpu->synic_message_page != NULL)
> + memunmap(hv_cpu->synic_message_page);
> + } else {
> + free_page((unsigned long)hv_cpu->synic_event_page);
> + free_page((unsigned long)hv_cpu->synic_message_page);
> + }
> free_page((unsigned long)hv_cpu->post_msg_page);
> }
>
> @@ -214,9 +222,10 @@ void hv_synic_enable_regs(unsigned int cpu)
>
> /* Setup the Synic's message page */
> simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> +
This additional blank line is a spurious/gratuitous whitespace change, which
should be avoided. Especially when a patch has gone through several revisions,
it's likely that you'll end up with some whitespace changes like this. But it's
important to go back and remove them so that the patch isn't cluttered with
changes that don't add any value. Such gratuitous changes make it harder
to review the patch, and are unnecessary code churn.
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_message_page
> = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -233,7 +242,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_event_page =
> memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -250,8 +259,8 @@ void hv_synic_enable_regs(unsigned int cpu)
> /* Setup the shared SINT. */
> if (vmbus_irq != -1)
> enable_percpu_irq(vmbus_irq, 0);
> - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
> - VMBUS_MESSAGE_SINT);
> + shared_sint.as_uint64 =
> + hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT);
The above change, and all the changes from here down in this patch, are
spurious/gratuitous whitespace changes that should be removed. Go back
to the original code and formatting. Doing so will make the patch a lot
shorter. :-)
Michael
>
> shared_sint.vector = vmbus_interrupt;
> shared_sint.masked = false;
> @@ -267,7 +276,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> shared_sint.auto_eoi = 0;
> #endif
> hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> - shared_sint.as_uint64);
> + shared_sint.as_uint64);
>
> /* Enable the global synic bit */
> sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
> @@ -297,15 +306,15 @@ void hv_synic_disable_regs(unsigned int cpu)
> union hv_synic_siefp siefp;
> union hv_synic_scontrol sctrl;
>
> - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
> - VMBUS_MESSAGE_SINT);
> + shared_sint.as_uint64 =
> + hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT);
>
> shared_sint.masked = 1;
>
> /* Need to correctly cleanup in the case of SMP!!! */
> /* Disable the interrupt */
> hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> - shared_sint.as_uint64);
> + shared_sint.as_uint64);
>
> simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> /*
> @@ -335,6 +344,7 @@ void hv_synic_disable_regs(unsigned int cpu)
> /* Disable the global synic bit */
> sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
> sctrl.enable = 0;
> +
> hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>
> if (vmbus_irq != -1)
> --
> 2.25.1
According to the TLFS, in order to communicate with the L0 hypervisor an
additional bit needs to be set in the control register. This communication
is required to perform privileged operations which can only be
performed by the L0 hypervisor. An example of that is setting up the
VMBus infrastructure.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
arch/x86/include/asm/mshyperv.h | 42 +++++++++++++++++++++++++++---
include/asm-generic/hyperv-tlfs.h | 1 +
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b5019becb618..7758c495541d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -380,7 +380,8 @@ struct hv_nested_enlightenments_control {
__u32 reserved:31;
} features;
struct {
- __u32 reserved;
+ __u32 inter_partition_comm:1;
+ __u32 reserved:31;
} hypercallControls;
} __packed;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c38e4c66a3ac..0272733166df 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -74,10 +74,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
+/* Hypercall to the L0 hypervisor */
+static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
+{
+ return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
+}
+
/* Fast hypercall with 8 bytes of input and no output */
-static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+static inline u64 _hv_do_fast_hypercall8(u64 control, u16 code, u64 input1)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall8(control, code, input1);
+}
+
+static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall8(control, code, input1);
+}
+
/* Fast hypercall with 16 bytes of input */
-static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+static inline u64 _hv_do_fast_hypercall16(u64 control, u16 code, u64 input1, u64 input2)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall16(control, code, input1, input2);
+}
+
+static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall16(control, code, input1, input2);
+}
+
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index b17c6eeb9afa..e61ee461c4fc 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_HYPERCALL_VARHEAD_OFFSET 17
#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17)
#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27)
+#define HV_HYPERCALL_NESTED BIT_ULL(31)
#define HV_HYPERCALL_REP_COMP_OFFSET 32
#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
--
2.25.1
On 12/8/2022 9:32 PM, Jinank Jain wrote:
> /* Fast hypercall with 8 bytes of input and no output */
> -static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> +static inline u64 _hv_do_fast_hypercall8(u64 control, u16 code, u64 input1)
> {
> - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> + u64 hv_status;
The parameter 'code' seems to be unused in this function now.
Can we just replace it with 'control'?
>
> #ifdef CONFIG_X86_64
> {
> @@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> return hv_status;
> }
>
> +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
> +
> + return _hv_do_fast_hypercall8(control, code, input1);
> +}
> +
> +static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
> +
> + return _hv_do_fast_hypercall8(control, code, input1);
> +}
> +
> /* Fast hypercall with 16 bytes of input */
> -static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> +static inline u64 _hv_do_fast_hypercall16(u64 control, u16 code, u64 input1, u64 input2)
> {
> - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> + u64 hv_status;
Ditto
>
> #ifdef CONFIG_X86_64
> {
> @@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> return hv_status;
> }
>
> +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
> +
> + return _hv_do_fast_hypercall16(control, code, input1, input2);
> +}
> +
> +static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
> +
> + return _hv_do_fast_hypercall16(control, code, input1, input2);
> +}
Currently the VMBus driver is not initialized for the root partition, but we
need to enable the VMBus driver for a nested root partition. This is required
so that the L2 root can use the VMBus devices.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
drivers/hv/vmbus_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0f00d57b7c25..6324e01d5eec 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2745,7 +2745,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition)
+ if (hv_root_partition && !hv_nested)
return 0;
/*
--
2.25.1
On 12/8/2022 9:32 PM, Jinank Jain wrote: > Currently VMBus driver is not initialized for root partition but we need > to enable the VMBus driver for nested root partition. This is required, > so that L2 root can use the VMBus devices. > > Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com> > --- > drivers/hv/vmbus_drv.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c > index 0f00d57b7c25..6324e01d5eec 100644 > --- a/drivers/hv/vmbus_drv.c > +++ b/drivers/hv/vmbus_drv.c > @@ -2745,7 +2745,7 @@ static int __init hv_acpi_init(void) > if (!hv_is_hyperv_initialized()) > return -ENODEV; > > - if (hv_root_partition) > + if (hv_root_partition && !hv_nested) > return 0; > > /* > Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay
the VMBus interrupt, but this does not work in the case of a nested
hypervisor. The Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt
vector range for VMBus, and thus we have to use one of the vectors from
that range and set up the IDT accordingly.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/idtentry.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 6 ++++++
arch/x86/kernel/cpu/mshyperv.c | 15 +++++++++++++++
arch/x86/kernel/idt.c | 10 ++++++++++
drivers/hv/vmbus_drv.c | 3 ++-
5 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 72184b0b2219..c0648e3e4d4a 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -686,6 +686,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_INTR_NESTED_VMBUS_VECTOR,
+ sysvec_hyperv_nested_vmbus_intr);
#endif
#if IS_ENABLED(CONFIG_ACRN_GUEST)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 43dcb9284208..729d19eab7f5 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,12 @@
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
#define HYPERV_STIMER0_VECTOR 0xed
+/*
+ * FIXME: Change this, once Microsoft Hypervisor changes its assumption
+ * around VMBus interrupt vector allocation for nested root partition.
+ * Or provides a better interface to detect this instead of hardcoding.
+ */
+#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31
#endif
#define LOCAL_TIMER_VECTOR 0xec
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 47ffec5de9b8..dc2be733cadc 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -122,6 +122,21 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (vmbus_handler)
+ vmbus_handler();
+
+ if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
+ ack_APIC_irq();
+
+ set_irq_regs(old_regs);
+}
+
void hv_setup_vmbus_handler(void (*handler)(void))
{
vmbus_handler = handler;
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index a58c6bc1cd68..3536935cea39 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -160,6 +160,16 @@ static const __initconst struct idt_data apic_idts[] = {
# endif
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
+#ifdef CONFIG_HYPERV
+ /*
+ * This is a hack because we cannot install this interrupt handler
+ * via alloc_intr_gate as it does not allow interrupt vector less
+ * than FIRST_SYSTEM_VECTOR. And hyperv does not want anything other
+ * than 0x31-0x34 as the interrupt vector for vmbus interrupt in case
+ * of nested setup.
+ */
+ INTG(HYPERV_INTR_NESTED_VMBUS_VECTOR, asm_sysvec_hyperv_nested_vmbus_intr),
+#endif
#endif
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6324e01d5eec..740878367426 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2768,7 +2768,8 @@ static int __init hv_acpi_init(void)
* normal Linux IRQ mechanism is not used in this case.
*/
#ifdef HYPERVISOR_CALLBACK_VECTOR
- vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
+ vmbus_interrupt = hv_nested ? HYPERV_INTR_NESTED_VMBUS_VECTOR :
+ HYPERVISOR_CALLBACK_VECTOR;
vmbus_irq = -1;
#endif
--
2.25.1
This patch series adds support for running on a nested Microsoft Hypervisor. In the case of a nested Microsoft Hypervisor there are a few privileged hypercalls which need to go to the L0 Hypervisor instead of the L1 Hypervisor. This patch series identifies such hypercalls and replaces them with nested hypercalls. Jinank Jain (5): x86/hyperv: Add support for detecting nested hypervisor Drivers: hv: Setup synic registers in case of nested root partition x86/hyperv: Add an interface to do nested hypercalls Drivers: hv: Enable vmbus driver for nested root partition x86/hyperv: Change interrupt vector for nested root partition [v4] - Fix ARM64 compilation [v5] - Fix comments from Michael Kelley [v6] - Send the correct patches from the right folder [v7] - Fix linker issues for CONFIG_HYPERV=n pointed out by Michael - Fix comments from Nuno: created two separate functions for fetching nested vs non-nested registers. arch/x86/include/asm/hyperv-tlfs.h | 17 ++++- arch/x86/include/asm/idtentry.h | 2 + arch/x86/include/asm/irq_vectors.h | 6 ++ arch/x86/include/asm/mshyperv.h | 72 +++++++++++++--------- arch/x86/kernel/cpu/mshyperv.c | 91 +++++++++++++++++++++++++++ arch/x86/kernel/idt.c | 9 +++ drivers/hv/hv.c | 99 ++++++++++++++++++++++-------- drivers/hv/hv_common.c | 9 ++- drivers/hv/vmbus_drv.c | 5 +- include/asm-generic/hyperv-tlfs.h | 1 + include/asm-generic/mshyperv.h | 6 +- 11 files changed, 258 insertions(+), 59 deletions(-) -- 2.25.1
Detect if Linux is running as a nested hypervisor in the root
partition for Microsoft Hypervisor, using flags provided by MSHV.
Expose a new variable hv_nested that is used later for decisions
specific to the nested use case.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 +++
arch/x86/kernel/cpu/mshyperv.c | 7 +++++++
drivers/hv/hv_common.c | 9 ++++++---
include/asm-generic/mshyperv.h | 1 +
4 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6d9368ea3701..58c03d18c235 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -114,6 +114,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+/* Indicates that the hypervisor is nested within a Hyper-V partition. */
+#define HV_X64_HYPERV_NESTED BIT(12)
+
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 46668e255421..f9b78d4829e3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,6 +37,8 @@
/* Is Linux running as the root partition? */
bool hv_root_partition;
+/* Is Linux running on a nested Microsoft Hypervisor? */
+bool hv_nested;
struct ms_hyperv_info ms_hyperv;
#if IS_ENABLED(CONFIG_HYPERV)
@@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: running as root partition\n");
}
+ if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
+ hv_nested = true;
+ pr_info("Hyper-V: running on a nested hypervisor\n");
+ }
+
/*
* Extract host information.
*/
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index ae68298c0dca..52a6f89ccdbd 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -25,17 +25,20 @@
#include <asm/mshyperv.h>
/*
- * hv_root_partition and ms_hyperv are defined here with other Hyper-V
- * specific globals so they are shared across all architectures and are
+ * hv_root_partition, ms_hyperv and hv_nested are defined here with other
+ * Hyper-V specific globals so they are shared across all architectures and are
* built only when CONFIG_HYPERV is defined. But on x86,
* ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
- * defined, and it uses these two variables. So mark them as __weak
+ * defined, and it uses these three variables. So mark them as __weak
* here, allowing for an overriding definition in the module containing
* ms_hyperv_init_platform().
*/
bool __weak hv_root_partition;
EXPORT_SYMBOL_GPL(hv_root_partition);
+bool __weak hv_nested;
+EXPORT_SYMBOL_GPL(hv_nested);
+
struct ms_hyperv_info __weak ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..f131027830c3 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -48,6 +48,7 @@ struct ms_hyperv_info {
u64 shared_gpa_boundary;
};
extern struct ms_hyperv_info ms_hyperv;
+extern bool hv_nested;
extern void * __percpu *hyperv_pcpu_input_arg;
extern void * __percpu *hyperv_pcpu_output_arg;
--
2.25.1
Child partitions are free to allocate the SynIC message and event pages, but
the root partition must use the pages allocated by the Microsoft
Hypervisor (MSHV). The base addresses of these pages can be found using
synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
for a nested vs. a non-nested root partition.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 11 ++++
arch/x86/include/asm/mshyperv.h | 30 ++-------
arch/x86/kernel/cpu/mshyperv.c | 69 +++++++++++++++++++++
drivers/hv/hv.c | 99 ++++++++++++++++++++++--------
include/asm-generic/mshyperv.h | 5 +-
5 files changed, 165 insertions(+), 49 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 58c03d18c235..b5019becb618 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -225,6 +225,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT14 0x4000009E
#define HV_REGISTER_SINT15 0x4000009F
+/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..3197d49c888c 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
return false;
}
-static inline u64 hv_get_register(unsigned int reg)
-{
- u64 value;
-
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
- else
- rdmsrl(reg, value);
- return value;
-}
-
-static inline void hv_set_register(unsigned int reg, u64 value)
-{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
-
- /* Write proxy bit via wrmsl instruction */
- if (reg >= HV_REGISTER_SINT0 &&
- reg <= HV_REGISTER_SINT15)
- wrmsrl(reg, value | 1 << 20);
- } else {
- wrmsrl(reg, value);
- }
-}
+u64 hv_get_register(unsigned int reg);
+void hv_set_register(unsigned int reg, u64 value);
+u64 hv_get_nested_register(unsigned int reg);
+void hv_set_nested_register(unsigned int reg, u64 value);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
@@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
static inline void hv_set_register(unsigned int reg, u64 value) { }
static inline u64 hv_get_register(unsigned int reg) { return 0; }
+static inline void hv_set_nested_register(unsigned int reg, u64 value) { }
+static inline u64 hv_get_nested_register(unsigned int reg) { return 0; }
static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
bool visible)
{
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f9b78d4829e3..f2f6e10301a8 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -41,7 +41,76 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
+/*
+ * Translate a SynIC register number to its nested counterpart, i.e. the
+ * HV_REGISTER_NESTED_* register. Registers that have no nested counterpart
+ * listed here are returned unchanged.
+ */
+static inline unsigned int hv_get_nested_reg(unsigned int reg)
+{
+	/*
+	 * Translate every SINTx, not just SINT0: callers pass
+	 * HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT. This assumes the nested
+	 * SINT registers are laid out contiguously from
+	 * HV_REGISTER_NESTED_SINT0, mirroring SINT0..SINT15.
+	 */
+	if (reg >= HV_REGISTER_SINT0 && reg <= HV_REGISTER_SINT15)
+		return reg - HV_REGISTER_SINT0 + HV_REGISTER_NESTED_SINT0;
+
+	switch (reg) {
+	case HV_REGISTER_SIMP:
+		return HV_REGISTER_NESTED_SIMP;
+	case HV_REGISTER_SIEFP:
+		return HV_REGISTER_NESTED_SIEFP;
+	case HV_REGISTER_SVERSION:
+		return HV_REGISTER_NESTED_SVERSION;
+	case HV_REGISTER_SCONTROL:
+		return HV_REGISTER_NESTED_SCONTROL;
+	case HV_REGISTER_EOM:
+		return HV_REGISTER_NESTED_EOM;
+	default:
+		return reg;
+	}
+}
+
#if IS_ENABLED(CONFIG_HYPERV)
+/*
+ * Read a Hyper-V register. When @nested is true, @reg is first translated
+ * with hv_get_nested_reg(). SynIC registers are read through
+ * hv_ghcb_msr_read() when hv_isolation_type_snp() is true; everything else
+ * is read with rdmsrl().
+ */
+static u64 _hv_get_register(unsigned int reg, bool nested)
+{
+	unsigned int msr = nested ? hv_get_nested_reg(reg) : reg;
+	u64 val;
+
+	if (hv_is_synic_reg(msr) && hv_isolation_type_snp()) {
+		hv_ghcb_msr_read(msr, &val);
+		return val;
+	}
+
+	rdmsrl(msr, val);
+	return val;
+}
+
+/*
+ * Write a Hyper-V register. When @nested is true, @reg is first translated
+ * with hv_get_nested_reg(). SynIC registers are written through
+ * hv_ghcb_msr_write() when hv_isolation_type_snp() is true; everything else
+ * is written with wrmsrl().
+ */
+static void _hv_set_register(unsigned int reg, u64 value, bool nested)
+{
+	unsigned int msr = nested ? hv_get_nested_reg(reg) : reg;
+
+	/* Common case: plain MSR write. */
+	if (!(hv_is_synic_reg(msr) && hv_isolation_type_snp())) {
+		wrmsrl(msr, value);
+		return;
+	}
+
+	hv_ghcb_msr_write(msr, value);
+
+	/* Write proxy bit via wrmsl instruction */
+	if (msr >= HV_REGISTER_SINT0 && msr <= HV_REGISTER_SINT15)
+		wrmsrl(msr, value | 1 << 20);
+}
+
+/*
+ * Register accessors. These used to be static inlines in <asm/mshyperv.h>;
+ * now that they are out of line they must be exported, because their
+ * callers (e.g. the VMBus driver code in drivers/hv/hv.c and
+ * vmbus_signal_eom()) may be built as modules.
+ */
+
+/* Read @reg as given, without translating it to a nested register. */
+u64 hv_get_register(unsigned int reg)
+{
+	return _hv_get_register(reg, false);
+}
+EXPORT_SYMBOL_GPL(hv_get_register);
+
+/* Write @value to @reg as given, without translating it to a nested register. */
+void hv_set_register(unsigned int reg, u64 value)
+{
+	_hv_set_register(reg, value, false);
+}
+EXPORT_SYMBOL_GPL(hv_set_register);
+
+/* Read the nested counterpart of @reg (see hv_get_nested_reg()). */
+u64 hv_get_nested_register(unsigned int reg)
+{
+	return _hv_get_register(reg, true);
+}
+EXPORT_SYMBOL_GPL(hv_get_nested_register);
+
+/* Write @value to the nested counterpart of @reg (see hv_get_nested_reg()). */
+void hv_set_nested_register(unsigned int reg, u64 value)
+{
+	_hv_set_register(reg, value, true);
+}
+EXPORT_SYMBOL_GPL(hv_set_nested_register);
+
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..0ed052f2423e 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -147,7 +147,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp()) {
+ if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -188,8 +188,16 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
+ if (hv_root_partition) {
+ if (hv_cpu->synic_event_page != NULL)
+ memunmap(hv_cpu->synic_event_page);
+
+ if (hv_cpu->synic_message_page != NULL)
+ memunmap(hv_cpu->synic_message_page);
+ } else {
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ }
free_page((unsigned long)hv_cpu->post_msg_page);
}
@@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu)
union hv_synic_scontrol sctrl;
/* Setup the Synic's message page */
- simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+ simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
+ hv_get_register(HV_REGISTER_SIMP);
+
simp.simp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -227,13 +237,18 @@ void hv_synic_enable_regs(unsigned int cpu)
>> HV_HYP_PAGE_SHIFT;
}
- hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
/* Setup the Synic's event page */
- siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+ siefp.as_uint64 = hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SIEFP) :
+ hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -245,13 +260,19 @@ void hv_synic_enable_regs(unsigned int cpu)
>> HV_HYP_PAGE_SHIFT;
}
- hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
/* Setup the shared SINT. */
if (vmbus_irq != -1)
enable_percpu_irq(vmbus_irq, 0);
- shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
- VMBUS_MESSAGE_SINT);
+ shared_sint.as_uint64 =
+ hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SINT0 +
+ VMBUS_MESSAGE_SINT) :
+ hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT);
shared_sint.vector = vmbus_interrupt;
shared_sint.masked = false;
@@ -266,14 +287,22 @@ void hv_synic_enable_regs(unsigned int cpu)
#else
shared_sint.auto_eoi = 0;
#endif
- hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
shared_sint.as_uint64);
-
/* Enable the global synic bit */
- sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
+ sctrl.as_uint64 = hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SCONTROL) :
+ hv_get_register(HV_REGISTER_SCONTROL);
sctrl.enable = 1;
- hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
}
int hv_synic_init(unsigned int cpu)
@@ -297,17 +326,25 @@ void hv_synic_disable_regs(unsigned int cpu)
union hv_synic_siefp siefp;
union hv_synic_scontrol sctrl;
- shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
- VMBUS_MESSAGE_SINT);
+ shared_sint.as_uint64 =
+ hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SINT0 +
+ VMBUS_MESSAGE_SINT) :
+ hv_get_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT);
shared_sint.masked = 1;
/* Need to correctly cleanup in the case of SMP!!! */
/* Disable the interrupt */
- hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
+ shared_sint.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
shared_sint.as_uint64);
- simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+ simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
+ hv_get_register(HV_REGISTER_SIMP);
/*
* In Isolation VM, sim and sief pages are allocated by
* paravisor. These pages also will be used by kdump
@@ -320,9 +357,14 @@ void hv_synic_disable_regs(unsigned int cpu)
else
simp.base_simp_gpa = 0;
- hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
- siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
+ siefp.as_uint64 = hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SIEFP) :
+ hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 0;
if (hv_isolation_type_snp())
@@ -330,12 +372,21 @@ void hv_synic_disable_regs(unsigned int cpu)
else
siefp.base_siefp_gpa = 0;
- hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
/* Disable the global synic bit */
- sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
+ sctrl.as_uint64 = hv_nested ?
+ hv_get_nested_register(HV_REGISTER_SCONTROL) :
+ hv_get_register(HV_REGISTER_SCONTROL);
sctrl.enable = 0;
- hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
+ else
+ hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
if (vmbus_irq != -1)
disable_percpu_irq(vmbus_irq);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index f131027830c3..db0b5be1e087 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -147,7 +147,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type)
* possibly deliver another msg from the
* hypervisor
*/
- hv_set_register(HV_REGISTER_EOM, 0);
+ if (hv_nested)
+ hv_set_nested_register(HV_REGISTER_EOM, 0);
+ else
+ hv_set_register(HV_REGISTER_EOM, 0);
}
}
--
2.25.1
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Thursday, December 1, 2022 3:04 AM
>
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 11 ++++
> arch/x86/include/asm/mshyperv.h | 30 ++-------
> arch/x86/kernel/cpu/mshyperv.c | 69 +++++++++++++++++++++
> drivers/hv/hv.c | 99 ++++++++++++++++++++++--------
> include/asm-generic/mshyperv.h | 5 +-
> 5 files changed, 165 insertions(+), 49 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 58c03d18c235..b5019becb618 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> #define HV_REGISTER_SINT14 0x4000009E
> #define HV_REGISTER_SINT15 0x4000009F
>
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> +#define HV_REGISTER_NESTED_EOM 0x40001084
> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> +
> /*
> * Synthetic Timer MSRs. Four timers per vcpu.
> */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 61f0c206bff0..3197d49c888c 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> return false;
> }
>
> -static inline u64 hv_get_register(unsigned int reg)
> -{
> - u64 value;
> -
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> - hv_ghcb_msr_read(reg, &value);
> - else
> - rdmsrl(reg, value);
> - return value;
> -}
> -
> -static inline void hv_set_register(unsigned int reg, u64 value)
> -{
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> - hv_ghcb_msr_write(reg, value);
> -
> - /* Write proxy bit via wrmsl instruction */
> - if (reg >= HV_REGISTER_SINT0 &&
> - reg <= HV_REGISTER_SINT15)
> - wrmsrl(reg, value | 1 << 20);
> - } else {
> - wrmsrl(reg, value);
> - }
> -}
> +u64 hv_get_register(unsigned int reg);
> +void hv_set_register(unsigned int reg, u64 value);
> +u64 hv_get_nested_register(unsigned int reg);
> +void hv_set_nested_register(unsigned int reg, u64 value);
>
> #else /* CONFIG_HYPERV */
> static inline void hyperv_init(void) {}
> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
> }
> static inline void hv_set_register(unsigned int reg, u64 value) { }
> static inline u64 hv_get_register(unsigned int reg) { return 0; }
> +static inline void hv_set_nested_register(unsigned int reg, u64 value) { }
> +static inline u64 hv_get_nested_register(unsigned int reg) { return 0; }
> static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
> bool visible)
> {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index f9b78d4829e3..f2f6e10301a8 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -41,7 +41,76 @@ bool hv_root_partition;
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> +{
> + switch (reg) {
> + case HV_REGISTER_SIMP:
> + return HV_REGISTER_NESTED_SIMP;
> + case HV_REGISTER_NESTED_SIEFP:
> + return HV_REGISTER_SIEFP;
> + case HV_REGISTER_SCONTROL:
> + return HV_REGISTER_NESTED_SCONTROL;
> + case HV_REGISTER_SINT0:
> + return HV_REGISTER_NESTED_SINT0;
> + case HV_REGISTER_EOM:
> + return HV_REGISTER_NESTED_EOM;
> + default:
> + return reg;
> + }
Just a question: You added #defines for 6 nested registers. But
the switch statement above maps only 5 registers. Is it intentional
that there's not a mapping for HV_REGISTER_SVERSION?
> +}
> +
> #if IS_ENABLED(CONFIG_HYPERV)
> +static u64 _hv_get_register(unsigned int reg, bool nested)
> +{
> + u64 value;
> +
> + if (nested)
> + reg = hv_get_nested_reg(reg);
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> + hv_ghcb_msr_read(reg, &value);
> + else
> + rdmsrl(reg, value);
> + return value;
> +}
> +
> +static void _hv_set_register(unsigned int reg, u64 value, bool nested)
> +{
> + if (nested)
> + reg = hv_get_nested_reg(reg);
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> + hv_ghcb_msr_write(reg, value);
> +
> + /* Write proxy bit via wrmsl instruction */
> + if (reg >= HV_REGISTER_SINT0 &&
> + reg <= HV_REGISTER_SINT15)
> + wrmsrl(reg, value | 1 << 20);
> + } else {
> + wrmsrl(reg, value);
> + }
> +}
> +
> +u64 hv_get_register(unsigned int reg)
> +{
> + return _hv_get_register(reg, false);
> +}
> +
> +void hv_set_register(unsigned int reg, u64 value)
> +{
> + _hv_set_register(reg, value, false);
> +}
> +
> +u64 hv_get_nested_register(unsigned int reg)
> +{
> + return _hv_get_register(reg, true);
> +}
> +
> +void hv_set_nested_register(unsigned int reg, u64 value)
> +{
> + _hv_set_register(reg, value, true);
> +}
> +
> static void (*vmbus_handler)(void);
> static void (*hv_stimer0_handler)(void);
> static void (*hv_kexec_handler)(void);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..0ed052f2423e 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp()) {
> + if (!hv_isolation_type_snp() && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -188,8 +188,16 @@ void hv_synic_free(void)
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> - free_page((unsigned long)hv_cpu->synic_event_page);
> - free_page((unsigned long)hv_cpu->synic_message_page);
> + if (hv_root_partition) {
> + if (hv_cpu->synic_event_page != NULL)
> + memunmap(hv_cpu->synic_event_page);
> +
> + if (hv_cpu->synic_message_page != NULL)
> + memunmap(hv_cpu->synic_message_page);
> + } else {
> + free_page((unsigned long)hv_cpu->synic_event_page);
> + free_page((unsigned long)hv_cpu->synic_message_page);
> + }
> free_page((unsigned long)hv_cpu->post_msg_page);
> }
>
> @@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu)
> union hv_synic_scontrol sctrl;
>
> /* Setup the Synic's message page */
> - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> + simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
> + hv_get_register(HV_REGISTER_SIMP);
Unfortunately, this code and the similar places below will run into
problems on ARM64. Drivers/hv/hv.c is common code on all architectures
so it needs to compile and run on ARM64 as well as x86/x64. But there's
no hv_get_nested_register() defined or implemented on the ARM64 side,
so the code will fail to compile.
I think there's a better way to do this. Based on Nuno's comments, it
seems like there are two hv_get_register() functions needed:
1) Get the value of the register or its nested cousin, based on the value
of hv_nested. That's what you are explicitly coding here.
2) Get the value of the register. Don't access the nested cousin, regardless
of the value of hv_nested.
Based on how you coded things earlier, I'm assuming #1 is what you want to
use in most cases, and specifically here in drivers/hv/hv.c. That's good,
because #1 can hide the testing of hv_nested in the x86-specific
implementation of hv_get_register(), while the ARM64 version of
hv_get_register() continues to do whatever it does now with no changes.
I'm also assuming that #2 may be used in particular cases in the code
that is specifically related to nesting. Give the #2 version a different
name --- maybe hv_get_nonnested_register(), or something like that --
and use it only in code under arch/x86 that is related to nesting. That
way, ARM64 won't be affected.
Of course, the same approach applies to hv_set_register().
hv_get_register() and hv_get_nonnested_register() will obviously
share some code. But rather than calling a common function starting
with underscore like you've done above, let me suggest that
hv_get_register() test hv_nested and potentially do the translation,
then call hv_get_nonnested_register(). That way you'll end up
with just two functions instead of three as above with
hv_get_register(), hv_get_nested_register(), and _hv_get_register().
I haven't coded up any of this, so take it as a suggestion. There
could be some problem with it that I haven't seen, or my assumptions
might be wrong. But give it a try and see if it works out. I'm hoping
it can all be handled on the x86 side without having to add complexity
on the ARM64 side.
Michael
> +
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_message_page
> = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -227,13 +237,18 @@ void hv_synic_enable_regs(unsigned int cpu)
> >> HV_HYP_PAGE_SHIFT;
> }
>
> - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
> + else
> + hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>
> /* Setup the Synic's event page */
> - siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> + siefp.as_uint64 = hv_nested ?
> + hv_get_nested_register(HV_REGISTER_SIEFP) :
> + hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_event_page =
> memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -245,13 +260,19 @@ void hv_synic_enable_regs(unsigned int cpu)
> >> HV_HYP_PAGE_SHIFT;
> }
>
> - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> + else
> + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>
> /* Setup the shared SINT. */
> if (vmbus_irq != -1)
> enable_percpu_irq(vmbus_irq, 0);
> - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
> - VMBUS_MESSAGE_SINT);
> + shared_sint.as_uint64 =
> + hv_nested ?
> + hv_get_nested_register(HV_REGISTER_SINT0 +
> + VMBUS_MESSAGE_SINT) :
> + hv_get_register(HV_REGISTER_SINT0 +
> VMBUS_MESSAGE_SINT);
>
> shared_sint.vector = vmbus_interrupt;
> shared_sint.masked = false;
> @@ -266,14 +287,22 @@ void hv_synic_enable_regs(unsigned int cpu)
> #else
> shared_sint.auto_eoi = 0;
> #endif
> - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_SINT0 +
> VMBUS_MESSAGE_SINT,
> + shared_sint.as_uint64);
> + else
> + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> shared_sint.as_uint64);
> -
> /* Enable the global synic bit */
> - sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
> + sctrl.as_uint64 = hv_nested ?
> + hv_get_nested_register(HV_REGISTER_SCONTROL) :
> + hv_get_register(HV_REGISTER_SCONTROL);
> sctrl.enable = 1;
>
> - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> + else
> + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> }
>
> int hv_synic_init(unsigned int cpu)
> @@ -297,17 +326,25 @@ void hv_synic_disable_regs(unsigned int cpu)
> union hv_synic_siefp siefp;
> union hv_synic_scontrol sctrl;
>
> - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
> - VMBUS_MESSAGE_SINT);
> + shared_sint.as_uint64 =
> + hv_nested ?
> + hv_get_nested_register(HV_REGISTER_SINT0 +
> + VMBUS_MESSAGE_SINT) :
> + hv_get_register(HV_REGISTER_SINT0 +
> VMBUS_MESSAGE_SINT);
>
> shared_sint.masked = 1;
>
> /* Need to correctly cleanup in the case of SMP!!! */
> /* Disable the interrupt */
> - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_SINT0 +
> VMBUS_MESSAGE_SINT,
> + shared_sint.as_uint64);
> + else
> + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
> shared_sint.as_uint64);
>
> - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> + simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
> + hv_get_register(HV_REGISTER_SIMP);
> /*
> * In Isolation VM, sim and sief pages are allocated by
> * paravisor. These pages also will be used by kdump
> @@ -320,9 +357,14 @@ void hv_synic_disable_regs(unsigned int cpu)
> else
> simp.base_simp_gpa = 0;
>
> - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
> + else
> + hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>
> - siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> + siefp.as_uint64 = hv_nested ?
> + hv_get_nested_register(HV_REGISTER_SIEFP) :
> + hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 0;
>
> if (hv_isolation_type_snp())
> @@ -330,12 +372,21 @@ void hv_synic_disable_regs(unsigned int cpu)
> else
> siefp.base_siefp_gpa = 0;
>
> - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
> + else
> + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>
> /* Disable the global synic bit */
> - sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
> + sctrl.as_uint64 = hv_nested ?
> + hv_get_nested_register(HV_REGISTER_SCONTROL) :
> + hv_get_register(HV_REGISTER_SCONTROL);
> sctrl.enable = 0;
> - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> +
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
> + else
> + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>
> if (vmbus_irq != -1)
> disable_percpu_irq(vmbus_irq);
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index f131027830c3..db0b5be1e087 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -147,7 +147,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg,
> u32 old_msg_type)
> * possibly deliver another msg from the
> * hypervisor
> */
> - hv_set_register(HV_REGISTER_EOM, 0);
> + if (hv_nested)
> + hv_set_nested_register(HV_REGISTER_EOM, 0);
> + else
> + hv_set_register(HV_REGISTER_EOM, 0);
> }
> }
>
> --
> 2.25.1
On 12/2/2022 9:30 AM, Michael Kelley (LINUX) wrote:
> From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Thursday, December 1, 2022 3:04 AM
>> Child partitions are free to allocate SynIC message and event page but in
>> case of root partition it must use the pages allocated by Microsoft
>> Hypervisor (MSHV). Base address for these pages can be found using
>> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
>> for nested vs non-nested root partition.
>>
>> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
>> ---
>> arch/x86/include/asm/hyperv-tlfs.h | 11 ++++
>> arch/x86/include/asm/mshyperv.h | 30 ++-------
>> arch/x86/kernel/cpu/mshyperv.c | 69 +++++++++++++++++++++
>> drivers/hv/hv.c | 99 ++++++++++++++++++++++--------
>> include/asm-generic/mshyperv.h | 5 +-
>> 5 files changed, 165 insertions(+), 49 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
>> index 58c03d18c235..b5019becb618 100644
>> --- a/arch/x86/include/asm/hyperv-tlfs.h
>> +++ b/arch/x86/include/asm/hyperv-tlfs.h
>> @@ -225,6 +225,17 @@ enum hv_isolation_type {
>> #define HV_REGISTER_SINT14 0x4000009E
>> #define HV_REGISTER_SINT15 0x4000009F
>>
>> +/*
>> + * Define synthetic interrupt controller model specific registers for
>> + * nested hypervisor.
>> + */
>> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
>> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
>> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
>> +#define HV_REGISTER_NESTED_SIMP 0x40001083
>> +#define HV_REGISTER_NESTED_EOM 0x40001084
>> +#define HV_REGISTER_NESTED_SINT0 0x40001090
>> +
>> /*
>> * Synthetic Timer MSRs. Four timers per vcpu.
>> */
>> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
>> index 61f0c206bff0..3197d49c888c 100644
>> --- a/arch/x86/include/asm/mshyperv.h
>> +++ b/arch/x86/include/asm/mshyperv.h
>> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
>> return false;
>> }
>>
>> -static inline u64 hv_get_register(unsigned int reg)
>> -{
>> - u64 value;
>> -
>> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
>> - hv_ghcb_msr_read(reg, &value);
>> - else
>> - rdmsrl(reg, value);
>> - return value;
>> -}
>> -
>> -static inline void hv_set_register(unsigned int reg, u64 value)
>> -{
>> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
>> - hv_ghcb_msr_write(reg, value);
>> -
>> - /* Write proxy bit via wrmsl instruction */
>> - if (reg >= HV_REGISTER_SINT0 &&
>> - reg <= HV_REGISTER_SINT15)
>> - wrmsrl(reg, value | 1 << 20);
>> - } else {
>> - wrmsrl(reg, value);
>> - }
>> -}
>> +u64 hv_get_register(unsigned int reg);
>> +void hv_set_register(unsigned int reg, u64 value);
>> +u64 hv_get_nested_register(unsigned int reg);
>> +void hv_set_nested_register(unsigned int reg, u64 value);
>>
>> #else /* CONFIG_HYPERV */
>> static inline void hyperv_init(void) {}
>> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
>> }
>> static inline void hv_set_register(unsigned int reg, u64 value) { }
>> static inline u64 hv_get_register(unsigned int reg) { return 0; }
>> +static inline void hv_set_nested_register(unsigned int reg, u64 value) { }
>> +static inline u64 hv_get_nested_register(unsigned int reg) { return 0; }
>> static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
>> bool visible)
>> {
>> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
>> index f9b78d4829e3..f2f6e10301a8 100644
>> --- a/arch/x86/kernel/cpu/mshyperv.c
>> +++ b/arch/x86/kernel/cpu/mshyperv.c
>> @@ -41,7 +41,76 @@ bool hv_root_partition;
>> bool hv_nested;
>> struct ms_hyperv_info ms_hyperv;
>>
>> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
>> +{
>> + switch (reg) {
>> + case HV_REGISTER_SIMP:
>> + return HV_REGISTER_NESTED_SIMP;
>> + case HV_REGISTER_NESTED_SIEFP:
>> + return HV_REGISTER_SIEFP;
>> + case HV_REGISTER_SCONTROL:
>> + return HV_REGISTER_NESTED_SCONTROL;
>> + case HV_REGISTER_SINT0:
>> + return HV_REGISTER_NESTED_SINT0;
>> + case HV_REGISTER_EOM:
>> + return HV_REGISTER_NESTED_EOM;
>> + default:
>> + return reg;
>> + }
> Just a question: You added #defines for 6 nested registers. But
> the switch statement above maps only 5 registers. Is it intentional
> that there's not a mapping for HV_REGISTER_SVERSION?
Good catch! Will fix it in the next revision.
>
>> +}
>> +
>> #if IS_ENABLED(CONFIG_HYPERV)
>> +static u64 _hv_get_register(unsigned int reg, bool nested)
>> +{
>> + u64 value;
>> +
>> + if (nested)
>> + reg = hv_get_nested_reg(reg);
>> +
>> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
>> + hv_ghcb_msr_read(reg, &value);
>> + else
>> + rdmsrl(reg, value);
>> + return value;
>> +}
>> +
>> +static void _hv_set_register(unsigned int reg, u64 value, bool nested)
>> +{
>> + if (nested)
>> + reg = hv_get_nested_reg(reg);
>> +
>> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
>> + hv_ghcb_msr_write(reg, value);
>> +
>> + /* Write proxy bit via wrmsl instruction */
>> + if (reg >= HV_REGISTER_SINT0 &&
>> + reg <= HV_REGISTER_SINT15)
>> + wrmsrl(reg, value | 1 << 20);
>> + } else {
>> + wrmsrl(reg, value);
>> + }
>> +}
>> +
>> +u64 hv_get_register(unsigned int reg)
>> +{
>> + return _hv_get_register(reg, false);
>> +}
>> +
>> +void hv_set_register(unsigned int reg, u64 value)
>> +{
>> + _hv_set_register(reg, value, false);
>> +}
>> +
>> +u64 hv_get_nested_register(unsigned int reg)
>> +{
>> + return _hv_get_register(reg, true);
>> +}
>> +
>> +void hv_set_nested_register(unsigned int reg, u64 value)
>> +{
>> + _hv_set_register(reg, value, true);
>> +}
>> +
>> static void (*vmbus_handler)(void);
>> static void (*hv_stimer0_handler)(void);
>> static void (*hv_kexec_handler)(void);
>> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
>> index 4d6480d57546..0ed052f2423e 100644
>> --- a/drivers/hv/hv.c
>> +++ b/drivers/hv/hv.c
>> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
>> * Synic message and event pages are allocated by paravisor.
>> * Skip these pages allocation here.
>> */
>> - if (!hv_isolation_type_snp()) {
>> + if (!hv_isolation_type_snp() && !hv_root_partition) {
>> hv_cpu->synic_message_page =
>> (void *)get_zeroed_page(GFP_ATOMIC);
>> if (hv_cpu->synic_message_page == NULL) {
>> @@ -188,8 +188,16 @@ void hv_synic_free(void)
>> struct hv_per_cpu_context *hv_cpu
>> = per_cpu_ptr(hv_context.cpu_context, cpu);
>>
>> - free_page((unsigned long)hv_cpu->synic_event_page);
>> - free_page((unsigned long)hv_cpu->synic_message_page);
>> + if (hv_root_partition) {
>> + if (hv_cpu->synic_event_page != NULL)
>> + memunmap(hv_cpu->synic_event_page);
>> +
>> + if (hv_cpu->synic_message_page != NULL)
>> + memunmap(hv_cpu->synic_message_page);
>> + } else {
>> + free_page((unsigned long)hv_cpu->synic_event_page);
>> + free_page((unsigned long)hv_cpu->synic_message_page);
>> + }
>> free_page((unsigned long)hv_cpu->post_msg_page);
>> }
>>
>> @@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu)
>> union hv_synic_scontrol sctrl;
>>
>> /* Setup the Synic's message page */
>> - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
>> + simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
>> + hv_get_register(HV_REGISTER_SIMP);
> Unfortunately, this code and the similar places below will run into
> problems on ARM64. Drivers/hv/hv.c is common code on all architectures
> so it needs to compile and run on ARM64 as well as x86/x64. But there's
> no hv_get_nested_register() defined or implemented on the ARM64 side,
> so the code will fail to compile.
>
> I think there's a better way to do this. Based on Nuno's comments, it
> seems like there are two hv_get_register() functions needed:
>
> 1) Get the value of the register or its nested cousin, based on the value
> of hv_nested. That's what you are explicitly coding here.
> 2) Get the value of the register. Don't access the nested cousin, regardless
> of the value of hv_nested.
>
> Based on how you coded things earlier, I'm assuming #1 is what you want to
> use in most cases, and specifically here in drivers/hv/hv.c. That's good,
> because #1 can hide the testing of hv_nested in the x86-specific
> implementation of hv_get_register(), while the ARM64 version of
> hv_get_register() continues to do whatever it does now with no changes.
>
> I'm also assuming that #2 may be used in particular cases in the code
> that is specifically related to nesting. Give the #2 version a different
> name --- maybe hv_get_nonnested_register(), or something like that --
> and use it only in code under arch/x86 that is related to nesting. That
> way, ARM64 won't be affected.
>
> Of course, the same approach applies to hv_set_register().
>
> hv_get_register() and hv_get_nonnested_register() will obviously
> share some code. But rather than calling a common function starting
> with underscore like you've done above, let me suggest that
> hv_get_register() test hv_nested and potentially do the translation,
> then call hv_get_nonnested_register(). That way you'll end up
> with just two functions instead of three as above with
> hv_get_register(), hv_get_nested_register(), and _hv_get_register().
I tried the way you suggested and it worked for ARM64 this time. But
still I would have three functions. Because the base function
_hv_get_register() would still be required in order to avoid code
duplication in hv_get_non_nested_register().
>
> I haven't coded up any of this, so take it as a suggestion. There
> could be some problem with it that I haven't seen, or my assumptions
> might be wrong. But give it a try and see if it works out. I'm hoping
> it can all be handled on the x86 side without having to add complexity
> on the ARM64 side.
>
> Michael
>
>> +
>> simp.simp_enabled = 1;
>>
>> - if (hv_isolation_type_snp()) {
>> + if (hv_isolation_type_snp() || hv_root_partition) {
>> hv_cpu->synic_message_page
>> = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
>> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
>> @@ -227,13 +237,18 @@ void hv_synic_enable_regs(unsigned int cpu)
>> >> HV_HYP_PAGE_SHIFT;
>> }
>>
>> - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
>> + else
>> + hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>>
>> /* Setup the Synic's event page */
>> - siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
>> + siefp.as_uint64 = hv_nested ?
>> + hv_get_nested_register(HV_REGISTER_SIEFP) :
>> + hv_get_register(HV_REGISTER_SIEFP);
>> siefp.siefp_enabled = 1;
>>
>> - if (hv_isolation_type_snp()) {
>> + if (hv_isolation_type_snp() || hv_root_partition) {
>> hv_cpu->synic_event_page =
>> memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
>> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
>> @@ -245,13 +260,19 @@ void hv_synic_enable_regs(unsigned int cpu)
>> >> HV_HYP_PAGE_SHIFT;
>> }
>>
>> - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>> + else
>> + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>>
>> /* Setup the shared SINT. */
>> if (vmbus_irq != -1)
>> enable_percpu_irq(vmbus_irq, 0);
>> - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
>> - VMBUS_MESSAGE_SINT);
>> + shared_sint.as_uint64 =
>> + hv_nested ?
>> + hv_get_nested_register(HV_REGISTER_SINT0 +
>> + VMBUS_MESSAGE_SINT) :
>> + hv_get_register(HV_REGISTER_SINT0 +
>> VMBUS_MESSAGE_SINT);
>>
>> shared_sint.vector = vmbus_interrupt;
>> shared_sint.masked = false;
>> @@ -266,14 +287,22 @@ void hv_synic_enable_regs(unsigned int cpu)
>> #else
>> shared_sint.auto_eoi = 0;
>> #endif
>> - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_SINT0 +
>> VMBUS_MESSAGE_SINT,
>> + shared_sint.as_uint64);
>> + else
>> + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
>> shared_sint.as_uint64);
>> -
>> /* Enable the global synic bit */
>> - sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
>> + sctrl.as_uint64 = hv_nested ?
>> + hv_get_nested_register(HV_REGISTER_SCONTROL) :
>> + hv_get_register(HV_REGISTER_SCONTROL);
>> sctrl.enable = 1;
>>
>> - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> + else
>> + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> }
>>
>> int hv_synic_init(unsigned int cpu)
>> @@ -297,17 +326,25 @@ void hv_synic_disable_regs(unsigned int cpu)
>> union hv_synic_siefp siefp;
>> union hv_synic_scontrol sctrl;
>>
>> - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 +
>> - VMBUS_MESSAGE_SINT);
>> + shared_sint.as_uint64 =
>> + hv_nested ?
>> + hv_get_nested_register(HV_REGISTER_SINT0 +
>> + VMBUS_MESSAGE_SINT) :
>> + hv_get_register(HV_REGISTER_SINT0 +
>> VMBUS_MESSAGE_SINT);
>>
>> shared_sint.masked = 1;
>>
>> /* Need to correctly cleanup in the case of SMP!!! */
>> /* Disable the interrupt */
>> - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_SINT0 +
>> VMBUS_MESSAGE_SINT,
>> + shared_sint.as_uint64);
>> + else
>> + hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT,
>> shared_sint.as_uint64);
>>
>> - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
>> + simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
>> + hv_get_register(HV_REGISTER_SIMP);
>> /*
>> * In Isolation VM, sim and sief pages are allocated by
>> * paravisor. These pages also will be used by kdump
>> @@ -320,9 +357,14 @@ void hv_synic_disable_regs(unsigned int cpu)
>> else
>> simp.base_simp_gpa = 0;
>>
>> - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_SIMP, simp.as_uint64);
>> + else
>> + hv_set_register(HV_REGISTER_SIMP, simp.as_uint64);
>>
>> - siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
>> + siefp.as_uint64 = hv_nested ?
>> + hv_get_nested_register(HV_REGISTER_SIEFP) :
>> + hv_get_register(HV_REGISTER_SIEFP);
>> siefp.siefp_enabled = 0;
>>
>> if (hv_isolation_type_snp())
>> @@ -330,12 +372,21 @@ void hv_synic_disable_regs(unsigned int cpu)
>> else
>> siefp.base_siefp_gpa = 0;
>>
>> - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>> + else
>> + hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64);
>>
>> /* Disable the global synic bit */
>> - sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL);
>> + sctrl.as_uint64 = hv_nested ?
>> + hv_get_nested_register(HV_REGISTER_SCONTROL) :
>> + hv_get_register(HV_REGISTER_SCONTROL);
>> sctrl.enable = 0;
>> - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> +
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>> + else
>> + hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64);
>>
>> if (vmbus_irq != -1)
>> disable_percpu_irq(vmbus_irq);
>> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
>> index f131027830c3..db0b5be1e087 100644
>> --- a/include/asm-generic/mshyperv.h
>> +++ b/include/asm-generic/mshyperv.h
>> @@ -147,7 +147,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg,
>> u32 old_msg_type)
>> * possibly deliver another msg from the
>> * hypervisor
>> */
>> - hv_set_register(HV_REGISTER_EOM, 0);
>> + if (hv_nested)
>> + hv_set_nested_register(HV_REGISTER_EOM, 0);
>> + else
>> + hv_set_register(HV_REGISTER_EOM, 0);
>> }
>> }
>>
>> --
>> 2.25.1
Regards,
Jinank
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Thursday, December 1, 2022 11:05 PM
>
> On 12/2/2022 9:30 AM, Michael Kelley (LINUX) wrote:
> > From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Thursday, December 1,
> 2022 3:04 AM
> >> Child partitions are free to allocate SynIC message and event page but in
> >> case of root partition it must use the pages allocated by Microsoft
> >> Hypervisor (MSHV). Base address for these pages can be found using
> >> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> >> for nested vs non-nested root partition.
> >>
> >> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> >> ---
> >> arch/x86/include/asm/hyperv-tlfs.h | 11 ++++
> >> arch/x86/include/asm/mshyperv.h | 30 ++-------
> >> arch/x86/kernel/cpu/mshyperv.c | 69 +++++++++++++++++++++
> >> drivers/hv/hv.c | 99 ++++++++++++++++++++++--------
> >> include/asm-generic/mshyperv.h | 5 +-
> >> 5 files changed, 165 insertions(+), 49 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-
> tlfs.h
> >> index 58c03d18c235..b5019becb618 100644
> >> --- a/arch/x86/include/asm/hyperv-tlfs.h
> >> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> >> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> >> #define HV_REGISTER_SINT14 0x4000009E
> >> #define HV_REGISTER_SINT15 0x4000009F
> >>
> >> +/*
> >> + * Define synthetic interrupt controller model specific registers for
> >> + * nested hypervisor.
> >> + */
> >> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> >> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> >> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> >> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> >> +#define HV_REGISTER_NESTED_EOM 0x40001084
> >> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> >> +
> >> /*
> >> * Synthetic Timer MSRs. Four timers per vcpu.
> >> */
> >> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> >> index 61f0c206bff0..3197d49c888c 100644
> >> --- a/arch/x86/include/asm/mshyperv.h
> >> +++ b/arch/x86/include/asm/mshyperv.h
> >> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> >> return false;
> >> }
> >>
> >> -static inline u64 hv_get_register(unsigned int reg)
> >> -{
> >> - u64 value;
> >> -
> >> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> >> - hv_ghcb_msr_read(reg, &value);
> >> - else
> >> - rdmsrl(reg, value);
> >> - return value;
> >> -}
> >> -
> >> -static inline void hv_set_register(unsigned int reg, u64 value)
> >> -{
> >> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> >> - hv_ghcb_msr_write(reg, value);
> >> -
> >> - /* Write proxy bit via wrmsl instruction */
> >> - if (reg >= HV_REGISTER_SINT0 &&
> >> - reg <= HV_REGISTER_SINT15)
> >> - wrmsrl(reg, value | 1 << 20);
> >> - } else {
> >> - wrmsrl(reg, value);
> >> - }
> >> -}
> >> +u64 hv_get_register(unsigned int reg);
> >> +void hv_set_register(unsigned int reg, u64 value);
> >> +u64 hv_get_nested_register(unsigned int reg);
> >> +void hv_set_nested_register(unsigned int reg, u64 value);
> >>
> >> #else /* CONFIG_HYPERV */
> >> static inline void hyperv_init(void) {}
> >> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
> >> }
> >> static inline void hv_set_register(unsigned int reg, u64 value) { }
> >> static inline u64 hv_get_register(unsigned int reg) { return 0; }
> >> +static inline void hv_set_nested_register(unsigned int reg, u64 value) { }
> >> +static inline u64 hv_get_nested_register(unsigned int reg) { return 0; }
> >> static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
> >> bool visible)
> >> {
> >> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> >> index f9b78d4829e3..f2f6e10301a8 100644
> >> --- a/arch/x86/kernel/cpu/mshyperv.c
> >> +++ b/arch/x86/kernel/cpu/mshyperv.c
> >> @@ -41,7 +41,76 @@ bool hv_root_partition;
> >> bool hv_nested;
> >> struct ms_hyperv_info ms_hyperv;
> >>
> >> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> >> +{
> >> + switch (reg) {
> >> + case HV_REGISTER_SIMP:
> >> + return HV_REGISTER_NESTED_SIMP;
> >> + case HV_REGISTER_NESTED_SIEFP:
> >> + return HV_REGISTER_SIEFP;
> >> + case HV_REGISTER_SCONTROL:
> >> + return HV_REGISTER_NESTED_SCONTROL;
> >> + case HV_REGISTER_SINT0:
> >> + return HV_REGISTER_NESTED_SINT0;
> >> + case HV_REGISTER_EOM:
> >> + return HV_REGISTER_NESTED_EOM;
> >> + default:
> >> + return reg;
> >> + }
> > Just a question: You added #defines for 6 nested registers. But
> > the switch statement above maps only 5 registers. Is it intentional
> > that there's not a mapping for HV_REGISTER_SVERSION?
>
> Good catch! Will fix it in the next revision.
>
> >
> >> +}
> >> +
> >> #if IS_ENABLED(CONFIG_HYPERV)
> >> +static u64 _hv_get_register(unsigned int reg, bool nested)
> >> +{
> >> + u64 value;
> >> +
> >> + if (nested)
> >> + reg = hv_get_nested_reg(reg);
> >> +
> >> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> >> + hv_ghcb_msr_read(reg, &value);
> >> + else
> >> + rdmsrl(reg, value);
> >> + return value;
> >> +}
> >> +
> >> +static void _hv_set_register(unsigned int reg, u64 value, bool nested)
> >> +{
> >> + if (nested)
> >> + reg = hv_get_nested_reg(reg);
> >> +
> >> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> >> + hv_ghcb_msr_write(reg, value);
> >> +
> >> + /* Write proxy bit via wrmsl instruction */
> >> + if (reg >= HV_REGISTER_SINT0 &&
> >> + reg <= HV_REGISTER_SINT15)
> >> + wrmsrl(reg, value | 1 << 20);
> >> + } else {
> >> + wrmsrl(reg, value);
> >> + }
> >> +}
> >> +
> >> +u64 hv_get_register(unsigned int reg)
> >> +{
> >> + return _hv_get_register(reg, false);
> >> +}
> >> +
> >> +void hv_set_register(unsigned int reg, u64 value)
> >> +{
> >> + _hv_set_register(reg, value, false);
> >> +}
> >> +
> >> +u64 hv_get_nested_register(unsigned int reg)
> >> +{
> >> + return _hv_get_register(reg, true);
> >> +}
> >> +
> >> +void hv_set_nested_register(unsigned int reg, u64 value)
> >> +{
> >> + _hv_set_register(reg, value, true);
> >> +}
> >> +
> >> static void (*vmbus_handler)(void);
> >> static void (*hv_stimer0_handler)(void);
> >> static void (*hv_kexec_handler)(void);
> >> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> >> index 4d6480d57546..0ed052f2423e 100644
> >> --- a/drivers/hv/hv.c
> >> +++ b/drivers/hv/hv.c
> >> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
> >> * Synic message and event pages are allocated by paravisor.
> >> * Skip these pages allocation here.
> >> */
> >> - if (!hv_isolation_type_snp()) {
> >> + if (!hv_isolation_type_snp() && !hv_root_partition) {
> >> hv_cpu->synic_message_page =
> >> (void *)get_zeroed_page(GFP_ATOMIC);
> >> if (hv_cpu->synic_message_page == NULL) {
> >> @@ -188,8 +188,16 @@ void hv_synic_free(void)
> >> struct hv_per_cpu_context *hv_cpu
> >> = per_cpu_ptr(hv_context.cpu_context, cpu);
> >>
> >> - free_page((unsigned long)hv_cpu->synic_event_page);
> >> - free_page((unsigned long)hv_cpu->synic_message_page);
> >> + if (hv_root_partition) {
> >> + if (hv_cpu->synic_event_page != NULL)
> >> + memunmap(hv_cpu->synic_event_page);
> >> +
> >> + if (hv_cpu->synic_message_page != NULL)
> >> + memunmap(hv_cpu->synic_message_page);
> >> + } else {
> >> + free_page((unsigned long)hv_cpu->synic_event_page);
> >> + free_page((unsigned long)hv_cpu->synic_message_page);
> >> + }
> >> free_page((unsigned long)hv_cpu->post_msg_page);
> >> }
> >>
> >> @@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu)
> >> union hv_synic_scontrol sctrl;
> >>
> >> /* Setup the Synic's message page */
> >> - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> >> + simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
> >> + hv_get_register(HV_REGISTER_SIMP);
> > Unfortunately, this code and the similar places below will run into
> > problems on ARM64. Drivers/hv/hv.c is common code on all architectures
> > so it needs to compile and run on ARM64 as well as x86/x64. But there's
> > no hv_get_nested_register() defined or implemented on the ARM64 side,
> > so the code will fail to compile.
> >
> > I think there's a better way to do this. Based on Nuno's comments, it
> > seems like there are two hv_get_register() functions needed:
> >
> > 1) Get the value of the register or its nested cousin, based on the value
> > of hv_nested. That's what you are explicitly coding here.
> > 2) Get the value of the register. Don't access the nested cousin, regardless
> > of the value of hv_nested.
> >
> > Based on how you coded things earlier, I'm assuming #1 is what you want to
> > use in most cases, and specifically here in drivers/hv/hv.c. That's good,
> > because #1 can hide the testing of hv_nested in the x86-specific
> > implementation of hv_get_register(), while the ARM64 version of
> > hv_get_register() continues to do whatever it does now with no changes.
> >
> > I'm also assuming that #2 may be used in particular cases in the code
> > that is specifically related to nesting. Give the #2 version a different
> > name --- maybe hv_get_nonnested_register(), or something like that --
> > and use it only in code under arch/x86 that is related to nesting. That
> > way, ARM64 won't be affected.
> >
> > Of course, the same approach applies to hv_set_register().
> >
> > hv_get_register() and hv_get_nonnested_register() will obviously
> > share some code. But rather than calling a common function starting
> > with underscore like you've done above, let me suggest that
> > hv_get_register() test hv_nested and potentially do the translation,
> > then call hv_get_nonnested_register(). That way you'll end up
> > with just two functions instead of three as above with
> > hv_get_register(), hv_get_nested_register(), and _hv_get_register().
>
> I tried the way you suggested and it worked for ARM64 this time.
OK, good.
> But still I would have three functions. Because the base function
> _hv_get_register() would still be required in order to avoid code
> duplication in hv_get_non_nested_register().
To make this a bit more concrete, here's what I was thinking
(not even compile tested):
u64 hv_get_non_nested_register(unsigned int reg)
{
u64 value;
if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
hv_ghcb_msr_read(reg, &value);
else
rdmsrl(reg, value);
return value;
}
u64 hv_get_register(unsigned int reg)
{
if (hv_nested)
reg = hv_get_nested_reg(reg);
return hv_get_non_nested_register(reg);
}
But maybe I'm missing something ....
Michael
According to the TLFS, in order to communicate with the L0 hypervisor
there needs to be an additional bit set in the control register. This
communication is required to perform privileged instructions which can
only be performed by the L0 hypervisor. An example of that could be
setting up the VMBus infrastructure.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
arch/x86/include/asm/mshyperv.h | 42 +++++++++++++++++++++++++++---
include/asm-generic/hyperv-tlfs.h | 1 +
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b5019becb618..7758c495541d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -380,7 +380,8 @@ struct hv_nested_enlightenments_control {
__u32 reserved:31;
} features;
struct {
- __u32 reserved;
+ __u32 inter_partition_comm:1;
+ __u32 reserved:31;
} hypercallControls;
} __packed;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 3197d49c888c..fbd7a9589b0d 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -74,10 +74,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
+/* Hypercall to the L0 hypervisor */
+static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
+{
+ return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
+}
+
/* Fast hypercall with 8 bytes of input and no output */
-static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+static inline u64 _hv_do_fast_hypercall8(u64 control, u16 code, u64 input1)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall8(control, code, input1);
+}
+
+static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall8(control, code, input1);
+}
+
/* Fast hypercall with 16 bytes of input */
-static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+static inline u64 _hv_do_fast_hypercall16(u64 control, u16 code, u64 input1, u64 input2)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall16(control, code, input1, input2);
+}
+
+static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall16(control, code, input1, input2);
+}
+
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index b17c6eeb9afa..e61ee461c4fc 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_HYPERCALL_VARHEAD_OFFSET 17
#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17)
#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27)
+#define HV_HYPERCALL_NESTED BIT_ULL(31)
#define HV_HYPERCALL_REP_COMP_OFFSET 32
#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
--
2.25.1
Currently the VMBus driver is not initialized for the root partition, but
we need to enable the VMBus driver for a nested root partition. This is
required so that the L2 root can use the VMBus devices.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
drivers/hv/vmbus_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0f00d57b7c25..6324e01d5eec 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2745,7 +2745,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition)
+ if (hv_root_partition && !hv_nested)
return 0;
/*
--
2.25.1
Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay
the VMBus interrupt. But this does not work in the case of a nested
hypervisor. Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt
vector range for VMBus and thus we have to use one of the vectors from
that range and set up the IDT accordingly.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/idtentry.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 6 ++++++
arch/x86/kernel/cpu/mshyperv.c | 15 +++++++++++++++
arch/x86/kernel/idt.c | 9 +++++++++
drivers/hv/vmbus_drv.c | 3 ++-
5 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 72184b0b2219..c0648e3e4d4a 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -686,6 +686,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_INTR_NESTED_VMBUS_VECTOR,
+ sysvec_hyperv_nested_vmbus_intr);
#endif
#if IS_ENABLED(CONFIG_ACRN_GUEST)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 43dcb9284208..729d19eab7f5 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,12 @@
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
#define HYPERV_STIMER0_VECTOR 0xed
+/*
+ * FIXME: Change this, once Microsoft Hypervisor changes its assumption
+ * around VMBus interrupt vector allocation for nested root partition.
+ * Or provides a better interface to detect this instead of hardcoding.
+ */
+#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31
#endif
#define LOCAL_TIMER_VECTOR 0xec
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f2f6e10301a8..9f31c7704715 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -130,6 +130,21 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (vmbus_handler)
+ vmbus_handler();
+
+ if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
+ ack_APIC_irq();
+
+ set_irq_regs(old_regs);
+}
+
void hv_setup_vmbus_handler(void (*handler)(void))
{
vmbus_handler = handler;
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index a58c6bc1cd68..ace648856a0b 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -160,6 +160,15 @@ static const __initconst struct idt_data apic_idts[] = {
# endif
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
+#ifdef CONFIG_HYPERV
+ /*
+ * This is a hack because we cannot install this interrupt handler via alloc_intr_gate
+ * as it does not allow interrupt vector less than FIRST_SYSTEM_VECTOR. And hyperv
+ * does not want anything other than 0x31-0x34 as the interrupt vector for vmbus
+ * interrupt in case of nested setup.
+ */
+ INTG(HYPERV_INTR_NESTED_VMBUS_VECTOR, asm_sysvec_hyperv_nested_vmbus_intr),
+#endif
#endif
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6324e01d5eec..740878367426 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2768,7 +2768,8 @@ static int __init hv_acpi_init(void)
* normal Linux IRQ mechanism is not used in this case.
*/
#ifdef HYPERVISOR_CALLBACK_VECTOR
- vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
+ vmbus_interrupt = hv_nested ? HYPERV_INTR_NESTED_VMBUS_VECTOR :
+ HYPERVISOR_CALLBACK_VECTOR;
vmbus_irq = -1;
#endif
--
2.25.1
This patch series adds support for running nested Microsoft Hypervisor. In case of nested Microsoft Hypervisor there are a few privileged hypercalls which need to go to the L0 Hypervisor instead of the L1 Hypervisor. This patch series identifies such hypercalls and replaces them with nested hypercalls. Jinank Jain (5): x86/hyperv: Add support for detecting nested hypervisor Drivers: hv: Setup synic registers in case of nested root partition x86/hyperv: Add an interface to do nested hypercalls Drivers: hv: Enable vmbus driver for nested root partition x86/hyperv: Change interrupt vector for nested root partition [v4] - Fix ARM64 compilation arch/arm64/hyperv/mshyperv.c | 6 +++ arch/x86/include/asm/hyperv-tlfs.h | 17 ++++++- arch/x86/include/asm/idtentry.h | 2 + arch/x86/include/asm/irq_vectors.h | 6 +++ arch/x86/include/asm/mshyperv.h | 68 ++++++++++++++++------------ arch/x86/kernel/cpu/mshyperv.c | 71 ++++++++++++++++++++++++++++++ arch/x86/kernel/idt.c | 9 ++++ drivers/hv/hv.c | 18 +++++--- drivers/hv/hv_common.c | 7 ++- drivers/hv/vmbus_drv.c | 5 ++- include/asm-generic/hyperv-tlfs.h | 1 + include/asm-generic/mshyperv.h | 1 + 12 files changed, 173 insertions(+), 38 deletions(-) -- 2.25.1
When Linux runs as a root partition for Microsoft Hypervisor, it is
possible to detect if it is running as a nested hypervisor using
hints exposed by mshv. While at it expose a new variable called
hv_nested which can be used later for making decisions specific to
nested use case.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/arm64/hyperv/mshyperv.c | 6 ++++++
arch/x86/include/asm/hyperv-tlfs.h | 3 +++
arch/x86/kernel/cpu/mshyperv.c | 7 +++++++
drivers/hv/hv_common.c | 7 +++++--
include/asm-generic/mshyperv.h | 1 +
5 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index a406454578f0..2024b19dc514 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -19,6 +19,9 @@
static bool hyperv_initialized;
+/* Is Linux running on nested Microsoft Hypervisor */
+bool hv_nested;
+
static int __init hyperv_init(void)
{
struct hv_get_vp_registers_output result;
@@ -63,6 +66,9 @@ static int __init hyperv_init(void)
pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
b >> 16, b & 0xFFFF, a, d & 0xFFFFFF, c, d >> 24);
+ /* ARM64 does not support nested virtualization */
+ hv_nested = false;
+
ret = hv_common_init();
if (ret)
return ret;
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6d9368ea3701..58c03d18c235 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -114,6 +114,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+/* Indicates that the hypervisor is nested within a Hyper-V partition. */
+#define HV_X64_HYPERV_NESTED BIT(12)
+
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 831613959a92..9a4204139490 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,6 +37,8 @@
/* Is Linux running as the root partition? */
bool hv_root_partition;
+/* Is Linux running on nested Microsoft Hypervisor */
+bool hv_nested;
struct ms_hyperv_info ms_hyperv;
#if IS_ENABLED(CONFIG_HYPERV)
@@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: running as root partition\n");
}
+ if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
+ hv_nested = true;
+ pr_info("Hyper-V: running on a nested hypervisor\n");
+ }
+
/*
* Extract host information.
*/
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index ae68298c0dca..dcb336ce374f 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -25,8 +25,8 @@
#include <asm/mshyperv.h>
/*
- * hv_root_partition and ms_hyperv are defined here with other Hyper-V
- * specific globals so they are shared across all architectures and are
+ * hv_root_partition, ms_hyperv and hv_nested are defined here with other
+ * Hyper-V specific globals so they are shared across all architectures and are
* built only when CONFIG_HYPERV is defined. But on x86,
* ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
* defined, and it uses these two variables. So mark them as __weak
@@ -36,6 +36,9 @@
bool __weak hv_root_partition;
EXPORT_SYMBOL_GPL(hv_root_partition);
+bool __weak hv_nested;
+EXPORT_SYMBOL_GPL(hv_nested);
+
struct ms_hyperv_info __weak ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..5df6e944e6a9 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -164,6 +164,7 @@ extern int vmbus_interrupt;
extern int vmbus_irq;
extern bool hv_root_partition;
+extern bool hv_nested;
#if IS_ENABLED(CONFIG_HYPERV)
/*
--
2.25.1
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Wednesday, November 16, 2022 7:28 PM
>
> When Linux runs as a root partition for Microsoft Hypervisor. It is
> possible to detect if it is running as nested hypervisor using
> hints exposed by mshv. While at it expose a new variable called
> hv_nested which can be used later for making decisions specific to
> nested use case.
Make the commit statement a bit more direct, and avoid equivocating
words like "possible" and "can be" when there isn't anything that is
doubtful. Here is my suggestion:
Detect if Linux is running as a nested hypervisor in the root partition
for Microsoft Hypervisor, using flags provided by MSHV. Expose a new
variable hv_nested that is used later for decisions specific to the
nested use case.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/arm64/hyperv/mshyperv.c | 6 ++++++
> arch/x86/include/asm/hyperv-tlfs.h | 3 +++
> arch/x86/kernel/cpu/mshyperv.c | 7 +++++++
> drivers/hv/hv_common.c | 7 +++++--
> include/asm-generic/mshyperv.h | 1 +
> 5 files changed, 22 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
> index a406454578f0..2024b19dc514 100644
> --- a/arch/arm64/hyperv/mshyperv.c
> +++ b/arch/arm64/hyperv/mshyperv.c
> @@ -19,6 +19,9 @@
>
> static bool hyperv_initialized;
>
> +/* Is Linux running on nested Microsoft Hypervisor */
> +bool hv_nested;
> +
> static int __init hyperv_init(void)
> {
> struct hv_get_vp_registers_output result;
> @@ -63,6 +66,9 @@ static int __init hyperv_init(void)
> pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n",
> b >> 16, b & 0xFFFF, a, d & 0xFFFFFF, c, d >> 24);
>
> + /* ARM64 does not support nested virtualization */
> + hv_nested = false;
> +
> ret = hv_common_init();
> if (ret)
> return ret;
The above ARM64 additions aren't needed. An architecture that works
with the default value of "0" (i.e., "false") doesn't have to do anything
as it uses the version in hv_common.c. While explicitly coding it on
the ARM64 side doesn't break anything, one of the intentions of the __weak
approach is that we don't have to update the ARM64 side every time
we add something that is x86 only. To avoid irrelevant clutter on the
ARM64 side, the preference is to *not* add such code. Similarly,
you'll notice that the ARM64 code doesn't initialize hv_root_partition.
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 6d9368ea3701..58c03d18c235 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -114,6 +114,9 @@
> /* Recommend using the newer ExProcessorMasks interface */
> #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
>
> +/* Indicates that the hypervisor is nested within a Hyper-V partition. */
> +#define HV_X64_HYPERV_NESTED BIT(12)
> +
> /* Recommend using enlightened VMCS */
> #define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
>
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 831613959a92..9a4204139490 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -37,6 +37,8 @@
>
> /* Is Linux running as the root partition? */
> bool hv_root_partition;
> +/* Is Linux running on nested Microsoft Hypervisor */
> +bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> #if IS_ENABLED(CONFIG_HYPERV)
> @@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void)
> pr_info("Hyper-V: running as root partition\n");
> }
>
> + if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
> + hv_nested = true;
> + pr_info("Hyper-V: running on a nested hypervisor\n");
> + }
> +
> /*
> * Extract host information.
> */
> diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
> index ae68298c0dca..dcb336ce374f 100644
> --- a/drivers/hv/hv_common.c
> +++ b/drivers/hv/hv_common.c
> @@ -25,8 +25,8 @@
> #include <asm/mshyperv.h>
>
> /*
> - * hv_root_partition and ms_hyperv are defined here with other Hyper-V
> - * specific globals so they are shared across all architectures and are
> + * hv_root_partition, ms_hyperv and hv_nested are defined here with other
> + * Hyper-V specific globals so they are shared across all architectures and are
> * built only when CONFIG_HYPERV is defined. But on x86,
> * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
> * defined, and it uses these two variables. So mark them as __weak
s/two/three/ (since we now have three such variables)
> @@ -36,6 +36,9 @@
> bool __weak hv_root_partition;
> EXPORT_SYMBOL_GPL(hv_root_partition);
>
> +bool __weak hv_nested;
> +EXPORT_SYMBOL_GPL(hv_nested);
> +
> struct ms_hyperv_info __weak ms_hyperv;
> EXPORT_SYMBOL_GPL(ms_hyperv);
>
> diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
> index bfb9eb9d7215..5df6e944e6a9 100644
> --- a/include/asm-generic/mshyperv.h
> +++ b/include/asm-generic/mshyperv.h
> @@ -164,6 +164,7 @@ extern int vmbus_interrupt;
> extern int vmbus_irq;
>
> extern bool hv_root_partition;
> +extern bool hv_nested;
>
> #if IS_ENABLED(CONFIG_HYPERV)
> /*
> --
> 2.25.1
Child partitions are free to allocate SynIC message and event pages, but
the root partition must use the pages allocated by Microsoft
Hypervisor (MSHV). The base addresses of these pages can be found using
synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
for nested vs non-nested root partition.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 11 +++++++
arch/x86/include/asm/mshyperv.h | 26 ++--------------
arch/x86/kernel/cpu/mshyperv.c | 49 ++++++++++++++++++++++++++++++
drivers/hv/hv.c | 18 ++++++++---
4 files changed, 75 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 58c03d18c235..b5019becb618 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -225,6 +225,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT14 0x4000009E
#define HV_REGISTER_SINT15 0x4000009F
+/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..326d699b30d5 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -198,30 +198,8 @@ static inline bool hv_is_synic_reg(unsigned int reg)
return false;
}
-static inline u64 hv_get_register(unsigned int reg)
-{
- u64 value;
-
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
- else
- rdmsrl(reg, value);
- return value;
-}
-
-static inline void hv_set_register(unsigned int reg, u64 value)
-{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
-
- /* Write proxy bit via wrmsl instruction */
- if (reg >= HV_REGISTER_SINT0 &&
- reg <= HV_REGISTER_SINT15)
- wrmsrl(reg, value | 1 << 20);
- } else {
- wrmsrl(reg, value);
- }
-}
+u64 hv_get_register(unsigned int reg);
+void hv_set_register(unsigned int reg, u64 value);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 9a4204139490..3e6711a6af6b 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -41,6 +41,55 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
+static inline unsigned int hv_get_nested_reg(unsigned int reg)
+{
+ switch (reg) { /* translate a regular SynIC MSR to its nested counterpart */
+ case HV_REGISTER_SIMP:
+ return HV_REGISTER_NESTED_SIMP;
+ case HV_REGISTER_SIEFP:
+ return HV_REGISTER_NESTED_SIEFP;
+ case HV_REGISTER_SCONTROL:
+ return HV_REGISTER_NESTED_SCONTROL;
+ case HV_REGISTER_SINT0:
+ return HV_REGISTER_NESTED_SINT0;
+ case HV_REGISTER_EOM:
+ return HV_REGISTER_NESTED_EOM;
+ default:
+ return reg; /* no nested variant: use the register as-is */
+ }
+}
+
+inline u64 hv_get_register(unsigned int reg)
+{
+ u64 value;
+
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
+ hv_ghcb_msr_read(reg, &value);
+ else
+ rdmsrl(reg, value);
+ return value;
+}
+
+inline void hv_set_register(unsigned int reg, u64 value)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
+ hv_ghcb_msr_write(reg, value);
+
+ /* Write proxy bit via wrmsl instruction */
+ if (reg >= HV_REGISTER_SINT0 &&
+ reg <= HV_REGISTER_SINT15)
+ wrmsrl(reg, value | 1 << 20);
+ } else {
+ wrmsrl(reg, value);
+ }
+}
+
#if IS_ENABLED(CONFIG_HYPERV)
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..9e1eb50cc76f 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -147,7 +147,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp()) {
+ if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -188,8 +188,16 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
+ if (hv_root_partition) {
+ if (hv_cpu->synic_event_page != NULL)
+ memunmap(hv_cpu->synic_event_page);
+
+ if (hv_cpu->synic_message_page != NULL)
+ memunmap(hv_cpu->synic_message_page);
+ } else {
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ }
free_page((unsigned long)hv_cpu->post_msg_page);
}
@@ -216,7 +224,7 @@ void hv_synic_enable_regs(unsigned int cpu)
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
simp.simp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -233,7 +241,7 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
--
2.25.1
On 11/16/2022 7:27 PM, Jinank Jain wrote:
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 11 +++++++
> arch/x86/include/asm/mshyperv.h | 26 ++--------------
> arch/x86/kernel/cpu/mshyperv.c | 49 ++++++++++++++++++++++++++++++
> drivers/hv/hv.c | 18 ++++++++---
> 4 files changed, 75 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 58c03d18c235..b5019becb618 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> #define HV_REGISTER_SINT14 0x4000009E
> #define HV_REGISTER_SINT15 0x4000009F
>
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> +#define HV_REGISTER_NESTED_EOM 0x40001084
> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> +
> /*
> * Synthetic Timer MSRs. Four timers per vcpu.
> */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 61f0c206bff0..326d699b30d5 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -198,30 +198,8 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> return false;
> }
>
> -static inline u64 hv_get_register(unsigned int reg)
> -{
> - u64 value;
> -
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> - hv_ghcb_msr_read(reg, &value);
> - else
> - rdmsrl(reg, value);
> - return value;
> -}
> -
> -static inline void hv_set_register(unsigned int reg, u64 value)
> -{
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> - hv_ghcb_msr_write(reg, value);
> -
> - /* Write proxy bit via wrmsl instruction */
> - if (reg >= HV_REGISTER_SINT0 &&
> - reg <= HV_REGISTER_SINT15)
> - wrmsrl(reg, value | 1 << 20);
> - } else {
> - wrmsrl(reg, value);
> - }
> -}
> +u64 hv_get_register(unsigned int reg);
> +void hv_set_register(unsigned int reg, u64 value);
>
> #else /* CONFIG_HYPERV */
> static inline void hyperv_init(void) {}
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 9a4204139490..3e6711a6af6b 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -41,6 +41,55 @@ bool hv_root_partition;
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> +{
> + switch (reg) {
> + case HV_REGISTER_SIMP:
> + return HV_REGISTER_NESTED_SIMP;
> + case HV_REGISTER_NESTED_SIEFP:
> + return HV_REGISTER_SIEFP;
> + case HV_REGISTER_SCONTROL:
> + return HV_REGISTER_NESTED_SCONTROL;
> + case HV_REGISTER_SINT0:
> + return HV_REGISTER_NESTED_SINT0;
> + case HV_REGISTER_EOM:
> + return HV_REGISTER_NESTED_EOM;
> + default:
> + return reg;
> + }
> +}
> +
> +inline u64 hv_get_register(unsigned int reg)
> +{
> + u64 value;
> +
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> + hv_ghcb_msr_read(reg, &value);
> + else
> + rdmsrl(reg, value);
> + return value;
> +}
> +
> +inline void hv_set_register(unsigned int reg, u64 value)
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> + hv_ghcb_msr_write(reg, value);
> +
> + /* Write proxy bit via wrmsl instruction */
> + if (reg >= HV_REGISTER_SINT0 &&
> + reg <= HV_REGISTER_SINT15)
> + wrmsrl(reg, value | 1 << 20);
> + } else {
> + wrmsrl(reg, value);
> + }
> +}
This approach has a problem, in that it removes the interface for getting and
setting the non-nested SIMP etc...
We will need to use the non-nested SIMP for getting intercepts in the root
partition from the L1 hypervisor.
Nuno
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Wednesday, November 16, 2022 7:28 PM
>
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 11 +++++++
> arch/x86/include/asm/mshyperv.h | 26 ++--------------
> arch/x86/kernel/cpu/mshyperv.c | 49 ++++++++++++++++++++++++++++++
> drivers/hv/hv.c | 18 ++++++++---
> 4 files changed, 75 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 58c03d18c235..b5019becb618 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> #define HV_REGISTER_SINT14 0x4000009E
> #define HV_REGISTER_SINT15 0x4000009F
>
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> +#define HV_REGISTER_NESTED_EOM 0x40001084
> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> +
> /*
> * Synthetic Timer MSRs. Four timers per vcpu.
> */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 61f0c206bff0..326d699b30d5 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -198,30 +198,8 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> return false;
> }
>
> -static inline u64 hv_get_register(unsigned int reg)
> -{
> - u64 value;
> -
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> - hv_ghcb_msr_read(reg, &value);
> - else
> - rdmsrl(reg, value);
> - return value;
> -}
> -
> -static inline void hv_set_register(unsigned int reg, u64 value)
> -{
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> - hv_ghcb_msr_write(reg, value);
> -
> - /* Write proxy bit via wrmsl instruction */
> - if (reg >= HV_REGISTER_SINT0 &&
> - reg <= HV_REGISTER_SINT15)
> - wrmsrl(reg, value | 1 << 20);
> - } else {
> - wrmsrl(reg, value);
> - }
> -}
> +u64 hv_get_register(unsigned int reg);
> +void hv_set_register(unsigned int reg, u64 value);
>
> #else /* CONFIG_HYPERV */
> static inline void hyperv_init(void) {}
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 9a4204139490..3e6711a6af6b 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -41,6 +41,55 @@ bool hv_root_partition;
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> +{
> + switch (reg) {
> + case HV_REGISTER_SIMP:
> + return HV_REGISTER_NESTED_SIMP;
> + case HV_REGISTER_NESTED_SIEFP:
> + return HV_REGISTER_SIEFP;
> + case HV_REGISTER_SCONTROL:
> + return HV_REGISTER_NESTED_SCONTROL;
> + case HV_REGISTER_SINT0:
> + return HV_REGISTER_NESTED_SINT0;
> + case HV_REGISTER_EOM:
> + return HV_REGISTER_NESTED_EOM;
> + default:
> + return reg;
> + }
> +}
> +
> +inline u64 hv_get_register(unsigned int reg)
I don't think "inline" here does anything. There aren't any invocations
of hv_get_register() in this module. Previously, when the code was in
an include file, then "inline" would do something.
But I'm not an expert in compilers/linkers, so maybe I'm wrong. :-)
> +{
> + u64 value;
> +
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> + hv_ghcb_msr_read(reg, &value);
> + else
> + rdmsrl(reg, value);
> + return value;
> +}
> +
> +inline void hv_set_register(unsigned int reg, u64 value)
Same here.
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> + hv_ghcb_msr_write(reg, value);
> +
> + /* Write proxy bit via wrmsl instruction */
> + if (reg >= HV_REGISTER_SINT0 &&
> + reg <= HV_REGISTER_SINT15)
> + wrmsrl(reg, value | 1 << 20);
> + } else {
> + wrmsrl(reg, value);
> + }
> +}
> +
> #if IS_ENABLED(CONFIG_HYPERV)
> static void (*vmbus_handler)(void);
> static void (*hv_stimer0_handler)(void);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..9e1eb50cc76f 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp()) {
> + if (!hv_isolation_type_snp() && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -188,8 +188,16 @@ void hv_synic_free(void)
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> - free_page((unsigned long)hv_cpu->synic_event_page);
> - free_page((unsigned long)hv_cpu->synic_message_page);
> + if (hv_root_partition) {
> + if (hv_cpu->synic_event_page != NULL)
> + memunmap(hv_cpu->synic_event_page);
> +
> + if (hv_cpu->synic_message_page != NULL)
> + memunmap(hv_cpu->synic_message_page);
> + } else {
> + free_page((unsigned long)hv_cpu->synic_event_page);
> + free_page((unsigned long)hv_cpu->synic_message_page);
> + }
> free_page((unsigned long)hv_cpu->post_msg_page);
> }
>
> @@ -216,7 +224,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_message_page
> = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -233,7 +241,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_event_page =
> memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> --
> 2.25.1
According to TLFS, in order to communicate to L0 hypervisor there needs
to be an additional bit set in the control register. This communication
is required to perform privileged instructions which can only be
performed by L0 hypervisor. An example of that could be setting up the
VMBus infrastructure.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
arch/x86/include/asm/mshyperv.h | 42 +++++++++++++++++++++++++++---
include/asm-generic/hyperv-tlfs.h | 1 +
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b5019becb618..7758c495541d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -380,7 +380,8 @@ struct hv_nested_enlightenments_control {
__u32 reserved:31;
} features;
struct {
- __u32 reserved;
+ __u32 inter_partition_comm:1;
+ __u32 reserved:31;
} hypercallControls;
} __packed;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 326d699b30d5..42e42cea0384 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -74,10 +74,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
+/* Hypercall to the L0 hypervisor */
+static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
+{
+ return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
+}
+
/* Fast hypercall with 8 bytes of input and no output */
-static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+static inline u64 _hv_do_fast_hypercall8(u64 control, u16 code, u64 input1)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall8(control, code, input1);
+}
+
+static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall8(control, code, input1);
+}
+
/* Fast hypercall with 16 bytes of input */
-static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+static inline u64 _hv_do_fast_hypercall16(u64 control, u16 code, u64 input1, u64 input2)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall16(control, code, input1, input2);
+}
+
+static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall16(control, code, input1, input2);
+}
+
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index b17c6eeb9afa..e61ee461c4fc 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_HYPERCALL_VARHEAD_OFFSET 17
#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17)
#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27)
+#define HV_HYPERCALL_NESTED BIT_ULL(31)
#define HV_HYPERCALL_REP_COMP_OFFSET 32
#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
--
2.25.1
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Wednesday, November 16, 2022 7:28 PM
>
> According to TLFS, in order to communicate to L0 hypervisor there needs
> to be an additional bit set in the control register. This communication
> is required to perform priviledged instructions which can only be
s/priviledged/privileged/
> performed by L0 hypervisor. An example of that could be setting up the
> VMBus infrastructure.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
> arch/x86/include/asm/mshyperv.h | 42 +++++++++++++++++++++++++++---
> include/asm-generic/hyperv-tlfs.h | 1 +
> 3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index b5019becb618..7758c495541d 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -380,7 +380,8 @@ struct hv_nested_enlightenments_control {
> __u32 reserved:31;
> } features;
> struct {
> - __u32 reserved;
> + __u32 inter_partition_comm:1;
> + __u32 reserved:31;
> } hypercallControls;
> } __packed;
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 326d699b30d5..42e42cea0384 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -74,10 +74,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void
> *output)
> return hv_status;
> }
>
> +/* Hypercall to the L0 hypervisor */
> +static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
> +{
> + return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
> +}
> +
> /* Fast hypercall with 8 bytes of input and no output */
> -static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> +static inline u64 _hv_do_fast_hypercall8(u64 control, u16 code, u64 input1)
> {
> - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> + u64 hv_status;
>
> #ifdef CONFIG_X86_64
> {
> @@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> return hv_status;
> }
>
> +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
> +
> + return _hv_do_fast_hypercall8(control, code, input1);
> +}
> +
> +static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT |
> HV_HYPERCALL_NESTED;
> +
> + return _hv_do_fast_hypercall8(control, code, input1);
> +}
> +
> /* Fast hypercall with 16 bytes of input */
> -static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> +static inline u64 _hv_do_fast_hypercall16(u64 control, u16 code, u64 input1, u64
> input2)
> {
> - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> + u64 hv_status;
>
> #ifdef CONFIG_X86_64
> {
> @@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1,
> u64 input2)
> return hv_status;
> }
>
> +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
> +
> + return _hv_do_fast_hypercall16(control, code, input1, input2);
> +}
> +
> +static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT |
> HV_HYPERCALL_NESTED;
> +
> + return _hv_do_fast_hypercall16(control, code, input1, input2);
> +}
> +
> extern struct hv_vp_assist_page **hv_vp_assist_page;
>
> static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index b17c6eeb9afa..e61ee461c4fc 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT {
> #define HV_HYPERCALL_VARHEAD_OFFSET 17
> #define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17)
> #define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27)
> +#define HV_HYPERCALL_NESTED BIT_ULL(31)
> #define HV_HYPERCALL_REP_COMP_OFFSET 32
> #define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
> #define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
> --
> 2.25.1
Currently the VMBus driver is not initialized for the root partition, but we
need to enable it for the nested root partition. This is required so that the
L2 root can use the VMBus devices.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
drivers/hv/vmbus_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index db00d20c726d..0937877eade9 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2744,7 +2744,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition)
+ if (hv_root_partition && !hv_nested)
return 0;
/*
--
2.25.1
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Wednesday, November 16, 2022 7:28 PM > > Currently VMBus driver is not initialized for root partition but we need > to enable the VMBus driver for nested root partition. This is required, > so that L2 root can use the VMBus devices. > > Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com> > --- > drivers/hv/vmbus_drv.c | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c > index db00d20c726d..0937877eade9 100644 > --- a/drivers/hv/vmbus_drv.c > +++ b/drivers/hv/vmbus_drv.c > @@ -2744,7 +2744,7 @@ static int __init hv_acpi_init(void) > if (!hv_is_hyperv_initialized()) > return -ENODEV; > > - if (hv_root_partition) > + if (hv_root_partition && !hv_nested) > return 0; > > /* > -- > 2.25.1 Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay
the VMBus interrupt. However, this does not work in the case of a nested
hypervisor. The Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt
vector range for VMBus, so we have to use one of the vectors from
that range and set up the IDT accordingly.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/idtentry.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 6 ++++++
arch/x86/kernel/cpu/mshyperv.c | 15 +++++++++++++++
arch/x86/kernel/idt.c | 9 +++++++++
drivers/hv/vmbus_drv.c | 3 ++-
5 files changed, 34 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 72184b0b2219..c0648e3e4d4a 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -686,6 +686,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_INTR_NESTED_VMBUS_VECTOR,
+ sysvec_hyperv_nested_vmbus_intr);
#endif
#if IS_ENABLED(CONFIG_ACRN_GUEST)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 43dcb9284208..729d19eab7f5 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,12 @@
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
#define HYPERV_STIMER0_VECTOR 0xed
+/*
+ * FIXME: Change this, once Microsoft Hypervisor changes its assumption
+ * around VMBus interrupt vector allocation for nested root partition.
+ * Or provides a better interface to detect this instead of hardcoding.
+ */
+#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31
#endif
#define LOCAL_TIMER_VECTOR 0xec
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 3e6711a6af6b..ec7fef43e03b 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -110,6 +110,21 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (vmbus_handler)
+ vmbus_handler();
+
+ if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
+ ack_APIC_irq();
+
+ set_irq_regs(old_regs);
+}
+
void hv_setup_vmbus_handler(void (*handler)(void))
{
vmbus_handler = handler;
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index a58c6bc1cd68..ace648856a0b 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -160,6 +160,15 @@ static const __initconst struct idt_data apic_idts[] = {
# endif
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
+#ifdef CONFIG_HYPERV
+ /*
+ * This is a hack: this interrupt handler cannot be installed via alloc_intr_gate()
+ * because it rejects interrupt vectors lower than FIRST_SYSTEM_VECTOR, and Hyper-V
+ * does not accept anything other than 0x31-0x34 as the interrupt vector for the
+ * VMBus interrupt in a nested setup.
+ */
+ INTG(HYPERV_INTR_NESTED_VMBUS_VECTOR, asm_sysvec_hyperv_nested_vmbus_intr),
+#endif
#endif
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0937877eade9..c1477f3a08dd 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2767,7 +2767,8 @@ static int __init hv_acpi_init(void)
* normal Linux IRQ mechanism is not used in this case.
*/
#ifdef HYPERVISOR_CALLBACK_VECTOR
- vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
+ vmbus_interrupt = hv_nested ? HYPERV_INTR_NESTED_VMBUS_VECTOR :
+ HYPERVISOR_CALLBACK_VECTOR;
vmbus_irq = -1;
#endif
--
2.25.1
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Wednesday, November 16, 2022 7:28 PM
>
> Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay
> the VMBus interrupt. But this does not work in case of nested
> hypervisor. Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt
> vector range for VMBus and thus we have to use one of the vectors from
> that range and setup the IDT accordingly.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/idtentry.h | 2 ++
> arch/x86/include/asm/irq_vectors.h | 6 ++++++
> arch/x86/kernel/cpu/mshyperv.c | 15 +++++++++++++++
> arch/x86/kernel/idt.c | 9 +++++++++
> drivers/hv/vmbus_drv.c | 3 ++-
> 5 files changed, 34 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
> index 72184b0b2219..c0648e3e4d4a 100644
> --- a/arch/x86/include/asm/idtentry.h
> +++ b/arch/x86/include/asm/idtentry.h
> @@ -686,6 +686,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR,
> sysvec_kvm_posted_intr_nested
> DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR,
> sysvec_hyperv_callback);
> DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR,
> sysvec_hyperv_reenlightenment);
> DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR,
> sysvec_hyperv_stimer0);
> +DECLARE_IDTENTRY_SYSVEC(HYPERV_INTR_NESTED_VMBUS_VECTOR,
> + sysvec_hyperv_nested_vmbus_intr);
> #endif
>
> #if IS_ENABLED(CONFIG_ACRN_GUEST)
> diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
> index 43dcb9284208..729d19eab7f5 100644
> --- a/arch/x86/include/asm/irq_vectors.h
> +++ b/arch/x86/include/asm/irq_vectors.h
> @@ -102,6 +102,12 @@
> #if IS_ENABLED(CONFIG_HYPERV)
> #define HYPERV_REENLIGHTENMENT_VECTOR 0xee
> #define HYPERV_STIMER0_VECTOR 0xed
> +/*
> + * FIXME: Change this, once Microsoft Hypervisor changes its assumption
> + * around VMBus interrupt vector allocation for nested root partition.
> + * Or provides a better interface to detect this instead of hardcoding.
> + */
> +#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31
> #endif
>
> #define LOCAL_TIMER_VECTOR 0xec
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index 3e6711a6af6b..ec7fef43e03b 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -110,6 +110,21 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
> set_irq_regs(old_regs);
> }
>
> +DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr)
> +{
> + struct pt_regs *old_regs = set_irq_regs(regs);
> +
> + inc_irq_stat(irq_hv_callback_count);
> +
> + if (vmbus_handler)
> + vmbus_handler();
> +
> + if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
> + ack_APIC_irq();
> +
> + set_irq_regs(old_regs);
> +}
> +
> void hv_setup_vmbus_handler(void (*handler)(void))
> {
> vmbus_handler = handler;
> diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
> index a58c6bc1cd68..ace648856a0b 100644
> --- a/arch/x86/kernel/idt.c
> +++ b/arch/x86/kernel/idt.c
> @@ -160,6 +160,15 @@ static const __initconst struct idt_data apic_idts[] = {
> # endif
> INTG(SPURIOUS_APIC_VECTOR,
> asm_sysvec_spurious_apic_interrupt),
> INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
> +#ifdef CONFIG_HYPERV
> + /*
> + * This is a hack because we cannot install this interrupt handler via alloc_intr_gate
> + * as it does not allow interrupt vector less than FIRST_SYSTEM_VECTORS. And hyperv
> + * does not want anything other than 0x31-0x34 as the interrupt vector for vmbus
> + * interrupt in case of nested setup.
> + */
> + INTG(HYPERV_INTR_NESTED_VMBUS_VECTOR,
> asm_sysvec_hyperv_nested_vmbus_intr),
> +#endif
> #endif
> };
>
> diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
> index 0937877eade9..c1477f3a08dd 100644
> --- a/drivers/hv/vmbus_drv.c
> +++ b/drivers/hv/vmbus_drv.c
> @@ -2767,7 +2767,8 @@ static int __init hv_acpi_init(void)
> * normal Linux IRQ mechanism is not used in this case.
> */
> #ifdef HYPERVISOR_CALLBACK_VECTOR
> - vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
> + vmbus_interrupt = hv_nested ? HYPERV_INTR_NESTED_VMBUS_VECTOR :
> + HYPERVISOR_CALLBACK_VECTOR;
> vmbus_irq = -1;
> #endif
>
> --
> 2.25.1
Given the backdrop that this is a real hack due to MSHV limitations,
this looks OK to me. But someone on the x86 side will have to weigh
in on whether this will break anything in x86 vector management.
A somewhat dubious,
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
This patch series adds support for running a nested Microsoft Hypervisor. With a nested Microsoft Hypervisor, there are a few privileged hypercalls which need to go to the L0 Hypervisor instead of the L1 Hypervisor. This patch series identifies such hypercalls and replaces them with nested hypercalls. Jinank Jain (5): x86/hyperv: Add support for detecting nested hypervisor Drivers: hv: Setup synic registers in case of nested root partition x86/hyperv: Add an interface to do nested hypercalls Drivers: hv: Enable vmbus driver for nested root partition x86/hyperv: Change interrupt vector for nested root partition [v4] - Fix ARM64 compilation [v5] - Fix comments from Michael Kelley [v6] - Send the correct patches from the right folder [v7] - Fix linker issues for CONFIG_HYPERV=n pointed out by Michael - Fix comments from Nuno: created two separate functions for fetching nested vs non-nested registers. [v8] - Refactor as per the recommendation from Michael Kelley [v9] - Address comments from Michael and Nuno. arch/x86/include/asm/hyperv-tlfs.h | 17 +++++- arch/x86/include/asm/idtentry.h | 2 + arch/x86/include/asm/irq_vectors.h | 6 +++ arch/x86/include/asm/mshyperv.h | 72 +++++++++++++++---------- arch/x86/kernel/cpu/mshyperv.c | 87 ++++++++++++++++++++++++++++++ arch/x86/kernel/idt.c | 10 ++++ drivers/hv/hv.c | 19 +++++-- drivers/hv/hv_common.c | 9 ++-- drivers/hv/vmbus_drv.c | 5 +- include/asm-generic/hyperv-tlfs.h | 1 + include/asm-generic/mshyperv.h | 1 + 11 files changed, 190 insertions(+), 39 deletions(-) -- 2.25.1
Detect if Linux is running as a nested hypervisor in the root
partition for Microsoft Hypervisor, using flags provided by MSHV.
Expose a new variable hv_nested that is used later for decisions
specific to the nested use case.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 +++
arch/x86/kernel/cpu/mshyperv.c | 7 +++++++
drivers/hv/hv_common.c | 9 ++++++---
include/asm-generic/mshyperv.h | 1 +
4 files changed, 17 insertions(+), 3 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 6d9368ea3701..58c03d18c235 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -114,6 +114,9 @@
/* Recommend using the newer ExProcessorMasks interface */
#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED BIT(11)
+/* Indicates that the hypervisor is nested within a Hyper-V partition. */
+#define HV_X64_HYPERV_NESTED BIT(12)
+
/* Recommend using enlightened VMCS */
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 46668e255421..f9b78d4829e3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -37,6 +37,8 @@
/* Is Linux running as the root partition? */
bool hv_root_partition;
+/* Is Linux running on a nested Microsoft Hypervisor? */
+bool hv_nested;
struct ms_hyperv_info ms_hyperv;
#if IS_ENABLED(CONFIG_HYPERV)
@@ -301,6 +303,11 @@ static void __init ms_hyperv_init_platform(void)
pr_info("Hyper-V: running as root partition\n");
}
+ if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
+ hv_nested = true;
+ pr_info("Hyper-V: running on a nested hypervisor\n");
+ }
+
/*
* Extract host information.
*/
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index ae68298c0dca..52a6f89ccdbd 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -25,17 +25,20 @@
#include <asm/mshyperv.h>
/*
- * hv_root_partition and ms_hyperv are defined here with other Hyper-V
- * specific globals so they are shared across all architectures and are
+ * hv_root_partition, ms_hyperv and hv_nested are defined here with other
+ * Hyper-V specific globals so they are shared across all architectures and are
* built only when CONFIG_HYPERV is defined. But on x86,
* ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
- * defined, and it uses these two variables. So mark them as __weak
+ * defined, and it uses these three variables. So mark them as __weak
* here, allowing for an overriding definition in the module containing
* ms_hyperv_init_platform().
*/
bool __weak hv_root_partition;
EXPORT_SYMBOL_GPL(hv_root_partition);
+bool __weak hv_nested;
+EXPORT_SYMBOL_GPL(hv_nested);
+
struct ms_hyperv_info __weak ms_hyperv;
EXPORT_SYMBOL_GPL(ms_hyperv);
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index bfb9eb9d7215..f131027830c3 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -48,6 +48,7 @@ struct ms_hyperv_info {
u64 shared_gpa_boundary;
};
extern struct ms_hyperv_info ms_hyperv;
+extern bool hv_nested;
extern void * __percpu *hyperv_pcpu_input_arg;
extern void * __percpu *hyperv_pcpu_output_arg;
--
2.25.1
Child partitions are free to allocate the SynIC message and event pages, but
the root partition must use the pages allocated by the Microsoft
Hypervisor (MSHV). The base addresses of these pages can be found using
synthetic MSRs exposed by MSHV. These MSRs differ slightly between the
nested and non-nested root partition.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 11 +++++
arch/x86/include/asm/mshyperv.h | 30 +++-----------
arch/x86/kernel/cpu/mshyperv.c | 65 ++++++++++++++++++++++++++++++
drivers/hv/hv.c | 19 ++++++---
4 files changed, 96 insertions(+), 29 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index 58c03d18c235..b5019becb618 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -225,6 +225,17 @@ enum hv_isolation_type {
#define HV_REGISTER_SINT14 0x4000009E
#define HV_REGISTER_SINT15 0x4000009F
+/*
+ * Define synthetic interrupt controller model specific registers for
+ * nested hypervisor.
+ */
+#define HV_REGISTER_NESTED_SCONTROL 0x40001080
+#define HV_REGISTER_NESTED_SVERSION 0x40001081
+#define HV_REGISTER_NESTED_SIEFP 0x40001082
+#define HV_REGISTER_NESTED_SIMP 0x40001083
+#define HV_REGISTER_NESTED_EOM 0x40001084
+#define HV_REGISTER_NESTED_SINT0 0x40001090
+
/*
* Synthetic Timer MSRs. Four timers per vcpu.
*/
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 61f0c206bff0..c38e4c66a3ac 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
return false;
}
-static inline u64 hv_get_register(unsigned int reg)
-{
- u64 value;
-
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
- hv_ghcb_msr_read(reg, &value);
- else
- rdmsrl(reg, value);
- return value;
-}
-
-static inline void hv_set_register(unsigned int reg, u64 value)
-{
- if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
- hv_ghcb_msr_write(reg, value);
-
- /* Write proxy bit via wrmsl instruction */
- if (reg >= HV_REGISTER_SINT0 &&
- reg <= HV_REGISTER_SINT15)
- wrmsrl(reg, value | 1 << 20);
- } else {
- wrmsrl(reg, value);
- }
-}
+u64 hv_get_register(unsigned int reg);
+void hv_set_register(unsigned int reg, u64 value);
+u64 hv_get_non_nested_register(unsigned int reg);
+void hv_set_non_nested_register(unsigned int reg, u64 value);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
@@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
}
static inline void hv_set_register(unsigned int reg, u64 value) { }
static inline u64 hv_get_register(unsigned int reg) { return 0; }
+static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
+static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
bool visible)
{
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f9b78d4829e3..938fc82edf05 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -41,7 +41,72 @@ bool hv_root_partition;
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
+static inline unsigned int hv_get_nested_reg(unsigned int reg)
+{
+ switch (reg) {
+ case HV_REGISTER_SIMP:
+ return HV_REGISTER_NESTED_SIMP;
+ case HV_REGISTER_SIEFP:
+ return HV_REGISTER_NESTED_SIEFP;
+ case HV_REGISTER_SVERSION:
+ return HV_REGISTER_NESTED_SVERSION;
+ case HV_REGISTER_SCONTROL:
+ return HV_REGISTER_NESTED_SCONTROL;
+ case HV_REGISTER_SINT0:
+ return HV_REGISTER_NESTED_SINT0;
+ case HV_REGISTER_EOM:
+ return HV_REGISTER_NESTED_EOM;
+ default:
+ return reg;
+ }
+}
+
#if IS_ENABLED(CONFIG_HYPERV)
+u64 hv_get_non_nested_register(unsigned int reg)
+{
+ u64 value;
+
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
+ hv_ghcb_msr_read(reg, &value);
+ else
+ rdmsrl(reg, value);
+ return value;
+}
+EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
+
+void hv_set_non_nested_register(unsigned int reg, u64 value)
+{
+ if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
+ hv_ghcb_msr_write(reg, value);
+
+ /* Write proxy bit via wrmsrl instruction */
+ if (reg >= HV_REGISTER_SINT0 &&
+ reg <= HV_REGISTER_SINT15)
+ wrmsrl(reg, value | 1 << 20);
+ } else {
+ wrmsrl(reg, value);
+ }
+}
+EXPORT_SYMBOL_GPL(hv_set_non_nested_register);
+
+u64 hv_get_register(unsigned int reg)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ return hv_get_non_nested_register(reg);
+}
+EXPORT_SYMBOL_GPL(hv_get_register);
+
+void hv_set_register(unsigned int reg, u64 value)
+{
+ if (hv_nested)
+ reg = hv_get_nested_reg(reg);
+
+ hv_set_non_nested_register(reg, value);
+}
+EXPORT_SYMBOL_GPL(hv_set_register);
+
static void (*vmbus_handler)(void);
static void (*hv_stimer0_handler)(void);
static void (*hv_kexec_handler)(void);
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 4d6480d57546..986814a903ee 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -147,7 +147,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!hv_isolation_type_snp()) {
+ if (!hv_isolation_type_snp() && !hv_root_partition) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (hv_cpu->synic_message_page == NULL) {
@@ -188,8 +188,16 @@ void hv_synic_free(void)
struct hv_per_cpu_context *hv_cpu
= per_cpu_ptr(hv_context.cpu_context, cpu);
- free_page((unsigned long)hv_cpu->synic_event_page);
- free_page((unsigned long)hv_cpu->synic_message_page);
+ if (hv_root_partition) {
+ if (hv_cpu->synic_event_page != NULL)
+ memunmap(hv_cpu->synic_event_page);
+
+ if (hv_cpu->synic_message_page != NULL)
+ memunmap(hv_cpu->synic_message_page);
+ } else {
+ free_page((unsigned long)hv_cpu->synic_event_page);
+ free_page((unsigned long)hv_cpu->synic_message_page);
+ }
free_page((unsigned long)hv_cpu->post_msg_page);
}
@@ -214,9 +222,10 @@ void hv_synic_enable_regs(unsigned int cpu)
/* Setup the Synic's message page */
simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
+
simp.simp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_message_page
= memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
@@ -233,7 +242,7 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
siefp.siefp_enabled = 1;
- if (hv_isolation_type_snp()) {
+ if (hv_isolation_type_snp() || hv_root_partition) {
hv_cpu->synic_event_page =
memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
HV_HYP_PAGE_SIZE, MEMREMAP_WB);
--
2.25.1
From: Jinank Jain <jinankjain@linux.microsoft.com> Sent: Tuesday, December 13, 2022 10:33 PM
>
> Child partitions are free to allocate SynIC message and event page but in
> case of root partition it must use the pages allocated by Microsoft
> Hypervisor (MSHV). Base address for these pages can be found using
> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> for nested vs non-nested root partition.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
You have addressed all my previous comments and those areas
look good. But I see a new issue that I had not noticed before.
See comment below.
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 11 +++++
> arch/x86/include/asm/mshyperv.h | 30 +++-----------
> arch/x86/kernel/cpu/mshyperv.c | 65 ++++++++++++++++++++++++++++++
> drivers/hv/hv.c | 19 ++++++---
> 4 files changed, 96 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index 58c03d18c235..b5019becb618 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> #define HV_REGISTER_SINT14 0x4000009E
> #define HV_REGISTER_SINT15 0x4000009F
>
> +/*
> + * Define synthetic interrupt controller model specific registers for
> + * nested hypervisor.
> + */
> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080
> +#define HV_REGISTER_NESTED_SVERSION 0x40001081
> +#define HV_REGISTER_NESTED_SIEFP 0x40001082
> +#define HV_REGISTER_NESTED_SIMP 0x40001083
> +#define HV_REGISTER_NESTED_EOM 0x40001084
> +#define HV_REGISTER_NESTED_SINT0 0x40001090
> +
> /*
> * Synthetic Timer MSRs. Four timers per vcpu.
> */
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index 61f0c206bff0..c38e4c66a3ac 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> return false;
> }
>
> -static inline u64 hv_get_register(unsigned int reg)
> -{
> - u64 value;
> -
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> - hv_ghcb_msr_read(reg, &value);
> - else
> - rdmsrl(reg, value);
> - return value;
> -}
> -
> -static inline void hv_set_register(unsigned int reg, u64 value)
> -{
> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> - hv_ghcb_msr_write(reg, value);
> -
> - /* Write proxy bit via wrmsl instruction */
> - if (reg >= HV_REGISTER_SINT0 &&
> - reg <= HV_REGISTER_SINT15)
> - wrmsrl(reg, value | 1 << 20);
> - } else {
> - wrmsrl(reg, value);
> - }
> -}
> +u64 hv_get_register(unsigned int reg);
> +void hv_set_register(unsigned int reg, u64 value);
> +u64 hv_get_non_nested_register(unsigned int reg);
> +void hv_set_non_nested_register(unsigned int reg, u64 value);
>
> #else /* CONFIG_HYPERV */
> static inline void hyperv_init(void) {}
> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
> }
> static inline void hv_set_register(unsigned int reg, u64 value) { }
> static inline u64 hv_get_register(unsigned int reg) { return 0; }
> +static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { }
> +static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; }
> static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
> bool visible)
> {
> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> index f9b78d4829e3..938fc82edf05 100644
> --- a/arch/x86/kernel/cpu/mshyperv.c
> +++ b/arch/x86/kernel/cpu/mshyperv.c
> @@ -41,7 +41,72 @@ bool hv_root_partition;
> bool hv_nested;
> struct ms_hyperv_info ms_hyperv;
>
> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> +{
> + switch (reg) {
> + case HV_REGISTER_SIMP:
> + return HV_REGISTER_NESTED_SIMP;
> + case HV_REGISTER_SIEFP:
> + return HV_REGISTER_NESTED_SIEFP;
> + case HV_REGISTER_SVERSION:
> + return HV_REGISTER_NESTED_SVERSION;
> + case HV_REGISTER_SCONTROL:
> + return HV_REGISTER_NESTED_SCONTROL;
> + case HV_REGISTER_SINT0:
> + return HV_REGISTER_NESTED_SINT0;
> + case HV_REGISTER_EOM:
> + return HV_REGISTER_NESTED_EOM;
> + default:
> + return reg;
> + }
> +}
> +
> #if IS_ENABLED(CONFIG_HYPERV)
> +u64 hv_get_non_nested_register(unsigned int reg)
> +{
> + u64 value;
> +
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> + hv_ghcb_msr_read(reg, &value);
> + else
> + rdmsrl(reg, value);
> + return value;
> +}
> +EXPORT_SYMBOL_GPL(hv_get_non_nested_register);
> +
> +void hv_set_non_nested_register(unsigned int reg, u64 value)
> +{
> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> + hv_ghcb_msr_write(reg, value);
> +
> + /* Write proxy bit via wrmsl instruction */
> + if (reg >= HV_REGISTER_SINT0 &&
> + reg <= HV_REGISTER_SINT15)
> + wrmsrl(reg, value | 1 << 20);
> + } else {
> + wrmsrl(reg, value);
> + }
> +}
> +EXPORT_SYMBOL_GPL(hv_set_non_nested_register);
> +
> +u64 hv_get_register(unsigned int reg)
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + return hv_get_non_nested_register(reg);
> +}
> +EXPORT_SYMBOL_GPL(hv_get_register);
> +
> +void hv_set_register(unsigned int reg, u64 value)
> +{
> + if (hv_nested)
> + reg = hv_get_nested_reg(reg);
> +
> + hv_set_non_nested_register(reg, value);
> +}
> +EXPORT_SYMBOL_GPL(hv_set_register);
> +
> static void (*vmbus_handler)(void);
> static void (*hv_stimer0_handler)(void);
> static void (*hv_kexec_handler)(void);
> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> index 4d6480d57546..986814a903ee 100644
> --- a/drivers/hv/hv.c
> +++ b/drivers/hv/hv.c
> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
> * Synic message and event pages are allocated by paravisor.
> * Skip these pages allocation here.
> */
> - if (!hv_isolation_type_snp()) {
> + if (!hv_isolation_type_snp() && !hv_root_partition) {
> hv_cpu->synic_message_page =
> (void *)get_zeroed_page(GFP_ATOMIC);
> if (hv_cpu->synic_message_page == NULL) {
> @@ -188,8 +188,16 @@ void hv_synic_free(void)
> struct hv_per_cpu_context *hv_cpu
> = per_cpu_ptr(hv_context.cpu_context, cpu);
>
> - free_page((unsigned long)hv_cpu->synic_event_page);
> - free_page((unsigned long)hv_cpu->synic_message_page);
> + if (hv_root_partition) {
> + if (hv_cpu->synic_event_page != NULL)
> + memunmap(hv_cpu->synic_event_page);
> +
> + if (hv_cpu->synic_message_page != NULL)
> + memunmap(hv_cpu->synic_message_page);
These memunmap() calls seem to be done in the wrong place. There are two
pairs of functions that should be symmetrical unless there's a really good
reason otherwise:
1. hv_synic_alloc() and hv_synic_free()
2. hv_synic_enable_regs() and hv_synic_disable_regs()
The functions in #1 handle the allocation and freeing of these three
pages: synic_event_page, synic_message_page, and post_msg_page.
If the synic_event_page and synic_message_page don't need to be
allocated because they are provided by the hypervisor or paravisor,
then the allocation is skipped in hv_synic_alloc(), and the free_page
operations in hv_synic_free() are no-ops if the corresponding pointer
is NULL.
The functions in #2 should handle the mapping and unmapping in the
case of pages provided by the hypervisor or paravisor. It appears
that the hv_isolation_type_snp() case does this (mostly) correctly.
But your code does the unmap in hv_synic_free(), which isn't
symmetrical. Unless there's something unique about the situation
when running in the root partition, the unmap should be done in
hv_synic_disable_regs() like it is for the SNP case.
I said "mostly" correctly above because the current code
in hv_synic_disable_regs() should be setting hv_cpu->synic_message_page
and hv_cpu->synic_event_page to NULL after the unmap is done.
Those pointers must be reverted to NULL so that if hv_synic_free() is
then run, it won't try to free pages that were provided by the hypervisor
or paravisor.
Michael
> + } else {
> + free_page((unsigned long)hv_cpu->synic_event_page);
> + free_page((unsigned long)hv_cpu->synic_message_page);
> + }
> free_page((unsigned long)hv_cpu->post_msg_page);
> }
>
> @@ -214,9 +222,10 @@ void hv_synic_enable_regs(unsigned int cpu)
>
> /* Setup the Synic's message page */
> simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> +
> simp.simp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_message_page
> = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> @@ -233,7 +242,7 @@ void hv_synic_enable_regs(unsigned int cpu)
> siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP);
> siefp.siefp_enabled = 1;
>
> - if (hv_isolation_type_snp()) {
> + if (hv_isolation_type_snp() || hv_root_partition) {
> hv_cpu->synic_event_page =
> memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT,
> HV_HYP_PAGE_SIZE, MEMREMAP_WB);
> --
> 2.25.1
According to TLFS, in order to communicate to L0 hypervisor there needs
to be an additional bit set in the control register. This communication
is required to perform privileged instructions which can only be
performed by L0 hypervisor. An example of that could be setting up the
VMBus infrastructure.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
arch/x86/include/asm/mshyperv.h | 42 +++++++++++++++++++++++++++---
include/asm-generic/hyperv-tlfs.h | 1 +
3 files changed, 41 insertions(+), 5 deletions(-)
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index b5019becb618..7758c495541d 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -380,7 +380,8 @@ struct hv_nested_enlightenments_control {
__u32 reserved:31;
} features;
struct {
- __u32 reserved;
+ __u32 inter_partition_comm:1;
+ __u32 reserved:31;
} hypercallControls;
} __packed;
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index c38e4c66a3ac..9e5535044ed0 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -74,10 +74,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
return hv_status;
}
+/* Hypercall to the L0 hypervisor */
+static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
+{
+ return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
+}
+
/* Fast hypercall with 8 bytes of input and no output */
-static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall8(control, input1);
+}
+
+static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall8(control, input1);
+}
+
/* Fast hypercall with 16 bytes of input */
-static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
{
- u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
+ u64 hv_status;
#ifdef CONFIG_X86_64
{
@@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
return hv_status;
}
+static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
+
+ return _hv_do_fast_hypercall16(control, input1, input2);
+}
+
+static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
+{
+ u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
+
+ return _hv_do_fast_hypercall16(control, input1, input2);
+}
+
extern struct hv_vp_assist_page **hv_vp_assist_page;
static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index b17c6eeb9afa..e61ee461c4fc 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT {
#define HV_HYPERCALL_VARHEAD_OFFSET 17
#define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17)
#define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27)
+#define HV_HYPERCALL_NESTED BIT_ULL(31)
#define HV_HYPERCALL_REP_COMP_OFFSET 32
#define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
--
2.25.1
On 12/13/2022 10:33 PM, Jinank Jain wrote:
> According to TLFS, in order to communicate to L0 hypervisor there needs
> to be an additional bit set in the control register. This communication
> is required to perform privileged instructions which can only be
> performed by L0 hypervisor. An example of that could be setting up the
> VMBus infrastructure.
>
> Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
> ---
> arch/x86/include/asm/hyperv-tlfs.h | 3 ++-
> arch/x86/include/asm/mshyperv.h | 42 +++++++++++++++++++++++++++---
> include/asm-generic/hyperv-tlfs.h | 1 +
> 3 files changed, 41 insertions(+), 5 deletions(-)
>
> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
> index b5019becb618..7758c495541d 100644
> --- a/arch/x86/include/asm/hyperv-tlfs.h
> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> @@ -380,7 +380,8 @@ struct hv_nested_enlightenments_control {
> __u32 reserved:31;
> } features;
> struct {
> - __u32 reserved;
> + __u32 inter_partition_comm:1;
> + __u32 reserved:31;
> } hypercallControls;
> } __packed;
>
> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> index c38e4c66a3ac..9e5535044ed0 100644
> --- a/arch/x86/include/asm/mshyperv.h
> +++ b/arch/x86/include/asm/mshyperv.h
> @@ -74,10 +74,16 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
> return hv_status;
> }
>
> +/* Hypercall to the L0 hypervisor */
> +static inline u64 hv_do_nested_hypercall(u64 control, void *input, void *output)
> +{
> + return hv_do_hypercall(control | HV_HYPERCALL_NESTED, input, output);
> +}
> +
> /* Fast hypercall with 8 bytes of input and no output */
> -static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> +static inline u64 _hv_do_fast_hypercall8(u64 control, u64 input1)
> {
> - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> + u64 hv_status;
>
> #ifdef CONFIG_X86_64
> {
> @@ -105,10 +111,24 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> return hv_status;
> }
>
> +static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
> +
> + return _hv_do_fast_hypercall8(control, input1);
> +}
> +
> +static inline u64 hv_do_fast_nested_hypercall8(u16 code, u64 input1)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
> +
> + return _hv_do_fast_hypercall8(control, input1);
> +}
> +
> /* Fast hypercall with 16 bytes of input */
> -static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> +static inline u64 _hv_do_fast_hypercall16(u64 control, u64 input1, u64 input2)
> {
> - u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT;
> + u64 hv_status;
>
> #ifdef CONFIG_X86_64
> {
> @@ -139,6 +159,20 @@ static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> return hv_status;
> }
>
> +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT;
> +
> + return _hv_do_fast_hypercall16(control, input1, input2);
> +}
> +
> +static inline u64 hv_do_fast_nested_hypercall16(u16 code, u64 input1, u64 input2)
> +{
> + u64 control = (u64)code | HV_HYPERCALL_FAST_BIT | HV_HYPERCALL_NESTED;
> +
> + return _hv_do_fast_hypercall16(control, input1, input2);
> +}
> +
> extern struct hv_vp_assist_page **hv_vp_assist_page;
>
> static inline struct hv_vp_assist_page *hv_get_vp_assist_page(unsigned int cpu)
> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
> index b17c6eeb9afa..e61ee461c4fc 100644
> --- a/include/asm-generic/hyperv-tlfs.h
> +++ b/include/asm-generic/hyperv-tlfs.h
> @@ -194,6 +194,7 @@ enum HV_GENERIC_SET_FORMAT {
> #define HV_HYPERCALL_VARHEAD_OFFSET 17
> #define HV_HYPERCALL_VARHEAD_MASK GENMASK_ULL(26, 17)
> #define HV_HYPERCALL_RSVD0_MASK GENMASK_ULL(31, 27)
> +#define HV_HYPERCALL_NESTED BIT_ULL(31)
> #define HV_HYPERCALL_REP_COMP_OFFSET 32
> #define HV_HYPERCALL_REP_COMP_1 BIT_ULL(32)
> #define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32)
Reviewed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Currently the VMBus driver is not initialized for the root partition, but
we need to enable it for a nested root partition. This is required so
that the L2 root can use the VMBus devices.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
drivers/hv/vmbus_drv.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0f00d57b7c25..6324e01d5eec 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2745,7 +2745,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition)
+ if (hv_root_partition && !hv_nested)
return 0;
/*
--
2.25.1
Traditionally we have been using the HYPERVISOR_CALLBACK_VECTOR to relay
the VMBus interrupt, but this does not work in the case of a nested
hypervisor. The Microsoft Hypervisor reserves 0x31 to 0x34 as the interrupt
vector range for VMBus, and thus we have to use one of the vectors from
that range and set up the IDT accordingly.
Signed-off-by: Jinank Jain <jinankjain@linux.microsoft.com>
---
arch/x86/include/asm/idtentry.h | 2 ++
arch/x86/include/asm/irq_vectors.h | 6 ++++++
arch/x86/kernel/cpu/mshyperv.c | 15 +++++++++++++++
arch/x86/kernel/idt.c | 10 ++++++++++
drivers/hv/vmbus_drv.c | 3 ++-
5 files changed, 35 insertions(+), 1 deletion(-)
diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h
index 72184b0b2219..c0648e3e4d4a 100644
--- a/arch/x86/include/asm/idtentry.h
+++ b/arch/x86/include/asm/idtentry.h
@@ -686,6 +686,8 @@ DECLARE_IDTENTRY_SYSVEC(POSTED_INTR_NESTED_VECTOR, sysvec_kvm_posted_intr_nested
DECLARE_IDTENTRY_SYSVEC(HYPERVISOR_CALLBACK_VECTOR, sysvec_hyperv_callback);
DECLARE_IDTENTRY_SYSVEC(HYPERV_REENLIGHTENMENT_VECTOR, sysvec_hyperv_reenlightenment);
DECLARE_IDTENTRY_SYSVEC(HYPERV_STIMER0_VECTOR, sysvec_hyperv_stimer0);
+DECLARE_IDTENTRY_SYSVEC(HYPERV_INTR_NESTED_VMBUS_VECTOR,
+ sysvec_hyperv_nested_vmbus_intr);
#endif
#if IS_ENABLED(CONFIG_ACRN_GUEST)
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 43dcb9284208..729d19eab7f5 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -102,6 +102,12 @@
#if IS_ENABLED(CONFIG_HYPERV)
#define HYPERV_REENLIGHTENMENT_VECTOR 0xee
#define HYPERV_STIMER0_VECTOR 0xed
+/*
+ * FIXME: Change this once the Microsoft Hypervisor changes its assumption
+ * around VMBus interrupt vector allocation for the nested root partition,
+ * or provides a better interface to detect this instead of hardcoding.
+ */
+#define HYPERV_INTR_NESTED_VMBUS_VECTOR 0x31
#endif
#define LOCAL_TIMER_VECTOR 0xec
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index 938fc82edf05..4dfe0f9d7be3 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -126,6 +126,21 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_callback)
set_irq_regs(old_regs);
}
+DEFINE_IDTENTRY_SYSVEC(sysvec_hyperv_nested_vmbus_intr)
+{
+ struct pt_regs *old_regs = set_irq_regs(regs);
+
+ inc_irq_stat(irq_hv_callback_count);
+
+ if (vmbus_handler)
+ vmbus_handler();
+
+ if (ms_hyperv.hints & HV_DEPRECATING_AEOI_RECOMMENDED)
+ ack_APIC_irq();
+
+ set_irq_regs(old_regs);
+}
+
void hv_setup_vmbus_handler(void (*handler)(void))
{
vmbus_handler = handler;
diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c
index a58c6bc1cd68..3536935cea39 100644
--- a/arch/x86/kernel/idt.c
+++ b/arch/x86/kernel/idt.c
@@ -160,6 +160,16 @@ static const __initconst struct idt_data apic_idts[] = {
# endif
INTG(SPURIOUS_APIC_VECTOR, asm_sysvec_spurious_apic_interrupt),
INTG(ERROR_APIC_VECTOR, asm_sysvec_error_interrupt),
+#ifdef CONFIG_HYPERV
+ /*
+ * This is a hack because we cannot install this interrupt handler
+ * via alloc_intr_gate(), as it does not allow interrupt vectors lower
+ * than FIRST_SYSTEM_VECTOR. And Hyper-V does not accept anything other
+ * than 0x31-0x34 as the interrupt vector for the VMBus interrupt in a
+ * nested setup.
+ */
+ INTG(HYPERV_INTR_NESTED_VMBUS_VECTOR, asm_sysvec_hyperv_nested_vmbus_intr),
+#endif
#endif
};
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 6324e01d5eec..740878367426 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2768,7 +2768,8 @@ static int __init hv_acpi_init(void)
* normal Linux IRQ mechanism is not used in this case.
*/
#ifdef HYPERVISOR_CALLBACK_VECTOR
- vmbus_interrupt = HYPERVISOR_CALLBACK_VECTOR;
+ vmbus_interrupt = hv_nested ? HYPERV_INTR_NESTED_VMBUS_VECTOR :
+ HYPERVISOR_CALLBACK_VECTOR;
vmbus_irq = -1;
#endif
--
2.25.1
© 2016 - 2026 Red Hat, Inc.