arch/arm64/hyperv/mshyperv.c | 2 ++ arch/x86/hyperv/hv_init.c | 10 ++++---- arch/x86/kernel/cpu/mshyperv.c | 24 ++---------------- drivers/clocksource/hyperv_timer.c | 4 +-- drivers/hv/Kconfig | 12 +++++++++ drivers/hv/Makefile | 3 ++- drivers/hv/hv.c | 10 ++++---- drivers/hv/hv_common.c | 32 +++++++++++++++++++----- drivers/hv/vmbus_drv.c | 2 +- drivers/iommu/hyperv-iommu.c | 4 +-- include/asm-generic/mshyperv.h | 39 +++++++++++++++++++++++++----- 11 files changed, 92 insertions(+), 50 deletions(-)
Introduce CONFIG_MSHV_ROOT as a tristate to enable root partition
booting and future mshv driver functionality.
Change hv_root_partition into a function which always returns false
if CONFIG_MSHV_ROOT=n.
Introduce hv_current_partition_type to store the type of partition
(guest, root, or other kinds in future), and hv_identify_partition_type()
to it up early in Hyper-V initialization.
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
---
Depends on
https://lore.kernel.org/linux-hyperv/1738955002-20821-3-git-send-email-nunodasneves@linux.microsoft.com/
arch/arm64/hyperv/mshyperv.c | 2 ++
arch/x86/hyperv/hv_init.c | 10 ++++----
arch/x86/kernel/cpu/mshyperv.c | 24 ++----------------
drivers/clocksource/hyperv_timer.c | 4 +--
drivers/hv/Kconfig | 12 +++++++++
drivers/hv/Makefile | 3 ++-
drivers/hv/hv.c | 10 ++++----
drivers/hv/hv_common.c | 32 +++++++++++++++++++-----
drivers/hv/vmbus_drv.c | 2 +-
drivers/iommu/hyperv-iommu.c | 4 +--
include/asm-generic/mshyperv.h | 39 +++++++++++++++++++++++++-----
11 files changed, 92 insertions(+), 50 deletions(-)
diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c
index 29fcfd595f48..2265ea5ce5ad 100644
--- a/arch/arm64/hyperv/mshyperv.c
+++ b/arch/arm64/hyperv/mshyperv.c
@@ -61,6 +61,8 @@ static int __init hyperv_init(void)
ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints,
ms_hyperv.misc_features);
+ hv_identify_partition_type();
+
ret = hv_common_init();
if (ret)
return ret;
diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c
index 9be1446f5bd3..ddeb40930bc8 100644
--- a/arch/x86/hyperv/hv_init.c
+++ b/arch/x86/hyperv/hv_init.c
@@ -90,7 +90,7 @@ static int hv_cpu_init(unsigned int cpu)
return 0;
hvp = &hv_vp_assist_page[cpu];
- if (hv_root_partition) {
+ if (hv_root_partition()) {
/*
* For root partition we get the hypervisor provided VP assist
* page, instead of allocating a new page.
@@ -242,7 +242,7 @@ static int hv_cpu_die(unsigned int cpu)
if (hv_vp_assist_page && hv_vp_assist_page[cpu]) {
union hv_vp_assist_msr_contents msr = { 0 };
- if (hv_root_partition) {
+ if (hv_root_partition()) {
/*
* For root partition the VP assist page is mapped to
* hypervisor provided page, and thus we unmap the
@@ -317,7 +317,7 @@ static int hv_suspend(void)
union hv_x64_msr_hypercall_contents hypercall_msr;
int ret;
- if (hv_root_partition)
+ if (hv_root_partition())
return -EPERM;
/*
@@ -518,7 +518,7 @@ void __init hyperv_init(void)
rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64);
hypercall_msr.enable = 1;
- if (hv_root_partition) {
+ if (hv_root_partition()) {
struct page *pg;
void *src;
@@ -592,7 +592,7 @@ void __init hyperv_init(void)
* If we're running as root, we want to create our own PCI MSI domain.
* We can't set this in hv_pci_init because that would be too late.
*/
- if (hv_root_partition)
+ if (hv_root_partition())
x86_init.irqs.create_pci_msi_domain = hv_create_pci_msi_domain;
#endif
diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
index f285757618fc..4f01f424ea5b 100644
--- a/arch/x86/kernel/cpu/mshyperv.c
+++ b/arch/x86/kernel/cpu/mshyperv.c
@@ -33,8 +33,6 @@
#include <asm/numa.h>
#include <asm/svm.h>
-/* Is Linux running as the root partition? */
-bool hv_root_partition;
/* Is Linux running on nested Microsoft Hypervisor */
bool hv_nested;
struct ms_hyperv_info ms_hyperv;
@@ -451,25 +449,7 @@ static void __init ms_hyperv_init_platform(void)
pr_debug("Hyper-V: max %u virtual processors, %u logical processors\n",
ms_hyperv.max_vp_index, ms_hyperv.max_lp_index);
- /*
- * Check CPU management privilege.
- *
- * To mirror what Windows does we should extract CPU management
- * features and use the ReservedIdentityBit to detect if Linux is the
- * root partition. But that requires negotiating CPU management
- * interface (a process to be finalized). For now, use the privilege
- * flag as the indicator for running as root.
- *
- * Hyper-V should never specify running as root and as a Confidential
- * VM. But to protect against a compromised/malicious Hyper-V trying
- * to exploit root behavior to expose Confidential VM memory, ignore
- * the root partition setting if also a Confidential VM.
- */
- if ((ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
- !(ms_hyperv.priv_high & HV_ISOLATION)) {
- hv_root_partition = true;
- pr_info("Hyper-V: running as root partition\n");
- }
+ hv_identify_partition_type();
if (ms_hyperv.hints & HV_X64_HYPERV_NESTED) {
hv_nested = true;
@@ -618,7 +598,7 @@ static void __init ms_hyperv_init_platform(void)
# ifdef CONFIG_SMP
smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu;
- if (hv_root_partition ||
+ if (hv_root_partition() ||
(!ms_hyperv.paravisor_present && hv_isolation_type_snp()))
smp_ops.smp_prepare_cpus = hv_smp_prepare_cpus;
# endif
diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c
index f00019b078a7..09549451dd51 100644
--- a/drivers/clocksource/hyperv_timer.c
+++ b/drivers/clocksource/hyperv_timer.c
@@ -582,7 +582,7 @@ static void __init hv_init_tsc_clocksource(void)
* mapped.
*/
tsc_msr.as_uint64 = hv_get_msr(HV_MSR_REFERENCE_TSC);
- if (hv_root_partition)
+ if (hv_root_partition())
tsc_pfn = tsc_msr.pfn;
else
tsc_pfn = HVPFN_DOWN(virt_to_phys(tsc_page));
@@ -627,7 +627,7 @@ void __init hv_remap_tsc_clocksource(void)
if (!(ms_hyperv.features & HV_MSR_REFERENCE_TSC_AVAILABLE))
return;
- if (!hv_root_partition) {
+ if (!hv_root_partition()) {
WARN(1, "%s: attempt to remap TSC page in guest partition\n",
__func__);
return;
diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig
index 862c47b191af..aac172942f6c 100644
--- a/drivers/hv/Kconfig
+++ b/drivers/hv/Kconfig
@@ -55,4 +55,16 @@ config HYPERV_BALLOON
help
Select this option to enable Hyper-V Balloon driver.
+config MSHV_ROOT
+ tristate "Microsoft Hyper-V root partition support"
+ depends on HYPERV
+ depends on !HYPERV_VTL_MODE
+ depends on PAGE_SIZE_4KB
+ default n
+ help
+ Select this option to enable support for booting and running as root
+ partition on Microsoft Hyper-V.
+
+ If unsure, say N.
+
endmenu
diff --git a/drivers/hv/Makefile b/drivers/hv/Makefile
index 9afcabb3fbd2..2b8dc954b350 100644
--- a/drivers/hv/Makefile
+++ b/drivers/hv/Makefile
@@ -13,4 +13,5 @@ hv_vmbus-$(CONFIG_HYPERV_TESTING) += hv_debugfs.o
hv_utils-y := hv_util.o hv_kvp.o hv_snapshot.o hv_utils_transport.o
# Code that must be built-in
-obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o hv_proc.o
+obj-$(subst m,y,$(CONFIG_HYPERV)) += hv_common.o
+obj-$(subst m,y,$(CONFIG_MSHV_ROOT)) += hv_proc.o
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 36d9ba097ff5..93d82382351c 100644
--- a/drivers/hv/hv.c
+++ b/drivers/hv/hv.c
@@ -144,7 +144,7 @@ int hv_synic_alloc(void)
* Synic message and event pages are allocated by paravisor.
* Skip these pages allocation here.
*/
- if (!ms_hyperv.paravisor_present && !hv_root_partition) {
+ if (!ms_hyperv.paravisor_present && !hv_root_partition()) {
hv_cpu->synic_message_page =
(void *)get_zeroed_page(GFP_ATOMIC);
if (!hv_cpu->synic_message_page) {
@@ -272,7 +272,7 @@ void hv_synic_enable_regs(unsigned int cpu)
simp.as_uint64 = hv_get_msr(HV_MSR_SIMP);
simp.simp_enabled = 1;
- if (ms_hyperv.paravisor_present || hv_root_partition) {
+ if (ms_hyperv.paravisor_present || hv_root_partition()) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (simp.base_simp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
@@ -291,7 +291,7 @@ void hv_synic_enable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
siefp.siefp_enabled = 1;
- if (ms_hyperv.paravisor_present || hv_root_partition) {
+ if (ms_hyperv.paravisor_present || hv_root_partition()) {
/* Mask out vTOM bit. ioremap_cache() maps decrypted */
u64 base = (siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT) &
~ms_hyperv.shared_gpa_boundary;
@@ -367,7 +367,7 @@ void hv_synic_disable_regs(unsigned int cpu)
* addresses.
*/
simp.simp_enabled = 0;
- if (ms_hyperv.paravisor_present || hv_root_partition) {
+ if (ms_hyperv.paravisor_present || hv_root_partition()) {
iounmap(hv_cpu->synic_message_page);
hv_cpu->synic_message_page = NULL;
} else {
@@ -379,7 +379,7 @@ void hv_synic_disable_regs(unsigned int cpu)
siefp.as_uint64 = hv_get_msr(HV_MSR_SIEFP);
siefp.siefp_enabled = 0;
- if (ms_hyperv.paravisor_present || hv_root_partition) {
+ if (ms_hyperv.paravisor_present || hv_root_partition()) {
iounmap(hv_cpu->synic_event_page);
hv_cpu->synic_event_page = NULL;
} else {
diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c
index cb3ea49020ef..d1227e85c5b7 100644
--- a/drivers/hv/hv_common.c
+++ b/drivers/hv/hv_common.c
@@ -34,8 +34,11 @@
u64 hv_current_partition_id = HV_PARTITION_ID_SELF;
EXPORT_SYMBOL_GPL(hv_current_partition_id);
+enum hv_partition_type hv_current_partition_type;
+EXPORT_SYMBOL_GPL(hv_current_partition_type);
+
/*
- * hv_root_partition, ms_hyperv and hv_nested are defined here with other
+ * ms_hyperv and hv_nested are defined here with other
* Hyper-V specific globals so they are shared across all architectures and are
* built only when CONFIG_HYPERV is defined. But on x86,
* ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not
@@ -43,9 +46,6 @@ EXPORT_SYMBOL_GPL(hv_current_partition_id);
* here, allowing for an overriding definition in the module containing
* ms_hyperv_init_platform().
*/
-bool __weak hv_root_partition;
-EXPORT_SYMBOL_GPL(hv_root_partition);
-
bool __weak hv_nested;
EXPORT_SYMBOL_GPL(hv_nested);
@@ -283,7 +283,7 @@ static void hv_kmsg_dump_register(void)
static inline bool hv_output_page_exists(void)
{
- return hv_root_partition || IS_ENABLED(CONFIG_HYPERV_VTL_MODE);
+ return hv_root_partition() || IS_ENABLED(CONFIG_HYPERV_VTL_MODE);
}
void __init hv_get_partition_id(void)
@@ -594,7 +594,7 @@ EXPORT_SYMBOL_GPL(hv_setup_dma_ops);
bool hv_is_hibernation_supported(void)
{
- return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4);
+ return !hv_root_partition() && acpi_sleep_state_supported(ACPI_STATE_S4);
}
EXPORT_SYMBOL_GPL(hv_is_hibernation_supported);
@@ -683,3 +683,23 @@ u64 __weak hv_tdx_hypercall(u64 control, u64 param1, u64 param2)
return HV_STATUS_INVALID_PARAMETER;
}
EXPORT_SYMBOL_GPL(hv_tdx_hypercall);
+
+void hv_identify_partition_type(void)
+{
+ /*
+ * Check partition creation and cpu management privileges
+ *
+ * Hyper-V should never specify running as root and as a Confidential
+ * VM. But to protect against a compromised/malicious Hyper-V trying
+ * to exploit root behavior to expose Confidential VM memory, ignore
+ * the root partition setting if also a Confidential VM.
+ */
+ if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
+ (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
+ !(ms_hyperv.priv_high & HV_ISOLATION)) {
+ hv_current_partition_type = HV_PARTITION_TYPE_ROOT;
+ pr_info("Hyper-V: running as root partition\n");
+ } else {
+ hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
+ }
+}
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index 0f6cd44fff29..844eba0429fa 100644
--- a/drivers/hv/vmbus_drv.c
+++ b/drivers/hv/vmbus_drv.c
@@ -2646,7 +2646,7 @@ static int __init hv_acpi_init(void)
if (!hv_is_hyperv_initialized())
return -ENODEV;
- if (hv_root_partition && !hv_nested)
+ if (hv_root_partition() && !hv_nested)
return 0;
/*
diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c
index 2a86aa5d54c6..53e4b37716af 100644
--- a/drivers/iommu/hyperv-iommu.c
+++ b/drivers/iommu/hyperv-iommu.c
@@ -130,7 +130,7 @@ static int __init hyperv_prepare_irq_remapping(void)
x86_init.hyper.msi_ext_dest_id())
return -ENODEV;
- if (hv_root_partition) {
+ if (hv_root_partition()) {
name = "HYPERV-ROOT-IR";
ops = &hyperv_root_ir_domain_ops;
} else {
@@ -151,7 +151,7 @@ static int __init hyperv_prepare_irq_remapping(void)
return -ENOMEM;
}
- if (hv_root_partition)
+ if (hv_root_partition())
return 0; /* The rest is only relevant to guests */
/*
diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h
index 7adc10a4fa3e..6f898792fb51 100644
--- a/include/asm-generic/mshyperv.h
+++ b/include/asm-generic/mshyperv.h
@@ -28,6 +28,11 @@
#define VTPM_BASE_ADDRESS 0xfed40000
+enum hv_partition_type {
+ HV_PARTITION_TYPE_GUEST,
+ HV_PARTITION_TYPE_ROOT,
+};
+
struct ms_hyperv_info {
u32 features;
u32 priv_high;
@@ -59,6 +64,7 @@ struct ms_hyperv_info {
extern struct ms_hyperv_info ms_hyperv;
extern bool hv_nested;
extern u64 hv_current_partition_id;
+extern enum hv_partition_type hv_current_partition_type;
extern void * __percpu *hyperv_pcpu_input_arg;
extern void * __percpu *hyperv_pcpu_output_arg;
@@ -190,8 +196,6 @@ void hv_remove_crash_handler(void);
extern int vmbus_interrupt;
extern int vmbus_irq;
-extern bool hv_root_partition;
-
#if IS_ENABLED(CONFIG_HYPERV)
/*
* Hypervisor's notion of virtual processor ID is different from
@@ -213,15 +217,12 @@ void __init hv_common_free(void);
void __init ms_hyperv_late_init(void);
int hv_common_cpu_init(unsigned int cpu);
int hv_common_cpu_die(unsigned int cpu);
+void hv_identify_partition_type(void);
void *hv_alloc_hyperv_page(void);
void *hv_alloc_hyperv_zeroed_page(void);
void hv_free_hyperv_page(void *addr);
-int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
-int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
-int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
-
/**
* hv_cpu_number_to_vp_number() - Map CPU to VP.
* @cpu_number: CPU number in Linux terms
@@ -309,6 +310,7 @@ void hyperv_cleanup(void);
bool hv_query_ext_cap(u64 cap_query);
void hv_setup_dma_ops(struct device *dev, bool coherent);
#else /* CONFIG_HYPERV */
+static inline void hv_identify_partition_type(void) {}
static inline bool hv_is_hyperv_initialized(void) { return false; }
static inline bool hv_is_hibernation_supported(void) { return false; }
static inline void hyperv_cleanup(void) {}
@@ -320,4 +322,29 @@ static inline enum hv_isolation_type hv_get_isolation_type(void)
}
#endif /* CONFIG_HYPERV */
+#if IS_ENABLED(CONFIG_MSHV_ROOT)
+static inline bool hv_root_partition(void)
+{
+ return hv_current_partition_type == HV_PARTITION_TYPE_ROOT;
+}
+int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
+int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id);
+int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
+
+#else /* CONFIG_MSHV_ROOT */
+static inline bool hv_root_partition(void) { return false; }
+static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
+{
+ return hv_result(U64_MAX);
+}
+static inline int hv_call_add_logical_proc(int node, u32 lp_index, u32 acpi_id)
+{
+ return hv_result(U64_MAX);
+}
+static inline int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
+{
+ return hv_result(U64_MAX);
+}
+#endif /* CONFIG_MSHV_ROOT */
+
#endif
--
2.34.1
On 2/11/2025 2:21 PM, Nuno Das Neves wrote:
> Introduce CONFIG_MSHV_ROOT as a tristate to enable root partition
> booting and future mshv driver functionality.
>
> Change hv_root_partition into a function which always returns false
> if CONFIG_MSHV_ROOT=n.
>
> Introduce hv_current_partition_type to store the type of partition
> (guest, root, or other kinds in future), and hv_identify_partition_type()
> to it up early in Hyper-V initialization.
...to *set* it up early?
>
> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
> ---
> Depends on
> https://lore.kernel.org/linux-hyperv/1738955002-20821-3-git-send-email-nunodasneves@linux.microsoft.com/
>
> arch/arm64/hyperv/mshyperv.c | 2 ++
> arch/x86/hyperv/hv_init.c | 10 ++++----
> arch/x86/kernel/cpu/mshyperv.c | 24 ++----------------
> drivers/clocksource/hyperv_timer.c | 4 +--
> drivers/hv/Kconfig | 12 +++++++++
> drivers/hv/Makefile | 3 ++-
> drivers/hv/hv.c | 10 ++++----
> drivers/hv/hv_common.c | 32 +++++++++++++++++++-----
> drivers/hv/vmbus_drv.c | 2 +-
> drivers/iommu/hyperv-iommu.c | 4 +--
> include/asm-generic/mshyperv.h | 39 +++++++++++++++++++++++++-----
> 11 files changed, 92 insertions(+), 50 deletions(-)
>
<snip>
> +
> +void hv_identify_partition_type(void)
> +{
> + /*
> + * Check partition creation and cpu management privileges
> + *
> + * Hyper-V should never specify running as root and as a Confidential
> + * VM. But to protect against a compromised/malicious Hyper-V trying
> + * to exploit root behavior to expose Confidential VM memory, ignore
> + * the root partition setting if also a Confidential VM.
> + */
> + if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
> + (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
> + !(ms_hyperv.priv_high & HV_ISOLATION)) {
> + hv_current_partition_type = HV_PARTITION_TYPE_ROOT;
> + pr_info("Hyper-V: running as root partition\n");
> + } else {
> + hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
> + }
> +}
This should assume GUEST as default and modify to ROOT if all the checks pass.
<snip>
> +static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> +{
> + return hv_result(U64_MAX);
> +}
Is there value in perhaps #defining hv_result_<whatever this is> as U64_MAX and returning that for documentation?
For e.g. assuming this is something like EOPNOTSUPP
#define HV_RESULT_NOT_SUPP U64_MAX
static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
{ return hv_result(HV_RESULT_NOT_SUPP); }
<snip>
Thanks,
Easwar (he/him)
On 2/11/2025 9:47 PM, Easwar Hariharan wrote:
> On 2/11/2025 2:21 PM, Nuno Das Neves wrote:
>> Introduce CONFIG_MSHV_ROOT as a tristate to enable root partition
>> booting and future mshv driver functionality.
>>
>> Change hv_root_partition into a function which always returns false
>> if CONFIG_MSHV_ROOT=n.
>>
>> Introduce hv_current_partition_type to store the type of partition
>> (guest, root, or other kinds in future), and hv_identify_partition_type()
>> to it up early in Hyper-V initialization.
>
> ...to *set* it up early?
>
Yep! Thanks for catching that
>>
>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>> ---
>> Depends on
>> https://lore.kernel.org/linux-hyperv/1738955002-20821-3-git-send-email-nunodasneves@linux.microsoft.com/
>>
>> arch/arm64/hyperv/mshyperv.c | 2 ++
>> arch/x86/hyperv/hv_init.c | 10 ++++----
>> arch/x86/kernel/cpu/mshyperv.c | 24 ++----------------
>> drivers/clocksource/hyperv_timer.c | 4 +--
>> drivers/hv/Kconfig | 12 +++++++++
>> drivers/hv/Makefile | 3 ++-
>> drivers/hv/hv.c | 10 ++++----
>> drivers/hv/hv_common.c | 32 +++++++++++++++++++-----
>> drivers/hv/vmbus_drv.c | 2 +-
>> drivers/iommu/hyperv-iommu.c | 4 +--
>> include/asm-generic/mshyperv.h | 39 +++++++++++++++++++++++++-----
>> 11 files changed, 92 insertions(+), 50 deletions(-)
>>
>
> <snip>
>
>> +
>> +void hv_identify_partition_type(void)
>> +{
>> + /*
>> + * Check partition creation and cpu management privileges
>> + *
>> + * Hyper-V should never specify running as root and as a Confidential
>> + * VM. But to protect against a compromised/malicious Hyper-V trying
>> + * to exploit root behavior to expose Confidential VM memory, ignore
>> + * the root partition setting if also a Confidential VM.
>> + */
>> + if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
>> + (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
>> + !(ms_hyperv.priv_high & HV_ISOLATION)) {
>> + hv_current_partition_type = HV_PARTITION_TYPE_ROOT;
>> + pr_info("Hyper-V: running as root partition\n");
>> + } else {
>> + hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
>> + }
>> +}
>
> This should assume GUEST as default and modify to ROOT if all the checks pass.
>
It is doing that, isn't it?
In fact the 'else' branch here is redundant and just there for additional clarity.
hv_current_partition_type is zeroed (so GUEST) by default, but I could make that explicit
if you prefer:
+enum hv_partition_type hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
How does that sound? Am I misunderstanding something here?
> <snip>
>
>> +static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>> +{
>> + return hv_result(U64_MAX);
>> +}
>
> Is there value in perhaps #defining hv_result_<whatever this is> as U64_MAX and returning that for documentation?
> For e.g. assuming this is something like EOPNOTSUPP
>
> #define HV_RESULT_NOT_SUPP U64_MAX
>
> static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> { return hv_result(HV_RESULT_NOT_SUPP); }
>
The idea here was to copy what hv_do_hypercall does returning U64_MAX in case the hypercall
page is missing, which will hv_result() into an invalid status code. A special value for
that status could make this pattern clearer. I'd want to call out that this isn't a "real"
Hyper-V status code somehow. HV_STATUS's are 16 bits, so it would look more like:
/* "LINUX" because this isn't really a status from the hypervisor.. */
#define HV_STATUS_LINUX_FAIL 0xFFFF
static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
{ return HV_STATUS_LINUX_FAIL; }
Another option: there is another patch coming (which you know of) which maps hypercall
status codes to regular Linux errors like -EOPNOTSUPP. I could simply merge that patch
with this one (or make this a series for v2), and that would result in less churn.
(And leave alone the current use of U64_MAX in hv_do_hypercall, for now).
Nuno
> <snip>
>
> Thanks,
> Easwar (he/him)
On 2/12/2025 3:01 PM, Nuno Das Neves wrote:
> On 2/11/2025 9:47 PM, Easwar Hariharan wrote:
>> On 2/11/2025 2:21 PM, Nuno Das Neves wrote:
>>> Introduce CONFIG_MSHV_ROOT as a tristate to enable root partition
>>> booting and future mshv driver functionality.
>>>
>>> Change hv_root_partition into a function which always returns false
>>> if CONFIG_MSHV_ROOT=n.
>>>
>>> Introduce hv_current_partition_type to store the type of partition
>>> (guest, root, or other kinds in future), and hv_identify_partition_type()
>>> to it up early in Hyper-V initialization.
>>
>> ...to *set* it up early?
>>
> Yep! Thanks for catching that
>
>>>
>>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>>> ---
>>> Depends on
>>> https://lore.kernel.org/linux-hyperv/1738955002-20821-3-git-send-email-nunodasneves@linux.microsoft.com/
>>>
>>> arch/arm64/hyperv/mshyperv.c | 2 ++
>>> arch/x86/hyperv/hv_init.c | 10 ++++----
>>> arch/x86/kernel/cpu/mshyperv.c | 24 ++----------------
>>> drivers/clocksource/hyperv_timer.c | 4 +--
>>> drivers/hv/Kconfig | 12 +++++++++
>>> drivers/hv/Makefile | 3 ++-
>>> drivers/hv/hv.c | 10 ++++----
>>> drivers/hv/hv_common.c | 32 +++++++++++++++++++-----
>>> drivers/hv/vmbus_drv.c | 2 +-
>>> drivers/iommu/hyperv-iommu.c | 4 +--
>>> include/asm-generic/mshyperv.h | 39 +++++++++++++++++++++++++-----
>>> 11 files changed, 92 insertions(+), 50 deletions(-)
>>>
>>
>> <snip>
>>
>>> +
>>> +void hv_identify_partition_type(void)
>>> +{
>>> + /*
>>> + * Check partition creation and cpu management privileges
>>> + *
>>> + * Hyper-V should never specify running as root and as a Confidential
>>> + * VM. But to protect against a compromised/malicious Hyper-V trying
>>> + * to exploit root behavior to expose Confidential VM memory, ignore
>>> + * the root partition setting if also a Confidential VM.
>>> + */
>>> + if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
>>> + (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
>>> + !(ms_hyperv.priv_high & HV_ISOLATION)) {
>>> + hv_current_partition_type = HV_PARTITION_TYPE_ROOT;
>>> + pr_info("Hyper-V: running as root partition\n");
>>> + } else {
>>> + hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
>>> + }
>>> +}
>>
>> This should assume GUEST as default and modify to ROOT if all the checks pass.
>>
> It is doing that, isn't it?
>
> In fact the 'else' branch here is redundant and just there for additional clarity.
>
> hv_current_partition_type is zeroed (so GUEST) by default, but I could make that explicit
> if you prefer:
Yes, explicit is better, but see comment below.
> +enum hv_partition_type hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
>
> How does that sound? Am I misunderstanding something here?
I'd suggest centralizing that in this function, instead of having it spread in 2 places.
Since your commit message hints at future partition types, it's ideal to have this function be
a central clearing house, which I suppose is the intent. The preferred pattern in general, and what I'm
suggesting, is something like this:
void hv_identify_partition_type(void)
{
/* Assume guest role */
hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
/*
* Check partition creation and cpu management privileges
*
* Hyper-V should never specify running as root and as a Confidential
* VM. But to protect against a compromised/malicious Hyper-V trying
* to exploit root behavior to expose Confidential VM memory, ignore
* the root partition setting if also a Confidential VM.
*/
if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
(ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
!(ms_hyperv.priv_high & HV_ISOLATION)) {
hv_current_partition_type = HV_PARTITION_TYPE_ROOT;
pr_info("Hyper-V: running as root partition\n");
}
}
>
>> <snip>
>>
>>> +static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>>> +{
>>> + return hv_result(U64_MAX);
>>> +}
>>
>> Is there value in perhaps #defining hv_result_<whatever this is> as U64_MAX and returning that for documentation?
>> For e.g. assuming this is something like EOPNOTSUPP
>>
>> #define HV_RESULT_NOT_SUPP U64_MAX
>>
>> static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>> { return hv_result(HV_RESULT_NOT_SUPP); }
>>
> The idea here was to copy what hv_do_hypercall does returning U64_MAX in case the hypercall
> page is missing, which will hv_result() into an invalid status code. A special value for
> that status could make this pattern clearer.
Agreed, having a name for that status would be helpful, but we don't want to diverge too much from the hypervisor
definitions, especially if we're going to change it later again anyway.
I'd want to call out that this isn't a "real"
> Hyper-V status code somehow. HV_STATUS's are 16 bits, so it would look more like:
>
> /* "LINUX" because this isn't really a status from the hypervisor.. */
> #define HV_STATUS_LINUX_FAIL 0xFFFF
> static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
> { return HV_STATUS_LINUX_FAIL; }
>
> Another option: there is another patch coming (which you know of) which maps hypercall
> status codes to regular Linux errors like -EOPNOTSUPP. I could simply merge that patch
> with this one (or make this a series for v2), and that would result in less churn.
> (And leave alone the current use of U64_MAX in hv_do_hypercall, for now).
>
I think that second option is a good idea. The hypervisor status should remain restricted to the functions that are
hv_do_hypercall() or call it directly, while the rest of the code uses standard errno values. I'd suggest making
it a series so each commit does 1 thing.
Thanks,
Easwar (he/him)
On 2/12/2025 3:25 PM, Easwar Hariharan wrote:
> On 2/12/2025 3:01 PM, Nuno Das Neves wrote:
>> On 2/11/2025 9:47 PM, Easwar Hariharan wrote:
>>> On 2/11/2025 2:21 PM, Nuno Das Neves wrote:
>>>> Introduce CONFIG_MSHV_ROOT as a tristate to enable root partition
>>>> booting and future mshv driver functionality.
>>>>
>>>> Change hv_root_partition into a function which always returns false
>>>> if CONFIG_MSHV_ROOT=n.
>>>>
>>>> Introduce hv_current_partition_type to store the type of partition
>>>> (guest, root, or other kinds in future), and hv_identify_partition_type()
>>>> to it up early in Hyper-V initialization.
>>>
>>> ...to *set* it up early?
>>>
>> Yep! Thanks for catching that
>>
>>>>
>>>> Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
>>>> ---
>>>> Depends on
>>>> https://lore.kernel.org/linux-hyperv/1738955002-20821-3-git-send-email-nunodasneves@linux.microsoft.com/
>>>>
>>>> arch/arm64/hyperv/mshyperv.c | 2 ++
>>>> arch/x86/hyperv/hv_init.c | 10 ++++----
>>>> arch/x86/kernel/cpu/mshyperv.c | 24 ++----------------
>>>> drivers/clocksource/hyperv_timer.c | 4 +--
>>>> drivers/hv/Kconfig | 12 +++++++++
>>>> drivers/hv/Makefile | 3 ++-
>>>> drivers/hv/hv.c | 10 ++++----
>>>> drivers/hv/hv_common.c | 32 +++++++++++++++++++-----
>>>> drivers/hv/vmbus_drv.c | 2 +-
>>>> drivers/iommu/hyperv-iommu.c | 4 +--
>>>> include/asm-generic/mshyperv.h | 39 +++++++++++++++++++++++++-----
>>>> 11 files changed, 92 insertions(+), 50 deletions(-)
>>>>
>>>
>>> <snip>
>>>
>>>> +
>>>> +void hv_identify_partition_type(void)
>>>> +{
>>>> + /*
>>>> + * Check partition creation and cpu management privileges
>>>> + *
>>>> + * Hyper-V should never specify running as root and as a Confidential
>>>> + * VM. But to protect against a compromised/malicious Hyper-V trying
>>>> + * to exploit root behavior to expose Confidential VM memory, ignore
>>>> + * the root partition setting if also a Confidential VM.
>>>> + */
>>>> + if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
>>>> + (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
>>>> + !(ms_hyperv.priv_high & HV_ISOLATION)) {
>>>> + hv_current_partition_type = HV_PARTITION_TYPE_ROOT;
>>>> + pr_info("Hyper-V: running as root partition\n");
>>>> + } else {
>>>> + hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
>>>> + }
>>>> +}
>>>
>>> This should assume GUEST as default and modify to ROOT if all the checks pass.
>>>
>> It is doing that, isn't it?
>>
>> In fact the 'else' branch here is redundant and just there for additional clarity.
>>
>> hv_current_partition_type is zeroed (so GUEST) by default, but I could make that explicit
>> if you prefer:
>
> Yes, explicit is better, but see comment below.
>
>> +enum hv_partition_type hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
>>
>> How does that sound? Am I misunderstanding something here?
>
> I'd suggest centralizing that in this function, instead of having it spread in 2 places.
> Since your commit message hints at future partition types, it's ideal to have this function be
> a central clearing house, which I suppose is the intent. The preferred pattern in general, and what I'm
> suggesting, is something like this:
>
> void hv_identify_partition_type(void)
> {
> /* Assume guest role */
> hv_current_partition_type = HV_PARTITION_TYPE_GUEST;
>
> /*
> * Check partition creation and cpu management privileges
> *
> * Hyper-V should never specify running as root and as a Confidential
> * VM. But to protect against a compromised/malicious Hyper-V trying
> * to exploit root behavior to expose Confidential VM memory, ignore
> * the root partition setting if also a Confidential VM.
> */
> if ((ms_hyperv.priv_high & HV_CREATE_PARTITIONS) &&
> (ms_hyperv.priv_high & HV_CPU_MANAGEMENT) &&
> !(ms_hyperv.priv_high & HV_ISOLATION)) {
> hv_current_partition_type = HV_PARTITION_TYPE_ROOT;
> pr_info("Hyper-V: running as root partition\n");
> }
> }
>
Fair enough, happy to do it this way.
>>
>>> <snip>
>>>
>>>> +static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>>>> +{
>>>> + return hv_result(U64_MAX);
>>>> +}
>>>
>>> Is there value in perhaps #defining hv_result_<whatever this is> as U64_MAX and returning that for documentation?
>>> For e.g. assuming this is something like EOPNOTSUPP
>>>
>>> #define HV_RESULT_NOT_SUPP U64_MAX
>>>
>>> static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>>> { return hv_result(HV_RESULT_NOT_SUPP); }
>>>
>> The idea here was to copy what hv_do_hypercall does returning U64_MAX in case the hypercall
>> page is missing, which will hv_result() into an invalid status code. A special value for
>> that status could make this pattern clearer.
>
> Agreed, having a name for that status would be helpful, but we don't want to diverge too much from the hypervisor
> definitions, especially if we're going to change it later again anyway.
>
>> I'd want to call out that this isn't a "real"
>> Hyper-V status code somehow. HV_STATUS's are 16 bits, so it would look more like:
>>
>> /* "LINUX" because this isn't really a status from the hypervisor.. */
>> #define HV_STATUS_LINUX_FAIL 0xFFFF
>> static inline int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
>> { return HV_STATUS_LINUX_FAIL; }
>>
>> Another option: there is another patch coming (which you know of) which maps hypercall
>> status codes to regular Linux errors like -EOPNOTSUPP. I could simply merge that patch
>> with this one (or make this a series for v2), and that would result in less churn.
>> (And leave alone the current use of U64_MAX in hv_do_hypercall, for now).
>>
>
> I think that second option is a good idea. The hypervisor status should remain restricted to the functions that are
> hv_do_hypercall() or call it directly, while the rest of the code uses standard errno values. I'd suggest making
> it a series so each commit does 1 thing.
>
Ok I'll do that, thanks.
Nuno
> Thanks,
> Easwar (he/him)
© 2016 - 2025 Red Hat, Inc.