From: Tianyu Lan <Tianyu.Lan@microsoft.com>
Add new hvcall guest address host visibility support to mark
memory visible to host. Call it inside set_memory_decrypted
/encrypted().
Signed-off-by: Tianyu Lan <Tianyu.Lan@microsoft.com>
---
arch/x86/hyperv/Makefile | 2 +-
arch/x86/hyperv/ivm.c | 112 +++++++++++++++++++++++++++++
arch/x86/include/asm/hyperv-tlfs.h | 18 +++++
arch/x86/include/asm/mshyperv.h | 3 +-
arch/x86/mm/pat/set_memory.c | 6 +-
include/asm-generic/hyperv-tlfs.h | 1 +
6 files changed, 139 insertions(+), 3 deletions(-)
create mode 100644 arch/x86/hyperv/ivm.c
diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile
index 48e2c51464e8..5d2de10809ae 100644
--- a/arch/x86/hyperv/Makefile
+++ b/arch/x86/hyperv/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-y := hv_init.o mmu.o nested.o irqdomain.o
+obj-y := hv_init.o mmu.o nested.o irqdomain.o ivm.o
obj-$(CONFIG_X86_64) += hv_apic.o hv_proc.o
ifdef CONFIG_X86_64
diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c
new file mode 100644
index 000000000000..24a58795abd8
--- /dev/null
+++ b/arch/x86/hyperv/ivm.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V Isolation VM interface with paravisor and hypervisor
+ *
+ * Author:
+ * Tianyu Lan <Tianyu.Lan@microsoft.com>
+ */
+
+#include <linux/hyperv.h>
+#include <linux/types.h>
+#include <linux/bitfield.h>
+#include <linux/slab.h>
+#include <asm/io.h>
+#include <asm/mshyperv.h>
+
+/*
+ * hv_mark_gpa_visibility - Set pages visible to host via hvcall.
+ *
+ * In Isolation VM, all guest memory is encripted from host and guest
+ * needs to set memory visible to host via hvcall before sharing memory
+ * with host.
+ */
+int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility)
+{
+ struct hv_gpa_range_for_visibility **input_pcpu, *input;
+ u16 pages_processed;
+ u64 hv_status;
+ unsigned long flags;
+
+ /* no-op if partition isolation is not enabled */
+ if (!hv_is_isolation_supported())
+ return 0;
+
+ if (count > HV_MAX_MODIFY_GPA_REP_COUNT) {
+ pr_err("Hyper-V: GPA count:%d exceeds supported:%lu\n", count,
+ HV_MAX_MODIFY_GPA_REP_COUNT);
+ return -EINVAL;
+ }
+
+ local_irq_save(flags);
+ input_pcpu = (struct hv_gpa_range_for_visibility **)
+ this_cpu_ptr(hyperv_pcpu_input_arg);
+ input = *input_pcpu;
+ if (unlikely(!input)) {
+ local_irq_restore(flags);
+ return -EINVAL;
+ }
+
+ input->partition_id = HV_PARTITION_ID_SELF;
+ input->host_visibility = visibility;
+ input->reserved0 = 0;
+ input->reserved1 = 0;
+ memcpy((void *)input->gpa_page_list, pfn, count * sizeof(*pfn));
+ hv_status = hv_do_rep_hypercall(
+ HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY, count,
+ 0, input, &pages_processed);
+ local_irq_restore(flags);
+
+ if (!(hv_status & HV_HYPERCALL_RESULT_MASK))
+ return 0;
+
+ return hv_status & HV_HYPERCALL_RESULT_MASK;
+}
+EXPORT_SYMBOL(hv_mark_gpa_visibility);
+
+/*
+ * hv_set_mem_host_visibility - Set specified memory visible to host.
+ *
+ * In Isolation VM, all guest memory is encrypted from host and guest
+ * needs to set memory visible to host via hvcall before sharing memory
+ * with host. This function works as wrap of hv_mark_gpa_visibility()
+ * with memory base and size.
+ */
+static int hv_set_mem_host_visibility(void *kbuffer, size_t size, u32 visibility)
+{
+ int pagecount = size >> HV_HYP_PAGE_SHIFT;
+ u64 *pfn_array;
+ int ret = 0;
+ int i, pfn;
+
+ if (!hv_is_isolation_supported() || !ms_hyperv.ghcb_base)
+ return 0;
+
+ pfn_array = kzalloc(HV_HYP_PAGE_SIZE, GFP_KERNEL);
+ if (!pfn_array)
+ return -ENOMEM;
+
+ for (i = 0, pfn = 0; i < pagecount; i++) {
+ pfn_array[pfn] = virt_to_hvpfn(kbuffer + i * HV_HYP_PAGE_SIZE);
+ pfn++;
+
+ if (pfn == HV_MAX_MODIFY_GPA_REP_COUNT || i == pagecount - 1) {
+ ret |= hv_mark_gpa_visibility(pfn, pfn_array, visibility);
+ pfn = 0;
+
+ if (ret)
+ goto err_free_pfn_array;
+ }
+ }
+
+ err_free_pfn_array:
+ kfree(pfn_array);
+ return ret;
+}
+
+int hv_set_mem_enc(unsigned long addr, int numpages, bool enc)
+{
+ return hv_set_mem_host_visibility((void *)addr,
+ numpages * HV_HYP_PAGE_SIZE,
+ enc ? VMBUS_PAGE_NOT_VISIBLE
+ : VMBUS_PAGE_VISIBLE_READ_WRITE);
+}
diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h
index f1366ce609e3..f027b5bf6076 100644
--- a/arch/x86/include/asm/hyperv-tlfs.h
+++ b/arch/x86/include/asm/hyperv-tlfs.h
@@ -276,6 +276,11 @@ enum hv_isolation_type {
#define HV_X64_MSR_TIME_REF_COUNT HV_REGISTER_TIME_REF_COUNT
#define HV_X64_MSR_REFERENCE_TSC HV_REGISTER_REFERENCE_TSC
+/* Hyper-V GPA map flags */
+#define VMBUS_PAGE_NOT_VISIBLE 0
+#define VMBUS_PAGE_VISIBLE_READ_ONLY 1
+#define VMBUS_PAGE_VISIBLE_READ_WRITE 3
+
/*
* Declare the MSR used to setup pages used to communicate with the hypervisor.
*/
@@ -578,4 +583,17 @@ enum hv_interrupt_type {
#include <asm-generic/hyperv-tlfs.h>
+/* All input parameters should be in single page. */
+#define HV_MAX_MODIFY_GPA_REP_COUNT \
+ ((PAGE_SIZE / sizeof(u64)) - 2)
+
+/* HvCallModifySparseGpaPageHostVisibility hypercall */
+struct hv_gpa_range_for_visibility {
+ u64 partition_id;
+ u32 host_visibility:2;
+ u32 reserved0:30;
+ u32 reserved1;
+ u64 gpa_page_list[HV_MAX_MODIFY_GPA_REP_COUNT];
+} __packed;
+
#endif
diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
index 6627cfd2bfba..68dd207c2603 100644
--- a/arch/x86/include/asm/mshyperv.h
+++ b/arch/x86/include/asm/mshyperv.h
@@ -190,7 +190,8 @@ struct irq_domain *hv_create_pci_msi_domain(void);
int hv_map_ioapic_interrupt(int ioapic_id, bool level, int vcpu, int vector,
struct hv_interrupt_entry *entry);
int hv_unmap_ioapic_interrupt(int ioapic_id, struct hv_interrupt_entry *entry);
-
+int hv_mark_gpa_visibility(u16 count, const u64 pfn[], u32 visibility);
+int hv_set_mem_enc(unsigned long addr, int numpages, bool enc);
#else /* CONFIG_HYPERV */
static inline void hyperv_init(void) {}
static inline void hyperv_setup_mmu_ops(void) {}
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index ad8a5c586a35..ba2a22886976 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -29,6 +29,8 @@
#include <asm/proto.h>
#include <asm/memtype.h>
#include <asm/set_memory.h>
+#include <asm/hyperv-tlfs.h>
+#include <asm/mshyperv.h>
#include "../mm_internal.h"
@@ -1986,7 +1988,9 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
int ret;
/* Nothing to do if memory encryption is not active */
- if (!mem_encrypt_active())
+ if (hv_is_isolation_supported())
+ return hv_set_mem_enc(addr, numpages, enc);
+ else if (!mem_encrypt_active())
return 0;
/* Should not be working on unaligned addresses */
diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h
index 56348a541c50..8ed6733d5146 100644
--- a/include/asm-generic/hyperv-tlfs.h
+++ b/include/asm-generic/hyperv-tlfs.h
@@ -158,6 +158,7 @@ struct ms_hyperv_tsc_page {
#define HVCALL_RETARGET_INTERRUPT 0x007e
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+#define HVCALL_MODIFY_SPARSE_GPA_PAGE_HOST_VISIBILITY 0x00db
/* Extended hypercalls */
#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
--
2.25.1
On 7/28/21 7:52 AM, Tianyu Lan wrote:
> @@ -1986,7 +1988,9 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
> int ret;
>
> /* Nothing to do if memory encryption is not active */
> - if (!mem_encrypt_active())
> + if (hv_is_isolation_supported())
> + return hv_set_mem_enc(addr, numpages, enc);
> + else if (!mem_encrypt_active())
> return 0;
__set_memory_enc_dec() is turning into a real mess. SEV, TDX and now
Hyper-V are messing around in here.
It doesn't help that these additions are totally uncommented. Even
worse is that hv_set_mem_enc() was intentionally named "enc" when it
presumably has nothing to do with encryption.
This needs to be refactored. The current __set_memory_enc_dec() can
become __set_memory_enc_pgtable(). It gets used for the hypervisors
that get informed about "encryption" status via page tables: SEV and TDX.
Then, rename hv_set_mem_enc() to hv_set_visible_hcall(). You'll end up
with:
int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
{
if (hv_is_isolation_supported())
return hv_set_visible_hcall(...);
if (mem_encrypt_active() || ...)
return __set_memory_enc_pgtable();
/* Nothing to do */
return 0;
}
That tells the story pretty effectively, in code.
> +int hv_set_mem_enc(unsigned long addr, int numpages, bool enc)
> +{
> + return hv_set_mem_host_visibility((void *)addr,
> + numpages * HV_HYP_PAGE_SIZE,
> + enc ? VMBUS_PAGE_NOT_VISIBLE
> + : VMBUS_PAGE_VISIBLE_READ_WRITE);
> +}
I know this is off in Hyper-V code, but this just makes my eyes bleed.
I'd much rather see something which is less compact but readable.
> +/* Hyper-V GPA map flags */
> +#define VMBUS_PAGE_NOT_VISIBLE 0
> +#define VMBUS_PAGE_VISIBLE_READ_ONLY 1
> +#define VMBUS_PAGE_VISIBLE_READ_WRITE 3
That looks suspiciously like an enum.
Hi Dave:
Thanks for your review.
On 7/28/2021 11:29 PM, Dave Hansen wrote:
> On 7/28/21 7:52 AM, Tianyu Lan wrote:
>> @@ -1986,7 +1988,9 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
>> int ret;
>>
>> /* Nothing to do if memory encryption is not active */
>> - if (!mem_encrypt_active())
>> + if (hv_is_isolation_supported())
>> + return hv_set_mem_enc(addr, numpages, enc);
>> + else if (!mem_encrypt_active())
>> return 0;
>
> __set_memory_enc_dec() is turning into a real mess. SEV, TDX and now
> Hyper-V are messing around in here.
>
> It doesn't help that these additions are totally uncommented. Even
> worse is that hv_set_mem_enc() was intentionally named "enc" when it
> presumably has nothing to do with encryption.
>
> This needs to be refactored. The current __set_memory_enc_dec() can
> become __set_memory_enc_pgtable(). It gets used for the hypervisors
> that get informed about "encryption" status via page tables: SEV and TDX.
>
> Then, rename hv_set_mem_enc() to hv_set_visible_hcall(). You'll end up
> with:
>
> int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
> {
> if (hv_is_isolation_supported())
> return hv_set_visible_hcall(...);
>
> if (mem_encrypt_active() || ...)
> return __set_memory_enc_pgtable();
>
> /* Nothing to do */
> return 0;
> }
>
> That tells the story pretty effectively, in code.
Yes, this is good idea. Thanks for your suggestion.
>
>> +int hv_set_mem_enc(unsigned long addr, int numpages, bool enc)
>> +{
>> + return hv_set_mem_host_visibility((void *)addr,
>> + numpages * HV_HYP_PAGE_SIZE,
>> + enc ? VMBUS_PAGE_NOT_VISIBLE
>> + : VMBUS_PAGE_VISIBLE_READ_WRITE);
>> +}
>
> I know this is off in Hyper-V code, but this just makes my eyes bleed.
> I'd much rather see something which is less compact but readable.
OK. Will update.
>
>> +/* Hyper-V GPA map flags */
>> +#define VMBUS_PAGE_NOT_VISIBLE 0
>> +#define VMBUS_PAGE_VISIBLE_READ_ONLY 1
>> +#define VMBUS_PAGE_VISIBLE_READ_WRITE 3
>
> That looks suspiciously like an enum.
>
OK. Will update.
On Wed, Jul 28, 2021 at 08:29:41AM -0700, Dave Hansen wrote: > __set_memory_enc_dec() is turning into a real mess. SEV, TDX and now > Hyper-V are messing around in here. I was going to suggest a PV_OPS call where the fitting implementation for the guest environment can be plugged in at boot. There is TDX and an SEV(-SNP) case, a Hyper-V case, and likely more coming up from other cloud/hypervisor vendors. Hiding all these behind feature checks is not going to make things cleaner. Regards, Joerg
On 8/2/2021 8:01 PM, Joerg Roedel wrote: > On Wed, Jul 28, 2021 at 08:29:41AM -0700, Dave Hansen wrote: >> __set_memory_enc_dec() is turning into a real mess. SEV, TDX and now >> Hyper-V are messing around in here. > > I was going to suggest a PV_OPS call where the fitting implementation > for the guest environment can be plugged in at boot. There is TDX and an > SEV(-SNP) case, a Hyper-V case, and likely more coming up from other > cloud/hypervisor vendors. Hiding all these behind feature checks is not > going to make things cleaner. > Yes, that makes sense. I will do this in the next version.
On 02.08.21 14:01, Joerg Roedel wrote: > On Wed, Jul 28, 2021 at 08:29:41AM -0700, Dave Hansen wrote: >> __set_memory_enc_dec() is turning into a real mess. SEV, TDX and now >> Hyper-V are messing around in here. > > I was going to suggest a PV_OPS call where the fitting implementation > for the guest environment can be plugged in at boot. There is TDX and an > SEV(-SNP) case, a Hyper-V case, and likely more coming up from other > cloud/hypervisor vendors. Hiding all these behind feature checks is not > going to make things cleaner. As those cases are all mutually exclusive, wouldn't a static_call() be the appropriate solution? Juergen
On Mon, Aug 02, 2021 at 03:11:40PM +0200, Juergen Gross wrote: > As those cases are all mutually exclusive, wouldn't a static_call() be > the appropriate solution? Right, static_call() is even better, thanks.
On 7/28/21 7:52 AM, Tianyu Lan wrote:
> @@ -1986,7 +1988,9 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
> int ret;
>
> /* Nothing to do if memory encryption is not active */
> - if (!mem_encrypt_active())
> + if (hv_is_isolation_supported())
> + return hv_set_mem_enc(addr, numpages, enc);
> + else if (!mem_encrypt_active())
> return 0;
One more thing. If you're going to be patching generic code, please
start using feature checks that can get optimized away at runtime.
hv_is_isolation_supported() doesn't look like the world's cheapest
check. It can't be inlined and costs at least a function call.
These checks could, with basically no effort be wrapped in a header like
this:
static inline bool hv_is_isolation_supported(void)
{
if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
return 0;
// out of line function call:
return __hv_is_isolation_supported();
}
I don't think it would be the end of the world to add an
X86_FEATURE_HYPERV_GUEST, either. There are plenty of bits allocated
for Xen and VMWare.
On 7/29/2021 1:06 AM, Dave Hansen wrote: > On 7/28/21 7:52 AM, Tianyu Lan wrote: >> @@ -1986,7 +1988,9 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc) >> int ret; >> >> /* Nothing to do if memory encryption is not active */ >> - if (!mem_encrypt_active()) >> + if (hv_is_isolation_supported()) >> + return hv_set_mem_enc(addr, numpages, enc); >> + else if (!mem_encrypt_active()) >> return 0; > > One more thing. If you're going to be patching generic code, please > start using feature checks that can get optimized away at runtime. > hv_is_isolation_supported() doesn't look like the world's cheapest > check. It can't be inlined and costs at least a function call. Yes, you are right. How about adding a static branch key for the check of isolation VM? This may reduce the check cost.
On 7/29/21 6:01 AM, Tianyu Lan wrote: > On 7/29/2021 1:06 AM, Dave Hansen wrote: >> On 7/28/21 7:52 AM, Tianyu Lan wrote: >>> @@ -1986,7 +1988,9 @@ static int __set_memory_enc_dec(unsigned long >>> addr, int numpages, bool enc) >>> int ret; >>> /* Nothing to do if memory encryption is not active */ >>> - if (!mem_encrypt_active()) >>> + if (hv_is_isolation_supported()) >>> + return hv_set_mem_enc(addr, numpages, enc); >>> + else if (!mem_encrypt_active()) >>> return 0; >> >> One more thing. If you're going to be patching generic code, please >> start using feature checks that can get optimized away at runtime. >> hv_is_isolation_supported() doesn't look like the world's cheapest >> check. It can't be inlined and costs at least a function call. > > Yes, you are right. How about adding a static branch key for the check > of isolation VM? This may reduce the check cost. I don't think you need a static key. There are basically three choices: 1. Use an existing X86_FEATURE bit. I think there's already one for when you are running under a hypervisor. It's not super precise, but it's better than what you have. 2. Define a new X86_FEATURE bit for when you are running under Hyper-V. 3. Define a new X86_FEATURE bit specifically for Hyper-V isolation VM support. This particular feature might be a little uncommon to deserve its own bit. I'd probably just do #2.
On 7/29/2021 10:09 PM, Dave Hansen wrote:
> On 7/29/21 6:01 AM, Tianyu Lan wrote:
>> On 7/29/2021 1:06 AM, Dave Hansen wrote:
>>> On 7/28/21 7:52 AM, Tianyu Lan wrote:
>>>> @@ -1986,7 +1988,9 @@ static int __set_memory_enc_dec(unsigned long
>>>> addr, int numpages, bool enc)
>>>> int ret;
>>>> /* Nothing to do if memory encryption is not active */
>>>> - if (!mem_encrypt_active())
>>>> + if (hv_is_isolation_supported())
>>>> + return hv_set_mem_enc(addr, numpages, enc);
>>>> + else if (!mem_encrypt_active())
>>>> return 0;
>>>
>>> One more thing. If you're going to be patching generic code, please
>>> start using feature checks that can get optimized away at runtime.
>>> hv_is_isolation_supported() doesn't look like the world's cheapest
>>> check. It can't be inlined and costs at least a function call.
>>
>> Yes, you are right. How about adding a static branch key for the check
>> of isolation VM? This may reduce the check cost.
>
> I don't think you need a static key.
>
> There are basically three choices:
> 1. Use an existing X86_FEATURE bit. I think there's already one for
> when you are running under a hypervisor. It's not super precise,
> but it's better than what you have.
> 2. Define a new X86_FEATURE bit for when you are running under
> Hyper-V.
> 3. Define a new X86_FEATURE bit specifically for Hyper-V isolation VM
> support. This particular feature might be a little uncommon to
> deserve its own bit.
>
> I'd probably just do #2.
>
There is x86_hyper_type to identify hypervisor type and we may check
this variable after checking X86_FEATURE_HYPERVISOR.
static inline bool hv_is_isolation_supported(void)
{
if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
return 0;
if (x86_hyper_type != X86_HYPER_MS_HYPERV)
return 0;
// out of line function call:
return __hv_is_isolation_supported();
}
On 7/29/21 8:02 AM, Tianyu Lan wrote:
>>
>
> There is x86_hyper_type to identify hypervisor type and we may check
> this variable after checking X86_FEATURE_HYPERVISOR.
>
> static inline bool hv_is_isolation_supported(void)
> {
> if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
> return 0;
>
> if (x86_hyper_type != X86_HYPER_MS_HYPERV)
> return 0;
>
> // out of line function call:
> return __hv_is_isolation_supported();
> }
Looks fine. You just might want to use this existing helper:
static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
{
return x86_hyper_type == type;
}
On 7/30/2021 12:05 AM, Dave Hansen wrote:
> On 7/29/21 8:02 AM, Tianyu Lan wrote:
>>>
>>
>> There is x86_hyper_type to identify hypervisor type and we may check
>> this variable after checking X86_FEATURE_HYPERVISOR.
>>
>> static inline bool hv_is_isolation_supported(void)
>> {
>> if (!cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
>> return 0;
>>
>> if (x86_hyper_type != X86_HYPER_MS_HYPERV)
>> return 0;
>>
>> // out of line function call:
>> return __hv_is_isolation_supported();
>> }
>
> Looks fine. You just might want to use this existing helper:
>
> static inline bool hypervisor_is_type(enum x86_hypervisor_type type)
> {
> return x86_hyper_type == type;
> }
>
Yes,thanks for suggestion and will update in the next version.
© 2016 - 2026 Red Hat, Inc.