[RFC PATCH 09/25] accel/mshv: Register guest memory regions with hypervisor

Magnus Kulke posted 25 patches 8 months, 3 weeks ago
There is a newer version of this series
[RFC PATCH 09/25] accel/mshv: Register guest memory regions with hypervisor
Posted by Magnus Kulke 8 months, 3 weeks ago
Handle region_add events by invoking the MSHV memory registration
ioctl to map guest memory into the hypervisor partition. This allows
the guest to access memory through MSHV-managed mappings.

Note that this assumes the hypervisor will accept regions that overlap
in userspace_addr. Currently that's not the case, it will be addressed
in a later commit in the series.

Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
 accel/mshv/mem.c        | 116 ++++++++++++++++++++++++++++++++++++++--
 accel/mshv/trace-events |   1 +
 include/system/mshv.h   |  11 ++++
 3 files changed, 125 insertions(+), 3 deletions(-)

diff --git a/accel/mshv/mem.c b/accel/mshv/mem.c
index eddd83ae83..2bbeae4f4a 100644
--- a/accel/mshv/mem.c
+++ b/accel/mshv/mem.c
@@ -13,13 +13,123 @@
 
 #include "qemu/osdep.h"
 #include "qemu/error-report.h"
+#include "hw/hyperv/linux-mshv.h"
 #include "system/address-spaces.h"
 #include "system/mshv.h"
+#include "exec/memattrs.h"
+#include <sys/ioctl.h>
+#include "trace.h"
+
+static int set_guest_memory(int vm_fd, const mshv_user_mem_region *region)
+{
+    int ret;
+
+    ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
+    if (ret < 0) {
+        error_report("failed to set guest memory");
+        return -errno;
+    }
+
+    return 0;
+}
+
+static int map_or_unmap(int vm_fd, const MshvMemoryRegion *mr, bool add)
+{
+    struct mshv_user_mem_region region = {0};
+
+    region.guest_pfn = mr->guest_phys_addr >> MSHV_PAGE_SHIFT;
+    region.size = mr->memory_size;
+    region.userspace_addr = mr->userspace_addr;
+
+    if (!add) {
+        region.flags |= (1 << MSHV_SET_MEM_BIT_UNMAP);
+        return set_guest_memory(vm_fd, &region);
+    }
+
+    region.flags = (1 << MSHV_SET_MEM_BIT_EXECUTABLE);
+    if (!mr->readonly) {
+        region.flags |= (1 << MSHV_SET_MEM_BIT_WRITABLE);
+    }
+
+    return set_guest_memory(vm_fd, &region);
+}
+
+static int set_memory(const MshvMemoryRegion *mshv_mr, bool add)
+{
+    int ret = 0;
+
+    if (!mshv_mr) {
+        error_report("Invalid mshv_mr");
+        return -1;
+    }
+
+    trace_mshv_set_memory(add, mshv_mr->guest_phys_addr,
+                          mshv_mr->memory_size,
+                          mshv_mr->userspace_addr, mshv_mr->readonly,
+                          ret);
+    return map_or_unmap(mshv_state->vm, mshv_mr, add);
+}
+
+/*
+ * Calculate and align the start address and the size of the section.
+ * Return the size. If the size is 0, the aligned section is empty.
+ */
+static hwaddr align_section(MemoryRegionSection *section, hwaddr *start)
+{
+    hwaddr size = int128_get64(section->size);
+    hwaddr delta, aligned;
+
+    /*
+     * works in page size chunks, but the function may be called
+     * with sub-page size and unaligned start address. Pad the start
+     * address to next and truncate size to previous page boundary.
+     */
+    aligned = ROUND_UP(section->offset_within_address_space,
+                       qemu_real_host_page_size());
+    delta = aligned - section->offset_within_address_space;
+    *start = aligned;
+    if (delta > size) {
+        return 0;
+    }
+
+    return (size - delta) & qemu_real_host_page_mask();
+}
 
 void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
                        bool add)
 {
-	error_report("unimplemented");
-	abort();
-}
+    int ret = 0;
+    MemoryRegion *area = section->mr;
+    bool writable = !area->readonly && !area->rom_device;
+    hwaddr start_addr, mr_offset, size;
+    void *ram;
+    MshvMemoryRegion tmp, *mshv_mr = &tmp;
+
+    if (!memory_region_is_ram(area)) {
+        if (writable) {
+            return;
+        }
+    }
+
+    size = align_section(section, &start_addr);
+    if (!size) {
+        return;
+    }
+
+    mr_offset = section->offset_within_region + start_addr -
+                section->offset_within_address_space;
 
+    ram = memory_region_get_ram_ptr(area) + mr_offset;
+
+    memset(mshv_mr, 0, sizeof(*mshv_mr));
+    mshv_mr->guest_phys_addr = start_addr;
+    mshv_mr->memory_size = size;
+    mshv_mr->readonly = !writable;
+    mshv_mr->userspace_addr = (uint64_t)ram;
+
+    ret = set_memory(mshv_mr, add);
+    if (ret < 0) {
+        error_report("Failed to set memory region");
+        abort();
+    }
+}
diff --git a/accel/mshv/trace-events b/accel/mshv/trace-events
index f99e8c5a41..63625192ec 100644
--- a/accel/mshv/trace-events
+++ b/accel/mshv/trace-events
@@ -1,3 +1,4 @@
 # See docs/devel/tracing.rst for syntax documentation.
 
+mshv_set_memory(bool add, uint64_t gpa, uint64_t size, uint64_t user_addr, bool readonly, int ret) "[add = %d] gpa = %lx size = %lx user = %lx readonly = %d result = %d"
 mshv_hvcall_args(const char* hvcall, uint16_t code, uint16_t in_sz) "built args for '%s' code: %d in_sz: %d"
diff --git a/include/system/mshv.h b/include/system/mshv.h
index 398cda3254..bed28b48a9 100644
--- a/include/system/mshv.h
+++ b/include/system/mshv.h
@@ -32,6 +32,8 @@
 #define CONFIG_MSHV_IS_POSSIBLE
 #endif
 
+#define MSHV_PAGE_SHIFT 12
+
 
 #ifdef CONFIG_MSHV_IS_POSSIBLE
 extern bool mshv_allowed;
@@ -84,6 +86,15 @@ int mshv_hvcall(int mshv_fd, const struct mshv_root_hvcall *args);
 
 
 /* memory */
+typedef struct MshvMemoryRegion {
+    uint64_t guest_phys_addr;
+    uint64_t memory_size;
+    uint64_t userspace_addr;
+    bool readonly;
+} MshvMemoryRegion;
+
+int mshv_add_mem(int vm_fd, const MshvMemoryRegion *mr);
+int mshv_remove_mem(int vm_fd, const MshvMemoryRegion *mr);
 void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
                        bool add);
 /* interrupt */
-- 
2.34.1
Re: [RFC PATCH 09/25] accel/mshv: Register guest memory regions with hypervisor
Posted by Wei Liu 8 months, 3 weeks ago
On Tue, May 20, 2025 at 01:30:02PM +0200, Magnus Kulke wrote:
> Handle region_add events by invoking the MSHV memory registration
> ioctl to map guest memory into the hypervisor partition. This allows
> the guest to access memory through MSHV-managed mappings.
> 
> Note that this assumes the hypervisor will accept regions that overlap
> in userspace_addr. Currently that's not the case, it will be addressed
> in a later commit in the series.
> 
> Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
> ---
>  accel/mshv/mem.c        | 116 ++++++++++++++++++++++++++++++++++++++--
>  accel/mshv/trace-events |   1 +
>  include/system/mshv.h   |  11 ++++
>  3 files changed, 125 insertions(+), 3 deletions(-)
> 
> diff --git a/accel/mshv/mem.c b/accel/mshv/mem.c
> index eddd83ae83..2bbeae4f4a 100644
> --- a/accel/mshv/mem.c
> +++ b/accel/mshv/mem.c
> @@ -13,13 +13,123 @@
>  
>  #include "qemu/osdep.h"
>  #include "qemu/error-report.h"
> +#include "hw/hyperv/linux-mshv.h"
>  #include "system/address-spaces.h"
>  #include "system/mshv.h"
> +#include "exec/memattrs.h"
> +#include <sys/ioctl.h>
> +#include "trace.h"
> +
> +static int set_guest_memory(int vm_fd, const mshv_user_mem_region *region)
> +{
> +    int ret;
> +
> +    ret = ioctl(vm_fd, MSHV_SET_GUEST_MEMORY, region);
> +    if (ret < 0) {
> +        error_report("failed to set guest memory");
> +        return -errno;
> +    }
> +
> +    return 0;
> +}
> +
> +static int map_or_unmap(int vm_fd, const MshvMemoryRegion *mr, bool add)

Change "add" to "map" to match the name of the function.

> +{
> +    struct mshv_user_mem_region region = {0};
> +
> +    region.guest_pfn = mr->guest_phys_addr >> MSHV_PAGE_SHIFT;
> +    region.size = mr->memory_size;
> +    region.userspace_addr = mr->userspace_addr;
> +
> +    if (!add) {
> +        region.flags |= (1 << MSHV_SET_MEM_BIT_UNMAP);

Use BIT() like you did in other places?

> +        return set_guest_memory(vm_fd, &region);
> +    }
> +
> +    region.flags = (1 << MSHV_SET_MEM_BIT_EXECUTABLE);

Should this be always set? Is there a way to get more information from
the caller or QEMU's core memory region management logic?

> +    if (!mr->readonly) {
> +        region.flags |= (1 << MSHV_SET_MEM_BIT_WRITABLE);
> +    }
> +
> +    return set_guest_memory(vm_fd, &region);
> +}
> +
> +static int set_memory(const MshvMemoryRegion *mshv_mr, bool add)
> +{
> +    int ret = 0;
> +
> +    if (!mshv_mr) {
> +        error_report("Invalid mshv_mr");
> +        return -1;
> +    }
> +
> +    trace_mshv_set_memory(add, mshv_mr->guest_phys_addr,
> +                          mshv_mr->memory_size,
> +                          mshv_mr->userspace_addr, mshv_mr->readonly,
> +                          ret);
> +    return map_or_unmap(mshv_state->vm, mshv_mr, add);
> +}
> +
> +/*
> + * Calculate and align the start address and the size of the section.
> + * Return the size. If the size is 0, the aligned section is empty.
> + */
> +static hwaddr align_section(MemoryRegionSection *section, hwaddr *start)
> +{
> +    hwaddr size = int128_get64(section->size);
> +    hwaddr delta, aligned;
> +
> +    /*
> +     * works in page size chunks, but the function may be called
> +     * with sub-page size and unaligned start address. Pad the start
> +     * address to next and truncate size to previous page boundary.
> +     */
> +    aligned = ROUND_UP(section->offset_within_address_space,
> +                       qemu_real_host_page_size());
> +    delta = aligned - section->offset_within_address_space;
> +    *start = aligned;
> +    if (delta > size) {
> +        return 0;
> +    }
> +
> +    return (size - delta) & qemu_real_host_page_mask();
> +}
>  
>  void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
>                         bool add)
>  {
> -	error_report("unimplemented");
> -	abort();
> -}
> +    int ret = 0;
> +    MemoryRegion *area = section->mr;
> +    bool writable = !area->readonly && !area->rom_device;
> +    hwaddr start_addr, mr_offset, size;
> +    void *ram;
> +    MshvMemoryRegion tmp, *mshv_mr = &tmp;
> +
> +    if (!memory_region_is_ram(area)) {
> +        if (writable) {
> +            return;
> +        }
> +    }
> +

I don't follow the check here. Can you put in a comment?

Thanks,
Wei.

> +    size = align_section(section, &start_addr);
> +    if (!size) {
> +        return;
> +    }
> +
> +    mr_offset = section->offset_within_region + start_addr -
> +                section->offset_within_address_space;
>  
> +    ram = memory_region_get_ram_ptr(area) + mr_offset;
> +
> +    memset(mshv_mr, 0, sizeof(*mshv_mr));
> +    mshv_mr->guest_phys_addr = start_addr;
> +    mshv_mr->memory_size = size;
> +    mshv_mr->readonly = !writable;
> +    mshv_mr->userspace_addr = (uint64_t)ram;
> +
> +    ret = set_memory(mshv_mr, add);
> +    if (ret < 0) {
> +        error_report("Failed to set memory region");
> +        abort();
> +    }
> +}
Re: [RFC PATCH 09/25] accel/mshv: Register guest memory regions with hypervisor
Posted by Magnus Kulke 8 months, 2 weeks ago
On Tue, May 20, 2025 at 08:07:27PM +0000, Wei Liu wrote:
> On Tue, May 20, 2025 at 01:30:02PM +0200, Magnus Kulke wrote:
> > Handle region_add events by invoking the MSHV memory registration
> > +        return set_guest_memory(vm_fd, &region);
> > +    }
> > +
> > +    region.flags = (1 << MSHV_SET_MEM_BIT_EXECUTABLE);
> 
> Should this be always set? Is there a way to get more information from
> the caller or QEMU's core memory region management logic?
> 

HVF always sets the bit and as far as I can tell KVM doesn't have a
KVM_MEM_EXECUTE flag, so it's implied.

Still, there might be some criteria to determine whether a region is
executable or not, I'll look further into that.