This change introduces the functions required to perform dirty page
tracking to speed up migrations. We are using the sync, global_start,
and global_stop hooks.
The sync is implemented in batches.
Before we can disable the dirty page tracking we have to set all dirty bits.
Signed-off-by: Magnus Kulke <magnuskulke@linux.microsoft.com>
---
accel/mshv/mem.c | 211 ++++++++++++++++++++++++++++++++++++++
accel/mshv/mshv-all.c | 3 +
include/system/mshv_int.h | 5 +
3 files changed, 219 insertions(+)
diff --git a/accel/mshv/mem.c b/accel/mshv/mem.c
index e55c38d4db..820f87ef0c 100644
--- a/accel/mshv/mem.c
+++ b/accel/mshv/mem.c
@@ -12,10 +12,13 @@
#include "qemu/osdep.h"
#include "qemu/error-report.h"
+#include "qapi/error.h"
#include "linux/mshv.h"
#include "system/address-spaces.h"
#include "system/mshv.h"
#include "system/mshv_int.h"
+#include "hw/hyperv/hvhdk_mini.h"
+#include "system/physmem.h"
#include "exec/memattrs.h"
#include <sys/ioctl.h>
#include "trace.h"
@@ -211,3 +214,211 @@ void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
abort();
}
}
+
+static int enable_dirty_page_tracking(int vm_fd)
+{
+ int ret;
+ struct hv_input_set_partition_property in = {0};
+ struct mshv_root_hvcall args = {0};
+
+ in.property_code = HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING;
+ in.property_value = 1;
+
+ args.code = HVCALL_SET_PARTITION_PROPERTY;
+ args.in_sz = sizeof(in);
+ args.in_ptr = (uint64_t)∈
+
+ ret = mshv_hvcall(vm_fd, &args);
+ if (ret < 0) {
+ error_report("Failed to enable dirty page tracking: %s",
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Retrieve dirty page bitmap for a GPA range, clearing the dirty bits
+ * atomically. Large ranges are handled in batches.
+ */
+static int get_dirty_log(int vm_fd, uint64_t base_pfn, uint64_t page_count,
+ unsigned long *bitmap, size_t bitmap_size)
+{
+ uint64_t batch, bitmap_offset, completed = 0;
+ struct mshv_gpap_access_bitmap args = {0};
+ int ret;
+
+ QEMU_BUILD_BUG_ON(MSHV_DIRTY_PAGES_BATCH_SIZE % BITS_PER_LONG != 0);
+ assert(bitmap_size >= ROUND_UP(page_count, BITS_PER_LONG) / 8);
+
+ while (completed < page_count) {
+ batch = MIN(MSHV_DIRTY_PAGES_BATCH_SIZE, page_count - completed);
+ bitmap_offset = completed / BITS_PER_LONG;
+
+ args.access_type = MSHV_GPAP_ACCESS_TYPE_DIRTY;
+ args.access_op = MSHV_GPAP_ACCESS_OP_CLEAR;
+ args.page_count = batch;
+ args.gpap_base = base_pfn + completed;
+ args.bitmap_ptr = (uint64_t)(bitmap + bitmap_offset);
+
+ ret = ioctl(vm_fd, MSHV_GET_GPAP_ACCESS_BITMAP, &args);
+ if (ret < 0) {
+ error_report("Failed to get dirty log (base_pfn=0x%" PRIx64
+ " batch=%" PRIu64 "): %s",
+ base_pfn + completed, batch, strerror(errno));
+ return -1;
+ }
+ completed += batch;
+ }
+
+ return 0;
+}
+
+bool mshv_log_global_start(MemoryListener *listener, Error **errp)
+{
+ int ret;
+
+ ret = enable_dirty_page_tracking(mshv_state->vm);
+ if (ret < 0) {
+ error_setg_errno(errp, -ret, "Failed to enable dirty page tracking");
+ return false;
+ }
+ return true;
+}
+
+static int disable_dirty_page_tracking(int vm_fd)
+{
+ int ret;
+ struct hv_input_set_partition_property in = {0};
+ struct mshv_root_hvcall args = {0};
+
+ in.property_code = HV_PARTITION_PROPERTY_GPA_PAGE_ACCESS_TRACKING;
+ in.property_value = 0;
+
+ args.code = HVCALL_SET_PARTITION_PROPERTY;
+ args.in_sz = sizeof(in);
+ args.in_ptr = (uint64_t)∈
+
+ ret = mshv_hvcall(vm_fd, &args);
+ if (ret < 0) {
+ error_report("Failed to disable dirty page tracking: %s",
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int set_dirty_pages(int vm_fd, uint64_t base_pfn, uint64_t page_count)
+{
+ uint64_t batch, completed = 0;
+ unsigned long bitmap[MSHV_DIRTY_PAGES_BATCH_SIZE / BITS_PER_LONG];
+ struct mshv_gpap_access_bitmap args = {0};
+ int ret;
+
+ while (completed < page_count) {
+ batch = MIN(MSHV_DIRTY_PAGES_BATCH_SIZE, page_count - completed);
+
+ args.access_type = MSHV_GPAP_ACCESS_TYPE_DIRTY;
+ args.access_op = MSHV_GPAP_ACCESS_OP_SET;
+ args.page_count = batch;
+ args.gpap_base = base_pfn + completed;
+ args.bitmap_ptr = (uint64_t)bitmap;
+
+ ret = ioctl(vm_fd, MSHV_GET_GPAP_ACCESS_BITMAP, &args);
+ if (ret < 0) {
+ error_report("Failed to set dirty pages (base_pfn=0x%" PRIx64
+ " batch=%" PRIu64 "): %s",
+ base_pfn + completed, batch, strerror(errno));
+ return -1;
+ }
+ completed += batch;
+ }
+
+ return 0;
+}
+
+static bool set_dirty_bits_cb(Int128 start, Int128 len, const MemoryRegion *mr,
+ hwaddr offset_in_region, void *opaque)
+{
+ int ret, *errp = opaque;
+ hwaddr gpa, size;
+ uint64_t page_count, base_pfn;
+
+ gpa = int128_get64(start);
+ size = int128_get64(len);
+ page_count = size >> MSHV_PAGE_SHIFT;
+ base_pfn = gpa >> MSHV_PAGE_SHIFT;
+
+ if (!mr->ram || mr->readonly) {
+ return false;
+ }
+
+ if (page_count == 0) {
+ return false;
+ }
+
+ ret = set_dirty_pages(mshv_state->vm, base_pfn, page_count);
+
+ /* true aborts the iteration, which is what we want if there's an error */
+ if (ret < 0) {
+ *errp = ret;
+ return true;
+ }
+
+ return false;
+}
+
+void mshv_log_global_stop(MemoryListener *listener)
+{
+ int err = 0;
+ /* MSHV requires all dirty bits to be set before disabling tracking. */
+ FlatView *fv = address_space_to_flatview(&address_space_memory);
+ flatview_for_each_range(fv, set_dirty_bits_cb, &err);
+
+ if (err < 0) {
+ error_report("Failed to set dirty bits before disabling tracking");
+ }
+
+ disable_dirty_page_tracking(mshv_state->vm);
+}
+
+void mshv_log_sync(MemoryListener *listener, MemoryRegionSection *section)
+{
+ hwaddr size, start_addr, mr_offset;
+ uint64_t page_count, base_pfn;
+ size_t bitmap_size;
+ unsigned long *bitmap;
+ ram_addr_t ram_addr;
+ int ret;
+ MemoryRegion *mr = section->mr;
+
+ if (!memory_region_is_ram(mr) || memory_region_is_rom(mr)) {
+ return;
+ }
+
+ size = align_section(section, &start_addr);
+ if (!size) {
+ return;
+ }
+
+ page_count = size >> MSHV_PAGE_SHIFT;
+ base_pfn = start_addr >> MSHV_PAGE_SHIFT;
+ bitmap_size = ROUND_UP(page_count, BITS_PER_LONG) / 8;
+ bitmap = g_malloc0(bitmap_size);
+
+ ret = get_dirty_log(mshv_state->vm, base_pfn, page_count, bitmap,
+ bitmap_size);
+ if (ret < 0) {
+ g_free(bitmap);
+ return;
+ }
+
+ mr_offset = section->offset_within_region + start_addr -
+ section->offset_within_address_space;
+ ram_addr = memory_region_get_ram_addr(mr) + mr_offset;
+
+ physical_memory_set_dirty_lebitmap(bitmap, ram_addr, page_count);
+ g_free(bitmap);
+}
diff --git a/accel/mshv/mshv-all.c b/accel/mshv/mshv-all.c
index 45fe1ef468..87ed785302 100644
--- a/accel/mshv/mshv-all.c
+++ b/accel/mshv/mshv-all.c
@@ -546,6 +546,9 @@ static MemoryListener mshv_memory_listener = {
.region_del = mem_region_del,
.eventfd_add = mem_ioeventfd_add,
.eventfd_del = mem_ioeventfd_del,
+ .log_sync = mshv_log_sync,
+ .log_global_start = mshv_log_global_start,
+ .log_global_stop = mshv_log_global_stop,
};
static MemoryListener mshv_io_listener = {
diff --git a/include/system/mshv_int.h b/include/system/mshv_int.h
index c24efc8675..ddbdd76076 100644
--- a/include/system/mshv_int.h
+++ b/include/system/mshv_int.h
@@ -31,6 +31,8 @@ struct mshv_get_set_vp_state;
#define MSHV_HV_INTERRUPTION_TYPE_PRIV_SW_EXC 5
#define MSHV_HV_INTERRUPTION_TYPE_SW_EXC 6
+#define MSHV_DIRTY_PAGES_BATCH_SIZE 0x10000
+
typedef struct hyperv_message hv_message;
typedef struct MshvHvCallArgs {
@@ -128,6 +130,9 @@ int mshv_guest_mem_write(uint64_t gpa, const uint8_t *data, uintptr_t size,
bool is_secure_mode);
void mshv_set_phys_mem(MshvMemoryListener *mml, MemoryRegionSection *section,
bool add);
+void mshv_log_sync(MemoryListener *listener, MemoryRegionSection *section);
+bool mshv_log_global_start(MemoryListener *listener, Error **errp);
+void mshv_log_global_stop(MemoryListener *listener);
/* msr */
int mshv_init_msrs(const CPUState *cpu);
--
2.34.1