Add memory-listener object which is used to keep the view of the RAM
in sync between QEMU and remote process.
A MemoryListener is registered for system-memory AddressSpace. The
listener sends SYNC_SYSMEM message to the remote process when memory
listener commits the changes to memory, the remote process receives
the message and processes it in the handler for SYNC_SYSMEM message.
TODO: No need to create object for remote memory listener.
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
---
Makefile.target | 3 +
hw/proxy/memory-sync.c | 212 +++++++++++++++++++++++++++++++++++++++++
hw/proxy/qemu-proxy.c | 5 +
include/hw/proxy/memory-sync.h | 37 +++++++
include/hw/proxy/qemu-proxy.h | 5 +
remote/remote-main.c | 11 +++
6 files changed, 273 insertions(+)
create mode 100644 hw/proxy/memory-sync.c
create mode 100644 include/hw/proxy/memory-sync.h
diff --git a/Makefile.target b/Makefile.target
index cfd36c1..271d883 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -127,6 +127,9 @@ obj-$(CONFIG_TCG) += fpu/softfloat.o
obj-y += target/$(TARGET_BASE_ARCH)/
obj-y += disas.o
obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
+ifeq ($(TARGET_NAME)-$(CONFIG_MPQEMU)-$(CONFIG_USER_ONLY), x86_64-y-)
+obj-$(CONFIG_MPQEMU) += hw/proxy/memory-sync.o
+endif
LIBS := $(libs_cpu) $(LIBS)
obj-$(CONFIG_PLUGIN) += plugins/
diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c
new file mode 100644
index 0000000..3edbb19
--- /dev/null
+++ b/hw/proxy/memory-sync.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright © 2018, 2020 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include <sys/types.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "qemu/osdep.h"
+#include "qemu/compiler.h"
+#include "qemu/int128.h"
+#include "qemu/range.h"
+#include "exec/memory.h"
+#include "exec/cpu-common.h"
+#include "cpu.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "io/mpqemu-link.h"
+#include "hw/proxy/memory-sync.h"
+
+static const TypeInfo remote_mem_sync_type_info = {
+ .name = TYPE_MEMORY_LISTENER,
+ .parent = TYPE_OBJECT,
+ .instance_size = sizeof(RemoteMemSync),
+};
+
+static void remote_mem_sync_register_types(void)
+{
+ type_register_static(&remote_mem_sync_type_info);
+}
+
+type_init(remote_mem_sync_register_types)
+
+static void proxy_ml_begin(MemoryListener *listener)
+{
+ RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+ int mrs;
+
+ for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
+ memory_region_unref(sync->mr_sections[mrs].mr);
+ }
+
+ g_free(sync->mr_sections);
+ sync->mr_sections = NULL;
+ sync->n_mr_sections = 0;
+}
+
+static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
+{
+ MemoryRegion *mr;
+ ram_addr_t off;
+
+ mr = memory_region_from_host((void *)(uintptr_t)host, &off);
+
+ if (offset) {
+ *offset = off;
+ }
+
+ return memory_region_get_fd(mr);
+}
+
+static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size)
+{
+ bool merge;
+ int fd1, fd2;
+
+ fd1 = get_fd_from_hostaddr(host, NULL);
+
+ fd2 = get_fd_from_hostaddr(prev_host, NULL);
+
+ merge = (fd1 == fd2);
+
+ merge &= ((prev_host + size) == host);
+
+ return merge;
+}
+
+static void proxy_ml_region_addnop(MemoryListener *listener,
+ MemoryRegionSection *section)
+{
+ RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+ bool need_add = true;
+ uint64_t mrs_size, mrs_gpa, mrs_page;
+ uintptr_t mrs_host;
+ RAMBlock *mrs_rb;
+ MemoryRegionSection *prev_sec;
+
+ if (!(memory_region_is_ram(section->mr) &&
+ !memory_region_is_rom(section->mr))) {
+ return;
+ }
+
+ mrs_rb = section->mr->ram_block;
+ mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
+ mrs_size = int128_get64(section->size);
+ mrs_gpa = section->offset_within_address_space;
+ mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
+ section->offset_within_region;
+
+ if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) {
+ return;
+ }
+
+ mrs_host = mrs_host & ~(mrs_page - 1);
+ mrs_gpa = mrs_gpa & ~(mrs_page - 1);
+ mrs_size = ROUND_UP(mrs_size, mrs_page);
+
+ if (sync->n_mr_sections) {
+ prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
+ uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
+ uint64_t prev_size = int128_get64(prev_sec->size);
+ uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
+ uint64_t prev_host_start =
+ (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
+ prev_sec->offset_within_region;
+ uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
+
+ if (mrs_gpa <= (prev_gpa_end + 1)) {
+ if (mrs_gpa < prev_gpa_start) {
+ assert(0);
+ }
+
+ if ((section->mr == prev_sec->mr) &&
+ proxy_mrs_can_merge(mrs_host, prev_host_start,
+ (mrs_gpa - prev_gpa_start))) {
+ uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
+ need_add = false;
+ prev_sec->offset_within_address_space =
+ MIN(prev_gpa_start, mrs_gpa);
+ prev_sec->offset_within_region =
+ MIN(prev_host_start, mrs_host) -
+ (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
+ prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
+ mrs_host));
+ }
+ }
+ }
+
+ if (need_add) {
+ ++sync->n_mr_sections;
+ sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
+ sync->n_mr_sections);
+ sync->mr_sections[sync->n_mr_sections - 1] = *section;
+ sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
+ memory_region_ref(section->mr);
+ }
+}
+
+static void proxy_ml_commit(MemoryListener *listener)
+{
+ RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+ MPQemuMsg msg;
+ MemoryRegionSection section;
+ ram_addr_t offset;
+ uintptr_t host_addr;
+ int region;
+
+ memset(&msg, 0, sizeof(MPQemuMsg));
+
+ msg.cmd = SYNC_SYSMEM;
+ msg.bytestream = 0;
+ msg.num_fds = sync->n_mr_sections;
+ msg.size = sizeof(msg.data1);
+ assert(msg.num_fds <= REMOTE_MAX_FDS);
+
+ for (region = 0; region < sync->n_mr_sections; region++) {
+ section = sync->mr_sections[region];
+ msg.data1.sync_sysmem.gpas[region] =
+ section.offset_within_address_space;
+ msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size);
+ host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) +
+ section.offset_within_region;
+ msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
+ msg.data1.sync_sysmem.offsets[region] = offset;
+ }
+ mpqemu_msg_send(&msg, sync->mpqemu_link->com);
+}
+
+void deconfigure_memory_sync(RemoteMemSync *sync)
+{
+ memory_listener_unregister(&sync->listener);
+}
+
+/*
+ * TODO: Memory Sync need not be instantianted once per every proxy device.
+ * All remote devices are going to get the exact same updates at the
+ * same time. It therefore makes sense to have a broadcast model.
+ *
+ * Broadcast model would involve running the MemorySync object in a
+ * thread. MemorySync would contain a list of mpqemu-link objects
+ * that need notification. proxy_ml_commit() could send the same
+ * message to all the links at the same time.
+ */
+void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link)
+{
+ sync->n_mr_sections = 0;
+ sync->mr_sections = NULL;
+
+ sync->mpqemu_link = mpqemu_link;
+
+ sync->listener.begin = proxy_ml_begin;
+ sync->listener.commit = proxy_ml_commit;
+ sync->listener.region_add = proxy_ml_region_addnop;
+ sync->listener.region_nop = proxy_ml_region_addnop;
+ sync->listener.priority = 10;
+
+ memory_listener_register(&sync->listener, &address_space_memory);
+}
diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c
index b17d9bb..d3a9d38 100644
--- a/hw/proxy/qemu-proxy.c
+++ b/hw/proxy/qemu-proxy.c
@@ -16,6 +16,8 @@
#include "qapi/qmp/qjson.h"
#include "qapi/qmp/qstring.h"
#include "hw/proxy/qemu-proxy.h"
+#include "hw/proxy/memory-sync.h"
+#include "qom/object.h"
static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp);
@@ -243,6 +245,8 @@ static void init_proxy(PCIDevice *dev, char *command, char *exec_name,
mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com,
pdev->socket);
+
+ configure_memory_sync(pdev->sync, pdev->mpqemu_link);
}
static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
@@ -261,6 +265,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
dev->set_proxy_sock = set_proxy_sock;
dev->get_proxy_sock = get_proxy_sock;
dev->init_proxy = init_proxy;
+ dev->sync = REMOTE_MEM_SYNC(object_new(TYPE_MEMORY_LISTENER));
}
static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr,
diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h
new file mode 100644
index 0000000..d8329c9
--- /dev/null
+++ b/include/hw/proxy/memory-sync.h
@@ -0,0 +1,37 @@
+/*
+ * Copyright © 2018, 2020 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef MEMORY_SYNC_H
+#define MEMORY_SYNC_H
+
+#include <sys/types.h>
+
+#include "qemu/osdep.h"
+#include "qom/object.h"
+#include "exec/memory.h"
+#include "io/mpqemu-link.h"
+
+#define TYPE_MEMORY_LISTENER "memory-listener"
+#define REMOTE_MEM_SYNC(obj) \
+ OBJECT_CHECK(RemoteMemSync, (obj), TYPE_MEMORY_LISTENER)
+
+typedef struct RemoteMemSync {
+ Object obj;
+
+ MemoryListener listener;
+
+ int n_mr_sections;
+ MemoryRegionSection *mr_sections;
+
+ MPQemuLinkState *mpqemu_link;
+} RemoteMemSync;
+
+void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link);
+void deconfigure_memory_sync(RemoteMemSync *sync);
+
+#endif
diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h
index 44e370e..c93ffe3 100644
--- a/include/hw/proxy/qemu-proxy.h
+++ b/include/hw/proxy/qemu-proxy.h
@@ -10,6 +10,7 @@
#define QEMU_PROXY_H
#include "io/mpqemu-link.h"
+#include "hw/proxy/memory-sync.h"
#define TYPE_PCI_PROXY_DEV "pci-proxy-dev"
@@ -37,8 +38,12 @@ extern const MemoryRegionOps proxy_default_ops;
struct PCIProxyDev {
PCIDevice parent_dev;
+ int n_mr_sections;
+ MemoryRegionSection *mr_sections;
+
MPQemuLinkState *mpqemu_link;
+ RemoteMemSync *sync;
pid_t remote_pid;
int socket;
diff --git a/remote/remote-main.c b/remote/remote-main.c
index acd8daf..9512a3b 100644
--- a/remote/remote-main.c
+++ b/remote/remote-main.c
@@ -34,6 +34,7 @@
#include "block/block.h"
#include "exec/ramlist.h"
#include "exec/memattrs.h"
+#include "exec/address-spaces.h"
static MPQemuLinkState *mpqemu_link;
PCIDevice *remote_pci_dev;
@@ -161,6 +162,16 @@ static void process_msg(GIOCondition cond, MPQemuChannel *chan)
goto finalize_loop;
}
break;
+ case SYNC_SYSMEM:
+ /*
+ * TODO: ensure no active DMA is happening when
+ * sysmem is being updated
+ */
+ remote_sysmem_reconfig(msg, &err);
+ if (err) {
+ goto finalize_loop;
+ }
+ break;
default:
error_setg(&err, "Unknown command");
goto finalize_loop;
--
1.8.3.1
* Jagannathan Raman (jag.raman@oracle.com) wrote:
> Add memory-listener object which is used to keep the view of the RAM
> in sync between QEMU and remote process.
> A MemoryListener is registered for system-memory AddressSpace. The
> listener sends SYNC_SYSMEM message to the remote process when memory
> listener commits the changes to memory, the remote process receives
> the message and processes it in the handler for SYNC_SYSMEM message.
>
> TODO: No need to create object for remote memory listener.
>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> ---
> Makefile.target | 3 +
> hw/proxy/memory-sync.c | 212 +++++++++++++++++++++++++++++++++++++++++
> hw/proxy/qemu-proxy.c | 5 +
> include/hw/proxy/memory-sync.h | 37 +++++++
> include/hw/proxy/qemu-proxy.h | 5 +
> remote/remote-main.c | 11 +++
> 6 files changed, 273 insertions(+)
> create mode 100644 hw/proxy/memory-sync.c
> create mode 100644 include/hw/proxy/memory-sync.h
>
> diff --git a/Makefile.target b/Makefile.target
> index cfd36c1..271d883 100644
> --- a/Makefile.target
> +++ b/Makefile.target
> @@ -127,6 +127,9 @@ obj-$(CONFIG_TCG) += fpu/softfloat.o
> obj-y += target/$(TARGET_BASE_ARCH)/
> obj-y += disas.o
> obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
> +ifeq ($(TARGET_NAME)-$(CONFIG_MPQEMU)-$(CONFIG_USER_ONLY), x86_64-y-)
> +obj-$(CONFIG_MPQEMU) += hw/proxy/memory-sync.o
> +endif
> LIBS := $(libs_cpu) $(LIBS)
>
> obj-$(CONFIG_PLUGIN) += plugins/
> diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c
> new file mode 100644
> index 0000000..3edbb19
> --- /dev/null
> +++ b/hw/proxy/memory-sync.c
> @@ -0,0 +1,212 @@
> +/*
> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include <sys/types.h>
> +#include <stdio.h>
> +#include <string.h>
> +
> +#include "qemu/osdep.h"
> +#include "qemu/compiler.h"
> +#include "qemu/int128.h"
> +#include "qemu/range.h"
> +#include "exec/memory.h"
> +#include "exec/cpu-common.h"
> +#include "cpu.h"
> +#include "exec/ram_addr.h"
> +#include "exec/address-spaces.h"
> +#include "io/mpqemu-link.h"
> +#include "hw/proxy/memory-sync.h"
> +
> +static const TypeInfo remote_mem_sync_type_info = {
> + .name = TYPE_MEMORY_LISTENER,
> + .parent = TYPE_OBJECT,
> + .instance_size = sizeof(RemoteMemSync),
> +};
> +
> +static void remote_mem_sync_register_types(void)
> +{
> + type_register_static(&remote_mem_sync_type_info);
> +}
> +
> +type_init(remote_mem_sync_register_types)
> +
> +static void proxy_ml_begin(MemoryListener *listener)
> +{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> + int mrs;
> +
> + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
> + memory_region_unref(sync->mr_sections[mrs].mr);
> + }
> +
> + g_free(sync->mr_sections);
> + sync->mr_sections = NULL;
> + sync->n_mr_sections = 0;
> +}
> +
> +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
> +{
> + MemoryRegion *mr;
> + ram_addr_t off;
> +
> + mr = memory_region_from_host((void *)(uintptr_t)host, &off);
Do you need to just check we found an 'mr' ?
> + if (offset) {
> + *offset = off;
> + }
> +
> + return memory_region_get_fd(mr);
> +}
> +
> +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size)
> +{
> + bool merge;
> + int fd1, fd2;
> +
> + fd1 = get_fd_from_hostaddr(host, NULL);
> +
> + fd2 = get_fd_from_hostaddr(prev_host, NULL);
> +
> + merge = (fd1 == fd2);
> +
> + merge &= ((prev_host + size) == host);
It's interesting; I think the vhost code checks that the two mr's are
the same where you are checking for the same underlying fd - but I think
that's OK.
(I wonder if we need to check offset's within the fd's match up when
they're merged - since you added that offset feature in an earlier
patch?
That would also need checking in vhost_region_add_section)
> + return merge;
> +}
> +
> +static void proxy_ml_region_addnop(MemoryListener *listener,
> + MemoryRegionSection *section)
> +{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> + bool need_add = true;
> + uint64_t mrs_size, mrs_gpa, mrs_page;
> + uintptr_t mrs_host;
> + RAMBlock *mrs_rb;
> + MemoryRegionSection *prev_sec;
> +
> + if (!(memory_region_is_ram(section->mr) &&
> + !memory_region_is_rom(section->mr))) {
> + return;
> + }
> +
> + mrs_rb = section->mr->ram_block;
> + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
> + mrs_size = int128_get64(section->size);
> + mrs_gpa = section->offset_within_address_space;
> + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> + section->offset_within_region;
> +
> + if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) {
> + return;
> + }
> +
> + mrs_host = mrs_host & ~(mrs_page - 1);
> + mrs_gpa = mrs_gpa & ~(mrs_page - 1);
> + mrs_size = ROUND_UP(mrs_size, mrs_page);
OK, just note the more complex code in vhost_region_add_section for page
aligning regions that are needed for postcopy; I think that would be the
same if you wanted to do postcopy with remote processes.
> + if (sync->n_mr_sections) {
> + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
> + uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
> + uint64_t prev_size = int128_get64(prev_sec->size);
> + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
> + uint64_t prev_host_start =
> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
> + prev_sec->offset_within_region;
> + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
> +
> + if (mrs_gpa <= (prev_gpa_end + 1)) {
> + if (mrs_gpa < prev_gpa_start) {
> + assert(0);
> + }
g_assert(mrs_gpa < prev_gpa_start);
> + if ((section->mr == prev_sec->mr) &&
> + proxy_mrs_can_merge(mrs_host, prev_host_start,
> + (mrs_gpa - prev_gpa_start))) {
> + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
> + need_add = false;
> + prev_sec->offset_within_address_space =
> + MIN(prev_gpa_start, mrs_gpa);
> + prev_sec->offset_within_region =
> + MIN(prev_host_start, mrs_host) -
> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
> + prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
> + mrs_host));
> + }
> + }
> + }
> +
> + if (need_add) {
> + ++sync->n_mr_sections;
> + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
> + sync->n_mr_sections);
> + sync->mr_sections[sync->n_mr_sections - 1] = *section;
> + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
> + memory_region_ref(section->mr);
> + }
I'd add some tracing in this function; it's a nightmare to debug when it
does something unexpected.
> +}
> +
> +static void proxy_ml_commit(MemoryListener *listener)
> +{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> + MPQemuMsg msg;
> + MemoryRegionSection section;
> + ram_addr_t offset;
> + uintptr_t host_addr;
> + int region;
> +
> + memset(&msg, 0, sizeof(MPQemuMsg));
> +
> + msg.cmd = SYNC_SYSMEM;
> + msg.bytestream = 0;
> + msg.num_fds = sync->n_mr_sections;
> + msg.size = sizeof(msg.data1);
> + assert(msg.num_fds <= REMOTE_MAX_FDS);
> +
> + for (region = 0; region < sync->n_mr_sections; region++) {
> + section = sync->mr_sections[region];
> + msg.data1.sync_sysmem.gpas[region] =
> + section.offset_within_address_space;
> + msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size);
> + host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) +
> + section.offset_within_region;
> + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
Since you already have section.mr you cna use memory_region_get_fd.
> + msg.data1.sync_sysmem.offsets[region] = offset;
> + }
> + mpqemu_msg_send(&msg, sync->mpqemu_link->com);
> +}
> +
> +void deconfigure_memory_sync(RemoteMemSync *sync)
> +{
> + memory_listener_unregister(&sync->listener);
> +}
> +
> +/*
> + * TODO: Memory Sync need not be instantianted once per every proxy device.
> + * All remote devices are going to get the exact same updates at the
> + * same time. It therefore makes sense to have a broadcast model.
> + *
> + * Broadcast model would involve running the MemorySync object in a
> + * thread. MemorySync would contain a list of mpqemu-link objects
> + * that need notification. proxy_ml_commit() could send the same
> + * message to all the links at the same time.
> + */
> +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link)
> +{
> + sync->n_mr_sections = 0;
> + sync->mr_sections = NULL;
> +
> + sync->mpqemu_link = mpqemu_link;
> +
> + sync->listener.begin = proxy_ml_begin;
> + sync->listener.commit = proxy_ml_commit;
> + sync->listener.region_add = proxy_ml_region_addnop;
> + sync->listener.region_nop = proxy_ml_region_addnop;
> + sync->listener.priority = 10;
> +
> + memory_listener_register(&sync->listener, &address_space_memory);
> +}
> diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c
> index b17d9bb..d3a9d38 100644
> --- a/hw/proxy/qemu-proxy.c
> +++ b/hw/proxy/qemu-proxy.c
> @@ -16,6 +16,8 @@
> #include "qapi/qmp/qjson.h"
> #include "qapi/qmp/qstring.h"
> #include "hw/proxy/qemu-proxy.h"
> +#include "hw/proxy/memory-sync.h"
> +#include "qom/object.h"
>
> static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp);
>
> @@ -243,6 +245,8 @@ static void init_proxy(PCIDevice *dev, char *command, char *exec_name,
>
> mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com,
> pdev->socket);
> +
> + configure_memory_sync(pdev->sync, pdev->mpqemu_link);
> }
>
> static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
> @@ -261,6 +265,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
> dev->set_proxy_sock = set_proxy_sock;
> dev->get_proxy_sock = get_proxy_sock;
> dev->init_proxy = init_proxy;
> + dev->sync = REMOTE_MEM_SYNC(object_new(TYPE_MEMORY_LISTENER));
> }
>
> static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr,
> diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h
> new file mode 100644
> index 0000000..d8329c9
> --- /dev/null
> +++ b/include/hw/proxy/memory-sync.h
> @@ -0,0 +1,37 @@
> +/*
> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef MEMORY_SYNC_H
> +#define MEMORY_SYNC_H
> +
> +#include <sys/types.h>
> +
> +#include "qemu/osdep.h"
> +#include "qom/object.h"
> +#include "exec/memory.h"
> +#include "io/mpqemu-link.h"
> +
> +#define TYPE_MEMORY_LISTENER "memory-listener"
> +#define REMOTE_MEM_SYNC(obj) \
> + OBJECT_CHECK(RemoteMemSync, (obj), TYPE_MEMORY_LISTENER)
> +
> +typedef struct RemoteMemSync {
> + Object obj;
> +
> + MemoryListener listener;
> +
> + int n_mr_sections;
> + MemoryRegionSection *mr_sections;
> +
> + MPQemuLinkState *mpqemu_link;
> +} RemoteMemSync;
> +
> +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link);
> +void deconfigure_memory_sync(RemoteMemSync *sync);
> +
> +#endif
> diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h
> index 44e370e..c93ffe3 100644
> --- a/include/hw/proxy/qemu-proxy.h
> +++ b/include/hw/proxy/qemu-proxy.h
> @@ -10,6 +10,7 @@
> #define QEMU_PROXY_H
>
> #include "io/mpqemu-link.h"
> +#include "hw/proxy/memory-sync.h"
>
> #define TYPE_PCI_PROXY_DEV "pci-proxy-dev"
>
> @@ -37,8 +38,12 @@ extern const MemoryRegionOps proxy_default_ops;
> struct PCIProxyDev {
> PCIDevice parent_dev;
>
> + int n_mr_sections;
> + MemoryRegionSection *mr_sections;
> +
> MPQemuLinkState *mpqemu_link;
>
> + RemoteMemSync *sync;
> pid_t remote_pid;
> int socket;
>
> diff --git a/remote/remote-main.c b/remote/remote-main.c
> index acd8daf..9512a3b 100644
> --- a/remote/remote-main.c
> +++ b/remote/remote-main.c
> @@ -34,6 +34,7 @@
> #include "block/block.h"
> #include "exec/ramlist.h"
> #include "exec/memattrs.h"
> +#include "exec/address-spaces.h"
>
> static MPQemuLinkState *mpqemu_link;
> PCIDevice *remote_pci_dev;
> @@ -161,6 +162,16 @@ static void process_msg(GIOCondition cond, MPQemuChannel *chan)
> goto finalize_loop;
> }
> break;
> + case SYNC_SYSMEM:
> + /*
> + * TODO: ensure no active DMA is happening when
> + * sysmem is being updated
In practice this turns out to be hard!
Dave
> + */
> + remote_sysmem_reconfig(msg, &err);
> + if (err) {
> + goto finalize_loop;
> + }
> + break;
> default:
> error_setg(&err, "Unknown command");
> goto finalize_loop;
> --
> 1.8.3.1
>
--
Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 3/4/2020 6:53 AM, Dr. David Alan Gilbert wrote:
> * Jagannathan Raman (jag.raman@oracle.com) wrote:
>> Add memory-listener object which is used to keep the view of the RAM
>> in sync between QEMU and remote process.
>> A MemoryListener is registered for system-memory AddressSpace. The
>> listener sends SYNC_SYSMEM message to the remote process when memory
>> listener commits the changes to memory, the remote process receives
>> the message and processes it in the handler for SYNC_SYSMEM message.
>>
>> TODO: No need to create object for remote memory listener.
>>
>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>> ---
>> Makefile.target | 3 +
>> hw/proxy/memory-sync.c | 212 +++++++++++++++++++++++++++++++++++++++++
>> hw/proxy/qemu-proxy.c | 5 +
>> include/hw/proxy/memory-sync.h | 37 +++++++
>> include/hw/proxy/qemu-proxy.h | 5 +
>> remote/remote-main.c | 11 +++
>> 6 files changed, 273 insertions(+)
>> create mode 100644 hw/proxy/memory-sync.c
>> create mode 100644 include/hw/proxy/memory-sync.h
>>
>> diff --git a/Makefile.target b/Makefile.target
>> index cfd36c1..271d883 100644
>> --- a/Makefile.target
>> +++ b/Makefile.target
>> @@ -127,6 +127,9 @@ obj-$(CONFIG_TCG) += fpu/softfloat.o
>> obj-y += target/$(TARGET_BASE_ARCH)/
>> obj-y += disas.o
>> obj-$(call notempty,$(TARGET_XML_FILES)) += gdbstub-xml.o
>> +ifeq ($(TARGET_NAME)-$(CONFIG_MPQEMU)-$(CONFIG_USER_ONLY), x86_64-y-)
>> +obj-$(CONFIG_MPQEMU) += hw/proxy/memory-sync.o
>> +endif
>> LIBS := $(libs_cpu) $(LIBS)
>>
>> obj-$(CONFIG_PLUGIN) += plugins/
>> diff --git a/hw/proxy/memory-sync.c b/hw/proxy/memory-sync.c
>> new file mode 100644
>> index 0000000..3edbb19
>> --- /dev/null
>> +++ b/hw/proxy/memory-sync.c
>> @@ -0,0 +1,212 @@
>> +/*
>> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#include <sys/types.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +
>> +#include "qemu/osdep.h"
>> +#include "qemu/compiler.h"
>> +#include "qemu/int128.h"
>> +#include "qemu/range.h"
>> +#include "exec/memory.h"
>> +#include "exec/cpu-common.h"
>> +#include "cpu.h"
>> +#include "exec/ram_addr.h"
>> +#include "exec/address-spaces.h"
>> +#include "io/mpqemu-link.h"
>> +#include "hw/proxy/memory-sync.h"
>> +
>> +static const TypeInfo remote_mem_sync_type_info = {
>> + .name = TYPE_MEMORY_LISTENER,
>> + .parent = TYPE_OBJECT,
>> + .instance_size = sizeof(RemoteMemSync),
>> +};
>> +
>> +static void remote_mem_sync_register_types(void)
>> +{
>> + type_register_static(&remote_mem_sync_type_info);
>> +}
>> +
>> +type_init(remote_mem_sync_register_types)
>> +
>> +static void proxy_ml_begin(MemoryListener *listener)
>> +{
>> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
>> + int mrs;
>> +
>> + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
>> + memory_region_unref(sync->mr_sections[mrs].mr);
>> + }
>> +
>> + g_free(sync->mr_sections);
>> + sync->mr_sections = NULL;
>> + sync->n_mr_sections = 0;
>> +}
>> +
>> +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
>> +{
>> + MemoryRegion *mr;
>> + ram_addr_t off;
>> +
>> + mr = memory_region_from_host((void *)(uintptr_t)host, &off);
>
> Do you need to just check we found an 'mr' ?
OK, we'll add this check.
>
>> + if (offset) {
>> + *offset = off;
>> + }
>> +
>> + return memory_region_get_fd(mr);
>> +}
>> +
>> +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size)
>> +{
>> + bool merge;
>> + int fd1, fd2;
>> +
>> + fd1 = get_fd_from_hostaddr(host, NULL);
>> +
>> + fd2 = get_fd_from_hostaddr(prev_host, NULL);
>> +
>> + merge = (fd1 == fd2);
>> +
>> + merge &= ((prev_host + size) == host);
>
> It's interesting; I think the vhost code checks that the two mr's are
> the same where you are checking for the same underlying fd - but I think
> that's OK.
> (I wonder if we need to check offset's within the fd's match up when
> they're merged - since you added that offset feature in an earlier
> patch?
> That would also need checking in vhost_region_add_section)
If the fds are the same, and the subsequent check ((prev_host + size) ==
host) passes, then I believe the offsets would match as well.
>
>> + return merge;
>> +}
>> +
>> +static void proxy_ml_region_addnop(MemoryListener *listener,
>> + MemoryRegionSection *section)
>> +{
>> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
>> + bool need_add = true;
>> + uint64_t mrs_size, mrs_gpa, mrs_page;
>> + uintptr_t mrs_host;
>> + RAMBlock *mrs_rb;
>> + MemoryRegionSection *prev_sec;
>> +
>> + if (!(memory_region_is_ram(section->mr) &&
>> + !memory_region_is_rom(section->mr))) {
>> + return;
>> + }
>> +
>> + mrs_rb = section->mr->ram_block;
>> + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
>> + mrs_size = int128_get64(section->size);
>> + mrs_gpa = section->offset_within_address_space;
>> + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
>> + section->offset_within_region;
>> +
>> + if (get_fd_from_hostaddr(mrs_host, NULL) <= 0) {
>> + return;
>> + }
>> +
>> + mrs_host = mrs_host & ~(mrs_page - 1);
>> + mrs_gpa = mrs_gpa & ~(mrs_page - 1);
>> + mrs_size = ROUND_UP(mrs_size, mrs_page);
>
> OK, just note the more complex code in vhost_region_add_section for page
> aligning regions that are needed for postcopy; I think that would be the
> same if you wanted to do postcopy with remote processes.
Since mmap requires the addresses to be aligned with a page boundry, we
added these checks. We are essentially doing the same with alignage as
compared with vhost user. So we should be compliant with postcopy I
believe.
>
>> + if (sync->n_mr_sections) {
>> + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
>> + uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
>> + uint64_t prev_size = int128_get64(prev_sec->size);
>> + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
>> + uint64_t prev_host_start =
>> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
>> + prev_sec->offset_within_region;
>> + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
>> +
>> + if (mrs_gpa <= (prev_gpa_end + 1)) {
>> + if (mrs_gpa < prev_gpa_start) {
>> + assert(0);
>> + }
>
> g_assert(mrs_gpa < prev_gpa_start);
Thank you, we'll update the above check with the version you proposed.
>
>
>> + if ((section->mr == prev_sec->mr) &&
>> + proxy_mrs_can_merge(mrs_host, prev_host_start,
>> + (mrs_gpa - prev_gpa_start))) {
>> + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
>> + need_add = false;
>> + prev_sec->offset_within_address_space =
>> + MIN(prev_gpa_start, mrs_gpa);
>> + prev_sec->offset_within_region =
>> + MIN(prev_host_start, mrs_host) -
>> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
>> + prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
>> + mrs_host));
>> + }
>> + }
>> + }
>> +
>> + if (need_add) {
>> + ++sync->n_mr_sections;
>> + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
>> + sync->n_mr_sections);
>> + sync->mr_sections[sync->n_mr_sections - 1] = *section;
>> + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
>> + memory_region_ref(section->mr);
>> + }
>
> I'd add some tracing in this function; it's a nightmare to debug when it
> does something unexpected.
Thank you, we'll add the tracing.
>
>> +}
>> +
>> +static void proxy_ml_commit(MemoryListener *listener)
>> +{
>> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
>> + MPQemuMsg msg;
>> + MemoryRegionSection section;
>> + ram_addr_t offset;
>> + uintptr_t host_addr;
>> + int region;
>> +
>> + memset(&msg, 0, sizeof(MPQemuMsg));
>> +
>> + msg.cmd = SYNC_SYSMEM;
>> + msg.bytestream = 0;
>> + msg.num_fds = sync->n_mr_sections;
>> + msg.size = sizeof(msg.data1);
>> + assert(msg.num_fds <= REMOTE_MAX_FDS);
>> +
>> + for (region = 0; region < sync->n_mr_sections; region++) {
>> + section = sync->mr_sections[region];
>> + msg.data1.sync_sysmem.gpas[region] =
>> + section.offset_within_address_space;
>> + msg.data1.sync_sysmem.sizes[region] = int128_get64(section.size);
>> + host_addr = (uintptr_t)memory_region_get_ram_ptr(section.mr) +
>> + section.offset_within_region;
>> + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
>
> Since you already have section.mr you cna use memory_region_get_fd.
OK.
Thank you!
--
Jag
>
>> + msg.data1.sync_sysmem.offsets[region] = offset;
>> + }
>> + mpqemu_msg_send(&msg, sync->mpqemu_link->com);
>> +}
>> +
>> +void deconfigure_memory_sync(RemoteMemSync *sync)
>> +{
>> + memory_listener_unregister(&sync->listener);
>> +}
>> +
>> +/*
>> + * TODO: Memory Sync need not be instantianted once per every proxy device.
>> + * All remote devices are going to get the exact same updates at the
>> + * same time. It therefore makes sense to have a broadcast model.
>> + *
>> + * Broadcast model would involve running the MemorySync object in a
>> + * thread. MemorySync would contain a list of mpqemu-link objects
>> + * that need notification. proxy_ml_commit() could send the same
>> + * message to all the links at the same time.
>> + */
>> +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link)
>> +{
>> + sync->n_mr_sections = 0;
>> + sync->mr_sections = NULL;
>> +
>> + sync->mpqemu_link = mpqemu_link;
>> +
>> + sync->listener.begin = proxy_ml_begin;
>> + sync->listener.commit = proxy_ml_commit;
>> + sync->listener.region_add = proxy_ml_region_addnop;
>> + sync->listener.region_nop = proxy_ml_region_addnop;
>> + sync->listener.priority = 10;
>> +
>> + memory_listener_register(&sync->listener, &address_space_memory);
>> +}
>> diff --git a/hw/proxy/qemu-proxy.c b/hw/proxy/qemu-proxy.c
>> index b17d9bb..d3a9d38 100644
>> --- a/hw/proxy/qemu-proxy.c
>> +++ b/hw/proxy/qemu-proxy.c
>> @@ -16,6 +16,8 @@
>> #include "qapi/qmp/qjson.h"
>> #include "qapi/qmp/qstring.h"
>> #include "hw/proxy/qemu-proxy.h"
>> +#include "hw/proxy/memory-sync.h"
>> +#include "qom/object.h"
>>
>> static void pci_proxy_dev_realize(PCIDevice *dev, Error **errp);
>>
>> @@ -243,6 +245,8 @@ static void init_proxy(PCIDevice *dev, char *command, char *exec_name,
>>
>> mpqemu_init_channel(pdev->mpqemu_link, &pdev->mpqemu_link->com,
>> pdev->socket);
>> +
>> + configure_memory_sync(pdev->sync, pdev->mpqemu_link);
>> }
>>
>> static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
>> @@ -261,6 +265,7 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
>> dev->set_proxy_sock = set_proxy_sock;
>> dev->get_proxy_sock = get_proxy_sock;
>> dev->init_proxy = init_proxy;
>> + dev->sync = REMOTE_MEM_SYNC(object_new(TYPE_MEMORY_LISTENER));
>> }
>>
>> static void send_bar_access_msg(PCIProxyDev *dev, MemoryRegion *mr,
>> diff --git a/include/hw/proxy/memory-sync.h b/include/hw/proxy/memory-sync.h
>> new file mode 100644
>> index 0000000..d8329c9
>> --- /dev/null
>> +++ b/include/hw/proxy/memory-sync.h
>> @@ -0,0 +1,37 @@
>> +/*
>> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
>> + *
>> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
>> + * See the COPYING file in the top-level directory.
>> + *
>> + */
>> +
>> +#ifndef MEMORY_SYNC_H
>> +#define MEMORY_SYNC_H
>> +
>> +#include <sys/types.h>
>> +
>> +#include "qemu/osdep.h"
>> +#include "qom/object.h"
>> +#include "exec/memory.h"
>> +#include "io/mpqemu-link.h"
>> +
>> +#define TYPE_MEMORY_LISTENER "memory-listener"
>> +#define REMOTE_MEM_SYNC(obj) \
>> + OBJECT_CHECK(RemoteMemSync, (obj), TYPE_MEMORY_LISTENER)
>> +
>> +typedef struct RemoteMemSync {
>> + Object obj;
>> +
>> + MemoryListener listener;
>> +
>> + int n_mr_sections;
>> + MemoryRegionSection *mr_sections;
>> +
>> + MPQemuLinkState *mpqemu_link;
>> +} RemoteMemSync;
>> +
>> +void configure_memory_sync(RemoteMemSync *sync, MPQemuLinkState *mpqemu_link);
>> +void deconfigure_memory_sync(RemoteMemSync *sync);
>> +
>> +#endif
>> diff --git a/include/hw/proxy/qemu-proxy.h b/include/hw/proxy/qemu-proxy.h
>> index 44e370e..c93ffe3 100644
>> --- a/include/hw/proxy/qemu-proxy.h
>> +++ b/include/hw/proxy/qemu-proxy.h
>> @@ -10,6 +10,7 @@
>> #define QEMU_PROXY_H
>>
>> #include "io/mpqemu-link.h"
>> +#include "hw/proxy/memory-sync.h"
>>
>> #define TYPE_PCI_PROXY_DEV "pci-proxy-dev"
>>
>> @@ -37,8 +38,12 @@ extern const MemoryRegionOps proxy_default_ops;
>> struct PCIProxyDev {
>> PCIDevice parent_dev;
>>
>> + int n_mr_sections;
>> + MemoryRegionSection *mr_sections;
>> +
>> MPQemuLinkState *mpqemu_link;
>>
>> + RemoteMemSync *sync;
>> pid_t remote_pid;
>> int socket;
>>
>> diff --git a/remote/remote-main.c b/remote/remote-main.c
>> index acd8daf..9512a3b 100644
>> --- a/remote/remote-main.c
>> +++ b/remote/remote-main.c
>> @@ -34,6 +34,7 @@
>> #include "block/block.h"
>> #include "exec/ramlist.h"
>> #include "exec/memattrs.h"
>> +#include "exec/address-spaces.h"
>>
>> static MPQemuLinkState *mpqemu_link;
>> PCIDevice *remote_pci_dev;
>> @@ -161,6 +162,16 @@ static void process_msg(GIOCondition cond, MPQemuChannel *chan)
>> goto finalize_loop;
>> }
>> break;
>> + case SYNC_SYSMEM:
>> + /*
>> + * TODO: ensure no active DMA is happening when
>> + * sysmem is being updated
>
> In practice this turns out to be hard!
>
> Dave
>
>> + */
>> + remote_sysmem_reconfig(msg, &err);
>> + if (err) {
>> + goto finalize_loop;
>> + }
>> + break;
>> default:
>> error_setg(&err, "Unknown command");
>> goto finalize_loop;
>> --
>> 1.8.3.1
>>
> --
> Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
>
© 2016 - 2026 Red Hat, Inc.