Add a memory-listener object which is used to keep the view of the RAM
in sync between QEMU and the remote process.
A MemoryListener is registered for the system-memory AddressSpace. When
the listener commits a change to the memory layout, it sends a
SYNC_SYSMEM message to the remote process. The remote process receives
the message and processes it in its handler for SYNC_SYSMEM messages.
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
---
include/hw/remote/memory-sync.h | 27 ++++++
include/hw/remote/proxy.h | 2 +
hw/remote/memory-sync.c | 210 ++++++++++++++++++++++++++++++++++++++++
hw/remote/message.c | 5 +
hw/remote/proxy.c | 6 ++
MAINTAINERS | 2 +
hw/remote/meson.build | 1 +
7 files changed, 253 insertions(+)
create mode 100644 include/hw/remote/memory-sync.h
create mode 100644 hw/remote/memory-sync.c
diff --git a/include/hw/remote/memory-sync.h b/include/hw/remote/memory-sync.h
new file mode 100644
index 0000000..785f76a
--- /dev/null
+++ b/include/hw/remote/memory-sync.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright © 2018, 2020 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#ifndef MEMORY_SYNC_H
+#define MEMORY_SYNC_H
+
+#include "exec/memory.h"
+#include "io/channel.h"
+
+typedef struct RemoteMemSync { /* RAM-layout sync state for one proxy device */
+ MemoryListener listener; /* registered on address_space_memory */
+
+ int n_mr_sections; /* number of entries in mr_sections */
+ MemoryRegionSection *mr_sections; /* sections recorded since last begin;
+                                    * each entry holds a ref on its ->mr */
+
+ QIOChannel *ioc; /* channel on which SYNC_SYSMEM is sent */
+} RemoteMemSync;
+
+void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc);
+void deconfigure_memory_sync(RemoteMemSync *sync);
+
+#endif
diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
index e29c61b..a687b7d 100644
--- a/include/hw/remote/proxy.h
+++ b/include/hw/remote/proxy.h
@@ -11,6 +11,7 @@
#include "hw/pci/pci.h"
#include "io/channel.h"
+#include "hw/remote/memory-sync.h"
#define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
@@ -40,6 +41,7 @@ struct PCIProxyDev {
QemuMutex io_mutex;
QIOChannel *ioc;
Error *migration_blocker;
+ RemoteMemSync sync;
ProxyMemoryRegion region[PCI_NUM_REGIONS];
};
diff --git a/hw/remote/memory-sync.c b/hw/remote/memory-sync.c
new file mode 100644
index 0000000..2365e69
--- /dev/null
+++ b/hw/remote/memory-sync.c
@@ -0,0 +1,210 @@
+/*
+ * Copyright © 2018, 2020 Oracle and/or its affiliates.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+
+#include "qemu/osdep.h"
+#include "qemu-common.h"
+
+#include "qemu/compiler.h"
+#include "qemu/int128.h"
+#include "qemu/range.h"
+#include "exec/memory.h"
+#include "exec/cpu-common.h"
+#include "cpu.h"
+#include "exec/ram_addr.h"
+#include "exec/address-spaces.h"
+#include "qapi/error.h"
+#include "hw/remote/mpqemu-link.h"
+#include "hw/remote/memory-sync.h"
+
+/*
+ * MemoryListener 'begin' hook: throw away the section list gathered so far
+ * so that the region_add/region_nop callbacks can rebuild it from scratch.
+ * deconfigure_memory_sync() also calls this directly to release the list.
+ */
+static void proxy_ml_begin(MemoryListener *listener)
+{
+    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+    MemoryRegionSection *sec = sync->mr_sections;
+    int remaining = sync->n_mr_sections;
+
+    /* Drop the reference that was taken when each section was recorded. */
+    while (remaining-- > 0) {
+        memory_region_unref(sec->mr);
+        sec++;
+    }
+
+    sync->n_mr_sections = 0;
+    g_free(sync->mr_sections);
+    sync->mr_sections = NULL;
+}
+
+/*
+ * Resolve the host virtual address @host to its MemoryRegion and return the
+ * value of memory_region_get_fd() for that region.
+ *
+ * If @offset is non-NULL it receives the offset that
+ * memory_region_from_host() computed for @host.
+ */
+static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
+{
+    ram_addr_t region_off;
+    MemoryRegion *region;
+
+    /*
+     * @host comes from the MemoryListener callbacks and is assumed to be
+     * valid. According to the original authors, an invalid address makes
+     * the lookup return the default subregion of "system_memory" rather
+     * than NULL, which is why the result is not NULL-checked here.
+     */
+    region = memory_region_from_host((void *)(uintptr_t)host, &region_off);
+
+    if (offset != NULL) {
+        *offset = region_off;
+    }
+
+    return memory_region_get_fd(region);
+}
+
+/*
+ * Decide whether a section whose host address is @host may be folded into
+ * the previously recorded section whose host address is @prev_host.
+ *
+ * @size is the distance expected between the two start addresses
+ * (try_merge() passes the guest-physical distance). The sections qualify
+ * only if they are exactly that far apart in host memory and both host
+ * addresses resolve to the same file descriptor.
+ *
+ * Returns true when both conditions hold, false otherwise.
+ */
+static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size)
+{
+    /* Plain arithmetic: rule this out before paying for the fd lookups. */
+    if (prev_host + size != host) {
+        return false;
+    }
+
+    return get_fd_from_hostaddr(host, NULL) ==
+           get_fd_from_hostaddr(prev_host, NULL);
+}
+
+/*
+ * Try to account for @section without appending a new entry to
+ * sync->mr_sections.
+ *
+ * Returns true when the caller must NOT append @section, which is the case
+ * when either
+ *   - the host memory behind @section has no file descriptor
+ *     (get_fd_from_hostaddr() < 0), so the section is left out entirely, or
+ *   - @section was folded into the most recently recorded section.
+ * Returns false when @section has to be recorded as a new entry.
+ *
+ * Before comparison the new section is widened to whole pages of its
+ * RAMBlock: start addresses are rounded down and the size is rounded up.
+ */
+static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section)
+{
+    uint64_t page, new_size, new_gpa;
+    uint64_t last_gpa, last_size, last_gpa_end;
+    uint64_t last_host, last_host_end, lo_host, hi_host;
+    MemoryRegionSection *last;
+    uintptr_t new_host;
+
+    new_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
+               section->offset_within_region;
+
+    /*
+     * Filter out memory without an fd before looking at the list, so that
+     * the very first section is screened the same way as later ones.
+     */
+    if (get_fd_from_hostaddr(new_host, NULL) < 0) {
+        return true;
+    }
+
+    /* Nothing recorded yet, hence nothing to merge with. */
+    if (!sync->n_mr_sections) {
+        return false;
+    }
+
+    page = (uint64_t)qemu_ram_pagesize(section->mr->ram_block);
+    new_size = int128_get64(section->size);
+    new_gpa = section->offset_within_address_space;
+
+    new_host &= ~(page - 1);
+    new_gpa &= ~(page - 1);
+    new_size = ROUND_UP(new_size, page);
+
+    last = &sync->mr_sections[sync->n_mr_sections - 1];
+    last_gpa = last->offset_within_address_space;
+    last_size = int128_get64(last->size);
+    last_gpa_end = range_get_last(last_gpa, last_size);
+    last_host = (uintptr_t)memory_region_get_ram_ptr(last->mr) +
+                last->offset_within_region;
+    last_host_end = range_get_last(last_host, last_size);
+
+    /* A gap in guest-physical space rules out merging. */
+    if (new_gpa > last_gpa_end + 1) {
+        return false;
+    }
+
+    /* The code requires the new section to start above the previous one. */
+    g_assert(new_gpa > last_gpa);
+
+    if (section->mr != last->mr ||
+        !proxy_mrs_can_merge(new_host, last_host, new_gpa - last_gpa)) {
+        return false;
+    }
+
+    /* Grow the previous entry so that it spans both sections. */
+    lo_host = MIN(last_host, new_host);
+    hi_host = MAX(last_host_end, new_host + new_size);
+
+    last->offset_within_address_space = MIN(last_gpa, new_gpa);
+    last->offset_within_region =
+        lo_host - (uintptr_t)memory_region_get_ram_ptr(last->mr);
+    last->size = int128_make64(hi_host - lo_host);
+
+    return true;
+}
+
+/*
+ * MemoryListener 'region_add' / 'region_nop' hook: record @section if it is
+ * writable RAM. Sections for which try_merge() returns true are not added
+ * as a separate entry.
+ *
+ * Each recorded entry holds a reference on its MemoryRegion, which
+ * proxy_ml_begin() drops again.
+ */
+static void proxy_ml_region_addnop(MemoryListener *listener,
+                                   MemoryRegionSection *section)
+{
+    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+    MemoryRegionSection *slot;
+
+    /* Only RAM that is not ROM is of interest. */
+    if (!memory_region_is_ram(section->mr) ||
+        memory_region_is_rom(section->mr)) {
+        return;
+    }
+
+    if (try_merge(sync, section)) {
+        return;
+    }
+
+    /* Grow the array by one and copy the section into the new last slot. */
+    sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
+                                sync->n_mr_sections + 1);
+    slot = &sync->mr_sections[sync->n_mr_sections++];
+    *slot = *section;
+    slot->fv = NULL;
+    memory_region_ref(section->mr);
+}
+
+/*
+ * MemoryListener 'commit' hook: describe every recorded section in a single
+ * SYNC_SYSMEM message and send it on sync->ioc.
+ *
+ * For section i the message carries its guest-physical address, its size,
+ * the fd returned by get_fd_from_hostaddr() for its host address and the
+ * offset reported by that lookup. If more than REMOTE_MAX_FDS sections
+ * were recorded, an error is reported and nothing is sent.
+ *
+ * A failed send is reported and the Error object is released.
+ */
+static void proxy_ml_commit(MemoryListener *listener)
+{
+    RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
+    Error *local_err = NULL;
+    MPQemuMsg msg;
+    int region;
+
+    /* Start from an all-zero message. */
+    memset(&msg, 0, sizeof(msg));
+
+    msg.cmd = SYNC_SYSMEM;
+    msg.num_fds = sync->n_mr_sections;
+    msg.size = sizeof(SyncSysmemMsg);
+    if (msg.num_fds > REMOTE_MAX_FDS) {
+        error_report("Number of fds is more than %d", REMOTE_MAX_FDS);
+        return;
+    }
+
+    for (region = 0; region < sync->n_mr_sections; region++) {
+        MemoryRegionSection *section = &sync->mr_sections[region];
+        uintptr_t host_addr;
+        ram_addr_t offset;
+
+        host_addr = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
+                    section->offset_within_region;
+
+        msg.data.sync_sysmem.gpas[region] =
+            section->offset_within_address_space;
+        msg.data.sync_sysmem.sizes[region] = int128_get64(section->size);
+        msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
+        msg.data.sync_sysmem.offsets[region] = offset;
+    }
+
+    mpqemu_msg_send(&msg, sync->ioc, &local_err);
+    if (local_err) {
+        /* Reports the underlying cause after the prefix and frees the Error. */
+        error_reportf_err(local_err, "Error in sending command %d: ", msg.cmd);
+    }
+}
+
+/*
+ * Stop listening for memory layout changes and release the recorded
+ * sections, including the MemoryRegion references they hold.
+ */
+void deconfigure_memory_sync(RemoteMemSync *sync)
+{
+    MemoryListener *ml = &sync->listener;
+
+    memory_listener_unregister(ml);
+
+    /* Reuse the 'begin' hook: it unrefs and frees the whole section list. */
+    proxy_ml_begin(ml);
+}
+
+/*
+ * Initialise @sync and register its listener on address_space_memory.
+ * SYNC_SYSMEM messages produced by the listener are sent on @ioc.
+ *
+ * Only the begin, commit, region_add, region_nop and priority members of
+ * the listener are set here.
+ */
+void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc)
+{
+    MemoryListener *ml = &sync->listener;
+
+    sync->ioc = ioc;
+    sync->mr_sections = NULL;
+    sync->n_mr_sections = 0;
+
+    ml->priority = 10;
+    /* Added and unchanged regions are handled by the same callback. */
+    ml->region_add = proxy_ml_region_addnop;
+    ml->region_nop = proxy_ml_region_addnop;
+    ml->begin = proxy_ml_begin;
+    ml->commit = proxy_ml_commit;
+
+    memory_listener_register(ml, &address_space_memory);
+}
diff --git a/hw/remote/message.c b/hw/remote/message.c
index 0f3e38a..454fd2d 100644
--- a/hw/remote/message.c
+++ b/hw/remote/message.c
@@ -17,6 +17,7 @@
#include "sysemu/runstate.h"
#include "hw/pci/pci.h"
#include "exec/memattrs.h"
+#include "hw/remote/memory.h"
static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
MPQemuMsg *msg);
@@ -64,6 +65,10 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
case BAR_READ:
process_bar_read(com->ioc, &msg, &local_err);
break;
+ case SYNC_SYSMEM:
+ remote_sysmem_reconfig(&msg, &local_err);
+ break;
+
default:
error_setg(&local_err,
"Unknown command (%d) received for device %s (pid=%d)",
diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
index 039347d..0f2d1aa 100644
--- a/hw/remote/proxy.c
+++ b/hw/remote/proxy.c
@@ -18,6 +18,8 @@
#include "migration/blocker.h"
#include "hw/remote/mpqemu-link.h"
#include "qemu/error-report.h"
+#include "hw/remote/memory-sync.h"
+#include "qom/object.h"
static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
{
@@ -58,6 +60,8 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
qemu_mutex_init(&dev->io_mutex);
qio_channel_set_blocking(dev->ioc, true, NULL);
+
+ configure_memory_sync(&dev->sync, dev->ioc);
}
static void pci_proxy_dev_exit(PCIDevice *pdev)
@@ -69,6 +73,8 @@ static void pci_proxy_dev_exit(PCIDevice *pdev)
migrate_del_blocker(dev->migration_blocker);
error_free(dev->migration_blocker);
+
+ deconfigure_memory_sync(&dev->sync);
}
static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
diff --git a/MAINTAINERS b/MAINTAINERS
index ebd1d1d..5d78b78 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3150,6 +3150,8 @@ F: include/hw/remote/memory.h
F: hw/remote/memory.c
F: hw/remote/proxy.c
F: include/hw/remote/proxy.h
+F: hw/remote/memory-sync.c
+F: include/hw/remote/memory-sync.h
Build and test automation
-------------------------
diff --git a/hw/remote/meson.build b/hw/remote/meson.build
index 569cd20..7d434a5 100644
--- a/hw/remote/meson.build
+++ b/hw/remote/meson.build
@@ -7,5 +7,6 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c'))
+specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory-sync.c'))
softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss)
--
1.8.3.1
Hi
On Wed, Dec 2, 2020 at 12:23 AM Jagannathan Raman <jag.raman@oracle.com>
wrote:
> Add memory-listener object which is used to keep the view of the RAM
> in sync between QEMU and remote process.
> A MemoryListener is registered for system-memory AddressSpace. The
> listener sends SYNC_SYSMEM message to the remote process when memory
> listener commits the changes to memory, the remote process receives
> the message and processes it in the handler for SYNC_SYSMEM message.
>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> include/hw/remote/memory-sync.h | 27 ++++++
> include/hw/remote/proxy.h | 2 +
> hw/remote/memory-sync.c | 210
> ++++++++++++++++++++++++++++++++++++++++
> hw/remote/message.c | 5 +
> hw/remote/proxy.c | 6 ++
> MAINTAINERS | 2 +
> hw/remote/meson.build | 1 +
> 7 files changed, 253 insertions(+)
> create mode 100644 include/hw/remote/memory-sync.h
> create mode 100644 hw/remote/memory-sync.c
>
> diff --git a/include/hw/remote/memory-sync.h
> b/include/hw/remote/memory-sync.h
> new file mode 100644
> index 0000000..785f76a
> --- /dev/null
> +++ b/include/hw/remote/memory-sync.h
> @@ -0,0 +1,27 @@
> +/*
> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef MEMORY_SYNC_H
> +#define MEMORY_SYNC_H
> +
> +#include "exec/memory.h"
> +#include "io/channel.h"
> +
> +typedef struct RemoteMemSync {
> + MemoryListener listener;
> +
> + int n_mr_sections;
> + MemoryRegionSection *mr_sections;
> +
> + QIOChannel *ioc;
> +} RemoteMemSync;
> +
> +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc);
> +void deconfigure_memory_sync(RemoteMemSync *sync);
>
RemoteMemSync vs MemorySync, and function with _memory_sync suffixes...
Naming things is hard, but trying to be consistent generally helps.
My understanding is that this is a proxy-dev helper to handle memory
listening and sending SYNC_SYSMEM.
I would thus suggest naming it ProxyMemoryListener. It could eventually be
folded in proxy.c
Please try to be consistent with header naming, structure naming, type,
functions and enum prefixes etc.
proxy_memory_listener isn't that long imho.
+
> +#endif
> diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
> index e29c61b..a687b7d 100644
> --- a/include/hw/remote/proxy.h
> +++ b/include/hw/remote/proxy.h
> @@ -11,6 +11,7 @@
>
> #include "hw/pci/pci.h"
> #include "io/channel.h"
> +#include "hw/remote/memory-sync.h"
>
> #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
>
> @@ -40,6 +41,7 @@ struct PCIProxyDev {
> QemuMutex io_mutex;
> QIOChannel *ioc;
> Error *migration_blocker;
> + RemoteMemSync sync;
> ProxyMemoryRegion region[PCI_NUM_REGIONS];
> };
>
> diff --git a/hw/remote/memory-sync.c b/hw/remote/memory-sync.c
> new file mode 100644
> index 0000000..2365e69
> --- /dev/null
> +++ b/hw/remote/memory-sync.c
> @@ -0,0 +1,210 @@
> +/*
> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +
> +#include "qemu/compiler.h"
> +#include "qemu/int128.h"
> +#include "qemu/range.h"
> +#include "exec/memory.h"
> +#include "exec/cpu-common.h"
> +#include "cpu.h"
> +#include "exec/ram_addr.h"
> +#include "exec/address-spaces.h"
> +#include "hw/remote/mpqemu-link.h"
> +#include "hw/remote/memory-sync.h"
> +
> +static void proxy_ml_begin(MemoryListener *listener)
>
I suggest to rename begin -> reset
+{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> + int mrs;
> +
> + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
> + memory_region_unref(sync->mr_sections[mrs].mr);
> + }
> +
> + g_free(sync->mr_sections);
> + sync->mr_sections = NULL;
> + sync->n_mr_sections = 0;
> +}
> +
> +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
>
This function is very similar to vhost_user_get_mr_data(). That suggests we
could factor the code.
Perhaps a new memory_region_from_host_full(), or extend
memory_region_from_host() with an extra optional "int *fd" argument.
> +{
> + MemoryRegion *mr;
> + ram_addr_t off;
> +
> + /**
> + * Assumes that the host address is a valid address as it's
> + * coming from the MemoryListener system. In the case host
> + * address is not valid, the following call would return
> + * the default subregion of "system_memory" region, and
> + * not NULL. So it's not possible to check for NULL here.
> + */
> + mr = memory_region_from_host((void *)(uintptr_t)host, &off);
> +
> + if (offset) {
> + *offset = off;
> + }
> +
> + return memory_region_get_fd(mr);
> +}
> +
> +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t
> size)
> +{
>
This seems similar to vhost_user_can_merge().
+ bool merge;
> + int fd1, fd2;
> +
> + fd1 = get_fd_from_hostaddr(host, NULL);
> +
> + fd2 = get_fd_from_hostaddr(prev_host, NULL);
> +
> + merge = (fd1 == fd2);
>
This could be written in a simpler manner, ex:
if (get_fd_from_hostaddr(host, NULL) != get_fd_from_hostaddr(prev_host,
NULL))
return false
+
> + merge &= ((prev_host + size) == host);
>
That check could be done early on before doing the more expensive
memory_region_from_host() calls
+
> + return merge;
> +}
> +
> +static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section)
> +{
> + uint64_t mrs_size, mrs_gpa, mrs_page;
> + MemoryRegionSection *prev_sec;
> + bool merged = false;
> + uintptr_t mrs_host;
> + RAMBlock *mrs_rb;
> +
> + if (!sync->n_mr_sections) {
> + return false;
> + }
> +
> + mrs_rb = section->mr->ram_block;
> + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
> + mrs_size = int128_get64(section->size);
> + mrs_gpa = section->offset_within_address_space;
> + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> + section->offset_within_region;
> +
> + if (get_fd_from_hostaddr(mrs_host, NULL) < 0) {
> + return true;
> + }
> +
> + mrs_host = mrs_host & ~(mrs_page - 1);
> + mrs_gpa = mrs_gpa & ~(mrs_page - 1);
> + mrs_size = ROUND_UP(mrs_size, mrs_page);
> +
> + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
> + uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
> + uint64_t prev_size = int128_get64(prev_sec->size);
> + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
> + uint64_t prev_host_start =
> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
> + prev_sec->offset_within_region;
> + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
> +
> + if (mrs_gpa <= (prev_gpa_end + 1)) {
> + g_assert(mrs_gpa > prev_gpa_start);
> +
> + if ((section->mr == prev_sec->mr) &&
> + proxy_mrs_can_merge(mrs_host, prev_host_start,
> + (mrs_gpa - prev_gpa_start))) {
> + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
> + merged = true;
> + prev_sec->offset_within_address_space =
> + MIN(prev_gpa_start, mrs_gpa);
> + prev_sec->offset_within_region =
> + MIN(prev_host_start, mrs_host) -
> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
> + prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
> + mrs_host));
> + }
> + }
> +
> + return merged;
> +}
> +
> +static void proxy_ml_region_addnop(MemoryListener *listener,
> + MemoryRegionSection *section)
> +{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> +
> + if (!(memory_region_is_ram(section->mr) &&
> + !memory_region_is_rom(section->mr))) {
> + return;
>
A bit clearer in vhost.c:
if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) {
> + }
> +
> + if (try_merge(sync, section)) {
> + return;
> + }
> +
> + ++sync->n_mr_sections;
> + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
> + sync->n_mr_sections);
> + sync->mr_sections[sync->n_mr_sections - 1] = *section;
> + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
> + memory_region_ref(section->mr);
> +}
> +
> +static void proxy_ml_commit(MemoryListener *listener)
> +{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> + MPQemuMsg msg;
> + MemoryRegionSection *section;
> + ram_addr_t offset;
> + uintptr_t host_addr;
> + int region;
> + Error *local_err = NULL;
> +
> + memset(&msg, 0, sizeof(MPQemuMsg));
> +
> + msg.cmd = SYNC_SYSMEM;
> + msg.num_fds = sync->n_mr_sections;
> + msg.size = sizeof(SyncSysmemMsg);
> + if (msg.num_fds > REMOTE_MAX_FDS) {
> + error_report("Number of fds is more than %d", REMOTE_MAX_FDS);
> + return;
> + }
> +
> + for (region = 0; region < sync->n_mr_sections; region++) {
> + section = &sync->mr_sections[region];
> + msg.data.sync_sysmem.gpas[region] =
> + section->offset_within_address_space;
> + msg.data.sync_sysmem.sizes[region] = int128_get64(section->size);
> + host_addr = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> + section->offset_within_region;
> + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
> + msg.data.sync_sysmem.offsets[region] = offset;
> + }
> + mpqemu_msg_send(&msg, sync->ioc, &local_err);
> + if (local_err) {
> + error_report("Error in sending command %d", msg.cmd);
> + }
> +}
>
That whole complex code above duplicates much of the logic in vhost.c. Can
we try to factorize it instead?
+
> +void deconfigure_memory_sync(RemoteMemSync *sync)
> +{
> + memory_listener_unregister(&sync->listener);
> +
> + proxy_ml_begin(&sync->listener);
> +}
> +
> +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc)
> +{
> + sync->n_mr_sections = 0;
> + sync->mr_sections = NULL;
> +
> + sync->ioc = ioc;
> +
> + sync->listener.begin = proxy_ml_begin;
> + sync->listener.commit = proxy_ml_commit;
> + sync->listener.region_add = proxy_ml_region_addnop;
> + sync->listener.region_nop = proxy_ml_region_addnop;
> + sync->listener.priority = 10;
> +
> + memory_listener_register(&sync->listener, &address_space_memory);
> +}
> diff --git a/hw/remote/message.c b/hw/remote/message.c
> index 0f3e38a..454fd2d 100644
> --- a/hw/remote/message.c
> +++ b/hw/remote/message.c
> @@ -17,6 +17,7 @@
> #include "sysemu/runstate.h"
> #include "hw/pci/pci.h"
> #include "exec/memattrs.h"
> +#include "hw/remote/memory.h"
>
> static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
> MPQemuMsg *msg);
> @@ -64,6 +65,10 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
> case BAR_READ:
> process_bar_read(com->ioc, &msg, &local_err);
> break;
> + case SYNC_SYSMEM:
> + remote_sysmem_reconfig(&msg, &local_err);
> + break;
> +
> default:
> error_setg(&local_err,
> "Unknown command (%d) received for device %s
> (pid=%d)",
> diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
> index 039347d..0f2d1aa 100644
> --- a/hw/remote/proxy.c
> +++ b/hw/remote/proxy.c
> @@ -18,6 +18,8 @@
> #include "migration/blocker.h"
> #include "hw/remote/mpqemu-link.h"
> #include "qemu/error-report.h"
> +#include "hw/remote/memory-sync.h"
> +#include "qom/object.h"
>
> static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
> {
> @@ -58,6 +60,8 @@ static void pci_proxy_dev_realize(PCIDevice *device,
> Error **errp)
>
> qemu_mutex_init(&dev->io_mutex);
> qio_channel_set_blocking(dev->ioc, true, NULL);
> +
> + configure_memory_sync(&dev->sync, dev->ioc);
> }
>
> static void pci_proxy_dev_exit(PCIDevice *pdev)
> @@ -69,6 +73,8 @@ static void pci_proxy_dev_exit(PCIDevice *pdev)
> migrate_del_blocker(dev->migration_blocker);
>
> error_free(dev->migration_blocker);
> +
> + deconfigure_memory_sync(&dev->sync);
> }
>
> static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ebd1d1d..5d78b78 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3150,6 +3150,8 @@ F: include/hw/remote/memory.h
> F: hw/remote/memory.c
> F: hw/remote/proxy.c
> F: include/hw/remote/proxy.h
> +F: hw/remote/memory-sync.c
> +F: include/hw/remote/memory-sync.h
>
> Build and test automation
> -------------------------
> diff --git a/hw/remote/meson.build b/hw/remote/meson.build
> index 569cd20..7d434a5 100644
> --- a/hw/remote/meson.build
> +++ b/hw/remote/meson.build
> @@ -7,5 +7,6 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true:
> files('remote-obj.c'))
> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
>
> specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c'))
> +specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true:
> files('memory-sync.c'))
>
> softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss)
> --
> 1.8.3.1
>
>
--
Marc-André Lureau
> On Dec 8, 2020, at 8:57 AM, Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
>
> Hi
>
> On Wed, Dec 2, 2020 at 12:23 AM Jagannathan Raman <jag.raman@oracle.com> wrote:
> Add memory-listener object which is used to keep the view of the RAM
> in sync between QEMU and remote process.
> A MemoryListener is registered for system-memory AddressSpace. The
> listener sends SYNC_SYSMEM message to the remote process when memory
> listener commits the changes to memory, the remote process receives
> the message and processes it in the handler for SYNC_SYSMEM message.
>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> ---
> include/hw/remote/memory-sync.h | 27 ++++++
> include/hw/remote/proxy.h | 2 +
> hw/remote/memory-sync.c | 210 ++++++++++++++++++++++++++++++++++++++++
> hw/remote/message.c | 5 +
> hw/remote/proxy.c | 6 ++
> MAINTAINERS | 2 +
> hw/remote/meson.build | 1 +
> 7 files changed, 253 insertions(+)
> create mode 100644 include/hw/remote/memory-sync.h
> create mode 100644 hw/remote/memory-sync.c
>
> diff --git a/include/hw/remote/memory-sync.h b/include/hw/remote/memory-sync.h
> new file mode 100644
> index 0000000..785f76a
> --- /dev/null
> +++ b/include/hw/remote/memory-sync.h
> @@ -0,0 +1,27 @@
> +/*
> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#ifndef MEMORY_SYNC_H
> +#define MEMORY_SYNC_H
> +
> +#include "exec/memory.h"
> +#include "io/channel.h"
> +
> +typedef struct RemoteMemSync {
> + MemoryListener listener;
> +
> + int n_mr_sections;
> + MemoryRegionSection *mr_sections;
> +
> + QIOChannel *ioc;
> +} RemoteMemSync;
> +
> +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc);
> +void deconfigure_memory_sync(RemoteMemSync *sync);
>
> RemoteMemSync vs MemorySync, and function with _memory_sync suffixes...
> Naming things is hard, but trying to be consistent generally helps.
>
> My understanding is that this is a proxy-dev helper to handle memory listening and sending SYNC_SYSMEM.
>
> I would thus suggest naming it ProxyMemoryListener. It could eventually be folded in proxy.c
>
> Please try to be consistent with header naming, structure naming, type, functions and enum prefixes etc.
>
> proxy_memory_listener isn't that long imho.
>
> +
> +#endif
> diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
> index e29c61b..a687b7d 100644
> --- a/include/hw/remote/proxy.h
> +++ b/include/hw/remote/proxy.h
> @@ -11,6 +11,7 @@
>
> #include "hw/pci/pci.h"
> #include "io/channel.h"
> +#include "hw/remote/memory-sync.h"
>
> #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
>
> @@ -40,6 +41,7 @@ struct PCIProxyDev {
> QemuMutex io_mutex;
> QIOChannel *ioc;
> Error *migration_blocker;
> + RemoteMemSync sync;
> ProxyMemoryRegion region[PCI_NUM_REGIONS];
> };
>
> diff --git a/hw/remote/memory-sync.c b/hw/remote/memory-sync.c
> new file mode 100644
> index 0000000..2365e69
> --- /dev/null
> +++ b/hw/remote/memory-sync.c
> @@ -0,0 +1,210 @@
> +/*
> + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> + *
> + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> + * See the COPYING file in the top-level directory.
> + *
> + */
> +
> +#include "qemu/osdep.h"
> +#include "qemu-common.h"
> +
> +#include "qemu/compiler.h"
> +#include "qemu/int128.h"
> +#include "qemu/range.h"
> +#include "exec/memory.h"
> +#include "exec/cpu-common.h"
> +#include "cpu.h"
> +#include "exec/ram_addr.h"
> +#include "exec/address-spaces.h"
> +#include "hw/remote/mpqemu-link.h"
> +#include "hw/remote/memory-sync.h"
> +
> +static void proxy_ml_begin(MemoryListener *listener)
>
> I suggest to rename begin -> reset
>
> +{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> + int mrs;
> +
> + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
> + memory_region_unref(sync->mr_sections[mrs].mr);
> + }
> +
> + g_free(sync->mr_sections);
> + sync->mr_sections = NULL;
> + sync->n_mr_sections = 0;
> +}
> +
> +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
>
> This function is very similar to vhost_user_get_mr_data(). That suggests we could factor the code.
>
> Perhaps a new memory_region_from_host_full(), or extend memory_region_from_host() with an extra optional "int *fd" argument.
>
> +{
> + MemoryRegion *mr;
> + ram_addr_t off;
> +
> + /**
> + * Assumes that the host address is a valid address as it's
> + * coming from the MemoryListener system. In the case host
> + * address is not valid, the following call would return
> + * the default subregion of "system_memory" region, and
> + * not NULL. So it's not possible to check for NULL here.
> + */
> + mr = memory_region_from_host((void *)(uintptr_t)host, &off);
> +
> + if (offset) {
> + *offset = off;
> + }
> +
> + return memory_region_get_fd(mr);
> +}
> +
> +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size)
> +{
>
> This seems similar to vhost_user_can_merge().
>
> + bool merge;
> + int fd1, fd2;
> +
> + fd1 = get_fd_from_hostaddr(host, NULL);
> +
> + fd2 = get_fd_from_hostaddr(prev_host, NULL);
> +
> + merge = (fd1 == fd2);
>
> This could be written in a simpler manner, ex:
>
> if (get_fd_from_hostaddr(host, NULL) != get_fd_from_hostaddr(prev_host, NULL))
> return false
>
> +
> + merge &= ((prev_host + size) == host);
>
> That check could be done early on before doing the more expensive memory_region_from_host() calls
>
> +
> + return merge;
> +}
> +
> +static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section)
> +{
> + uint64_t mrs_size, mrs_gpa, mrs_page;
> + MemoryRegionSection *prev_sec;
> + bool merged = false;
> + uintptr_t mrs_host;
> + RAMBlock *mrs_rb;
> +
> + if (!sync->n_mr_sections) {
> + return false;
> + }
> +
> + mrs_rb = section->mr->ram_block;
> + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
> + mrs_size = int128_get64(section->size);
> + mrs_gpa = section->offset_within_address_space;
> + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> + section->offset_within_region;
> +
> + if (get_fd_from_hostaddr(mrs_host, NULL) < 0) {
> + return true;
> + }
> +
> + mrs_host = mrs_host & ~(mrs_page - 1);
> + mrs_gpa = mrs_gpa & ~(mrs_page - 1);
> + mrs_size = ROUND_UP(mrs_size, mrs_page);
> +
> + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
> + uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
> + uint64_t prev_size = int128_get64(prev_sec->size);
> + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
> + uint64_t prev_host_start =
> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
> + prev_sec->offset_within_region;
> + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
> +
> + if (mrs_gpa <= (prev_gpa_end + 1)) {
> + g_assert(mrs_gpa > prev_gpa_start);
> +
> + if ((section->mr == prev_sec->mr) &&
> + proxy_mrs_can_merge(mrs_host, prev_host_start,
> + (mrs_gpa - prev_gpa_start))) {
> + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
> + merged = true;
> + prev_sec->offset_within_address_space =
> + MIN(prev_gpa_start, mrs_gpa);
> + prev_sec->offset_within_region =
> + MIN(prev_host_start, mrs_host) -
> + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
> + prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
> + mrs_host));
> + }
> + }
> +
> + return merged;
> +}
> +
> +static void proxy_ml_region_addnop(MemoryListener *listener,
> + MemoryRegionSection *section)
> +{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> +
> + if (!(memory_region_is_ram(section->mr) &&
> + !memory_region_is_rom(section->mr))) {
> + return;
>
> A bit clearer in vhost.c:
> if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) {
>
> + }
> +
> + if (try_merge(sync, section)) {
> + return;
> + }
> +
> + ++sync->n_mr_sections;
> + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
> + sync->n_mr_sections);
> + sync->mr_sections[sync->n_mr_sections - 1] = *section;
> + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
> + memory_region_ref(section->mr);
> +}
> +
> +static void proxy_ml_commit(MemoryListener *listener)
> +{
> + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> + MPQemuMsg msg;
> + MemoryRegionSection *section;
> + ram_addr_t offset;
> + uintptr_t host_addr;
> + int region;
> + Error *local_err = NULL;
> +
> + memset(&msg, 0, sizeof(MPQemuMsg));
> +
> + msg.cmd = SYNC_SYSMEM;
> + msg.num_fds = sync->n_mr_sections;
> + msg.size = sizeof(SyncSysmemMsg);
> + if (msg.num_fds > REMOTE_MAX_FDS) {
> + error_report("Number of fds is more than %d", REMOTE_MAX_FDS);
> + return;
> + }
> +
> + for (region = 0; region < sync->n_mr_sections; region++) {
> + section = &sync->mr_sections[region];
> + msg.data.sync_sysmem.gpas[region] =
> + section->offset_within_address_space;
> + msg.data.sync_sysmem.sizes[region] = int128_get64(section->size);
> + host_addr = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> + section->offset_within_region;
> + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
> + msg.data.sync_sysmem.offsets[region] = offset;
> + }
> + mpqemu_msg_send(&msg, sync->ioc, &local_err);
> + if (local_err) {
> + error_report("Error in sending command %d", msg.cmd);
> + }
> +}
>
> That whole complex code above duplicates much of the logic in vhost.c. Can we try to factorize it instead?
Hi Marc-Andre,
Thank you for sharing your feedback!
Would it be alright if we addressed this item alone in a separate patch in the future? Since
this refactoring affects vhost code, we’re wondering whether it would be better to address it in a
future patch to help with any regression analysis in the future.
Thank you!
—
Jag
>
> +
> +void deconfigure_memory_sync(RemoteMemSync *sync)
> +{
> + memory_listener_unregister(&sync->listener);
> +
> + proxy_ml_begin(&sync->listener);
> +}
> +
> +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc)
> +{
> + sync->n_mr_sections = 0;
> + sync->mr_sections = NULL;
> +
> + sync->ioc = ioc;
> +
> + sync->listener.begin = proxy_ml_begin;
> + sync->listener.commit = proxy_ml_commit;
> + sync->listener.region_add = proxy_ml_region_addnop;
> + sync->listener.region_nop = proxy_ml_region_addnop;
> + sync->listener.priority = 10;
> +
> + memory_listener_register(&sync->listener, &address_space_memory);
> +}
> diff --git a/hw/remote/message.c b/hw/remote/message.c
> index 0f3e38a..454fd2d 100644
> --- a/hw/remote/message.c
> +++ b/hw/remote/message.c
> @@ -17,6 +17,7 @@
> #include "sysemu/runstate.h"
> #include "hw/pci/pci.h"
> #include "exec/memattrs.h"
> +#include "hw/remote/memory.h"
>
> static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
> MPQemuMsg *msg);
> @@ -64,6 +65,10 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
> case BAR_READ:
> process_bar_read(com->ioc, &msg, &local_err);
> break;
> + case SYNC_SYSMEM:
> + remote_sysmem_reconfig(&msg, &local_err);
> + break;
> +
> default:
> error_setg(&local_err,
> "Unknown command (%d) received for device %s (pid=%d)",
> diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
> index 039347d..0f2d1aa 100644
> --- a/hw/remote/proxy.c
> +++ b/hw/remote/proxy.c
> @@ -18,6 +18,8 @@
> #include "migration/blocker.h"
> #include "hw/remote/mpqemu-link.h"
> #include "qemu/error-report.h"
> +#include "hw/remote/memory-sync.h"
> +#include "qom/object.h"
>
> static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
> {
> @@ -58,6 +60,8 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
>
> qemu_mutex_init(&dev->io_mutex);
> qio_channel_set_blocking(dev->ioc, true, NULL);
> +
> + configure_memory_sync(&dev->sync, dev->ioc);
> }
>
> static void pci_proxy_dev_exit(PCIDevice *pdev)
> @@ -69,6 +73,8 @@ static void pci_proxy_dev_exit(PCIDevice *pdev)
> migrate_del_blocker(dev->migration_blocker);
>
> error_free(dev->migration_blocker);
> +
> + deconfigure_memory_sync(&dev->sync);
> }
>
> static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
> diff --git a/MAINTAINERS b/MAINTAINERS
> index ebd1d1d..5d78b78 100644
> --- a/MAINTAINERS
> +++ b/MAINTAINERS
> @@ -3150,6 +3150,8 @@ F: include/hw/remote/memory.h
> F: hw/remote/memory.c
> F: hw/remote/proxy.c
> F: include/hw/remote/proxy.h
> +F: hw/remote/memory-sync.c
> +F: include/hw/remote/memory-sync.h
>
> Build and test automation
> -------------------------
> diff --git a/hw/remote/meson.build b/hw/remote/meson.build
> index 569cd20..7d434a5 100644
> --- a/hw/remote/meson.build
> +++ b/hw/remote/meson.build
> @@ -7,5 +7,6 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
> remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
>
> specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c'))
> +specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory-sync.c'))
>
> softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss)
> --
> 1.8.3.1
>
>
>
> --
> Marc-André Lureau
On Wed, Dec 9, 2020 at 8:20 PM Jag Raman <jag.raman@oracle.com> wrote:
>
>
> > On Dec 8, 2020, at 8:57 AM, Marc-André Lureau <
> marcandre.lureau@gmail.com> wrote:
> >
> > Hi
> >
> > On Wed, Dec 2, 2020 at 12:23 AM Jagannathan Raman <jag.raman@oracle.com>
> wrote:
> > Add memory-listener object which is used to keep the view of the RAM
> > in sync between QEMU and remote process.
> > A MemoryListener is registered for system-memory AddressSpace. The
> > listener sends SYNC_SYSMEM message to the remote process when memory
> > listener commits the changes to memory, the remote process receives
> > the message and processes it in the handler for SYNC_SYSMEM message.
> >
> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > include/hw/remote/memory-sync.h | 27 ++++++
> > include/hw/remote/proxy.h | 2 +
> > hw/remote/memory-sync.c | 210
> ++++++++++++++++++++++++++++++++++++++++
> > hw/remote/message.c | 5 +
> > hw/remote/proxy.c | 6 ++
> > MAINTAINERS | 2 +
> > hw/remote/meson.build | 1 +
> > 7 files changed, 253 insertions(+)
> > create mode 100644 include/hw/remote/memory-sync.h
> > create mode 100644 hw/remote/memory-sync.c
> >
> > diff --git a/include/hw/remote/memory-sync.h
> b/include/hw/remote/memory-sync.h
> > new file mode 100644
> > index 0000000..785f76a
> > --- /dev/null
> > +++ b/include/hw/remote/memory-sync.h
> > @@ -0,0 +1,27 @@
> > +/*
> > + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef MEMORY_SYNC_H
> > +#define MEMORY_SYNC_H
> > +
> > +#include "exec/memory.h"
> > +#include "io/channel.h"
> > +
> > +typedef struct RemoteMemSync {
> > + MemoryListener listener;
> > +
> > + int n_mr_sections;
> > + MemoryRegionSection *mr_sections;
> > +
> > + QIOChannel *ioc;
> > +} RemoteMemSync;
> > +
> > +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc);
> > +void deconfigure_memory_sync(RemoteMemSync *sync);
> >
> > RemoteMemSync vs MemorySync, and function with _memory_sync suffixes...
> > Naming things is hard, but trying to be consistent generally helps.
> >
> > My understanding is that this is a proxy-dev helper to handle memory
> listening and sending SYNC_SYSMEM.
> >
> > I would thus suggest naming it ProxyMemoryListener. It could eventually
> be folded in proxy.c
> >
> > Please try to be consistent with header naming, structure naming, type,
> functions and enum prefixes etc.
> >
> > proxy_memory_listener isn't that long imho.
> >
> > +
> > +#endif
> > diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
> > index e29c61b..a687b7d 100644
> > --- a/include/hw/remote/proxy.h
> > +++ b/include/hw/remote/proxy.h
> > @@ -11,6 +11,7 @@
> >
> > #include "hw/pci/pci.h"
> > #include "io/channel.h"
> > +#include "hw/remote/memory-sync.h"
> >
> > #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
> >
> > @@ -40,6 +41,7 @@ struct PCIProxyDev {
> > QemuMutex io_mutex;
> > QIOChannel *ioc;
> > Error *migration_blocker;
> > + RemoteMemSync sync;
> > ProxyMemoryRegion region[PCI_NUM_REGIONS];
> > };
> >
> > diff --git a/hw/remote/memory-sync.c b/hw/remote/memory-sync.c
> > new file mode 100644
> > index 0000000..2365e69
> > --- /dev/null
> > +++ b/hw/remote/memory-sync.c
> > @@ -0,0 +1,210 @@
> > +/*
> > + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or
> later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu-common.h"
> > +
> > +#include "qemu/compiler.h"
> > +#include "qemu/int128.h"
> > +#include "qemu/range.h"
> > +#include "exec/memory.h"
> > +#include "exec/cpu-common.h"
> > +#include "cpu.h"
> > +#include "exec/ram_addr.h"
> > +#include "exec/address-spaces.h"
> > +#include "hw/remote/mpqemu-link.h"
> > +#include "hw/remote/memory-sync.h"
> > +
> > +static void proxy_ml_begin(MemoryListener *listener)
> >
> > I suggest to rename begin -> reset
> >
> > +{
> > + RemoteMemSync *sync = container_of(listener, RemoteMemSync,
> listener);
> > + int mrs;
> > +
> > + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
> > + memory_region_unref(sync->mr_sections[mrs].mr);
> > + }
> > +
> > + g_free(sync->mr_sections);
> > + sync->mr_sections = NULL;
> > + sync->n_mr_sections = 0;
> > +}
> > +
> > +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
> >
> > This function is very similar to vhost_user_get_mr_data(). That suggests
> we could factor the code.
> >
> > Perhaps a new memory_region_from_host_full(), or extend
> memory_region_from_host() with an extra optional "int *fd" argument.
> >
> > +{
> > + MemoryRegion *mr;
> > + ram_addr_t off;
> > +
> > + /**
> > + * Assumes that the host address is a valid address as it's
> > + * coming from the MemoryListener system. In the case host
> > + * address is not valid, the following call would return
> > + * the default subregion of "system_memory" region, and
> > + * not NULL. So it's not possible to check for NULL here.
> > + */
> > + mr = memory_region_from_host((void *)(uintptr_t)host, &off);
> > +
> > + if (offset) {
> > + *offset = off;
> > + }
> > +
> > + return memory_region_get_fd(mr);
> > +}
> > +
> > +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host,
> size_t size)
> > +{
> >
> > This seems similar to vhost_user_can_merge().
> >
> > + bool merge;
> > + int fd1, fd2;
> > +
> > + fd1 = get_fd_from_hostaddr(host, NULL);
> > +
> > + fd2 = get_fd_from_hostaddr(prev_host, NULL);
> > +
> > + merge = (fd1 == fd2);
> >
> > This could be written in a simpler manner, ex:
> >
> > if (get_fd_from_hostaddr(host, NULL) != get_fd_from_hostaddr(prev_host,
> NULL))
> > return false
> >
> > +
> > + merge &= ((prev_host + size) == host);
> >
> > That check could be done early on before doing the more expensive
> memory_region_from_host() calls
> >
> > +
> > + return merge;
> > +}
> > +
> > +static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section)
> > +{
> > + uint64_t mrs_size, mrs_gpa, mrs_page;
> > + MemoryRegionSection *prev_sec;
> > + bool merged = false;
> > + uintptr_t mrs_host;
> > + RAMBlock *mrs_rb;
> > +
> > + if (!sync->n_mr_sections) {
> > + return false;
> > + }
> > +
> > + mrs_rb = section->mr->ram_block;
> > + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
> > + mrs_size = int128_get64(section->size);
> > + mrs_gpa = section->offset_within_address_space;
> > + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> > + section->offset_within_region;
> > +
> > + if (get_fd_from_hostaddr(mrs_host, NULL) < 0) {
> > + return true;
> > + }
> > +
> > + mrs_host = mrs_host & ~(mrs_page - 1);
> > + mrs_gpa = mrs_gpa & ~(mrs_page - 1);
> > + mrs_size = ROUND_UP(mrs_size, mrs_page);
> > +
> > + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
> > + uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
> > + uint64_t prev_size = int128_get64(prev_sec->size);
> > + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
> > + uint64_t prev_host_start =
> > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
> > + prev_sec->offset_within_region;
> > + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
> > +
> > + if (mrs_gpa <= (prev_gpa_end + 1)) {
> > + g_assert(mrs_gpa > prev_gpa_start);
> > +
> > + if ((section->mr == prev_sec->mr) &&
> > + proxy_mrs_can_merge(mrs_host, prev_host_start,
> > + (mrs_gpa - prev_gpa_start))) {
> > + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
> > + merged = true;
> > + prev_sec->offset_within_address_space =
> > + MIN(prev_gpa_start, mrs_gpa);
> > + prev_sec->offset_within_region =
> > + MIN(prev_host_start, mrs_host) -
> > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
> > + prev_sec->size = int128_make64(max_end -
> MIN(prev_host_start,
> > + mrs_host));
> > + }
> > + }
> > +
> > + return merged;
> > +}
> > +
> > +static void proxy_ml_region_addnop(MemoryListener *listener,
> > + MemoryRegionSection *section)
> > +{
> > + RemoteMemSync *sync = container_of(listener, RemoteMemSync,
> listener);
> > +
> > + if (!(memory_region_is_ram(section->mr) &&
> > + !memory_region_is_rom(section->mr))) {
> > + return;
> >
> > A bit clearer in vhost.c:
> > if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) {
> >
> > + }
> > +
> > + if (try_merge(sync, section)) {
> > + return;
> > + }
> > +
> > + ++sync->n_mr_sections;
> > + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
> > + sync->n_mr_sections);
> > + sync->mr_sections[sync->n_mr_sections - 1] = *section;
> > + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
> > + memory_region_ref(section->mr);
> > +}
> > +
> > +static void proxy_ml_commit(MemoryListener *listener)
> > +{
> > + RemoteMemSync *sync = container_of(listener, RemoteMemSync,
> listener);
> > + MPQemuMsg msg;
> > + MemoryRegionSection *section;
> > + ram_addr_t offset;
> > + uintptr_t host_addr;
> > + int region;
> > + Error *local_err = NULL;
> > +
> > + memset(&msg, 0, sizeof(MPQemuMsg));
> > +
> > + msg.cmd = SYNC_SYSMEM;
> > + msg.num_fds = sync->n_mr_sections;
> > + msg.size = sizeof(SyncSysmemMsg);
> > + if (msg.num_fds > REMOTE_MAX_FDS) {
> > + error_report("Number of fds is more than %d", REMOTE_MAX_FDS);
> > + return;
> > + }
> > +
> > + for (region = 0; region < sync->n_mr_sections; region++) {
> > + section = &sync->mr_sections[region];
> > + msg.data.sync_sysmem.gpas[region] =
> > + section->offset_within_address_space;
> > + msg.data.sync_sysmem.sizes[region] =
> int128_get64(section->size);
> > + host_addr = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> > + section->offset_within_region;
> > + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
> > + msg.data.sync_sysmem.offsets[region] = offset;
> > + }
> > + mpqemu_msg_send(&msg, sync->ioc, &local_err);
> > + if (local_err) {
> > + error_report("Error in sending command %d", msg.cmd);
> > + }
> > +}
> >
> > That whole complex code above duplicates much of the logic in vhost.c.
> Can we try to factorize it instead?
>
> Hi Marc-Andre,
>
> Thank you for sharing your feedback!
>
> Would it be alright if we addressed this item alone in a separate patch in
> the future? Since
> this refactoring affects vhost code, we’re wondering whether it would be better to
> address it in a
> future patch to help with any regression analysis in the future.
>
That's fine with me, but please leave a TODO note in the code then.
thanks
> Thank you!
> —
> Jag
>
> >
> > +
> > +void deconfigure_memory_sync(RemoteMemSync *sync)
> > +{
> > + memory_listener_unregister(&sync->listener);
> > +
> > + proxy_ml_begin(&sync->listener);
> > +}
> > +
> > +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc)
> > +{
> > + sync->n_mr_sections = 0;
> > + sync->mr_sections = NULL;
> > +
> > + sync->ioc = ioc;
> > +
> > + sync->listener.begin = proxy_ml_begin;
> > + sync->listener.commit = proxy_ml_commit;
> > + sync->listener.region_add = proxy_ml_region_addnop;
> > + sync->listener.region_nop = proxy_ml_region_addnop;
> > + sync->listener.priority = 10;
> > +
> > + memory_listener_register(&sync->listener, &address_space_memory);
> > +}
> > diff --git a/hw/remote/message.c b/hw/remote/message.c
> > index 0f3e38a..454fd2d 100644
> > --- a/hw/remote/message.c
> > +++ b/hw/remote/message.c
> > @@ -17,6 +17,7 @@
> > #include "sysemu/runstate.h"
> > #include "hw/pci/pci.h"
> > #include "exec/memattrs.h"
> > +#include "hw/remote/memory.h"
> >
> > static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
> > MPQemuMsg *msg);
> > @@ -64,6 +65,10 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void
> *data)
> > case BAR_READ:
> > process_bar_read(com->ioc, &msg, &local_err);
> > break;
> > + case SYNC_SYSMEM:
> > + remote_sysmem_reconfig(&msg, &local_err);
> > + break;
> > +
> > default:
> > error_setg(&local_err,
> > "Unknown command (%d) received for device %s
> (pid=%d)",
> > diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
> > index 039347d..0f2d1aa 100644
> > --- a/hw/remote/proxy.c
> > +++ b/hw/remote/proxy.c
> > @@ -18,6 +18,8 @@
> > #include "migration/blocker.h"
> > #include "hw/remote/mpqemu-link.h"
> > #include "qemu/error-report.h"
> > +#include "hw/remote/memory-sync.h"
> > +#include "qom/object.h"
> >
> > static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
> > {
> > @@ -58,6 +60,8 @@ static void pci_proxy_dev_realize(PCIDevice *device,
> Error **errp)
> >
> > qemu_mutex_init(&dev->io_mutex);
> > qio_channel_set_blocking(dev->ioc, true, NULL);
> > +
> > + configure_memory_sync(&dev->sync, dev->ioc);
> > }
> >
> > static void pci_proxy_dev_exit(PCIDevice *pdev)
> > @@ -69,6 +73,8 @@ static void pci_proxy_dev_exit(PCIDevice *pdev)
> > migrate_del_blocker(dev->migration_blocker);
> >
> > error_free(dev->migration_blocker);
> > +
> > + deconfigure_memory_sync(&dev->sync);
> > }
> >
> > static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t
> *val,
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index ebd1d1d..5d78b78 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -3150,6 +3150,8 @@ F: include/hw/remote/memory.h
> > F: hw/remote/memory.c
> > F: hw/remote/proxy.c
> > F: include/hw/remote/proxy.h
> > +F: hw/remote/memory-sync.c
> > +F: include/hw/remote/memory-sync.h
> >
> > Build and test automation
> > -------------------------
> > diff --git a/hw/remote/meson.build b/hw/remote/meson.build
> > index 569cd20..7d434a5 100644
> > --- a/hw/remote/meson.build
> > +++ b/hw/remote/meson.build
> > @@ -7,5 +7,6 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true:
> files('remote-obj.c'))
> > remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
> >
> > specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c'))
> > +specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true:
> files('memory-sync.c'))
> >
> > softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss)
> > --
> > 1.8.3.1
> >
> >
> >
> > --
> > Marc-André Lureau
>
>
--
Marc-André Lureau
> On Dec 9, 2020, at 4:28 PM, Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
>
>
>
> On Wed, Dec 9, 2020 at 8:20 PM Jag Raman <jag.raman@oracle.com> wrote:
>
>
> > On Dec 8, 2020, at 8:57 AM, Marc-André Lureau <marcandre.lureau@gmail.com> wrote:
> >
> > Hi
> >
> > On Wed, Dec 2, 2020 at 12:23 AM Jagannathan Raman <jag.raman@oracle.com> wrote:
> > Add memory-listener object which is used to keep the view of the RAM
> > in sync between QEMU and remote process.
> > A MemoryListener is registered for system-memory AddressSpace. The
> > listener sends SYNC_SYSMEM message to the remote process when memory
> > listener commits the changes to memory, the remote process receives
> > the message and processes it in the handler for SYNC_SYSMEM message.
> >
> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> > Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
> > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> > Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
> > ---
> > include/hw/remote/memory-sync.h | 27 ++++++
> > include/hw/remote/proxy.h | 2 +
> > hw/remote/memory-sync.c | 210 ++++++++++++++++++++++++++++++++++++++++
> > hw/remote/message.c | 5 +
> > hw/remote/proxy.c | 6 ++
> > MAINTAINERS | 2 +
> > hw/remote/meson.build | 1 +
> > 7 files changed, 253 insertions(+)
> > create mode 100644 include/hw/remote/memory-sync.h
> > create mode 100644 hw/remote/memory-sync.c
> >
> > diff --git a/include/hw/remote/memory-sync.h b/include/hw/remote/memory-sync.h
> > new file mode 100644
> > index 0000000..785f76a
> > --- /dev/null
> > +++ b/include/hw/remote/memory-sync.h
> > @@ -0,0 +1,27 @@
> > +/*
> > + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#ifndef MEMORY_SYNC_H
> > +#define MEMORY_SYNC_H
> > +
> > +#include "exec/memory.h"
> > +#include "io/channel.h"
> > +
> > +typedef struct RemoteMemSync {
> > + MemoryListener listener;
> > +
> > + int n_mr_sections;
> > + MemoryRegionSection *mr_sections;
> > +
> > + QIOChannel *ioc;
> > +} RemoteMemSync;
> > +
> > +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc);
> > +void deconfigure_memory_sync(RemoteMemSync *sync);
> >
> > RemoteMemSync vs MemorySync, and function with _memory_sync suffixes...
> > Naming things is hard, but trying to be consistent generally helps.
> >
> > My understanding is that this is a proxy-dev helper to handle memory listening and sending SYNC_SYSMEM.
> >
> > I would thus suggest naming it ProxyMemoryListener. It could eventually be folded in proxy.c
> >
> > Please try to be consistent with header naming, structure naming, type, functions and enum prefixes etc.
> >
> > proxy_memory_listener isn't that long imho.
> >
> > +
> > +#endif
> > diff --git a/include/hw/remote/proxy.h b/include/hw/remote/proxy.h
> > index e29c61b..a687b7d 100644
> > --- a/include/hw/remote/proxy.h
> > +++ b/include/hw/remote/proxy.h
> > @@ -11,6 +11,7 @@
> >
> > #include "hw/pci/pci.h"
> > #include "io/channel.h"
> > +#include "hw/remote/memory-sync.h"
> >
> > #define TYPE_PCI_PROXY_DEV "x-pci-proxy-dev"
> >
> > @@ -40,6 +41,7 @@ struct PCIProxyDev {
> > QemuMutex io_mutex;
> > QIOChannel *ioc;
> > Error *migration_blocker;
> > + RemoteMemSync sync;
> > ProxyMemoryRegion region[PCI_NUM_REGIONS];
> > };
> >
> > diff --git a/hw/remote/memory-sync.c b/hw/remote/memory-sync.c
> > new file mode 100644
> > index 0000000..2365e69
> > --- /dev/null
> > +++ b/hw/remote/memory-sync.c
> > @@ -0,0 +1,210 @@
> > +/*
> > + * Copyright © 2018, 2020 Oracle and/or its affiliates.
> > + *
> > + * This work is licensed under the terms of the GNU GPL, version 2 or later.
> > + * See the COPYING file in the top-level directory.
> > + *
> > + */
> > +
> > +#include "qemu/osdep.h"
> > +#include "qemu-common.h"
> > +
> > +#include "qemu/compiler.h"
> > +#include "qemu/int128.h"
> > +#include "qemu/range.h"
> > +#include "exec/memory.h"
> > +#include "exec/cpu-common.h"
> > +#include "cpu.h"
> > +#include "exec/ram_addr.h"
> > +#include "exec/address-spaces.h"
> > +#include "hw/remote/mpqemu-link.h"
> > +#include "hw/remote/memory-sync.h"
> > +
> > +static void proxy_ml_begin(MemoryListener *listener)
> >
> > I suggest to rename begin -> reset
> >
> > +{
> > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> > + int mrs;
> > +
> > + for (mrs = 0; mrs < sync->n_mr_sections; mrs++) {
> > + memory_region_unref(sync->mr_sections[mrs].mr);
> > + }
> > +
> > + g_free(sync->mr_sections);
> > + sync->mr_sections = NULL;
> > + sync->n_mr_sections = 0;
> > +}
> > +
> > +static int get_fd_from_hostaddr(uint64_t host, ram_addr_t *offset)
> >
> > This function is very similar to vhost_user_get_mr_data(). That suggests we could factor the code.
> >
> > Perhaps a new memory_region_from_host_full(), or extend memory_region_from_host() with an extra optional "int *fd" argument.
> >
> > +{
> > + MemoryRegion *mr;
> > + ram_addr_t off;
> > +
> > + /**
> > + * Assumes that the host address is a valid address as it's
> > + * coming from the MemoryListener system. In the case host
> > + * address is not valid, the following call would return
> > + * the default subregion of "system_memory" region, and
> > + * not NULL. So it's not possible to check for NULL here.
> > + */
> > + mr = memory_region_from_host((void *)(uintptr_t)host, &off);
> > +
> > + if (offset) {
> > + *offset = off;
> > + }
> > +
> > + return memory_region_get_fd(mr);
> > +}
> > +
> > +static bool proxy_mrs_can_merge(uint64_t host, uint64_t prev_host, size_t size)
> > +{
> >
> > This seems similar to vhost_user_can_merge().
> >
> > + bool merge;
> > + int fd1, fd2;
> > +
> > + fd1 = get_fd_from_hostaddr(host, NULL);
> > +
> > + fd2 = get_fd_from_hostaddr(prev_host, NULL);
> > +
> > + merge = (fd1 == fd2);
> >
> > This could be written in a simpler manner, ex:
> >
> > if (get_fd_from_hostaddr(host, NULL) != get_fd_from_hostaddr(prev_host, NULL))
> > return false
> >
> > +
> > + merge &= ((prev_host + size) == host);
> >
> > That check could be done early on before doing the more expensive memory_region_from_host() calls
> >
> > +
> > + return merge;
> > +}
> > +
> > +static bool try_merge(RemoteMemSync *sync, MemoryRegionSection *section)
> > +{
> > + uint64_t mrs_size, mrs_gpa, mrs_page;
> > + MemoryRegionSection *prev_sec;
> > + bool merged = false;
> > + uintptr_t mrs_host;
> > + RAMBlock *mrs_rb;
> > +
> > + if (!sync->n_mr_sections) {
> > + return false;
> > + }
> > +
> > + mrs_rb = section->mr->ram_block;
> > + mrs_page = (uint64_t)qemu_ram_pagesize(mrs_rb);
> > + mrs_size = int128_get64(section->size);
> > + mrs_gpa = section->offset_within_address_space;
> > + mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> > + section->offset_within_region;
> > +
> > + if (get_fd_from_hostaddr(mrs_host, NULL) < 0) {
> > + return true;
> > + }
> > +
> > + mrs_host = mrs_host & ~(mrs_page - 1);
> > + mrs_gpa = mrs_gpa & ~(mrs_page - 1);
> > + mrs_size = ROUND_UP(mrs_size, mrs_page);
> > +
> > + prev_sec = sync->mr_sections + (sync->n_mr_sections - 1);
> > + uint64_t prev_gpa_start = prev_sec->offset_within_address_space;
> > + uint64_t prev_size = int128_get64(prev_sec->size);
> > + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size);
> > + uint64_t prev_host_start =
> > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) +
> > + prev_sec->offset_within_region;
> > + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size);
> > +
> > + if (mrs_gpa <= (prev_gpa_end + 1)) {
> > + g_assert(mrs_gpa > prev_gpa_start);
> > +
> > + if ((section->mr == prev_sec->mr) &&
> > + proxy_mrs_can_merge(mrs_host, prev_host_start,
> > + (mrs_gpa - prev_gpa_start))) {
> > + uint64_t max_end = MAX(prev_host_end, mrs_host + mrs_size);
> > + merged = true;
> > + prev_sec->offset_within_address_space =
> > + MIN(prev_gpa_start, mrs_gpa);
> > + prev_sec->offset_within_region =
> > + MIN(prev_host_start, mrs_host) -
> > + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr);
> > + prev_sec->size = int128_make64(max_end - MIN(prev_host_start,
> > + mrs_host));
> > + }
> > + }
> > +
> > + return merged;
> > +}
> > +
> > +static void proxy_ml_region_addnop(MemoryListener *listener,
> > + MemoryRegionSection *section)
> > +{
> > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> > +
> > + if (!(memory_region_is_ram(section->mr) &&
> > + !memory_region_is_rom(section->mr))) {
> > + return;
> >
> > A bit clearer in vhost.c:
> > if (memory_region_is_ram(mr) && !memory_region_is_rom(mr)) {
> >
> > + }
> > +
> > + if (try_merge(sync, section)) {
> > + return;
> > + }
> > +
> > + ++sync->n_mr_sections;
> > + sync->mr_sections = g_renew(MemoryRegionSection, sync->mr_sections,
> > + sync->n_mr_sections);
> > + sync->mr_sections[sync->n_mr_sections - 1] = *section;
> > + sync->mr_sections[sync->n_mr_sections - 1].fv = NULL;
> > + memory_region_ref(section->mr);
> > +}
> > +
> > +static void proxy_ml_commit(MemoryListener *listener)
> > +{
> > + RemoteMemSync *sync = container_of(listener, RemoteMemSync, listener);
> > + MPQemuMsg msg;
> > + MemoryRegionSection *section;
> > + ram_addr_t offset;
> > + uintptr_t host_addr;
> > + int region;
> > + Error *local_err = NULL;
> > +
> > + memset(&msg, 0, sizeof(MPQemuMsg));
> > +
> > + msg.cmd = SYNC_SYSMEM;
> > + msg.num_fds = sync->n_mr_sections;
> > + msg.size = sizeof(SyncSysmemMsg);
> > + if (msg.num_fds > REMOTE_MAX_FDS) {
> > + error_report("Number of fds is more than %d", REMOTE_MAX_FDS);
> > + return;
> > + }
> > +
> > + for (region = 0; region < sync->n_mr_sections; region++) {
> > + section = &sync->mr_sections[region];
> > + msg.data.sync_sysmem.gpas[region] =
> > + section->offset_within_address_space;
> > + msg.data.sync_sysmem.sizes[region] = int128_get64(section->size);
> > + host_addr = (uintptr_t)memory_region_get_ram_ptr(section->mr) +
> > + section->offset_within_region;
> > + msg.fds[region] = get_fd_from_hostaddr(host_addr, &offset);
> > + msg.data.sync_sysmem.offsets[region] = offset;
> > + }
> > + mpqemu_msg_send(&msg, sync->ioc, &local_err);
> > + if (local_err) {
> > + error_report("Error in sending command %d", msg.cmd);
> > + }
> > +}
> >
> > That whole complex code above duplicates much of the logic in vhost.c. Can we try to factorize it instead?
>
> Hi Marc-Andre,
>
> Thank you for sharing your feedback!
>
> Would it be alright if we addressed this item alone in a separate patch in the future? Since
> this refactoring affects vhost code, we’re wondering whether it would be better to address it in a
> future patch to help with any regression analysis in the future.
>
> That's fine with me, but please leave a TODO note in the code then.
>
> thanks
Thank you very much for confirming!
—
Jag
>
>
> Thank you!
> —
> Jag
>
> >
> > +
> > +void deconfigure_memory_sync(RemoteMemSync *sync)
> > +{
> > + memory_listener_unregister(&sync->listener);
> > +
> > + proxy_ml_begin(&sync->listener);
> > +}
> > +
> > +void configure_memory_sync(RemoteMemSync *sync, QIOChannel *ioc)
> > +{
> > + sync->n_mr_sections = 0;
> > + sync->mr_sections = NULL;
> > +
> > + sync->ioc = ioc;
> > +
> > + sync->listener.begin = proxy_ml_begin;
> > + sync->listener.commit = proxy_ml_commit;
> > + sync->listener.region_add = proxy_ml_region_addnop;
> > + sync->listener.region_nop = proxy_ml_region_addnop;
> > + sync->listener.priority = 10;
> > +
> > + memory_listener_register(&sync->listener, &address_space_memory);
> > +}
> > diff --git a/hw/remote/message.c b/hw/remote/message.c
> > index 0f3e38a..454fd2d 100644
> > --- a/hw/remote/message.c
> > +++ b/hw/remote/message.c
> > @@ -17,6 +17,7 @@
> > #include "sysemu/runstate.h"
> > #include "hw/pci/pci.h"
> > #include "exec/memattrs.h"
> > +#include "hw/remote/memory.h"
> >
> > static void process_config_write(QIOChannel *ioc, PCIDevice *dev,
> > MPQemuMsg *msg);
> > @@ -64,6 +65,10 @@ void coroutine_fn mpqemu_remote_msg_loop_co(void *data)
> > case BAR_READ:
> > process_bar_read(com->ioc, &msg, &local_err);
> > break;
> > + case SYNC_SYSMEM:
> > + remote_sysmem_reconfig(&msg, &local_err);
> > + break;
> > +
> > default:
> > error_setg(&local_err,
> > "Unknown command (%d) received for device %s (pid=%d)",
> > diff --git a/hw/remote/proxy.c b/hw/remote/proxy.c
> > index 039347d..0f2d1aa 100644
> > --- a/hw/remote/proxy.c
> > +++ b/hw/remote/proxy.c
> > @@ -18,6 +18,8 @@
> > #include "migration/blocker.h"
> > #include "hw/remote/mpqemu-link.h"
> > #include "qemu/error-report.h"
> > +#include "hw/remote/memory-sync.h"
> > +#include "qom/object.h"
> >
> > static void proxy_set_socket(PCIProxyDev *pdev, int fd, Error **errp)
> > {
> > @@ -58,6 +60,8 @@ static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
> >
> > qemu_mutex_init(&dev->io_mutex);
> > qio_channel_set_blocking(dev->ioc, true, NULL);
> > +
> > + configure_memory_sync(&dev->sync, dev->ioc);
> > }
> >
> > static void pci_proxy_dev_exit(PCIDevice *pdev)
> > @@ -69,6 +73,8 @@ static void pci_proxy_dev_exit(PCIDevice *pdev)
> > migrate_del_blocker(dev->migration_blocker);
> >
> > error_free(dev->migration_blocker);
> > +
> > + deconfigure_memory_sync(&dev->sync);
> > }
> >
> > static int config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
> > diff --git a/MAINTAINERS b/MAINTAINERS
> > index ebd1d1d..5d78b78 100644
> > --- a/MAINTAINERS
> > +++ b/MAINTAINERS
> > @@ -3150,6 +3150,8 @@ F: include/hw/remote/memory.h
> > F: hw/remote/memory.c
> > F: hw/remote/proxy.c
> > F: include/hw/remote/proxy.h
> > +F: hw/remote/memory-sync.c
> > +F: include/hw/remote/memory-sync.h
> >
> > Build and test automation
> > -------------------------
> > diff --git a/hw/remote/meson.build b/hw/remote/meson.build
> > index 569cd20..7d434a5 100644
> > --- a/hw/remote/meson.build
> > +++ b/hw/remote/meson.build
> > @@ -7,5 +7,6 @@ remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('remote-obj.c'))
> > remote_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('proxy.c'))
> >
> > specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory.c'))
> > +specific_ss.add(when: 'CONFIG_MULTIPROCESS', if_true: files('memory-sync.c'))
> >
> > softmmu_ss.add_all(when: 'CONFIG_MULTIPROCESS', if_true: remote_ss)
> > --
> > 1.8.3.1
> >
> >
> >
> > --
> > Marc-André Lureau
>
>
>
> --
> Marc-André Lureau
© 2016 - 2025 Red Hat, Inc.