On 3/7/23 13:54, Joao Martins wrote:
> According to the device DMA logging uAPI, IOVA ranges to be logged by
> the device must be provided all at once upon DMA logging start.
>
> As preparation for the following patches which will add device dirty
> page tracking, keep a record of all DMA mapped IOVA ranges so later they
> can be used for DMA logging start.
>
> Signed-off-by: Avihai Horon <avihaih@nvidia.com>
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
> hw/vfio/common.c | 85 ++++++++++++++++++++++++++++++++++++++++++++
> hw/vfio/trace-events | 1 +
> 2 files changed, 86 insertions(+)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 63831eab78a1..811502dbc97c 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1325,11 +1325,96 @@ static int vfio_set_dirty_page_tracking(VFIOContainer *container, bool start)
> return ret;
> }
>
> +typedef struct VFIODirtyRanges {
> + hwaddr min32;
> + hwaddr max32;
> + hwaddr min64;
> + hwaddr max64;
> +} VFIODirtyRanges;
> +
> +typedef struct VFIODirtyRangesListener {
> + VFIOContainer *container;
> + VFIODirtyRanges ranges;
I would have made this a pointer to the caller's VFIODirtyRanges instead,
to avoid the struct copy (*ranges = dirty.ranges) in
vfio_dirty_tracking_init(). Anyhow, this is minor.
Reviewed-by: Cédric Le Goater <clg@redhat.com>
Thanks,
C.
> + MemoryListener listener;
> +} VFIODirtyRangesListener;
> +
> +static void vfio_dirty_tracking_update(MemoryListener *listener,
> + MemoryRegionSection *section)
> +{
> + VFIODirtyRangesListener *dirty = container_of(listener,
> + VFIODirtyRangesListener,
> + listener);
> + VFIODirtyRanges *range = &dirty->ranges;
> + hwaddr iova, end, *min, *max;
> +
> + if (!vfio_listener_valid_section(section, "tracking_update") ||
> + !vfio_get_section_iova_range(dirty->container, section,
> + &iova, &end, NULL)) {
> + return;
> + }
> +
> + /*
> + * The address space passed to the dirty tracker is reduced to two ranges:
> + * one for 32-bit DMA ranges, and another one for 64-bit DMA ranges.
> + * The underlying dirty page reports will query a sub-interval of each of
> + * these ranges.
> + *
> + * The purpose of the dual range handling is to handle known cases of big
> + * holes in the address space, like the x86 AMD 1T hole. The alternative
> + * would be an IOVATree but that has a much bigger runtime overhead and
> + * unnecessary complexity.
> + */
> + min = (end <= UINT32_MAX) ? &range->min32 : &range->min64;
> + max = (end <= UINT32_MAX) ? &range->max32 : &range->max64;
> +
> + if (*min > iova) {
> + *min = iova;
> + }
> + if (*max < end) {
> + *max = end;
> + }
> +
> + trace_vfio_device_dirty_tracking_update(iova, end, *min, *max);
> + return;
> +}
> +
> +static const MemoryListener vfio_dirty_tracking_listener = {
> + .name = "vfio-tracking",
> + .region_add = vfio_dirty_tracking_update,
> +};
> +
> +static void vfio_dirty_tracking_init(VFIOContainer *container,
> + VFIODirtyRanges *ranges)
> +{
> + VFIODirtyRangesListener dirty;
> +
> + memset(&dirty, 0, sizeof(dirty));
> + dirty.ranges.min32 = UINT32_MAX;
> + dirty.ranges.min64 = UINT64_MAX;
> + dirty.listener = vfio_dirty_tracking_listener;
> + dirty.container = container;
> +
> + memory_listener_register(&dirty.listener,
> + container->space->as);
> +
> + *ranges = dirty.ranges;
> +
> + /*
> + * The memory listener is synchronous, and is used to calculate the ranges
> + * to dirty track. Unregister it after we are done as we are not
> + * interested in any follow-up updates.
> + */
> + memory_listener_unregister(&dirty.listener);
> +}
> +
> static void vfio_listener_log_global_start(MemoryListener *listener)
> {
> VFIOContainer *container = container_of(listener, VFIOContainer, listener);
> + VFIODirtyRanges ranges;
> int ret;
>
> + vfio_dirty_tracking_init(container, &ranges);
> +
> ret = vfio_set_dirty_page_tracking(container, true);
> if (ret) {
> vfio_set_migration_error(ret);
> diff --git a/hw/vfio/trace-events b/hw/vfio/trace-events
> index 7173e6a5c721..dd9fd7b9bddb 100644
> --- a/hw/vfio/trace-events
> +++ b/hw/vfio/trace-events
> @@ -103,6 +103,7 @@ vfio_listener_region_add_ram(uint64_t iova_start, uint64_t iova_end, void *vaddr
> vfio_known_safe_misalignment(const char *name, uint64_t iova, uint64_t offset_within_region, uintptr_t page_size) "Region \"%s\" iova=0x%"PRIx64" offset_within_region=0x%"PRIx64" qemu_real_host_page_size=0x%"PRIxPTR
> vfio_listener_region_add_no_dma_map(const char *name, uint64_t iova, uint64_t size, uint64_t page_size) "Region \"%s\" 0x%"PRIx64" size=0x%"PRIx64" is not aligned to 0x%"PRIx64" and cannot be mapped for DMA"
> vfio_listener_region_del(uint64_t start, uint64_t end) "region_del 0x%"PRIx64" - 0x%"PRIx64
> +vfio_device_dirty_tracking_update(uint64_t start, uint64_t end, uint64_t min, uint64_t max) "section 0x%"PRIx64" - 0x%"PRIx64" -> update [0x%"PRIx64" - 0x%"PRIx64"]"
> vfio_disconnect_container(int fd) "close container->fd=%d"
> vfio_put_group(int fd) "close group->fd=%d"
> vfio_get_device(const char * name, unsigned int flags, unsigned int num_regions, unsigned int num_irqs) "Device %s flags: %u, regions: %u, irqs: %u"