Some containers can directly implement unmapping all regions;
add a new flag to support this.
Originally-by: John Johnson <john.g.johnson@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: John Levon <john.levon@nutanix.com>
---
hw/vfio/common.c | 24 +++++++----------
hw/vfio/container-base.c | 4 +--
hw/vfio/container.c | 38 +++++++++++++++++++++++++--
hw/vfio/iommufd.c | 19 +++++++++++++-
include/hw/vfio/vfio-common.h | 1 +
include/hw/vfio/vfio-container-base.h | 4 +--
6 files changed, 68 insertions(+), 22 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 6f106167fd..b49aafc40c 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -324,7 +324,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
}
} else {
ret = vfio_container_dma_unmap(bcontainer, iova,
- iotlb->addr_mask + 1, iotlb);
+ iotlb->addr_mask + 1, iotlb, 0);
if (ret) {
error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx") = %d (%s)",
@@ -348,7 +348,7 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
int ret;
/* Unmap with a single call. */
- ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL);
+ ret = vfio_container_dma_unmap(bcontainer, iova, size, NULL, 0);
if (ret) {
error_report("%s: vfio_container_dma_unmap() failed: %s", __func__,
strerror(-ret));
@@ -806,21 +806,15 @@ static void vfio_listener_region_del(MemoryListener *listener,
}
if (try_unmap) {
+ int flags = 0;
+
if (int128_eq(llsize, int128_2_64())) {
- /* The unmap ioctl doesn't accept a full 64-bit span. */
- llsize = int128_rshift(llsize, 1);
- ret = vfio_container_dma_unmap(bcontainer, iova,
- int128_get64(llsize), NULL);
- if (ret) {
- error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova, int128_get64(llsize), ret,
- strerror(-ret));
- }
- iova += int128_get64(llsize);
+ flags = VFIO_DMA_UNMAP_FLAG_ALL;
}
- ret = vfio_container_dma_unmap(bcontainer, iova,
- int128_get64(llsize), NULL);
+
+ ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
+ NULL, flags);
+
if (ret) {
error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx") = %d (%s)",
diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
index 5e0c9700d9..db27e9c31d 100644
--- a/hw/vfio/container-base.c
+++ b/hw/vfio/container-base.c
@@ -27,12 +27,12 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb)
+ IOMMUTLBEntry *iotlb, int flags)
{
VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
g_assert(vioc->dma_unmap);
- return vioc->dma_unmap(bcontainer, iova, size, iotlb);
+ return vioc->dma_unmap(bcontainer, iova, size, iotlb, flags);
}
bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 0db0055f39..82987063e5 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -117,7 +117,7 @@ unmap_exit:
*/
static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb)
+ IOMMUTLBEntry *iotlb, int flags)
{
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
bcontainer);
@@ -140,6 +140,34 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
need_dirty_sync = true;
}
+ /* use unmap all if supported */
+ if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
+ unmap.iova = 0;
+ unmap.size = 0;
+ if (container->unmap_all_supported) {
+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+ } else {
+ /* unmap in halves */
+ Int128 llsize = int128_rshift(int128_2_64(), 1);
+
+ unmap.size = int128_get64(llsize);
+
+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+
+ if (ret == 0) {
+ unmap.iova += int128_get64(llsize);
+
+ ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
+ }
+ }
+
+ if (ret != 0) {
+ return -errno;
+ }
+
+ goto out;
+ }
+
while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
/*
* The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
@@ -162,6 +190,7 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
return -errno;
}
+out:
if (need_dirty_sync) {
ret = vfio_get_dirty_bitmap(bcontainer, iova, size,
iotlb->translated_addr, &local_err);
@@ -199,7 +228,7 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
*/
if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
(errno == EBUSY &&
- vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 &&
+ vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, 0) == 0 &&
ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
return 0;
}
@@ -533,6 +562,11 @@ static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp)
vfio_get_info_iova_range(info, bcontainer);
vfio_get_iommu_info_migration(container, info);
+
+ ret = ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL);
+
+ container->unmap_all_supported = (ret != 0);
+
return true;
}
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 583b063707..e295f251c0 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -41,11 +41,28 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb)
+ IOMMUTLBEntry *iotlb, int flags)
{
const VFIOIOMMUFDContainer *container =
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
+ /* unmap in halves */
+ if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
+ Int128 llsize = int128_rshift(int128_2_64(), 1);
+ int ret;
+
+ ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
+ iova, int128_get64(llsize));
+ iova += int128_get64(llsize);
+
+ if (ret == 0) {
+ ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
+ iova, int128_get64(llsize));
+ }
+
+ return ret;
+ }
+
/* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
return iommufd_backend_unmap_dma(container->be,
container->ioas_id, iova, size);
diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index ac35136a11..f4f08eb8a6 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -84,6 +84,7 @@ typedef struct VFIOContainer {
VFIOContainerBase bcontainer;
int fd; /* /dev/vfio/vfio, empowered by the attached groups */
unsigned iommu_type;
+ bool unmap_all_supported;
QLIST_HEAD(, VFIOGroup) group_list;
} VFIOContainer;
diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
index 0a863df0dc..24e48e3a07 100644
--- a/include/hw/vfio/vfio-container-base.h
+++ b/include/hw/vfio/vfio-container-base.h
@@ -76,7 +76,7 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
void *vaddr, bool readonly, MemoryRegion *mrp);
int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb);
+ IOMMUTLBEntry *iotlb, int flags);
bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
MemoryRegionSection *section,
Error **errp);
@@ -118,7 +118,7 @@ struct VFIOIOMMUClass {
void *vaddr, bool readonly, MemoryRegion *mrp);
int (*dma_unmap)(const VFIOContainerBase *bcontainer,
hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb);
+ IOMMUTLBEntry *iotlb, int flags);
bool (*attach_device)(const char *name, VFIODevice *vbasedev,
AddressSpace *as, Error **errp);
void (*detach_device)(VFIODevice *vbasedev);
--
2.34.1
On 2/19/25 15:48, John Levon wrote:
> Some containers can directly implement unmapping all regions;
> add a new flag to support this.
>
> Originally-by: John Johnson <john.g.johnson@oracle.com>
> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> hw/vfio/common.c | 24 +++++++----------
> hw/vfio/container-base.c | 4 +--
> hw/vfio/container.c | 38 +++++++++++++++++++++++++--
> hw/vfio/iommufd.c | 19 +++++++++++++-
> include/hw/vfio/vfio-common.h | 1 +
> include/hw/vfio/vfio-container-base.h | 4 +--
> 6 files changed, 68 insertions(+), 22 deletions(-)
This is difficult to understand. There are no functional changes, right?
I think it should be broken down further to clarify the changes.
Thanks,
C.
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 6f106167fd..b49aafc40c 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -324,7 +324,7 @@ static void vfio_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb)
> }
> } else {
> ret = vfio_container_dma_unmap(bcontainer, iova,
> - iotlb->addr_mask + 1, iotlb);
> + iotlb->addr_mask + 1, iotlb, 0);
> if (ret) {
> error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> "0x%"HWADDR_PRIx") = %d (%s)",
> @@ -348,7 +348,7 @@ static void vfio_ram_discard_notify_discard(RamDiscardListener *rdl,
> int ret;
>
> /* Unmap with a single call. */
> - ret = vfio_container_dma_unmap(bcontainer, iova, size , NULL);
> + ret = vfio_container_dma_unmap(bcontainer, iova, size, NULL, 0);
> if (ret) {
> error_report("%s: vfio_container_dma_unmap() failed: %s", __func__,
> strerror(-ret));
> @@ -806,21 +806,15 @@ static void vfio_listener_region_del(MemoryListener *listener,
> }
>
> if (try_unmap) {
> + int flags = 0;
> +
> if (int128_eq(llsize, int128_2_64())) {
> - /* The unmap ioctl doesn't accept a full 64-bit span. */
> - llsize = int128_rshift(llsize, 1);
> - ret = vfio_container_dma_unmap(bcontainer, iova,
> - int128_get64(llsize), NULL);
> - if (ret) {
> - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> - "0x%"HWADDR_PRIx") = %d (%s)",
> - bcontainer, iova, int128_get64(llsize), ret,
> - strerror(-ret));
> - }
> - iova += int128_get64(llsize);
> + flags = VFIO_DMA_UNMAP_FLAG_ALL;
> }
> - ret = vfio_container_dma_unmap(bcontainer, iova,
> - int128_get64(llsize), NULL);
> +
> + ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
> + NULL, flags);
> +
> if (ret) {
> error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> "0x%"HWADDR_PRIx") = %d (%s)",
> diff --git a/hw/vfio/container-base.c b/hw/vfio/container-base.c
> index 5e0c9700d9..db27e9c31d 100644
> --- a/hw/vfio/container-base.c
> +++ b/hw/vfio/container-base.c
> @@ -27,12 +27,12 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
>
> int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> - IOMMUTLBEntry *iotlb)
> + IOMMUTLBEntry *iotlb, int flags)
> {
> VFIOIOMMUClass *vioc = VFIO_IOMMU_GET_CLASS(bcontainer);
>
> g_assert(vioc->dma_unmap);
> - return vioc->dma_unmap(bcontainer, iova, size, iotlb);
> + return vioc->dma_unmap(bcontainer, iova, size, iotlb, flags);
> }
>
> bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 0db0055f39..82987063e5 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -117,7 +117,7 @@ unmap_exit:
> */
> static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> - IOMMUTLBEntry *iotlb)
> + IOMMUTLBEntry *iotlb, int flags)
> {
> const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
> bcontainer);
> @@ -140,6 +140,34 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> need_dirty_sync = true;
> }
>
> + /* use unmap all if supported */
> + if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
> + unmap.iova = 0;
> + unmap.size = 0;
> + if (container->unmap_all_supported) {
> + ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
> + } else {
> + /* unmap in halves */
> + Int128 llsize = int128_rshift(int128_2_64(), 1);
> +
> + unmap.size = int128_get64(llsize);
> +
> + ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
> +
> + if (ret == 0) {
> + unmap.iova += int128_get64(llsize);
> +
> + ret = ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, unmap);
> + }
> + }
> +
> + if (ret != 0) {
> + return -errno;
> + }
> +
> + goto out;
> + }
> +
> while (ioctl(container->fd, VFIO_IOMMU_UNMAP_DMA, &unmap)) {
> /*
> * The type1 backend has an off-by-one bug in the kernel (71a7d3d78e3c
> @@ -162,6 +190,7 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> return -errno;
> }
>
> +out:
> if (need_dirty_sync) {
> ret = vfio_get_dirty_bitmap(bcontainer, iova, size,
> iotlb->translated_addr, &local_err);
> @@ -199,7 +228,7 @@ static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
> */
> if (ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0 ||
> (errno == EBUSY &&
> - vfio_legacy_dma_unmap(bcontainer, iova, size, NULL) == 0 &&
> + vfio_legacy_dma_unmap(bcontainer, iova, size, NULL, 0) == 0 &&
> ioctl(container->fd, VFIO_IOMMU_MAP_DMA, &map) == 0)) {
> return 0;
> }
> @@ -533,6 +562,11 @@ static bool vfio_legacy_setup(VFIOContainerBase *bcontainer, Error **errp)
> vfio_get_info_iova_range(info, bcontainer);
>
> vfio_get_iommu_info_migration(container, info);
> +
> + ret = ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL);
> +
> + container->unmap_all_supported = (ret != 0);
> +
> return true;
> }
>
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 583b063707..e295f251c0 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -41,11 +41,28 @@ static int iommufd_cdev_map(const VFIOContainerBase *bcontainer, hwaddr iova,
>
> static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> - IOMMUTLBEntry *iotlb)
> + IOMMUTLBEntry *iotlb, int flags)
> {
> const VFIOIOMMUFDContainer *container =
> container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>
> + /* unmap in halves */
> + if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
> + Int128 llsize = int128_rshift(int128_2_64(), 1);
> + int ret;
> +
> + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
> + iova, int128_get64(llsize));
> + iova += int128_get64(llsize);
> +
> + if (ret == 0) {
> + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
> + iova, int128_get64(llsize));
> + }
> +
> + return ret;
> + }
> +
> /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
> return iommufd_backend_unmap_dma(container->be,
> container->ioas_id, iova, size);
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index ac35136a11..f4f08eb8a6 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -84,6 +84,7 @@ typedef struct VFIOContainer {
> VFIOContainerBase bcontainer;
> int fd; /* /dev/vfio/vfio, empowered by the attached groups */
> unsigned iommu_type;
> + bool unmap_all_supported;
> QLIST_HEAD(, VFIOGroup) group_list;
> } VFIOContainer;
>
> diff --git a/include/hw/vfio/vfio-container-base.h b/include/hw/vfio/vfio-container-base.h
> index 0a863df0dc..24e48e3a07 100644
> --- a/include/hw/vfio/vfio-container-base.h
> +++ b/include/hw/vfio/vfio-container-base.h
> @@ -76,7 +76,7 @@ int vfio_container_dma_map(VFIOContainerBase *bcontainer,
> void *vaddr, bool readonly, MemoryRegion *mrp);
> int vfio_container_dma_unmap(VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> - IOMMUTLBEntry *iotlb);
> + IOMMUTLBEntry *iotlb, int flags);
> bool vfio_container_add_section_window(VFIOContainerBase *bcontainer,
> MemoryRegionSection *section,
> Error **errp);
> @@ -118,7 +118,7 @@ struct VFIOIOMMUClass {
> void *vaddr, bool readonly, MemoryRegion *mrp);
> int (*dma_unmap)(const VFIOContainerBase *bcontainer,
> hwaddr iova, ram_addr_t size,
> - IOMMUTLBEntry *iotlb);
> + IOMMUTLBEntry *iotlb, int flags);
> bool (*attach_device)(const char *name, VFIODevice *vbasedev,
> AddressSpace *as, Error **errp);
> void (*detach_device)(VFIODevice *vbasedev);
On Wed, Apr 02, 2025 at 06:49:50PM +0200, Cédric Le Goater wrote:
> On 2/19/25 15:48, John Levon wrote:
> > Some containers can directly implement unmapping all regions;
> > add a new flag to support this.
> >
> > Originally-by: John Johnson <john.g.johnson@oracle.com>
> > Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
> > Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
> > Signed-off-by: John Levon <john.levon@nutanix.com>
> > ---
> > hw/vfio/common.c | 24 +++++++----------
> > hw/vfio/container-base.c | 4 +--
> > hw/vfio/container.c | 38 +++++++++++++++++++++++++--
> > hw/vfio/iommufd.c | 19 +++++++++++++-
> > include/hw/vfio/vfio-common.h | 1 +
> > include/hw/vfio/vfio-container-base.h | 4 +--
> > 6 files changed, 68 insertions(+), 22 deletions(-)
>
> This is difficult to understand. There are no functional changes, right?

+ ret = ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL);

This is new, we previously never even tried to use this.

> I think it should be broken down further to clarify the changes.

patch 1: add a flags param to the callbacks, always zero
patch 2: pass through unmap all flag to callbacks
patch 3: check for unmap_all extension and use it

That sound better?

regards
john
On 4/3/25 11:45, John Levon wrote:
> On Wed, Apr 02, 2025 at 06:49:50PM +0200, Cédric Le Goater wrote:
>
>> On 2/19/25 15:48, John Levon wrote:
>>> Some containers can directly implement unmapping all regions;
>>> add a new flag to support this.
>>>
>>> Originally-by: John Johnson <john.g.johnson@oracle.com>
>>> Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
>>> Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
>>> Signed-off-by: John Levon <john.levon@nutanix.com>
>>> ---
>>> hw/vfio/common.c | 24 +++++++----------
>>> hw/vfio/container-base.c | 4 +--
>>> hw/vfio/container.c | 38 +++++++++++++++++++++++++--
>>> hw/vfio/iommufd.c | 19 +++++++++++++-
>>> include/hw/vfio/vfio-common.h | 1 +
>>> include/hw/vfio/vfio-container-base.h | 4 +--
>>> 6 files changed, 68 insertions(+), 22 deletions(-)
>>
>> This is difficult to understand. There are no functional changes, right?
>
> + ret = ioctl(container->fd, VFIO_CHECK_EXTENSION, VFIO_UNMAP_ALL);
>
> This is new, we previously never even tried to use this.
>
>> I think it should be broken down further to clarify the changes.
>
> patch 1: add a flags param to the callbacks, always zero
> patch 2: pass through unmap all flag to callbacks
> patch 3: check for unmap_all extension and use it
>
> That sound better?

yes. Let's see at next respin.

Thanks,

C.
© 2016 - 2026 Red Hat, Inc.