[PATCH 07/14] vfio: specify VFIO_DMA_UNMAP_FLAG_ALL to callback

John Levon posted 14 patches 7 months, 1 week ago
There is a newer version of this series
[PATCH 07/14] vfio: specify VFIO_DMA_UNMAP_FLAG_ALL to callback
Posted by John Levon 7 months, 1 week ago
Use the new flags parameter to indicate when we want to unmap
everything; no functional change is intended.

Signed-off-by: John Levon <john.levon@nutanix.com>
---
 hw/vfio/container.c | 49 ++++++++++++++++++++++++++++++++++++---------
 hw/vfio/iommufd.c   | 19 +++++++++++++++++-
 hw/vfio/listener.c  | 19 ++++++------------
 3 files changed, 63 insertions(+), 24 deletions(-)

diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 625bbe82a7..37b1217fd8 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -119,12 +119,9 @@ unmap_exit:
     return ret;
 }
 
-/*
- * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
- */
-static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
-                                 hwaddr iova, ram_addr_t size,
-                                 IOMMUTLBEntry *iotlb, int flags)
+static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
+                                     hwaddr iova, ram_addr_t size,
+                                     IOMMUTLBEntry *iotlb)
 {
     const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
                                                   bcontainer);
@@ -138,10 +135,6 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
     int ret;
     Error *local_err = NULL;
 
-    if (flags != 0) {
-        return -ENOTSUP;
-    }
-
     if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
         if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
             bcontainer->dirty_pages_supported) {
@@ -185,6 +178,42 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
     return 0;
 }
 
+/*
+ * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
+ */
+static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
+                                 hwaddr iova, ram_addr_t size,
+                                 IOMMUTLBEntry *iotlb, int flags)
+{
+    int ret;
+
+    if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
+        return -ENOTSUP;
+    }
+
+    if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
+        /* The unmap ioctl doesn't accept a full 64-bit span. */
+        Int128 llsize = int128_rshift(int128_2_64(), 1);
+
+        ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize),
+                                        iotlb);
+
+        if (ret == 0) {
+            ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize),
+                                            int128_get64(llsize), iotlb);
+        }
+
+    } else {
+        ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb);
+    }
+
+    if (ret != 0) {
+        return -errno;
+    }
+
+    return 0;
+}
+
 static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
                                ram_addr_t size, void *vaddr, bool readonly)
 {
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 07334e65b5..22e5b16967 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -51,10 +51,27 @@ static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
     const VFIOIOMMUFDContainer *container =
         container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
 
-    if (flags != 0) {
+    if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
         return -ENOTSUP;
     }
 
+    /* unmap in halves */
+    if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
+        Int128 llsize = int128_rshift(int128_2_64(), 1);
+        int ret;
+
+        ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
+                                        0, int128_get64(llsize));
+
+        if (ret == 0) {
+            ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
+                                            int128_get64(llsize),
+                                            int128_get64(llsize));
+        }
+
+        return ret;
+    }
+
     /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
     return iommufd_backend_unmap_dma(container->be,
                                      container->ioas_id, iova, size);
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index c52d4a52ef..bcf2b98e79 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -661,21 +661,14 @@ static void vfio_listener_region_del(MemoryListener *listener,
     }
 
     if (try_unmap) {
+        int flags = 0;
+
         if (int128_eq(llsize, int128_2_64())) {
-            /* The unmap ioctl doesn't accept a full 64-bit span. */
-            llsize = int128_rshift(llsize, 1);
-            ret = vfio_container_dma_unmap(bcontainer, iova,
-                                           int128_get64(llsize), NULL, 0);
-            if (ret) {
-                error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
-                             "0x%"HWADDR_PRIx") = %d (%s)",
-                             bcontainer, iova, int128_get64(llsize), ret,
-                             strerror(-ret));
-            }
-            iova += int128_get64(llsize);
+            flags = VFIO_DMA_UNMAP_FLAG_ALL;
+            llsize = 0;
         }
-        ret = vfio_container_dma_unmap(bcontainer, iova,
-                                       int128_get64(llsize), NULL, 0);
+        ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
+                                       NULL, flags);
         if (ret) {
             error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
                          "0x%"HWADDR_PRIx") = %d (%s)",
-- 
2.34.1
Re: [PATCH 07/14] vfio: specify VFIO_DMA_UNMAP_FLAG_ALL to callback
Posted by Cédric Le Goater 6 months, 3 weeks ago
On 4/9/25 15:48, John Levon wrote:
> Use the new flags parameter to indicate when we want to unmap
> everything; no functional change is intended.

I find these changes confusing. Most likely there are not well presented
or I am missing something. Some more below.

> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
>   hw/vfio/container.c | 49 ++++++++++++++++++++++++++++++++++++---------
>   hw/vfio/iommufd.c   | 19 +++++++++++++++++-
>   hw/vfio/listener.c  | 19 ++++++------------
>   3 files changed, 63 insertions(+), 24 deletions(-)
> 
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 625bbe82a7..37b1217fd8 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -119,12 +119,9 @@ unmap_exit:
>       return ret;
>   }
>   
> -/*
> - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
> - */
> -static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> -                                 hwaddr iova, ram_addr_t size,
> -                                 IOMMUTLBEntry *iotlb, int flags)
> +static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
> +                                     hwaddr iova, ram_addr_t size,
> +                                     IOMMUTLBEntry *iotlb)
>   {
>       const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
>                                                     bcontainer);
> @@ -138,10 +135,6 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
>       int ret;
>       Error *local_err = NULL;
>   
> -    if (flags != 0) {
> -        return -ENOTSUP;
> -    }
> -
>       if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
>           if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
>               bcontainer->dirty_pages_supported) {
> @@ -185,6 +178,42 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
>       return 0;
>   }
>   
> +/*
> + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
> + */
> +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> +                                 hwaddr iova, ram_addr_t size,
> +                                 IOMMUTLBEntry *iotlb, int flags)
> +{
> +    int ret;
> +
> +    if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {

VFIO_DMA_UNMAP_FLAG_ALL is a kernel uapi flag. It should be used only with
the corresponding ioctl(VFIO_IOMMU_UNMAP_DMA) and not internally between
QEMU routines.

I think adding a 'bool unmap_all' paremeter to vfio_legacy_dma_unmap() would
make more sense.


> +        return -ENOTSUP;
> +    }
> +
> +    if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
> +        /* The unmap ioctl doesn't accept a full 64-bit span. */
> +        Int128 llsize = int128_rshift(int128_2_64(), 1);
> +
> +        ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize),
> +                                        iotlb);
> +
> +        if (ret == 0) {
> +            ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize),
> +                                            int128_get64(llsize), iotlb);
> +        }
> +
> +    } else {
> +        ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb);
> +    }
> +
> +    if (ret != 0) {
> +        return -errno;
> +    }
> +
> +    return 0;
> +}
> +
>   static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
>                                  ram_addr_t size, void *vaddr, bool readonly)
>   {
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 07334e65b5..22e5b16967 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -51,10 +51,27 @@ static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
>       const VFIOIOMMUFDContainer *container =
>           container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>   
> -    if (flags != 0) {
> +    if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
>           return -ENOTSUP;
>       }
>   
> +    /* unmap in halves */
> +    if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
> +        Int128 llsize = int128_rshift(int128_2_64(), 1);
> +        int ret;
> +
> +        ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
> +                                        0, int128_get64(llsize));
> +
> +        if (ret == 0) {
> +            ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
> +                                            int128_get64(llsize),
> +                                            int128_get64(llsize));
> +        }
> +
> +        return ret;
> +    }
> +
>       /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
>       return iommufd_backend_unmap_dma(container->be,
>                                        container->ioas_id, iova, size);
> diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
> index c52d4a52ef..bcf2b98e79 100644
> --- a/hw/vfio/listener.c
> +++ b/hw/vfio/listener.c
> @@ -661,21 +661,14 @@ static void vfio_listener_region_del(MemoryListener *listener,
>       }
>   
>       if (try_unmap) {
> +        int flags = 0;
> +
>           if (int128_eq(llsize, int128_2_64())) {
> -            /* The unmap ioctl doesn't accept a full 64-bit span. */
> -            llsize = int128_rshift(llsize, 1);
> -            ret = vfio_container_dma_unmap(bcontainer, iova,
> -                                           int128_get64(llsize), NULL, 0);
> -            if (ret) {
> -                error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> -                             "0x%"HWADDR_PRIx") = %d (%s)",
> -                             bcontainer, iova, int128_get64(llsize), ret,
> -                             strerror(-ret));
> -            }
> -            iova += int128_get64(llsize);
> +            flags = VFIO_DMA_UNMAP_FLAG_ALL;
> +            llsize = 0;

please change this initialization to :

             llsize = int128_zero();

>           }
> -        ret = vfio_container_dma_unmap(bcontainer, iova,
> -                                       int128_get64(llsize), NULL, 0);
> +        ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
> +                                       NULL, flags);

Why not unmap the halves here instead of in the backends ?


Thanks,

C.



>           if (ret) {
>               error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
>                            "0x%"HWADDR_PRIx") = %d (%s)",
Re: [PATCH 07/14] vfio: specify VFIO_DMA_UNMAP_FLAG_ALL to callback
Posted by John Levon 6 months, 3 weeks ago
On Wed, Apr 23, 2025 at 07:01:23PM +0200, Cédric Le Goater wrote:

> On 4/9/25 15:48, John Levon wrote:
> > Use the new flags parameter to indicate when we want to unmap
> > everything; no functional change is intended.
> 
> I find these changes confusing. Most likely there are not well presented
> or I am missing something. Some more below.

I don't see any way to further break up the change unfortunately.

> > +/*
> > + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
> > + */
> > +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> > +                                 hwaddr iova, ram_addr_t size,
> > +                                 IOMMUTLBEntry *iotlb, int flags)
> > +{
> > +    int ret;
> > +
> > +    if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
> 
> VFIO_DMA_UNMAP_FLAG_ALL is a kernel uapi flag. It should be used only with
> the corresponding ioctl(VFIO_IOMMU_UNMAP_DMA) and not internally between
> QEMU routines.

Happy to use a different define for the flags if you like, but surely it's
better to have a flags field so it's extendable and it's always clear what the
meaning is? Problem with a boolean is you just see "true" or "false" in the
caller and have no real idea what it means until you look it up.

> I think adding a 'bool unmap_all' paremeter to vfio_legacy_dma_unmap() would
> make more sense.

Having said that I'm OK with going back to just a simple boolean if you'd really
prefer.

> >           }
> > -        ret = vfio_container_dma_unmap(bcontainer, iova,
> > -                                       int128_get64(llsize), NULL, 0);
> > +        ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
> > +                                       NULL, flags);
> 
> Why not unmap the halves here instead of in the backends ?

The whole point of the change is that right now the generic listener.c code has
a workaround that is specific to one particular backend. vfio-user doesn't have
any need to unmap in halves and in fact *has* to pass an "unmap all" flag.

In theory, neither does vfio if the flag is supported, but I dropped that patch
as I couldn't figure out a clean way to use it WRT the dirty tracking code.

regards
john
Re: [PATCH 07/14] vfio: specify VFIO_DMA_UNMAP_FLAG_ALL to callback
Posted by Cédric Le Goater 6 months, 3 weeks ago
On 4/23/25 19:17, John Levon wrote:
> On Wed, Apr 23, 2025 at 07:01:23PM +0200, Cédric Le Goater wrote:
> 
>> On 4/9/25 15:48, John Levon wrote:
>>> Use the new flags parameter to indicate when we want to unmap
>>> everything; no functional change is intended.
>>
>> I find these changes confusing. Most likely there are not well presented
>> or I am missing something. Some more below.
> 
> I don't see any way to further break up the change unfortunately.
> 
>>> +/*
>>> + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
>>> + */
>>> +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
>>> +                                 hwaddr iova, ram_addr_t size,
>>> +                                 IOMMUTLBEntry *iotlb, int flags)
>>> +{
>>> +    int ret;
>>> +
>>> +    if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
>>
>> VFIO_DMA_UNMAP_FLAG_ALL is a kernel uapi flag. It should be used only with
>> the corresponding ioctl(VFIO_IOMMU_UNMAP_DMA) and not internally between
>> QEMU routines.
> 
> Happy to use a different define for the flags if you like, but surely it's
> better to have a flags field so it's extendable and it's always clear what the
> meaning is? Problem with a boolean is you just see "true" or "false" in the
> caller and have no real idea what it means until you look it up.
> 
>> I think adding a 'bool unmap_all' paremeter to vfio_legacy_dma_unmap() would
>> make more sense.
> 
> Having said that I'm OK with going back to just a simple boolean if you'd really
> prefer.

yes. VFIO_DMA_UNMAP_FLAG_ALL is a kernel interface and we don't
need more than one flag today.

>>>            }
>>> -        ret = vfio_container_dma_unmap(bcontainer, iova,
>>> -                                       int128_get64(llsize), NULL, 0);
>>> +        ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
>>> +                                       NULL, flags);
>>
>> Why not unmap the halves here instead of in the backends ?
> 
> The whole point of the change is that right now the generic listener.c code has
> a workaround that is specific to one particular backend.

It's due to the ARM IO space size AFAICT.

> vfio-user doesn't have
> any need to unmap in halves and in fact *has* to pass an "unmap all" flag.

OK. So this flag is a vfio-user requirement. Why can't we call
vfio_container_dma_unmap() twice from vfio_listener_region_del() ?


Thanks,

C.



> In theory, neither does vfio if the flag is supported, but I dropped that patch
> as I couldn't figure out a clean way to use it WRT the dirty tracking code.
>
> regards
> john
> 


Re: [PATCH 07/14] vfio: specify VFIO_DMA_UNMAP_FLAG_ALL to callback
Posted by John Levon 6 months, 3 weeks ago
On Thu, Apr 24, 2025 at 07:16:52PM +0200, Cédric Le Goater wrote:

> > Having said that I'm OK with going back to just a simple boolean if you'd really
> > prefer.
> 
> yes. VFIO_DMA_UNMAP_FLAG_ALL is a kernel interface and we don't
> need more than one flag today.

OK

> > > Why not unmap the halves here instead of in the backends ?
> > 
> > The whole point of the change is that right now the generic listener.c code has
> > a workaround that is specific to one particular backend.
> 
> It's due to the ARM IO space size AFAICT.
> 
> > vfio-user doesn't have
> > any need to unmap in halves and in fact *has* to pass an "unmap all" flag.
> 
> OK. So this flag is a vfio-user requirement. Why can't we call
> vfio_container_dma_unmap() twice from vfio_listener_region_del() ?

Are you suggesting that the vfio-user backend - and the protocol - somehow
accounts for the two unmaps and translates it back into an unmap all? How would
that work?

Surely it's very ugly indeed to embed a foible of the (old) vfio kernel
interface into every backend.

regards
john
Re: [PATCH 07/14] vfio: specify VFIO_DMA_UNMAP_FLAG_ALL to callback
Posted by Cédric Le Goater 6 months, 3 weeks ago
On 4/24/25 21:35, John Levon wrote:
> On Thu, Apr 24, 2025 at 07:16:52PM +0200, Cédric Le Goater wrote:
> 
>>> Having said that I'm OK with going back to just a simple boolean if you'd really
>>> prefer.
>>
>> yes. VFIO_DMA_UNMAP_FLAG_ALL is a kernel interface and we don't
>> need more than one flag today.
> 
> OK
> 
>>>> Why not unmap the halves here instead of in the backends ?
>>>
>>> The whole point of the change is that right now the generic listener.c code has
>>> a workaround that is specific to one particular backend.
>>
>> It's due to the ARM IO space size AFAICT.
>>
>>> vfio-user doesn't have
>>> any need to unmap in halves and in fact *has* to pass an "unmap all" flag.
>>
>> OK. So this flag is a vfio-user requirement. Why can't we call
>> vfio_container_dma_unmap() twice from vfio_listener_region_del() ?
> 
> Are you suggesting that the vfio-user backend - and the protocol - somehow
> accounts for the two unmaps and translates it back into an unmap all? How would
> that work?

ok. Let's keep that way. It's not too invasive a change.


Thanks,

C.



> 
> Surely it's very ugly indeed to embed a foible of the (old) vfio kernel
> interface into every backend.
> 
> regards
> john
>