Use the new flags parameter to indicate when we want to unmap
everything; no functional change is intended.
Signed-off-by: John Levon <john.levon@nutanix.com>
---
hw/vfio/container.c | 49 ++++++++++++++++++++++++++++++++++++---------
hw/vfio/iommufd.c | 19 +++++++++++++++++-
hw/vfio/listener.c | 19 ++++++------------
3 files changed, 63 insertions(+), 24 deletions(-)
diff --git a/hw/vfio/container.c b/hw/vfio/container.c
index 625bbe82a7..37b1217fd8 100644
--- a/hw/vfio/container.c
+++ b/hw/vfio/container.c
@@ -119,12 +119,9 @@ unmap_exit:
return ret;
}
-/*
- * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
- */
-static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
- hwaddr iova, ram_addr_t size,
- IOMMUTLBEntry *iotlb, int flags)
+static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb)
{
const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
bcontainer);
@@ -138,10 +135,6 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
int ret;
Error *local_err = NULL;
- if (flags != 0) {
- return -ENOTSUP;
- }
-
if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
bcontainer->dirty_pages_supported) {
@@ -185,6 +178,42 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
return 0;
}
+/*
+ * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
+ */
+static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
+ hwaddr iova, ram_addr_t size,
+ IOMMUTLBEntry *iotlb, int flags)
+{
+ int ret;
+
+ if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
+ return -ENOTSUP;
+ }
+
+ if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
+ /* The unmap ioctl doesn't accept a full 64-bit span. */
+ Int128 llsize = int128_rshift(int128_2_64(), 1);
+
+ ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize),
+ iotlb);
+
+ if (ret == 0) {
+ ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize),
+ int128_get64(llsize), iotlb);
+ }
+
+ } else {
+ ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb);
+ }
+
+ if (ret != 0) {
+ return -errno;
+ }
+
+ return 0;
+}
+
static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
ram_addr_t size, void *vaddr, bool readonly)
{
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 07334e65b5..22e5b16967 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -51,10 +51,27 @@ static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
const VFIOIOMMUFDContainer *container =
container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
- if (flags != 0) {
+ if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
return -ENOTSUP;
}
+ /* unmap in halves */
+ if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
+ Int128 llsize = int128_rshift(int128_2_64(), 1);
+ int ret;
+
+ ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
+ 0, int128_get64(llsize));
+
+ if (ret == 0) {
+ ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
+ int128_get64(llsize),
+ int128_get64(llsize));
+ }
+
+ return ret;
+ }
+
/* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
return iommufd_backend_unmap_dma(container->be,
container->ioas_id, iova, size);
diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
index c52d4a52ef..bcf2b98e79 100644
--- a/hw/vfio/listener.c
+++ b/hw/vfio/listener.c
@@ -661,21 +661,14 @@ static void vfio_listener_region_del(MemoryListener *listener,
}
if (try_unmap) {
+ int flags = 0;
+
if (int128_eq(llsize, int128_2_64())) {
- /* The unmap ioctl doesn't accept a full 64-bit span. */
- llsize = int128_rshift(llsize, 1);
- ret = vfio_container_dma_unmap(bcontainer, iova,
- int128_get64(llsize), NULL, 0);
- if (ret) {
- error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
- "0x%"HWADDR_PRIx") = %d (%s)",
- bcontainer, iova, int128_get64(llsize), ret,
- strerror(-ret));
- }
- iova += int128_get64(llsize);
+ flags = VFIO_DMA_UNMAP_FLAG_ALL;
+ llsize = 0;
}
- ret = vfio_container_dma_unmap(bcontainer, iova,
- int128_get64(llsize), NULL, 0);
+ ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
+ NULL, flags);
if (ret) {
error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
"0x%"HWADDR_PRIx") = %d (%s)",
--
2.34.1
On 4/9/25 15:48, John Levon wrote:
> Use the new flags parameter to indicate when we want to unmap
> everything; no functional change is intended.
I find these changes confusing. Most likely there are not well presented
or I am missing something. Some more below.
> Signed-off-by: John Levon <john.levon@nutanix.com>
> ---
> hw/vfio/container.c | 49 ++++++++++++++++++++++++++++++++++++---------
> hw/vfio/iommufd.c | 19 +++++++++++++++++-
> hw/vfio/listener.c | 19 ++++++------------
> 3 files changed, 63 insertions(+), 24 deletions(-)
>
> diff --git a/hw/vfio/container.c b/hw/vfio/container.c
> index 625bbe82a7..37b1217fd8 100644
> --- a/hw/vfio/container.c
> +++ b/hw/vfio/container.c
> @@ -119,12 +119,9 @@ unmap_exit:
> return ret;
> }
>
> -/*
> - * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
> - */
> -static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> - hwaddr iova, ram_addr_t size,
> - IOMMUTLBEntry *iotlb, int flags)
> +static int vfio_legacy_dma_unmap_one(const VFIOContainerBase *bcontainer,
> + hwaddr iova, ram_addr_t size,
> + IOMMUTLBEntry *iotlb)
> {
> const VFIOContainer *container = container_of(bcontainer, VFIOContainer,
> bcontainer);
> @@ -138,10 +135,6 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> int ret;
> Error *local_err = NULL;
>
> - if (flags != 0) {
> - return -ENOTSUP;
> - }
> -
> if (iotlb && vfio_container_dirty_tracking_is_started(bcontainer)) {
> if (!vfio_container_devices_dirty_tracking_is_supported(bcontainer) &&
> bcontainer->dirty_pages_supported) {
> @@ -185,6 +178,42 @@ static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> return 0;
> }
>
> +/*
> + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
> + */
> +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> + hwaddr iova, ram_addr_t size,
> + IOMMUTLBEntry *iotlb, int flags)
> +{
> + int ret;
> +
> + if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
VFIO_DMA_UNMAP_FLAG_ALL is a kernel uapi flag. It should be used only with
the corresponding ioctl(VFIO_IOMMU_UNMAP_DMA) and not internally between
QEMU routines.
I think adding a 'bool unmap_all' paremeter to vfio_legacy_dma_unmap() would
make more sense.
> + return -ENOTSUP;
> + }
> +
> + if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
> + /* The unmap ioctl doesn't accept a full 64-bit span. */
> + Int128 llsize = int128_rshift(int128_2_64(), 1);
> +
> + ret = vfio_legacy_dma_unmap_one(bcontainer, 0, int128_get64(llsize),
> + iotlb);
> +
> + if (ret == 0) {
> + ret = vfio_legacy_dma_unmap_one(bcontainer, int128_get64(llsize),
> + int128_get64(llsize), iotlb);
> + }
> +
> + } else {
> + ret = vfio_legacy_dma_unmap_one(bcontainer, iova, size, iotlb);
> + }
> +
> + if (ret != 0) {
> + return -errno;
> + }
> +
> + return 0;
> +}
> +
> static int vfio_legacy_dma_map(const VFIOContainerBase *bcontainer, hwaddr iova,
> ram_addr_t size, void *vaddr, bool readonly)
> {
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 07334e65b5..22e5b16967 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -51,10 +51,27 @@ static int iommufd_cdev_unmap(const VFIOContainerBase *bcontainer,
> const VFIOIOMMUFDContainer *container =
> container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
>
> - if (flags != 0) {
> + if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
> return -ENOTSUP;
> }
>
> + /* unmap in halves */
> + if (flags & VFIO_DMA_UNMAP_FLAG_ALL) {
> + Int128 llsize = int128_rshift(int128_2_64(), 1);
> + int ret;
> +
> + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
> + 0, int128_get64(llsize));
> +
> + if (ret == 0) {
> + ret = iommufd_backend_unmap_dma(container->be, container->ioas_id,
> + int128_get64(llsize),
> + int128_get64(llsize));
> + }
> +
> + return ret;
> + }
> +
> /* TODO: Handle dma_unmap_bitmap with iotlb args (migration) */
> return iommufd_backend_unmap_dma(container->be,
> container->ioas_id, iova, size);
> diff --git a/hw/vfio/listener.c b/hw/vfio/listener.c
> index c52d4a52ef..bcf2b98e79 100644
> --- a/hw/vfio/listener.c
> +++ b/hw/vfio/listener.c
> @@ -661,21 +661,14 @@ static void vfio_listener_region_del(MemoryListener *listener,
> }
>
> if (try_unmap) {
> + int flags = 0;
> +
> if (int128_eq(llsize, int128_2_64())) {
> - /* The unmap ioctl doesn't accept a full 64-bit span. */
> - llsize = int128_rshift(llsize, 1);
> - ret = vfio_container_dma_unmap(bcontainer, iova,
> - int128_get64(llsize), NULL, 0);
> - if (ret) {
> - error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> - "0x%"HWADDR_PRIx") = %d (%s)",
> - bcontainer, iova, int128_get64(llsize), ret,
> - strerror(-ret));
> - }
> - iova += int128_get64(llsize);
> + flags = VFIO_DMA_UNMAP_FLAG_ALL;
> + llsize = 0;
please change this initialization to :
llsize = int128_zero();
> }
> - ret = vfio_container_dma_unmap(bcontainer, iova,
> - int128_get64(llsize), NULL, 0);
> + ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
> + NULL, flags);
Why not unmap the halves here instead of in the backends ?
Thanks,
C.
> if (ret) {
> error_report("vfio_container_dma_unmap(%p, 0x%"HWADDR_PRIx", "
> "0x%"HWADDR_PRIx") = %d (%s)",
On Wed, Apr 23, 2025 at 07:01:23PM +0200, Cédric Le Goater wrote:
> On 4/9/25 15:48, John Levon wrote:
> > Use the new flags parameter to indicate when we want to unmap
> > everything; no functional change is intended.
>
> I find these changes confusing. Most likely there are not well presented
> or I am missing something. Some more below.
I don't see any way to further break up the change unfortunately.
> > +/*
> > + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
> > + */
> > +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
> > + hwaddr iova, ram_addr_t size,
> > + IOMMUTLBEntry *iotlb, int flags)
> > +{
> > + int ret;
> > +
> > + if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
>
> VFIO_DMA_UNMAP_FLAG_ALL is a kernel uapi flag. It should be used only with
> the corresponding ioctl(VFIO_IOMMU_UNMAP_DMA) and not internally between
> QEMU routines.
Happy to use a different define for the flags if you like, but surely it's
better to have a flags field so it's extendable and it's always clear what the
meaning is? Problem with a boolean is you just see "true" or "false" in the
caller and have no real idea what it means until you look it up.
> I think adding a 'bool unmap_all' paremeter to vfio_legacy_dma_unmap() would
> make more sense.
Having said that I'm OK with going back to just a simple boolean if you'd really
prefer.
> > }
> > - ret = vfio_container_dma_unmap(bcontainer, iova,
> > - int128_get64(llsize), NULL, 0);
> > + ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
> > + NULL, flags);
>
> Why not unmap the halves here instead of in the backends ?
The whole point of the change is that right now the generic listener.c code has
a workaround that is specific to one particular backend. vfio-user doesn't have
any need to unmap in halves and in fact *has* to pass an "unmap all" flag.
In theory, neither does vfio if the flag is supported, but I dropped that patch
as I couldn't figure out a clean way to use it WRT the dirty tracking code.
regards
john
On 4/23/25 19:17, John Levon wrote:
> On Wed, Apr 23, 2025 at 07:01:23PM +0200, Cédric Le Goater wrote:
>
>> On 4/9/25 15:48, John Levon wrote:
>>> Use the new flags parameter to indicate when we want to unmap
>>> everything; no functional change is intended.
>>
>> I find these changes confusing. Most likely there are not well presented
>> or I am missing something. Some more below.
>
> I don't see any way to further break up the change unfortunately.
>
>>> +/*
>>> + * DMA - Mapping and unmapping for the "type1" IOMMU interface used on x86
>>> + */
>>> +static int vfio_legacy_dma_unmap(const VFIOContainerBase *bcontainer,
>>> + hwaddr iova, ram_addr_t size,
>>> + IOMMUTLBEntry *iotlb, int flags)
>>> +{
>>> + int ret;
>>> +
>>> + if ((flags & ~(VFIO_DMA_UNMAP_FLAG_ALL)) != 0) {
>>
>> VFIO_DMA_UNMAP_FLAG_ALL is a kernel uapi flag. It should be used only with
>> the corresponding ioctl(VFIO_IOMMU_UNMAP_DMA) and not internally between
>> QEMU routines.
>
> Happy to use a different define for the flags if you like, but surely it's
> better to have a flags field so it's extendable and it's always clear what the
> meaning is? Problem with a boolean is you just see "true" or "false" in the
> caller and have no real idea what it means until you look it up.
>
>> I think adding a 'bool unmap_all' paremeter to vfio_legacy_dma_unmap() would
>> make more sense.
>
> Having said that I'm OK with going back to just a simple boolean if you'd really
> prefer.
yes. VFIO_DMA_UNMAP_FLAG_ALL is a kernel interface and we don't
need more than one flag today.
>>> }
>>> - ret = vfio_container_dma_unmap(bcontainer, iova,
>>> - int128_get64(llsize), NULL, 0);
>>> + ret = vfio_container_dma_unmap(bcontainer, iova, int128_get64(llsize),
>>> + NULL, flags);
>>
>> Why not unmap the halves here instead of in the backends ?
>
> The whole point of the change is that right now the generic listener.c code has
> a workaround that is specific to one particular backend.
It's due to the ARM IO space size AFAICT.
> vfio-user doesn't have
> any need to unmap in halves and in fact *has* to pass an "unmap all" flag.
OK. So this flag is a vfio-user requirement. Why can't we call
vfio_container_dma_unmap() twice from vfio_listener_region_del() ?
Thanks,
C.
> In theory, neither does vfio if the flag is supported, but I dropped that patch
> as I couldn't figure out a clean way to use it WRT the dirty tracking code.
>
> regards
> john
>
On Thu, Apr 24, 2025 at 07:16:52PM +0200, Cédric Le Goater wrote: > > Having said that I'm OK with going back to just a simple boolean if you'd really > > prefer. > > yes. VFIO_DMA_UNMAP_FLAG_ALL is a kernel interface and we don't > need more than one flag today. OK > > > Why not unmap the halves here instead of in the backends ? > > > > The whole point of the change is that right now the generic listener.c code has > > a workaround that is specific to one particular backend. > > It's due to the ARM IO space size AFAICT. > > > vfio-user doesn't have > > any need to unmap in halves and in fact *has* to pass an "unmap all" flag. > > OK. So this flag is a vfio-user requirement. Why can't we call > vfio_container_dma_unmap() twice from vfio_listener_region_del() ? Are you suggesting that the vfio-user backend - and the protocol - somehow accounts for the two unmaps and translates it back into an unmap all? How would that work? Surely it's very ugly indeed to embed a foible of the (old) vfio kernel interface into every backend. regards john
On 4/24/25 21:35, John Levon wrote: > On Thu, Apr 24, 2025 at 07:16:52PM +0200, Cédric Le Goater wrote: > >>> Having said that I'm OK with going back to just a simple boolean if you'd really >>> prefer. >> >> yes. VFIO_DMA_UNMAP_FLAG_ALL is a kernel interface and we don't >> need more than one flag today. > > OK > >>>> Why not unmap the halves here instead of in the backends ? >>> >>> The whole point of the change is that right now the generic listener.c code has >>> a workaround that is specific to one particular backend. >> >> It's due to the ARM IO space size AFAICT. >> >>> vfio-user doesn't have >>> any need to unmap in halves and in fact *has* to pass an "unmap all" flag. >> >> OK. So this flag is a vfio-user requirement. Why can't we call >> vfio_container_dma_unmap() twice from vfio_listener_region_del() ? > > Are you suggesting that the vfio-user backend - and the protocol - somehow > accounts for the two unmaps and translates it back into an unmap all? How would > that work? ok. Let's keep that way. It's not too invasive a change. Thanks, C. > > Surely it's very ugly indeed to embed a foible of the (old) vfio kernel > interface into every backend. > > regards > john >
© 2016 - 2025 Red Hat, Inc.