From: Leon Romanovsky <leonro@nvidia.com>

dma-buf invalidation is handled asynchronously by the hardware, so VFIO
must wait until all affected objects have been fully invalidated.

In addition, the dma-buf exporter expects all importers to unmap any
buffers they previously mapped.
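
For context (an illustrative sketch only, not part of the change itself):
the patch pairs a dma_resv_wait_timeout() on the reservation object with a
kref/completion drain, so revoke can sleep until the last importer has
called unmap. Roughly, using hypothetical my_* names:

	#include <linux/kref.h>
	#include <linux/completion.h>

	struct my_obj {
		struct kref kref;	/* baseline ref + one per live mapping */
		struct completion comp;	/* fires when the kref drops to zero */
	};

	static void my_obj_done(struct kref *kref)
	{
		struct my_obj *obj = container_of(kref, struct my_obj, kref);

		complete(&obj->comp);
	}

	static void my_obj_init(struct my_obj *obj)
	{
		kref_init(&obj->kref);	/* starts at 1: the baseline ref */
		init_completion(&obj->comp);
	}

	/* Taken on every successful map, dropped in unmap. */
	static void my_obj_map(struct my_obj *obj)
	{
		kref_get(&obj->kref);
	}

	static void my_obj_unmap(struct my_obj *obj)
	{
		kref_put(&obj->kref, my_obj_done);
	}

	/* Revoke: drop the baseline ref, sleep until all mappings are gone. */
	static void my_obj_revoke_and_wait(struct my_obj *obj)
	{
		kref_put(&obj->kref, my_obj_done);
		wait_for_completion(&obj->comp);
	}

	/* Un-revoke: re-arm the counter and completion for future mappings. */
	static void my_obj_unrevoke(struct my_obj *obj)
	{
		kref_init(&obj->kref);
		reinit_completion(&obj->comp);
	}
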
Fixes: 5d74781ebc86 ("vfio/pci: Add dma-buf export support for MMIO regions")
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
---
drivers/vfio/pci/vfio_pci_dmabuf.c | 61 +++++++++++++++++++++++++++++++++++---
1 file changed, 57 insertions(+), 4 deletions(-)
diff --git a/drivers/vfio/pci/vfio_pci_dmabuf.c b/drivers/vfio/pci/vfio_pci_dmabuf.c
index d8ceafabef48..78d47e260f34 100644
--- a/drivers/vfio/pci/vfio_pci_dmabuf.c
+++ b/drivers/vfio/pci/vfio_pci_dmabuf.c
@@ -17,6 +17,8 @@ struct vfio_pci_dma_buf {
struct dma_buf_phys_vec *phys_vec;
struct p2pdma_provider *provider;
u32 nr_ranges;
+ struct kref kref;
+ struct completion comp;
u8 revoked : 1;
};
@@ -44,27 +46,46 @@ static int vfio_pci_dma_buf_attach(struct dma_buf *dmabuf,
return 0;
}
+static void vfio_pci_dma_buf_done(struct kref *kref)
+{
+ struct vfio_pci_dma_buf *priv =
+ container_of(kref, struct vfio_pci_dma_buf, kref);
+
+ complete(&priv->comp);
+}
+
static struct sg_table *
vfio_pci_dma_buf_map(struct dma_buf_attachment *attachment,
enum dma_data_direction dir)
{
struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
+ struct sg_table *ret;
dma_resv_assert_held(priv->dmabuf->resv);
if (priv->revoked)
return ERR_PTR(-ENODEV);
- return dma_buf_phys_vec_to_sgt(attachment, priv->provider,
- priv->phys_vec, priv->nr_ranges,
- priv->size, dir);
+ ret = dma_buf_phys_vec_to_sgt(attachment, priv->provider,
+ priv->phys_vec, priv->nr_ranges,
+ priv->size, dir);
+ if (IS_ERR(ret))
+ return ret;
+
+ kref_get(&priv->kref);
+ return ret;
}
static void vfio_pci_dma_buf_unmap(struct dma_buf_attachment *attachment,
struct sg_table *sgt,
enum dma_data_direction dir)
{
+ struct vfio_pci_dma_buf *priv = attachment->dmabuf->priv;
+
+ dma_resv_assert_held(priv->dmabuf->resv);
+
dma_buf_free_sgt(attachment, sgt, dir);
+ kref_put(&priv->kref, vfio_pci_dma_buf_done);
}
static void vfio_pci_dma_buf_release(struct dma_buf *dmabuf)
@@ -287,6 +308,9 @@ int vfio_pci_core_feature_dma_buf(struct vfio_pci_core_device *vdev, u32 flags,
goto err_dev_put;
}
+ kref_init(&priv->kref);
+ init_completion(&priv->comp);
+
/* dma_buf_put() now frees priv */
INIT_LIST_HEAD(&priv->dmabufs_elm);
down_write(&vdev->memory_lock);
@@ -331,9 +355,33 @@ void vfio_pci_dma_buf_move(struct vfio_pci_core_device *vdev, bool revoked)
if (priv->revoked != revoked) {
dma_resv_lock(priv->dmabuf->resv, NULL);
- priv->revoked = revoked;
+ if (revoked)
+ priv->revoked = true;
dma_buf_invalidate_mappings(priv->dmabuf);
+ dma_resv_wait_timeout(priv->dmabuf->resv,
+ DMA_RESV_USAGE_BOOKKEEP, false,
+ MAX_SCHEDULE_TIMEOUT);
dma_resv_unlock(priv->dmabuf->resv);
+ if (revoked) {
+ kref_put(&priv->kref, vfio_pci_dma_buf_done);
+ wait_for_completion(&priv->comp);
+ } else {
+ /*
+ * The kref is initialized again because, when the revoke
+ * was performed, the reference counter was decreased
+ * to zero to trigger the completion.
+ */
+ kref_init(&priv->kref);
+ /*
+ * There is no need to wait, as no mappings were
+ * created while the previous state was
+ * priv->revoked == true.
+ */
+ reinit_completion(&priv->comp);
+ dma_resv_lock(priv->dmabuf->resv, NULL);
+ priv->revoked = false;
+ dma_resv_unlock(priv->dmabuf->resv);
+ }
}
fput(priv->dmabuf->file);
}
@@ -354,7 +402,12 @@ void vfio_pci_dma_buf_cleanup(struct vfio_pci_core_device *vdev)
priv->vdev = NULL;
priv->revoked = true;
dma_buf_invalidate_mappings(priv->dmabuf);
+ dma_resv_wait_timeout(priv->dmabuf->resv,
+ DMA_RESV_USAGE_BOOKKEEP, false,
+ MAX_SCHEDULE_TIMEOUT);
dma_resv_unlock(priv->dmabuf->resv);
+ kref_put(&priv->kref, vfio_pci_dma_buf_done);
+ wait_for_completion(&priv->comp);
vfio_device_put_registration(&vdev->vdev);
fput(priv->dmabuf->file);
}
--
2.52.0
On Sat, 31 Jan 2026 07:34:14 +0200
Leon Romanovsky <leon@kernel.org> wrote:
> From: Leon Romanovsky <leonro@nvidia.com>
>
> dma-buf invalidation is handled asynchronously by the hardware, so VFIO
> must wait until all affected objects have been fully invalidated.
>
> In addition, the dma-buf exporter is expecting that all importers unmap any
> buffers they previously mapped.
>
> Fixes: 5d74781ebc86 ("vfio/pci: Add dma-buf export support for MMIO regions")
> Reviewed-by: Kevin Tian <kevin.tian@intel.com>
> Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
> ---
> drivers/vfio/pci/vfio_pci_dmabuf.c | 61 +++++++++++++++++++++++++++++++++++---
> 1 file changed, 57 insertions(+), 4 deletions(-)
Reviewed-by: Alex Williamson <alex@shazbot.org>