Add new kAPIs against iommufd_device to support the TSM Bind/Unbind
commands issued by a CoCo-VM. TSM bind means the VMM does all the
preparations for private device assignment and locks down the device by
transitioning it to the TDISP CONFIG_LOCKED or RUN state (in the RUN state,
the TSM can still block any access to/from the device), so that the device
is ready for attestation by the CoCo-VM.
The interfaces are added against IOMMUFD because IOMMUFD builds several
abstract objects applicable to private device assignment, e.g. viommu
for secure iommu & kvm, vdevice for vBDF. IOMMUFD links them up to
finish all the configuration required by the secure firmware. That also
means the TSM Bind interface must be called after viommu & vdevice allocation.
Suggested-by: Jason Gunthorpe <jgg@nvidia.com>
Originally-by: Alexey Kardashevskiy <aik@amd.com>
Signed-off-by: Xu Yilun <yilun.xu@linux.intel.com>
---
drivers/iommu/iommufd/device.c | 84 +++++++++++++++++++++++++
drivers/iommu/iommufd/iommufd_private.h | 6 ++
drivers/iommu/iommufd/viommu.c | 44 +++++++++++++
include/linux/iommufd.h | 3 +
4 files changed, 137 insertions(+)
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 37ef6bec2009..984780c66ab2 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -3,6 +3,7 @@
*/
#include <linux/iommu.h>
#include <linux/iommufd.h>
+#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/slab.h>
#include <uapi/linux/iommufd.h>
@@ -1561,3 +1562,86 @@ int iommufd_get_hw_info(struct iommufd_ucmd *ucmd)
iommufd_put_object(ucmd->ictx, &idev->obj);
return rc;
}
+
+/**
+ * iommufd_device_tsm_bind - Move a device to TSM Bind state
+ * @idev: device to bind
+ * @vdevice_id: ID of the IOMMUFD_OBJ_VDEVICE to bind
+ *
+ * This configures the device for Confidential Computing (CC) and moves it
+ * to the TSM Bind state. Once this completes the device is locked down (TDISP
+ * CONFIG_LOCKED or RUN), waiting for the guest's attestation.
+ *
+ * This function is undone by calling iommufd_device_tsm_unbind().
+ */
+int iommufd_device_tsm_bind(struct iommufd_device *idev, u32 vdevice_id)
+{
+ struct iommufd_vdevice *vdev;
+ int rc;
+
+ if (!dev_is_pci(idev->dev))
+ return -ENODEV;
+
+ vdev = container_of(iommufd_get_object(idev->ictx, vdevice_id, IOMMUFD_OBJ_VDEVICE),
+ struct iommufd_vdevice, obj);
+ if (IS_ERR(vdev))
+ return PTR_ERR(vdev);
+
+ if (vdev->dev != idev->dev) {
+ rc = -EINVAL;
+ goto out_put_vdev;
+ }
+
+ mutex_lock(&idev->igroup->lock);
+ if (idev->vdev) {
+ rc = -EEXIST;
+ goto out_unlock;
+ }
+
+ rc = iommufd_vdevice_tsm_bind(vdev);
+ if (rc)
+ goto out_unlock;
+
+ idev->vdev = vdev;
+ refcount_inc(&vdev->obj.users);
+ mutex_unlock(&idev->igroup->lock);
+
+ /*
+ * Pairs with iommufd_device_tsm_unbind() - catches caller bugs attempting
+ * to destroy a bound device.
+ */
+ refcount_inc(&idev->obj.users);
+ goto out_put_vdev;
+
+out_unlock:
+ mutex_unlock(&idev->igroup->lock);
+out_put_vdev:
+ iommufd_put_object(idev->ictx, &vdev->obj);
+ return rc;
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_tsm_bind, "IOMMUFD");
+
+/**
+ * iommufd_device_tsm_unbind - Move a device out of TSM bind state
+ * @idev: device to unbind
+ *
+ * Undo iommufd_device_tsm_bind(). This removes all Confidential Computing
+ * configurations. Once this completes the device is unlocked (TDISP
+ * CONFIG_UNLOCKED).
+ */
+void iommufd_device_tsm_unbind(struct iommufd_device *idev)
+{
+ mutex_lock(&idev->igroup->lock);
+ if (!idev->vdev) {
+ mutex_unlock(&idev->igroup->lock);
+ return;
+ }
+
+ iommufd_vdevice_tsm_unbind(idev->vdev);
+ refcount_dec(&idev->vdev->obj.users);
+ idev->vdev = NULL;
+ mutex_unlock(&idev->igroup->lock);
+
+ refcount_dec(&idev->obj.users);
+}
+EXPORT_SYMBOL_NS_GPL(iommufd_device_tsm_unbind, "IOMMUFD");
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 297e4e2a12d1..29af8616e4aa 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -430,6 +430,7 @@ struct iommufd_device {
/* protect iopf_enabled counter */
struct mutex iopf_lock;
unsigned int iopf_enabled;
+ struct iommufd_vdevice *vdev;
};
static inline struct iommufd_device *
@@ -615,8 +616,13 @@ struct iommufd_vdevice {
struct iommufd_viommu *viommu;
struct device *dev;
u64 id; /* per-vIOMMU virtual ID */
+ struct mutex tsm_lock;
+ bool tsm_bound;
};
+int iommufd_vdevice_tsm_bind(struct iommufd_vdevice *vdev);
+void iommufd_vdevice_tsm_unbind(struct iommufd_vdevice *vdev);
+
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 2fcef3f8d1a5..296143e21368 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -4,6 +4,7 @@
#if IS_ENABLED(CONFIG_KVM)
#include <linux/kvm_host.h>
#endif
+#include <linux/pci-tsm.h>
#include "iommufd_private.h"
@@ -193,11 +194,13 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_put_idev;
}
+ vdev->ictx = ucmd->ictx; //This is a unrelated fix for vdevice alloc
vdev->id = virt_id;
vdev->dev = idev->dev;
get_device(idev->dev);
vdev->viommu = viommu;
refcount_inc(&viommu->obj.users);
+ mutex_init(&vdev->tsm_lock);
curr = xa_cmpxchg(&viommu->vdevs, virt_id, NULL, vdev, GFP_KERNEL);
if (curr) {
@@ -220,3 +223,44 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
iommufd_put_object(ucmd->ictx, &viommu->obj);
return rc;
}
+
+int iommufd_vdevice_tsm_bind(struct iommufd_vdevice *vdev)
+{
+ struct kvm *kvm;
+ int rc;
+
+ mutex_lock(&vdev->tsm_lock);
+ if (vdev->tsm_bound) {
+ rc = -EEXIST;
+ goto out_unlock;
+ }
+
+ kvm = vdev->viommu->kvm;
+ if (!kvm) {
+ rc = -ENOENT;
+ goto out_unlock;
+ }
+
+ rc = pci_tsm_bind(to_pci_dev(vdev->dev), kvm, vdev->id);
+ if (rc)
+ goto out_unlock;
+
+ vdev->tsm_bound = true;
+
+out_unlock:
+ mutex_unlock(&vdev->tsm_lock);
+ return rc;
+}
+
+void iommufd_vdevice_tsm_unbind(struct iommufd_vdevice *vdev)
+{
+ mutex_lock(&vdev->tsm_lock);
+ if (!vdev->tsm_bound)
+ goto out_unlock;
+
+ pci_tsm_unbind(to_pci_dev(vdev->dev));
+ vdev->tsm_bound = false;
+
+out_unlock:
+ mutex_unlock(&vdev->tsm_lock);
+}
diff --git a/include/linux/iommufd.h b/include/linux/iommufd.h
index 2712421802b9..5f9a286232ac 100644
--- a/include/linux/iommufd.h
+++ b/include/linux/iommufd.h
@@ -63,6 +63,9 @@ int iommufd_device_replace(struct iommufd_device *idev, ioasid_t pasid,
u32 *pt_id);
void iommufd_device_detach(struct iommufd_device *idev, ioasid_t pasid);
+int iommufd_device_tsm_bind(struct iommufd_device *idev, u32 vdevice_id);
+void iommufd_device_tsm_unbind(struct iommufd_device *idev);
+
struct iommufd_ctx *iommufd_device_to_ictx(struct iommufd_device *idev);
u32 iommufd_device_to_id(struct iommufd_device *idev);
--
2.25.1
Xu Yilun <yilun.xu@linux.intel.com> writes:
....
> +/**
> + * iommufd_device_tsm_bind - Move a device to TSM Bind state
> + * @idev: device to attach
> + * @vdev_id: Input a IOMMUFD_OBJ_VDEVICE
> + *
> + * This configures for device Confidential Computing(CC), and moves the device
> + * to the TSM Bind state. Once this completes the device is locked down (TDISP
> + * CONFIG_LOCKED or RUN), waiting for guest's attestation.
> + *
> + * This function is undone by calling iommufd_device_tsm_unbind().
> + */
> +int iommufd_device_tsm_bind(struct iommufd_device *idev, u32 vdevice_id)
> +{
> + struct iommufd_vdevice *vdev;
> + int rc;
> +
> + if (!dev_is_pci(idev->dev))
> + return -ENODEV;
> +
> + vdev = container_of(iommufd_get_object(idev->ictx, vdevice_id, IOMMUFD_OBJ_VDEVICE),
> + struct iommufd_vdevice, obj);
> + if (IS_ERR(vdev))
> + return PTR_ERR(vdev);
> +
> + if (vdev->dev != idev->dev) {
> + rc = -EINVAL;
> + goto out_put_vdev;
> + }
> +
> + mutex_lock(&idev->igroup->lock);
> + if (idev->vdev) {
> + rc = -EEXIST;
> + goto out_unlock;
> + }
> +
> + rc = iommufd_vdevice_tsm_bind(vdev);
> + if (rc)
> + goto out_unlock;
> +
> + idev->vdev = vdev;
> + refcount_inc(&vdev->obj.users);
> + mutex_unlock(&idev->igroup->lock);
> +
> + /*
> + * Pairs with iommufd_device_tsm_unbind() - catches caller bugs attempting
> + * to destroy a bound device.
> + */
> + refcount_inc(&idev->obj.users);
>
Do we really need this refcount_inc? As I understand it, the objects
aren't being pinned directly. Instead, the reference count seems to be
used more as a way to establish an object hierarchy, ensuring that
objects are freed in the correct order.
In vfio_pci_core_close_device(), you’re decrementing the reference, and
on the iommufd side, we’re covered because the VFIO bind operation takes
a file reference (fget)—so iommufd_fops_release() won’t be called
prematurely.
Wouldn’t it be simpler to skip the reference count increment altogether
and just call tsm_unbind in the virtual device’s destroy callback?
(iommufd_vdevice_destroy())
> + goto out_put_vdev;
> +
> +out_unlock:
> + mutex_unlock(&idev->igroup->lock);
> +out_put_vdev:
> + iommufd_put_object(idev->ictx, &vdev->obj);
> + return rc;
> +}
> +EXPORT_SYMBOL_NS_GPL(iommufd_device_tsm_bind, "IOMMUFD");
-aneesh
On Mon, Jun 02, 2025 at 06:13:16PM +0530, Aneesh Kumar K.V wrote:
> Xu Yilun <yilun.xu@linux.intel.com> writes:
>
> ....
>
> > +/**
> > + * iommufd_device_tsm_bind - Move a device to TSM Bind state
> > + * @idev: device to attach
> > + * @vdev_id: Input a IOMMUFD_OBJ_VDEVICE
> > + *
> > + * This configures for device Confidential Computing(CC), and moves the device
> > + * to the TSM Bind state. Once this completes the device is locked down (TDISP
> > + * CONFIG_LOCKED or RUN), waiting for guest's attestation.
> > + *
> > + * This function is undone by calling iommufd_device_tsm_unbind().
> > + */
> > +int iommufd_device_tsm_bind(struct iommufd_device *idev, u32 vdevice_id)
> > +{
> > + struct iommufd_vdevice *vdev;
> > + int rc;
> > +
> > + if (!dev_is_pci(idev->dev))
> > + return -ENODEV;
> > +
> > + vdev = container_of(iommufd_get_object(idev->ictx, vdevice_id, IOMMUFD_OBJ_VDEVICE),
> > + struct iommufd_vdevice, obj);
> > + if (IS_ERR(vdev))
> > + return PTR_ERR(vdev);
> > +
> > + if (vdev->dev != idev->dev) {
> > + rc = -EINVAL;
> > + goto out_put_vdev;
> > + }
> > +
> > + mutex_lock(&idev->igroup->lock);
> > + if (idev->vdev) {
> > + rc = -EEXIST;
> > + goto out_unlock;
> > + }
> > +
> > + rc = iommufd_vdevice_tsm_bind(vdev);
> > + if (rc)
> > + goto out_unlock;
> > +
> > + idev->vdev = vdev;
> > + refcount_inc(&vdev->obj.users);
> > + mutex_unlock(&idev->igroup->lock);
> > +
> > + /*
> > + * Pairs with iommufd_device_tsm_unbind() - catches caller bugs attempting
> > + * to destroy a bound device.
> > + */
> > + refcount_inc(&idev->obj.users);
> >
>
> Do we really need this refcount_inc? As I understand it, the objects
The idev refcount is not necessary, it is just to "catch caller bug".
> aren't being pinned directly. Instead, the reference count seems to be
> used more as a way to establish an object hierarchy, ensuring that
> objects are freed in the correct order.
>
> In vfio_pci_core_close_device(), you’re decrementing the reference, and
> on the iommufd side, we’re covered because the VFIO bind operation takes
> a file reference (fget)—so iommufd_fops_release() won’t be called
> prematurely.
Correct.
>
> Wouldn’t it be simpler to skip the reference count increment altogether
> and just call tsm_unbind in the virtual device’s destroy callback?
> (iommufd_vdevice_destroy())
The vdevice refcount is the main concern: there is also an IOMMU_DESTROY
ioctl. Without the refcount, the user could simply free the vdevice instance
while VFIO is still in the bound state. That does not seem to be the correct
free order.
Thanks,
Yilun
>
> > + goto out_put_vdev;
> > +
> > +out_unlock:
> > + mutex_unlock(&idev->igroup->lock);
> > +out_put_vdev:
> > + iommufd_put_object(idev->ictx, &vdev->obj);
> > + return rc;
> > +}
> > +EXPORT_SYMBOL_NS_GPL(iommufd_device_tsm_bind, "IOMMUFD");
>
> -aneesh
On Tue, Jun 03, 2025 at 02:20:51PM +0800, Xu Yilun wrote:
> > Wouldn’t it be simpler to skip the reference count increment altogether
> > and just call tsm_unbind in the virtual device’s destroy callback?
> > (iommufd_vdevice_destroy())
>
> The vdevice refcount is the main concern, there is also an IOMMU_DESTROY
> ioctl. User could just free the vdevice instance if no refcount, while VFIO
> is still in bound state. That seems not the correct free order.
Freeing the vdevice should automatically unbind it..
Jason
Jason Gunthorpe <jgg@nvidia.com> writes:
> On Tue, Jun 03, 2025 at 02:20:51PM +0800, Xu Yilun wrote:
>> > Wouldn’t it be simpler to skip the reference count increment altogether
>> > and just call tsm_unbind in the virtual device’s destroy callback?
>> > (iommufd_vdevice_destroy())
>>
>> The vdevice refcount is the main concern, there is also an IOMMU_DESTROY
>> ioctl. User could just free the vdevice instance if no refcount, while VFIO
>> is still in bound state. That seems not the correct free order.
>
> Freeing the vdevice should automatically unbind it..
>
One challenge I ran into during implementation was the dependency of
vfio on iommufd_device. When vfio needs to perform a tsm_unbind,
it only has access to an iommufd_device.
However, TSM operations like binding and unbinding are handled at the
iommufd_vdevice level. The issue? There’s no direct link from
iommufd_device back to iommufd_vdevice.
To address this, I modified the following structures:
modified drivers/iommu/iommufd/iommufd_private.h
@@ -428,6 +428,7 @@ struct iommufd_device {
/* protect iopf_enabled counter */
struct mutex iopf_lock;
unsigned int iopf_enabled;
+ struct iommufd_vdevice *vdev;
};
static inline struct iommufd_device *
@@ -613,6 +614,7 @@ struct iommufd_vdevice {
struct iommufd_object obj;
struct iommufd_ctx *ictx;
struct iommufd_viommu *viommu;
+ struct iommufd_device *idev;
struct device *dev;
struct mutex mutex; /* mutex to synchronize updates to tsm_bound */
u64 id; /* per-vIOMMU virtual ID */
These fields are updated during tsm_bind and tsm_unbind, so they must be
protected by the appropriate locks:
Updating vdevice->idev requires holding vdev->mutex (vdev_lock).
Updating device->vdev requires idev->igroup->lock (idev_lock).
tsm_unbind in vdevice_destroy:
vdevice_destroy() ends up calling tsm_unbind() while holding only the
vdev_lock. At first glance, this seems unsafe. But in practice, it's
fine because the corresponding iommufd_device has already been destroyed
when the VFIO device file descriptor was closed—triggering
vfio_df_iommufd_unbind().
I’ve added an in-code comment to explain why tsm_unbind() is safe here
without acquiring the idev_lock. Hope that is ok.
-aneesh
On Wed, Jun 04, 2025 at 02:10:43PM +0530, Aneesh Kumar K.V wrote:
> Jason Gunthorpe <jgg@nvidia.com> writes:
>
> > On Tue, Jun 03, 2025 at 02:20:51PM +0800, Xu Yilun wrote:
> >> > Wouldn’t it be simpler to skip the reference count increment altogether
> >> > and just call tsm_unbind in the virtual device’s destroy callback?
> >> > (iommufd_vdevice_destroy())
> >>
> >> The vdevice refcount is the main concern, there is also an IOMMU_DESTROY
> >> ioctl. User could just free the vdevice instance if no refcount, while VFIO
> >> is still in bound state. That seems not the correct free order.
> >
> > Freeing the vdevice should automatically unbind it..
> >
>
> One challenge I ran into during implementation was the dependency of
> vfio on iommufd_device. When vfio needs to perform a tsm_unbind,
> it only has access to an iommufd_device.
VFIO should never do that except by destroying the idevice..
> However, TSM operations like binding and unbinding are handled at the
> iommufd_vdevice level. The issue? There’s no direct link from
> iommufd_device back to iommufd_vdevice.
Yes.
> To address this, I modified the following structures:
>
> modified drivers/iommu/iommufd/iommufd_private.h
> @@ -428,6 +428,7 @@ struct iommufd_device {
> /* protect iopf_enabled counter */
> struct mutex iopf_lock;
> unsigned int iopf_enabled;
> + struct iommufd_vdevice *vdev;
> };
Locking will be painful:
> Updating vdevice->idev requires holding vdev->mutex (vdev_lock).
> Updating device->vdev requires idev->igroup->lock (idev_lock).
I wonder if that can work on the destroy paths.
You also have to prevent more than one vdevice from being created for
an idevice, I don't think we do that today.
> tsm_unbind in vdevice_destroy:
>
> vdevice_destroy() ends up calling tsm_unbind() while holding only the
> vdev_lock. At first glance, this seems unsafe. But in practice, it's
> fine because the corresponding iommufd_device has already been destroyed
> when the VFIO device file descriptor was closed—triggering
> vfio_df_iommufd_unbind().
This needs some kind of fix: the idevice should destroy its vdevices
during idevice destruction, so we don't get this out-of-order case where
the idevice is destroyed before the vdevice.
This should be a separate patch as it is an immediate bug fix..
Jason
Jason Gunthorpe <jgg@nvidia.com> writes:
....
>> tsm_unbind in vdevice_destroy:
>>
>> vdevice_destroy() ends up calling tsm_unbind() while holding only the
>> vdev_lock. At first glance, this seems unsafe. But in practice, it's
>> fine because the corresponding iommufd_device has already been destroyed
>> when the VFIO device file descriptor was closed—triggering
>> vfio_df_iommufd_unbind().
>
> This needs some kind of fixing the idevice should destroy the vdevices
> during idevice destruction so we don't get this out of order where the
> idevice is destroyed before the vdevice.
>
> This should be a separate patch as it is an immediate bug fix..
>
Something like below?
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 86244403b532..a49b293bd516 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -221,6 +221,8 @@ struct iommufd_device *iommufd_device_bind(struct iommufd_ctx *ictx,
refcount_inc(&idev->obj.users);
/* igroup refcount moves into iommufd_device */
idev->igroup = igroup;
+ idev->vdev = NULL;
+ mutex_init(&idev->lock);
/*
* If the caller fails after this success it must call
@@ -282,6 +284,12 @@ EXPORT_SYMBOL_NS_GPL(iommufd_ctx_has_group, "IOMMUFD");
*/
void iommufd_device_unbind(struct iommufd_device *idev)
{
+ /* this will be unlocked while destroying the idev obj */
+ mutex_lock(&idev->lock);
+
+ if (idev->vdev)
+ /* extra refcount taken during vdevice alloc */
+ iommufd_object_destroy_user(idev->ictx, &idev->vdev->obj);
iommufd_object_destroy_user(idev->ictx, &idev->obj);
}
EXPORT_SYMBOL_NS_GPL(iommufd_device_unbind, "IOMMUFD");
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 9ccc83341f32..d85bd8b38751 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -425,6 +425,10 @@ struct iommufd_device {
/* always the physical device */
struct device *dev;
bool enforce_cache_coherency;
+ /* to protect the following members*/
+ struct mutex lock;
+ /* if there is a vdevice mapping the idev */
+ struct iommufd_vdevice *vdev;
};
static inline struct iommufd_device *
@@ -606,6 +610,7 @@ struct iommufd_vdevice {
struct iommufd_ctx *ictx;
struct iommufd_viommu *viommu;
struct device *dev;
+ struct iommufd_device *idev;
u64 id; /* per-vIOMMU virtual ID */
};
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index 3df468f64e7d..c38303df536f 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -172,6 +172,11 @@ int iommufd_object_remove(struct iommufd_ctx *ictx,
ictx->vfio_ioas = NULL;
xa_unlock(&ictx->objects);
+ if (obj->type == IOMMUFD_OBJ_DEVICE) {
+ /* idevice should be freed with lock held */
+ struct iommufd_device *idev = container_of(obj, struct iommufd_device, obj);
+ mutex_unlock(&idev->lock);
+ }
/*
* Since users is zero any positive users_shortterm must be racing
* iommufd_put_object(), or we have a bug.
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
index 01df2b985f02..17f189bc9e2c 100644
--- a/drivers/iommu/iommufd/viommu.c
+++ b/drivers/iommu/iommufd/viommu.c
@@ -84,15 +84,24 @@ int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
return rc;
}
+/* This will be called from iommufd_device_unbind */
void iommufd_vdevice_destroy(struct iommufd_object *obj)
{
struct iommufd_vdevice *vdev =
container_of(obj, struct iommufd_vdevice, obj);
struct iommufd_viommu *viommu = vdev->viommu;
+ struct iommufd_device *idev = vdev->idev;
+
+ /*
+ * Since we hold a refcount on idev, it can't be freed.
+ */
+ lockdep_assert_held(&idev->lock);
/* xa_cmpxchg is okay to fail if alloc failed xa_cmpxchg previously */
xa_cmpxchg(&viommu->vdevs, vdev->id, vdev, NULL, GFP_KERNEL);
refcount_dec(&viommu->obj.users);
+ idev->vdev = NULL;
+ refcount_dec(&idev->obj.users);
put_device(vdev->dev);
}
@@ -124,10 +133,15 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
goto out_put_idev;
}
+ mutex_lock(&idev->lock);
+ if (idev->vdev) {
+ rc = -EINVAL;
+ goto out_put_idev_unlock;
+ }
vdev = iommufd_object_alloc(ucmd->ictx, vdev, IOMMUFD_OBJ_VDEVICE);
if (IS_ERR(vdev)) {
rc = PTR_ERR(vdev);
- goto out_put_idev;
+ goto out_put_idev_unlock;
}
vdev->id = virt_id;
@@ -147,10 +161,18 @@ int iommufd_vdevice_alloc_ioctl(struct iommufd_ucmd *ucmd)
if (rc)
goto out_abort;
iommufd_object_finalize(ucmd->ictx, &vdev->obj);
- goto out_put_idev;
+ /* don't allow idev free without vdev free */
+ refcount_inc(&idev->obj.users);
+ vdev->idev = idev;
+ /* vdev lifecycle now managed by idev */
+ idev->vdev = vdev;
+ refcount_inc(&vdev->obj.users);
+ goto out_put_idev_unlock;
out_abort:
iommufd_object_abort_and_destroy(ucmd->ictx, &vdev->obj);
+out_put_idev_unlock:
+ mutex_unlock(&idev->lock);
out_put_idev:
iommufd_put_object(ucmd->ictx, &idev->obj);
out_put_viommu:
© 2016 - 2025 Red Hat, Inc.