Currently, when attaching a domain to a device or its PASID, domain is
stored within the iommu group. It could be retrieved for use during the
window between attachment and detachment.
With new features introduced, there's a need to store more information
than just a domain pointer. This information essentially represents the
association between a domain and a device. For example, the SVA code
already has a custom struct iommu_sva which represents a bond between
sva domain and a PASID of a device. Looking forward, the IOMMUFD needs
a place to store the iommufd_device pointer in the core, so that the
device object ID could be quickly retrieved in the critical fault handling
path.
Introduce domain attachment handle that explicitly represents the
attachment relationship between a domain and a device or its PASID.
Co-developed-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
include/linux/iommu.h | 18 +++++++++++++++---
drivers/dma/idxd/init.c | 2 +-
drivers/iommu/iommu-sva.c | 13 ++++++++-----
drivers/iommu/iommu.c | 23 +++++++++++++----------
4 files changed, 37 insertions(+), 19 deletions(-)
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 7bc8dff7cf6d..b89404acacd6 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -989,12 +989,22 @@ struct iommu_fwspec {
/* ATS is supported */
#define IOMMU_FWSPEC_PCI_RC_ATS (1 << 0)
+/*
+ * An iommu attach handle represents a relationship between an iommu domain
+ * and a PASID or RID of a device. It is allocated and managed by the component
+ * that manages the domain and is stored in the iommu group during the time the
+ * domain is attached.
+ */
+struct iommu_attach_handle {
+ struct iommu_domain *domain;
+};
+
/**
* struct iommu_sva - handle to a device-mm bond
*/
struct iommu_sva {
+ struct iommu_attach_handle handle;
struct device *dev;
- struct iommu_domain *domain;
struct list_head handle_item;
refcount_t users;
};
@@ -1052,7 +1062,8 @@ int iommu_device_claim_dma_owner(struct device *dev, void *owner);
void iommu_device_release_dma_owner(struct device *dev);
int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid);
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle);
void iommu_detach_device_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t pasid);
struct iommu_domain *
@@ -1388,7 +1399,8 @@ static inline int iommu_device_claim_dma_owner(struct device *dev, void *owner)
}
static inline int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
{
return -ENODEV;
}
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index a7295943fa22..385c488c9cd1 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -584,7 +584,7 @@ static int idxd_enable_system_pasid(struct idxd_device *idxd)
* DMA domain is owned by the driver, it should support all valid
* types such as DMA-FQ, identity, etc.
*/
- ret = iommu_attach_device_pasid(domain, dev, pasid);
+ ret = iommu_attach_device_pasid(domain, dev, pasid, NULL);
if (ret) {
dev_err(dev, "failed to attach device pasid %d, domain type %d",
pasid, domain->type);
diff --git a/drivers/iommu/iommu-sva.c b/drivers/iommu/iommu-sva.c
index 18a35e798b72..0fb923254062 100644
--- a/drivers/iommu/iommu-sva.c
+++ b/drivers/iommu/iommu-sva.c
@@ -99,7 +99,9 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
/* Search for an existing domain. */
list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) {
- ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid);
+ handle->handle.domain = domain;
+ ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid,
+ &handle->handle);
if (!ret) {
domain->users++;
goto out;
@@ -113,7 +115,9 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
goto out_free_handle;
}
- ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid);
+ handle->handle.domain = domain;
+ ret = iommu_attach_device_pasid(domain, dev, iommu_mm->pasid,
+ &handle->handle);
if (ret)
goto out_free_domain;
domain->users = 1;
@@ -124,7 +128,6 @@ struct iommu_sva *iommu_sva_bind_device(struct device *dev, struct mm_struct *mm
list_add(&handle->handle_item, &mm->iommu_mm->sva_handles);
mutex_unlock(&iommu_sva_lock);
handle->dev = dev;
- handle->domain = domain;
return handle;
out_free_domain:
@@ -147,7 +150,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_bind_device);
*/
void iommu_sva_unbind_device(struct iommu_sva *handle)
{
- struct iommu_domain *domain = handle->domain;
+ struct iommu_domain *domain = handle->handle.domain;
struct iommu_mm_data *iommu_mm = domain->mm->iommu_mm;
struct device *dev = handle->dev;
@@ -170,7 +173,7 @@ EXPORT_SYMBOL_GPL(iommu_sva_unbind_device);
u32 iommu_sva_get_pasid(struct iommu_sva *handle)
{
- struct iommu_domain *domain = handle->domain;
+ struct iommu_domain *domain = handle->handle.domain;
return mm_get_enqcmd_pasid(domain->mm);
}
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 9df7cc75c1bc..efd281ddfa63 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3352,16 +3352,17 @@ static void __iommu_remove_group_pasid(struct iommu_group *group,
* @domain: the iommu domain.
* @dev: the attached device.
* @pasid: the pasid of the device.
+ * @handle: the attach handle.
*
* Return: 0 on success, or an error.
*/
int iommu_attach_device_pasid(struct iommu_domain *domain,
- struct device *dev, ioasid_t pasid)
+ struct device *dev, ioasid_t pasid,
+ struct iommu_attach_handle *handle)
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
struct group_device *device;
- void *curr;
int ret;
if (!domain->ops->set_dev_pasid)
@@ -3382,11 +3383,9 @@ int iommu_attach_device_pasid(struct iommu_domain *domain,
}
}
- curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain, GFP_KERNEL);
- if (curr) {
- ret = xa_err(curr) ? : -EBUSY;
+ ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL);
+ if (ret)
goto out_unlock;
- }
ret = __iommu_set_group_pasid(domain, group, pasid);
if (ret)
@@ -3414,7 +3413,7 @@ void iommu_detach_device_pasid(struct iommu_domain *domain, struct device *dev,
mutex_lock(&group->mutex);
__iommu_remove_group_pasid(group, pasid, domain);
- WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
+ xa_erase(&group->pasid_array, pasid);
mutex_unlock(&group->mutex);
}
EXPORT_SYMBOL_GPL(iommu_detach_device_pasid);
@@ -3439,15 +3438,19 @@ struct iommu_domain *iommu_get_domain_for_dev_pasid(struct device *dev,
{
/* Caller must be a probed driver on dev */
struct iommu_group *group = dev->iommu_group;
- struct iommu_domain *domain;
+ struct iommu_attach_handle *handle;
+ struct iommu_domain *domain = NULL;
if (!group)
return NULL;
xa_lock(&group->pasid_array);
- domain = xa_load(&group->pasid_array, pasid);
+ handle = xa_load(&group->pasid_array, pasid);
+ if (handle)
+ domain = handle->domain;
+
if (type && domain && domain->type != type)
- domain = ERR_PTR(-EBUSY);
+ domain = NULL;
xa_unlock(&group->pasid_array);
return domain;
--
2.34.1
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Monday, May 27, 2024 12:05 PM
>
> @@ -99,7 +99,9 @@ struct iommu_sva *iommu_sva_bind_device(struct device
> *dev, struct mm_struct *mm
>
> /* Search for an existing domain. */
> list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) {
> - ret = iommu_attach_device_pasid(domain, dev, iommu_mm-
> >pasid);
> + handle->handle.domain = domain;
> + ret = iommu_attach_device_pasid(domain, dev, iommu_mm-
> >pasid,
> + &handle->handle);
move the setting of handle.domain into the helper?
> @@ -3382,11 +3383,9 @@ int iommu_attach_device_pasid(struct
> iommu_domain *domain,
> }
> }
>
> - curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain,
> GFP_KERNEL);
> - if (curr) {
> - ret = xa_err(curr) ? : -EBUSY;
> + ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL);
> + if (ret)
> goto out_unlock;
> - }
this leads to a slightly different implication comparing to the old code.
the domain pointer was always tracked in the old code but now the handle
is optional.
if iommu core only needs to know whether a pasid has been attached (as
in this helper), it still works as xa_insert() will mark the entry as reserved
if handle==NULL and next xa_insert() at the same index will fail due to
the entry being reserved.
But if certain path (other than iopf) in the iommu core needs to know
the exact domain pointer then this change breaks it.
Anyway some explanation is welcomed here for why this change is safe.
> @@ -3414,7 +3413,7 @@ void iommu_detach_device_pasid(struct
> iommu_domain *domain, struct device *dev,
>
> mutex_lock(&group->mutex);
> __iommu_remove_group_pasid(group, pasid, domain);
> - WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
> + xa_erase(&group->pasid_array, pasid);
if the entry is valid do we want to keep the WARN_ON() upon handle->domain?
On 6/5/24 4:02 PM, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Monday, May 27, 2024 12:05 PM
>>
>> @@ -99,7 +99,9 @@ struct iommu_sva *iommu_sva_bind_device(struct device
>> *dev, struct mm_struct *mm
>>
>> /* Search for an existing domain. */
>> list_for_each_entry(domain, &mm->iommu_mm->sva_domains, next) {
>> - ret = iommu_attach_device_pasid(domain, dev, iommu_mm-
>>> pasid);
>> + handle->handle.domain = domain;
>> + ret = iommu_attach_device_pasid(domain, dev, iommu_mm-
>>> pasid,
>> + &handle->handle);
>
> move the setting of handle.domain into the helper?
Yes.
>
>> @@ -3382,11 +3383,9 @@ int iommu_attach_device_pasid(struct
>> iommu_domain *domain,
>> }
>> }
>>
>> - curr = xa_cmpxchg(&group->pasid_array, pasid, NULL, domain,
>> GFP_KERNEL);
>> - if (curr) {
>> - ret = xa_err(curr) ? : -EBUSY;
>> + ret = xa_insert(&group->pasid_array, pasid, handle, GFP_KERNEL);
>> + if (ret)
>> goto out_unlock;
>> - }
>
> this leads to a slightly different implication comparing to the old code.
>
> the domain pointer was always tracked in the old code but now the handle
> is optional.
>
> if iommu core only needs to know whether a pasid has been attached (as
> in this helper), it still works as xa_insert() will mark the entry as reserved
> if handle==NULL and next xa_insert() at the same index will fail due to
> the entry being reserved.
>
> But if certain path (other than iopf) in the iommu core needs to know
> the exact domain pointer then this change breaks it.
The iommu core should not fetch the domain pointer in paths other than
attach/detach/replace. There is currently no reference counter for an
iommu domain, hence fetching the domain for other purposes will
potentially lead to a use-after-free issue.
>
> Anyway some explanation is welcomed here for why this change is safe.
>
>> @@ -3414,7 +3413,7 @@ void iommu_detach_device_pasid(struct
>> iommu_domain *domain, struct device *dev,
>>
>> mutex_lock(&group->mutex);
>> __iommu_remove_group_pasid(group, pasid, domain);
>> - WARN_ON(xa_erase(&group->pasid_array, pasid) != domain);
>> + xa_erase(&group->pasid_array, pasid);
>
> if the entry is valid do we want to keep the WARN_ON() upon handle->domain?
The domain pointer has already been passed to the iommu driver in
__iommu_remove_group_pasid(). Therefore, the check for the pointer's
validity should be performed before calling
__iommu_remove_group_pasid(). Hence, in my view, using WARN_ON() around
xa_erase() is not very useful.
Best regards,
baolu
On Thu, Jun 06, 2024 at 01:33:29PM +0800, Baolu Lu wrote: > > But if certain path (other than iopf) in the iommu core needs to know > > the exact domain pointer then this change breaks it. > > The iommu core should not fetch the domain pointer in paths other than > attach/detach/replace. There is currently no reference counter for an > iommu domain, hence fetching the domain for other purposes will > potentially lead to a use-after-free issue. If you are doing this then we should get rid of iommu_get_domain_for_dev_pasid, and always return the handle there. Making it clear that all those flows require handles to work. But is this really OK? What happened to the patch fixing the error unwind in attach_pasid? Doesn't it always need the domain to undo a failed attach? Jason
On 6/12/24 9:10 PM, Jason Gunthorpe wrote: > On Thu, Jun 06, 2024 at 01:33:29PM +0800, Baolu Lu wrote: > >>> But if certain path (other than iopf) in the iommu core needs to know >>> the exact domain pointer then this change breaks it. >> >> The iommu core should not fetch the domain pointer in paths other than >> attach/detach/replace. There is currently no reference counter for an >> iommu domain, hence fetching the domain for other purposes will >> potentially lead to a use-after-free issue. > > If you are doing this then we should get rid of > iommu_get_domain_for_dev_pasid, and always return the handle > there. Making it clear that all those flows require handles to work. I removed iommu_get_domain_for_dev_pasid() in this series. The iopf handling is the only path that requires fetching domain, where we requires attach handle to work. > > But is this really OK? What happened to the patch fixing the error > unwind in attach_pasid? Doesn't it always need the domain to undo a > failed attach? attach_pasid switches from being unassigned to being assigned with a real domain, so the unwind operation simply involves calling iommu_ops->remove_dev_pasid. In the future, probably we will introduce a replace interface for pasid. In that scenario, the caller should explicitly pass both the old and new domains. If the replace operation fails, the interface will revert to the old domain. In my mind, the iommu core should only manage the default domain for kernel DMA. All domains that are allocated by domain_alloc interfaces should be managed by the callers and passed through the attach/detach /replace interfaces. > > Jason > Best regards, baolu
© 2016 - 2026 Red Hat, Inc.