It's somewhat hard to see, but arm64's arch_setup_dma_ops() should only
ever call iommu_setup_dma_ops() after a successful iommu_probe_device(),
which means there should be no harm in achieving the same orer of
operations by running it off the back of iommu_probe_device() itself.
This then puts it in line with the x86 and s390 .probe_finalize bodges,
letting us pull it all into the main flow properly. As a bonus this lets
us fold in and de-scope the PCI workaround setup as well.
At this point we can also then pull the call inside the group mutex, and
avoid having to think about whether iommu_group_store_type() could
theroetically race and free the domain where iommu_setup_dma_ops() may
currently run just *before* iommu_device_use_default_domain() claims it.
Signed-off-by: Robin Murphy <robin.murphy@arm.com>
---
v2: Shuffle around to make sure the iommu_group_do_probe_finalize() cases
are covered as well, with bonus side-effects as above.
---
arch/arm64/mm/dma-mapping.c | 2 --
drivers/iommu/amd/iommu.c | 8 --------
drivers/iommu/dma-iommu.c | 18 ++++++------------
drivers/iommu/dma-iommu.h | 14 ++++++--------
drivers/iommu/intel/iommu.c | 7 -------
drivers/iommu/iommu.c | 6 +++---
drivers/iommu/s390-iommu.c | 6 ------
drivers/iommu/virtio-iommu.c | 10 ----------
include/linux/iommu.h | 7 -------
9 files changed, 15 insertions(+), 63 deletions(-)
diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
index 3cb101e8cb29..96ff791199e8 100644
--- a/arch/arm64/mm/dma-mapping.c
+++ b/arch/arm64/mm/dma-mapping.c
@@ -58,8 +58,6 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
ARCH_DMA_MINALIGN, cls);
dev->dma_coherent = coherent;
- if (iommu)
- iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
xen_setup_dma_ops(dev);
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index fcc987f5d4ed..9418808009ba 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -1985,13 +1985,6 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
return iommu_dev;
}
-static void amd_iommu_probe_finalize(struct device *dev)
-{
- /* Domains are initialized for this device - have a look what we ended up with */
- set_dma_ops(dev, NULL);
- iommu_setup_dma_ops(dev, 0, U64_MAX);
-}
-
static void amd_iommu_release_device(struct device *dev)
{
struct amd_iommu *iommu;
@@ -2646,7 +2639,6 @@ const struct iommu_ops amd_iommu_ops = {
.domain_alloc_user = amd_iommu_domain_alloc_user,
.probe_device = amd_iommu_probe_device,
.release_device = amd_iommu_release_device,
- .probe_finalize = amd_iommu_probe_finalize,
.device_group = amd_iommu_device_group,
.get_resv_regions = amd_iommu_get_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 27a167f4cd3e..d808c8dcf5cb 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -1724,25 +1724,20 @@ static const struct dma_map_ops iommu_dma_ops = {
.opt_mapping_size = iommu_dma_opt_mapping_size,
};
-/*
- * The IOMMU core code allocates the default DMA domain, which the underlying
- * IOMMU driver needs to support via the dma-iommu layer.
- */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
+void iommu_setup_dma_ops(struct device *dev)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
- if (!domain)
- goto out_err;
+ if (dev_is_pci(dev))
+ dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
- /*
- * The IOMMU core code allocates the default DMA domain, which the
- * underlying IOMMU driver needs to support via the dma-iommu layer.
- */
if (iommu_is_dma_domain(domain)) {
if (iommu_dma_init_domain(domain, dev))
goto out_err;
dev->dma_ops = &iommu_dma_ops;
+ } else if (dev->dma_ops == &iommu_dma_ops) {
+ /* Clean up if we've switched *from* a DMA domain */
+ dev->dma_ops = NULL;
}
return;
@@ -1750,7 +1745,6 @@ void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n",
dev_name(dev));
}
-EXPORT_SYMBOL_GPL(iommu_setup_dma_ops);
static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev,
phys_addr_t msi_addr, struct iommu_domain *domain)
diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h
index c829f1f82a99..c12d63457c76 100644
--- a/drivers/iommu/dma-iommu.h
+++ b/drivers/iommu/dma-iommu.h
@@ -9,6 +9,8 @@
#ifdef CONFIG_IOMMU_DMA
+void iommu_setup_dma_ops(struct device *dev);
+
int iommu_get_dma_cookie(struct iommu_domain *domain);
void iommu_put_dma_cookie(struct iommu_domain *domain);
@@ -17,13 +19,13 @@ int iommu_dma_init_fq(struct iommu_domain *domain);
void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list);
extern bool iommu_dma_forcedac;
-static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
-{
- dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
-}
#else /* CONFIG_IOMMU_DMA */
+static inline void iommu_setup_dma_ops(struct device *dev)
+{
+}
+
static inline int iommu_dma_init_fq(struct iommu_domain *domain)
{
return -EINVAL;
@@ -42,9 +44,5 @@ static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_he
{
}
-static inline void iommu_dma_set_pci_32bit_workaround(struct device *dev)
-{
-}
-
#endif /* CONFIG_IOMMU_DMA */
#endif /* __DMA_IOMMU_H */
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 3531b956556c..f7347ed41e89 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4480,12 +4480,6 @@ static void intel_iommu_release_device(struct device *dev)
set_dma_ops(dev, NULL);
}
-static void intel_iommu_probe_finalize(struct device *dev)
-{
- set_dma_ops(dev, NULL);
- iommu_setup_dma_ops(dev, 0, U64_MAX);
-}
-
static void intel_iommu_get_resv_regions(struct device *device,
struct list_head *head)
{
@@ -4937,7 +4931,6 @@ const struct iommu_ops intel_iommu_ops = {
.domain_alloc = intel_iommu_domain_alloc,
.domain_alloc_user = intel_iommu_domain_alloc_user,
.probe_device = intel_iommu_probe_device,
- .probe_finalize = intel_iommu_probe_finalize,
.release_device = intel_iommu_release_device,
.get_resv_regions = intel_iommu_get_resv_regions,
.device_group = intel_iommu_device_group,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 824989874dee..43f630d0530e 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -560,10 +560,10 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
if (list_empty(&group->entry))
list_add_tail(&group->entry, group_list);
}
- mutex_unlock(&group->mutex);
- if (dev_is_pci(dev))
- iommu_dma_set_pci_32bit_workaround(dev);
+ iommu_setup_dma_ops(dev);
+
+ mutex_unlock(&group->mutex);
return 0;
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 9a5196f523de..d8eaa7ea380b 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -695,11 +695,6 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
return size;
}
-static void s390_iommu_probe_finalize(struct device *dev)
-{
- iommu_setup_dma_ops(dev, 0, U64_MAX);
-}
-
struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
{
if (!zdev || !zdev->s390_domain)
@@ -785,7 +780,6 @@ static const struct iommu_ops s390_iommu_ops = {
.capable = s390_iommu_capable,
.domain_alloc_paging = s390_domain_alloc_paging,
.probe_device = s390_iommu_probe_device,
- .probe_finalize = s390_iommu_probe_finalize,
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = SZ_4K,
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 9bcffdde6175..5a558456946b 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -998,15 +998,6 @@ static struct iommu_device *viommu_probe_device(struct device *dev)
return ERR_PTR(ret);
}
-static void viommu_probe_finalize(struct device *dev)
-{
-#ifndef CONFIG_ARCH_HAS_SETUP_DMA_OPS
- /* First clear the DMA ops in case we're switching from a DMA domain */
- set_dma_ops(dev, NULL);
- iommu_setup_dma_ops(dev, 0, U64_MAX);
-#endif
-}
-
static void viommu_release_device(struct device *dev)
{
struct viommu_endpoint *vdev = dev_iommu_priv_get(dev);
@@ -1043,7 +1034,6 @@ static struct iommu_ops viommu_ops = {
.capable = viommu_capable,
.domain_alloc = viommu_domain_alloc,
.probe_device = viommu_probe_device,
- .probe_finalize = viommu_probe_finalize,
.release_device = viommu_release_device,
.device_group = viommu_device_group,
.get_resv_regions = viommu_get_resv_regions,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 20ed3a4fc5e0..3a3bf8afb8ca 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1283,9 +1283,6 @@ static inline void iommu_debugfs_setup(void) {}
#ifdef CONFIG_IOMMU_DMA
#include <linux/msi.h>
-/* Setup call for arch DMA mapping code */
-void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit);
-
int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base);
int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr);
@@ -1296,10 +1293,6 @@ void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg);
struct msi_desc;
struct msi_msg;
-static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
-{
-}
-
static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base)
{
return -ENODEV;
--
2.39.2.101.g768bb238c484.dirty
On Wed, Dec 13, 2023 at 05:17:59PM +0000, Robin Murphy wrote:
> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
> index 27a167f4cd3e..d808c8dcf5cb 100644
> --- a/drivers/iommu/dma-iommu.c
> +++ b/drivers/iommu/dma-iommu.c
> @@ -1724,25 +1724,20 @@ static const struct dma_map_ops iommu_dma_ops = {
> .opt_mapping_size = iommu_dma_opt_mapping_size,
> };
>
> -/*
> - * The IOMMU core code allocates the default DMA domain, which the underlying
> - * IOMMU driver needs to support via the dma-iommu layer.
> - */
> -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
> +void iommu_setup_dma_ops(struct device *dev)
> {
> struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
>
> - if (!domain)
> - goto out_err;
> + if (dev_is_pci(dev))
> + dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
>
> - /*
> - * The IOMMU core code allocates the default DMA domain, which the
> - * underlying IOMMU driver needs to support via the dma-iommu layer.
> - */
> if (iommu_is_dma_domain(domain)) {
...
> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
> index 824989874dee..43f630d0530e 100644
> --- a/drivers/iommu/iommu.c
> +++ b/drivers/iommu/iommu.c
> @@ -560,10 +560,10 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
> if (list_empty(&group->entry))
> list_add_tail(&group->entry, group_list);
> }
> - mutex_unlock(&group->mutex);
>
> - if (dev_is_pci(dev))
> - iommu_dma_set_pci_32bit_workaround(dev);
> + iommu_setup_dma_ops(dev);
With Intel VT-d (QEMU emulation) I get a crash in iommu_setup_dma_ops()
because at this point group->domain and group->default_domain are still
NULL, group_list is non-NULL.
No problem with x86 virtio-iommu, it goes through
iommu_setup_default_domain() earlier in the function. Arm virtio-iommu is
fine too.
Thanks,
Jean
On 2023-12-14 4:51 pm, Jean-Philippe Brucker wrote:
> On Wed, Dec 13, 2023 at 05:17:59PM +0000, Robin Murphy wrote:
>> diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
>> index 27a167f4cd3e..d808c8dcf5cb 100644
>> --- a/drivers/iommu/dma-iommu.c
>> +++ b/drivers/iommu/dma-iommu.c
>> @@ -1724,25 +1724,20 @@ static const struct dma_map_ops iommu_dma_ops = {
>> .opt_mapping_size = iommu_dma_opt_mapping_size,
>> };
>>
>> -/*
>> - * The IOMMU core code allocates the default DMA domain, which the underlying
>> - * IOMMU driver needs to support via the dma-iommu layer.
>> - */
>> -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit)
>> +void iommu_setup_dma_ops(struct device *dev)
>> {
>> struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
>>
>> - if (!domain)
>> - goto out_err;
>> + if (dev_is_pci(dev))
>> + dev->iommu->pci_32bit_workaround = !iommu_dma_forcedac;
>>
>> - /*
>> - * The IOMMU core code allocates the default DMA domain, which the
>> - * underlying IOMMU driver needs to support via the dma-iommu layer.
>> - */
>> if (iommu_is_dma_domain(domain)) {
>
> ...
>
>> diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
>> index 824989874dee..43f630d0530e 100644
>> --- a/drivers/iommu/iommu.c
>> +++ b/drivers/iommu/iommu.c
>> @@ -560,10 +560,10 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
>> if (list_empty(&group->entry))
>> list_add_tail(&group->entry, group_list);
>> }
>> - mutex_unlock(&group->mutex);
>>
>> - if (dev_is_pci(dev))
>> - iommu_dma_set_pci_32bit_workaround(dev);
>> + iommu_setup_dma_ops(dev);
>
> With Intel VT-d (QEMU emulation) I get a crash in iommu_setup_dma_ops()
> because at this point group->domain and group->default_domain are still
> NULL, group_list is non-NULL.
Ugh, clearly I'd manage to confuse myself, since what I wrote in the
changelog isn't even right...
Taking yet another look, there's not actually one single place we can do
this right now which will work in a manageable way for all cases. With 2
or 3 more levels of mess unpicked it's going to clean up much further
(it's also becoming clear that iommu-dma wants better separation of its
own per-device and per-domain bits), but for the immediate task in this
series of finally getting out of arch code, I guess that continuing to
echo the current probe_finalize flows is going to be safest. Something
like the diff below (but I'll have a further think about it with a fresh
head tomorrow).
Thanks,
Robin.
----->8-----
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 8972b7f22a9a..ba4cd5251205 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -562,7 +562,8 @@ static int __iommu_probe_device(struct device *dev, struct list_head *group_list
list_add_tail(&group->entry, group_list);
}
- iommu_setup_dma_ops(dev);
+ if (group->default_domain)
+ iommu_setup_dma_ops(dev);
mutex_unlock(&group->mutex);
@@ -1992,6 +1993,8 @@ int bus_iommu_probe(const struct bus_type *bus)
mutex_unlock(&group->mutex);
return ret;
}
+ for_each_group_device(group, gdev)
+ iommu_setup_dma_ops(gdev->dev);
mutex_unlock(&group->mutex);
/*
@@ -3217,18 +3220,9 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
if (ret)
goto out_unlock;
- /*
- * Release the mutex here because ops->probe_finalize() call-back of
- * some vendor IOMMU drivers calls arm_iommu_attach_device() which
- * in-turn might call back into IOMMU core code, where it tries to take
- * group->mutex, resulting in a deadlock.
- */
- mutex_unlock(&group->mutex);
-
/* Make sure dma_ops is appropriatley set */
for_each_group_device(group, gdev)
- iommu_group_do_probe_finalize(gdev->dev);
- return count;
+ iommu_setup_dma_ops(gdev->dev);
out_unlock:
mutex_unlock(&group->mutex);
On Thu, Dec 14, 2023 at 06:22:49PM +0000, Robin Murphy wrote: > Taking yet another look, there's not actually one single place we can do > this right now which will work in a manageable way for all cases. The dma ops should be set after changing the translation and there is only one place that attachs the domain? What prevents putting it there? > @@ -3217,18 +3220,9 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, > if (ret) > goto out_unlock; > - /* > - * Release the mutex here because ops->probe_finalize() call-back of > - * some vendor IOMMU drivers calls arm_iommu_attach_device() which > - * in-turn might call back into IOMMU core code, where it tries to take > - * group->mutex, resulting in a deadlock. > - */ > - mutex_unlock(&group->mutex); > - > /* Make sure dma_ops is appropriatley set */ > for_each_group_device(group, gdev) > - iommu_group_do_probe_finalize(gdev->dev); > - return count; If we are turning this into something that only works for the ARM DMA then the remaining caller should be guarded by an IS_ENABLED Jason
© 2016 - 2025 Red Hat, Inc.