[PATCH v2] iommu/s390: Implement blocking domain

Matthew Rosato posted 1 patch 1 year, 5 months ago
There is a newer version of this series
drivers/iommu/s390-iommu.c | 64 ++++++++++++++++++++++++++------------
1 file changed, 44 insertions(+), 20 deletions(-)
[PATCH v2] iommu/s390: Implement blocking domain
Posted by Matthew Rosato 1 year, 5 months ago
This fixes a crash when surprise hot-unplugging a PCI device. This crash
happens because during hot-unplug __iommu_group_set_domain_nofail()
attaching the default domain fails when the platform no longer
recognizes the device as it has already been removed and we end up with
a NULL domain pointer and UAF. This is exactly the case referred to in
the second comment in __iommu_device_set_domain() and just as stated
there if we can instead attach the blocking domain the UAF is prevented
as this can handle the already removed device. Implement the blocking
domain to use this handling.  With this change, the crash is fixed but
we still hit a warning attempting to change DMA ownership on a blocked
device.

Fixes: c76c067e488c ("s390/pci: Use dma-iommu layer")
Co-developed-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
Changes for v2:
- Added co-author tag from Niklas + my SoB
- Removed changes to drivers/iommu/iommu.c
- Revert back to -EIO for failed attach in s390-iommu
- Set blocking domain during probe_device / remove s390_domain check during
  blocking attach
- Remove s390_iommu_release_device
- Update commit message to reflect changes
---
 drivers/iommu/s390-iommu.c | 64 ++++++++++++++++++++++++++------------
 1 file changed, 44 insertions(+), 20 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index d8eaa7ea380b..11663a920cbb 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -33,6 +33,8 @@ struct s390_domain {
 	struct rcu_head		rcu;
 };
 
+static struct s390_domain s390_blocking_domain;
+
 static inline unsigned int calc_rtx(dma_addr_t ptr)
 {
 	return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
@@ -376,12 +378,21 @@ static void s390_iommu_detach_device(struct iommu_domain *domain,
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	unsigned long flags;
 
+	/*
+	 * The static blocking domain doesn't need to track devices nor
+	 * does it have an IOAT registered. As there is no harm
+	 * in keeping zdev->s390_domain set to blocking until
+	 * it is overwritten detach is a no-op.
+	 */
+	if (s390_domain->domain.type == IOMMU_DOMAIN_BLOCKED)
+		return;
+
 	spin_lock_irqsave(&s390_domain->list_lock, flags);
 	list_del_rcu(&zdev->iommu_list);
 	spin_unlock_irqrestore(&s390_domain->list_lock, flags);
 
 	zpci_unregister_ioat(zdev, 0);
-	zdev->s390_domain = NULL;
+	zdev->s390_domain = &s390_blocking_domain;
 	zdev->dma_table = NULL;
 }
 
@@ -403,16 +414,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
 
 	if (zdev->s390_domain)
 		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
-
-	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
-				virt_to_phys(s390_domain->dma_table), &status);
 	/*
-	 * If the device is undergoing error recovery the reset code
-	 * will re-establish the new domain.
+	 * Detach set the blocking domain. If we fail now DMA remains blocked
+	 * and the blocking domain attached.
 	 */
+	cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+				virt_to_phys(s390_domain->dma_table), &status);
 	if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
 		return -EIO;
-
 	zdev->dma_table = s390_domain->dma_table;
 	zdev->s390_domain = s390_domain;
 
@@ -466,19 +475,10 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 	if (zdev->tlb_refresh)
 		dev->iommu->shadow_on_flush = 1;
 
-	return &zdev->iommu_dev;
-}
-
-static void s390_iommu_release_device(struct device *dev)
-{
-	struct zpci_dev *zdev = to_zpci_dev(dev);
+	/* Start with DMA blocked */
+	zdev->s390_domain = &s390_blocking_domain;
 
-	/*
-	 * release_device is expected to detach any domain currently attached
-	 * to the device, but keep it attached to other devices in the group.
-	 */
-	if (zdev)
-		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
+	return &zdev->iommu_dev;
 }
 
 static int zpci_refresh_all(struct zpci_dev *zdev)
@@ -702,6 +702,29 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
 	return &zdev->s390_domain->ctrs;
 }
 
+static int blocking_domain_attach_device(struct iommu_domain *domain,
+					 struct device *dev)
+{
+	struct s390_domain *s390_domain = to_s390_domain(domain);
+	struct zpci_dev *zdev = to_zpci_dev(dev);
+	unsigned long flags;
+
+	if (!zdev)
+		return 0;
+
+	/* Detach sets the blocking domain */
+	s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
+	return 0;
+}
+
+static struct s390_domain s390_blocking_domain = {
+	.domain = {
+		.type = IOMMU_DOMAIN_BLOCKED,
+		.ops = &(const struct iommu_domain_ops) {
+			.attach_dev	= blocking_domain_attach_device,
+	}}
+};
+
 int zpci_init_iommu(struct zpci_dev *zdev)
 {
 	u64 aperture_size;
@@ -777,10 +800,11 @@ static int __init s390_iommu_init(void)
 subsys_initcall(s390_iommu_init);
 
 static const struct iommu_ops s390_iommu_ops = {
+	.blocked_domain		= &s390_blocking_domain.domain,
+	.release_domain		= &s390_blocking_domain.domain,
 	.capable = s390_iommu_capable,
 	.domain_alloc_paging = s390_domain_alloc_paging,
 	.probe_device = s390_iommu_probe_device,
-	.release_device = s390_iommu_release_device,
 	.device_group = generic_device_group,
 	.pgsize_bitmap = SZ_4K,
 	.get_resv_regions = s390_iommu_get_resv_regions,
-- 
2.45.2
Re: [PATCH v2] iommu/s390: Implement blocking domain
Posted by Jason Gunthorpe 1 year, 5 months ago
On Tue, Aug 13, 2024 at 03:28:03PM -0400, Matthew Rosato wrote:
> This fixes a crash when surprise hot-unplugging a PCI device. This crash
> happens because during hot-unplug __iommu_group_set_domain_nofail()
> attaching the default domain fails when the platform no longer
> recognizes the device as it has already been removed and we end up with
> a NULL domain pointer and UAF. This is exactly the case referred to in
> the second comment in __iommu_device_set_domain() and just as stated
> there if we can instead attach the blocking domain the UAF is prevented
> as this can handle the already removed device. Implement the blocking
> domain to use this handling.  With this change, the crash is fixed but
> we still hit a warning attempting to change DMA ownership on a blocked
> device.
> 
> Fixes: c76c067e488c ("s390/pci: Use dma-iommu layer")
> Co-developed-by: Niklas Schnelle <schnelle@linux.ibm.com>
> Signed-off-by: Niklas Schnelle <schnelle@linux.ibm.com>
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
> Changes for v2:
> - Added co-author tag from Niklas + my SoB
> - Removed changes to drivers/iommu/iommu.c
> - Revert back to -EIO for failed attach in s390-iommu
> - Set blocking domain during probe_device / remove s390_domain check during
>   blocking attach
> - Remove s390_iommu_release_device
> - Update commit message to reflect changes
> ---
>  drivers/iommu/s390-iommu.c | 64 ++++++++++++++++++++++++++------------
>  1 file changed, 44 insertions(+), 20 deletions(-)

This is probably OK as is, but there are a few things that don't quite
match the pattern still here

The blocking domain should be the iommu_domain type since it is
global and shared. None of the additional driver members should ever
be touched as that would maybe become dangerous.

> +static struct s390_domain s390_blocking_domain = {

Ideally would be struct iommu_domain s390_blocking_domain

That in turn means going around and looking carefully at
zdev->s390_domain. All uses can probably be removed except for this:

struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
{
	if (!zdev || !zdev->s390_domain)
		return NULL;
	return &zdev->s390_domain->ctrs;
}

Which doesn't look good for a blocking domain anyhow. Also the above
looks racy, nothing prevents s390_domain from being freed if it is
read outside an iommu op.

The checks for null zdev also don't make sense:

static int blocking_domain_attach_device(struct iommu_domain *domain,
					 struct device *dev)
{
	struct s390_domain *s390_domain = to_s390_domain(domain);
	struct zpci_dev *zdev = to_zpci_dev(dev);
	unsigned long flags;

	if (!zdev)
		return 0;

The core guarentees these functions are never called unless probe
succeeds and probe won't succeed if zdev is NULL.

And it would be good to rename s390_iommu_detach_device() to
blocking_domain_attach_device(), then the obsolete "detach" naming is
gone and this driver just uses a blocked before attach pattern.

> @@ -403,16 +414,14 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
>  
>  	if (zdev->s390_domain)
>  		s390_iommu_detach_device(&zdev->s390_domain->domain, dev);

s390_domain is never NULL now, the test can go away

> +static int blocking_domain_attach_device(struct iommu_domain *domain,
> +					 struct device *dev)
> +{
> +	struct s390_domain *s390_domain = to_s390_domain(domain);
> +	struct zpci_dev *zdev = to_zpci_dev(dev);
> +	unsigned long flags;

flags is never used? Compiler didn't warn?

Jason