[PATCH v5 5/5] iommu/s390: allow larger region tables

Matthew Rosato posted 5 patches 8 months, 1 week ago
[PATCH v5 5/5] iommu/s390: allow larger region tables
Posted by Matthew Rosato 8 months, 1 week ago
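Extend the aperture calculation to consider sizes beyond the maximum
size of a region third table.  Attempt to always use the smallest
table type that covers the aperture, to avoid unnecessary extra steps
during translation.  If the device does not report support for a
suitable larger table type, fall back to a region third table and
clamp the aperture to what that table can address.  The aperture
calculation moves from zpci_init_iommu() to s390_domain_alloc_paging(),
where the table type is selected.  Update reserved region calculations
to use the appropriate table size.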

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 arch/s390/include/asm/pci_dma.h |  1 +
 drivers/iommu/s390-iommu.c      | 70 ++++++++++++++++++++++++---------
 2 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 8d8962e4fd58..d12e17201661 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -25,6 +25,7 @@ enum zpci_ioat_dtype {
 #define ZPCI_KEY			(PAGE_DEFAULT_KEY << 5)
 
 #define ZPCI_TABLE_SIZE_RT	(1UL << 42)
+#define ZPCI_TABLE_SIZE_RS	(1UL << 53)
 
 #define ZPCI_IOTA_STO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
 #define ZPCI_IOTA_RTTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 46f45b136993..433b59f43530 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -511,9 +511,25 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
 	}
 }
 
+static inline u64 max_tbl_size(struct s390_domain *domain)
+{
+	switch (domain->origin_type) {
+	case ZPCI_TABLE_TYPE_RTX:
+		return ZPCI_TABLE_SIZE_RT - 1;
+	case ZPCI_TABLE_TYPE_RSX:
+		return ZPCI_TABLE_SIZE_RS - 1;
+	case ZPCI_TABLE_TYPE_RFX:
+		return U64_MAX;
+	default:
+		return 0;
+	}
+}
+
 static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
 {
+	struct zpci_dev *zdev = to_zpci_dev(dev);
 	struct s390_domain *s390_domain;
+	u64 aperture_size;
 
 	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
 	if (!s390_domain)
@@ -524,10 +540,26 @@ static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
 		kfree(s390_domain);
 		return NULL;
 	}
+
+	aperture_size = min(s390_iommu_aperture,
+			    zdev->end_dma - zdev->start_dma + 1);
+	if (aperture_size <= (ZPCI_TABLE_SIZE_RT - zdev->start_dma)) {
+		s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
+	} else if (aperture_size <= (ZPCI_TABLE_SIZE_RS - zdev->start_dma) &&
+		  (zdev->dtsm & ZPCI_IOTA_DT_RS)) {
+		s390_domain->origin_type = ZPCI_TABLE_TYPE_RSX;
+	} else if (zdev->dtsm & ZPCI_IOTA_DT_RF) {
+		s390_domain->origin_type = ZPCI_TABLE_TYPE_RFX;
+	} else {
+		/* Assume RTX available */
+		s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
+		aperture_size = ZPCI_TABLE_SIZE_RT - zdev->start_dma;
+	}
+	zdev->end_dma = zdev->start_dma + aperture_size - 1;
+
 	s390_domain->domain.geometry.force_aperture = true;
 	s390_domain->domain.geometry.aperture_start = 0;
-	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
-	s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
+	s390_domain->domain.geometry.aperture_end = max_tbl_size(s390_domain);
 
 	spin_lock_init(&s390_domain->list_lock);
 	INIT_LIST_HEAD_RCU(&s390_domain->devices);
@@ -680,6 +712,8 @@ static void s390_iommu_get_resv_regions(struct device *dev,
 {
 	struct zpci_dev *zdev = to_zpci_dev(dev);
 	struct iommu_resv_region *region;
+	u64 max_size, end_resv;
+	unsigned long flags;
 
 	if (zdev->start_dma) {
 		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
@@ -689,10 +723,21 @@ static void s390_iommu_get_resv_regions(struct device *dev,
 		list_add_tail(&region->list, list);
 	}
 
-	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
-		region = iommu_alloc_resv_region(zdev->end_dma + 1,
-						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
-						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
+	spin_lock_irqsave(&zdev->dom_lock, flags);
+	if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED ||
+	    zdev->s390_domain->type == IOMMU_DOMAIN_IDENTITY) {
+		spin_unlock_irqrestore(&zdev->dom_lock, flags);
+		return;
+	}
+
+	max_size = max_tbl_size(to_s390_domain(zdev->s390_domain));
+	spin_unlock_irqrestore(&zdev->dom_lock, flags);
+
+	if (zdev->end_dma < max_size) {
+		end_resv = max_size - zdev->end_dma;
+		region = iommu_alloc_resv_region(zdev->end_dma + 1, end_resv,
+						 0, IOMMU_RESV_RESERVED,
+						 GFP_KERNEL);
 		if (!region)
 			return;
 		list_add_tail(&region->list, list);
@@ -708,13 +753,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
 
 	zdev = to_zpci_dev(dev);
 
-	if (zdev->start_dma > zdev->end_dma ||
-	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
+	if (zdev->start_dma > zdev->end_dma)
 		return ERR_PTR(-EINVAL);
 
-	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
-		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
-
 	if (zdev->tlb_refresh)
 		dev->iommu->shadow_on_flush = 1;
 
@@ -999,7 +1040,6 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
 
 int zpci_init_iommu(struct zpci_dev *zdev)
 {
-	u64 aperture_size;
 	int rc = 0;
 
 	rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
@@ -1017,12 +1057,6 @@ int zpci_init_iommu(struct zpci_dev *zdev)
 	if (rc)
 		goto out_sysfs;
 
-	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
-	aperture_size = min3(s390_iommu_aperture,
-			     ZPCI_TABLE_SIZE_RT - zdev->start_dma,
-			     zdev->end_dma - zdev->start_dma + 1);
-	zdev->end_dma = zdev->start_dma + aperture_size - 1;
-
 	return 0;
 
 out_sysfs:
-- 
2.49.0
Re: [PATCH v5 5/5] iommu/s390: allow larger region tables
Posted by Niklas Schnelle 8 months ago
On Fri, 2025-04-11 at 16:24 -0400, Matthew Rosato wrote:
> Extend the aperture calculation to consider sizes beyond the maximum
> size of a region third table.  Attempt to always use the smallest
> table size possible to avoid unnecessary extra steps during translation.
> Update reserved region calculations to use the appropriate table size.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>  arch/s390/include/asm/pci_dma.h |  1 +
>  drivers/iommu/s390-iommu.c      | 70 ++++++++++++++++++++++++---------
>  2 files changed, 53 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
> index 8d8962e4fd58..d12e17201661 100644
> --- a/arch/s390/include/asm/pci_dma.h
> +++ b/arch/s390/include/asm/pci_dma.h
> @@ -25,6 +25,7 @@ enum zpci_ioat_dtype {
>  #define ZPCI_KEY			(PAGE_DEFAULT_KEY << 5)
>  
>  #define ZPCI_TABLE_SIZE_RT	(1UL << 42)
> +#define ZPCI_TABLE_SIZE_RS	(1UL << 53)
>  
>  #define ZPCI_IOTA_STO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_ST)
>  #define ZPCI_IOTA_RTTO_FLAG	(ZPCI_IOTA_IOT_ENABLED | ZPCI_KEY | ZPCI_IOTA_DT_RT)
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 46f45b136993..433b59f43530 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -511,9 +511,25 @@ static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
>  	}
>  }
>  
> +static inline u64 max_tbl_size(struct s390_domain *domain)
> +{
> +	switch (domain->origin_type) {
> +	case ZPCI_TABLE_TYPE_RTX:
> +		return ZPCI_TABLE_SIZE_RT - 1;
> +	case ZPCI_TABLE_TYPE_RSX:
> +		return ZPCI_TABLE_SIZE_RS - 1;
> +	case ZPCI_TABLE_TYPE_RFX:
> +		return U64_MAX;
> +	default:
> +		return 0;
> +	}
> +}
> +
>  static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
>  {
> +	struct zpci_dev *zdev = to_zpci_dev(dev);
>  	struct s390_domain *s390_domain;
> +	u64 aperture_size;
>  
>  	s390_domain = kzalloc(sizeof(*s390_domain), GFP_KERNEL);
>  	if (!s390_domain)
> @@ -524,10 +540,26 @@ static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
>  		kfree(s390_domain);
>  		return NULL;
>  	}
> +
> +	aperture_size = min(s390_iommu_aperture,
> +			    zdev->end_dma - zdev->start_dma + 1);
> +	if (aperture_size <= (ZPCI_TABLE_SIZE_RT - zdev->start_dma)) {
> +		s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
> +	} else if (aperture_size <= (ZPCI_TABLE_SIZE_RS - zdev->start_dma) &&
> +		  (zdev->dtsm & ZPCI_IOTA_DT_RS)) {
> +		s390_domain->origin_type = ZPCI_TABLE_TYPE_RSX;
> +	} else if (zdev->dtsm & ZPCI_IOTA_DT_RF) {
> +		s390_domain->origin_type = ZPCI_TABLE_TYPE_RFX;
> +	} else {
> +		/* Assume RTX available */
> +		s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
> +		aperture_size = ZPCI_TABLE_SIZE_RT - zdev->start_dma;
> +	}
> +	zdev->end_dma = zdev->start_dma + aperture_size - 1;
> +
>  	s390_domain->domain.geometry.force_aperture = true;
>  	s390_domain->domain.geometry.aperture_start = 0;
> -	s390_domain->domain.geometry.aperture_end = ZPCI_TABLE_SIZE_RT - 1;
> -	s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
> +	s390_domain->domain.geometry.aperture_end = max_tbl_size(s390_domain);
>  
>  	spin_lock_init(&s390_domain->list_lock);
>  	INIT_LIST_HEAD_RCU(&s390_domain->devices);
> @@ -680,6 +712,8 @@ static void s390_iommu_get_resv_regions(struct device *dev,
>  {
>  	struct zpci_dev *zdev = to_zpci_dev(dev);
>  	struct iommu_resv_region *region;
> +	u64 max_size, end_resv;
> +	unsigned long flags;
>  
>  	if (zdev->start_dma) {
>  		region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
> @@ -689,10 +723,21 @@ static void s390_iommu_get_resv_regions(struct device *dev,
>  		list_add_tail(&region->list, list);
>  	}
>  
> -	if (zdev->end_dma < ZPCI_TABLE_SIZE_RT - 1) {
> -		region = iommu_alloc_resv_region(zdev->end_dma + 1,
> -						 ZPCI_TABLE_SIZE_RT - zdev->end_dma - 1,
> -						 0, IOMMU_RESV_RESERVED, GFP_KERNEL);
> +	spin_lock_irqsave(&zdev->dom_lock, flags);
> +	if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED ||
> +	    zdev->s390_domain->type == IOMMU_DOMAIN_IDENTITY) {
> +		spin_unlock_irqrestore(&zdev->dom_lock, flags);
> +		return;
> +	}
> +
> +	max_size = max_tbl_size(to_s390_domain(zdev->s390_domain));
> +	spin_unlock_irqrestore(&zdev->dom_lock, flags);
> +
> +	if (zdev->end_dma < max_size) {
> +		end_resv = max_size - zdev->end_dma;
> +		region = iommu_alloc_resv_region(zdev->end_dma + 1, end_resv,
> +						 0, IOMMU_RESV_RESERVED,
> +						 GFP_KERNEL);
>  		if (!region)
>  			return;
>  		list_add_tail(&region->list, list);
> @@ -708,13 +753,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
>  
>  	zdev = to_zpci_dev(dev);
>  
> -	if (zdev->start_dma > zdev->end_dma ||
> -	    zdev->start_dma > ZPCI_TABLE_SIZE_RT - 1)
> +	if (zdev->start_dma > zdev->end_dma)
>  		return ERR_PTR(-EINVAL);
>  
> -	if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
> -		zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
> -
>  	if (zdev->tlb_refresh)
>  		dev->iommu->shadow_on_flush = 1;
>  
> @@ -999,7 +1040,6 @@ struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
>  
>  int zpci_init_iommu(struct zpci_dev *zdev)
>  {
> -	u64 aperture_size;
>  	int rc = 0;
>  
>  	rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
> @@ -1017,12 +1057,6 @@ int zpci_init_iommu(struct zpci_dev *zdev)
>  	if (rc)
>  		goto out_sysfs;
>  
> -	zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
> -	aperture_size = min3(s390_iommu_aperture,
> -			     ZPCI_TABLE_SIZE_RT - zdev->start_dma,
> -			     zdev->end_dma - zdev->start_dma + 1);
> -	zdev->end_dma = zdev->start_dma + aperture_size - 1;
> -
>  	return 0;
>  
>  out_sysfs:

Looks good, thanks for the great work!

Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>