[PATCH v5 4/5] iommu/s390: support map/unmap for additional table regions

Matthew Rosato posted 5 patches 8 months, 1 week ago
[PATCH v5 4/5] iommu/s390: support map/unmap for additional table regions
Posted by Matthew Rosato 8 months, 1 week ago
Map and unmap ops use the shared dma_walk_cpu_trans routine; update
this routine to use the origin_type of the dma_table to determine
how many table levels must be walked.

Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
---
 drivers/iommu/s390-iommu.c | 127 ++++++++++++++++++++++++++++++++++---
 1 file changed, 119 insertions(+), 8 deletions(-)

diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 338a7381e918..46f45b136993 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -67,6 +67,20 @@ static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
 	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
 }
 
+static inline void set_rf_rso(unsigned long *entry, phys_addr_t rso)
+{
+	*entry &= ZPCI_RTE_FLAG_MASK;
+	*entry |= (rso & ZPCI_RTE_ADDR_MASK);
+	*entry |= ZPCI_TABLE_TYPE_RFX;
+}
+
+static inline void set_rs_rto(unsigned long *entry, phys_addr_t rto)
+{
+	*entry &= ZPCI_RTE_FLAG_MASK;
+	*entry |= (rto & ZPCI_RTE_ADDR_MASK);
+	*entry |= ZPCI_TABLE_TYPE_RSX;
+}
+
 static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
 {
 	*entry &= ZPCI_RTE_FLAG_MASK;
@@ -81,6 +95,22 @@ static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
 	*entry |= ZPCI_TABLE_TYPE_SX;
 }
 
+static inline void validate_rf_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
+	*entry |= ZPCI_TABLE_VALID;
+	*entry |= ZPCI_TABLE_LEN_RFX;
+}
+
+static inline void validate_rs_entry(unsigned long *entry)
+{
+	*entry &= ~ZPCI_TABLE_VALID_MASK;
+	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
+	*entry |= ZPCI_TABLE_VALID;
+	*entry |= ZPCI_TABLE_LEN_RSX;
+}
+
 static inline void validate_rt_entry(unsigned long *entry)
 {
 	*entry &= ~ZPCI_TABLE_VALID_MASK;
@@ -286,6 +316,70 @@ static unsigned long *dma_alloc_page_table(gfp_t gfp)
 	return table;
 }
 
+static unsigned long *dma_walk_rs_table(unsigned long *rso,
+					dma_addr_t dma_addr, gfp_t gfp)
+{
+	unsigned int rsx = calc_rsx(dma_addr);
+	unsigned long old_rse, rse;
+	unsigned long *rsep, *rto;
+
+	rsep = &rso[rsx];
+	rse = READ_ONCE(*rsep);
+	if (reg_entry_isvalid(rse)) {
+		rto = get_rs_rto(rse);
+	} else {
+		rto = dma_alloc_cpu_table(gfp);
+		if (!rto)
+			return NULL;
+
+		set_rs_rto(&rse, virt_to_phys(rto));
+		validate_rs_entry(&rse);
+		entry_clr_protected(&rse);
+
+		old_rse = cmpxchg(rsep, ZPCI_TABLE_INVALID, rse);
+		if (old_rse != ZPCI_TABLE_INVALID) {
+			/* Someone else was faster, use theirs */
+			dma_free_cpu_table(rto);
+			rto = get_rs_rto(old_rse);
+		}
+	}
+	return rto;
+}
+
+static unsigned long *dma_walk_rf_table(unsigned long *rfo,
+					dma_addr_t dma_addr, gfp_t gfp)
+{
+	unsigned int rfx = calc_rfx(dma_addr);
+	unsigned long old_rfe, rfe;
+	unsigned long *rfep, *rso;
+
+	rfep = &rfo[rfx];
+	rfe = READ_ONCE(*rfep);
+	if (reg_entry_isvalid(rfe)) {
+		rso = get_rf_rso(rfe);
+	} else {
+		rso = dma_alloc_cpu_table(gfp);
+		if (!rso)
+			return NULL;
+
+		set_rf_rso(&rfe, virt_to_phys(rso));
+		validate_rf_entry(&rfe);
+		entry_clr_protected(&rfe);
+
+		old_rfe = cmpxchg(rfep, ZPCI_TABLE_INVALID, rfe);
+		if (old_rfe != ZPCI_TABLE_INVALID) {
+			/* Someone else was faster, use theirs */
+			dma_free_cpu_table(rso);
+			rso = get_rf_rso(old_rfe);
+		}
+	}
+
+	if (!rso)
+		return NULL;
+
+	return dma_walk_rs_table(rso, dma_addr, gfp);
+}
+
 static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
 {
 	unsigned long old_rte, rte;
@@ -339,11 +433,31 @@ static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
 	return pto;
 }
 
-static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
+static unsigned long *dma_walk_region_tables(struct s390_domain *domain,
+					     dma_addr_t dma_addr, gfp_t gfp)
 {
-	unsigned long *sto, *pto;
+	switch (domain->origin_type) {
+	case ZPCI_TABLE_TYPE_RFX:
+		return dma_walk_rf_table(domain->dma_table, dma_addr, gfp);
+	case ZPCI_TABLE_TYPE_RSX:
+		return dma_walk_rs_table(domain->dma_table, dma_addr, gfp);
+	case ZPCI_TABLE_TYPE_RTX:
+		return domain->dma_table;
+	default:
+		return NULL;
+	}
+}
+
+static unsigned long *dma_walk_cpu_trans(struct s390_domain *domain,
+					 dma_addr_t dma_addr, gfp_t gfp)
+{
+	unsigned long *rto, *sto, *pto;
 	unsigned int rtx, sx, px;
 
+	rto = dma_walk_region_tables(domain, dma_addr, gfp);
+	if (!rto)
+		return NULL;
+
 	rtx = calc_rtx(dma_addr);
 	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
 	if (!sto)
@@ -690,8 +804,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 	int rc;
 
 	for (i = 0; i < nr_pages; i++) {
-		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
-					   gfp);
+		entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
 		if (unlikely(!entry)) {
 			rc = -ENOMEM;
 			goto undo_cpu_trans;
@@ -706,8 +819,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
 undo_cpu_trans:
 	while (i-- > 0) {
 		dma_addr -= PAGE_SIZE;
-		entry = dma_walk_cpu_trans(s390_domain->dma_table,
-					   dma_addr, gfp);
+		entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
 		if (!entry)
 			break;
 		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
@@ -724,8 +836,7 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
 	int rc = 0;
 
 	for (i = 0; i < nr_pages; i++) {
-		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
-					   GFP_ATOMIC);
+		entry = dma_walk_cpu_trans(s390_domain, dma_addr, GFP_ATOMIC);
 		if (unlikely(!entry)) {
 			rc = -EINVAL;
 			break;
-- 
2.49.0
Re: [PATCH v5 4/5] iommu/s390: support map/unmap for additional table regions
Posted by Niklas Schnelle 8 months ago
On Fri, 2025-04-11 at 16:24 -0400, Matthew Rosato wrote:
> Map and unmap ops use the shared dma_walk_cpu_trans routine, update
> this using the origin_type of the dma_table to determine how many
> table levels must be walked.
> 
> Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
> ---
>  drivers/iommu/s390-iommu.c | 127 ++++++++++++++++++++++++++++++++++---
>  1 file changed, 119 insertions(+), 8 deletions(-)
> 
> diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
> index 338a7381e918..46f45b136993 100644
> --- a/drivers/iommu/s390-iommu.c
> +++ b/drivers/iommu/s390-iommu.c
> @@ -67,6 +67,20 @@ static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
>  	*entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
>  }
>  
> +static inline void set_rf_rso(unsigned long *entry, phys_addr_t rso)
> +{
> +	*entry &= ZPCI_RTE_FLAG_MASK;
> +	*entry |= (rso & ZPCI_RTE_ADDR_MASK);
> +	*entry |= ZPCI_TABLE_TYPE_RFX;
> +}
> +
> +static inline void set_rs_rto(unsigned long *entry, phys_addr_t rto)
> +{
> +	*entry &= ZPCI_RTE_FLAG_MASK;
> +	*entry |= (rto & ZPCI_RTE_ADDR_MASK);
> +	*entry |= ZPCI_TABLE_TYPE_RSX;
> +}
> +
>  static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
>  {
>  	*entry &= ZPCI_RTE_FLAG_MASK;
> @@ -81,6 +95,22 @@ static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
>  	*entry |= ZPCI_TABLE_TYPE_SX;
>  }
>  
> +static inline void validate_rf_entry(unsigned long *entry)
> +{
> +	*entry &= ~ZPCI_TABLE_VALID_MASK;
> +	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
> +	*entry |= ZPCI_TABLE_VALID;
> +	*entry |= ZPCI_TABLE_LEN_RFX;
> +}
> +
> +static inline void validate_rs_entry(unsigned long *entry)
> +{
> +	*entry &= ~ZPCI_TABLE_VALID_MASK;
> +	*entry &= ~ZPCI_TABLE_OFFSET_MASK;
> +	*entry |= ZPCI_TABLE_VALID;
> +	*entry |= ZPCI_TABLE_LEN_RSX;
> +}
> +
> 
--- snip ---
>  
> -static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
> +static unsigned long *dma_walk_region_tables(struct s390_domain *domain,
> +					     dma_addr_t dma_addr, gfp_t gfp)
>  {
> -	unsigned long *sto, *pto;
> +	switch (domain->origin_type) {
> +	case ZPCI_TABLE_TYPE_RFX:
> +		return dma_walk_rf_table(domain->dma_table, dma_addr, gfp);
> +	case ZPCI_TABLE_TYPE_RSX:
> +		return dma_walk_rs_table(domain->dma_table, dma_addr, gfp);
> +	case ZPCI_TABLE_TYPE_RTX:
> +		return domain->dma_table;
> +	default:
> +		return NULL;
> +	}
> +}
> +
> +static unsigned long *dma_walk_cpu_trans(struct s390_domain *domain,
> +					 dma_addr_t dma_addr, gfp_t gfp)
> +{
> +	unsigned long *rto, *sto, *pto;
>  	unsigned int rtx, sx, px;
>  
> +	rto = dma_walk_region_tables(domain, dma_addr, gfp);
> +	if (!rto)
> +		return NULL;
> +
>  	rtx = calc_rtx(dma_addr);
>  	sto = dma_get_seg_table_origin(&rto[rtx], gfp);
>  	if (!sto)
> @@ -690,8 +804,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
>  	int rc;
>  
>  	for (i = 0; i < nr_pages; i++) {
> -		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
> -					   gfp);
> +		entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
>  		if (unlikely(!entry)) {
>  			rc = -ENOMEM;
>  			goto undo_cpu_trans;
> @@ -706,8 +819,7 @@ static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
>  undo_cpu_trans:
>  	while (i-- > 0) {
>  		dma_addr -= PAGE_SIZE;
> -		entry = dma_walk_cpu_trans(s390_domain->dma_table,
> -					   dma_addr, gfp);
> +		entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
>  		if (!entry)
>  			break;
>  		dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
> @@ -724,8 +836,7 @@ static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
>  	int rc = 0;
>  
>  	for (i = 0; i < nr_pages; i++) {
> -		entry = dma_walk_cpu_trans(s390_domain->dma_table, dma_addr,
> -					   GFP_ATOMIC);
> +		entry = dma_walk_cpu_trans(s390_domain, dma_addr, GFP_ATOMIC);
>  		if (unlikely(!entry)) {
>  			rc = -EINVAL;
>  			break;

Looks good to me and we even get nicer formatting :)

Reviewed-by: Niklas Schnelle <schnelle@linux.ibm.com>