Handle DMA map/unmap operations up to the addressable limit by comparing
against inclusive end-of-range limits, and changing iteration to
perform relative traversals across range sizes, rather than absolute
traversals across addresses.
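To illustrate: a mapping that ends at the top of the iova space, e.g.
iova = ~(dma_addr_t)0 - PAGE_SIZE + 1 with size = PAGE_SIZE, has an
exclusive end iova + size that wraps to zero, so exclusive-end comparisons
and absolute-address loop bounds both misbehave for it. The inclusive
comparisons and size-relative loops used below, e.g.

	if (start + size - 1 < dma->iova)
		node = node->rb_left;

	while (pos < dma->size) {
		dma_addr_t iova = dma->iova + pos;
		/* ... */
		pos += unmapped;
	}

stay well-defined for any non-zero-sized range.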
vfio_link_dma inserts a zero-sized vfio_dma into the rb-tree, and is
only used for that purpose, so discard the size from consideration for
the insertion point.
Signed-off-by: Alex Mastro <amastro@fb.com>
---
drivers/vfio/vfio_iommu_type1.c | 77 ++++++++++++++++++++++-------------------
1 file changed, 42 insertions(+), 35 deletions(-)
diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
index 48b84a7af2e1..a65625dcf708 100644
--- a/drivers/vfio/vfio_iommu_type1.c
+++ b/drivers/vfio/vfio_iommu_type1.c
@@ -166,12 +166,14 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
{
struct rb_node *node = iommu->dma_list.rb_node;
+ WARN_ON(!size);
+
while (node) {
struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
- if (start + size <= dma->iova)
+ if (start + size - 1 < dma->iova)
node = node->rb_left;
- else if (start >= dma->iova + dma->size)
+ else if (start > dma->iova + dma->size - 1)
node = node->rb_right;
else
return dma;
@@ -181,16 +183,19 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
}
static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
- dma_addr_t start, size_t size)
+ dma_addr_t start,
+ dma_addr_t end)
{
struct rb_node *res = NULL;
struct rb_node *node = iommu->dma_list.rb_node;
struct vfio_dma *dma_res = NULL;
+ WARN_ON(end < start);
+
while (node) {
struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
- if (start < dma->iova + dma->size) {
+ if (start <= dma->iova + dma->size - 1) {
res = node;
dma_res = dma;
if (start >= dma->iova)
@@ -200,7 +205,7 @@ static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
node = node->rb_right;
}
}
- if (res && size && dma_res->iova >= start + size)
+ if (res && dma_res->iova > end)
res = NULL;
return res;
}
@@ -210,11 +215,13 @@ static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
struct vfio_dma *dma;
+ WARN_ON(new->size != 0);
+
while (*link) {
parent = *link;
dma = rb_entry(parent, struct vfio_dma, node);
- if (new->iova + new->size <= dma->iova)
+ if (new->iova <= dma->iova)
link = &(*link)->rb_left;
else
link = &(*link)->rb_right;
@@ -1071,12 +1078,12 @@ static size_t unmap_unpin_slow(struct vfio_domain *domain,
static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
bool do_accounting)
{
- dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
struct vfio_domain *domain, *d;
LIST_HEAD(unmapped_region_list);
struct iommu_iotlb_gather iotlb_gather;
int unmapped_region_cnt = 0;
long unlocked = 0;
+ size_t pos = 0;
if (!dma->size)
return 0;
@@ -1100,13 +1107,14 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
}
iommu_iotlb_gather_init(&iotlb_gather);
- while (iova < end) {
+ while (pos < dma->size) {
size_t unmapped, len;
phys_addr_t phys, next;
+ dma_addr_t iova = dma->iova + pos;
phys = iommu_iova_to_phys(domain->domain, iova);
if (WARN_ON(!phys)) {
- iova += PAGE_SIZE;
+ pos += PAGE_SIZE;
continue;
}
@@ -1115,7 +1123,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
* may require hardware cache flushing, try to find the
* largest contiguous physical memory chunk to unmap.
*/
- for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) {
+ for (len = PAGE_SIZE; pos + len < dma->size; len += PAGE_SIZE) {
next = iommu_iova_to_phys(domain->domain, iova + len);
if (next != phys + len)
break;
@@ -1136,7 +1144,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
break;
}
- iova += unmapped;
+ pos += unmapped;
}
dma->iommu_mapped = false;
@@ -1228,7 +1236,7 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
}
static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
- dma_addr_t iova, size_t size, size_t pgsize)
+ dma_addr_t iova, dma_addr_t iova_end, size_t pgsize)
{
struct vfio_dma *dma;
struct rb_node *n;
@@ -1245,8 +1253,8 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
if (dma && dma->iova != iova)
return -EINVAL;
- dma = vfio_find_dma(iommu, iova + size - 1, 0);
- if (dma && dma->iova + dma->size != iova + size)
+ dma = vfio_find_dma(iommu, iova_end, 1);
+ if (dma && dma->iova + dma->size - 1 != iova_end)
return -EINVAL;
for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
@@ -1255,7 +1263,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
if (dma->iova < iova)
continue;
- if (dma->iova > iova + size - 1)
+ if (dma->iova > iova_end)
break;
ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize);
@@ -1348,7 +1356,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
if (unmap_all) {
if (iova || size)
goto unlock;
- size = SIZE_MAX;
+ iova_end = ~(dma_addr_t)0;
} else {
if (!size || size & (pgsize - 1))
goto unlock;
@@ -1403,17 +1411,17 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
if (dma && dma->iova != iova)
goto unlock;
- dma = vfio_find_dma(iommu, iova_end, 0);
- if (dma && dma->iova + dma->size != iova + size)
+ dma = vfio_find_dma(iommu, iova_end, 1);
+ if (dma && dma->iova + dma->size - 1 != iova_end)
goto unlock;
}
ret = 0;
- n = first_n = vfio_find_dma_first_node(iommu, iova, size);
+ n = first_n = vfio_find_dma_first_node(iommu, iova, iova_end);
while (n) {
dma = rb_entry(n, struct vfio_dma, node);
- if (dma->iova >= iova + size)
+ if (dma->iova > iova_end)
break;
if (!iommu->v2 && iova > dma->iova)
@@ -1743,12 +1751,12 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
for (; n; n = rb_next(n)) {
struct vfio_dma *dma;
- dma_addr_t iova;
+ size_t pos = 0;
dma = rb_entry(n, struct vfio_dma, node);
- iova = dma->iova;
- while (iova < dma->iova + dma->size) {
+ while (pos < dma->size) {
+ dma_addr_t iova = dma->iova + pos;
phys_addr_t phys;
size_t size;
@@ -1764,14 +1772,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
phys = iommu_iova_to_phys(d->domain, iova);
if (WARN_ON(!phys)) {
- iova += PAGE_SIZE;
+ pos += PAGE_SIZE;
continue;
}
size = PAGE_SIZE;
p = phys + size;
i = iova + size;
- while (i < dma->iova + dma->size &&
+ while (pos + size < dma->size &&
p == iommu_iova_to_phys(d->domain, i)) {
size += PAGE_SIZE;
p += PAGE_SIZE;
@@ -1779,9 +1787,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
}
} else {
unsigned long pfn;
- unsigned long vaddr = dma->vaddr +
- (iova - dma->iova);
- size_t n = dma->iova + dma->size - iova;
+ unsigned long vaddr = dma->vaddr + pos;
+ size_t n = dma->size - pos;
long npage;
npage = vfio_pin_pages_remote(dma, vaddr,
@@ -1812,7 +1819,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
goto unwind;
}
- iova += size;
+ pos += size;
}
}
@@ -1829,29 +1836,29 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
unwind:
for (; n; n = rb_prev(n)) {
struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
- dma_addr_t iova;
+ size_t pos = 0;
if (dma->iommu_mapped) {
iommu_unmap(domain->domain, dma->iova, dma->size);
continue;
}
- iova = dma->iova;
- while (iova < dma->iova + dma->size) {
+ while (pos < dma->size) {
+ dma_addr_t iova = dma->iova + pos;
phys_addr_t phys, p;
size_t size;
dma_addr_t i;
phys = iommu_iova_to_phys(domain->domain, iova);
if (!phys) {
- iova += PAGE_SIZE;
+ pos += PAGE_SIZE;
continue;
}
size = PAGE_SIZE;
p = phys + size;
i = iova + size;
- while (i < dma->iova + dma->size &&
+ while (pos + size < dma->size &&
p == iommu_iova_to_phys(domain->domain, i)) {
size += PAGE_SIZE;
p += PAGE_SIZE;
@@ -2989,7 +2996,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
if (iommu->dirty_page_tracking)
ret = vfio_iova_dirty_bitmap(range.bitmap.data,
- iommu, iova, size,
+ iommu, iova, iova_end,
range.bitmap.pgsize);
else
ret = -EINVAL;
--
2.47.3
Hi Alex,
On 10/13/25 1:32 AM, Alex Mastro wrote:
> Handle DMA map/unmap operations up to the addressable limit by comparing
> against inclusive end-of-range limits, and changing iteration to
> perform relative traversals across range sizes, rather than absolute
> traversals across addresses.
>
> vfio_link_dma inserts a zero-sized vfio_dma into the rb-tree, and is
> only used for that purpose, so discard the size from consideration for
> the insertion point.
I made a small comment about this on the corresponding code below.
>
> Signed-off-by: Alex Mastro <amastro@fb.com>
> ---
> drivers/vfio/vfio_iommu_type1.c | 77 ++++++++++++++++++++++-------------------
> 1 file changed, 42 insertions(+), 35 deletions(-)
>
> diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c
> index 48b84a7af2e1..a65625dcf708 100644
> --- a/drivers/vfio/vfio_iommu_type1.c
> +++ b/drivers/vfio/vfio_iommu_type1.c
> @@ -166,12 +166,14 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
> {
> struct rb_node *node = iommu->dma_list.rb_node;
>
> + WARN_ON(!size);
> +
> while (node) {
> struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
>
> - if (start + size <= dma->iova)
> + if (start + size - 1 < dma->iova)
> node = node->rb_left;
> - else if (start >= dma->iova + dma->size)
> + else if (start > dma->iova + dma->size - 1)
> node = node->rb_right;
> else
> return dma;
> @@ -181,16 +183,19 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
> }
>
> static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
> - dma_addr_t start, size_t size)
> + dma_addr_t start,
> + dma_addr_t end)
> {
> struct rb_node *res = NULL;
> struct rb_node *node = iommu->dma_list.rb_node;
> struct vfio_dma *dma_res = NULL;
>
> + WARN_ON(end < start);
> +
> while (node) {
> struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
>
> - if (start < dma->iova + dma->size) {
> + if (start <= dma->iova + dma->size - 1) {
> res = node;
> dma_res = dma;
> if (start >= dma->iova)
> @@ -200,7 +205,7 @@ static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
> node = node->rb_right;
> }
> }
> - if (res && size && dma_res->iova >= start + size)
> + if (res && dma_res->iova > end)
> res = NULL;
> return res;
> }
> @@ -210,11 +215,13 @@ static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
> struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
> struct vfio_dma *dma;
>
> + WARN_ON(new->size != 0);
> +
> while (*link) {
> parent = *link;
> dma = rb_entry(parent, struct vfio_dma, node);
>
> - if (new->iova + new->size <= dma->iova)
> + if (new->iova <= dma->iova)
It is possible I missed a previous thread where this was already
discussed, but why are we adding this new restriction that
vfio_link_dma() will _always_ be called with dma->size = 0? I know it is
the case now, but is there a reason why future code could not try to
insert a non-zero sized node?
Would it be more fitting to add overflow protection here too, as is done
for other code paths in the file? I know the WARN_ON() above will make us
aware if there is ever another caller that attempts to use size != 0, so
this is more of a nit about consistency than a concern about correctness.
Thank you,
Alejandro
> link = &(*link)->rb_left;
> else
> link = &(*link)->rb_right;
> @@ -1071,12 +1078,12 @@ static size_t unmap_unpin_slow(struct vfio_domain *domain,
> static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
> bool do_accounting)
> {
> - dma_addr_t iova = dma->iova, end = dma->iova + dma->size;
> struct vfio_domain *domain, *d;
> LIST_HEAD(unmapped_region_list);
> struct iommu_iotlb_gather iotlb_gather;
> int unmapped_region_cnt = 0;
> long unlocked = 0;
> + size_t pos = 0;
>
> if (!dma->size)
> return 0;
> @@ -1100,13 +1107,14 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
> }
>
> iommu_iotlb_gather_init(&iotlb_gather);
> - while (iova < end) {
> + while (pos < dma->size) {
> size_t unmapped, len;
> phys_addr_t phys, next;
> + dma_addr_t iova = dma->iova + pos;
>
> phys = iommu_iova_to_phys(domain->domain, iova);
> if (WARN_ON(!phys)) {
> - iova += PAGE_SIZE;
> + pos += PAGE_SIZE;
> continue;
> }
>
> @@ -1115,7 +1123,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
> * may require hardware cache flushing, try to find the
> * largest contiguous physical memory chunk to unmap.
> */
> - for (len = PAGE_SIZE; iova + len < end; len += PAGE_SIZE) {
> + for (len = PAGE_SIZE; pos + len < dma->size; len += PAGE_SIZE) {
> next = iommu_iova_to_phys(domain->domain, iova + len);
> if (next != phys + len)
> break;
> @@ -1136,7 +1144,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
> break;
> }
>
> - iova += unmapped;
> + pos += unmapped;
> }
>
> dma->iommu_mapped = false;
> @@ -1228,7 +1236,7 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
> }
>
> static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
> - dma_addr_t iova, size_t size, size_t pgsize)
> + dma_addr_t iova, dma_addr_t iova_end, size_t pgsize)
> {
> struct vfio_dma *dma;
> struct rb_node *n;
> @@ -1245,8 +1253,8 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
> if (dma && dma->iova != iova)
> return -EINVAL;
>
> - dma = vfio_find_dma(iommu, iova + size - 1, 0);
> - if (dma && dma->iova + dma->size != iova + size)
> + dma = vfio_find_dma(iommu, iova_end, 1);
> + if (dma && dma->iova + dma->size - 1 != iova_end)
> return -EINVAL;
>
> for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
> @@ -1255,7 +1263,7 @@ static int vfio_iova_dirty_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
> if (dma->iova < iova)
> continue;
>
> - if (dma->iova > iova + size - 1)
> + if (dma->iova > iova_end)
> break;
>
> ret = update_user_bitmap(bitmap, iommu, dma, iova, pgsize);
> @@ -1348,7 +1356,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> if (unmap_all) {
> if (iova || size)
> goto unlock;
> - size = SIZE_MAX;
> + iova_end = ~(dma_addr_t)0;
> } else {
> if (!size || size & (pgsize - 1))
> goto unlock;
> @@ -1403,17 +1411,17 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
> if (dma && dma->iova != iova)
> goto unlock;
>
> - dma = vfio_find_dma(iommu, iova_end, 0);
> - if (dma && dma->iova + dma->size != iova + size)
> + dma = vfio_find_dma(iommu, iova_end, 1);
> + if (dma && dma->iova + dma->size - 1 != iova_end)
> goto unlock;
> }
>
> ret = 0;
> - n = first_n = vfio_find_dma_first_node(iommu, iova, size);
> + n = first_n = vfio_find_dma_first_node(iommu, iova, iova_end);
>
> while (n) {
> dma = rb_entry(n, struct vfio_dma, node);
> - if (dma->iova >= iova + size)
> + if (dma->iova > iova_end)
> break;
>
> if (!iommu->v2 && iova > dma->iova)
> @@ -1743,12 +1751,12 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
>
> for (; n; n = rb_next(n)) {
> struct vfio_dma *dma;
> - dma_addr_t iova;
> + size_t pos = 0;
>
> dma = rb_entry(n, struct vfio_dma, node);
> - iova = dma->iova;
>
> - while (iova < dma->iova + dma->size) {
> + while (pos < dma->size) {
> + dma_addr_t iova = dma->iova + pos;
> phys_addr_t phys;
> size_t size;
>
> @@ -1764,14 +1772,14 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
> phys = iommu_iova_to_phys(d->domain, iova);
>
> if (WARN_ON(!phys)) {
> - iova += PAGE_SIZE;
> + pos += PAGE_SIZE;
> continue;
> }
>
> size = PAGE_SIZE;
> p = phys + size;
> i = iova + size;
> - while (i < dma->iova + dma->size &&
> + while (pos + size < dma->size &&
> p == iommu_iova_to_phys(d->domain, i)) {
> size += PAGE_SIZE;
> p += PAGE_SIZE;
> @@ -1779,9 +1787,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
> }
> } else {
> unsigned long pfn;
> - unsigned long vaddr = dma->vaddr +
> - (iova - dma->iova);
> - size_t n = dma->iova + dma->size - iova;
> + unsigned long vaddr = dma->vaddr + pos;
> + size_t n = dma->size - pos;
> long npage;
>
> npage = vfio_pin_pages_remote(dma, vaddr,
> @@ -1812,7 +1819,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
> goto unwind;
> }
>
> - iova += size;
> + pos += size;
> }
> }
>
> @@ -1829,29 +1836,29 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
> unwind:
> for (; n; n = rb_prev(n)) {
> struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
> - dma_addr_t iova;
> + size_t pos = 0;
>
> if (dma->iommu_mapped) {
> iommu_unmap(domain->domain, dma->iova, dma->size);
> continue;
> }
>
> - iova = dma->iova;
> - while (iova < dma->iova + dma->size) {
> + while (pos < dma->size) {
> + dma_addr_t iova = dma->iova + pos;
> phys_addr_t phys, p;
> size_t size;
> dma_addr_t i;
>
> phys = iommu_iova_to_phys(domain->domain, iova);
> if (!phys) {
> - iova += PAGE_SIZE;
> + pos += PAGE_SIZE;
> continue;
> }
>
> size = PAGE_SIZE;
> p = phys + size;
> i = iova + size;
> - while (i < dma->iova + dma->size &&
> + while (pos + size < dma->size &&
> p == iommu_iova_to_phys(domain->domain, i)) {
> size += PAGE_SIZE;
> p += PAGE_SIZE;
> @@ -2989,7 +2996,7 @@ static int vfio_iommu_type1_dirty_pages(struct vfio_iommu *iommu,
>
> if (iommu->dirty_page_tracking)
> ret = vfio_iova_dirty_bitmap(range.bitmap.data,
> - iommu, iova, size,
> + iommu, iova, iova_end,
> range.bitmap.pgsize);
> else
> ret = -EINVAL;
>
On Tue, Oct 21, 2025 at 06:18:00PM -0400, Alejandro Jimenez wrote:
> > @@ -210,11 +215,13 @@ static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
> > struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
> > struct vfio_dma *dma;
> > + WARN_ON(new->size != 0);
> > +
> > while (*link) {
> > parent = *link;
> > dma = rb_entry(parent, struct vfio_dma, node);
> > - if (new->iova + new->size <= dma->iova)
> > + if (new->iova <= dma->iova)
> It is possible I missed a previous thread where this was already discussed,
> but why are we adding this new restriction that vfio_link_dma() will
> _always_ be called with dma->size = 0? I know it is the case now, but is
> there a reason why future code could not try to insert a non-zero sized
> node?
Perhaps the WARN_ON is too coddling, but given that this helper is used for
exactly one purpose today, the intent is to make a future user stop and
consider what they're doing before deviating from the current usage.
iommu->dma_list's invariant is that all elements have non-overlapping iova
ranges, which is currently enforced pre-insertion in vfio_dma_do_map by the
vfio_find_dma check. After vfio_pin_map_dma returns, the vfio_dma has either
been grown to its full size or been removed from iommu->dma_list on error
via vfio_remove_dma.
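Roughly, with locking and error handling elided, the only flow today looks
like this (simplified from vfio_dma_do_map):

	if (vfio_find_dma(iommu, iova, size))	/* reject overlapping ranges */
		return -EEXIST;

	dma = kzalloc(sizeof(*dma), GFP_KERNEL);	/* dma->size starts at 0 */
	dma->iova = iova;
	dma->vaddr = vaddr;
	vfio_link_dma(iommu, dma);		/* the only zero-sized insert */

	ret = vfio_pin_map_dma(iommu, dma, size);
	/* grows dma->size as pages are pinned and mapped, or calls
	 * vfio_remove_dma() on failure */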
> Would it be more fitting to add overflow protection here too, as is done
> for other code paths in the file? I know the WARN_ON() above will make us
> aware if there is ever another caller that attempts to use size != 0, so
> this is more of a nit about consistency than a concern about correctness.
The other code paths which check for overflow focus on sanitizing args at
the vfio_iommu_driver_ops boundary. Since this helper is downstream from those
existing checks, and given its specificity, I'm not sure additional checks here
would be helpful.
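For reference, the kind of boundary sanitization I mean is roughly the
following at the ioctl argument checks (sketch only):

	dma_addr_t iova_end;

	/* reject empty ranges and ranges whose inclusive end would wrap */
	if (!size || check_add_overflow(iova, size - 1, &iova_end))
		return -EINVAL;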