[PATCH v2 1/4] vfio: selftests: add iova range query helpers

Posted by Alex Mastro 2 months, 4 weeks ago
VFIO selftests need to map IOVAs from legally accessible ranges, which
can vary across hardware. Tests in vfio_dma_mapping_test.c currently make
overly strong assumptions about which IOVAs can be mapped.

Add vfio_pci_iova_ranges(), which queries IOVA ranges from the IOMMUFD
or VFIO container associated with the device. The queried ranges are
normalized to IOMMUFD's struct iommu_iova_range representation so that
handling of IOVA ranges up the stack can be implementation-agnostic.
struct iommu_iova_range and struct vfio_iova_range are equivalent, so
prefer the newer interface's struct.

Query IOMMUFD's ranges with IOMMU_IOAS_IOVA_RANGES.
Query VFIO container's ranges with VFIO_IOMMU_GET_INFO and
VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE.

The underlying vfio_iommu_type1_info buffer-related functionality has
been kept generic so the same helpers can be used to query other
capability chain information, if needed.
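
For example, a test might consume the helper roughly as follows (a
hypothetical usage sketch, not part of this patch; the DMA region setup
is illustrative only):

  struct iommu_iova_range *ranges;
  u32 nranges;

  ranges = vfio_pci_iova_ranges(device, &nranges);

  /* Place the test mapping at the start of the first legal range. */
  region.iova = ranges[0].start;
  vfio_pci_dma_map(device, &region);

  free(ranges);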

Signed-off-by: Alex Mastro <amastro@fb.com>
---
 .../testing/selftests/vfio/lib/include/vfio_util.h |   8 +-
 tools/testing/selftests/vfio/lib/vfio_pci_device.c | 167 +++++++++++++++++++++
 2 files changed, 174 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
index 240409bf5f8a..ef8f06ef0c13 100644
--- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
+++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
@@ -4,9 +4,12 @@
 
 #include <fcntl.h>
 #include <string.h>
-#include <linux/vfio.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
 #include <linux/list.h>
 #include <linux/pci_regs.h>
+#include <linux/vfio.h>
 
 #include "../../../kselftest.h"
 
@@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
 void vfio_pci_device_cleanup(struct vfio_pci_device *device);
 void vfio_pci_device_reset(struct vfio_pci_device *device);
 
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+					      u32 *nranges);
+
 int __vfio_pci_dma_map(struct vfio_pci_device *device,
 		       struct vfio_dma_region *region);
 int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index a381fd253aa7..7a523e3f2dce 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -29,6 +29,173 @@
 	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
 } while (0)
 
+static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
+						 size_t *cap_offset)
+{
+	struct vfio_info_cap_header *hdr;
+
+	if (!*cap_offset)
+		return NULL;
+
+	VFIO_ASSERT_LT(*cap_offset, bufsz);
+	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
+
+	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
+
+	if (hdr->next)
+		VFIO_ASSERT_GT(hdr->next, *cap_offset);
+
+	*cap_offset = hdr->next;
+
+	return hdr;
+}
+
+static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
+							    u16 cap_id)
+{
+	struct vfio_info_cap_header *hdr;
+	size_t cap_offset = info->cap_offset;
+
+	if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
+		return NULL;
+
+	if (cap_offset)
+		VFIO_ASSERT_GE(cap_offset, sizeof(struct vfio_iommu_type1_info));
+
+	while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
+		if (hdr->id == cap_id)
+			return hdr;
+	}
+
+	return NULL;
+}
+
+/* Return buffer including capability chain, if present. Free with free() */
+static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device)
+{
+	struct vfio_iommu_type1_info *info;
+
+	info = malloc(sizeof(*info));
+	VFIO_ASSERT_NOT_NULL(info);
+
+	*info = (struct vfio_iommu_type1_info) {
+		.argsz = sizeof(*info),
+	};
+
+	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
+
+	info = realloc(info, info->argsz);
+	VFIO_ASSERT_NOT_NULL(info);
+
+	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info);
+
+	return info;
+}
+
+/*
+ * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
+ * report iommufd's iommu_iova_range. Free with free().
+ */
+static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
+						       u32 *nranges)
+{
+	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
+	struct vfio_iommu_type1_info *info;
+	struct vfio_info_cap_header *hdr;
+	struct iommu_iova_range *ranges = NULL;
+
+	info = vfio_iommu_get_info(device);
+	hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+	VFIO_ASSERT_NOT_NULL(hdr);
+
+	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
+	VFIO_ASSERT_GT(cap_range->nr_iovas, 0);
+
+	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));
+	VFIO_ASSERT_NOT_NULL(ranges);
+
+	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
+		ranges[i] = (struct iommu_iova_range){
+			.start = cap_range->iova_ranges[i].start,
+			.last = cap_range->iova_ranges[i].end,
+		};
+	}
+
+	*nranges = cap_range->nr_iovas;
+
+	free(info);
+	return ranges;
+}
+
+/* Return iova ranges of the device's IOAS. Free with free() */
+static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
+						    u32 *nranges)
+{
+	struct iommu_iova_range *ranges;
+	int ret;
+
+	struct iommu_ioas_iova_ranges query = {
+		.size = sizeof(query),
+		.ioas_id = device->ioas_id,
+	};
+
+	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+	VFIO_ASSERT_EQ(ret, -1);
+	VFIO_ASSERT_EQ(errno, EMSGSIZE);
+	VFIO_ASSERT_GT(query.num_iovas, 0);
+
+	ranges = calloc(query.num_iovas, sizeof(*ranges));
+	VFIO_ASSERT_NOT_NULL(ranges);
+
+	query.allowed_iovas = (uintptr_t)ranges;
+
+	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+	*nranges = query.num_iovas;
+
+	return ranges;
+}
+
+static int iova_range_comp(const void *a, const void *b)
+{
+	const struct iommu_iova_range *ra = a, *rb = b;
+
+	if (ra->start < rb->start)
+		return -1;
+
+	if (ra->start > rb->start)
+		return 1;
+
+	return 0;
+}
+
+/* Return sorted IOVA ranges of the device. Free with free(). */
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+					      u32 *nranges)
+{
+	struct iommu_iova_range *ranges;
+
+	if (device->iommufd)
+		ranges = iommufd_iova_ranges(device, nranges);
+	else
+		ranges = vfio_iommu_iova_ranges(device, nranges);
+
+	if (!ranges)
+		return NULL;
+
+	VFIO_ASSERT_GT(*nranges, 0);
+
+	/* Sort and check that ranges are sane and non-overlapping */
+	qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp);
+	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
+
+	for (u32 i = 1; i < *nranges; i++) {
+		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
+		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
+	}
+
+	return ranges;
+}
+
 iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
 {
 	struct vfio_dma_region *region;

-- 
2.47.3
Re: [PATCH v2 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Williamson 2 months, 4 weeks ago
On Tue, 11 Nov 2025 06:52:02 -0800
Alex Mastro <amastro@fb.com> wrote:
> diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> index a381fd253aa7..7a523e3f2dce 100644
> --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> @@ -29,6 +29,173 @@
>  	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
>  } while (0)
>  
> +static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
> +						 size_t *cap_offset)
> +{
> +	struct vfio_info_cap_header *hdr;
> +
> +	if (!*cap_offset)
> +		return NULL;
> +
> +	VFIO_ASSERT_LT(*cap_offset, bufsz);
> +	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
> +
> +	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
> +
> +	if (hdr->next)
> +		VFIO_ASSERT_GT(hdr->next, *cap_offset);

This might be implementation, but I don't think it's a requirement.
The vfio capability chains are based on PCI capabilities, which have no
ordering requirement.  Thanks,

Alex

> +
> +	*cap_offset = hdr->next;
> +
> +	return hdr;
> +}
Re: [PATCH v2 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Mastro 2 months, 4 weeks ago
On Tue, Nov 11, 2025 at 10:09:48AM -0700, Alex Williamson wrote:
> On Tue, 11 Nov 2025 06:52:02 -0800
> Alex Mastro <amastro@fb.com> wrote:
> > diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > index a381fd253aa7..7a523e3f2dce 100644
> > --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > @@ -29,6 +29,173 @@
> >  	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
> >  } while (0)
> >  
> > +static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
> > +						 size_t *cap_offset)
> > +{
> > +	struct vfio_info_cap_header *hdr;
> > +
> > +	if (!*cap_offset)
> > +		return NULL;
> > +
> > +	VFIO_ASSERT_LT(*cap_offset, bufsz);
> > +	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
> > +
> > +	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
> > +
> > +	if (hdr->next)
> > +		VFIO_ASSERT_GT(hdr->next, *cap_offset);
> 
> This might be implementation, but I don't think it's a requirement.
> The vfio capability chains are based on PCI capabilities, which have no
> ordering requirement.  Thanks,

My main interest was to enforce that the chain doesn't contain a cycle, and
checking for monotonically increasing cap offset was the simplest way I could
think of to guarantee such.

If there isn't such a check, and kernel vends a malformed cycle-containing
chain, chain traversal would infinite loop.

Given the location of this test code coupled to the kernel tree, do you think
such assumptions about implementation still reach too far? If yes, I can either
remove this check, or try to make cycle detection more relaxed about offsets
potentially going backwards.

> Alex
> 
> > +
> > +	*cap_offset = hdr->next;
> > +
> > +	return hdr;
> > +}
Re: [PATCH v2 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Williamson 2 months, 4 weeks ago
On Tue, 11 Nov 2025 09:35:31 -0800
Alex Mastro <amastro@fb.com> wrote:

> On Tue, Nov 11, 2025 at 10:09:48AM -0700, Alex Williamson wrote:
> > On Tue, 11 Nov 2025 06:52:02 -0800
> > Alex Mastro <amastro@fb.com> wrote:  
> > > diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > > index a381fd253aa7..7a523e3f2dce 100644
> > > --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > > +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > > @@ -29,6 +29,173 @@
> > >  	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
> > >  } while (0)
> > >  
> > > +static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
> > > +						 size_t *cap_offset)
> > > +{
> > > +	struct vfio_info_cap_header *hdr;
> > > +
> > > +	if (!*cap_offset)
> > > +		return NULL;
> > > +
> > > +	VFIO_ASSERT_LT(*cap_offset, bufsz);
> > > +	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
> > > +
> > > +	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
> > > +
> > > +	if (hdr->next)
> > > +		VFIO_ASSERT_GT(hdr->next, *cap_offset);  
> > 
> > This might be implementation, but I don't think it's a requirement.
> > The vfio capability chains are based on PCI capabilities, which have no
> > ordering requirement.  Thanks,  
> 
> My main interest was to enforce that the chain doesn't contain a cycle, and
> checking for monotonically increasing cap offset was the simplest way I could
> think of to guarantee such.
> 
> If there isn't such a check, and kernel vends a malformed cycle-containing
> chain, chain traversal would infinite loop.
> 
> Given the location of this test code coupled to the kernel tree, do you think
> such assumptions about implementation still reach too far? If yes, I can either
> remove this check, or try to make cycle detection more relaxed about offsets
> potentially going backwards.

I've seen cycle detection in PCI config space implemented as just a
depth/ttl counter.  Max cycles is roughly (buffer-size/header-size).  I
think that would be sufficient if we want to include that sanity
testing.  Thanks,

Alex
Re: [PATCH v2 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Mastro 2 months, 4 weeks ago
On Tue, Nov 11, 2025 at 10:52:02AM -0700, Alex Williamson wrote:
> On Tue, 11 Nov 2025 09:35:31 -0800
> Alex Mastro <amastro@fb.com> wrote:
> 
> > On Tue, Nov 11, 2025 at 10:09:48AM -0700, Alex Williamson wrote:
> > > On Tue, 11 Nov 2025 06:52:02 -0800
> > > Alex Mastro <amastro@fb.com> wrote:  
> > > > diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > > > index a381fd253aa7..7a523e3f2dce 100644
> > > > --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > > > +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > > > @@ -29,6 +29,173 @@
> > > >  	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
> > > >  } while (0)
> > > >  
> > > > +static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
> > > > +						 size_t *cap_offset)
> > > > +{
> > > > +	struct vfio_info_cap_header *hdr;
> > > > +
> > > > +	if (!*cap_offset)
> > > > +		return NULL;
> > > > +
> > > > +	VFIO_ASSERT_LT(*cap_offset, bufsz);
> > > > +	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
> > > > +
> > > > +	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
> > > > +
> > > > +	if (hdr->next)
> > > > +		VFIO_ASSERT_GT(hdr->next, *cap_offset);  
> > > 
> > > This might be implementation, but I don't think it's a requirement.
> > > The vfio capability chains are based on PCI capabilities, which have no
> > > ordering requirement.  Thanks,  
> > 
> > My main interest was to enforce that the chain doesn't contain a cycle, and
> > checking for monotonically increasing cap offset was the simplest way I could
> > think of to guarantee such.
> > 
> > If there isn't such a check, and kernel vends a malformed cycle-containing
> > chain, chain traversal would infinite loop.
> > 
> > Given the location of this test code coupled to the kernel tree, do you think
> > such assumptions about implementation still reach too far? If yes, I can either
> > remove this check, or try to make cycle detection more relaxed about offsets
> > potentially going backwards.
> 
> I've seen cycle detection in PCI config space implemented as just a
> depth/ttl counter.  Max cycles is roughly (buffer-size/header-size).  I
> think that would be sufficient if we want to include that sanity
> testing.  Thanks,

Thanks, that's a good suggestion -- will take this in v3.
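
Something along these lines, perhaps (an untested sketch of the ttl-counter
idea against the current helpers; the hdr->next > *cap_offset assert in
next_cap_hdr() would be dropped in favor of the bound here):

static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info,
							    u16 cap_id)
{
	struct vfio_info_cap_header *hdr;
	size_t cap_offset = info->cap_offset;
	/* A well-formed chain fits at most this many headers in argsz. */
	size_t ttl = info->argsz / sizeof(*hdr);

	if (!(info->flags & VFIO_IOMMU_INFO_CAPS))
		return NULL;

	if (cap_offset)
		VFIO_ASSERT_GE(cap_offset, sizeof(struct vfio_iommu_type1_info));

	while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) {
		/* More headers than can fit in argsz implies a cycle. */
		VFIO_ASSERT_GT(ttl, 0);
		ttl--;

		if (hdr->id == cap_id)
			return hdr;
	}

	return NULL;
}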

> 
> Alex