[PATCH 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Mastro 3 months ago
VFIO selftests need to map IOVAs from legally accessible ranges, which
can vary across hardware. Tests in vfio_dma_mapping_test.c currently
make overly strong assumptions about which IOVAs can be mapped.

Add vfio_pci_iova_ranges(), which queries IOVA ranges from the
IOMMUFD or VFIO container associated with the device. The queried ranges
are normalized to IOMMUFD's iommu_iova_range representation so that
handling of IOVA ranges up the stack can be implementation-agnostic.
iommu_iova_range and vfio_iova_range are equivalent, so prefer the
newer interface's struct.

Query IOMMUFD's ranges with IOMMU_IOAS_IOVA_RANGES.
Query VFIO container's ranges with VFIO_IOMMU_GET_INFO and
VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE.

The underlying vfio_iommu_type1_info buffer-related functionality has
been kept generic so the same helpers can be used to query other
capability chain information, if needed.
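
For example, a test might consume the normalized ranges along these
lines (illustrative sketch only, not code added by this patch):

	size_t nranges;
	struct iommu_iova_range *ranges;

	ranges = vfio_pci_iova_ranges(device, &nranges);
	VFIO_ASSERT_NOT_NULL(ranges);
	/* map IOVAs drawn from [ranges[i].start, ranges[i].last] */
	free(ranges);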

Signed-off-by: Alex Mastro <amastro@fb.com>
---
 .../testing/selftests/vfio/lib/include/vfio_util.h |   8 +-
 tools/testing/selftests/vfio/lib/vfio_pci_device.c | 161 +++++++++++++++++++++
 2 files changed, 168 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
index 240409bf5f8a..fb5efec52316 100644
--- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
+++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
@@ -4,9 +4,12 @@
 
 #include <fcntl.h>
 #include <string.h>
-#include <linux/vfio.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
 #include <linux/list.h>
 #include <linux/pci_regs.h>
+#include <linux/vfio.h>
 
 #include "../../../kselftest.h"
 
@@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
 void vfio_pci_device_cleanup(struct vfio_pci_device *device);
 void vfio_pci_device_reset(struct vfio_pci_device *device);
 
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+					      size_t *nranges);
+
 int __vfio_pci_dma_map(struct vfio_pci_device *device,
 		       struct vfio_dma_region *region);
 int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index a381fd253aa7..6bedbe65f0a1 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -29,6 +29,167 @@
 	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
 } while (0)
 
+static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
+						 size_t *cap_offset)
+{
+	struct vfio_info_cap_header *hdr;
+
+	if (!*cap_offset)
+		return NULL;
+
+	/* Cap offset must be in bounds */
+	VFIO_ASSERT_LT(*cap_offset, bufsz);
+	/* There must be enough remaining space to contain the header */
+	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
+
+	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
+
+	/* If there is a next, offset must increase by at least the header size */
+	if (hdr->next) {
+		VFIO_ASSERT_GT(hdr->next, *cap_offset);
+		VFIO_ASSERT_GE(hdr->next - *cap_offset, sizeof(*hdr));
+	}
+
+	*cap_offset = hdr->next;
+
+	return hdr;
+}
+
+static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *buf,
+							    u16 cap_id)
+{
+	struct vfio_info_cap_header *hdr;
+	size_t cap_offset = buf->cap_offset;
+
+	if (!(buf->flags & VFIO_IOMMU_INFO_CAPS))
+		return NULL;
+
+	if (cap_offset)
+		VFIO_ASSERT_GE(cap_offset, sizeof(struct vfio_iommu_type1_info));
+
+	while ((hdr = next_cap_hdr(buf, buf->argsz, &cap_offset))) {
+		if (hdr->id == cap_id)
+			return hdr;
+	}
+
+	return NULL;
+}
+
+/* Return buffer including capability chain, if present. Free with free() */
+static struct vfio_iommu_type1_info *vfio_iommu_info_buf(struct vfio_pci_device *device)
+{
+	struct vfio_iommu_type1_info *buf;
+
+	buf = malloc(sizeof(*buf));
+	VFIO_ASSERT_NOT_NULL(buf);
+
+	*buf = (struct vfio_iommu_type1_info) {
+		.argsz = sizeof(*buf),
+	};
+
+	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
+
+	buf = realloc(buf, buf->argsz);
+	VFIO_ASSERT_NOT_NULL(buf);
+
+	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
+
+	return buf;
+}
+
+/*
+ * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
+ * report iommufd's iommu_iova_range. Free with free().
+ */
+static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
+						       size_t *nranges)
+{
+	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
+	struct vfio_iommu_type1_info *buf;
+	struct vfio_info_cap_header *hdr;
+	struct iommu_iova_range *ranges = NULL;
+
+	buf = vfio_iommu_info_buf(device);
+	VFIO_ASSERT_NOT_NULL(buf);
+
+	hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+	if (!hdr)
+		goto free_buf;
+
+	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
+	if (!cap_range->nr_iovas)
+		goto free_buf;
+
+	ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));
+	VFIO_ASSERT_NOT_NULL(ranges);
+
+	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
+		ranges[i] = (struct iommu_iova_range){
+			.start = cap_range->iova_ranges[i].start,
+			.last = cap_range->iova_ranges[i].end,
+		};
+	}
+
+	*nranges = cap_range->nr_iovas;
+
+free_buf:
+	free(buf);
+	return ranges;
+}
+
+/* Return iova ranges of the device's IOAS. Free with free() */
+struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
+					     size_t *nranges)
+{
+	struct iommu_iova_range *ranges;
+	int ret;
+
+	struct iommu_ioas_iova_ranges query = {
+		.size = sizeof(query),
+		.ioas_id = device->ioas_id,
+	};
+
+	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+	VFIO_ASSERT_EQ(ret, -1);
+	VFIO_ASSERT_EQ(errno, EMSGSIZE);
+	VFIO_ASSERT_GT(query.num_iovas, 0);
+
+	ranges = malloc(query.num_iovas * sizeof(*ranges));
+	VFIO_ASSERT_NOT_NULL(ranges);
+
+	query.allowed_iovas = (uintptr_t)ranges;
+
+	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+	*nranges = query.num_iovas;
+
+	return ranges;
+}
+
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+					      size_t *nranges)
+{
+	struct iommu_iova_range *ranges;
+
+	if (device->iommufd)
+		ranges = iommufd_iova_ranges(device, nranges);
+	else
+		ranges = vfio_iommu_iova_ranges(device, nranges);
+
+	if (!ranges)
+		return NULL;
+
+	/* ranges should be valid, ascending, and non-overlapping */
+	VFIO_ASSERT_GT(*nranges, 0);
+	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
+
+	for (size_t i = 1; i < *nranges; i++) {
+		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
+		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
+	}
+
+	return ranges;
+}
+
 iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
 {
 	struct vfio_dma_region *region;

-- 
2.47.3
Re: [PATCH 1/4] vfio: selftests: add iova range query helpers
Posted by David Matlack 3 months ago
On 2025-11-10 01:10 PM, Alex Mastro wrote:
> +/*
> + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
> + * report iommufd's iommu_iova_range. Free with free().
> + */
> +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
> +						       size_t *nranges)
> +{
> +	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
> +	struct vfio_iommu_type1_info *buf;

nit: Maybe name this variable `info` here and in vfio_iommu_info_buf()
and vfio_iommu_info_cap_hdr()? It is not an opaque buffer.

> +	struct vfio_info_cap_header *hdr;
> +	struct iommu_iova_range *ranges = NULL;
> +
> +	buf = vfio_iommu_info_buf(device);

nit: How about naming this vfio_iommu_get_info() since it actually
fetches the info from VFIO? (It doesn't just allocate a buffer.)

> +	VFIO_ASSERT_NOT_NULL(buf);

This assert is unnecessary.

> +
> +	hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> +	if (!hdr)
> +		goto free_buf;

Is this to account for running on old versions of VFIO? Or are there
some scenarios when VFIO can't report the list of IOVA ranges?

> +
> +	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
> +	if (!cap_range->nr_iovas)
> +		goto free_buf;
> +
> +	ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));
> +	VFIO_ASSERT_NOT_NULL(ranges);
> +
> +	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
> +		ranges[i] = (struct iommu_iova_range){
> +			.start = cap_range->iova_ranges[i].start,
> +			.last = cap_range->iova_ranges[i].end,
> +		};
> +	}
> +
> +	*nranges = cap_range->nr_iovas;
> +
> +free_buf:
> +	free(buf);
> +	return ranges;
> +}
> +
> +/* Return iova ranges of the device's IOAS. Free with free() */
> +struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
> +					     size_t *nranges)
> +{
> +	struct iommu_iova_range *ranges;
> +	int ret;
> +
> +	struct iommu_ioas_iova_ranges query = {
> +		.size = sizeof(query),
> +		.ioas_id = device->ioas_id,
> +	};
> +
> +	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> +	VFIO_ASSERT_EQ(ret, -1);
> +	VFIO_ASSERT_EQ(errno, EMSGSIZE);
> +	VFIO_ASSERT_GT(query.num_iovas, 0);
> +
> +	ranges = malloc(query.num_iovas * sizeof(*ranges));
> +	VFIO_ASSERT_NOT_NULL(ranges);
> +
> +	query.allowed_iovas = (uintptr_t)ranges;
> +
> +	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> +	*nranges = query.num_iovas;
> +
> +	return ranges;
> +}
> +
> +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> +					      size_t *nranges)

nit: Both iommufd and VFIO represent the number of IOVA ranges as a u32.
Perhaps we should do the same in VFIO selftests?
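
Something like this, say (just a sketch):

	struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
						      u32 *nranges);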
Re: [PATCH 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Mastro 3 months ago
On Mon, Nov 10, 2025 at 10:03:54PM +0000, David Matlack wrote:
> On 2025-11-10 01:10 PM, Alex Mastro wrote:
> > +/*
> > + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
> > + * report iommufd's iommu_iova_range. Free with free().
> > + */
> > +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
> > +						       size_t *nranges)
> > +{
> > +	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
> > +	struct vfio_iommu_type1_info *buf;
> 
> nit: Maybe name this variable `info` here and in vfio_iommu_info_buf()
> and vfio_iommu_info_cap_hdr()? It is not an opaque buffer.
> 
> > +	struct vfio_info_cap_header *hdr;
> > +	struct iommu_iova_range *ranges = NULL;
> > +
> > +	buf = vfio_iommu_info_buf(device);
> 
> nit: How about naming this vfio_iommu_get_info() since it actually
> fetches the info from VFIO? (It doesn't just allocate a buffer.)
> 
> > +	VFIO_ASSERT_NOT_NULL(buf);
> 
> This assert is unnecessary.
> 
> > +
> > +	hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> > +	if (!hdr)
> > +		goto free_buf;
> 
> Is this to account for running on old versions of VFIO? Or are there
> some scenarios when VFIO can't report the list of IOVA ranges?

I wanted to avoid being overly assertive in this low-level helper function,
mostly out of ignorance about where/in which system states this capability may
not be reported.

> > +
> > +	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
> > +	if (!cap_range->nr_iovas)
> > +		goto free_buf;
> > +
> > +	ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));
> > +	VFIO_ASSERT_NOT_NULL(ranges);
> > +
> > +	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
> > +		ranges[i] = (struct iommu_iova_range){
> > +			.start = cap_range->iova_ranges[i].start,
> > +			.last = cap_range->iova_ranges[i].end,
> > +		};
> > +	}
> > +
> > +	*nranges = cap_range->nr_iovas;
> > +
> > +free_buf:
> > +	free(buf);
> > +	return ranges;
> > +}
> > +
> > +/* Return iova ranges of the device's IOAS. Free with free() */
> > +struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
> > +					     size_t *nranges)
> > +{
> > +	struct iommu_iova_range *ranges;
> > +	int ret;
> > +
> > +	struct iommu_ioas_iova_ranges query = {
> > +		.size = sizeof(query),
> > +		.ioas_id = device->ioas_id,
> > +	};
> > +
> > +	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> > +	VFIO_ASSERT_EQ(ret, -1);
> > +	VFIO_ASSERT_EQ(errno, EMSGSIZE);
> > +	VFIO_ASSERT_GT(query.num_iovas, 0);
> > +
> > +	ranges = malloc(query.num_iovas * sizeof(*ranges));
> > +	VFIO_ASSERT_NOT_NULL(ranges);
> > +
> > +	query.allowed_iovas = (uintptr_t)ranges;
> > +
> > +	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> > +	*nranges = query.num_iovas;
> > +
> > +	return ranges;
> > +}
> > +
> > +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> > +					      size_t *nranges)
> 
> nit: Both iommufd and VFIO represent the number of IOVA ranges as a u32.
> Perhaps we should do the same in VFIO selftests?

Thanks David. All suggestions SGTM -- will roll into v2.
Re: [PATCH 1/4] vfio: selftests: add iova range query helpers
Posted by David Matlack 3 months ago
On 2025-11-10 02:32 PM, Alex Mastro wrote:
> On Mon, Nov 10, 2025 at 10:03:54PM +0000, David Matlack wrote:
> > On 2025-11-10 01:10 PM, Alex Mastro wrote:
> > > +
> > > +	hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> > > +	if (!hdr)
> > > +		goto free_buf;
> > 
> > Is this to account for running on old versions of VFIO? Or are there
> > some scenarios when VFIO can't report the list of IOVA ranges?
> 
> I wanted to avoid being overly assertive in this low-level helper function,
> mostly out of ignorance about where/in which system states this capability may
> not be reported.

Makes sense, but IIUC a failure here will eventually turn into an
assertion failure in all callers that exist today. So there's currently
no reason to plumb it up the stack.

For situations like this, I think we should err on asserting at the
lower level helpers, and only propagating errors up as needed. That
keeps all the happy-path callers simple, and those should be the
majority of callers (if not all callers).
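
For example (sketch only):

	hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
	VFIO_ASSERT_NOT_NULL(hdr);

and likewise assert that cap_range->nr_iovas is non-zero instead of
returning NULL.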
Re: [PATCH 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Mastro 3 months ago
On Mon, Nov 10, 2025 at 11:02:32PM +0000, David Matlack wrote:
> On 2025-11-10 02:32 PM, Alex Mastro wrote:
> > On Mon, Nov 10, 2025 at 10:03:54PM +0000, David Matlack wrote:
> > > On 2025-11-10 01:10 PM, Alex Mastro wrote:
> > > > +
> > > > +	hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> > > > +	if (!hdr)
> > > > +		goto free_buf;
> > > 
> > > Is this to account for running on old versions of VFIO? Or are there
> > > some scenarios when VFIO can't report the list of IOVA ranges?
> > 
> > I wanted to avoid being overly assertive in this low-level helper function,
> > mostly out of ignorance about where/in which system states this capability may
> > not be reported.
> 
> Makes sense, but IIUC a failure here will eventually turn into an
> assertion failure in all callers that exist today. So there's currently
> no reason to plumb it up the stack.

Yes, the first part is true.

> 
> For situations like this, I think we should err on asserting at the
> lower level helpers, and only propagating errors up as needed. That
> keeps all the happy-path callers simple, and those should be the
> majority of callers (if not all callers).

SGTM -- I will do this.
Re: [PATCH 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Williamson 3 months ago
On Mon, 10 Nov 2025 13:10:41 -0800
Alex Mastro <amastro@fb.com> wrote:

> VFIO selftests need to map IOVAs from legally accessible ranges, which
> can vary across hardware. Tests in vfio_dma_mapping_test.c currently
> make overly strong assumptions about which IOVAs can be mapped.
> 
> Add vfio_pci_iova_ranges(), which queries IOVA ranges from the
> IOMMUFD or VFIO container associated with the device. The queried ranges
> are normalized to IOMMUFD's iommu_iova_range representation so that
> handling of IOVA ranges up the stack can be implementation-agnostic.
> iommu_iova_range and vfio_iova_range are equivalent, so prefer the
> newer interface's struct.
> 
> Query IOMMUFD's ranges with IOMMU_IOAS_IOVA_RANGES.
> Query VFIO container's ranges with VFIO_IOMMU_GET_INFO and
> VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE.
> 
> The underlying vfio_iommu_type1_info buffer-related functionality has
> been kept generic so the same helpers can be used to query other
> capability chain information, if needed.
> 
> Signed-off-by: Alex Mastro <amastro@fb.com>
> ---
>  .../testing/selftests/vfio/lib/include/vfio_util.h |   8 +-
>  tools/testing/selftests/vfio/lib/vfio_pci_device.c | 161 +++++++++++++++++++++
>  2 files changed, 168 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
> index 240409bf5f8a..fb5efec52316 100644
> --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
> +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
> @@ -4,9 +4,12 @@
>  
>  #include <fcntl.h>
>  #include <string.h>
> -#include <linux/vfio.h>
> +
> +#include <uapi/linux/types.h>
> +#include <linux/iommufd.h>
>  #include <linux/list.h>
>  #include <linux/pci_regs.h>
> +#include <linux/vfio.h>
>  
>  #include "../../../kselftest.h"
>  
> @@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
>  void vfio_pci_device_cleanup(struct vfio_pci_device *device);
>  void vfio_pci_device_reset(struct vfio_pci_device *device);
>  
> +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> +					      size_t *nranges);
> +
>  int __vfio_pci_dma_map(struct vfio_pci_device *device,
>  		       struct vfio_dma_region *region);
>  int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
> diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> index a381fd253aa7..6bedbe65f0a1 100644
> --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> @@ -29,6 +29,167 @@
>  	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
>  } while (0)
>  
> +static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
> +						 size_t *cap_offset)
> +{
> +	struct vfio_info_cap_header *hdr;
> +
> +	if (!*cap_offset)
> +		return NULL;
> +
> +	/* Cap offset must be in bounds */
> +	VFIO_ASSERT_LT(*cap_offset, bufsz);
> +	/* There must be enough remaining space to contain the header */
> +	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
> +
> +	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
> +
> +	/* If there is a next, offset must increase by at least the header size */
> +	if (hdr->next) {
> +		VFIO_ASSERT_GT(hdr->next, *cap_offset);
> +		VFIO_ASSERT_GE(hdr->next - *cap_offset, sizeof(*hdr));
> +	}
> +
> +	*cap_offset = hdr->next;
> +
> +	return hdr;
> +}
> +
> +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *buf,
> +							    u16 cap_id)
> +{
> +	struct vfio_info_cap_header *hdr;
> +	size_t cap_offset = buf->cap_offset;
> +
> +	if (!(buf->flags & VFIO_IOMMU_INFO_CAPS))
> +		return NULL;
> +
> +	if (cap_offset)
> +		VFIO_ASSERT_GE(cap_offset, sizeof(struct vfio_iommu_type1_info));
> +
> +	while ((hdr = next_cap_hdr(buf, buf->argsz, &cap_offset))) {
> +		if (hdr->id == cap_id)
> +			return hdr;
> +	}
> +
> +	return NULL;
> +}
> +
> +/* Return buffer including capability chain, if present. Free with free() */
> +static struct vfio_iommu_type1_info *vfio_iommu_info_buf(struct vfio_pci_device *device)
> +{
> +	struct vfio_iommu_type1_info *buf;
> +
> +	buf = malloc(sizeof(*buf));
> +	VFIO_ASSERT_NOT_NULL(buf);
> +
> +	*buf = (struct vfio_iommu_type1_info) {
> +		.argsz = sizeof(*buf),
> +	};
> +
> +	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
> +
> +	buf = realloc(buf, buf->argsz);
> +	VFIO_ASSERT_NOT_NULL(buf);
> +
> +	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
> +
> +	return buf;
> +}
> +
> +/*
> + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
> + * report iommufd's iommu_iova_range. Free with free().
> + */
> +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
> +						       size_t *nranges)
> +{
> +	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
> +	struct vfio_iommu_type1_info *buf;
> +	struct vfio_info_cap_header *hdr;
> +	struct iommu_iova_range *ranges = NULL;
> +
> +	buf = vfio_iommu_info_buf(device);
> +	VFIO_ASSERT_NOT_NULL(buf);
> +
> +	hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> +	if (!hdr)
> +		goto free_buf;
> +
> +	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
> +	if (!cap_range->nr_iovas)
> +		goto free_buf;
> +
> +	ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));


Natural calloc() use case.
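
i.e., roughly:

	ranges = calloc(cap_range->nr_iovas, sizeof(*ranges));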

> +	VFIO_ASSERT_NOT_NULL(ranges);
> +
> +	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
> +		ranges[i] = (struct iommu_iova_range){
> +			.start = cap_range->iova_ranges[i].start,
> +			.last = cap_range->iova_ranges[i].end,
> +		};
> +	}
> +
> +	*nranges = cap_range->nr_iovas;
> +
> +free_buf:
> +	free(buf);
> +	return ranges;
> +}
> +
> +/* Return iova ranges of the device's IOAS. Free with free() */
> +struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
> +					     size_t *nranges)
> +{
> +	struct iommu_iova_range *ranges;
> +	int ret;
> +
> +	struct iommu_ioas_iova_ranges query = {
> +		.size = sizeof(query),
> +		.ioas_id = device->ioas_id,
> +	};
> +
> +	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> +	VFIO_ASSERT_EQ(ret, -1);
> +	VFIO_ASSERT_EQ(errno, EMSGSIZE);
> +	VFIO_ASSERT_GT(query.num_iovas, 0);
> +
> +	ranges = malloc(query.num_iovas * sizeof(*ranges));

Same.

> +	VFIO_ASSERT_NOT_NULL(ranges);
> +
> +	query.allowed_iovas = (uintptr_t)ranges;
> +
> +	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> +	*nranges = query.num_iovas;
> +
> +	return ranges;
> +}
> +
> +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> +					      size_t *nranges)
> +{
> +	struct iommu_iova_range *ranges;
> +
> +	if (device->iommufd)
> +		ranges = iommufd_iova_ranges(device, nranges);
> +	else
> +		ranges = vfio_iommu_iova_ranges(device, nranges);
> +
> +	if (!ranges)
> +		return NULL;
> +
> +	/* ranges should be valid, ascending, and non-overlapping */

I don't recall that ranges are required to be in any particular order.
Thanks,

Alex

> +	VFIO_ASSERT_GT(*nranges, 0);
> +	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
> +
> +	for (size_t i = 1; i < *nranges; i++) {
> +		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
> +		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
> +	}
> +
> +	return ranges;
> +}
> +
>  iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
>  {
>  	struct vfio_dma_region *region;
>
Re: [PATCH 1/4] vfio: selftests: add iova range query helpers
Posted by Alex Mastro 3 months ago
On Mon, Nov 10, 2025 at 02:31:53PM -0700, Alex Williamson wrote:
> On Mon, 10 Nov 2025 13:10:41 -0800
> Alex Mastro <amastro@fb.com> wrote:
> 
> > VFIO selftests need to map IOVAs from legally accessible ranges, which
> > can vary across hardware. Tests in vfio_dma_mapping_test.c currently
> > make overly strong assumptions about which IOVAs can be mapped.
> > 
> > Add vfio_pci_iova_ranges(), which queries IOVA ranges from the
> > IOMMUFD or VFIO container associated with the device. The queried ranges
> > are normalized to IOMMUFD's iommu_iova_range representation so that
> > handling of IOVA ranges up the stack can be implementation-agnostic.
> > iommu_iova_range and vfio_iova_range are equivalent, so prefer the
> > newer interface's struct.
> > 
> > Query IOMMUFD's ranges with IOMMU_IOAS_IOVA_RANGES.
> > Query VFIO container's ranges with VFIO_IOMMU_GET_INFO and
> > VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE.
> > 
> > The underlying vfio_iommu_type1_info buffer-related functionality has
> > been kept generic so the same helpers can be used to query other
> > capability chain information, if needed.
> > 
> > Signed-off-by: Alex Mastro <amastro@fb.com>
> > ---
> >  .../testing/selftests/vfio/lib/include/vfio_util.h |   8 +-
> >  tools/testing/selftests/vfio/lib/vfio_pci_device.c | 161 +++++++++++++++++++++
> >  2 files changed, 168 insertions(+), 1 deletion(-)
> > 
> > diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
> > index 240409bf5f8a..fb5efec52316 100644
> > --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
> > +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
> > @@ -4,9 +4,12 @@
> >  
> >  #include <fcntl.h>
> >  #include <string.h>
> > -#include <linux/vfio.h>
> > +
> > +#include <uapi/linux/types.h>
> > +#include <linux/iommufd.h>
> >  #include <linux/list.h>
> >  #include <linux/pci_regs.h>
> > +#include <linux/vfio.h>
> >  
> >  #include "../../../kselftest.h"
> >  
> > @@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
> >  void vfio_pci_device_cleanup(struct vfio_pci_device *device);
> >  void vfio_pci_device_reset(struct vfio_pci_device *device);
> >  
> > +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> > +					      size_t *nranges);
> > +
> >  int __vfio_pci_dma_map(struct vfio_pci_device *device,
> >  		       struct vfio_dma_region *region);
> >  int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
> > diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > index a381fd253aa7..6bedbe65f0a1 100644
> > --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> > @@ -29,6 +29,167 @@
> >  	VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
> >  } while (0)
> >  
> > +static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
> > +						 size_t *cap_offset)
> > +{
> > +	struct vfio_info_cap_header *hdr;
> > +
> > +	if (!*cap_offset)
> > +		return NULL;
> > +
> > +	/* Cap offset must be in bounds */
> > +	VFIO_ASSERT_LT(*cap_offset, bufsz);
> > +	/* There must be enough remaining space to contain the header */
> > +	VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
> > +
> > +	hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
> > +
> > +	/* If there is a next, offset must increase by at least the header size */
> > +	if (hdr->next) {
> > +		VFIO_ASSERT_GT(hdr->next, *cap_offset);
> > +		VFIO_ASSERT_GE(hdr->next - *cap_offset, sizeof(*hdr));
> > +	}
> > +
> > +	*cap_offset = hdr->next;
> > +
> > +	return hdr;
> > +}
> > +
> > +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *buf,
> > +							    u16 cap_id)
> > +{
> > +	struct vfio_info_cap_header *hdr;
> > +	size_t cap_offset = buf->cap_offset;
> > +
> > +	if (!(buf->flags & VFIO_IOMMU_INFO_CAPS))
> > +		return NULL;
> > +
> > +	if (cap_offset)
> > +		VFIO_ASSERT_GE(cap_offset, sizeof(struct vfio_iommu_type1_info));
> > +
> > +	while ((hdr = next_cap_hdr(buf, buf->argsz, &cap_offset))) {
> > +		if (hdr->id == cap_id)
> > +			return hdr;
> > +	}
> > +
> > +	return NULL;
> > +}
> > +
> > +/* Return buffer including capability chain, if present. Free with free() */
> > +static struct vfio_iommu_type1_info *vfio_iommu_info_buf(struct vfio_pci_device *device)
> > +{
> > +	struct vfio_iommu_type1_info *buf;
> > +
> > +	buf = malloc(sizeof(*buf));
> > +	VFIO_ASSERT_NOT_NULL(buf);
> > +
> > +	*buf = (struct vfio_iommu_type1_info) {
> > +		.argsz = sizeof(*buf),
> > +	};
> > +
> > +	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
> > +
> > +	buf = realloc(buf, buf->argsz);
> > +	VFIO_ASSERT_NOT_NULL(buf);
> > +
> > +	ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
> > +
> > +	return buf;
> > +}
> > +
> > +/*
> > + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to
> > + * report iommufd's iommu_iova_range. Free with free().
> > + */
> > +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
> > +						       size_t *nranges)
> > +{
> > +	struct vfio_iommu_type1_info_cap_iova_range *cap_range;
> > +	struct vfio_iommu_type1_info *buf;
> > +	struct vfio_info_cap_header *hdr;
> > +	struct iommu_iova_range *ranges = NULL;
> > +
> > +	buf = vfio_iommu_info_buf(device);
> > +	VFIO_ASSERT_NOT_NULL(buf);
> > +
> > +	hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> > +	if (!hdr)
> > +		goto free_buf;
> > +
> > +	cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
> > +	if (!cap_range->nr_iovas)
> > +		goto free_buf;
> > +
> > +	ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));
> 
> 
> Natural calloc() use case.

Ack.

> 
> > +	VFIO_ASSERT_NOT_NULL(ranges);
> > +
> > +	for (u32 i = 0; i < cap_range->nr_iovas; i++) {
> > +		ranges[i] = (struct iommu_iova_range){
> > +			.start = cap_range->iova_ranges[i].start,
> > +			.last = cap_range->iova_ranges[i].end,
> > +		};
> > +	}
> > +
> > +	*nranges = cap_range->nr_iovas;
> > +
> > +free_buf:
> > +	free(buf);
> > +	return ranges;
> > +}
> > +
> > +/* Return iova ranges of the device's IOAS. Free with free() */
> > +struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
> > +					     size_t *nranges)
> > +{
> > +	struct iommu_iova_range *ranges;
> > +	int ret;
> > +
> > +	struct iommu_ioas_iova_ranges query = {
> > +		.size = sizeof(query),
> > +		.ioas_id = device->ioas_id,
> > +	};
> > +
> > +	ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> > +	VFIO_ASSERT_EQ(ret, -1);
> > +	VFIO_ASSERT_EQ(errno, EMSGSIZE);
> > +	VFIO_ASSERT_GT(query.num_iovas, 0);
> > +
> > +	ranges = malloc(query.num_iovas * sizeof(*ranges));
> 
> Same.

Ack.

> 
> > +	VFIO_ASSERT_NOT_NULL(ranges);
> > +
> > +	query.allowed_iovas = (uintptr_t)ranges;
> > +
> > +	ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> > +	*nranges = query.num_iovas;
> > +
> > +	return ranges;
> > +}
> > +
> > +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> > +					      size_t *nranges)
> > +{
> > +	struct iommu_iova_range *ranges;
> > +
> > +	if (device->iommufd)
> > +		ranges = iommufd_iova_ranges(device, nranges);
> > +	else
> > +		ranges = vfio_iommu_iova_ranges(device, nranges);
> > +
> > +	if (!ranges)
> > +		return NULL;
> > +
> > +	/* ranges should be valid, ascending, and non-overlapping */
> 
> I don't recall that ranges are required to be in any particular order.

Yes, this is assuming more than the UAPI guarantees. I'll update this to
sort what the kernel vends so that we can preserve the sanity checks.
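
Probably something like this with stdlib qsort() (untested sketch):

	static int iova_range_cmp(const void *a, const void *b)
	{
		const struct iommu_iova_range *ra = a, *rb = b;

		if (ra->start < rb->start)
			return -1;
		if (ra->start > rb->start)
			return 1;
		return 0;
	}

	qsort(ranges, *nranges, sizeof(*ranges), iova_range_cmp);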

> Thanks,
> 
> Alex
> 
> > +	VFIO_ASSERT_GT(*nranges, 0);
> > +	VFIO_ASSERT_LT(ranges[0].start, ranges[0].last);
> > +
> > +	for (size_t i = 1; i < *nranges; i++) {
> > +		VFIO_ASSERT_LT(ranges[i].start, ranges[i].last);
> > +		VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start);
> > +	}
> > +
> > +	return ranges;
> > +}
> > +
> >  iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
> >  {
> >  	struct vfio_dma_region *region;
> > 
>