Skip vfio_dma_map_limit_test.{unmap_range,unmap_all} (instead of
failing) on systems that do not support mapping in the page-sized region
at the top of the u64 address space. Use -EINVAL as the signal for
detecting systems with this limitation, as that is what both VFIO Type1
and iommufd return.
A more robust solution that could be considered in the future would be
to explicitly check the range of supported IOVA regions and key off
that, instead of inferring from -EINVAL.
Fixes: de8d1f2fd5a5 ("vfio: selftests: add end of address space DMA map/unmap tests")
Signed-off-by: David Matlack <dmatlack@google.com>
---
.../testing/selftests/vfio/vfio_dma_mapping_test.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index 4f1ea79a200c..52b49cae58fe 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -249,7 +249,12 @@ TEST_F(vfio_dma_map_limit_test, unmap_range)
u64 unmapped;
int rc;
- vfio_pci_dma_map(self->device, region);
+ rc = __vfio_pci_dma_map(self->device, region);
+ if (rc == -EINVAL)
+ SKIP(return, "Unable to map at iova 0x%lx\n", region->iova);
+ else
+ ASSERT_EQ(rc, 0);
+
ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
rc = __vfio_pci_dma_unmap(self->device, region, &unmapped);
@@ -263,7 +268,12 @@ TEST_F(vfio_dma_map_limit_test, unmap_all)
u64 unmapped;
int rc;
- vfio_pci_dma_map(self->device, region);
+ rc = __vfio_pci_dma_map(self->device, region);
+ if (rc == -EINVAL)
+ SKIP(return, "Unable to map at iova 0x%lx\n", region->iova);
+ else
+ ASSERT_EQ(rc, 0);
+
ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
rc = __vfio_pci_dma_unmap_all(self->device, &unmapped);
base-commit: a1388fcb52fcad3e0b06e2cdd0ed757a82a5be30
--
2.51.2.1041.gc1ab5b90ca-goog
On Fri, Nov 07, 2025 at 10:20:58PM +0000, David Matlack wrote:
> Skip vfio_dma_map_limit_test.{unmap_range,unmap_all} (instead of
> failing) on systems that do not support mapping in the page-sized region
> at the top of the u64 address space. Use -EINVAL as the signal for
> detecting systems with this limitation, as that is what both VFIO Type1
> and iommufd return.
>
> A more robust solution that could be considered in the future would be
> to explicitly check the range of supported IOVA regions and key off
> that, instead of inferring from -EINVAL.
>
> Fixes: de8d1f2fd5a5 ("vfio: selftests: add end of address space DMA map/unmap tests")
> Signed-off-by: David Matlack <dmatlack@google.com>
Makes sense -- thanks David. Agree about keying this off
VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE longer term.
Reviewed-by: Alex Mastro <amastro@fb.com>
On Fri, Nov 07, 2025 at 04:17:24PM -0800, Alex Mastro wrote:
> On Fri, Nov 07, 2025 at 10:20:58PM +0000, David Matlack wrote:
> > Skip vfio_dma_map_limit_test.{unmap_range,unmap_all} (instead of
> > failing) on systems that do not support mapping in the page-sized region
> > at the top of the u64 address space. Use -EINVAL as the signal for
> > detecting systems with this limitation, as that is what both VFIO Type1
> > and iommufd return.
> >
> > A more robust solution that could be considered in the future would be
> > to explicitly check the range of supported IOVA regions and key off
> > that, instead of inferring from -EINVAL.
> >
> > Fixes: de8d1f2fd5a5 ("vfio: selftests: add end of address space DMA map/unmap tests")
> > Signed-off-by: David Matlack <dmatlack@google.com>
>
> Makes sense -- thanks David. Agree about keying this off
> VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE longer term.
>
> Reviewed-by: Alex Mastro <amastro@fb.com>
Here's my attempt at adding some machinery to query iova ranges, with
normalization to iommufd's struct. I kept the vfio capability chain stuff
relatively generic so we can use it for other things in the future if needed.
I can sequence this after your fix?
diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
index 240409bf5f8a..fb5efec52316 100644
--- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
+++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
@@ -4,9 +4,12 @@
#include <fcntl.h>
#include <string.h>
-#include <linux/vfio.h>
+
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
#include <linux/list.h>
#include <linux/pci_regs.h>
+#include <linux/vfio.h>
#include "../../../kselftest.h"
@@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
void vfio_pci_device_cleanup(struct vfio_pci_device *device);
void vfio_pci_device_reset(struct vfio_pci_device *device);
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+ size_t *nranges);
+
int __vfio_pci_dma_map(struct vfio_pci_device *device,
struct vfio_dma_region *region);
int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
index a381fd253aa7..3297a41fdc31 100644
--- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
+++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
@@ -29,6 +29,145 @@
VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
} while (0)
+static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
+ size_t *cap_offset)
+{
+ struct vfio_info_cap_header *hdr;
+
+ if (!*cap_offset)
+ return NULL;
+
+ /* Cap offset must be in bounds */
+ VFIO_ASSERT_LT(*cap_offset, bufsz);
+ /* There must be enough remaining space to contain the header */
+ VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
+ hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
+ /* If there is a next, offset must monotonically increase */
+ if (hdr->next)
+ VFIO_ASSERT_GT(hdr->next, *cap_offset);
+ *cap_offset = hdr->next;
+
+ return hdr;
+}
+
+static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *buf,
+ u16 cap_id)
+{
+ struct vfio_info_cap_header *hdr;
+ size_t cap_offset = buf->cap_offset;
+
+ if (!(buf->flags & VFIO_IOMMU_INFO_CAPS))
+ return NULL;
+
+ if (cap_offset)
+ VFIO_ASSERT_GE(cap_offset, sizeof(struct vfio_iommu_type1_info));
+
+ while ((hdr = next_cap_hdr(buf, buf->argsz, &cap_offset))) {
+ if (hdr->id == cap_id)
+ return hdr;
+ }
+
+ return NULL;
+}
+
+/* Return buffer including capability chain, if present. Free with free() */
+static struct vfio_iommu_type1_info *vfio_iommu_info_buf(struct vfio_pci_device *device)
+{
+ struct vfio_iommu_type1_info *buf;
+
+ buf = malloc(sizeof(*buf));
+ VFIO_ASSERT_NOT_NULL(buf);
+
+ *buf = (struct vfio_iommu_type1_info) {
+ .argsz = sizeof(*buf),
+ };
+
+ ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
+
+ buf = realloc(buf, buf->argsz);
+ VFIO_ASSERT_NOT_NULL(buf);
+
+ ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
+
+ return buf;
+}
+
+/*
+ * Normalize vfio_iommu_type1 to report iommufd's iommu_iova_range. Free with
+ * free().
+ */
+static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
+ size_t *nranges)
+{
+ struct vfio_iommu_type1_info_cap_iova_range *cap_range;
+ struct vfio_iommu_type1_info *buf;
+ struct vfio_info_cap_header *hdr;
+ struct iommu_iova_range *ranges = NULL;
+
+ buf = vfio_iommu_info_buf(device);
+ VFIO_ASSERT_NOT_NULL(buf);
+
+ hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
+ if (!hdr)
+ goto free_buf;
+
+ cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
+ if (!cap_range->nr_iovas)
+ goto free_buf;
+
+ ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ for (u32 i = 0; i < cap_range->nr_iovas; i++) {
+ ranges[i] = (struct iommu_iova_range){
+ .start = cap_range->iova_ranges[i].start,
+ .last = cap_range->iova_ranges[i].end,
+ };
+ }
+
+ *nranges = cap_range->nr_iovas;
+
+free_buf:
+ free(buf);
+ return ranges;
+}
+
+struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
+ size_t *nranges)
+{
+ struct iommu_iova_range *ranges;
+ int ret;
+
+ struct iommu_ioas_iova_ranges query = {
+ .size = sizeof(query),
+ .ioas_id = device->ioas_id,
+ };
+
+ ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+ VFIO_ASSERT_EQ(ret, -1);
+ VFIO_ASSERT_EQ(errno, EMSGSIZE);
+ VFIO_ASSERT_GT(query.num_iovas, 0);
+
+ ranges = malloc(query.num_iovas * sizeof(*ranges));
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ query.allowed_iovas = (uintptr_t)ranges;
+
+ ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
+ *nranges = query.num_iovas;
+
+ return ranges;
+}
+
+struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
+ size_t *nranges)
+{
+ if (device->iommufd)
+ return iommufd_iova_ranges(device, nranges);
+
+ return vfio_iommu_iova_ranges(device, nranges);
+}
+
iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
{
struct vfio_dma_region *region;
diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
index 4f1ea79a200c..78983c4c293b 100644
--- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
+++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
@@ -3,6 +3,8 @@
#include <sys/mman.h>
#include <unistd.h>
+#include <uapi/linux/types.h>
+#include <linux/iommufd.h>
#include <linux/limits.h>
#include <linux/mman.h>
#include <linux/sizes.h>
@@ -243,12 +245,31 @@ FIXTURE_TEARDOWN(vfio_dma_map_limit_test)
ASSERT_EQ(munmap(self->region.vaddr, self->mmap_size), 0);
}
+static iova_t last_legal_iova(struct vfio_pci_device *device)
+{
+ struct iommu_iova_range *ranges;
+ size_t nranges;
+ iova_t ret;
+
+ ranges = vfio_pci_iova_ranges(device, &nranges);
+ VFIO_ASSERT_NOT_NULL(ranges);
+
+ ret = ranges[nranges - 1].last;
+ free(ranges);
+
+ return ret;
+}
+
TEST_F(vfio_dma_map_limit_test, unmap_range)
{
+ iova_t last_iova = last_legal_iova(self->device);
struct vfio_dma_region *region = &self->region;
u64 unmapped;
int rc;
+ if (last_iova != ~(iova_t)0)
+ SKIP(return, "last legal iova=0x%lx\n", last_iova);
+
vfio_pci_dma_map(self->device, region);
ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
@@ -259,10 +280,14 @@ TEST_F(vfio_dma_map_limit_test, unmap_range)
TEST_F(vfio_dma_map_limit_test, unmap_all)
{
+ iova_t last_iova = last_legal_iova(self->device);
struct vfio_dma_region *region = &self->region;
u64 unmapped;
int rc;
+ if (last_iova != ~(iova_t)0)
+ SKIP(return, "last legal iova=0x%lx\n", last_iova);
+
vfio_pci_dma_map(self->device, region);
ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
On Sat, 8 Nov 2025 12:19:48 -0800
Alex Mastro <amastro@fb.com> wrote:
> On Fri, Nov 07, 2025 at 04:17:24PM -0800, Alex Mastro wrote:
> > On Fri, Nov 07, 2025 at 10:20:58PM +0000, David Matlack wrote:
> > > Skip vfio_dma_map_limit_test.{unmap_range,unmap_all} (instead of
> > > failing) on systems that do not support mapping in the page-sized region
> > > at the top of the u64 address space. Use -EINVAL as the signal for
> > > detecting systems with this limitation, as that is what both VFIO Type1
> > > and iommufd return.
> > >
> > > A more robust solution that could be considered in the future would be
> > > to explicitly check the range of supported IOVA regions and key off
> > > that, instead of inferring from -EINVAL.
> > >
> > > Fixes: de8d1f2fd5a5 ("vfio: selftests: add end of address space DMA map/unmap tests")
> > > Signed-off-by: David Matlack <dmatlack@google.com>
> >
> > Makes sense -- thanks David. Agree about keying this off
> > VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE longer term.
> >
> > Reviewed-by: Alex Mastro <amastro@fb.com>
>
> Here's my attempt at adding some machinery to query iova ranges, with
> normalization to iommufd's struct. I kept the vfio capability chain stuff
> relatively generic so we can use it for other things in the future if needed.
Seems we were both hacking on this, I hadn't seen you posted this
before sending:
https://lore.kernel.org/kvm/20251108212954.26477-1-alex@shazbot.org/T/#u
Maybe we can combine the best merits of each. Thanks,
Alex
> I can sequence this after your fix?
>
> diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h
> index 240409bf5f8a..fb5efec52316 100644
> --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h
> +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h
> @@ -4,9 +4,12 @@
>
> #include <fcntl.h>
> #include <string.h>
> -#include <linux/vfio.h>
> +
> +#include <uapi/linux/types.h>
> +#include <linux/iommufd.h>
> #include <linux/list.h>
> #include <linux/pci_regs.h>
> +#include <linux/vfio.h>
>
> #include "../../../kselftest.h"
>
> @@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_
> void vfio_pci_device_cleanup(struct vfio_pci_device *device);
> void vfio_pci_device_reset(struct vfio_pci_device *device);
>
> +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> + size_t *nranges);
> +
> int __vfio_pci_dma_map(struct vfio_pci_device *device,
> struct vfio_dma_region *region);
> int __vfio_pci_dma_unmap(struct vfio_pci_device *device,
> diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> index a381fd253aa7..3297a41fdc31 100644
> --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c
> @@ -29,6 +29,145 @@
> VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \
> } while (0)
>
> +static struct vfio_info_cap_header *next_cap_hdr(void *buf, size_t bufsz,
> + size_t *cap_offset)
> +{
> + struct vfio_info_cap_header *hdr;
> +
> + if (!*cap_offset)
> + return NULL;
> +
> + /* Cap offset must be in bounds */
> + VFIO_ASSERT_LT(*cap_offset, bufsz);
> + /* There must be enough remaining space to contain the header */
> + VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr));
> + hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset);
> + /* If there is a next, offset must monotonically increase */
> + if (hdr->next)
> + VFIO_ASSERT_GT(hdr->next, *cap_offset);
> + *cap_offset = hdr->next;
> +
> + return hdr;
> +}
> +
> +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *buf,
> + u16 cap_id)
> +{
> + struct vfio_info_cap_header *hdr;
> + size_t cap_offset = buf->cap_offset;
> +
> + if (!(buf->flags & VFIO_IOMMU_INFO_CAPS))
> + return NULL;
> +
> + if (cap_offset)
> + VFIO_ASSERT_GE(cap_offset, sizeof(struct vfio_iommu_type1_info));
> +
> + while ((hdr = next_cap_hdr(buf, buf->argsz, &cap_offset))) {
> + if (hdr->id == cap_id)
> + return hdr;
> + }
> +
> + return NULL;
> +}
> +
> +/* Return buffer including capability chain, if present. Free with free() */
> +static struct vfio_iommu_type1_info *vfio_iommu_info_buf(struct vfio_pci_device *device)
> +{
> + struct vfio_iommu_type1_info *buf;
> +
> + buf = malloc(sizeof(*buf));
> + VFIO_ASSERT_NOT_NULL(buf);
> +
> + *buf = (struct vfio_iommu_type1_info) {
> + .argsz = sizeof(*buf),
> + };
> +
> + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
> +
> + buf = realloc(buf, buf->argsz);
> + VFIO_ASSERT_NOT_NULL(buf);
> +
> + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, buf);
> +
> + return buf;
> +}
> +
> +/*
> + * Normalize vfio_iommu_type1 to report iommufd's iommu_iova_range. Free with
> + * free().
> + */
> +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device,
> + size_t *nranges)
> +{
> + struct vfio_iommu_type1_info_cap_iova_range *cap_range;
> + struct vfio_iommu_type1_info *buf;
> + struct vfio_info_cap_header *hdr;
> + struct iommu_iova_range *ranges = NULL;
> +
> + buf = vfio_iommu_info_buf(device);
> + VFIO_ASSERT_NOT_NULL(buf);
> +
> + hdr = vfio_iommu_info_cap_hdr(buf, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE);
> + if (!hdr)
> + goto free_buf;
> +
> + cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header);
> + if (!cap_range->nr_iovas)
> + goto free_buf;
> +
> + ranges = malloc(cap_range->nr_iovas * sizeof(*ranges));
> + VFIO_ASSERT_NOT_NULL(ranges);
> +
> + for (u32 i = 0; i < cap_range->nr_iovas; i++) {
> + ranges[i] = (struct iommu_iova_range){
> + .start = cap_range->iova_ranges[i].start,
> + .last = cap_range->iova_ranges[i].end,
> + };
> + }
> +
> + *nranges = cap_range->nr_iovas;
> +
> +free_buf:
> + free(buf);
> + return ranges;
> +}
> +
> +struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device,
> + size_t *nranges)
> +{
> + struct iommu_iova_range *ranges;
> + int ret;
> +
> + struct iommu_ioas_iova_ranges query = {
> + .size = sizeof(query),
> + .ioas_id = device->ioas_id,
> + };
> +
> + ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> + VFIO_ASSERT_EQ(ret, -1);
> + VFIO_ASSERT_EQ(errno, EMSGSIZE);
> + VFIO_ASSERT_GT(query.num_iovas, 0);
> +
> + ranges = malloc(query.num_iovas * sizeof(*ranges));
> + VFIO_ASSERT_NOT_NULL(ranges);
> +
> + query.allowed_iovas = (uintptr_t)ranges;
> +
> + ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query);
> + *nranges = query.num_iovas;
> +
> + return ranges;
> +}
> +
> +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device,
> + size_t *nranges)
> +{
> + if (device->iommufd)
> + return iommufd_iova_ranges(device, nranges);
> +
> + return vfio_iommu_iova_ranges(device, nranges);
> +}
> +
> iova_t __to_iova(struct vfio_pci_device *device, void *vaddr)
> {
> struct vfio_dma_region *region;
> diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
> index 4f1ea79a200c..78983c4c293b 100644
> --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
> +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c
> @@ -3,6 +3,8 @@
> #include <sys/mman.h>
> #include <unistd.h>
>
> +#include <uapi/linux/types.h>
> +#include <linux/iommufd.h>
> #include <linux/limits.h>
> #include <linux/mman.h>
> #include <linux/sizes.h>
> @@ -243,12 +245,31 @@ FIXTURE_TEARDOWN(vfio_dma_map_limit_test)
> ASSERT_EQ(munmap(self->region.vaddr, self->mmap_size), 0);
> }
>
> +static iova_t last_legal_iova(struct vfio_pci_device *device)
> +{
> + struct iommu_iova_range *ranges;
> + size_t nranges;
> + iova_t ret;
> +
> + ranges = vfio_pci_iova_ranges(device, &nranges);
> + VFIO_ASSERT_NOT_NULL(ranges);
> +
> + ret = ranges[nranges - 1].last;
> + free(ranges);
> +
> + return ret;
> +}
> +
> TEST_F(vfio_dma_map_limit_test, unmap_range)
> {
> + iova_t last_iova = last_legal_iova(self->device);
> struct vfio_dma_region *region = &self->region;
> u64 unmapped;
> int rc;
>
> + if (last_iova != ~(iova_t)0)
> + SKIP(return, "last legal iova=0x%lx\n", last_iova);
> +
> vfio_pci_dma_map(self->device, region);
> ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
>
> @@ -259,10 +280,14 @@ TEST_F(vfio_dma_map_limit_test, unmap_range)
>
> TEST_F(vfio_dma_map_limit_test, unmap_all)
> {
> + iova_t last_iova = last_legal_iova(self->device);
> struct vfio_dma_region *region = &self->region;
> u64 unmapped;
> int rc;
>
> + if (last_iova != ~(iova_t)0)
> + SKIP(return, "last legal iova=0x%lx\n", last_iova);
> +
> vfio_pci_dma_map(self->device, region);
> ASSERT_EQ(region->iova, to_iova(self->device, region->vaddr));
>
>
On Sat, Nov 08, 2025 at 02:37:10PM -0700, Alex Williamson wrote:
> On Sat, 8 Nov 2025 12:19:48 -0800
> Alex Mastro <amastro@fb.com> wrote:
> > Here's my attempt at adding some machinery to query iova ranges, with
> > normalization to iommufd's struct. I kept the vfio capability chain stuff
> > relatively generic so we can use it for other things in the future if needed.
>
> Seems we were both hacking on this, I hadn't seen you posted this
> before sending:
>
> https://lore.kernel.org/kvm/20251108212954.26477-1-alex@shazbot.org/T/#u
>
> Maybe we can combine the best merits of each. Thanks,

Yes! I have been thinking along the following lines:

- Your idea to change the end of address space test to allocate at the end of
  the supported range is better and more general than my idea of skipping the
  test if ~(iova_t)0 is out of bounds. We should do that.
- Introducing the concept of an iova allocator makes sense.
- I think it's worthwhile to keep common test concepts like vfio_pci_device
  less opinionated/stateful so as not to close the door on certain categories
  of testing in the future. For example, if we ever wanted to test IOVA range
  contraction after binding additional devices to an IOAS or vfio container.
- What do you think about making the IOVA allocator concept something
  standalone that tests which need one can create? I think it would compose
  pretty cleanly on top of my vfio_pci_iova_ranges().

Alex
On Sat, 8 Nov 2025 17:20:10 -0800
Alex Mastro <amastro@fb.com> wrote:
> - I think it's worthwhile to keep common test concepts like vfio_pci_device
>   less opinionated/stateful so as not to close the door on certain categories
>   of testing in the future. For example, if we ever wanted to test IOVA range
>   contraction after binding additional devices to an IOAS or vfio container.

Yes, fetching the IOVA ranges should really occur after all the devices
are attached to the container/ioas rather than in device init. We need
another layer of abstraction for the shared IOMMU state. We can
probably work on that incrementally.

I certainly like the idea of testing range contraction, but I don't
know where we can reliably see that behavior.

> - What do you think about making the IOVA allocator concept something
>   standalone that tests which need one can create? I think it would compose
>   pretty cleanly on top of my vfio_pci_iova_ranges().

Yep, that sounds good. Obviously what's there is just the simplest
possible linear, aligned allocator with no attempt to fill gaps or
track allocations for freeing. We're not likely to exhaust the address
space in an individual unit test, I just wanted to relieve the test
from the burden of coming up with a valid IOVA, while leaving some
degree of geometry info for exploring the boundaries.

Are you interested in generating a combined v2?

TBH I'm not sure that just marking a test as skipped based on the DMA
mapping return is worthwhile with a couple proposals to add IOVA range
support already on the table. Thanks,

Alex
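For illustration, below is a minimal standalone sketch of the kind of linear,
aligned, bump-pointer allocator described above, built on an array of
{ start, last } ranges such as vfio_pci_iova_ranges() returns. The names
(iommu_iova_range_sketch, iova_allocator, iova_alloc) and the exact behavior
are assumptions made for this sketch; they are not taken from either posted
series.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t iova_t;

/* Mirrors iommufd's { start, last } range layout for this sketch. */
struct iommu_iova_range_sketch {
	iova_t start;
	iova_t last;
};

struct iova_allocator {
	struct iommu_iova_range_sketch *ranges;	/* sorted, non-overlapping */
	size_t nranges;
	size_t range_idx;	/* range currently being carved up */
	iova_t next;		/* next free iova within ranges[range_idx] */
};

/*
 * Bump-pointer allocation: walk forward through the ranges, never reuse or
 * free. align must be a nonzero power of two. Returns 0 and sets *iova on
 * success, -1 once no remaining range can satisfy the request.
 */
static int iova_alloc(struct iova_allocator *a, uint64_t size, uint64_t align,
		      iova_t *iova)
{
	while (a->range_idx < a->nranges) {
		struct iommu_iova_range_sketch *r = &a->ranges[a->range_idx];
		iova_t base = a->next > r->start ? a->next : r->start;
		iova_t aligned = (base + align - 1) & ~(align - 1);

		/* Move on if the round-up wrapped or the allocation won't fit. */
		if (aligned < base || aligned > r->last ||
		    r->last - aligned < size - 1) {
			a->range_idx++;
			a->next = 0;
			continue;
		}

		*iova = aligned;
		if (r->last - aligned == size - 1) {
			/* Range exactly consumed; start fresh on the next one. */
			a->range_idx++;
			a->next = 0;
		} else {
			a->next = aligned + size;
		}
		return 0;
	}
	return -1;
}

int main(void)
{
	/* Two toy ranges: a small low window and the top page of the u64 space. */
	struct iommu_iova_range_sketch ranges[] = {
		{ .start = 0x1000,                .last = 0x3fff },
		{ .start = 0xfffffffffffff000ULL, .last = 0xffffffffffffffffULL },
	};
	struct iova_allocator a = { .ranges = ranges, .nranges = 2 };
	iova_t iova;

	while (!iova_alloc(&a, 0x1000, 0x1000, &iova))
		printf("allocated 0x1000 bytes at iova 0x%llx\n",
		       (unsigned long long)iova);
	return 0;
}

Given vfio_pci_iova_ranges() from the draft above, a test could fill
a->ranges/a->nranges from its result and call iova_alloc() instead of deriving
the IOVA from the mmap() address; the last entry's .last also gives the
end-of-address-space target directly.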
On Mon, Nov 10, 2025 at 08:17:09AM -0700, Alex Williamson wrote:
> I certainly like the idea of testing range contraction, but I don't
> know where we can reliably see that behavior.

I'm not sure about the exact testing strategy for that yet either, actually.

> Yep, that sounds good. Obviously what's there is just the simplest
> possible linear, aligned allocator with no attempt to fill gaps or
> track allocations for freeing. We're not likely to exhaust the address
> space in an individual unit test, I just wanted to relieve the test
> from the burden of coming up with a valid IOVA, while leaving some
> degree of geometry info for exploring the boundaries.

Keeping the simple linear allocator makes sense to me.

> Are you interested in generating a combined v2?

Sure -- I can put up a v2 series which stages like so:
- adds stateless low-level iova ranges queries
- adds an iova allocator utility object
- fixes end of ranges tests, uses iova allocator instead of iova=vaddr

> TBH I'm not sure that just marking a test as skipped based on the DMA
> mapping return is worthwhile with a couple proposals to add IOVA range
> support already on the table. Thanks,

I'll put up the new series rooted on linux-vfio/next soon.
On 2025-11-10 08:48 AM, Alex Mastro wrote:
> On Mon, Nov 10, 2025 at 08:17:09AM -0700, Alex Williamson wrote:
> > Yes, fetching the IOVA ranges should really occur after all the devices
> > are attached to the container/ioas rather than in device init. We need
> > another layer of abstraction for the shared IOMMU state. We can
> > probably work on that incrementally.

I am working on pulling the iommu state out of struct vfio_pci_device
here:

https://lore.kernel.org/kvm/20251008232531.1152035-5-dmatlack@google.com/

But if we keep the iova allocator a separate object, then we can
introduce it mostly independently from this series. I imagine the only
thing that will change is passing a struct iommu * instead of a struct
vfio_pci_device * when initializing the allocator.

> Sure -- I can put up a v2 series which stages like so:
> - adds stateless low-level iova ranges queries
> - adds an iova allocator utility object
> - fixes end of ranges tests, uses iova allocator instead of iova=vaddr

+1 to getting rid of iova=vaddr.

But note that the HugeTLB tests in vfio_dma_mapping_test.c have
alignment requirements to pass on Intel (since they validate that the
pages are mapped at the right level in the I/O page tables using the
Intel debugfs interface).

> > TBH I'm not sure that just marking a test as skipped based on the DMA
> > mapping return is worthwhile with a couple proposals to add IOVA range
> > support already on the table. Thanks,
>
> I'll put up the new series rooted on linux-vfio/next soon.

I think we should try to get vfio_dma_mapping_test back to passing in
time for Linux 6.18, since the newly failing test was added in 6.18.

The sequence I was imagining was:

1. Merge the quick fix to skip the test into 6.18.
2. Split struct iommu from struct vfio_pci_device.
3. Add iova allocator.

AlexW, how much time do we have to get AlexM's series ready? I am fine
with doing (3), then (2), and dropping (1) if there's enough time.
On Mon, 10 Nov 2025 18:00:08 +0000
David Matlack <dmatlack@google.com> wrote:
> I think we should try to get vfio_dma_mapping_test back to passing in
> time for Linux 6.18, since the newly failing test was added in 6.18.
>
> The sequence I was imagining was:
>
> 1. Merge the quick fix to skip the test into 6.18.

We'd still have the iova=vaddr failure on some platforms, but could
hack around that by hard coding some "well supported" IOVA like 0 or
4GB.

> 2. Split struct iommu from struct vfio_pci_device.
> 3. Add iova allocator.
>
> AlexW, how much time do we have to get AlexM's series ready? I am fine
> with doing (3), then (2), and dropping (1) if there's enough time.

I'll certainly agree that it'd be a much better precedent if the self
test were initially working, but also we should not increase the scope
beyond what we need to make it work for v6.18. If we can get that done
in the next day or two, add it to linux-next mid-week, and get Linus to
pull for rc6, I think that'd be reasonable. Thanks,

Alex
On Mon, Nov 10, 2025 at 10:38 AM Alex Williamson <alex@shazbot.org> wrote:
> On Mon, 10 Nov 2025 18:00:08 +0000
> David Matlack <dmatlack@google.com> wrote:
> > 1. Merge the quick fix to skip the test into 6.18.
>
> We'd still have the iova=vaddr failure on some platforms, but could
> hack around that by hard coding some "well supported" IOVA like 0 or
> 4GB.

Good point. We tried using IOVA=0 internally for a while but hit issues
on ARM platforms that have a reserved region at [0x8000000, 0x8100000).
So I think iova=4GB would be better.

> > AlexW, how much time do we have to get AlexM's series ready? I am fine
> > with doing (3), then (2), and dropping (1) if there's enough time.
>
> I'll certainly agree that it'd be a much better precedent if the self
> test were initially working, but also we should not increase the scope
> beyond what we need to make it work for v6.18. If we can get that done
> in the next day or two, add it to linux-next mid-week, and get Linus to
> pull for rc6, I think that'd be reasonable. Thanks,

Ack. I'll send a small series with this patch plus a patch to replace
iova=vaddr with iova=4G, and we can use that as a back-up plan if
AlexM's iova allocator isn't ready in time for 6.18.
On Mon, Nov 10, 2025 at 11:45 AM David Matlack <dmatlack@google.com> wrote:
> Ack. I'll send a small series with this patch plus a patch to replace
> iova=vaddr with iova=4G, and we can use that as a back-up plan if
> AlexM's iova allocator isn't ready in time for 6.18.

I think we have a good chance to get the allocator series ready in time
for 6.18 (AlexM is quick :), so I'll hold off on sending a v2 of my
short term fixes.