In case we detect the address space is translated by
a virtual IOMMU which requires HW nested paging to
integrate with VFIO, let's set up the container with
the VFIO_TYPE1_NESTING_IOMMU iommu_type.
Signed-off-by: Eric Auger <eric.auger@redhat.com>
---
v4 -> v5:
- fail immediatly if nested is wanted but not supported
v2 -> v3:
- add "nested only is selected if requested by @force_nested"
comment in this patch
---
hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 0b3593b3c0..ac417b5dbd 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1155,27 +1155,38 @@ static void vfio_put_address_space(VFIOAddressSpace *space)
* vfio_get_iommu_type - selects the richest iommu_type (v2 first)
*/
static int vfio_get_iommu_type(VFIOContainer *container,
+ bool want_nested,
Error **errp)
{
- int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
+ int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
+ VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
- int i;
+ int i, ret = -EINVAL;
for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
- return iommu_types[i];
+ if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) {
+ continue;
+ }
+ ret = iommu_types[i];
+ break;
}
}
- error_setg(errp, "No available IOMMU models");
- return -EINVAL;
+ if (ret < 0) {
+ error_setg(errp, "No available IOMMU models");
+ } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
+ error_setg(errp, "Nested mode requested but not supported");
+ ret = -EINVAL;
+ }
+ return ret;
}
static int vfio_init_container(VFIOContainer *container, int group_fd,
- Error **errp)
+ bool want_nested, Error **errp)
{
int iommu_type, ret;
- iommu_type = vfio_get_iommu_type(container, errp);
+ iommu_type = vfio_get_iommu_type(container, want_nested, errp);
if (iommu_type < 0) {
return iommu_type;
}
@@ -1211,6 +1222,14 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
VFIOContainer *container;
int ret, fd;
VFIOAddressSpace *space;
+ IOMMUMemoryRegion *iommu_mr;
+ bool nested = false;
+
+ if (as != &address_space_memory && memory_region_is_iommu(as->root)) {
+ iommu_mr = IOMMU_MEMORY_REGION(as->root);
+ memory_region_iommu_get_attr(iommu_mr, IOMMU_ATTR_VFIO_NESTED,
+ (void *)&nested);
+ }
space = vfio_get_address_space(as);
@@ -1272,12 +1291,13 @@ static int vfio_connect_container(VFIOGroup *group, AddressSpace *as,
QLIST_INIT(&container->giommu_list);
QLIST_INIT(&container->hostwin_list);
- ret = vfio_init_container(container, group->fd, errp);
+ ret = vfio_init_container(container, group->fd, nested, errp);
if (ret) {
goto free_container_exit;
}
switch (container->iommu_type) {
+ case VFIO_TYPE1_NESTING_IOMMU:
case VFIO_TYPE1v2_IOMMU:
case VFIO_TYPE1_IOMMU:
{
--
2.20.1
Hi Eric,
> From: Eric Auger <eric.auger@redhat.com>
> Sent: Saturday, March 21, 2020 12:58 AM
> To: eric.auger.pro@gmail.com; eric.auger@redhat.com; qemu-devel@nongnu.org;
> Subject: [RFC v6 09/24] vfio: Force nested if iommu requires it
>
> In case we detect the address space is translated by
> a virtual IOMMU which requires HW nested paging to
> integrate with VFIO, let's set up the container with
> the VFIO_TYPE1_NESTING_IOMMU iommu_type.
>
> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>
> ---
>
> v4 -> v5:
> - fail immediatly if nested is wanted but not supported
>
> v2 -> v3:
> - add "nested only is selected if requested by @force_nested"
> comment in this patch
> ---
> hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
> 1 file changed, 28 insertions(+), 8 deletions(-)
>
> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> index 0b3593b3c0..ac417b5dbd 100644
> --- a/hw/vfio/common.c
> +++ b/hw/vfio/common.c
> @@ -1155,27 +1155,38 @@ static void vfio_put_address_space(VFIOAddressSpace
> *space)
> * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
> */
> static int vfio_get_iommu_type(VFIOContainer *container,
> + bool want_nested,
> Error **errp)
> {
> - int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
> + int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
> + VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
> VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
> - int i;
> + int i, ret = -EINVAL;
>
> for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
> if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
> - return iommu_types[i];
> + if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) {
> + continue;
> + }
> + ret = iommu_types[i];
> + break;
> }
> }
> - error_setg(errp, "No available IOMMU models");
> - return -EINVAL;
> + if (ret < 0) {
> + error_setg(errp, "No available IOMMU models");
> + } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
> + error_setg(errp, "Nested mode requested but not supported");
> + ret = -EINVAL;
> + }
> + return ret;
> }
>
> static int vfio_init_container(VFIOContainer *container, int group_fd,
> - Error **errp)
> + bool want_nested, Error **errp)
> {
> int iommu_type, ret;
>
> - iommu_type = vfio_get_iommu_type(container, errp);
> + iommu_type = vfio_get_iommu_type(container, want_nested, errp);
> if (iommu_type < 0) {
> return iommu_type;
> }
> @@ -1211,6 +1222,14 @@ static int vfio_connect_container(VFIOGroup *group,
> AddressSpace *as,
> VFIOContainer *container;
> int ret, fd;
> VFIOAddressSpace *space;
> + IOMMUMemoryRegion *iommu_mr;
> + bool nested = false;
> +
> + if (as != &address_space_memory && memory_region_is_iommu(as->root)) {
I tried on my side. For virtual VT-d, it doesn't work as in intel_iommu,
we have a dynamic switch mechanism. Thus that, the
memory_region_is_iommu(as->root) won't return true as expected. I'm afraid
it doesn't work for virtual VT-d. So firstly, I'm wondering if
as != &address_space_memory is enough. Secondly, I'm considering if it is
good to let vfio_get_group() caller to provide a hint whether vIOMMU is
exposed. e.g. vfio_realize() in vfio/pci.c could figure out whether vIOMMU
is set easily. Thoughts?
Regards,
Yi Liu
Yi,
On 3/31/20 8:34 AM, Liu, Yi L wrote:
> Hi Eric,
>
>> From: Eric Auger <eric.auger@redhat.com>
>> Sent: Saturday, March 21, 2020 12:58 AM
>> To: eric.auger.pro@gmail.com; eric.auger@redhat.com; qemu-devel@nongnu.org;
>> Subject: [RFC v6 09/24] vfio: Force nested if iommu requires it
>>
>> In case we detect the address space is translated by
>> a virtual IOMMU which requires HW nested paging to
>> integrate with VFIO, let's set up the container with
>> the VFIO_TYPE1_NESTING_IOMMU iommu_type.
>>
>> Signed-off-by: Eric Auger <eric.auger@redhat.com>
>>
>> ---
>>
>> v4 -> v5:
>> - fail immediatly if nested is wanted but not supported
>>
>> v2 -> v3:
>> - add "nested only is selected if requested by @force_nested"
>> comment in this patch
>> ---
>> hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
>> 1 file changed, 28 insertions(+), 8 deletions(-)
>>
>> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
>> index 0b3593b3c0..ac417b5dbd 100644
>> --- a/hw/vfio/common.c
>> +++ b/hw/vfio/common.c
>> @@ -1155,27 +1155,38 @@ static void vfio_put_address_space(VFIOAddressSpace
>> *space)
>> * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
>> */
>> static int vfio_get_iommu_type(VFIOContainer *container,
>> + bool want_nested,
>> Error **errp)
>> {
>> - int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
>> + int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
>> + VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
>> VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
>> - int i;
>> + int i, ret = -EINVAL;
>>
>> for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
>> if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
>> - return iommu_types[i];
>> + if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested) {
>> + continue;
>> + }
>> + ret = iommu_types[i];
>> + break;
>> }
>> }
>> - error_setg(errp, "No available IOMMU models");
>> - return -EINVAL;
>> + if (ret < 0) {
>> + error_setg(errp, "No available IOMMU models");
>> + } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
>> + error_setg(errp, "Nested mode requested but not supported");
>> + ret = -EINVAL;
>> + }
>> + return ret;
>> }
>>
>> static int vfio_init_container(VFIOContainer *container, int group_fd,
>> - Error **errp)
>> + bool want_nested, Error **errp)
>> {
>> int iommu_type, ret;
>>
>> - iommu_type = vfio_get_iommu_type(container, errp);
>> + iommu_type = vfio_get_iommu_type(container, want_nested, errp);
>> if (iommu_type < 0) {
>> return iommu_type;
>> }
>> @@ -1211,6 +1222,14 @@ static int vfio_connect_container(VFIOGroup *group,
>> AddressSpace *as,
>> VFIOContainer *container;
>> int ret, fd;
>> VFIOAddressSpace *space;
>> + IOMMUMemoryRegion *iommu_mr;
>> + bool nested = false;
>> +
>> + if (as != &address_space_memory && memory_region_is_iommu(as->root)) {
>
> I tried on my side. For virtual VT-d, it doesn't work as in intel_iommu,
> we have a dynamic switch mechanism. Thus that, the
> memory_region_is_iommu(as->root) won't return true as expected. I'm afraid
> it doesn't work for virtual VT-d. So firstly, I'm wondering if
> as != &address_space_memory is enough.
(as != &address_space_memory) should be sufficient to tell that a vIOMMU
is being used. But then, for example, you don't want to set nested
paging for the virtio-iommu because virtio-iommu/VFIO uses notify-on-my
(CM similar implementation). That's why I devised an attribute to
retrieve the vIOMMU need for nested.
Secondly, I'm considering if it is
> good to let vfio_get_group() caller to provide a hint whether vIOMMU is
> exposed. e.g. vfio_realize() in vfio/pci.c could figure out whether vIOMMU
> is set easily. Thoughts?
Sorry I don't get your point here. Why is it easier to figure out
whether vIOMMU is set in vfio_realize()?
pci_device_iommu_address_space(pdev) != &address_space_memory
does determine whether a vIOMMU is in place, no?
Thanks
Eric
>
> Regards,
> Yi Liu
>
> From: Auger Eric <eric.auger@redhat.com>
> Sent: Tuesday, March 31, 2020 4:05 PM
> To: Liu, Yi L <yi.l.liu@intel.com>; eric.auger.pro@gmail.com; qemu-
> Subject: Re: [RFC v6 09/24] vfio: Force nested if iommu requires it
>
> Yi,
>
> On 3/31/20 8:34 AM, Liu, Yi L wrote:
> > Hi Eric,
> >
> >> From: Eric Auger <eric.auger@redhat.com>
> >> Sent: Saturday, March 21, 2020 12:58 AM
> >> To: eric.auger.pro@gmail.com; eric.auger@redhat.com; qemu-
> devel@nongnu.org;
> >> Subject: [RFC v6 09/24] vfio: Force nested if iommu requires it
> >>
> >> In case we detect the address space is translated by
> >> a virtual IOMMU which requires HW nested paging to
> >> integrate with VFIO, let's set up the container with
> >> the VFIO_TYPE1_NESTING_IOMMU iommu_type.
> >>
> >> Signed-off-by: Eric Auger <eric.auger@redhat.com>
> >>
> >> ---
> >>
> >> v4 -> v5:
> >> - fail immediatly if nested is wanted but not supported
> >>
> >> v2 -> v3:
> >> - add "nested only is selected if requested by @force_nested"
> >> comment in this patch
> >> ---
> >> hw/vfio/common.c | 36 ++++++++++++++++++++++++++++--------
> >> 1 file changed, 28 insertions(+), 8 deletions(-)
> >>
> >> diff --git a/hw/vfio/common.c b/hw/vfio/common.c
> >> index 0b3593b3c0..ac417b5dbd 100644
> >> --- a/hw/vfio/common.c
> >> +++ b/hw/vfio/common.c
> >> @@ -1155,27 +1155,38 @@ static void
> vfio_put_address_space(VFIOAddressSpace
> >> *space)
> >> * vfio_get_iommu_type - selects the richest iommu_type (v2 first)
> >> */
> >> static int vfio_get_iommu_type(VFIOContainer *container,
> >> + bool want_nested,
> >> Error **errp)
> >> {
> >> - int iommu_types[] = { VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
> >> + int iommu_types[] = { VFIO_TYPE1_NESTING_IOMMU,
> >> + VFIO_TYPE1v2_IOMMU, VFIO_TYPE1_IOMMU,
> >> VFIO_SPAPR_TCE_v2_IOMMU, VFIO_SPAPR_TCE_IOMMU };
> >> - int i;
> >> + int i, ret = -EINVAL;
> >>
> >> for (i = 0; i < ARRAY_SIZE(iommu_types); i++) {
> >> if (ioctl(container->fd, VFIO_CHECK_EXTENSION, iommu_types[i])) {
> >> - return iommu_types[i];
> >> + if (iommu_types[i] == VFIO_TYPE1_NESTING_IOMMU && !want_nested)
> {
> >> + continue;
> >> + }
> >> + ret = iommu_types[i];
> >> + break;
> >> }
> >> }
> >> - error_setg(errp, "No available IOMMU models");
> >> - return -EINVAL;
> >> + if (ret < 0) {
> >> + error_setg(errp, "No available IOMMU models");
> >> + } else if (want_nested && ret != VFIO_TYPE1_NESTING_IOMMU) {
> >> + error_setg(errp, "Nested mode requested but not supported");
> >> + ret = -EINVAL;
> >> + }
> >> + return ret;
> >> }
> >>
> >> static int vfio_init_container(VFIOContainer *container, int group_fd,
> >> - Error **errp)
> >> + bool want_nested, Error **errp)
> >> {
> >> int iommu_type, ret;
> >>
> >> - iommu_type = vfio_get_iommu_type(container, errp);
> >> + iommu_type = vfio_get_iommu_type(container, want_nested, errp);
> >> if (iommu_type < 0) {
> >> return iommu_type;
> >> }
> >> @@ -1211,6 +1222,14 @@ static int vfio_connect_container(VFIOGroup *group,
> >> AddressSpace *as,
> >> VFIOContainer *container;
> >> int ret, fd;
> >> VFIOAddressSpace *space;
> >> + IOMMUMemoryRegion *iommu_mr;
> >> + bool nested = false;
> >> +
> >> + if (as != &address_space_memory && memory_region_is_iommu(as->root))
> {
> >
> > I tried on my side. For virtual VT-d, it doesn't work as in intel_iommu,
> > we have a dynamic switch mechanism. Thus that, the
> > memory_region_is_iommu(as->root) won't return true as expected. I'm afraid
> > it doesn't work for virtual VT-d. So firstly, I'm wondering if
> > as != &address_space_memory is enough.
>
> (as != &address_space_memory) should be sufficient to tell that a vIOMMU
> is being used. But then, for example, you don't want to set nested
> paging for the virtio-iommu because virtio-iommu/VFIO uses notify-on-my
> (CM similar implementation). That's why I devised an attribute to
> retrieve the vIOMMU need for nested.
>
> Secondly, I'm considering if it is
> > good to let vfio_get_group() caller to provide a hint whether vIOMMU is
> > exposed. e.g. vfio_realize() in vfio/pci.c could figure out whether vIOMMU
> > is set easily. Thoughts?
> Sorry I don't get your point here. Why is it easier to figure out
> whether vIOMMU is set in vfio_realize()?
>
> pci_device_iommu_address_space(pdev) != &address_space_memory
> does determine whether a vIOMMU is in place, no?
>
No it's not just pci_device_iommu_address_space(pdev) != &address_space_memory,
I agree with your above comment, it's not enough to tell whether nesting is
needed or not. I'd like to add an API like pci_device_iommu_nesting_required(),
so that it can be determined. In the meanwhile, adding a query callback in
PCIIOMMUOps introduced in below pathc. Guess it works?
[v2,05/22] hw/pci: modify pci_setup_iommu() to set PCIIOMMUOps
https://patchwork.kernel.org/patch/11464577/
Regards,
Yi Liu
© 2016 - 2026 Red Hat, Inc.