[PATCH v3 05/10] vfio/iommufd: Probe and request hwpt dirty tracking capability

Joao Martins posted 10 patches 4 months, 2 weeks ago
Maintainers: Yi Liu <yi.l.liu@intel.com>, Eric Auger <eric.auger@redhat.com>, Zhenzhong Duan <zhenzhong.duan@intel.com>, Alex Williamson <alex.williamson@redhat.com>, "Cédric Le Goater" <clg@redhat.com>
[PATCH v3 05/10] vfio/iommufd: Probe and request hwpt dirty tracking capability
Posted by Joao Martins 4 months, 2 weeks ago
Probe hardware dirty tracking support by querying device hw capabilities via
IOMMUFD_GET_HW_INFO.

In preparation to using the dirty tracking UAPI, request dirty tracking in the
HWPT flags when the IOMMU supports dirty tracking.

The auto domain logic allows different IOMMU domains to be created when DMA
dirty tracking is not desired (and VF can provide it) while others doesn't have
it and want the IOMMU capability. This is not used in this way here given how
VFIODevice migration capability checking takes place *after* the device
attachment. But such granularity is a nice property that can be implemented
later on.

Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 include/hw/vfio/vfio-common.h |  1 +
 hw/vfio/iommufd.c             | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
index 82c5a4aaa61e..7ce925cfab19 100644
--- a/include/hw/vfio/vfio-common.h
+++ b/include/hw/vfio/vfio-common.h
@@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
 
 typedef struct VFIOIOASHwpt {
     uint32_t hwpt_id;
+    uint32_t hwpt_flags;
     QLIST_HEAD(, VFIODevice) device_list;
     QLIST_ENTRY(VFIOIOASHwpt) next;
 } VFIOIOASHwpt;
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 2ca9a32cc7b6..1b5b46d28ed6 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -212,6 +212,20 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
     return true;
 }
 
+static bool iommufd_device_dirty_tracking(IOMMUFDBackend *iommufd,
+                                          VFIODevice *vbasedev)
+{
+    enum iommu_hw_info_type type;
+    uint64_t caps;
+
+    if (!iommufd_backend_get_device_info(iommufd, vbasedev->devid, &type,
+                                         NULL, 0, &caps, NULL)) {
+        return false;
+    }
+
+    return caps & IOMMU_HW_CAP_DIRTY_TRACKING;
+}
+
 static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
                                          VFIOIOMMUFDContainer *container,
                                          Error **errp)
@@ -239,6 +253,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
         }
     }
 
+    /*
+     * This is quite early and VFIODevice isn't yet fully initialized,
+     * thus rely on IOMMU hardware capabilities as to whether IOMMU dirty
+     * tracking is going to be needed.
+     */
+    if (iommufd_device_dirty_tracking(iommufd, vbasedev)) {
+        flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
+    }
+
     ret = iommufd_backend_alloc_hwpt(iommufd,
                                      vbasedev->devid,
                                      container->ioas_id, flags,
@@ -255,6 +278,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
 
     hwpt = g_malloc0(sizeof(*hwpt));
     hwpt->hwpt_id = hwpt_id;
+    hwpt->hwpt_flags = flags;
     QLIST_INIT(&hwpt->device_list);
 
     ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
@@ -267,6 +291,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
     vbasedev->hwpt = hwpt;
     QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
     QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
+    container->bcontainer.dirty_pages_supported |=
+                              (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING);
     return true;
 }
 
-- 
2.17.2
Re: [PATCH v3 05/10] vfio/iommufd: Probe and request hwpt dirty tracking capability
Posted by Cédric Le Goater 4 months, 2 weeks ago
On 7/8/24 4:34 PM, Joao Martins wrote:
> Probe hardware dirty tracking support by querying device hw capabilities via
> IOMMUFD_GET_HW_INFO.
> 
> In preparation to using the dirty tracking UAPI, request dirty tracking in the
> HWPT flags when the IOMMU supports dirty tracking.
> 
> The auto domain logic allows different IOMMU domains to be created when DMA
> dirty tracking is not desired (and VF can provide it) while others doesn't have
> it and want the IOMMU capability. This is not used in this way here given how
> VFIODevice migration capability checking takes place *after* the device
> attachment. But such granularity is a nice property that can be implemented
> later on.
> 
> Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
> ---
>   include/hw/vfio/vfio-common.h |  1 +
>   hw/vfio/iommufd.c             | 26 ++++++++++++++++++++++++++
>   2 files changed, 27 insertions(+)
> 
> diff --git a/include/hw/vfio/vfio-common.h b/include/hw/vfio/vfio-common.h
> index 82c5a4aaa61e..7ce925cfab19 100644
> --- a/include/hw/vfio/vfio-common.h
> +++ b/include/hw/vfio/vfio-common.h
> @@ -97,6 +97,7 @@ typedef struct IOMMUFDBackend IOMMUFDBackend;
>   
>   typedef struct VFIOIOASHwpt {
>       uint32_t hwpt_id;
> +    uint32_t hwpt_flags;
>       QLIST_HEAD(, VFIODevice) device_list;
>       QLIST_ENTRY(VFIOIOASHwpt) next;
>   } VFIOIOASHwpt;
> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
> index 2ca9a32cc7b6..1b5b46d28ed6 100644
> --- a/hw/vfio/iommufd.c
> +++ b/hw/vfio/iommufd.c
> @@ -212,6 +212,20 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice *vbasedev, Error **errp)
>       return true;
>   }
>   
> +static bool iommufd_device_dirty_tracking(IOMMUFDBackend *iommufd,
> +                                          VFIODevice *vbasedev)
> +{
> +    enum iommu_hw_info_type type;
> +    uint64_t caps;
> +
> +    if (!iommufd_backend_get_device_info(iommufd, vbasedev->devid, &type,
> +                                         NULL, 0, &caps, NULL)) {

I think we should report the error and not ignore it.

That said, since we are probing the hw features of the host IOMMU device,
could we use the data cached in the HostIOMMUDevice struct instead ?
This means would need to move the ->realize() call doing the probing
before attaching the device in vfio_attach_device(). That way we would
catch probing errors in one place. Does this make sense ?

Thanks,

C.




> +        return false;
> +    }
> +
> +    return caps & IOMMU_HW_CAP_DIRTY_TRACKING;
> +}
> +
>   static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>                                            VFIOIOMMUFDContainer *container,
>                                            Error **errp)
> @@ -239,6 +253,15 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>           }
>       }
>   
> +    /*
> +     * This is quite early and VFIODevice isn't yet fully initialized,
> +     * thus rely on IOMMU hardware capabilities as to whether IOMMU dirty
> +     * tracking is going to be needed.
> +     */
> +    if (iommufd_device_dirty_tracking(iommufd, vbasedev)) {
> +        flags = IOMMU_HWPT_ALLOC_DIRTY_TRACKING;
> +    }
> +
>       ret = iommufd_backend_alloc_hwpt(iommufd,
>                                        vbasedev->devid,
>                                        container->ioas_id, flags,
> @@ -255,6 +278,7 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>   
>       hwpt = g_malloc0(sizeof(*hwpt));
>       hwpt->hwpt_id = hwpt_id;
> +    hwpt->hwpt_flags = flags;
>       QLIST_INIT(&hwpt->device_list);
>   
>       ret = iommufd_cdev_attach_ioas_hwpt(vbasedev, hwpt->hwpt_id, errp);
> @@ -267,6 +291,8 @@ static bool iommufd_cdev_autodomains_get(VFIODevice *vbasedev,
>       vbasedev->hwpt = hwpt;
>       QLIST_INSERT_HEAD(&hwpt->device_list, vbasedev, hwpt_next);
>       QLIST_INSERT_HEAD(&container->hwpt_list, hwpt, next);
> +    container->bcontainer.dirty_pages_supported |=
> +                              (flags & IOMMU_HWPT_ALLOC_DIRTY_TRACKING);
>       return true;
>   }
>
Re: [PATCH v3 05/10] vfio/iommufd: Probe and request hwpt dirty tracking capability
Posted by Joao Martins 4 months, 2 weeks ago
On 09/07/2024 07:28, Cédric Le Goater wrote:
> On 7/8/24 4:34 PM, Joao Martins wrote:
>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>> index 2ca9a32cc7b6..1b5b46d28ed6 100644
>> --- a/hw/vfio/iommufd.c
>> +++ b/hw/vfio/iommufd.c
>> @@ -212,6 +212,20 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice
>> *vbasedev, Error **errp)
>>       return true;
>>   }
>>   +static bool iommufd_device_dirty_tracking(IOMMUFDBackend *iommufd,
>> +                                          VFIODevice *vbasedev)
>> +{
>> +    enum iommu_hw_info_type type;
>> +    uint64_t caps;
>> +
>> +    if (!iommufd_backend_get_device_info(iommufd, vbasedev->devid, &type,
>> +                                         NULL, 0, &caps, NULL)) {
> 
> I think we should report the error and not ignore it.
> 
> That said, since we are probing the hw features of the host IOMMU device,
> could we use the data cached in the HostIOMMUDevice struct instead ?
> This means would need to move the ->realize() call doing the probing
> before attaching the device in vfio_attach_device(). That way we would
> catch probing errors in one place. Does this make sense ?

Yeap. It also helps centralizing cap checking in addition.

This stanadlone use of iommufd_backend_get_device_info() was also annoying me a
little, and there doesn't seem to have a reason not to move the initialization
of caps earlier. I'll do that

	Joao

Re: [PATCH v3 05/10] vfio/iommufd: Probe and request hwpt dirty tracking capability
Posted by Joao Martins 4 months, 2 weeks ago
On 09/07/2024 10:04, Joao Martins wrote:
> On 09/07/2024 07:28, Cédric Le Goater wrote:
>> On 7/8/24 4:34 PM, Joao Martins wrote:
>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>> index 2ca9a32cc7b6..1b5b46d28ed6 100644
>>> --- a/hw/vfio/iommufd.c
>>> +++ b/hw/vfio/iommufd.c
>>> @@ -212,6 +212,20 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice
>>> *vbasedev, Error **errp)
>>>       return true;
>>>   }
>>>   +static bool iommufd_device_dirty_tracking(IOMMUFDBackend *iommufd,
>>> +                                          VFIODevice *vbasedev)
>>> +{
>>> +    enum iommu_hw_info_type type;
>>> +    uint64_t caps;
>>> +
>>> +    if (!iommufd_backend_get_device_info(iommufd, vbasedev->devid, &type,
>>> +                                         NULL, 0, &caps, NULL)) {
>>
>> I think we should report the error and not ignore it.
>>
>> That said, since we are probing the hw features of the host IOMMU device,
>> could we use the data cached in the HostIOMMUDevice struct instead ?
>> This means would need to move the ->realize() call doing the probing
>> before attaching the device in vfio_attach_device(). That way we would
>> catch probing errors in one place. Does this make sense ?
> 
> Yeap. It also helps centralizing cap checking in addition.
> 
> This stanadlone use of iommufd_backend_get_device_info() was also annoying me a
> little, and there doesn't seem to have a reason not to move the initialization
> of caps earlier. I'll do that

Maybe I was a little too early into this. I had the snip below, but I forgot
that vbasedev::devid / vbasedev::iommufd are used by hiod realize() and that is
done in the very beginning of ->attach_device() of iommufd backend. Not enterily
sure how to unravel that hmmm

diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 7cdb969fd396..42931891bf8e 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1552,17 +1552,17 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,

     assert(ops);

-    if (!ops->attach_device(name, vbasedev, as, errp)) {
-        return false;
-    }
-
     hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
     if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
         object_unref(hiod);
-        ops->detach_device(vbasedev);
         return false;
     }
+
     vbasedev->hiod = hiod;
+    if (!ops->attach_device(name, vbasedev, as, errp)) {
+        object_unref(hiod);
+        return false;
+    }

     return true;
 }


Re: [PATCH v3 05/10] vfio/iommufd: Probe and request hwpt dirty tracking capability
Posted by Joao Martins 4 months, 2 weeks ago
On 09/07/2024 13:47, Joao Martins wrote:
> On 09/07/2024 10:04, Joao Martins wrote:
>> On 09/07/2024 07:28, Cédric Le Goater wrote:
>>> On 7/8/24 4:34 PM, Joao Martins wrote:
>>>> diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
>>>> index 2ca9a32cc7b6..1b5b46d28ed6 100644
>>>> --- a/hw/vfio/iommufd.c
>>>> +++ b/hw/vfio/iommufd.c
>>>> @@ -212,6 +212,20 @@ static bool iommufd_cdev_detach_ioas_hwpt(VFIODevice
>>>> *vbasedev, Error **errp)
>>>>       return true;
>>>>   }
>>>>   +static bool iommufd_device_dirty_tracking(IOMMUFDBackend *iommufd,
>>>> +                                          VFIODevice *vbasedev)
>>>> +{
>>>> +    enum iommu_hw_info_type type;
>>>> +    uint64_t caps;
>>>> +
>>>> +    if (!iommufd_backend_get_device_info(iommufd, vbasedev->devid, &type,
>>>> +                                         NULL, 0, &caps, NULL)) {
>>>
>>> I think we should report the error and not ignore it.
>>>
>>> That said, since we are probing the hw features of the host IOMMU device,
>>> could we use the data cached in the HostIOMMUDevice struct instead ?
>>> This means would need to move the ->realize() call doing the probing
>>> before attaching the device in vfio_attach_device(). That way we would
>>> catch probing errors in one place. Does this make sense ?
>>
>> Yeap. It also helps centralizing cap checking in addition.
>>
>> This stanadlone use of iommufd_backend_get_device_info() was also annoying me a
>> little, and there doesn't seem to have a reason not to move the initialization
>> of caps earlier. I'll do that
> 
> Maybe I was a little too early into this. I had the snip below, but I forgot
> that vbasedev::devid / vbasedev::iommufd are used by hiod realize() and that is
> done in the very beginning of ->attach_device() of iommufd backend. Not enterily
> sure how to unravel that hmmm
> 

Here's what I came up with (below). Does it matches what you expected?

One thing I wasn't quite clear is what you and Zhenzhong purpose with
HostIOMMUDevice. It looks quite geared towards IOMMUFD with 'enough'
compatiblity for legacy backend:

 typedef struct HostIOMMUDeviceCaps {
     uint32_t type;
     uint8_t aw_bits;
+     struct {
+         void *hw_data;
+         uint64_t hw_data_len;
+         uint64_t hw_caps;
+     } iommufd;
 } HostIOMMUDeviceCaps;

So I namespaced the new data we get from ioctl with iommufd, but maybe it wasn't
needed and this doesn't match the style? Let me know your thoughts

--------------->8----------------

From: Joao Martins <joao.m.martins@oracle.com>
Date: Tue, 9 Jul 2024 11:56:18 +0000
Subject: [PATCH] vfio/common: Initialize HostIOMMUDeviceCaps during
 attach_device()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fetch IOMMU hw raw caps behind the device and thus move part of what
happens in  HostIOMMUDevice::realize() to be done during the attach
of the device. It allows us to cache the information obtained from
IOMMU_GET_HW_INFO from iommufd and then serialize to external agents.

This is in preparation to fetch parse hw capabilities and understand
if dirty tracking is supported by device backing IOMMU without
necessarily duplicating the amount of calls we do to IOMMU_GET_HW_INFO.

Suggested-by: Cédric Le Goater <clg@redhat.com
Signed-off-by: Joao Martins <joao.m.martins@oracle.com>
---
 include/sysemu/host_iommu_device.h |  5 ++++
 backends/host_iommu_device.c       |  3 +++
 hw/vfio/common.c                   |  7 ++++--
 hw/vfio/iommufd.c                  | 39 +++++++++++++++++++++---------
 4 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/include/sysemu/host_iommu_device.h b/include/sysemu/host_iommu_device.h
index ee6c813c8b22..9f5f368d97f0 100644
--- a/include/sysemu/host_iommu_device.h
+++ b/include/sysemu/host_iommu_device.h
@@ -25,6 +25,11 @@
 typedef struct HostIOMMUDeviceCaps {
     uint32_t type;
     uint8_t aw_bits;
+    struct {
+        void *hw_data;
+        uint64_t hw_data_len;
+        uint64_t hw_caps;
+    } iommufd;
 } HostIOMMUDeviceCaps;

 #define TYPE_HOST_IOMMU_DEVICE "host-iommu-device"
diff --git a/backends/host_iommu_device.c b/backends/host_iommu_device.c
index 8f2dda1beb9b..df74b740f8fe 100644
--- a/backends/host_iommu_device.c
+++ b/backends/host_iommu_device.c
@@ -29,5 +29,8 @@ static void host_iommu_device_finalize(Object *obj)
 {
     HostIOMMUDevice *hiod = HOST_IOMMU_DEVICE(obj);

+    g_free(hiod->caps.iommufd.hw_data);
+    hiod->caps.iommufd.hw_data_len = 0;
+
     g_free(hiod->name);
 }
diff --git a/hw/vfio/common.c b/hw/vfio/common.c
index 7cdb969fd396..f3e7fb550788 100644
--- a/hw/vfio/common.c
+++ b/hw/vfio/common.c
@@ -1552,17 +1552,20 @@ bool vfio_attach_device(char *name, VFIODevice *vbasedev,

     assert(ops);

+
+    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
+    vbasedev->hiod = hiod;
+
     if (!ops->attach_device(name, vbasedev, as, errp)) {
         return false;
     }

-    hiod = HOST_IOMMU_DEVICE(object_new(ops->hiod_typename));
     if (!HOST_IOMMU_DEVICE_GET_CLASS(hiod)->realize(hiod, vbasedev, errp)) {
         object_unref(hiod);
         ops->detach_device(vbasedev);
+        vbasedev->hiod = NULL;
         return false;
     }
-    vbasedev->hiod = hiod;

     return true;
 }
diff --git a/hw/vfio/iommufd.c b/hw/vfio/iommufd.c
index 69a13d240811..87c2d2f07d0a 100644
--- a/hw/vfio/iommufd.c
+++ b/hw/vfio/iommufd.c
@@ -381,6 +381,29 @@ error:
     return false;
 }

+static bool iommufd_cdev_hiod_caps_init(VFIODevice *vbasedev, Error **errp)
+{
+    HostIOMMUDeviceCaps *caps;
+    union {
+        struct iommu_hw_info_vtd vtd;
+    } hw_data;
+
+    g_assert(vbasedev->hiod);
+
+    /* Cache IOMMU hardware information ahead of HostIOMMUDevice::realize() */
+    caps = &vbasedev->hiod->caps;
+    caps->iommufd.hw_data = g_malloc0(sizeof(hw_data));
+    caps->iommufd.hw_data_len = sizeof(hw_data);
+    if (!iommufd_backend_get_device_info(vbasedev->iommufd, vbasedev->devid,
+                                         &caps->type, &caps->iommufd.hw_data,
+                                         caps->iommufd.hw_data_len,
+                                         &caps->iommufd.hw_caps, errp)) {
+        return false;
+    }
+
+    return true;
+}
+
 static bool iommufd_cdev_attach(const char *name, VFIODevice *vbasedev,
                                 AddressSpace *as, Error **errp)
 {
@@ -410,6 +433,10 @@ static bool iommufd_cdev_attach(const char *name,
VFIODevice *vbasedev,

     space = vfio_get_address_space(as);

+    if (!iommufd_cdev_hiod_caps_init(vbasedev, errp)) {
+        goto err_alloc_ioas;
+    }
+
     /* try to attach to an existing container in this space */
     QLIST_FOREACH(bcontainer, &space->containers, next) {
         container = container_of(bcontainer, VFIOIOMMUFDContainer, bcontainer);
@@ -715,11 +742,6 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice
*hiod, void *opaque,
 {
     VFIODevice *vdev = opaque;
     HostIOMMUDeviceCaps *caps = &hiod->caps;
-    enum iommu_hw_info_type type;
-    union {
-        struct iommu_hw_info_vtd vtd;
-    } data;
-    uint64_t hw_caps;

     hiod->agent = opaque;

@@ -727,14 +749,7 @@ static bool hiod_iommufd_vfio_realize(HostIOMMUDevice
*hiod, void *opaque,
         return true;
     }

-    if (!iommufd_backend_get_device_info(vdev->iommufd, vdev->devid,
-                                         &type, &data, sizeof(data),
-                                         &hw_caps, errp)) {
-        return false;
-    }
-
     hiod->name = g_strdup(vdev->name);
-    caps->type = type;
     caps->aw_bits = vfio_device_get_aw_bits(vdev);

     return true;
--
2.17.2