Add iopf-capable hw page table attach/detach/replace helpers. The pointer
to the iommufd_device is stored in the domain attachment handle, so that
it can be echoed back in the iopf_group.
The iopf-capable hw page tables can only be attached to devices that
support the IOMMU_DEV_FEAT_IOPF feature. On the first attachment of an
iopf-capable hw_pagetable to the device, the IOPF feature is enabled on
the device. Similarly, after the last iopf-capable hwpt is detached from
the device, the IOPF feature is disabled on the device.
The current implementation allows a replacement between iopf-capable and
non-iopf-capable hw page tables. This matches the nested translation use
case, where a parent domain is attached by default and can then be
replaced with a nested user domain with iopf support.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
---
drivers/iommu/iommufd/iommufd_private.h | 10 ++
drivers/iommu/iommufd/device.c | 15 ++-
drivers/iommu/iommufd/fault.c | 118 ++++++++++++++++++++++++
3 files changed, 140 insertions(+), 3 deletions(-)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index acb89bbf9f8e..18c8d7d38dcd 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -293,6 +293,7 @@ int iommufd_check_iova_range(struct io_pagetable *iopt,
struct iommufd_hw_pagetable {
struct iommufd_object obj;
struct iommu_domain *domain;
+ struct iommufd_fault *fault;
};
struct iommufd_hwpt_paging {
@@ -396,6 +397,7 @@ struct iommufd_device {
/* always the physical device */
struct device *dev;
bool enforce_cache_coherency;
+ bool iopf_enabled;
/* outstanding faults awaiting response indexed by fault group id */
struct xarray faults;
};
@@ -450,6 +452,14 @@ struct iommufd_fault {
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
+int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev);
+void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_device *idev);
+int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *hwpt,
+ struct iommufd_hw_pagetable *old);
+
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 22e22969363a..2bee3f399ec7 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -377,7 +377,10 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
* attachment.
*/
if (list_empty(&idev->igroup->device_list)) {
- rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
+ if (hwpt->fault)
+ rc = iommufd_fault_domain_attach_dev(hwpt, idev);
+ else
+ rc = iommu_attach_group(hwpt->domain, idev->igroup->group);
if (rc)
goto err_unresv;
idev->igroup->hwpt = hwpt;
@@ -403,7 +406,10 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
mutex_lock(&idev->igroup->lock);
list_del(&idev->group_item);
if (list_empty(&idev->igroup->device_list)) {
- iommu_detach_group(hwpt->domain, idev->igroup->group);
+ if (hwpt->fault)
+ iommufd_fault_domain_detach_dev(hwpt, idev);
+ else
+ iommu_detach_group(hwpt->domain, idev->igroup->group);
idev->igroup->hwpt = NULL;
}
if (hwpt_is_paging(hwpt))
@@ -498,7 +504,10 @@ iommufd_device_do_replace(struct iommufd_device *idev,
goto err_unlock;
}
- rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
+ if (old_hwpt->fault || hwpt->fault)
+ rc = iommufd_fault_domain_replace_dev(idev, hwpt, old_hwpt);
+ else
+ rc = iommu_group_replace_domain(igroup->group, hwpt->domain);
if (rc)
goto err_unresv;
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
index 13125c0feecb..6357229bf3b4 100644
--- a/drivers/iommu/iommufd/fault.c
+++ b/drivers/iommu/iommufd/fault.c
@@ -15,6 +15,124 @@
#include "../iommu-priv.h"
#include "iommufd_private.h"
+/*
+ * Enable the IOPF (I/O page fault) feature on idev's physical device.
+ * Idempotent: returns 0 immediately if the feature is already on.
+ *
+ * NOTE(review): idev->iopf_enabled is a bool, not a refcount, so this
+ * assumes a single IOPF user per device; multiple fault-capable PASIDs
+ * would need counting — to be reworked with the PASID series.
+ */
+static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
+{
+	int ret;
+
+	/* Already enabled by an earlier iopf-capable attachment. */
+	if (idev->iopf_enabled)
+		return 0;
+
+	ret = iommu_dev_enable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
+	if (ret)
+		return ret;
+
+	idev->iopf_enabled = true;
+
+	return 0;
+}
+
+/*
+ * Disable the IOPF feature on idev's physical device. Counterpart of
+ * iommufd_fault_iopf_enable(); a no-op if the feature is not enabled.
+ */
+static void iommufd_fault_iopf_disable(struct iommufd_device *idev)
+{
+	if (!idev->iopf_enabled)
+		return;
+
+	iommu_dev_disable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
+	idev->iopf_enabled = false;
+}
+
+/*
+ * Attach an iopf-capable hw_pagetable to a device: enable the IOPF
+ * feature on the first iopf-capable attachment, attach the domain, and
+ * record idev in the attach handle so it can be echoed back in the
+ * iopf_group when a fault is delivered.
+ *
+ * Returns 0 on success or a negative errno; on failure the IOPF enable
+ * is rolled back.
+ */
+int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
+				    struct iommufd_device *idev)
+{
+	struct iommu_attach_handle *handle;
+	int ret;
+
+	/* Only iopf-capable hwpts may take this path. */
+	if (!hwpt->fault)
+		return -EINVAL;
+
+	ret = iommufd_fault_iopf_enable(idev);
+	if (ret)
+		return ret;
+
+	ret = iommu_attach_group(hwpt->domain, idev->igroup->group);
+	if (ret)
+		goto err_disable;
+
+	/*
+	 * The core allocated the handle during the attach above; a failure
+	 * here is unexpected, but do not dereference an error pointer.
+	 */
+	handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto err_detach;
+	}
+	handle->idev = idev;
+
+	return 0;
+err_detach:
+	iommu_detach_group(hwpt->domain, idev->igroup->group);
+err_disable:
+	iommufd_fault_iopf_disable(idev);
+	return ret;
+}
+
+/*
+ * Respond IOMMU_PAGE_RESP_INVALID to every outstanding fault that this
+ * device raised against hwpt's domain, both faults still queued for
+ * delivery to userspace (fault->deliver) and faults userspace has read
+ * but not yet responded to (idev->faults). Called when the domain is
+ * torn away from the device so no response can ever arrive.
+ */
+static void iommufd_auto_response_faults(struct iommufd_hw_pagetable *hwpt,
+					 struct iommufd_device *idev)
+{
+	struct iommufd_fault *fault = hwpt->fault;
+	struct iopf_group *group, *next;
+	unsigned long index;
+
+	/* Non-iopf-capable hwpt: nothing can be pending. */
+	if (!fault)
+		return;
+
+	/* fault->mutex protects both the deliver list and idev->faults. */
+	mutex_lock(&fault->mutex);
+	list_for_each_entry_safe(group, next, &fault->deliver, node) {
+		/* Skip faults from other domains or other devices. */
+		if (group->domain != hwpt->domain ||
+		    group->fault_param->dev != idev->dev)
+			continue;
+		list_del(&group->node);
+		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+		iopf_free_group(group);
+	}
+
+	/* Faults already handed to userspace, awaiting a response. */
+	xa_for_each(&idev->faults, index, group) {
+		if (group->domain != hwpt->domain)
+			continue;
+		xa_erase(&idev->faults, index);
+		iopf_group_response(group, IOMMU_PAGE_RESP_INVALID);
+		iopf_free_group(group);
+	}
+	mutex_unlock(&fault->mutex);
+}
+
+/*
+ * Detach an iopf-capable hw_pagetable from a device: detach the domain,
+ * disable the IOPF feature (last iopf-capable hwpt gone), then auto-
+ * respond any faults left dangling for this device/domain pair. The
+ * detach must come first so no new faults arrive while we drain.
+ */
+void iommufd_fault_domain_detach_dev(struct iommufd_hw_pagetable *hwpt,
+				     struct iommufd_device *idev)
+{
+	iommu_detach_group(hwpt->domain, idev->igroup->group);
+	iommufd_fault_iopf_disable(idev);
+	iommufd_auto_response_faults(hwpt, idev);
+}
+
+/*
+ * Replace the hw_pagetable on a device where either the old or new hwpt
+ * (or both) is iopf-capable. Enables/disables the IOPF feature to match
+ * the new hwpt, performs the domain replace, and — only when moving off
+ * an iopf-capable hwpt — auto-responds the faults that can no longer be
+ * answered (per review: unnecessary for iopf->iopf, where userspace must
+ * tolerate faults from either hwpt during the transition).
+ *
+ * On failure, the IOPF feature is restored to match the old hwpt and the
+ * original error code is returned.
+ */
+int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
+				     struct iommufd_hw_pagetable *hwpt,
+				     struct iommufd_hw_pagetable *old)
+{
+	struct iommu_attach_handle *handle;
+	int ret;
+
+	if (hwpt->fault) {
+		/* Must not proceed with IOPF off if enabling failed. */
+		ret = iommufd_fault_iopf_enable(idev);
+		if (ret)
+			return ret;
+	} else {
+		iommufd_fault_iopf_disable(idev);
+	}
+
+	ret = iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
+	if (ret)
+		goto out_restore;
+
+	/* Only drain when the new hwpt cannot take over fault handling. */
+	if (!hwpt->fault)
+		iommufd_auto_response_faults(old, idev);
+
+	handle = iommu_attach_handle_get(idev->igroup->group, IOMMU_NO_PASID, 0);
+	if (!IS_ERR(handle))
+		handle->idev = idev;
+
+	return 0;
+out_restore:
+	/*
+	 * Restore the IOPF state to match the still-attached old hwpt.
+	 * Deliberately do not clobber 'ret': the replace error must be
+	 * reported to the caller even if the restore succeeds.
+	 */
+	if (old->fault)
+		iommufd_fault_iopf_enable(idev);
+	else
+		iommufd_fault_iopf_disable(idev);
+
+	return ret;
+}
+
void iommufd_fault_destroy(struct iommufd_object *obj)
{
struct iommufd_fault *fault = container_of(obj, struct iommufd_fault, obj);
--
2.34.1
> From: Lu Baolu <baolu.lu@linux.intel.com>
> Sent: Tuesday, April 30, 2024 10:57 PM
> +
> +int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
> + struct iommufd_hw_pagetable *hwpt,
> + struct iommufd_hw_pagetable *old)
> +{
> + struct iommu_attach_handle *handle;
> + int ret;
> +
> + if (hwpt->fault)
> + ret = iommufd_fault_iopf_enable(idev);
> + else
> + iommufd_fault_iopf_disable(idev);
> +
> + ret = iommu_group_replace_domain(idev->igroup->group, hwpt-
> >domain);
> + if (ret)
> + goto out_cleanup;
> +
> + iommufd_auto_response_faults(old, idev);
> + handle = iommu_attach_handle_get(idev->igroup->group,
> IOMMU_NO_PASID, 0);
> + handle->idev = idev;
why is auto response required in replace? new requests can come
after the auto response anyway...
The user should prepare for faults delivered to the old or new hwpt
in the transition window.
On 5/15/24 4:43 PM, Tian, Kevin wrote:
>> From: Lu Baolu <baolu.lu@linux.intel.com>
>> Sent: Tuesday, April 30, 2024 10:57 PM
>> +
>> +int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
>> + struct iommufd_hw_pagetable *hwpt,
>> + struct iommufd_hw_pagetable *old)
>> +{
>> + struct iommu_attach_handle *handle;
>> + int ret;
>> +
>> + if (hwpt->fault)
>> + ret = iommufd_fault_iopf_enable(idev);
>> + else
>> + iommufd_fault_iopf_disable(idev);
>> +
>> + ret = iommu_group_replace_domain(idev->igroup->group, hwpt-
>>> domain);
>> + if (ret)
>> + goto out_cleanup;
>> +
>> + iommufd_auto_response_faults(old, idev);
>> + handle = iommu_attach_handle_get(idev->igroup->group,
>> IOMMU_NO_PASID, 0);
>> + handle->idev = idev;
>
> why is auto response required in replace? new requests can come
> after the auto response anyway...
>
> The user should prepare for faults delivered to the old or new hwpt
> in the transition window.
The current design of replace allows switching between one that is not
IOPF-capable and one that is. This implies that if we switch from an
IOPF-capable hwpt to a non-IOPF-capable one, the response queue needs to
be auto responded.
Best regards,
baolu
> From: Baolu Lu <baolu.lu@linux.intel.com>
> Sent: Monday, May 20, 2024 10:10 AM
>
> On 5/15/24 4:43 PM, Tian, Kevin wrote:
> >> From: Lu Baolu <baolu.lu@linux.intel.com>
> >> Sent: Tuesday, April 30, 2024 10:57 PM
> >> +
> >> +int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
> >> + struct iommufd_hw_pagetable *hwpt,
> >> + struct iommufd_hw_pagetable *old)
> >> +{
> >> + struct iommu_attach_handle *handle;
> >> + int ret;
> >> +
> >> + if (hwpt->fault)
> >> + ret = iommufd_fault_iopf_enable(idev);
> >> + else
> >> + iommufd_fault_iopf_disable(idev);
> >> +
> >> + ret = iommu_group_replace_domain(idev->igroup->group, hwpt-
> >>> domain);
> >> + if (ret)
> >> + goto out_cleanup;
> >> +
> >> + iommufd_auto_response_faults(old, idev);
> >> + handle = iommu_attach_handle_get(idev->igroup->group,
> >> IOMMU_NO_PASID, 0);
> >> + handle->idev = idev;
> >
> > why is auto response required in replace? new requests can come
> > after the auto response anyway...
> >
> > The user should prepare for faults delivered to the old or new hwpt
> > in the transition window.
>
> The current design of replace allows switching between one that is not
> IOPF-capable and one that is. This implies that if we switch from an
> IOPF-capable hwpt to a non-IOPF-capable one, the response queue needs to
> be auto responded.
>
then do it only for that scenario?
On 5/20/24 11:35 AM, Tian, Kevin wrote:
>> From: Baolu Lu <baolu.lu@linux.intel.com>
>> Sent: Monday, May 20, 2024 10:10 AM
>>
>> On 5/15/24 4:43 PM, Tian, Kevin wrote:
>>>> From: Lu Baolu <baolu.lu@linux.intel.com>
>>>> Sent: Tuesday, April 30, 2024 10:57 PM
>>>> +
>>>> +int iommufd_fault_domain_replace_dev(struct iommufd_device *idev,
>>>> + struct iommufd_hw_pagetable *hwpt,
>>>> + struct iommufd_hw_pagetable *old)
>>>> +{
>>>> + struct iommu_attach_handle *handle;
>>>> + int ret;
>>>> +
>>>> + if (hwpt->fault)
>>>> + ret = iommufd_fault_iopf_enable(idev);
>>>> + else
>>>> + iommufd_fault_iopf_disable(idev);
>>>> +
>>>> + ret = iommu_group_replace_domain(idev->igroup->group, hwpt-
>>>>> domain);
>>>> + if (ret)
>>>> + goto out_cleanup;
>>>> +
>>>> + iommufd_auto_response_faults(old, idev);
>>>> + handle = iommu_attach_handle_get(idev->igroup->group,
>>>> IOMMU_NO_PASID, 0);
>>>> + handle->idev = idev;
>>>
>>> why is auto response required in replace? new requests can come
>>> after the auto response anyway...
>>>
>>> The user should prepare for faults delivered to the old or new hwpt
>>> in the transition window.
>>
>> The current design of replace allows switching between one that is not
>> IOPF-capable and one that is. This implies that if we switch from an
>> IOPF-capable hwpt to a non-IOPF-capable one, the response queue needs to
>> be auto responded.
>>
>
> then do it only for that scenario?
Yes. Will do this in the new version.
Best regards,
baolu
On Tue, Apr 30, 2024 at 10:57:07PM +0800, Lu Baolu wrote:
> diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
> index 13125c0feecb..6357229bf3b4 100644
> --- a/drivers/iommu/iommufd/fault.c
> +++ b/drivers/iommu/iommufd/fault.c
> @@ -15,6 +15,124 @@
> #include "../iommu-priv.h"
> #include "iommufd_private.h"
>
> +static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
> +{
> + int ret;
> +
> + if (idev->iopf_enabled)
> + return 0;
> +
> + ret = iommu_dev_enable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
> + if (ret)
> + return ret;
> +
> + idev->iopf_enabled = true;
> +
> + return 0;
> +}
I would greatly prefer we remove this from the drivers :\ I guess it
is Ok for now
Doesn't this need a counter? We can have many fault capable PASIDs?
That will get changed in the PASID series?
Jason
On 5/8/24 8:18 AM, Jason Gunthorpe wrote:
> On Tue, Apr 30, 2024 at 10:57:07PM +0800, Lu Baolu wrote:
>> diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
>> index 13125c0feecb..6357229bf3b4 100644
>> --- a/drivers/iommu/iommufd/fault.c
>> +++ b/drivers/iommu/iommufd/fault.c
>> @@ -15,6 +15,124 @@
>> #include "../iommu-priv.h"
>> #include "iommufd_private.h"
>>
>> +static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
>> +{
>> + int ret;
>> +
>> + if (idev->iopf_enabled)
>> + return 0;
>> +
>> + ret = iommu_dev_enable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
>> + if (ret)
>> + return ret;
>> +
>> + idev->iopf_enabled = true;
>> +
>> + return 0;
>> +}
> I would greatly prefer we remove this from the drivers :\ I guess it
> is Ok for now
>
> Doesn't this need a counter? We can have many fault capable PASIDs?
> That will get changed in the PASID series?
Okay, let's design this more gracefully after the PASID interfaces are
landed. For now, we assume that the device driver will do this.
Best regards,
baolu
On Fri, May 10, 2024 at 11:20:01AM +0800, Baolu Lu wrote:
> On 5/8/24 8:18 AM, Jason Gunthorpe wrote:
> > On Tue, Apr 30, 2024 at 10:57:07PM +0800, Lu Baolu wrote:
> > > diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
> > > index 13125c0feecb..6357229bf3b4 100644
> > > --- a/drivers/iommu/iommufd/fault.c
> > > +++ b/drivers/iommu/iommufd/fault.c
> > > @@ -15,6 +15,124 @@
> > > #include "../iommu-priv.h"
> > > #include "iommufd_private.h"
> > > +static int iommufd_fault_iopf_enable(struct iommufd_device *idev)
> > > +{
> > > + int ret;
> > > +
> > > + if (idev->iopf_enabled)
> > > + return 0;
> > > +
> > > + ret = iommu_dev_enable_feature(idev->dev, IOMMU_DEV_FEAT_IOPF);
> > > + if (ret)
> > > + return ret;
> > > +
> > > + idev->iopf_enabled = true;
> > > +
> > > + return 0;
> > > +}
> > I would greatly prefer we remove this from the drivers :\ I guess it
> > is Ok for now
> >
> > Doesn't this need a counter? We can have many fault capable PASIDs?
> > That will get changed in the PASID series?
>
> Okay, let's design this more gracefully after the PASID interfaces are
> landed. For now, we assume that the device driver will do this.
Well, for now to work the device drivers do still need these calls.
I'm trying to get them into NOPs in the drivers so we can remove this.
Jason
© 2016 - 2025 Red Hat, Inc.