When allocating a user iommufd_hw_pagetable, the user space is allowed to
associate a fault object with the hw_pagetable by specifying the fault
object ID in the page table allocation data and setting the
IOMMU_HWPT_FAULT_ID_VALID flag bit.
On a successful return of hwpt allocation, the user can retrieve and
respond to page faults by reading and writing the file interface of the
fault object.
Once a fault object has been associated with a hwpt, the hwpt is
iopf-capable, indicated by hwpt->fault is non NULL. Attaching,
detaching, or replacing an iopf-capable hwpt to an RID or PASID will
differ from those that are not iopf-capable.
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
---
drivers/iommu/iommufd/iommufd_private.h | 9 ++++++
include/uapi/linux/iommufd.h | 8 +++++
drivers/iommu/iommufd/fault.c | 17 ++++++++++
drivers/iommu/iommufd/hw_pagetable.c | 41 +++++++++++++++++++------
4 files changed, 66 insertions(+), 9 deletions(-)
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index aa4c26c87cb9..92efe30a8f0d 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -458,8 +458,17 @@ struct iommufd_attach_handle {
/* Convert an iommu attach handle to iommufd handle. */
#define to_iommufd_handle(hdl) container_of(hdl, struct iommufd_attach_handle, handle)
+static inline struct iommufd_fault *
+iommufd_get_fault(struct iommufd_ucmd *ucmd, u32 id)
+{
+ return container_of(iommufd_get_object(ucmd->ictx, id,
+ IOMMUFD_OBJ_FAULT),
+ struct iommufd_fault, obj);
+}
+
int iommufd_fault_alloc(struct iommufd_ucmd *ucmd);
void iommufd_fault_destroy(struct iommufd_object *obj);
+int iommufd_fault_iopf_handler(struct iopf_group *group);
int iommufd_fault_domain_attach_dev(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev);
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 70b8a38fcd46..ede2b464a761 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -357,10 +357,13 @@ struct iommu_vfio_ioas {
* the parent HWPT in a nesting configuration.
* @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
* enforced on device attachment
+ * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
+ * valid.
*/
enum iommufd_hwpt_alloc_flags {
IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
+ IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
};
/**
@@ -412,6 +415,9 @@ enum iommu_hwpt_data_type {
* @data_type: One of enum iommu_hwpt_data_type
* @data_len: Length of the type specific data
* @data_uptr: User pointer to the type specific data
+ * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
+ * IOMMU_HWPT_FAULT_ID_VALID is set.
+ * @__reserved2: Padding to 64-bit alignment. Must be 0.
*
* Explicitly allocate a hardware page table object. This is the same object
* type that is returned by iommufd_device_attach() and represents the
@@ -442,6 +448,8 @@ struct iommu_hwpt_alloc {
__u32 data_type;
__u32 data_len;
__aligned_u64 data_uptr;
+ __u32 fault_id;
+ __u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
diff --git a/drivers/iommu/iommufd/fault.c b/drivers/iommu/iommufd/fault.c
index 4934ae572638..54d6cd20a673 100644
--- a/drivers/iommu/iommufd/fault.c
+++ b/drivers/iommu/iommufd/fault.c
@@ -414,3 +414,20 @@ int iommufd_fault_alloc(struct iommufd_ucmd *ucmd)
return rc;
}
+
+int iommufd_fault_iopf_handler(struct iopf_group *group)
+{
+ struct iommufd_hw_pagetable *hwpt;
+ struct iommufd_fault *fault;
+
+ hwpt = group->attach_handle->domain->fault_data;
+ fault = hwpt->fault;
+
+ mutex_lock(&fault->mutex);
+ list_add_tail(&group->node, &fault->deliver);
+ mutex_unlock(&fault->mutex);
+
+ wake_up_interruptible(&fault->wait_queue);
+
+ return 0;
+}
diff --git a/drivers/iommu/iommufd/hw_pagetable.c b/drivers/iommu/iommufd/hw_pagetable.c
index 33d142f8057d..14a32bf80549 100644
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -8,6 +8,15 @@
#include "../iommu-priv.h"
#include "iommufd_private.h"
+static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
+{
+ if (hwpt->domain)
+ iommu_domain_free(hwpt->domain);
+
+ if (hwpt->fault)
+ iommufd_put_object(hwpt->fault->ictx, &hwpt->fault->obj);
+}
+
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
{
struct iommufd_hwpt_paging *hwpt_paging =
@@ -22,9 +31,7 @@ void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
hwpt_paging->common.domain);
}
- if (hwpt_paging->common.domain)
- iommu_domain_free(hwpt_paging->common.domain);
-
+ __iommufd_hwpt_destroy(&hwpt_paging->common);
refcount_dec(&hwpt_paging->ioas->obj.users);
}
@@ -49,9 +56,7 @@ void iommufd_hwpt_nested_destroy(struct iommufd_object *obj)
struct iommufd_hwpt_nested *hwpt_nested =
container_of(obj, struct iommufd_hwpt_nested, common.obj);
- if (hwpt_nested->common.domain)
- iommu_domain_free(hwpt_nested->common.domain);
-
+ __iommufd_hwpt_destroy(&hwpt_nested->common);
refcount_dec(&hwpt_nested->parent->common.obj.users);
}
@@ -213,7 +218,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
struct iommufd_hw_pagetable *hwpt;
int rc;
- if (flags || !user_data->len || !ops->domain_alloc_user)
+ if ((flags & ~IOMMU_HWPT_FAULT_ID_VALID) ||
+ !user_data->len || !ops->domain_alloc_user)
return ERR_PTR(-EOPNOTSUPP);
if (parent->auto_domain || !parent->nest_parent)
return ERR_PTR(-EINVAL);
@@ -227,7 +233,8 @@ iommufd_hwpt_nested_alloc(struct iommufd_ctx *ictx,
refcount_inc(&parent->common.obj.users);
hwpt_nested->parent = parent;
- hwpt->domain = ops->domain_alloc_user(idev->dev, flags,
+ hwpt->domain = ops->domain_alloc_user(idev->dev,
+ flags & ~IOMMU_HWPT_FAULT_ID_VALID,
parent->common.domain, user_data);
if (IS_ERR(hwpt->domain)) {
rc = PTR_ERR(hwpt->domain);
@@ -308,13 +315,29 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
goto out_put_pt;
}
+ if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID) {
+ struct iommufd_fault *fault;
+
+ fault = iommufd_get_fault(ucmd, cmd->fault_id);
+ if (IS_ERR(fault)) {
+ rc = PTR_ERR(fault);
+ goto out_hwpt;
+ }
+ hwpt->fault = fault;
+ hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
+ hwpt->domain->fault_data = hwpt;
+ }
+
cmd->out_hwpt_id = hwpt->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
- goto out_hwpt;
+ goto out_put_fault;
iommufd_object_finalize(ucmd->ictx, &hwpt->obj);
goto out_unlock;
+out_put_fault:
+ if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID)
+ iommufd_put_object(ucmd->ictx, &hwpt->fault->obj);
out_hwpt:
iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj);
out_unlock:
--
2.34.1
On Sun, Jun 16, 2024 at 02:11:53PM +0800, Lu Baolu wrote:
> @@ -308,13 +315,29 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
> goto out_put_pt;
> }
>
> + if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID) {
> + struct iommufd_fault *fault;
> +
> + fault = iommufd_get_fault(ucmd, cmd->fault_id);
> + if (IS_ERR(fault)) {
> + rc = PTR_ERR(fault);
> + goto out_hwpt;
> + }
> + hwpt->fault = fault;
> + hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
> + hwpt->domain->fault_data = hwpt;
This is not the right refcounting for a longterm reference... The PT
above shows the pattern:
pt_obj = iommufd_get_object(ucmd->ictx, cmd->pt_id, IOMMUFD_OBJ_ANY);
hwpt_paging = iommufd_hwpt_paging_alloc()
refcount_inc(&ioas->obj.users);
iommufd_put_object(ucmd->ictx, pt_obj);
Which is to say you need to incr users and then do the put object. And
iommufd_object_abort_and_destroy() will always destroy the ref on the
fault if the fault is non-null so the error handling will double free.
fail_nth is intended to catch this, but you have to add enough inputs
to cover the new cases when you add them, it seems like that is
missing in this series. ie add a fault object and hwpt alloc to a
fail_nth test and see we execute the iommufd_ucmd_respond() failure
path.
--- a/drivers/iommu/iommufd/hw_pagetable.c
+++ b/drivers/iommu/iommufd/hw_pagetable.c
@@ -14,7 +14,7 @@ static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
iommu_domain_free(hwpt->domain);
if (hwpt->fault)
- iommufd_put_object(hwpt->fault->ictx, &hwpt->fault->obj);
+ refcount_dec(&hwpt->fault->obj.users);
}
void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
@@ -326,18 +326,17 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
hwpt->fault = fault;
hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
hwpt->domain->fault_data = hwpt;
+ refcount_inc(&fault->obj.users);
+ iommufd_put_object(ucmd->ictx, &fault->obj);
}
cmd->out_hwpt_id = hwpt->obj.id;
rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
if (rc)
- goto out_put_fault;
+ goto out_hwpt;
iommufd_object_finalize(ucmd->ictx, &hwpt->obj);
goto out_unlock;
-out_put_fault:
- if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID)
- iommufd_put_object(ucmd->ictx, &hwpt->fault->obj);
out_hwpt:
iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj);
out_unlock:
On 2024/6/29 6:13, Jason Gunthorpe wrote:
> On Sun, Jun 16, 2024 at 02:11:53PM +0800, Lu Baolu wrote:
>
>> @@ -308,13 +315,29 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
>> goto out_put_pt;
>> }
>>
>> + if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID) {
>> + struct iommufd_fault *fault;
>> +
>> + fault = iommufd_get_fault(ucmd, cmd->fault_id);
>> + if (IS_ERR(fault)) {
>> + rc = PTR_ERR(fault);
>> + goto out_hwpt;
>> + }
>> + hwpt->fault = fault;
>> + hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
>> + hwpt->domain->fault_data = hwpt;
>
> This is not the right refcounting for a longterm reference... The PT
> above shows the pattern:
>
> pt_obj = iommufd_get_object(ucmd->ictx, cmd->pt_id, IOMMUFD_OBJ_ANY);
> hwpt_paging = iommufd_hwpt_paging_alloc()
> refcount_inc(&ioas->obj.users);
>
> iommufd_put_object(ucmd->ictx, pt_obj);
>
> Which is to say you need to incr users and then do the put object. And
> iommufd_object_abort_and_destroy() will always destroy the ref on the
> fault if the fault is non-null so the error handling will double free.
>
> fail_nth is intended to catch this, but you have to add enough inputs
> to cover the new cases when you add them, it seems like that is
> missing in this series. ie add a fault object and hwpt alloc to a
> fail_nth test and see we execute the iommufd_ucmd_respond() failure
> path.
Yes. I will add below fail_nth case:
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -201,6 +201,12 @@ static int _test_cmd_hwpt_alloc(int fd, __u32
device_id, __u32 pt_id, __u32 ft_i
ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id,
fault_id, \
flags, hwpt_id, data_type,
data, \
data_len))
+#define test_err_hwpt_alloc_iopf(_errno, device_id, pt_id, fault_id,
flags, \
+ hwpt_id, data_type, data, data_len)
\
+ EXPECT_ERRNO(_errno,
\
+ _test_cmd_hwpt_alloc(self->fd, device_id, pt_id,
fault_id, \
+ flags, hwpt_id, data_type,
data, \
+ data_len))
#define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected)
\
({
\
diff --git a/tools/testing/selftests/iommu/iommufd.c
b/tools/testing/selftests/iommu/iommufd.c
index 5b0169875a4d..93634e53e95e 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -338,6 +338,10 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
&nested_hwpt_id[1],
IOMMU_HWPT_DATA_SELFTEST, &data,
sizeof(data));
+ test_err_hwpt_alloc_iopf(ENOENT, self->device_id,
parent_hwpt_id,
+ UINT32_MAX,
IOMMU_HWPT_FAULT_ID_VALID,
+ &iopf_hwpt_id,
IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
test_cmd_hwpt_alloc_iopf(self->device_id,
parent_hwpt_id, fault_id,
IOMMU_HWPT_FAULT_ID_VALID,
&iopf_hwpt_id,
IOMMU_HWPT_DATA_SELFTEST, &data,
>
> --- a/drivers/iommu/iommufd/hw_pagetable.c
> +++ b/drivers/iommu/iommufd/hw_pagetable.c
> @@ -14,7 +14,7 @@ static void __iommufd_hwpt_destroy(struct iommufd_hw_pagetable *hwpt)
> iommu_domain_free(hwpt->domain);
>
> if (hwpt->fault)
> - iommufd_put_object(hwpt->fault->ictx, &hwpt->fault->obj);
> + refcount_dec(&hwpt->fault->obj.users);
> }
>
> void iommufd_hwpt_paging_destroy(struct iommufd_object *obj)
> @@ -326,18 +326,17 @@ int iommufd_hwpt_alloc(struct iommufd_ucmd *ucmd)
> hwpt->fault = fault;
> hwpt->domain->iopf_handler = iommufd_fault_iopf_handler;
> hwpt->domain->fault_data = hwpt;
> + refcount_inc(&fault->obj.users);
> + iommufd_put_object(ucmd->ictx, &fault->obj);
> }
>
> cmd->out_hwpt_id = hwpt->obj.id;
> rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
> if (rc)
> - goto out_put_fault;
> + goto out_hwpt;
> iommufd_object_finalize(ucmd->ictx, &hwpt->obj);
> goto out_unlock;
>
> -out_put_fault:
> - if (cmd->flags & IOMMU_HWPT_FAULT_ID_VALID)
> - iommufd_put_object(ucmd->ictx, &hwpt->fault->obj);
> out_hwpt:
> iommufd_object_abort_and_destroy(ucmd->ictx, &hwpt->obj);
> out_unlock:
>
.. and merge above change to this patch.
Best regards,
baolu
© 2016 - 2025 Red Hat, Inc.