Add a new IOMMUFD_OBJ_VIOMMU with an iommufd_viommu structure to represent
a vIOMMU instance in the user space, backed by a physical IOMMU for its HW
accelerated virtualization feature, such as nested translation support for
a multi-viommu-instance VM, NVIDIA CMDQ-Virtualization extension for ARM
SMMUv3, and AMD Hardware Accelerated Virtualized IOMMU (vIOMMU).
Also, add a new ioctl for user space to do a viommu allocation. It must be
based on a nested parent HWPT, so take its refcount.
As an initial version, support a viommu of IOMMU_VIOMMU_TYPE_DEFAULT type.
IOMMUFD core can use this viommu to store a virtual device ID lookup table
in a following patch.
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
drivers/iommu/iommufd/Makefile | 3 +-
drivers/iommu/iommufd/iommufd_private.h | 12 +++++
drivers/iommu/iommufd/main.c | 6 +++
drivers/iommu/iommufd/viommu.c | 72 +++++++++++++++++++++++++
include/uapi/linux/iommufd.h | 30 +++++++++++
5 files changed, 122 insertions(+), 1 deletion(-)
create mode 100644 drivers/iommu/iommufd/viommu.c
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index cf4605962bea..df490e836b30 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -7,7 +7,8 @@ iommufd-y := \
ioas.o \
main.o \
pages.o \
- vfio_compat.o
+ vfio_compat.o \
+ viommu.o
iommufd-$(CONFIG_IOMMUFD_TEST) += selftest.o
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 5d3768d77099..154f7ba5f45c 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -131,6 +131,7 @@ enum iommufd_object_type {
IOMMUFD_OBJ_IOAS,
IOMMUFD_OBJ_ACCESS,
IOMMUFD_OBJ_FAULT,
+ IOMMUFD_OBJ_VIOMMU,
#ifdef CONFIG_IOMMUFD_TEST
IOMMUFD_OBJ_SELFTEST,
#endif
@@ -526,6 +527,17 @@ static inline int iommufd_hwpt_replace_device(struct iommufd_device *idev,
return iommu_group_replace_domain(idev->igroup->group, hwpt->domain);
}
+struct iommufd_viommu {
+ struct iommufd_object obj;
+ struct iommufd_ctx *ictx;
+ struct iommufd_hwpt_paging *hwpt;
+
+ unsigned int type;
+};
+
+int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd);
+void iommufd_viommu_destroy(struct iommufd_object *obj);
+
#ifdef CONFIG_IOMMUFD_TEST
int iommufd_test(struct iommufd_ucmd *ucmd);
void iommufd_selftest_destroy(struct iommufd_object *obj);
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index b5f5d27ee963..288ee51b6829 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -333,6 +333,7 @@ union ucmd_buffer {
struct iommu_ioas_unmap unmap;
struct iommu_option option;
struct iommu_vfio_ioas vfio_ioas;
+ struct iommu_viommu_alloc viommu;
#ifdef CONFIG_IOMMUFD_TEST
struct iommu_test_cmd test;
#endif
@@ -384,6 +385,8 @@ static const struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
val64),
IOCTL_OP(IOMMU_VFIO_IOAS, iommufd_vfio_ioas, struct iommu_vfio_ioas,
__reserved),
+ IOCTL_OP(IOMMU_VIOMMU_ALLOC, iommufd_viommu_alloc_ioctl,
+ struct iommu_viommu_alloc, out_viommu_id),
#ifdef CONFIG_IOMMUFD_TEST
IOCTL_OP(IOMMU_TEST_CMD, iommufd_test, struct iommu_test_cmd, last),
#endif
@@ -519,6 +522,9 @@ static const struct iommufd_object_ops iommufd_object_ops[] = {
[IOMMUFD_OBJ_FAULT] = {
.destroy = iommufd_fault_destroy,
},
+ [IOMMUFD_OBJ_VIOMMU] = {
+ .destroy = iommufd_viommu_destroy,
+ },
#ifdef CONFIG_IOMMUFD_TEST
[IOMMUFD_OBJ_SELFTEST] = {
.destroy = iommufd_selftest_destroy,
diff --git a/drivers/iommu/iommufd/viommu.c b/drivers/iommu/iommufd/viommu.c
new file mode 100644
index 000000000000..200653a4bf57
--- /dev/null
+++ b/drivers/iommu/iommufd/viommu.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+
+#include "iommufd_private.h"
+
+void iommufd_viommu_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_viommu *viommu =
+ container_of(obj, struct iommufd_viommu, obj);
+
+ refcount_dec(&viommu->hwpt->common.obj.users);
+}
+
+int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_viommu_alloc *cmd = ucmd->cmd;
+ struct iommufd_hwpt_paging *hwpt_paging;
+ struct iommufd_viommu *viommu;
+ struct iommufd_device *idev;
+ int rc;
+
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ idev = iommufd_get_device(ucmd, cmd->dev_id);
+ if (IS_ERR(idev))
+ return PTR_ERR(idev);
+
+ hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
+ if (IS_ERR(hwpt_paging)) {
+ rc = PTR_ERR(hwpt_paging);
+ goto out_put_idev;
+ }
+
+ if (!hwpt_paging->nest_parent) {
+ rc = -EINVAL;
+ goto out_put_hwpt;
+ }
+
+ if (cmd->type != IOMMU_VIOMMU_TYPE_DEFAULT) {
+ rc = -EOPNOTSUPP;
+ goto out_put_hwpt;
+ }
+
+ viommu = iommufd_object_alloc(ucmd->ictx, viommu, IOMMUFD_OBJ_VIOMMU);
+ if (IS_ERR(viommu)) {
+ rc = PTR_ERR(viommu);
+ goto out_put_hwpt;
+ }
+
+ viommu->type = cmd->type;
+ viommu->ictx = ucmd->ictx;
+ viommu->hwpt = hwpt_paging;
+
+ refcount_inc(&viommu->hwpt->common.obj.users);
+
+ cmd->out_viommu_id = viommu->obj.id;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_abort;
+ iommufd_object_finalize(ucmd->ictx, &viommu->obj);
+ goto out_put_hwpt;
+
+out_abort:
+ iommufd_object_abort_and_destroy(ucmd->ictx, &viommu->obj);
+out_put_hwpt:
+ iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
+out_put_idev:
+ iommufd_put_object(ucmd->ictx, &idev->obj);
+ return rc;
+}
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index cd4920886ad0..ac77903b5cc4 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -51,6 +51,7 @@ enum {
IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
+ IOMMUFD_CMD_VIOMMU_ALLOC = 0x8f,
};
/**
@@ -852,4 +853,33 @@ struct iommu_fault_alloc {
__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
+
+/**
+ * enum iommu_viommu_type - Virtual IOMMU Type
+ * @IOMMU_VIOMMU_TYPE_DEFAULT: Core-managed VIOMMU type
+ */
+enum iommu_viommu_type {
+ IOMMU_VIOMMU_TYPE_DEFAULT = 0,
+};
+
+/**
+ * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
+ * @size: sizeof(struct iommu_viommu_alloc)
+ * @flags: Must be 0
+ * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
+ * @dev_id: The device to allocate this virtual IOMMU for
+ * @hwpt_id: ID of a nesting parent HWPT to associate to
+ * @out_viommu_id: Output virtual IOMMU ID for the allocated object
+ *
+ * Allocate a virtual IOMMU object that holds a (shared) nesting parent HWPT
+ */
+struct iommu_viommu_alloc {
+ __u32 size;
+ __u32 flags;
+ __u32 type;
+ __u32 dev_id;
+ __u32 hwpt_id;
+ __u32 out_viommu_id;
+};
+#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)
#endif
--
2.43.0
On Tue, Aug 27, 2024 at 09:59:39AM -0700, Nicolin Chen wrote: > +/** > + * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) > + * @size: sizeof(struct iommu_viommu_alloc) > + * @flags: Must be 0 > + * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type > + * @dev_id: The device to allocate this virtual IOMMU for @dev_id: The device's physical IOMMU will be used to back t he vIOMMU > + * @hwpt_id: ID of a nesting parent HWPT to associate to A nesting parent HWPT that will provide translation for an vIOMMU DMA > + * @out_viommu_id: Output virtual IOMMU ID for the allocated object > + * > + * Allocate a virtual IOMMU object that holds a (shared) nesting parent HWPT Allocate a virtual IOMMU object that represents the underlying physical IOMMU's virtualization support. The vIOMMU object is a security isolated slice of the physical IOMMU HW that is unique to a specific VM. Operations global to the IOMMU are connected to the vIOMMU, such as: - Security namespace for guest owned ID, eg guest controlled cache tags - Virtualization of various platforms IDs like RIDs and others - direct assigned invalidation queues - direct assigned interrupts - non-affiliated event reporting - Delivery of paravirtualized invalidation Jason
On Thu, Sep 05, 2024 at 12:53:02PM -0300, Jason Gunthorpe wrote: > On Tue, Aug 27, 2024 at 09:59:39AM -0700, Nicolin Chen wrote: > > +/** > > + * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) > > + * @size: sizeof(struct iommu_viommu_alloc) > > + * @flags: Must be 0 > > + * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type > > + * @dev_id: The device to allocate this virtual IOMMU for > > @dev_id: The device's physical IOMMU will be used to back t he vIOMMU > > > + * @hwpt_id: ID of a nesting parent HWPT to associate to > > A nesting parent HWPT that will provide translation for an vIOMMU DMA > > > + * @out_viommu_id: Output virtual IOMMU ID for the allocated object > > + * > > + * Allocate a virtual IOMMU object that holds a (shared) nesting parent HWPT > > Allocate a virtual IOMMU object that represents the underlying > physical IOMMU's virtualization support. The vIOMMU object is a > security isolated slice of the physical IOMMU HW that is unique to a > specific VM. Operations global to the IOMMU are connected to the > vIOMMU, such as: > - Security namespace for guest owned ID, eg guest controlled cache tags > - Virtualization of various platforms IDs like RIDs and others > - direct assigned invalidation queues > - direct assigned interrupts > - non-affiliated event reporting > - Delivery of paravirtualized invalidation Ack. Looks like you prefer using "vIOMMU" v.s. "VIOMMU"? I would go through all the patches (QEMU including) to keep that aligned. Thanks Nicolin
On Thu, Sep 05, 2024 at 10:10:38AM -0700, Nicolin Chen wrote: > On Thu, Sep 05, 2024 at 12:53:02PM -0300, Jason Gunthorpe wrote: > > On Tue, Aug 27, 2024 at 09:59:39AM -0700, Nicolin Chen wrote: > > > +/** > > > + * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC) > > > + * @size: sizeof(struct iommu_viommu_alloc) > > > + * @flags: Must be 0 > > > + * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type > > > + * @dev_id: The device to allocate this virtual IOMMU for > > > > @dev_id: The device's physical IOMMU will be used to back t he vIOMMU > > > > > + * @hwpt_id: ID of a nesting parent HWPT to associate to > > > > A nesting parent HWPT that will provide translation for an vIOMMU DMA > > > > > + * @out_viommu_id: Output virtual IOMMU ID for the allocated object > > > + * > > > + * Allocate a virtual IOMMU object that holds a (shared) nesting parent HWPT > > > > Allocate a virtual IOMMU object that represents the underlying > > physical IOMMU's virtualization support. The vIOMMU object is a > > security isolated slice of the physical IOMMU HW that is unique to a > > specific VM. Operations global to the IOMMU are connected to the > > vIOMMU, such as: > > - Security namespace for guest owned ID, eg guest controlled cache tags > > - Virtualization of various platforms IDs like RIDs and others > > - direct assigned invalidation queues > > - direct assigned interrupts > > - non-affiliated event reporting > > - Delivery of paravirtualized invalidation > > Ack. Also write something about the HWPT.. > Looks like you prefer using "vIOMMU" v.s. "VIOMMU"? I would go > through all the patches (QEMU including) to keep that aligned. Yeah, VIOMMU just for all-caps constants Jason
On Thu, Sep 05, 2024 at 02:41:00PM -0300, Jason Gunthorpe wrote: > > > > + * @out_viommu_id: Output virtual IOMMU ID for the allocated object > > > > + * > > > > + * Allocate a virtual IOMMU object that holds a (shared) nesting parent HWPT > > > > > > Allocate a virtual IOMMU object that represents the underlying > > > physical IOMMU's virtualization support. The vIOMMU object is a > > > security isolated slice of the physical IOMMU HW that is unique to a > > > specific VM. Operations global to the IOMMU are connected to the > > > vIOMMU, such as: > > > - Security namespace for guest owned ID, eg guest controlled cache tags > > > - Virtualization of various platforms IDs like RIDs and others > > > - direct assigned invalidation queues > > > - direct assigned interrupts > > > - non-affiliated event reporting > > > - Delivery of paravirtualized invalidation > > > > Ack. > > Also write something about the HWPT.. Assuming it's about sharing parent HWPT, ack. Nicolin
On 2024/8/28 0:59, Nicolin Chen wrote:
> +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> +{
> + struct iommu_viommu_alloc *cmd = ucmd->cmd;
> + struct iommufd_hwpt_paging *hwpt_paging;
> + struct iommufd_viommu *viommu;
> + struct iommufd_device *idev;
> + int rc;
> +
> + if (cmd->flags)
> + return -EOPNOTSUPP;
> +
> + idev = iommufd_get_device(ucmd, cmd->dev_id);
Why does a device reference count is needed here? When is this reference
count released after the VIOMMU is allocated?
> + if (IS_ERR(idev))
> + return PTR_ERR(idev);
> +
> + hwpt_paging = iommufd_get_hwpt_paging(ucmd, cmd->hwpt_id);
> + if (IS_ERR(hwpt_paging)) {
> + rc = PTR_ERR(hwpt_paging);
> + goto out_put_idev;
> + }
> +
> + if (!hwpt_paging->nest_parent) {
> + rc = -EINVAL;
> + goto out_put_hwpt;
> + }
> +
> + if (cmd->type != IOMMU_VIOMMU_TYPE_DEFAULT) {
> + rc = -EOPNOTSUPP;
> + goto out_put_hwpt;
> + }
> +
> + viommu = iommufd_object_alloc(ucmd->ictx, viommu, IOMMUFD_OBJ_VIOMMU);
> + if (IS_ERR(viommu)) {
> + rc = PTR_ERR(viommu);
> + goto out_put_hwpt;
> + }
> +
> + viommu->type = cmd->type;
> + viommu->ictx = ucmd->ictx;
> + viommu->hwpt = hwpt_paging;
> +
> + refcount_inc(&viommu->hwpt->common.obj.users);
> +
> + cmd->out_viommu_id = viommu->obj.id;
> + rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
> + if (rc)
> + goto out_abort;
> + iommufd_object_finalize(ucmd->ictx, &viommu->obj);
> + goto out_put_hwpt;
> +
> +out_abort:
> + iommufd_object_abort_and_destroy(ucmd->ictx, &viommu->obj);
> +out_put_hwpt:
> + iommufd_put_object(ucmd->ictx, &hwpt_paging->common.obj);
> +out_put_idev:
> + iommufd_put_object(ucmd->ictx, &idev->obj);
> + return rc;
> +}
Thanks,
baolu
On Sun, Sep 01, 2024 at 10:39:17AM +0800, Baolu Lu wrote:
> On 2024/8/28 0:59, Nicolin Chen wrote:
> > +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> > +{
> > + struct iommu_viommu_alloc *cmd = ucmd->cmd;
> > + struct iommufd_hwpt_paging *hwpt_paging;
> > + struct iommufd_viommu *viommu;
> > + struct iommufd_device *idev;
> > + int rc;
> > +
> > + if (cmd->flags)
> > + return -EOPNOTSUPP;
> > +
> > + idev = iommufd_get_device(ucmd, cmd->dev_id);
>
> Why does a device reference count is needed here? When is this reference
> count released after the VIOMMU is allocated?
Hmm, it was used to get dev->iommu->iommu_dev to pin the VIOMMU to
a physical IOMMU instance (in v1). Jason suggested to remove that,
yet I didn't realize that this idev is now completely useless.
With that being said, a parent HWPT could be shared across VIOMUs
allocated for the same VM. So, I think we do need a dev pointer to
know which physical instance the VIOMMU allocates for, especially
for a driver-managed VIOMMU.
Perhaps we should add back the iommu_dev and properly refcount it.
Thanks
Nicolin
On Sun, Sep 01, 2024 at 10:27:09PM -0700, Nicolin Chen wrote:
> On Sun, Sep 01, 2024 at 10:39:17AM +0800, Baolu Lu wrote:
> > On 2024/8/28 0:59, Nicolin Chen wrote:
> > > +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> > > +{
> > > + struct iommu_viommu_alloc *cmd = ucmd->cmd;
> > > + struct iommufd_hwpt_paging *hwpt_paging;
> > > + struct iommufd_viommu *viommu;
> > > + struct iommufd_device *idev;
> > > + int rc;
> > > +
> > > + if (cmd->flags)
> > > + return -EOPNOTSUPP;
> > > +
> > > + idev = iommufd_get_device(ucmd, cmd->dev_id);
> >
> > Why does a device reference count is needed here? When is this reference
> > count released after the VIOMMU is allocated?
>
> Hmm, it was used to get dev->iommu->iommu_dev to pin the VIOMMU to
> a physical IOMMU instance (in v1). Jason suggested to remove that,
> yet I didn't realize that this idev is now completely useless.
>
> With that being said, a parent HWPT could be shared across VIOMUs
> allocated for the same VM. So, I think we do need a dev pointer to
> know which physical instance the VIOMMU allocates for, especially
> for a driver-managed VIOMMU.
Eventually you need a way to pin the physical iommu, without pinning
any idevs. Not sure how best to do that
Jason
On Wed, Sep 04, 2024 at 01:26:21PM -0300, Jason Gunthorpe wrote:
> On Sun, Sep 01, 2024 at 10:27:09PM -0700, Nicolin Chen wrote:
> > On Sun, Sep 01, 2024 at 10:39:17AM +0800, Baolu Lu wrote:
> > > On 2024/8/28 0:59, Nicolin Chen wrote:
> > > > +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> > > > +{
> > > > + struct iommu_viommu_alloc *cmd = ucmd->cmd;
> > > > + struct iommufd_hwpt_paging *hwpt_paging;
> > > > + struct iommufd_viommu *viommu;
> > > > + struct iommufd_device *idev;
> > > > + int rc;
> > > > +
> > > > + if (cmd->flags)
> > > > + return -EOPNOTSUPP;
> > > > +
> > > > + idev = iommufd_get_device(ucmd, cmd->dev_id);
> > >
> > > Why does a device reference count is needed here? When is this reference
> > > count released after the VIOMMU is allocated?
> >
> > Hmm, it was used to get dev->iommu->iommu_dev to pin the VIOMMU to
> > a physical IOMMU instance (in v1). Jason suggested to remove that,
> > yet I didn't realize that this idev is now completely useless.
> >
> > With that being said, a parent HWPT could be shared across VIOMUs
> > allocated for the same VM. So, I think we do need a dev pointer to
> > know which physical instance the VIOMMU allocates for, especially
> > for a driver-managed VIOMMU.
>
> Eventually you need a way to pin the physical iommu, without pinning
> any idevs. Not sure how best to do that
Just trying to clarify "without pinning any idevs", does it mean
we shouldn't pass in an idev_id to get dev->iommu->iommu_dev?
Otherwise, iommu_probe_device_lock and iommu_device_lock in the
iommu.c are good enough to lock dev->iommu and iommu->list. And
I think we just need an iommu helper refcounting the dev_iommu
(or iommu_device) as we previously discussed.
Thanks
Nicolin
On Wed, Sep 04, 2024 at 10:29:26AM -0700, Nicolin Chen wrote:
> On Wed, Sep 04, 2024 at 01:26:21PM -0300, Jason Gunthorpe wrote:
> > On Sun, Sep 01, 2024 at 10:27:09PM -0700, Nicolin Chen wrote:
> > > On Sun, Sep 01, 2024 at 10:39:17AM +0800, Baolu Lu wrote:
> > > > On 2024/8/28 0:59, Nicolin Chen wrote:
> > > > > +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> > > > > +{
> > > > > + struct iommu_viommu_alloc *cmd = ucmd->cmd;
> > > > > + struct iommufd_hwpt_paging *hwpt_paging;
> > > > > + struct iommufd_viommu *viommu;
> > > > > + struct iommufd_device *idev;
> > > > > + int rc;
> > > > > +
> > > > > + if (cmd->flags)
> > > > > + return -EOPNOTSUPP;
> > > > > +
> > > > > + idev = iommufd_get_device(ucmd, cmd->dev_id);
> > > >
> > > > Why does a device reference count is needed here? When is this reference
> > > > count released after the VIOMMU is allocated?
> > >
> > > Hmm, it was used to get dev->iommu->iommu_dev to pin the VIOMMU to
> > > a physical IOMMU instance (in v1). Jason suggested to remove that,
> > > yet I didn't realize that this idev is now completely useless.
> > >
> > > With that being said, a parent HWPT could be shared across VIOMUs
> > > allocated for the same VM. So, I think we do need a dev pointer to
> > > know which physical instance the VIOMMU allocates for, especially
> > > for a driver-managed VIOMMU.
> >
> > Eventually you need a way to pin the physical iommu, without pinning
> > any idevs. Not sure how best to do that
>
> Just trying to clarify "without pinning any idevs", does it mean
> we shouldn't pass in an idev_id to get dev->iommu->iommu_dev?
From userspace we have no choice but to use an idev_id to locate the
physical iommu
But since we want to support hotplug it is rather problematic if that
idev is permanently locked down.
> Otherwise, iommu_probe_device_lock and iommu_device_lock in the
> iommu.c are good enough to lock dev->iommu and iommu->list. And
> I think we just need an iommu helper refcounting the dev_iommu
> (or iommu_device) as we previously discussed.
If you have a ref on an idev then the iommu_dev has to be stable, so
you can just incr some refcount and then drop the idev stuff.
Jason
On Wed, Sep 04, 2024 at 08:37:07PM -0300, Jason Gunthorpe wrote:
> On Wed, Sep 04, 2024 at 10:29:26AM -0700, Nicolin Chen wrote:
> > On Wed, Sep 04, 2024 at 01:26:21PM -0300, Jason Gunthorpe wrote:
> > > On Sun, Sep 01, 2024 at 10:27:09PM -0700, Nicolin Chen wrote:
> > > > On Sun, Sep 01, 2024 at 10:39:17AM +0800, Baolu Lu wrote:
> > > > > On 2024/8/28 0:59, Nicolin Chen wrote:
> > > > > > +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> > > > > > +{
> > > > > > + struct iommu_viommu_alloc *cmd = ucmd->cmd;
> > > > > > + struct iommufd_hwpt_paging *hwpt_paging;
> > > > > > + struct iommufd_viommu *viommu;
> > > > > > + struct iommufd_device *idev;
> > > > > > + int rc;
> > > > > > +
> > > > > > + if (cmd->flags)
> > > > > > + return -EOPNOTSUPP;
> > > > > > +
> > > > > > + idev = iommufd_get_device(ucmd, cmd->dev_id);
> > > > >
> > > > > Why does a device reference count is needed here? When is this reference
> > > > > count released after the VIOMMU is allocated?
> > > >
> > > > Hmm, it was used to get dev->iommu->iommu_dev to pin the VIOMMU to
> > > > a physical IOMMU instance (in v1). Jason suggested to remove that,
> > > > yet I didn't realize that this idev is now completely useless.
> > > >
> > > > With that being said, a parent HWPT could be shared across VIOMUs
> > > > allocated for the same VM. So, I think we do need a dev pointer to
> > > > know which physical instance the VIOMMU allocates for, especially
> > > > for a driver-managed VIOMMU.
> > >
> > > Eventually you need a way to pin the physical iommu, without pinning
> > > any idevs. Not sure how best to do that
> >
> > Just trying to clarify "without pinning any idevs", does it mean
> > we shouldn't pass in an idev_id to get dev->iommu->iommu_dev?
>
> From userspace we have no choice but to use an idev_id to locate the
> physical iommu
>
> But since we want to support hotplug it is rather problematic if that
> idev is permanently locked down.
>
> > Otherwise, iommu_probe_device_lock and iommu_device_lock in the
> > iommu.c are good enough to lock dev->iommu and iommu->list. And
> > I think we just need an iommu helper refcounting the dev_iommu
> > (or iommu_device) as we previously discussed.
>
> If you have a ref on an idev then the iommu_dev has to be stable, so
> you can just incr some refcount and then drop the idev stuff.
Looks like a refcount could only WARN on an unbalanced iommu_dev in
iommu_device_unregister() and iommu_device_unregister_bus(), either
of which returns void so no way of doing a retry. And their callers
would also likely free the entire memory of the driver-level struct
where iommu_dev usually locates.. I feel it gets less meaningful to
add the refcount if the lifecycle cannot be guaranteed.
You mentioned that actually only the iommufd selftest might hit such
a corner case, so perhaps we should do something in the selftest code
v.s. the iommu core. What do you think?
Thanks
Nicolin
On Wed, Sep 11, 2024 at 08:39:57PM -0700, Nicolin Chen wrote: > You mentioned that actually only the iommufd selftest might hit such > a corner case, so perhaps we should do something in the selftest code > v.s. the iommu core. What do you think? Maybe, if there were viommu allocation callbacks maybe those can pin the memory in the selftest.. Jason
On Wed, Sep 04, 2024 at 08:37:07PM -0300, Jason Gunthorpe wrote:
> On Wed, Sep 04, 2024 at 10:29:26AM -0700, Nicolin Chen wrote:
> > On Wed, Sep 04, 2024 at 01:26:21PM -0300, Jason Gunthorpe wrote:
> > > On Sun, Sep 01, 2024 at 10:27:09PM -0700, Nicolin Chen wrote:
> > > > On Sun, Sep 01, 2024 at 10:39:17AM +0800, Baolu Lu wrote:
> > > > > On 2024/8/28 0:59, Nicolin Chen wrote:
> > > > > > +int iommufd_viommu_alloc_ioctl(struct iommufd_ucmd *ucmd)
> > > > > > +{
> > > > > > + struct iommu_viommu_alloc *cmd = ucmd->cmd;
> > > > > > + struct iommufd_hwpt_paging *hwpt_paging;
> > > > > > + struct iommufd_viommu *viommu;
> > > > > > + struct iommufd_device *idev;
> > > > > > + int rc;
> > > > > > +
> > > > > > + if (cmd->flags)
> > > > > > + return -EOPNOTSUPP;
> > > > > > +
> > > > > > + idev = iommufd_get_device(ucmd, cmd->dev_id);
> > > > >
> > > > > Why does a device reference count is needed here? When is this reference
> > > > > count released after the VIOMMU is allocated?
> > > >
> > > > Hmm, it was used to get dev->iommu->iommu_dev to pin the VIOMMU to
> > > > a physical IOMMU instance (in v1). Jason suggested to remove that,
> > > > yet I didn't realize that this idev is now completely useless.
> > > >
> > > > With that being said, a parent HWPT could be shared across VIOMUs
> > > > allocated for the same VM. So, I think we do need a dev pointer to
> > > > know which physical instance the VIOMMU allocates for, especially
> > > > for a driver-managed VIOMMU.
> > >
> > > Eventually you need a way to pin the physical iommu, without pinning
> > > any idevs. Not sure how best to do that
> >
> > Just trying to clarify "without pinning any idevs", does it mean
> > we shouldn't pass in an idev_id to get dev->iommu->iommu_dev?
>
> From userspace we have no choice but to use an idev_id to locate the
> physical iommu
>
> But since we want to support hotplug it is rather problematic if that
> idev is permanently locked down.
Agreed. Thanks for clarification.
> > Otherwise, iommu_probe_device_lock and iommu_device_lock in the
> > iommu.c are good enough to lock dev->iommu and iommu->list. And
> > I think we just need an iommu helper refcounting the dev_iommu
> > (or iommu_device) as we previously discussed.
>
> If you have a ref on an idev then the iommu_dev has to be stable, so
> you can just incr some refcount and then drop the idev stuff.
Yes. The small routine would be like:
(1) Lock/get idev, dev->iommu, and dev->iommu->list
(2) Increase the refcount at dev_iommu
(3) Save the dev_iommu to viommu
(4) Unlock/put those in (1).
Thank you
Nicolin
© 2016 - 2025 Red Hat, Inc.