drivers/iommu/iommufd/device.c | 74 +++++++++++++------------ drivers/iommu/iommufd/iommufd_private.h | 21 ++++++- 2 files changed, 60 insertions(+), 35 deletions(-)
Currently, device reserved regions are only enforced when the device is
attached to an hwpt_paging. In other words, if the device gets attached
to an hwpt_nested directly, the parent hwpt_paging of the hwpt_nested's
would not enforce those reserved IOVAs. This works for most of reserved
region types, but not for IOMMU_RESV_SW_MSI, which is a unique software
defined window, required by a nesting case too to setup an MSI doorbell
on the parent stage-2 hwpt/domain.
Kevin pointed out that:
1) there is no usage using up closely the entire IOVA space yet,
2) guest may change the viommu mode to switch between nested
and paging then VMM has to take all devices' reserved regions
into consideration anyway, when composing the GPA space.
Link: https://lore.kernel.org/all/BN9PR11MB5276497781C96415272E6FED8CB12@BN9PR11MB5276.namprd11.prod.outlook.com/
So it would be actually convenient for us to also enforce reserved IOVA
onto the parent hwpt_paging, when attaching a device to an hwpt_nested.
Repurpose the existing attach/replace_paging helpers to attach device's
reserved IOVAs exclusively. Allow a common hwpt input to support both a
hwpt_paging type and a hwpt_nested type.
Rework the to_hwpt_paging helper, which is only used by these reserved
IOVA helpers, to allow an IOMMUFD_OBJ_HWPT_NESTED hwpt to direct to its
parent hwpt_paging. Return a NULL for any potential new HWPT type, and
make a NOP in those reserved IOVA helpers accordingly.
Suggested-by: Tian, Kevin <kevin.tian@intel.com>
Signed-off-by: Nicolin Chen <nicolinc@nvidia.com>
---
[To Kevin]
I found 4~5 places doing the same hwpt_paging routine in those IOVA
helpers as you suggested previously. So, I ended up with moving them
back to the to_hwpt_paging helper, while making a room for any other
HWPT type that concerned you.
drivers/iommu/iommufd/device.c | 74 +++++++++++++------------
drivers/iommu/iommufd/iommufd_private.h | 21 ++++++-
2 files changed, 60 insertions(+), 35 deletions(-)
diff --git a/drivers/iommu/iommufd/device.c b/drivers/iommu/iommufd/device.c
index 76b7297d22b0f..f4c27d5359531 100644
--- a/drivers/iommu/iommufd/device.c
+++ b/drivers/iommu/iommufd/device.c
@@ -338,13 +338,18 @@ static int iommufd_group_setup_msi(struct iommufd_group *igroup,
return 0;
}
-static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging,
- struct iommufd_device *idev)
+static int
+iommufd_device_attach_reserved_iova(struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *hwpt)
{
+ struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
int rc;
lockdep_assert_held(&idev->igroup->lock);
+ if (!hwpt_paging)
+ return 0;
+
rc = iopt_table_enforce_dev_resv_regions(&hwpt_paging->ioas->iopt,
idev->dev,
&idev->igroup->sw_msi_start);
@@ -362,6 +367,18 @@ static int iommufd_hwpt_paging_attach(struct iommufd_hwpt_paging *hwpt_paging,
return 0;
}
+static void
+iommufd_device_remove_reserved_iova(struct iommufd_device *idev,
+ struct iommufd_hw_pagetable *hwpt)
+{
+ struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
+
+ if (!hwpt_paging)
+ return;
+
+ iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, idev->dev);
+}
+
int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
struct iommufd_device *idev)
{
@@ -374,11 +391,9 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
goto err_unlock;
}
- if (hwpt_is_paging(hwpt)) {
- rc = iommufd_hwpt_paging_attach(to_hwpt_paging(hwpt), idev);
- if (rc)
- goto err_unlock;
- }
+ rc = iommufd_device_attach_reserved_iova(idev, hwpt);
+ if (rc)
+ goto err_unlock;
/*
* Only attach to the group once for the first device that is in the
@@ -398,9 +413,7 @@ int iommufd_hw_pagetable_attach(struct iommufd_hw_pagetable *hwpt,
mutex_unlock(&idev->igroup->lock);
return 0;
err_unresv:
- if (hwpt_is_paging(hwpt))
- iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
- idev->dev);
+ iommufd_device_remove_reserved_iova(idev, hwpt);
err_unlock:
mutex_unlock(&idev->igroup->lock);
return rc;
@@ -417,9 +430,7 @@ iommufd_hw_pagetable_detach(struct iommufd_device *idev)
iommufd_hwpt_detach_device(hwpt, idev);
idev->igroup->hwpt = NULL;
}
- if (hwpt_is_paging(hwpt))
- iopt_remove_reserved_iova(&to_hwpt_paging(hwpt)->ioas->iopt,
- idev->dev);
+ iommufd_device_remove_reserved_iova(idev, hwpt);
mutex_unlock(&idev->igroup->lock);
/* Caller must destroy hwpt */
@@ -440,28 +451,31 @@ iommufd_device_do_attach(struct iommufd_device *idev,
static void
iommufd_group_remove_reserved_iova(struct iommufd_group *igroup,
- struct iommufd_hwpt_paging *hwpt_paging)
+ struct iommufd_hw_pagetable *hwpt)
{
struct iommufd_device *cur;
lockdep_assert_held(&igroup->lock);
list_for_each_entry(cur, &igroup->device_list, group_item)
- iopt_remove_reserved_iova(&hwpt_paging->ioas->iopt, cur->dev);
+ iommufd_device_remove_reserved_iova(cur, hwpt);
}
static int
-iommufd_group_do_replace_paging(struct iommufd_group *igroup,
- struct iommufd_hwpt_paging *hwpt_paging)
+iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
+ struct iommufd_hw_pagetable *hwpt)
{
+ struct iommufd_hwpt_paging *hwpt_paging = to_hwpt_paging(hwpt);
struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
struct iommufd_device *cur;
int rc;
lockdep_assert_held(&igroup->lock);
- if (!hwpt_is_paging(old_hwpt) ||
- hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
+ if (!hwpt_paging)
+ return 0;
+
+ if (iommufd_hw_pagetable_compare_ioas(old_hwpt, hwpt)) {
list_for_each_entry(cur, &igroup->device_list, group_item) {
rc = iopt_table_enforce_dev_resv_regions(
&hwpt_paging->ioas->iopt, cur->dev, NULL);
@@ -476,7 +490,7 @@ iommufd_group_do_replace_paging(struct iommufd_group *igroup,
return 0;
err_unresv:
- iommufd_group_remove_reserved_iova(igroup, hwpt_paging);
+ iommufd_group_remove_reserved_iova(igroup, hwpt);
return rc;
}
@@ -502,22 +516,16 @@ iommufd_device_do_replace(struct iommufd_device *idev,
}
old_hwpt = igroup->hwpt;
- if (hwpt_is_paging(hwpt)) {
- rc = iommufd_group_do_replace_paging(igroup,
- to_hwpt_paging(hwpt));
- if (rc)
- goto err_unlock;
- }
+ rc = iommufd_group_do_replace_reserved_iova(igroup, hwpt);
+ if (rc)
+ goto err_unlock;
rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
if (rc)
goto err_unresv;
- if (hwpt_is_paging(old_hwpt) &&
- (!hwpt_is_paging(hwpt) ||
- to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas))
- iommufd_group_remove_reserved_iova(igroup,
- to_hwpt_paging(old_hwpt));
+ if (iommufd_hw_pagetable_compare_ioas(old_hwpt, hwpt))
+ iommufd_group_remove_reserved_iova(igroup, old_hwpt);
igroup->hwpt = hwpt;
@@ -535,9 +543,7 @@ iommufd_device_do_replace(struct iommufd_device *idev,
/* Caller must destroy old_hwpt */
return old_hwpt;
err_unresv:
- if (hwpt_is_paging(hwpt))
- iommufd_group_remove_reserved_iova(igroup,
- to_hwpt_paging(hwpt));
+ iommufd_group_remove_reserved_iova(igroup, hwpt);
err_unlock:
mutex_unlock(&idev->igroup->lock);
return ERR_PTR(rc);
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index 04109572a53ca..414d5230a497b 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -316,7 +316,26 @@ static inline bool hwpt_is_paging(struct iommufd_hw_pagetable *hwpt)
static inline struct iommufd_hwpt_paging *
to_hwpt_paging(struct iommufd_hw_pagetable *hwpt)
{
- return container_of(hwpt, struct iommufd_hwpt_paging, common);
+ switch (hwpt->obj.type) {
+ case IOMMUFD_OBJ_HWPT_PAGING:
+ return container_of(hwpt, struct iommufd_hwpt_paging, common);
+ case IOMMUFD_OBJ_HWPT_NESTED:
+ return container_of(hwpt, struct iommufd_hwpt_nested, common)->parent;
+ default:
+ return NULL;
+ }
+}
+
+static inline bool
+iommufd_hw_pagetable_compare_ioas(struct iommufd_hw_pagetable *old_hwpt,
+ struct iommufd_hw_pagetable *new_hwpt)
+{
+ struct iommufd_hwpt_paging *old_hwpt_paging = to_hwpt_paging(old_hwpt);
+ struct iommufd_hwpt_paging *new_hwpt_paging = to_hwpt_paging(new_hwpt);
+
+ if (!old_hwpt_paging || !new_hwpt_paging)
+ return false;
+ return old_hwpt_paging->ioas != new_hwpt_paging->ioas;
}
static inline struct iommufd_hwpt_paging *
--
2.34.1
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Friday, August 2, 2024 1:35 PM
>
> static int
> -iommufd_group_do_replace_paging(struct iommufd_group *igroup,
> - struct iommufd_hwpt_paging *hwpt_paging)
> +iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
> + struct iommufd_hw_pagetable *hwpt)
> {
> + struct iommufd_hwpt_paging *hwpt_paging =
> to_hwpt_paging(hwpt);
> struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
> struct iommufd_device *cur;
> int rc;
>
> lockdep_assert_held(&igroup->lock);
>
> - if (!hwpt_is_paging(old_hwpt) ||
> - hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
> + if (!hwpt_paging)
> + return 0;
> +
> + if (iommufd_hw_pagetable_compare_ioas(old_hwpt, hwpt)) {
hmm this change is broken. In this helper:
if (!old_hwpt_paging || !new_hwpt_paging)
return false;
return old_hwpt_paging->ioas != new_hwpt_paging->ioas;
Obviously the original code wants to enforce reserved regions if
new_hwpt is paging && old_hwpt is not paging, but this change
skips this scenario.
>
> rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
> if (rc)
> goto err_unresv;
>
> - if (hwpt_is_paging(old_hwpt) &&
> - (!hwpt_is_paging(hwpt) ||
> - to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas))
> - iommufd_group_remove_reserved_iova(igroup,
> - to_hwpt_paging(old_hwpt));
> + if (iommufd_hw_pagetable_compare_ioas(old_hwpt, hwpt))
> + iommufd_group_remove_reserved_iova(igroup, old_hwpt);
this is also broken.
Probably it's clearer to continue open-coding those conditions in
iommufd_group_do_replace_reserved_iova() and
iommufd_group_remove_reserved_iova().
On Mon, Aug 05, 2024 at 07:40:45AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Friday, August 2, 2024 1:35 PM
> >
> > static int
> > -iommufd_group_do_replace_paging(struct iommufd_group *igroup,
> > - struct iommufd_hwpt_paging *hwpt_paging)
> > +iommufd_group_do_replace_reserved_iova(struct iommufd_group *igroup,
> > + struct iommufd_hw_pagetable *hwpt)
> > {
> > + struct iommufd_hwpt_paging *hwpt_paging =
> > to_hwpt_paging(hwpt);
> > struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
> > struct iommufd_device *cur;
> > int rc;
> >
> > lockdep_assert_held(&igroup->lock);
> >
> > - if (!hwpt_is_paging(old_hwpt) ||
> > - hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
> > + if (!hwpt_paging)
> > + return 0;
> > +
> > + if (iommufd_hw_pagetable_compare_ioas(old_hwpt, hwpt)) {
>
> hmm this change is broken. In this helper:
>
> if (!old_hwpt_paging || !new_hwpt_paging)
> return false;
> return old_hwpt_paging->ioas != new_hwpt_paging->ioas;
>
> Obviously the original code wants to enforce reserved regions if
> new_hwpt is paging && old_hwpt is not paging, but this change
> skips this scenario.
Hmm..I think that is the intention of this patch?
The original code does that because it didn't enforce reserved
region (to the parent paging hwpt) when attaching the group to
a nested one. Now, it does. So, we basically check whether the
associated ioas has changed or not. Right?
Perhaps I should have added a line of comments to highlight it
getting the parent hwpt pointer now for a nested hwpt.
> >
> > rc = iommufd_hwpt_replace_device(idev, hwpt, old_hwpt);
> > if (rc)
> > goto err_unresv;
> >
> > - if (hwpt_is_paging(old_hwpt) &&
> > - (!hwpt_is_paging(hwpt) ||
> > - to_hwpt_paging(hwpt)->ioas != to_hwpt_paging(old_hwpt)->ioas))
> > - iommufd_group_remove_reserved_iova(igroup,
> > - to_hwpt_paging(old_hwpt));
> > + if (iommufd_hw_pagetable_compare_ioas(old_hwpt, hwpt))
> > + iommufd_group_remove_reserved_iova(igroup, old_hwpt);
>
> this is also broken.
>
> Probably it's clearer to continue open-coding those conditions in
> iommufd_group_do_replace_reserved_iova() and
> iommufd_group_remove_reserved_iova().
Here it basically compares the ioas too. The original code too,
while having an additional hwpt_is_paging() check on both hwpts
because of the same reason.
Thanks
Nicolin
> From: Nicolin Chen <nicolinc@nvidia.com>
> Sent: Tuesday, August 6, 2024 2:04 AM
>
> On Mon, Aug 05, 2024 at 07:40:45AM +0000, Tian, Kevin wrote:
> > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > Sent: Friday, August 2, 2024 1:35 PM
> > >
> > > static int
> > > -iommufd_group_do_replace_paging(struct iommufd_group *igroup,
> > > - struct iommufd_hwpt_paging *hwpt_paging)
> > > +iommufd_group_do_replace_reserved_iova(struct iommufd_group
> *igroup,
> > > + struct iommufd_hw_pagetable *hwpt)
> > > {
> > > + struct iommufd_hwpt_paging *hwpt_paging =
> > > to_hwpt_paging(hwpt);
> > > struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
> > > struct iommufd_device *cur;
> > > int rc;
> > >
> > > lockdep_assert_held(&igroup->lock);
> > >
> > > - if (!hwpt_is_paging(old_hwpt) ||
> > > - hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
> > > + if (!hwpt_paging)
> > > + return 0;
> > > +
> > > + if (iommufd_hw_pagetable_compare_ioas(old_hwpt, hwpt)) {
> >
> > hmm this change is broken. In this helper:
> >
> > if (!old_hwpt_paging || !new_hwpt_paging)
> > return false;
> > return old_hwpt_paging->ioas != new_hwpt_paging->ioas;
> >
> > Obviously the original code wants to enforce reserved regions if
> > new_hwpt is paging && old_hwpt is not paging, but this change
> > skips this scenario.
>
> Hmm..I think that is the intention of this patch?
>
> The original code does that because it didn't enforce reserved
> region (to the parent paging hwpt) when attaching the group to
> a nested one. Now, it does. So, we basically check whether the
> associated ioas has changed or not. Right?
>
that is too much tied to nested vs. paging type. In case a new
type comes w/o parent/ioas in the future, above will be broken.
IMHO to_hwpt_paging() already resolves the problem about
nested parent while the original code above appears to be
more future proof (w/ the enhanced to_hwpt_paging()), i.e.
adding reserved region to the new hwpt w/ ioas as long as
old/new hwpts don't belong to a same ioas (including the case
where old has no ioas), instead of adding reserved region
only when both old/new have an ioas but different. 😊
On Tue, Aug 06, 2024 at 02:59:53AM +0000, Tian, Kevin wrote:
> > From: Nicolin Chen <nicolinc@nvidia.com>
> > Sent: Tuesday, August 6, 2024 2:04 AM
> >
> > On Mon, Aug 05, 2024 at 07:40:45AM +0000, Tian, Kevin wrote:
> > > > From: Nicolin Chen <nicolinc@nvidia.com>
> > > > Sent: Friday, August 2, 2024 1:35 PM
> > > >
> > > > static int
> > > > -iommufd_group_do_replace_paging(struct iommufd_group *igroup,
> > > > - struct iommufd_hwpt_paging *hwpt_paging)
> > > > +iommufd_group_do_replace_reserved_iova(struct iommufd_group
> > *igroup,
> > > > + struct iommufd_hw_pagetable *hwpt)
> > > > {
> > > > + struct iommufd_hwpt_paging *hwpt_paging =
> > > > to_hwpt_paging(hwpt);
> > > > struct iommufd_hw_pagetable *old_hwpt = igroup->hwpt;
> > > > struct iommufd_device *cur;
> > > > int rc;
> > > >
> > > > lockdep_assert_held(&igroup->lock);
> > > >
> > > > - if (!hwpt_is_paging(old_hwpt) ||
> > > > - hwpt_paging->ioas != to_hwpt_paging(old_hwpt)->ioas) {
> > > > + if (!hwpt_paging)
> > > > + return 0;
> > > > +
> > > > + if (iommufd_hw_pagetable_compare_ioas(old_hwpt, hwpt)) {
> > >
> > > hmm this change is broken. In this helper:
> > >
> > > if (!old_hwpt_paging || !new_hwpt_paging)
> > > return false;
> > > return old_hwpt_paging->ioas != new_hwpt_paging->ioas;
> > >
> > > Obviously the original code wants to enforce reserved regions if
> > > new_hwpt is paging && old_hwpt is not paging, but this change
> > > skips this scenario.
> >
> > Hmm..I think that is the intention of this patch?
> >
> > The original code does that because it didn't enforce reserved
> > region (to the parent paging hwpt) when attaching the group to
> > a nested one. Now, it does. So, we basically check whether the
> > associated ioas has changed or not. Right?
> >
>
> that is too much tied to nested vs. paging type. In case a new
> type comes w/o parent/ioas in the future, above will be broken.
>
> IMHO to_hwpt_paging() already resolves the problem about
> nested parent while the original code above appears to be
> more future proof (w/ the enhanced to_hwpt_paging()), i.e.
> adding reserved region to the new hwpt w/ ioas as long as
> old/new hwpts don't belong to a same ioas (including the case
> where old has no ioas), instead of adding reserved region
> only when both old/new have an ioas but different. 😊
Ah, I see your point. It failed to cover the case where one of
the hwpts returns NULL from to_hwpt_paging().
Will fix and respin.
Thanks
Nicolin
© 2016 - 2026 Red Hat, Inc.