From: Rob Clark <robdclark@chromium.org>
Add an io-pgtable method to walk the pgtable returning the raw PTEs that
would be traversed for a given iova access.
Signed-off-by: Rob Clark <robdclark@chromium.org>
---
drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------
include/linux/io-pgtable.h | 4 +++
2 files changed, 46 insertions(+), 9 deletions(-)
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index f7828a7aad41..f47a0e64bb35 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
data->start_level, ptep);
}
-static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
- unsigned long iova)
+static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
+ int (*cb)(void *cb_data, void *pte, int level),
+ void *cb_data)
{
struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
arm_lpae_iopte pte, *ptep = data->pgd;
int lvl = data->start_level;
+ int ret;
do {
/* Valid IOPTE pointer? */
if (!ptep)
- return 0;
+ return -EFAULT;
/* Grab the IOPTE we're interested in */
ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
@@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
/* Valid entry? */
if (!pte)
- return 0;
+ return -EFAULT;
+
+ ret = cb(cb_data, &pte, lvl);
+ if (ret)
+ return ret;
- /* Leaf entry? */
+ /* Leaf entry? If so, we've found the translation */
if (iopte_leaf(pte, lvl, data->iop.fmt))
- goto found_translation;
+ return 0;
/* Take it to the next level */
ptep = iopte_deref(pte, data);
} while (++lvl < ARM_LPAE_MAX_LEVELS);
/* Ran out of page tables to walk */
+ return -EFAULT;
+}
+
+struct iova_to_phys_walk_data {
+ arm_lpae_iopte pte;
+ int level;
+};
+
+static int iova_to_phys_walk_cb(void *cb_data, void *pte, int level)
+{
+ struct iova_to_phys_walk_data *d = cb_data;
+
+ d->pte = *(arm_lpae_iopte *)pte;
+ d->level = level;
+
return 0;
+}
+
+static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
+ unsigned long iova)
+{
+ struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+ struct iova_to_phys_walk_data d;
+ int ret;
+
+ ret = arm_lpae_pgtable_walk(ops, iova, iova_to_phys_walk_cb, &d);
+ if (ret)
+ return 0;
-found_translation:
- iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
- return iopte_to_paddr(pte, data) | iova;
+ iova &= (ARM_LPAE_BLOCK_SIZE(d.level, data) - 1);
+ return iopte_to_paddr(d.pte, data) | iova;
}
static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
@@ -807,6 +839,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
.map_pages = arm_lpae_map_pages,
.unmap_pages = arm_lpae_unmap_pages,
.iova_to_phys = arm_lpae_iova_to_phys,
+ .pgtable_walk = arm_lpae_pgtable_walk,
};
return data;
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 86cf1f7ae389..261b48af068a 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -177,6 +177,7 @@ struct io_pgtable_cfg {
* @map_pages: Map a physically contiguous range of pages of the same size.
* @unmap_pages: Unmap a range of virtually contiguous pages of the same size.
* @iova_to_phys: Translate iova to physical address.
+ * @pgtable_walk: (optional) Perform a page table walk for a given iova.
*
* These functions map directly onto the iommu_ops member functions with
* the same names.
@@ -190,6 +191,9 @@ struct io_pgtable_ops {
struct iommu_iotlb_gather *gather);
phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
unsigned long iova);
+ int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova,
+ int (*cb)(void *cb_data, void *pte, int level),
+ void *cb_data);
int (*read_and_clear_dirty)(struct io_pgtable_ops *ops,
unsigned long iova, size_t size,
unsigned long flags,
--
2.45.1
On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> would be traversed for a given iova access.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
> drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------
> include/linux/io-pgtable.h | 4 +++
> 2 files changed, 46 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index f7828a7aad41..f47a0e64bb35 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
> data->start_level, ptep);
> }
>
> -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> - unsigned long iova)
> +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
> + int (*cb)(void *cb_data, void *pte, int level),
> + void *cb_data)
> {
> struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> arm_lpae_iopte pte, *ptep = data->pgd;
> int lvl = data->start_level;
> + int ret;
>
> do {
> /* Valid IOPTE pointer? */
> if (!ptep)
> - return 0;
> + return -EFAULT;
nit: -ENOENT might be a little better, as we're only checking against a
NULL entry rather than strictly any faulting entry.
> /* Grab the IOPTE we're interested in */
> ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>
> /* Valid entry? */
> if (!pte)
> - return 0;
> + return -EFAULT;
Same here (and at the end of the function).
> +
> + ret = cb(cb_data, &pte, lvl);
Since pte is on the stack, rather than pointing into the actual pgtable,
I think it would be clearer to pass it by value to the callback.
> + if (ret)
> + return ret;
>
> - /* Leaf entry? */
> + /* Leaf entry? If so, we've found the translation */
> if (iopte_leaf(pte, lvl, data->iop.fmt))
> - goto found_translation;
> + return 0;
>
> /* Take it to the next level */
> ptep = iopte_deref(pte, data);
> } while (++lvl < ARM_LPAE_MAX_LEVELS);
>
> /* Ran out of page tables to walk */
> + return -EFAULT;
> +}
> +
> +struct iova_to_phys_walk_data {
> + arm_lpae_iopte pte;
> + int level;
> +};
Expanding a little on Robin's suggestion, why don't we drop this structure
in favour of something more generic:
struct arm_lpae_walk_data {
arm_lpae_iopte ptes[ARM_LPAE_MAX_LEVELS];
};
and then do something in the walker like:
if (cb && !cb(pte, lvl))
walk_data->ptes[lvl] = pte;
which could return the physical address at the end, if it reaches a leaf
entry. That way arm_lpae_iova_to_phys() is just passing a NULL callback
to the walker and your debug callback just needs to return 0 (i.e. the
callback is basically just saying whether or not to continue the walk).
Will
On Mon, Jun 24, 2024 at 8:14 AM Will Deacon <will@kernel.org> wrote:
>
> On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote:
> > From: Rob Clark <robdclark@chromium.org>
> >
> > Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> > would be traversed for a given iova access.
> >
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> > drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------
> > include/linux/io-pgtable.h | 4 +++
> > 2 files changed, 46 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > index f7828a7aad41..f47a0e64bb35 100644
> > --- a/drivers/iommu/io-pgtable-arm.c
> > +++ b/drivers/iommu/io-pgtable-arm.c
> > @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
> > data->start_level, ptep);
> > }
> >
> > -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> > - unsigned long iova)
> > +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
> > + int (*cb)(void *cb_data, void *pte, int level),
> > + void *cb_data)
> > {
> > struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> > arm_lpae_iopte pte, *ptep = data->pgd;
> > int lvl = data->start_level;
> > + int ret;
> >
> > do {
> > /* Valid IOPTE pointer? */
> > if (!ptep)
> > - return 0;
> > + return -EFAULT;
>
> nit: -ENOENT might be a little better, as we're only checking against a
> NULL entry rather than strictly any faulting entry.
>
> > /* Grab the IOPTE we're interested in */
> > ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> > @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> >
> > /* Valid entry? */
> > if (!pte)
> > - return 0;
> > + return -EFAULT;
>
> Same here (and at the end of the function).
>
> > +
> > + ret = cb(cb_data, &pte, lvl);
>
> Since pte is on the stack, rather than pointing into the actual pgtable,
> I think it would be clearer to pass it by value to the callback.
>
> > + if (ret)
> > + return ret;
> >
> > - /* Leaf entry? */
> > + /* Leaf entry? If so, we've found the translation */
> > if (iopte_leaf(pte, lvl, data->iop.fmt))
> > - goto found_translation;
> > + return 0;
> >
> > /* Take it to the next level */
> > ptep = iopte_deref(pte, data);
> > } while (++lvl < ARM_LPAE_MAX_LEVELS);
> >
> > /* Ran out of page tables to walk */
> > + return -EFAULT;
> > +}
> > +
> > +struct iova_to_phys_walk_data {
> > + arm_lpae_iopte pte;
> > + int level;
> > +};
>
> Expanding a little on Robin's suggestion, why don't we drop this structure
> in favour of something more generic:
>
> struct arm_lpae_walk_data {
> arm_lpae_iopte ptes[ARM_LPAE_MAX_LEVELS];
> };
>
> and then do something in the walker like:
>
> if (cb && !cb(pte, lvl))
> walk_data->ptes[lvl] = pte;
>
So thinking about this some more... if I use a walk_data struct to
return the PTEs, I can just get rid of the callback entirely. That
ends up looking more like my first version. The callback taking
void* was mainly to avoid coding the PTE size in the generic
io_pgtable interface. But if we just go with u64, because that is the
biggest PTE size we need to deal with, then it all gets simpler. (The
callback was actually a semi-awkward interface to use from drm/msm.)
BR,
-R
> which could return the physical address at the end, if it reaches a leaf
> entry. That way arm_lpae_iova_to_phys() is just passing a NULL callback
> to the walker and your debug callback just needs to return 0 (i.e. the
> callback is basically just saying whether or not to continue the walk).
>
> Will
On Mon, Jun 24, 2024 at 8:14 AM Will Deacon <will@kernel.org> wrote:
>
> On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote:
> > From: Rob Clark <robdclark@chromium.org>
> >
> > Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> > would be traversed for a given iova access.
> >
> > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > ---
> > drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------
> > include/linux/io-pgtable.h | 4 +++
> > 2 files changed, 46 insertions(+), 9 deletions(-)
> >
> > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > index f7828a7aad41..f47a0e64bb35 100644
> > --- a/drivers/iommu/io-pgtable-arm.c
> > +++ b/drivers/iommu/io-pgtable-arm.c
> > @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
> > data->start_level, ptep);
> > }
> >
> > -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> > - unsigned long iova)
> > +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
> > + int (*cb)(void *cb_data, void *pte, int level),
> > + void *cb_data)
> > {
> > struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> > arm_lpae_iopte pte, *ptep = data->pgd;
> > int lvl = data->start_level;
> > + int ret;
> >
> > do {
> > /* Valid IOPTE pointer? */
> > if (!ptep)
> > - return 0;
> > + return -EFAULT;
>
> nit: -ENOENT might be a little better, as we're only checking against a
> NULL entry rather than strictly any faulting entry.
>
> > /* Grab the IOPTE we're interested in */
> > ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> > @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> >
> > /* Valid entry? */
> > if (!pte)
> > - return 0;
> > + return -EFAULT;
>
> Same here (and at the end of the function).
>
> > +
> > + ret = cb(cb_data, &pte, lvl);
>
> Since pte is on the stack, rather than pointing into the actual pgtable,
> I think it would be clearer to pass it by value to the callback.
fwiw, I passed it as a void* to avoid the pte size.. although I guess
it could be a union of all the possible pte types
BR,
-R
>
> > + if (ret)
> > + return ret;
> >
> > - /* Leaf entry? */
> > + /* Leaf entry? If so, we've found the translation */
> > if (iopte_leaf(pte, lvl, data->iop.fmt))
> > - goto found_translation;
> > + return 0;
> >
> > /* Take it to the next level */
> > ptep = iopte_deref(pte, data);
> > } while (++lvl < ARM_LPAE_MAX_LEVELS);
> >
> > /* Ran out of page tables to walk */
> > + return -EFAULT;
> > +}
> > +
> > +struct iova_to_phys_walk_data {
> > + arm_lpae_iopte pte;
> > + int level;
> > +};
>
> Expanding a little on Robin's suggestion, why don't we drop this structure
> in favour of something more generic:
>
> struct arm_lpae_walk_data {
> arm_lpae_iopte ptes[ARM_LPAE_MAX_LEVELS];
> };
>
> and then do something in the walker like:
>
> if (cb && !cb(pte, lvl))
> walk_data->ptes[lvl] = pte;
>
> which could return the physical address at the end, if it reaches a leaf
> entry. That way arm_lpae_iova_to_phys() is just passing a NULL callback
> to the walker and your debug callback just needs to return 0 (i.e. the
> callback is basically just saying whether or not to continue the walk).
>
> Will
On Mon, Jun 24, 2024 at 08:37:26AM -0700, Rob Clark wrote:
> On Mon, Jun 24, 2024 at 8:14 AM Will Deacon <will@kernel.org> wrote:
> >
> > On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote:
> > > From: Rob Clark <robdclark@chromium.org>
> > >
> > > Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> > > would be traversed for a given iova access.
> > >
> > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > ---
> > > drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------
> > > include/linux/io-pgtable.h | 4 +++
> > > 2 files changed, 46 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > > index f7828a7aad41..f47a0e64bb35 100644
> > > --- a/drivers/iommu/io-pgtable-arm.c
> > > +++ b/drivers/iommu/io-pgtable-arm.c
> > > @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
> > > data->start_level, ptep);
> > > }
> > >
> > > -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> > > - unsigned long iova)
> > > +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
> > > + int (*cb)(void *cb_data, void *pte, int level),
> > > + void *cb_data)
> > > {
> > > struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> > > arm_lpae_iopte pte, *ptep = data->pgd;
> > > int lvl = data->start_level;
> > > + int ret;
> > >
> > > do {
> > > /* Valid IOPTE pointer? */
> > > if (!ptep)
> > > - return 0;
> > > + return -EFAULT;
> >
> > nit: -ENOENT might be a little better, as we're only checking against a
> > NULL entry rather than strictly any faulting entry.
> >
> > > /* Grab the IOPTE we're interested in */
> > > ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> > > @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> > >
> > > /* Valid entry? */
> > > if (!pte)
> > > - return 0;
> > > + return -EFAULT;
> >
> > Same here (and at the end of the function).
> >
> > > +
> > > + ret = cb(cb_data, &pte, lvl);
> >
> > Since pte is on the stack, rather than pointing into the actual pgtable,
> > I think it would be clearer to pass it by value to the callback.
>
> fwiw, I passed it as a void* to avoid the pte size.. although I guess
> it could be a union of all the possible pte types
Can you just get away with a u64?
Will
On Tue, Jun 25, 2024 at 4:27 AM Will Deacon <will@kernel.org> wrote:
>
> On Mon, Jun 24, 2024 at 08:37:26AM -0700, Rob Clark wrote:
> > On Mon, Jun 24, 2024 at 8:14 AM Will Deacon <will@kernel.org> wrote:
> > >
> > > On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote:
> > > > From: Rob Clark <robdclark@chromium.org>
> > > >
> > > > Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> > > > would be traversed for a given iova access.
> > > >
> > > > Signed-off-by: Rob Clark <robdclark@chromium.org>
> > > > ---
> > > > drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------
> > > > include/linux/io-pgtable.h | 4 +++
> > > > 2 files changed, 46 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> > > > index f7828a7aad41..f47a0e64bb35 100644
> > > > --- a/drivers/iommu/io-pgtable-arm.c
> > > > +++ b/drivers/iommu/io-pgtable-arm.c
> > > > @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
> > > > data->start_level, ptep);
> > > > }
> > > >
> > > > -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> > > > - unsigned long iova)
> > > > +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
> > > > + int (*cb)(void *cb_data, void *pte, int level),
> > > > + void *cb_data)
> > > > {
> > > > struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> > > > arm_lpae_iopte pte, *ptep = data->pgd;
> > > > int lvl = data->start_level;
> > > > + int ret;
> > > >
> > > > do {
> > > > /* Valid IOPTE pointer? */
> > > > if (!ptep)
> > > > - return 0;
> > > > + return -EFAULT;
> > >
> > > nit: -ENOENT might be a little better, as we're only checking against a
> > > NULL entry rather than strictly any faulting entry.
> > >
> > > > /* Grab the IOPTE we're interested in */
> > > > ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> > > > @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> > > >
> > > > /* Valid entry? */
> > > > if (!pte)
> > > > - return 0;
> > > > + return -EFAULT;
> > >
> > > Same here (and at the end of the function).
> > >
> > > > +
> > > > + ret = cb(cb_data, &pte, lvl);
> > >
> > > Since pte is on the stack, rather than pointing into the actual pgtable,
> > > I think it would be clearer to pass it by value to the callback.
> >
> > fwiw, I passed it as a void* to avoid the pte size.. although I guess
> > it could be a union of all the possible pte types
>
> Can you just get away with a u64?
yeah, that wfm if you're ok with it
BR,
-R
On 23/05/2024 6:52 pm, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> would be traversed for a given iova access.
Have to say I'm a little torn here - with my iommu-dma hat on I'm not
super enthusiastic about adding any more overhead to iova_to_phys, but
in terms of maintaining io-pgtable I do like the overall shape of the
implementation...
Will, how much would you hate a compromise of inlining iova_to_phys as
the default walk behaviour if cb is NULL? :)
That said, looking at the unmap figures for dma_map_benchmark on a
Neoverse N1, any difference I think I see is still well within the
noise, so maybe a handful of extra indirect calls isn't really enough to
worry about?
Cheers,
Robin.
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
> drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------
> include/linux/io-pgtable.h | 4 +++
> 2 files changed, 46 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index f7828a7aad41..f47a0e64bb35 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -693,17 +693,19 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
> data->start_level, ptep);
> }
>
> -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> - unsigned long iova)
> +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops, unsigned long iova,
> + int (*cb)(void *cb_data, void *pte, int level),
> + void *cb_data)
> {
> struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> arm_lpae_iopte pte, *ptep = data->pgd;
> int lvl = data->start_level;
> + int ret;
>
> do {
> /* Valid IOPTE pointer? */
> if (!ptep)
> - return 0;
> + return -EFAULT;
>
> /* Grab the IOPTE we're interested in */
> ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> @@ -711,22 +713,52 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>
> /* Valid entry? */
> if (!pte)
> - return 0;
> + return -EFAULT;
> +
> + ret = cb(cb_data, &pte, lvl);
> + if (ret)
> + return ret;
>
> - /* Leaf entry? */
> + /* Leaf entry? If so, we've found the translation */
> if (iopte_leaf(pte, lvl, data->iop.fmt))
> - goto found_translation;
> + return 0;
>
> /* Take it to the next level */
> ptep = iopte_deref(pte, data);
> } while (++lvl < ARM_LPAE_MAX_LEVELS);
>
> /* Ran out of page tables to walk */
> + return -EFAULT;
> +}
> +
> +struct iova_to_phys_walk_data {
> + arm_lpae_iopte pte;
> + int level;
> +};
> +
> +static int iova_to_phys_walk_cb(void *cb_data, void *pte, int level)
> +{
> + struct iova_to_phys_walk_data *d = cb_data;
> +
> + d->pte = *(arm_lpae_iopte *)pte;
> + d->level = level;
> +
> return 0;
> +}
> +
> +static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> + unsigned long iova)
> +{
> + struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> + struct iova_to_phys_walk_data d;
> + int ret;
> +
> + ret = arm_lpae_pgtable_walk(ops, iova, iova_to_phys_walk_cb, &d);
> + if (ret)
> + return 0;
>
> -found_translation:
> - iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
> - return iopte_to_paddr(pte, data) | iova;
> + iova &= (ARM_LPAE_BLOCK_SIZE(d.level, data) - 1);
> + return iopte_to_paddr(d.pte, data) | iova;
> }
>
> static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
> @@ -807,6 +839,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
> .map_pages = arm_lpae_map_pages,
> .unmap_pages = arm_lpae_unmap_pages,
> .iova_to_phys = arm_lpae_iova_to_phys,
> + .pgtable_walk = arm_lpae_pgtable_walk,
> };
>
> return data;
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 86cf1f7ae389..261b48af068a 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -177,6 +177,7 @@ struct io_pgtable_cfg {
> * @map_pages: Map a physically contiguous range of pages of the same size.
> * @unmap_pages: Unmap a range of virtually contiguous pages of the same size.
> * @iova_to_phys: Translate iova to physical address.
> + * @pgtable_walk: (optional) Perform a page table walk for a given iova.
> *
> * These functions map directly onto the iommu_ops member functions with
> * the same names.
> @@ -190,6 +191,9 @@ struct io_pgtable_ops {
> struct iommu_iotlb_gather *gather);
> phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
> unsigned long iova);
> + int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova,
> + int (*cb)(void *cb_data, void *pte, int level),
> + void *cb_data);
> int (*read_and_clear_dirty)(struct io_pgtable_ops *ops,
> unsigned long iova, size_t size,
> unsigned long flags,
On Mon, Jun 17, 2024 at 04:13:41PM +0100, Robin Murphy wrote:
> On 23/05/2024 6:52 pm, Rob Clark wrote:
> > From: Rob Clark <robdclark@chromium.org>
> >
> > Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> > would be traversed for a given iova access.
>
> Have to say I'm a little torn here - with my iommu-dma hat on I'm not super
> enthusiastic about adding any more overhead to iova_to_phys, but in terms of
> maintaining io-pgtable I do like the overall shape of the implementation...
If you mark arm_lpae_pgtable_walk() and the callbacks as __always_inline then
the compiler should generate the same code as today for arm_lpae_iova_to_phys().
Jason
On Thu, May 23, 2024 at 10:52:21AM -0700, Rob Clark wrote:
> From: Rob Clark <robdclark@chromium.org>
>
> Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> would be traversed for a given iova access.
>
> Signed-off-by: Rob Clark <robdclark@chromium.org>
> ---
> drivers/iommu/io-pgtable-arm.c | 51 ++++++++++++++++++++++++++++------
> include/linux/io-pgtable.h | 4 +++
> 2 files changed, 46 insertions(+), 9 deletions(-)
Acked-by: Joerg Roedel <jroedel@suse.de>
© 2016 - 2026 Red Hat, Inc.