Using the new calls, use an atomic refcount to track how many times
a page is mapped in any of the IOMMUs.
For unmap we need to use iova_to_phys() to get the physical address
of the pages.
We use the smallest supported page size as the granularity of tracking
per domain.
This is important as it is possible to map pages and unmap them with
larger sizes (as in the map_sg() case).
Signed-off-by: Mostafa Saleh <smostafa@google.com>
Tested-by: Qinxin Xia <xiaqinxin@huawei.com>
---
drivers/iommu/iommu-debug-pagealloc.c | 74 +++++++++++++++++++++++++++
1 file changed, 74 insertions(+)
diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
index a6a2f844b09d..0e14104b971c 100644
--- a/drivers/iommu/iommu-debug-pagealloc.c
+++ b/drivers/iommu/iommu-debug-pagealloc.c
@@ -27,16 +27,90 @@ struct page_ext_operations page_iommu_debug_ops = {
.need = need_iommu_debug,
};
+/* Look up the page_ext for the page backing @phys; caller must page_ext_put(). */
+static struct page_ext *get_iommu_page_ext(phys_addr_t phys)
+{
+ return page_ext_get(phys_to_page(phys));
+}
+
+/* Fetch the per-page IOMMU debug metadata attached to @page_ext. */
+static struct iommu_debug_metadate *get_iommu_data(struct page_ext *page_ext)
+{
+ return page_ext_data(page_ext, &page_iommu_debug_ops);
+}
+
+/*
+ * Increment the per-page IOMMU mapping refcount for @phys.
+ * No memory ordering is required for this debug-only counter, so the
+ * relaxed atomic variant is sufficient. The WARN_ON() catches counter
+ * overflow, which should never happen in practice.
+ */
+static void iommu_debug_inc_page(phys_addr_t phys)
+{
+ struct page_ext *page_ext = get_iommu_page_ext(phys);
+ struct iommu_debug_metadate *d = get_iommu_data(page_ext);
+
+ WARN_ON(atomic_inc_return_relaxed(&d->ref) <= 0);
+ page_ext_put(page_ext);
+}
+
+/*
+ * Decrement the per-page IOMMU mapping refcount for @phys.
+ * Relaxed atomics are enough here (debug-only counter, no ordering
+ * requirement). The WARN_ON() catches underflow, i.e. an unmap of a
+ * page that was never tracked as mapped.
+ */
+static void iommu_debug_dec_page(phys_addr_t phys)
+{
+ struct page_ext *page_ext = get_iommu_page_ext(phys);
+ struct iommu_debug_metadate *d = get_iommu_data(page_ext);
+
+ WARN_ON(atomic_dec_return_relaxed(&d->ref) < 0);
+ page_ext_put(page_ext);
+}
+
+/*
+ * IOMMU page size might not match the CPU page size, in that case, we use
+ * the smallest IOMMU page size to refcount the pages in the vmemmap.
+ * That is important as both map and unmap has to use the same page size
+ * to update the refcount to avoid double counting the same page.
+ * And as we can't know from iommu_unmap() what was the original page size
+ * used for map, we just use the minimum supported one for both.
+ */
+static size_t iommu_debug_page_size(struct iommu_domain *domain)
+{
+ return 1UL << __ffs(domain->pgsize_bitmap);
+}
+
void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
{
+ size_t off;
+ size_t page_size = iommu_debug_page_size(domain);
+
+ /*
+  * Walk the range at the domain's minimum page size so the refcounts
+  * taken here line up with the ones dropped in __iommu_debug_unmap().
+  * Skip physical addresses with no struct page (e.g. device memory).
+  */
+ for (off = 0 ; off < size ; off += page_size) {
+ if (!pfn_valid(__phys_to_pfn(phys + off)))
+ continue;
+ iommu_debug_inc_page(phys + off);
+ }
}
void __iommu_debug_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
{
+ size_t off;
+ size_t page_size = iommu_debug_page_size(domain);
+
+ for (off = 0 ; off < size ; off += page_size) {
+ phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
+
+ /* 'off' is already accounted for by the IOVA lookup above. */
+ if (!phys || !pfn_valid(__phys_to_pfn(phys)))
+ continue;
+
+ iommu_debug_dec_page(phys);
+ }
}
void __iommu_debug_remap(struct iommu_domain *domain, unsigned long iova, size_t size)
{
+ size_t off;
+ size_t page_size = iommu_debug_page_size(domain);
+
+ for (off = 0 ; off < size ; off += page_size) {
+ phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
+
+ /* 'off' is already accounted for by the IOVA lookup above. */
+ if (!phys || !pfn_valid(__phys_to_pfn(phys)))
+ continue;
+
+ iommu_debug_inc_page(phys);
+ }
}
void iommu_debug_init(void)
--
2.51.2.1026.g39e6a42477-goog
On Thu, Nov 06, 2025 at 04:39:52PM +0000, Mostafa Saleh wrote:
> Using the new calls, use an atomic refcount to track how many times
> a page is mapped in any of the IOMMUs.
>
> For unmap we need to use iova_to_phys() to get the physical address
> of the pages.
>
> We use the smallest supported page size as the granularity of tracking
> per domain.
> This is important as it possible to map pages and unmap them with
> larger sizes (as in map_sg()) cases.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> Tested-by: Qinxin Xia <xiaqinxin@huawei.com>
> ---
> drivers/iommu/iommu-debug-pagealloc.c | 74 +++++++++++++++++++++++++++
> 1 file changed, 74 insertions(+)
>
> diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
> index a6a2f844b09d..0e14104b971c 100644
> --- a/drivers/iommu/iommu-debug-pagealloc.c
> +++ b/drivers/iommu/iommu-debug-pagealloc.c
> @@ -27,16 +27,90 @@ struct page_ext_operations page_iommu_debug_ops = {
> .need = need_iommu_debug,
> };
>
> +static struct page_ext *get_iommu_page_ext(phys_addr_t phys)
> +{
> + struct page *page = phys_to_page(phys);
> + struct page_ext *page_ext = page_ext_get(page);
> +
> + return page_ext;
> +}
> +
> +static struct iommu_debug_metadate *get_iommu_data(struct page_ext *page_ext)
> +{
> + return page_ext_data(page_ext, &page_iommu_debug_ops);
> +}
> +
> +static void iommu_debug_inc_page(phys_addr_t phys)
> +{
> + struct page_ext *page_ext = get_iommu_page_ext(phys);
> + struct iommu_debug_metadate *d = get_iommu_data(page_ext);
> +
> + WARN_ON(atomic_inc_return(&d->ref) <= 0);
Is it worth dumping some information about the page in addition to the
WARN_ON()? That way, you might be able to benefit from other debug
options (e.g. PAGE_OWNER) if they are enabled.
> + page_ext_put(page_ext);
> +}
> +
> +static void iommu_debug_dec_page(phys_addr_t phys)
> +{
> + struct page_ext *page_ext = get_iommu_page_ext(phys);
> + struct iommu_debug_metadate *d = get_iommu_data(page_ext);
> +
> + WARN_ON(atomic_dec_return(&d->ref) < 0);
nit: I can't see why you need memory ordering guarantees for the refcount,
so you could use the relaxed variants for the inc/dec operations.
> + page_ext_put(page_ext);
> +}
> +
> +/*
> + * IOMMU page size might not match the CPU page size, in that case, we use
> + * the smallest IOMMU page size to refcount the pages in the vmemmap.
> + * That is important as both map and unmap has to use the same page size
> + * to update the refcount to avoid double counting the same page.
> + * And as we can't know from iommu_unmap() what was the original page size
> + * used for map, we just use the minimum supported one for both.
> + */
> +static size_t iommu_debug_page_size(struct iommu_domain *domain)
> +{
> + return 1UL << __ffs(domain->pgsize_bitmap);
> +}
> +
> void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
> {
> + size_t off;
> + size_t page_size = iommu_debug_page_size(domain);
Since this is a debug feature, is it worth checking other properties of
the arguments too? For example, that phys is non-zero and that phys +
size doesn't overflow?
> + for (off = 0 ; off < size ; off += page_size) {
> + if (!pfn_valid(__phys_to_pfn(phys + off)))
> + continue;
> + iommu_debug_inc_page(phys + off);
> + }
> }
>
> void __iommu_debug_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
> {
> + size_t off;
> + size_t page_size = iommu_debug_page_size(domain);
> +
> + for (off = 0 ; off < size ; off += page_size) {
> + phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
> +
> + if (!phys || !pfn_valid(__phys_to_pfn(phys + off)))
> + continue;
Hmm, it looks weird to add 'off' to both 'iova' _and_ the resulting
physical address. Is that correct?
> + iommu_debug_dec_page(phys);
> + }
> }
>
> void __iommu_debug_remap(struct iommu_domain *domain, unsigned long iova, size_t size)
> {
> + size_t off;
> + size_t page_size = iommu_debug_page_size(domain);
> +
> + for (off = 0 ; off < size ; off += page_size) {
> + phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
> +
> + if (!phys || !pfn_valid(__phys_to_pfn(phys + off)))
> + continue;
> +
> + iommu_debug_inc_page(phys);
> + }
You can make the bulk of this code common with the unmap function.
Will
On Thu, Nov 13, 2025 at 11:00:29AM +0000, Will Deacon wrote:
> On Thu, Nov 06, 2025 at 04:39:52PM +0000, Mostafa Saleh wrote:
> > Using the new calls, use an atomic refcount to track how many times
> > a page is mapped in any of the IOMMUs.
> >
> > For unmap we need to use iova_to_phys() to get the physical address
> > of the pages.
> >
> > We use the smallest supported page size as the granularity of tracking
> > per domain.
> > This is important as it possible to map pages and unmap them with
> > larger sizes (as in map_sg()) cases.
> >
> > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > Tested-by: Qinxin Xia <xiaqinxin@huawei.com>
> > ---
> > drivers/iommu/iommu-debug-pagealloc.c | 74 +++++++++++++++++++++++++++
> > 1 file changed, 74 insertions(+)
> >
> > diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
> > index a6a2f844b09d..0e14104b971c 100644
> > --- a/drivers/iommu/iommu-debug-pagealloc.c
> > +++ b/drivers/iommu/iommu-debug-pagealloc.c
> > @@ -27,16 +27,90 @@ struct page_ext_operations page_iommu_debug_ops = {
> > .need = need_iommu_debug,
> > };
> >
> > +static struct page_ext *get_iommu_page_ext(phys_addr_t phys)
> > +{
> > + struct page *page = phys_to_page(phys);
> > + struct page_ext *page_ext = page_ext_get(page);
> > +
> > + return page_ext;
> > +}
> > +
> > +static struct iommu_debug_metadate *get_iommu_data(struct page_ext *page_ext)
> > +{
> > + return page_ext_data(page_ext, &page_iommu_debug_ops);
> > +}
> > +
> > +static void iommu_debug_inc_page(phys_addr_t phys)
> > +{
> > + struct page_ext *page_ext = get_iommu_page_ext(phys);
> > + struct iommu_debug_metadate *d = get_iommu_data(page_ext);
> > +
> > + WARN_ON(atomic_inc_return(&d->ref) <= 0);
>
> Is it worth dumping some information about the page in addition to the
> WARN_ON()? That way, you might be able to benefit from other debug
> options (e.g. PAGE_OWNER) if they are enabled.
These WARN_ON are for overflows, which should never happen.
I initially thought about using the refcount_t, but it didn’t seem
suitable as refcount_add() expects that the refcount is already “1”
indicating that an object was already created which doesn’t fit
in the semantics of what this is. Similar for refcount_dec().
In the next patch there is a WARN_ON for the refcount check
to capture the mis-behaving context, I will add a debug print with
the leaked physical address in that case as this is the important one.
>
> > + page_ext_put(page_ext);
> > +}
> > +
> > +static void iommu_debug_dec_page(phys_addr_t phys)
> > +{
> > + struct page_ext *page_ext = get_iommu_page_ext(phys);
> > + struct iommu_debug_metadate *d = get_iommu_data(page_ext);
> > +
> > + WARN_ON(atomic_dec_return(&d->ref) < 0);
>
> nit: I can't see why you need memory ordering guarantees for the refcount,
> so you could use the relaxed variants for the inc/dec operations.
Will do.
>
> > + page_ext_put(page_ext);
> > +}
> > +
> > +/*
> > + * IOMMU page size might not match the CPU page size, in that case, we use
> > + * the smallest IOMMU page size to refcount the pages in the vmemmap.
> > + * That is important as both map and unmap has to use the same page size
> > + * to update the refcount to avoid double counting the same page.
> > + * And as we can't know from iommu_unmap() what was the original page size
> > + * used for map, we just use the minimum supported one for both.
> > + */
> > +static size_t iommu_debug_page_size(struct iommu_domain *domain)
> > +{
> > + return 1UL << __ffs(domain->pgsize_bitmap);
> > +}
> > +
> > void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
> > {
> > + size_t off;
> > + size_t page_size = iommu_debug_page_size(domain);
>
> Since this is a debug feature, is it worth checking other properties of
> the arguments too? For example, that phys is non-zero and that phys +
> size doesn't overflow?
>
Makes sense, I will add some more checks.
> > + for (off = 0 ; off < size ; off += page_size) {
> > + if (!pfn_valid(__phys_to_pfn(phys + off)))
> > + continue;
> > + iommu_debug_inc_page(phys + off);
> > + }
> > }
> >
> > void __iommu_debug_unmap(struct iommu_domain *domain, unsigned long iova, size_t size)
> > {
> > + size_t off;
> > + size_t page_size = iommu_debug_page_size(domain);
> > +
> > + for (off = 0 ; off < size ; off += page_size) {
> > + phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
> > +
> > + if (!phys || !pfn_valid(__phys_to_pfn(phys + off)))
> > + continue;
>
> Hmm, it looks weird to add 'off' to both 'iova' _and_ the resulting
> physical address. Is that correct?
>
Yes, that's a bug, I will fix it.
> > + iommu_debug_dec_page(phys);
> > + }
> > }
> >
> > void __iommu_debug_remap(struct iommu_domain *domain, unsigned long iova, size_t size)
> > {
> > + size_t off;
> > + size_t page_size = iommu_debug_page_size(domain);
> > +
> > + for (off = 0 ; off < size ; off += page_size) {
> > + phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
> > +
> > + if (!phys || !pfn_valid(__phys_to_pfn(phys + off)))
> > + continue;
> > +
> > + iommu_debug_inc_page(phys);
> > + }
>
> You can make the bulk of this code common with the unmap function.
Will do.
Thanks,
Mostafa
>
> Will
On Mon, Nov 24, 2025 at 12:37:31PM +0000, Mostafa Saleh wrote:
> On Thu, Nov 13, 2025 at 11:00:29AM +0000, Will Deacon wrote:
> > On Thu, Nov 06, 2025 at 04:39:52PM +0000, Mostafa Saleh wrote:
> > > Using the new calls, use an atomic refcount to track how many times
> > > a page is mapped in any of the IOMMUs.
> > >
> > > For unmap we need to use iova_to_phys() to get the physical address
> > > of the pages.
> > >
> > > We use the smallest supported page size as the granularity of tracking
> > > per domain.
> > > This is important as it possible to map pages and unmap them with
> > > larger sizes (as in map_sg()) cases.
> > >
> > > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > > Tested-by: Qinxin Xia <xiaqinxin@huawei.com>
> > > ---
> > > drivers/iommu/iommu-debug-pagealloc.c | 74 +++++++++++++++++++++++++++
> > > 1 file changed, 74 insertions(+)
> > >
> > > diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
> > > index a6a2f844b09d..0e14104b971c 100644
> > > --- a/drivers/iommu/iommu-debug-pagealloc.c
> > > +++ b/drivers/iommu/iommu-debug-pagealloc.c
> > > @@ -27,16 +27,90 @@ struct page_ext_operations page_iommu_debug_ops = {
> > > .need = need_iommu_debug,
> > > };
> > >
> > > +static struct page_ext *get_iommu_page_ext(phys_addr_t phys)
> > > +{
> > > + struct page *page = phys_to_page(phys);
> > > + struct page_ext *page_ext = page_ext_get(page);
> > > +
> > > + return page_ext;
> > > +}
> > > +
> > > +static struct iommu_debug_metadate *get_iommu_data(struct page_ext *page_ext)
> > > +{
> > > + return page_ext_data(page_ext, &page_iommu_debug_ops);
> > > +}
> > > +
> > > +static void iommu_debug_inc_page(phys_addr_t phys)
> > > +{
> > > + struct page_ext *page_ext = get_iommu_page_ext(phys);
> > > + struct iommu_debug_metadate *d = get_iommu_data(page_ext);
> > > +
> > > + WARN_ON(atomic_inc_return(&d->ref) <= 0);
> >
> > Is it worth dumping some information about the page in addition to the
> > WARN_ON()? That way, you might be able to benefit from other debug
> > options (e.g. PAGE_OWNER) if they are enabled.
>
> These WARN_ON are for overflows, which should never happen.
> I initially thought about using the refcount_t, but it didn’t seem
> suitable as refcount_add() expects that the refcount is already “1”
> indicating that an object was already created which doesn’t fit
> in the semantics of what this is. Similar for refcount_dec().
>
> In the next patch there is a WARN_ON for the refcount check
> to capture the mis-behaving context, I will add a debug print with
> the leaked physical address in that case as this is the important one.
I was thinking specifically about calling dump_page_owner().
Will
On Mon, Nov 24, 2025 at 03:35:47PM +0000, Will Deacon wrote:
> On Mon, Nov 24, 2025 at 12:37:31PM +0000, Mostafa Saleh wrote:
> > On Thu, Nov 13, 2025 at 11:00:29AM +0000, Will Deacon wrote:
> > > On Thu, Nov 06, 2025 at 04:39:52PM +0000, Mostafa Saleh wrote:
> > > > Using the new calls, use an atomic refcount to track how many times
> > > > a page is mapped in any of the IOMMUs.
> > > >
> > > > For unmap we need to use iova_to_phys() to get the physical address
> > > > of the pages.
> > > >
> > > > We use the smallest supported page size as the granularity of tracking
> > > > per domain.
> > > > This is important as it possible to map pages and unmap them with
> > > > larger sizes (as in map_sg()) cases.
> > > >
> > > > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > > > Tested-by: Qinxin Xia <xiaqinxin@huawei.com>
> > > > ---
> > > > drivers/iommu/iommu-debug-pagealloc.c | 74 +++++++++++++++++++++++++++
> > > > 1 file changed, 74 insertions(+)
> > > >
> > > > diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
> > > > index a6a2f844b09d..0e14104b971c 100644
> > > > --- a/drivers/iommu/iommu-debug-pagealloc.c
> > > > +++ b/drivers/iommu/iommu-debug-pagealloc.c
> > > > @@ -27,16 +27,90 @@ struct page_ext_operations page_iommu_debug_ops = {
> > > > .need = need_iommu_debug,
> > > > };
> > > >
> > > > +static struct page_ext *get_iommu_page_ext(phys_addr_t phys)
> > > > +{
> > > > + struct page *page = phys_to_page(phys);
> > > > + struct page_ext *page_ext = page_ext_get(page);
> > > > +
> > > > + return page_ext;
> > > > +}
> > > > +
> > > > +static struct iommu_debug_metadate *get_iommu_data(struct page_ext *page_ext)
> > > > +{
> > > > + return page_ext_data(page_ext, &page_iommu_debug_ops);
> > > > +}
> > > > +
> > > > +static void iommu_debug_inc_page(phys_addr_t phys)
> > > > +{
> > > > + struct page_ext *page_ext = get_iommu_page_ext(phys);
> > > > + struct iommu_debug_metadate *d = get_iommu_data(page_ext);
> > > > +
> > > > + WARN_ON(atomic_inc_return(&d->ref) <= 0);
> > >
> > > Is it worth dumping some information about the page in addition to the
> > > WARN_ON()? That way, you might be able to benefit from other debug
> > > options (e.g. PAGE_OWNER) if they are enabled.
> >
> > These WARN_ON are for overflows, which should never happen.
> > I initially thought about using the refcount_t, but it didn’t seem
> > suitable as refcount_add() expects that the refcount is already “1”
> > indicating that an object was already created which doesn’t fit
> > in the semantics of what this is. Similar for refcount_dec().
> >
> > In the next patch there is a WARN_ON for the refcount check
> > to capture the mis-behaving context, I will add a debug print with
> > the leaked physical address in that case as this is the important one.
>
> I was thinking specifically about calling dump_page_owner().
I see, that makes sense.
Thanks,
Mostafa
>
> Will
© 2016 - 2025 Red Hat, Inc.