From: PanJason <pyyjason@gmail.com>
This patch adds support for damos_stat in virtual address space.
It leverages walk_page_range() to walk the page tables and gets the
folio from each page table entry. The last folio scanned is stored
in damos->last_applied to prevent double counting.
---
mm/damon/vaddr.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 112 insertions(+), 1 deletion(-)
diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c
index 87e825349bdf..3e319b51cfd4 100644
--- a/mm/damon/vaddr.c
+++ b/mm/damon/vaddr.c
@@ -890,6 +890,117 @@ static unsigned long damos_va_migrate(struct damon_target *target,
 	return applied * PAGE_SIZE;
 }
 
+struct damos_va_stat_private {
+	struct damos *scheme;
+	unsigned long *sz_filter_passed;
+};
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damos_va_stat_private *priv = walk->private;
+	struct damos *s = priv->scheme;
+	unsigned long *sz_filter_passed = priv->sz_filter_passed;
+	struct folio *folio;
+	spinlock_t *ptl;
+	pmd_t pmde;
+
+	ptl = pmd_lock(walk->mm, pmd);
+	pmde = pmdp_get(pmd);
+
+	if (!pmd_present(pmde) || !pmd_trans_huge(pmde))
+		goto unlock;
+
+	/* Tell page walk code to not split the PMD */
+	walk->action = ACTION_CONTINUE;
+
+	folio = damon_get_folio(pmd_pfn(pmde));
+	if (!folio)
+		goto unlock;
+
+	if (damon_invalid_damos_folio(folio, s))
+		goto update_last_applied;
+
+	if (!damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd)){
+		*sz_filter_passed += folio_size(folio);
+	}
+
+	folio_put(folio);
+update_last_applied:
+	s->last_applied = folio;
+unlock:
+	spin_unlock(ptl);
+	return 0;
+}
+#else
+#define damos_va_stat_pmd_entry NULL
+#endif
+
+static int damos_va_stat_pte_entry(pte_t *pte, unsigned long addr,
+		unsigned long next, struct mm_walk *walk)
+{
+	struct damos_va_stat_private *priv = walk->private;
+	struct damos *s = priv->scheme;
+	unsigned long *sz_filter_passed = priv->sz_filter_passed;
+	struct folio *folio;
+	pte_t ptent;
+
+	ptent = ptep_get(pte);
+	if (pte_none(ptent) || !pte_present(ptent))
+		return 0;
+
+	folio = damon_get_folio(pte_pfn(ptent));
+	if (!folio)
+		return 0;
+
+	if (damon_invalid_damos_folio(folio, s))
+		goto update_last_applied;
+
+	if (!damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL)){
+		*sz_filter_passed += folio_size(folio);
+	}
+
+	folio_put(folio);
+
+update_last_applied:
+	s->last_applied = folio;
+	return 0;
+}
+
+static unsigned long damos_va_stat(struct damon_target *target,
+		struct damon_region *r, struct damos *s,
+		unsigned long *sz_filter_passed)
+{
+
+	struct damos_va_stat_private priv;
+	struct mm_struct *mm;
+	struct mm_walk_ops walk_ops = {
+		.pmd_entry = damos_va_stat_pmd_entry,
+		.pte_entry = damos_va_stat_pte_entry,
+		.walk_lock = PGWALK_RDLOCK,
+	};
+
+	priv.scheme = s;
+	priv.sz_filter_passed = sz_filter_passed;
+
+	if (!damon_scheme_has_filter(s)){
+		return 0;
+	}
+
+	mm = damon_get_mm(target);
+	if (!mm)
+		return 0;
+
+	mmap_read_lock(mm);
+	walk_page_range(mm, r->ar.start, r->ar.end, &walk_ops, &priv);
+	mmap_read_unlock(mm);
+	mmput(mm);
+	pr_debug("Call va_stat: %lu\n", *sz_filter_passed);
+	return 0;
+
+}
+
 static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
 		struct damon_target *t, struct damon_region *r,
 		struct damos *scheme, unsigned long *sz_filter_passed)
@@ -916,7 +1027,7 @@ static unsigned long damon_va_apply_scheme(struct damon_ctx *ctx,
 	case DAMOS_MIGRATE_COLD:
 		return damos_va_migrate(t, r, scheme, sz_filter_passed);
 	case DAMOS_STAT:
-		return 0;
+		return damos_va_stat(t, r, scheme, sz_filter_passed);
 	default:
 		/*
 		 * DAMOS actions that are not yet supported by 'vaddr'.
--
2.47.3
On Tue, 29 Jul 2025 06:53:30 -0700 Yueyang Pan <pyyjason@gmail.com> wrote:

> From: PanJason <pyyjason@gmail.com>
>
> This patch adds support for damos_stat in virtual address space.
> It leverages walk_page_range() to walk the page tables and gets the
> folio from each page table entry. The last folio scanned is stored
> in damos->last_applied to prevent double counting.

Please add your Signed-off-by: tag[1] here.

[1] https://docs.kernel.org/process/submitting-patches.html#developer-s-certificate-of-origin-1-1

Thanks,
SJ

[...]
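For reference, the tag SJ is asking for is a single line appended at the
end of the commit message, just above the '---' separator. A minimal
example, assuming the identity in the mail headers is the one the author
wants to certify with:

    Signed-off-by: Yueyang Pan <pyyjason@gmail.com>

The exact name and address are the author's choice; they only need to
reflect whoever is certifying the Developer's Certificate of Origin.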
On Tue, 29 Jul 2025 06:53:30 -0700 Yueyang Pan <pyyjason@gmail.com> wrote:

> From: PanJason <pyyjason@gmail.com>
>
> This patch adds support for damos_stat in virtual address space.
> It leverages walk_page_range() to walk the page tables and gets the
> folio from each page table entry. The last folio scanned is stored
> in damos->last_applied to prevent double counting.

Thank you for this patch, Pan!  I left a few comments below.  I think
those are mostly insignificant change requests, though.

[...]

> +	/* Tell page walk code to not split the PMD */
> +	walk->action = ACTION_CONTINUE;

As David suggested, let's unify this with the pte handler following the
pattern of madvise_cold_or_pageout_pte_range() and drop the above
ACTION_CONTINUE code, unless you have different opinions.

> +
> +	folio = damon_get_folio(pmd_pfn(pmde));

As David also suggested, let's use vm_normal_folio_pmd() instead, and
drop the unnecessary folio_put().

> +	if (!folio)
> +		goto unlock;

damon_invalid_damos_folio() returns true if folio is NULL, so I think
the above check is unnecessary.

> +
> +	if (damon_invalid_damos_folio(folio, s))
> +		goto update_last_applied;

Because we didn't really apply the DAMOS action, I think it is more
proper to goto 'unlock' directly.

Oh, and I now realize damon_invalid_damos_folio() puts the folio for a
non-NULL invalid folio...  Because the code is simple, let's implement
and use a 'va' version of invalid_damos_folio(), say,
damon_va_invalid_damos_folio(), which doesn't put the folio.

> +
> +	if (!damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd)){
> +		*sz_filter_passed += folio_size(folio);
> +	}

Let's remove the braces around the single statement, as suggested[1] by
the coding style.

[...]

> +	folio = damon_get_folio(pte_pfn(ptent));

As David suggested, let's use vm_normal_folio() here, and remove the
folio_put() below.

> +	if (!folio)
> +		return 0;

As also mentioned above, let's drop the above NULL check, in favor of
the one in damon_va_invalid_damos_folio().

> +
> +	if (damon_invalid_damos_folio(folio, s))
> +		goto update_last_applied;

Again, I don't think we need to update s->last_applied in this case.
Let's do only the necessary cleanups and return.

> +
> +	if (!damos_va_filter_out(s, folio, walk->vma, addr, pte, NULL)){
> +		*sz_filter_passed += folio_size(folio);
> +	}

Let's drop the braces around the single statement[1].

[...]

> +static unsigned long damos_va_stat(struct damon_target *target,
> +		struct damon_region *r, struct damos *s,
> +		unsigned long *sz_filter_passed)
> +{
> +

Let's remove this unnecessary blank line.

[...]

> +	if (!damon_scheme_has_filter(s)){
> +		return 0;
> +	}

Let's remove the braces around the single statement[1].

[...]

> +	pr_debug("Call va_stat: %lu\n", *sz_filter_passed);

I don't think we really need this debug log.  Can we remove it?

> +	return 0;
> +

Yet another unnecessary blank line.  Let's remove it.

[...]

[1] https://docs.kernel.org/process/coding-style.html

Thanks,
SJ
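A minimal sketch of the helper SJ describes above, assuming its only
checks are the NULL-folio case and the damos->last_applied
double-counting guard; the name comes from SJ's suggestion, and the
exact form is up to the next revision:

static bool damon_va_invalid_damos_folio(struct folio *folio,
		struct damos *s)
{
	/* Covers the lookup-failure case the callers no longer check. */
	if (!folio)
		return true;
	/* Already counted in a previous apply; no folio_put() here. */
	return folio == s->last_applied;
}

Unlike damon_invalid_damos_folio(), this variant never drops a folio
reference, so it pairs naturally with the reference-free
vm_normal_folio*() lookups suggested elsewhere in this thread.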
On 29.07.25 15:53, Yueyang Pan wrote:
> From: PanJason <pyyjason@gmail.com>
>
> This patch adds support for damos_stat in virtual address space.
> It leverages walk_page_range() to walk the page tables and gets the
> folio from each page table entry. The last folio scanned is stored
> in damos->last_applied to prevent double counting.

[...]

> +	if (!damos_va_filter_out(s, folio, walk->vma, addr, NULL, pmd)){
> +		*sz_filter_passed += folio_size(folio);

See my comment below regarding vm_normal_page and folio references.

But this split into two handlers is fairly odd. Usually we only have a
pmd_entry callback (see madvise_cold_or_pageout_pte_range as an
example), and handle !CONFIG_TRANSPARENT_HUGEPAGE in there.

Then, there is also no need to mess with ACTION_CONTINUE.

[...]

> +	folio = damon_get_folio(pte_pfn(ptent));
> +	if (!folio)
> +		return 0;

We have vm_normal_folio() and friends for a reason -- so you don't have
to do pte_pfn() manually.

... and now I am confused. We are holding the PTL, so why would you
have to grab+put a folio reference here *at all*?

-- 
Cheers,

David / dhildenb
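Putting SJ's and David's suggestions together, here is a hedged sketch
of what the unified walker could look like, modeled on
madvise_cold_or_pageout_pte_range(). damon_va_invalid_damos_folio() is
the hypothetical helper sketched above, damos_va_stat_private and
damos_va_filter_out() come from the patch itself, and the details may
well differ in the next revision:

static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr,
		unsigned long next, struct mm_walk *walk)
{
	struct damos_va_stat_private *priv = walk->private;
	struct damos *s = priv->scheme;
	struct vm_area_struct *vma = walk->vma;
	struct folio *folio;
	spinlock_t *ptl;
	pte_t *start_pte, *pte;
	pte_t ptent;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	if (pmd_trans_huge(pmdp_get(pmd))) {
		pmd_t pmde;

		/* Returns NULL if the PMD is no longer trans-huge. */
		ptl = pmd_trans_huge_lock(pmd, vma);
		if (!ptl)
			return 0;
		pmde = pmdp_get(pmd);
		if (!pmd_present(pmde))
			goto huge_unlock;

		/* No extra reference needed; the PMD lock pins the folio. */
		folio = vm_normal_folio_pmd(vma, addr, pmde);
		if (damon_va_invalid_damos_folio(folio, s))
			goto huge_unlock;

		if (!damos_va_filter_out(s, folio, vma, addr, NULL, pmd))
			*priv->sz_filter_passed += folio_size(folio);
		s->last_applied = folio;
huge_unlock:
		spin_unlock(ptl);
		return 0;
	}
#endif	/* CONFIG_TRANSPARENT_HUGEPAGE */

	start_pte = pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
	if (!start_pte)
		return 0;
	for (; addr < next; pte++, addr += PAGE_SIZE) {
		ptent = ptep_get(pte);
		if (pte_none(ptent) || !pte_present(ptent))
			continue;
		/* Again, the PTE lock keeps the mapped folio stable. */
		folio = vm_normal_folio(vma, addr, ptent);
		if (damon_va_invalid_damos_folio(folio, s))
			continue;
		if (!damos_va_filter_out(s, folio, vma, addr, pte, NULL))
			*priv->sz_filter_passed += folio_size(folio);
		s->last_applied = folio;
	}
	pte_unmap_unlock(start_pte, ptl);
	return 0;
}

With this shape, the mm_walk_ops would set only .pmd_entry and leave
.pte_entry NULL, so the page walk core never tries to split the PMD and
ACTION_CONTINUE becomes unnecessary; the PMD/PTE locks also keep each
mapped folio stable, making the get/put pair redundant, as David points
out.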
On Tue, Jul 29, 2025 at 04:11:32PM +0200, David Hildenbrand wrote:
> On 29.07.25 15:53, Yueyang Pan wrote:
> [...]
>
> But this split into two handlers is fairly odd. Usually we only have a
> pmd_entry callback (see madvise_cold_or_pageout_pte_range as an
> example), and handle !CONFIG_TRANSPARENT_HUGEPAGE in there.
>
> Then, there is also no need to mess with ACTION_CONTINUE.

Hi David. Thanks for your comment. I was not aware of the convention,
so I followed the existing code. I had a discussion with SJ today, and
in the next version I will change this by combining *pmd_entry() and
*pte_entry(). We will also format the existing code in future patches.

> [...]
>
> We have vm_normal_folio() and friends for a reason -- so you don't have
> to do pte_pfn() manually.
>
> ... and now I am confused. We are holding the PTL, so why would you
> have to grab+put a folio reference here *at all*?

Thanks for pointing this out. I thought someone could still change the
folio, then realized that cannot happen while the PTL is held. I will
use vm_normal_folio*() in the next version.

> --
> Cheers,
>
> David / dhildenb
Hi Pan and David, thank you for this patch and comments!

On Tue, 29 Jul 2025 16:11:32 +0200 David Hildenbrand <david@redhat.com> wrote:

> [...]
>
> But this split into two handlers is fairly odd. Usually we only have a
> pmd_entry callback (see madvise_cold_or_pageout_pte_range as an
> example), and handle !CONFIG_TRANSPARENT_HUGEPAGE in there.
>
> Then, there is also no need to mess with ACTION_CONTINUE.

I don't really mind this, but I agree keeping the consistency would be
good.  Pan, could you please unify the handlers into one?

> [...]
>
> We have vm_normal_folio() and friends for a reason -- so you don't have
> to do pte_pfn() manually.
>
> ... and now I am confused. We are holding the PTL, so why would you
> have to grab+put a folio reference here *at all*?

We don't have to.  I think Pan does so because other similar existing
code in this file is also doing so.  That code, in turn, was doing so
because I wanted to use the handy damon_get_folio(), and the extra
references are not causing real problems.  But, yes, unnecessary things
are unnecessary things.

Pan, could you please use vm_normal_folio() instead of
damon_get_folio() and remove the related folio_put() call?

I will also work on cleaning up the existing unnecessary folio
reference manipulations, regardless of this patch series.

Thanks,
SJ

[...]