[PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters

Wenchao Hao posted 1 patch 1 year, 2 months ago
There is a newer version of this series
Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
include/linux/huge_mm.h                    |  2 ++
mm/huge_memory.c                           |  6 ++++++
mm/memory.c                                |  2 ++
4 files changed, 20 insertions(+)
[PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Wenchao Hao 1 year, 2 months ago
Currently, large folio swap-in is supported, but we lack a method to
analyze their success ratio. Similar to anon_fault_fallback, we introduce
per-order mTHP swpin_fallback and swpin_fallback_charge counters for
calculating their success ratio. The new counters are located at:

/sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
	swpin_fallback
	swpin_fallback_charge

Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
---
V2:
 Introduce swapin_fallback_charge, which increments if it fails to
 charge a huge page to memory despite successful allocation.

 Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
 include/linux/huge_mm.h                    |  2 ++
 mm/huge_memory.c                           |  6 ++++++
 mm/memory.c                                |  2 ++
 4 files changed, 20 insertions(+)

diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 5034915f4e8e..9c07612281b5 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -561,6 +561,16 @@ swpin
 	is incremented every time a huge page is swapped in from a non-zswap
 	swap device in one piece.
 
+swpin_fallback
+	is incremented if a huge page swapin fails to allocate or charge
+	a huge page and instead falls back to using huge pages with
+	lower orders or small pages.
+
+swpin_fallback_charge
+	is incremented if a page swapin fails to charge a huge page and
+	instead falls back to using huge pages with lower orders or
+	small pages even though the allocation was successful.
+
 swpout
 	is incremented every time a huge page is swapped out to a non-zswap
 	swap device in one piece without splitting.
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index b94c2e8ee918..93e509b6c00e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -121,6 +121,8 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
 	MTHP_STAT_ZSWPOUT,
 	MTHP_STAT_SWPIN,
+	MTHP_STAT_SWPIN_FALLBACK,
+	MTHP_STAT_SWPIN_FALLBACK_CHARGE,
 	MTHP_STAT_SWPOUT,
 	MTHP_STAT_SWPOUT_FALLBACK,
 	MTHP_STAT_SHMEM_ALLOC,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index ee335d96fc39..46749dded1c9 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
 DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
 DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
+DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
+DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
 DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
 DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
 #ifdef CONFIG_SHMEM
@@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
 #ifndef CONFIG_SHMEM
 	&zswpout_attr.attr,
 	&swpin_attr.attr,
+	&swpin_fallback_attr.attr,
+	&swpin_fallback_charge_attr.attr,
 	&swpout_attr.attr,
 	&swpout_fallback_attr.attr,
 #endif
@@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
 #ifdef CONFIG_SHMEM
 	&zswpout_attr.attr,
 	&swpin_attr.attr,
+	&swpin_fallback_attr.attr,
+	&swpin_fallback_charge_attr.attr,
 	&swpout_attr.attr,
 	&swpout_fallback_attr.attr,
 #endif
diff --git a/mm/memory.c b/mm/memory.c
index 209885a4134f..774dfd309cfe 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
 			if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
 							    gfp, entry))
 				return folio;
+			count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
 			folio_put(folio);
 		}
+		count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
 		order = next_order(&orders, order);
 	}
 
-- 
2.45.0
Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Lance Yang 1 year, 2 months ago
Hi Wenchao,

On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
>
> Currently, large folio swap-in is supported, but we lack a method to
> analyze their success ratio. Similar to anon_fault_fallback, we introduce
> per-order mTHP swpin_fallback and swpin_fallback_charge counters for
> calculating their success ratio. The new counters are located at:
>
> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
>         swpin_fallback
>         swpin_fallback_charge
>
> Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
> ---
> V2:
>  Introduce swapin_fallback_charge, which increments if it fails to
>  charge a huge page to memory despite successful allocation.
>
>  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
>  include/linux/huge_mm.h                    |  2 ++
>  mm/huge_memory.c                           |  6 ++++++
>  mm/memory.c                                |  2 ++
>  4 files changed, 20 insertions(+)
>
> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> index 5034915f4e8e..9c07612281b5 100644
> --- a/Documentation/admin-guide/mm/transhuge.rst
> +++ b/Documentation/admin-guide/mm/transhuge.rst
> @@ -561,6 +561,16 @@ swpin
>         is incremented every time a huge page is swapped in from a non-zswap
>         swap device in one piece.
>

Would the following be better?

+swpin_fallback
+       is incremented if a huge page swapin fails to allocate or charge
+       it and instead falls back to using small pages.

+swpin_fallback_charge
+       is incremented if a huge page swapin fails to charge it and instead
+       falls back to using small pages even though the allocation was
+       successful.

Thanks,
Lance

> +swpin_fallback
> +       is incremented if a huge page swapin fails to allocate or charge
> +       a huge page and instead falls back to using huge pages with
> +       lower orders or small pages.
> +
> +swpin_fallback_charge
> +       is incremented if a page swapin fails to charge a huge page and
> +       instead falls back to using huge pages with lower orders or
> +       small pages even though the allocation was successful.
> +
>  swpout
>         is incremented every time a huge page is swapped out to a non-zswap
>         swap device in one piece without splitting.
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index b94c2e8ee918..93e509b6c00e 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -121,6 +121,8 @@ enum mthp_stat_item {
>         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
>         MTHP_STAT_ZSWPOUT,
>         MTHP_STAT_SWPIN,
> +       MTHP_STAT_SWPIN_FALLBACK,
> +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
>         MTHP_STAT_SWPOUT,
>         MTHP_STAT_SWPOUT_FALLBACK,
>         MTHP_STAT_SHMEM_ALLOC,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index ee335d96fc39..46749dded1c9 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
>  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
>  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
>  #ifdef CONFIG_SHMEM
> @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
>  #ifndef CONFIG_SHMEM
>         &zswpout_attr.attr,
>         &swpin_attr.attr,
> +       &swpin_fallback_attr.attr,
> +       &swpin_fallback_charge_attr.attr,
>         &swpout_attr.attr,
>         &swpout_fallback_attr.attr,
>  #endif
> @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
>  #ifdef CONFIG_SHMEM
>         &zswpout_attr.attr,
>         &swpin_attr.attr,
> +       &swpin_fallback_attr.attr,
> +       &swpin_fallback_charge_attr.attr,
>         &swpout_attr.attr,
>         &swpout_fallback_attr.attr,
>  #endif
> diff --git a/mm/memory.c b/mm/memory.c
> index 209885a4134f..774dfd309cfe 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
>                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
>                                                             gfp, entry))
>                                 return folio;
> +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
>                         folio_put(folio);
>                 }
> +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
>                 order = next_order(&orders, order);
>         }
>
> --
> 2.45.0
>
Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Barry Song 1 year, 2 months ago
On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
>
> Hi Wenchao,
>
> On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
> >
> > Currently, large folio swap-in is supported, but we lack a method to
> > analyze their success ratio. Similar to anon_fault_fallback, we introduce
> > per-order mTHP swpin_fallback and swpin_fallback_charge counters for
> > calculating their success ratio. The new counters are located at:
> >
> > /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
> >         swpin_fallback
> >         swpin_fallback_charge
> >
> > Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
> > ---
> > V2:
> >  Introduce swapin_fallback_charge, which increments if it fails to
> >  charge a huge page to memory despite successful allocation.
> >
> >  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
> >  include/linux/huge_mm.h                    |  2 ++
> >  mm/huge_memory.c                           |  6 ++++++
> >  mm/memory.c                                |  2 ++
> >  4 files changed, 20 insertions(+)
> >
> > diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> > index 5034915f4e8e..9c07612281b5 100644
> > --- a/Documentation/admin-guide/mm/transhuge.rst
> > +++ b/Documentation/admin-guide/mm/transhuge.rst
> > @@ -561,6 +561,16 @@ swpin
> >         is incremented every time a huge page is swapped in from a non-zswap
> >         swap device in one piece.
> >
>
> Would the following be better?
>
> +swpin_fallback
> +       is incremented if a huge page swapin fails to allocate or charge
> +       it and instead falls back to using small pages.
>
> +swpin_fallback_charge
> +       is incremented if a huge page swapin fails to charge it and instead
> +       falls back to using small pages even though the allocation was
> +       successful.

much better, but it is better to align with "huge pages with
lower orders or small pages", not necessarily small pages:

anon_fault_fallback
is incremented if a page fault fails to allocate or charge
a huge page and instead falls back to using huge pages with
lower orders or small pages.

anon_fault_fallback_charge
is incremented if a page fault fails to charge a huge page and
instead falls back to using huge pages with lower orders or
small pages even though the allocation was successful.

>
> Thanks,
> Lance
>
> > +swpin_fallback
> > +       is incremented if a huge page swapin fails to allocate or charge
> > +       a huge page and instead falls back to using huge pages with
> > +       lower orders or small pages.
> > +
> > +swpin_fallback_charge
> > +       is incremented if a page swapin fails to charge a huge page and
> > +       instead falls back to using huge pages with lower orders or
> > +       small pages even though the allocation was successful.
> > +
> >  swpout
> >         is incremented every time a huge page is swapped out to a non-zswap
> >         swap device in one piece without splitting.
> > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > index b94c2e8ee918..93e509b6c00e 100644
> > --- a/include/linux/huge_mm.h
> > +++ b/include/linux/huge_mm.h
> > @@ -121,6 +121,8 @@ enum mthp_stat_item {
> >         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> >         MTHP_STAT_ZSWPOUT,
> >         MTHP_STAT_SWPIN,
> > +       MTHP_STAT_SWPIN_FALLBACK,
> > +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
> >         MTHP_STAT_SWPOUT,
> >         MTHP_STAT_SWPOUT_FALLBACK,
> >         MTHP_STAT_SHMEM_ALLOC,
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index ee335d96fc39..46749dded1c9 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> >  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> >  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> >  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> > +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> > +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> >  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
> >  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
> >  #ifdef CONFIG_SHMEM
> > @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
> >  #ifndef CONFIG_SHMEM
> >         &zswpout_attr.attr,
> >         &swpin_attr.attr,
> > +       &swpin_fallback_attr.attr,
> > +       &swpin_fallback_charge_attr.attr,
> >         &swpout_attr.attr,
> >         &swpout_fallback_attr.attr,
> >  #endif
> > @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
> >  #ifdef CONFIG_SHMEM
> >         &zswpout_attr.attr,
> >         &swpin_attr.attr,
> > +       &swpin_fallback_attr.attr,
> > +       &swpin_fallback_charge_attr.attr,
> >         &swpout_attr.attr,
> >         &swpout_fallback_attr.attr,
> >  #endif
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 209885a4134f..774dfd309cfe 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> >                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
> >                                                             gfp, entry))
> >                                 return folio;
> > +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> >                         folio_put(folio);
> >                 }
> > +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
> >                 order = next_order(&orders, order);
> >         }
> >
> > --
> > 2.45.0
> >

Thanks
Barry
Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Lance Yang 1 year, 2 months ago
On Sat, Nov 23, 2024 at 6:27 PM Barry Song <21cnbao@gmail.com> wrote:
>
> On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
> >
> > Hi Wenchao,
> >
> > On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
> > >
> > > Currently, large folio swap-in is supported, but we lack a method to
> > > analyze their success ratio. Similar to anon_fault_fallback, we introduce
> > > per-order mTHP swpin_fallback and swpin_fallback_charge counters for
> > > calculating their success ratio. The new counters are located at:
> > >
> > > /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
> > >         swpin_fallback
> > >         swpin_fallback_charge
> > >
> > > Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
> > > ---
> > > V2:
> > >  Introduce swapin_fallback_charge, which increments if it fails to
> > >  charge a huge page to memory despite successful allocation.
> > >
> > >  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
> > >  include/linux/huge_mm.h                    |  2 ++
> > >  mm/huge_memory.c                           |  6 ++++++
> > >  mm/memory.c                                |  2 ++
> > >  4 files changed, 20 insertions(+)
> > >
> > > diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> > > index 5034915f4e8e..9c07612281b5 100644
> > > --- a/Documentation/admin-guide/mm/transhuge.rst
> > > +++ b/Documentation/admin-guide/mm/transhuge.rst
> > > @@ -561,6 +561,16 @@ swpin
> > >         is incremented every time a huge page is swapped in from a non-zswap
> > >         swap device in one piece.
> > >
> >
> > Would the following be better?
> >
> > +swpin_fallback
> > +       is incremented if a huge page swapin fails to allocate or charge
> > +       it and instead falls back to using small pages.
> >
> > +swpin_fallback_charge
> > +       is incremented if a huge page swapin fails to charge it and instead
> > +       falls back to using small pages even though the allocation was
> > +       successful.
>
> much better, but it is better to align with "huge pages with
> lower orders or small pages", not necessarily small pages:
>
> anon_fault_fallback
> is incremented if a page fault fails to allocate or charge
> a huge page and instead falls back to using huge pages with
> lower orders or small pages.
>
> anon_fault_fallback_charge
> is incremented if a page fault fails to charge a huge page and
> instead falls back to using huge pages with lower orders or
> small pages even though the allocation was successful.

Right, I clearly overlooked that ;)

Thanks,
Lance

>
> >
> > Thanks,
> > Lance
> >
> > > +swpin_fallback
> > > +       is incremented if a huge page swapin fails to allocate or charge
> > > +       a huge page and instead falls back to using huge pages with
> > > +       lower orders or small pages.
> > > +
> > > +swpin_fallback_charge
> > > +       is incremented if a page swapin fails to charge a huge page and
> > > +       instead falls back to using huge pages with lower orders or
> > > +       small pages even though the allocation was successful.
> > > +
> > >  swpout
> > >         is incremented every time a huge page is swapped out to a non-zswap
> > >         swap device in one piece without splitting.
> > > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > > index b94c2e8ee918..93e509b6c00e 100644
> > > --- a/include/linux/huge_mm.h
> > > +++ b/include/linux/huge_mm.h
> > > @@ -121,6 +121,8 @@ enum mthp_stat_item {
> > >         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> > >         MTHP_STAT_ZSWPOUT,
> > >         MTHP_STAT_SWPIN,
> > > +       MTHP_STAT_SWPIN_FALLBACK,
> > > +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
> > >         MTHP_STAT_SWPOUT,
> > >         MTHP_STAT_SWPOUT_FALLBACK,
> > >         MTHP_STAT_SHMEM_ALLOC,
> > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > > index ee335d96fc39..46749dded1c9 100644
> > > --- a/mm/huge_memory.c
> > > +++ b/mm/huge_memory.c
> > > @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> > >  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> > >  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> > >  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> > > +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> > > +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> > >  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
> > >  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
> > >  #ifdef CONFIG_SHMEM
> > > @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
> > >  #ifndef CONFIG_SHMEM
> > >         &zswpout_attr.attr,
> > >         &swpin_attr.attr,
> > > +       &swpin_fallback_attr.attr,
> > > +       &swpin_fallback_charge_attr.attr,
> > >         &swpout_attr.attr,
> > >         &swpout_fallback_attr.attr,
> > >  #endif
> > > @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
> > >  #ifdef CONFIG_SHMEM
> > >         &zswpout_attr.attr,
> > >         &swpin_attr.attr,
> > > +       &swpin_fallback_attr.attr,
> > > +       &swpin_fallback_charge_attr.attr,
> > >         &swpout_attr.attr,
> > >         &swpout_fallback_attr.attr,
> > >  #endif
> > > diff --git a/mm/memory.c b/mm/memory.c
> > > index 209885a4134f..774dfd309cfe 100644
> > > --- a/mm/memory.c
> > > +++ b/mm/memory.c
> > > @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> > >                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
> > >                                                             gfp, entry))
> > >                                 return folio;
> > > +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> > >                         folio_put(folio);
> > >                 }
> > > +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
> > >                 order = next_order(&orders, order);
> > >         }
> > >
> > > --
> > > 2.45.0
> > >
>
> Thanks
> Barry
Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Wenchao Hao 1 year, 2 months ago
On 2024/11/23 19:52, Lance Yang wrote:
> On Sat, Nov 23, 2024 at 6:27 PM Barry Song <21cnbao@gmail.com> wrote:
>>
>> On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
>>>
>>> Hi Wenchao,
>>>
>>> On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
>>>>
>>>> Currently, large folio swap-in is supported, but we lack a method to
>>>> analyze their success ratio. Similar to anon_fault_fallback, we introduce
>>>> per-order mTHP swpin_fallback and swpin_fallback_charge counters for
>>>> calculating their success ratio. The new counters are located at:
>>>>
>>>> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
>>>>         swpin_fallback
>>>>         swpin_fallback_charge
>>>>
>>>> Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
>>>> ---
>>>> V2:
>>>>  Introduce swapin_fallback_charge, which increments if it fails to
>>>>  charge a huge page to memory despite successful allocation.
>>>>
>>>>  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
>>>>  include/linux/huge_mm.h                    |  2 ++
>>>>  mm/huge_memory.c                           |  6 ++++++
>>>>  mm/memory.c                                |  2 ++
>>>>  4 files changed, 20 insertions(+)
>>>>
>>>> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
>>>> index 5034915f4e8e..9c07612281b5 100644
>>>> --- a/Documentation/admin-guide/mm/transhuge.rst
>>>> +++ b/Documentation/admin-guide/mm/transhuge.rst
>>>> @@ -561,6 +561,16 @@ swpin
>>>>         is incremented every time a huge page is swapped in from a non-zswap
>>>>         swap device in one piece.
>>>>
>>>
>>> Would the following be better?
>>>
>>> +swpin_fallback
>>> +       is incremented if a huge page swapin fails to allocate or charge
>>> +       it and instead falls back to using small pages.
>>>
>>> +swpin_fallback_charge
>>> +       is incremented if a huge page swapin fails to charge it and instead
>>> +       falls back to using small pages even though the allocation was
>>> +       successful.
>>
>> much better, but it is better to align with "huge pages with
>> lower orders or small pages", not necessarily small pages:
>>
>> anon_fault_fallback
>> is incremented if a page fault fails to allocate or charge
>> a huge page and instead falls back to using huge pages with
>> lower orders or small pages.
>>
>> anon_fault_fallback_charge
>> is incremented if a page fault fails to charge a huge page and
>> instead falls back to using huge pages with lower orders or
>> small pages even though the allocation was successful.
> 
> Right, I clearly overlooked that ;)
> 

Hi Lance and Barry,

Do you think the following expression is clear? Compared to my original
version, I’ve removed the word “huge” from the first line, and it now
looks almost identical to anon_fault_fallback/anon_fault_fallback_charge.

swpin_fallback
       is incremented if a page swapin fails to allocate or charge
       a huge page and instead falls back to using huge pages with
       lower orders or small pages.

swpin_fallback_charge
       is incremented if a page swapin fails to charge a huge page and
       instead falls back to using huge pages with lower orders or
       small pages even though the allocation was successful.

Thanks,
Wencaho

> Thanks,
> Lance
> 
>>
>>>
>>> Thanks,
>>> Lance
>>>
>>>> +swpin_fallback
>>>> +       is incremented if a huge page swapin fails to allocate or charge
>>>> +       a huge page and instead falls back to using huge pages with
>>>> +       lower orders or small pages.
>>>> +
>>>> +swpin_fallback_charge
>>>> +       is incremented if a page swapin fails to charge a huge page and
>>>> +       instead falls back to using huge pages with lower orders or
>>>> +       small pages even though the allocation was successful.
>>>> +
>>>>  swpout
>>>>         is incremented every time a huge page is swapped out to a non-zswap
>>>>         swap device in one piece without splitting.
>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>> index b94c2e8ee918..93e509b6c00e 100644
>>>> --- a/include/linux/huge_mm.h
>>>> +++ b/include/linux/huge_mm.h
>>>> @@ -121,6 +121,8 @@ enum mthp_stat_item {
>>>>         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
>>>>         MTHP_STAT_ZSWPOUT,
>>>>         MTHP_STAT_SWPIN,
>>>> +       MTHP_STAT_SWPIN_FALLBACK,
>>>> +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
>>>>         MTHP_STAT_SWPOUT,
>>>>         MTHP_STAT_SWPOUT_FALLBACK,
>>>>         MTHP_STAT_SHMEM_ALLOC,
>>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>>> index ee335d96fc39..46749dded1c9 100644
>>>> --- a/mm/huge_memory.c
>>>> +++ b/mm/huge_memory.c
>>>> @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
>>>>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
>>>>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
>>>>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
>>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
>>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
>>>>  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
>>>>  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
>>>>  #ifdef CONFIG_SHMEM
>>>> @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
>>>>  #ifndef CONFIG_SHMEM
>>>>         &zswpout_attr.attr,
>>>>         &swpin_attr.attr,
>>>> +       &swpin_fallback_attr.attr,
>>>> +       &swpin_fallback_charge_attr.attr,
>>>>         &swpout_attr.attr,
>>>>         &swpout_fallback_attr.attr,
>>>>  #endif
>>>> @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
>>>>  #ifdef CONFIG_SHMEM
>>>>         &zswpout_attr.attr,
>>>>         &swpin_attr.attr,
>>>> +       &swpin_fallback_attr.attr,
>>>> +       &swpin_fallback_charge_attr.attr,
>>>>         &swpout_attr.attr,
>>>>         &swpout_fallback_attr.attr,
>>>>  #endif
>>>> diff --git a/mm/memory.c b/mm/memory.c
>>>> index 209885a4134f..774dfd309cfe 100644
>>>> --- a/mm/memory.c
>>>> +++ b/mm/memory.c
>>>> @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
>>>>                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
>>>>                                                             gfp, entry))
>>>>                                 return folio;
>>>> +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
>>>>                         folio_put(folio);
>>>>                 }
>>>> +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
>>>>                 order = next_order(&orders, order);
>>>>         }
>>>>
>>>> --
>>>> 2.45.0
>>>>
>>
>> Thanks
>> Barry

Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Lance Yang 1 year, 2 months ago
On Sat, Nov 23, 2024 at 9:17 PM Wenchao Hao <haowenchao22@gmail.com> wrote:
>
> On 2024/11/23 19:52, Lance Yang wrote:
> > On Sat, Nov 23, 2024 at 6:27 PM Barry Song <21cnbao@gmail.com> wrote:
> >>
> >> On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
> >>>
> >>> Hi Wenchao,
> >>>
> >>> On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
> >>>>
> >>>> Currently, large folio swap-in is supported, but we lack a method to
> >>>> analyze their success ratio. Similar to anon_fault_fallback, we introduce
> >>>> per-order mTHP swpin_fallback and swpin_fallback_charge counters for
> >>>> calculating their success ratio. The new counters are located at:
> >>>>
> >>>> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
> >>>>         swpin_fallback
> >>>>         swpin_fallback_charge
> >>>>
> >>>> Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
> >>>> ---
> >>>> V2:
> >>>>  Introduce swapin_fallback_charge, which increments if it fails to
> >>>>  charge a huge page to memory despite successful allocation.
> >>>>
> >>>>  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
> >>>>  include/linux/huge_mm.h                    |  2 ++
> >>>>  mm/huge_memory.c                           |  6 ++++++
> >>>>  mm/memory.c                                |  2 ++
> >>>>  4 files changed, 20 insertions(+)
> >>>>
> >>>> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> >>>> index 5034915f4e8e..9c07612281b5 100644
> >>>> --- a/Documentation/admin-guide/mm/transhuge.rst
> >>>> +++ b/Documentation/admin-guide/mm/transhuge.rst
> >>>> @@ -561,6 +561,16 @@ swpin
> >>>>         is incremented every time a huge page is swapped in from a non-zswap
> >>>>         swap device in one piece.
> >>>>
> >>>
> >>> Would the following be better?
> >>>
> >>> +swpin_fallback
> >>> +       is incremented if a huge page swapin fails to allocate or charge
> >>> +       it and instead falls back to using small pages.
> >>>
> >>> +swpin_fallback_charge
> >>> +       is incremented if a huge page swapin fails to charge it and instead
> >>> +       falls back to using small pages even though the allocation was
> >>> +       successful.
> >>
> >> much better, but it is better to align with "huge pages with
> >> lower orders or small pages", not necessarily small pages:
> >>
> >> anon_fault_fallback
> >> is incremented if a page fault fails to allocate or charge
> >> a huge page and instead falls back to using huge pages with
> >> lower orders or small pages.
> >>
> >> anon_fault_fallback_charge
> >> is incremented if a page fault fails to charge a huge page and
> >> instead falls back to using huge pages with lower orders or
> >> small pages even though the allocation was successful.
> >
> > Right, I clearly overlooked that ;)
> >
>
> Hi Lance and Barry,
>
> Do you think the following expression is clear? Compared to my original
> version, I’ve removed the word “huge” from the first line, and it now
> looks almost identical to anon_fault_fallback/anon_fault_fallback_charge.

Well, that's fine with me. And let's see Barry's opinion as well ;)

Thanks,
Lance

>
> swpin_fallback
>        is incremented if a page swapin fails to allocate or charge
>        a huge page and instead falls back to using huge pages with
>        lower orders or small pages.
>
> swpin_fallback_charge
>        is incremented if a page swapin fails to charge a huge page and
>        instead falls back to using huge pages with lower orders or
>        small pages even though the allocation was successful.
>
> Thanks,
> Wencaho
>
> > Thanks,
> > Lance
> >
> >>
> >>>
> >>> Thanks,
> >>> Lance
> >>>
> >>>> +swpin_fallback
> >>>> +       is incremented if a huge page swapin fails to allocate or charge
> >>>> +       a huge page and instead falls back to using huge pages with
> >>>> +       lower orders or small pages.
> >>>> +
> >>>> +swpin_fallback_charge
> >>>> +       is incremented if a page swapin fails to charge a huge page and
> >>>> +       instead falls back to using huge pages with lower orders or
> >>>> +       small pages even though the allocation was successful.
> >>>> +
> >>>>  swpout
> >>>>         is incremented every time a huge page is swapped out to a non-zswap
> >>>>         swap device in one piece without splitting.
> >>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> >>>> index b94c2e8ee918..93e509b6c00e 100644
> >>>> --- a/include/linux/huge_mm.h
> >>>> +++ b/include/linux/huge_mm.h
> >>>> @@ -121,6 +121,8 @@ enum mthp_stat_item {
> >>>>         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> >>>>         MTHP_STAT_ZSWPOUT,
> >>>>         MTHP_STAT_SWPIN,
> >>>> +       MTHP_STAT_SWPIN_FALLBACK,
> >>>> +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
> >>>>         MTHP_STAT_SWPOUT,
> >>>>         MTHP_STAT_SWPOUT_FALLBACK,
> >>>>         MTHP_STAT_SHMEM_ALLOC,
> >>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> >>>> index ee335d96fc39..46749dded1c9 100644
> >>>> --- a/mm/huge_memory.c
> >>>> +++ b/mm/huge_memory.c
> >>>> @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> >>>>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> >>>>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> >>>>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> >>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> >>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> >>>>  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
> >>>>  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
> >>>>  #ifdef CONFIG_SHMEM
> >>>> @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
> >>>>  #ifndef CONFIG_SHMEM
> >>>>         &zswpout_attr.attr,
> >>>>         &swpin_attr.attr,
> >>>> +       &swpin_fallback_attr.attr,
> >>>> +       &swpin_fallback_charge_attr.attr,
> >>>>         &swpout_attr.attr,
> >>>>         &swpout_fallback_attr.attr,
> >>>>  #endif
> >>>> @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
> >>>>  #ifdef CONFIG_SHMEM
> >>>>         &zswpout_attr.attr,
> >>>>         &swpin_attr.attr,
> >>>> +       &swpin_fallback_attr.attr,
> >>>> +       &swpin_fallback_charge_attr.attr,
> >>>>         &swpout_attr.attr,
> >>>>         &swpout_fallback_attr.attr,
> >>>>  #endif
> >>>> diff --git a/mm/memory.c b/mm/memory.c
> >>>> index 209885a4134f..774dfd309cfe 100644
> >>>> --- a/mm/memory.c
> >>>> +++ b/mm/memory.c
> >>>> @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> >>>>                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
> >>>>                                                             gfp, entry))
> >>>>                                 return folio;
> >>>> +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> >>>>                         folio_put(folio);
> >>>>                 }
> >>>> +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
> >>>>                 order = next_order(&orders, order);
> >>>>         }
> >>>>
> >>>> --
> >>>> 2.45.0
> >>>>
> >>
> >> Thanks
> >> Barry
>
Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Barry Song 1 year, 2 months ago
On Sun, Nov 24, 2024 at 2:56 PM Lance Yang <ioworker0@gmail.com> wrote:
>
> On Sat, Nov 23, 2024 at 9:17 PM Wenchao Hao <haowenchao22@gmail.com> wrote:
> >
> > On 2024/11/23 19:52, Lance Yang wrote:
> > > On Sat, Nov 23, 2024 at 6:27 PM Barry Song <21cnbao@gmail.com> wrote:
> > >>
> > >> On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
> > >>>
> > >>> Hi Wenchao,
> > >>>
> > >>> On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
> > >>>>
> > >>>> Currently, large folio swap-in is supported, but we lack a method to
> > >>>> analyze their success ratio. Similar to anon_fault_fallback, we introduce
> > >>>> per-order mTHP swpin_fallback and swpin_fallback_charge counters for
> > >>>> calculating their success ratio. The new counters are located at:
> > >>>>
> > >>>> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
> > >>>>         swpin_fallback
> > >>>>         swpin_fallback_charge
> > >>>>
> > >>>> Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
> > >>>> ---
> > >>>> V2:
> > >>>>  Introduce swapin_fallback_charge, which increments if it fails to
> > >>>>  charge a huge page to memory despite successful allocation.
> > >>>>
> > >>>>  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
> > >>>>  include/linux/huge_mm.h                    |  2 ++
> > >>>>  mm/huge_memory.c                           |  6 ++++++
> > >>>>  mm/memory.c                                |  2 ++
> > >>>>  4 files changed, 20 insertions(+)
> > >>>>
> > >>>> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> > >>>> index 5034915f4e8e..9c07612281b5 100644
> > >>>> --- a/Documentation/admin-guide/mm/transhuge.rst
> > >>>> +++ b/Documentation/admin-guide/mm/transhuge.rst
> > >>>> @@ -561,6 +561,16 @@ swpin
> > >>>>         is incremented every time a huge page is swapped in from a non-zswap
> > >>>>         swap device in one piece.
> > >>>>
> > >>>
> > >>> Would the following be better?
> > >>>
> > >>> +swpin_fallback
> > >>> +       is incremented if a huge page swapin fails to allocate or charge
> > >>> +       it and instead falls back to using small pages.
> > >>>
> > >>> +swpin_fallback_charge
> > >>> +       is incremented if a huge page swapin fails to charge it and instead
> > >>> +       falls back to using small pages even though the allocation was
> > >>> +       successful.
> > >>
> > >> much better, but it is better to align with "huge pages with
> > >> lower orders or small pages", not necessarily small pages:
> > >>
> > >> anon_fault_fallback
> > >> is incremented if a page fault fails to allocate or charge
> > >> a huge page and instead falls back to using huge pages with
> > >> lower orders or small pages.
> > >>
> > >> anon_fault_fallback_charge
> > >> is incremented if a page fault fails to charge a huge page and
> > >> instead falls back to using huge pages with lower orders or
> > >> small pages even though the allocation was successful.
> > >
> > > Right, I clearly overlooked that ;)
> > >
> >
> > Hi Lance and Barry,
> >
> > Do you think the following expression is clear? Compared to my original
> > version, I’ve removed the word “huge” from the first line, and it now
> > looks almost identical to anon_fault_fallback/anon_fault_fallback_charge.
>
> Well, that's fine with me. And let's see Barry's opinion as well ;)

I still prefer Lance's version. The fallback path in it only needs to
be adjusted to
include huge pages with lower orders. In contrast, Wenchao's version feels less
natural to me because "page swapin" sounds quite odd - we often hear
"page fault,"
but we have never encountered "page swapin."

So I mean:

 swpin_fallback
        is incremented if swapin fails to allocate or charge a huge
page and instead
        falls back to using huge pages with lower orders or small pages.

 swpin_fallback_charge
        is incremented if swapin fails to charge a huge page and instead
        falls back to using  huge pages with lower orders or small
pages even though
        the allocation was successful.

>
> Thanks,
> Lance
>
> >
> > swpin_fallback
> >        is incremented if a page swapin fails to allocate or charge
> >        a huge page and instead falls back to using huge pages with
> >        lower orders or small pages.
> >
> > swpin_fallback_charge
> >        is incremented if a page swapin fails to charge a huge page and
> >        instead falls back to using huge pages with lower orders or
> >        small pages even though the allocation was successful.
> >
> > Thanks,
> > Wencaho
> >
> > > Thanks,
> > > Lance
> > >
> > >>
> > >>>
> > >>> Thanks,
> > >>> Lance
> > >>>
> > >>>> +swpin_fallback
> > >>>> +       is incremented if a huge page swapin fails to allocate or charge
> > >>>> +       a huge page and instead falls back to using huge pages with
> > >>>> +       lower orders or small pages.
> > >>>> +
> > >>>> +swpin_fallback_charge
> > >>>> +       is incremented if a page swapin fails to charge a huge page and
> > >>>> +       instead falls back to using huge pages with lower orders or
> > >>>> +       small pages even though the allocation was successful.
> > >>>> +
> > >>>>  swpout
> > >>>>         is incremented every time a huge page is swapped out to a non-zswap
> > >>>>         swap device in one piece without splitting.
> > >>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > >>>> index b94c2e8ee918..93e509b6c00e 100644
> > >>>> --- a/include/linux/huge_mm.h
> > >>>> +++ b/include/linux/huge_mm.h
> > >>>> @@ -121,6 +121,8 @@ enum mthp_stat_item {
> > >>>>         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> > >>>>         MTHP_STAT_ZSWPOUT,
> > >>>>         MTHP_STAT_SWPIN,
> > >>>> +       MTHP_STAT_SWPIN_FALLBACK,
> > >>>> +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
> > >>>>         MTHP_STAT_SWPOUT,
> > >>>>         MTHP_STAT_SWPOUT_FALLBACK,
> > >>>>         MTHP_STAT_SHMEM_ALLOC,
> > >>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > >>>> index ee335d96fc39..46749dded1c9 100644
> > >>>> --- a/mm/huge_memory.c
> > >>>> +++ b/mm/huge_memory.c
> > >>>> @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> > >>>>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> > >>>>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> > >>>>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> > >>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> > >>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> > >>>>  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
> > >>>>  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
> > >>>>  #ifdef CONFIG_SHMEM
> > >>>> @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
> > >>>>  #ifndef CONFIG_SHMEM
> > >>>>         &zswpout_attr.attr,
> > >>>>         &swpin_attr.attr,
> > >>>> +       &swpin_fallback_attr.attr,
> > >>>> +       &swpin_fallback_charge_attr.attr,
> > >>>>         &swpout_attr.attr,
> > >>>>         &swpout_fallback_attr.attr,
> > >>>>  #endif
> > >>>> @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
> > >>>>  #ifdef CONFIG_SHMEM
> > >>>>         &zswpout_attr.attr,
> > >>>>         &swpin_attr.attr,
> > >>>> +       &swpin_fallback_attr.attr,
> > >>>> +       &swpin_fallback_charge_attr.attr,
> > >>>>         &swpout_attr.attr,
> > >>>>         &swpout_fallback_attr.attr,
> > >>>>  #endif
> > >>>> diff --git a/mm/memory.c b/mm/memory.c
> > >>>> index 209885a4134f..774dfd309cfe 100644
> > >>>> --- a/mm/memory.c
> > >>>> +++ b/mm/memory.c
> > >>>> @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> > >>>>                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
> > >>>>                                                             gfp, entry))
> > >>>>                                 return folio;
> > >>>> +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> > >>>>                         folio_put(folio);
> > >>>>                 }
> > >>>> +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
> > >>>>                 order = next_order(&orders, order);
> > >>>>         }
> > >>>>
> > >>>> --
> > >>>> 2.45.0
> > >>>>
> > >>

Thanks
Barry
Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Lance Yang 1 year, 2 months ago
On Sun, Nov 24, 2024 at 3:11 PM Barry Song <21cnbao@gmail.com> wrote:
>
> On Sun, Nov 24, 2024 at 2:56 PM Lance Yang <ioworker0@gmail.com> wrote:
> >
> > On Sat, Nov 23, 2024 at 9:17 PM Wenchao Hao <haowenchao22@gmail.com> wrote:
> > >
> > > On 2024/11/23 19:52, Lance Yang wrote:
> > > > On Sat, Nov 23, 2024 at 6:27 PM Barry Song <21cnbao@gmail.com> wrote:
> > > >>
> > > >> On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
> > > >>>
> > > >>> Hi Wenchao,
> > > >>>
> > > >>> On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
> > > >>>>
> > > >>>> Currently, large folio swap-in is supported, but we lack a method to
> > > >>>> analyze their success ratio. Similar to anon_fault_fallback, we introduce
> > > >>>> per-order mTHP swpin_fallback and swpin_fallback_charge counters for
> > > >>>> calculating their success ratio. The new counters are located at:
> > > >>>>
> > > >>>> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
> > > >>>>         swpin_fallback
> > > >>>>         swpin_fallback_charge
> > > >>>>
> > > >>>> Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
> > > >>>> ---
> > > >>>> V2:
> > > >>>>  Introduce swapin_fallback_charge, which increments if it fails to
> > > >>>>  charge a huge page to memory despite successful allocation.
> > > >>>>
> > > >>>>  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
> > > >>>>  include/linux/huge_mm.h                    |  2 ++
> > > >>>>  mm/huge_memory.c                           |  6 ++++++
> > > >>>>  mm/memory.c                                |  2 ++
> > > >>>>  4 files changed, 20 insertions(+)
> > > >>>>
> > > >>>> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> > > >>>> index 5034915f4e8e..9c07612281b5 100644
> > > >>>> --- a/Documentation/admin-guide/mm/transhuge.rst
> > > >>>> +++ b/Documentation/admin-guide/mm/transhuge.rst
> > > >>>> @@ -561,6 +561,16 @@ swpin
> > > >>>>         is incremented every time a huge page is swapped in from a non-zswap
> > > >>>>         swap device in one piece.
> > > >>>>
> > > >>>
> > > >>> Would the following be better?
> > > >>>
> > > >>> +swpin_fallback
> > > >>> +       is incremented if a huge page swapin fails to allocate or charge
> > > >>> +       it and instead falls back to using small pages.
> > > >>>
> > > >>> +swpin_fallback_charge
> > > >>> +       is incremented if a huge page swapin fails to charge it and instead
> > > >>> +       falls back to using small pages even though the allocation was
> > > >>> +       successful.
> > > >>
> > > >> much better, but it is better to align with "huge pages with
> > > >> lower orders or small pages", not necessarily small pages:
> > > >>
> > > >> anon_fault_fallback
> > > >> is incremented if a page fault fails to allocate or charge
> > > >> a huge page and instead falls back to using huge pages with
> > > >> lower orders or small pages.
> > > >>
> > > >> anon_fault_fallback_charge
> > > >> is incremented if a page fault fails to charge a huge page and
> > > >> instead falls back to using huge pages with lower orders or
> > > >> small pages even though the allocation was successful.
> > > >
> > > > Right, I clearly overlooked that ;)
> > > >
> > >
> > > Hi Lance and Barry,
> > >
> > > Do you think the following expression is clear? Compared to my original
> > > version, I’ve removed the word “huge” from the first line, and it now
> > > looks almost identical to anon_fault_fallback/anon_fault_fallback_charge.
> >
> > Well, that's fine with me. And let's see Barry's opinion as well ;)
>
> I still prefer Lance's version. The fallback path in it only needs to
> be adjusted to
> include huge pages with lower orders. In contrast, Wenchao's version feels less
> natural to me because "page swapin" sounds quite odd - we often hear
> "page fault,"
> but we have never encountered "page swapin."

Yeah, it makes sense to me ~

>
> So I mean:
>
>  swpin_fallback
>         is incremented if swapin fails to allocate or charge a huge
> page and instead
>         falls back to using huge pages with lower orders or small pages.
>
>  swpin_fallback_charge
>         is incremented if swapin fails to charge a huge page and instead
>         falls back to using  huge pages with lower orders or small
> pages even though
>         the allocation was successful.

IHMO, much better and clearer than before ;)

Thank,
Lance

>
> >
> > Thanks,
> > Lance
> >
> > >
> > > swpin_fallback
> > >        is incremented if a page swapin fails to allocate or charge
> > >        a huge page and instead falls back to using huge pages with
> > >        lower orders or small pages.
> > >
> > > swpin_fallback_charge
> > >        is incremented if a page swapin fails to charge a huge page and
> > >        instead falls back to using huge pages with lower orders or
> > >        small pages even though the allocation was successful.
> > >
> > > Thanks,
> > > Wencaho
> > >
> > > > Thanks,
> > > > Lance
> > > >
> > > >>
> > > >>>
> > > >>> Thanks,
> > > >>> Lance
> > > >>>
> > > >>>> +swpin_fallback
> > > >>>> +       is incremented if a huge page swapin fails to allocate or charge
> > > >>>> +       a huge page and instead falls back to using huge pages with
> > > >>>> +       lower orders or small pages.
> > > >>>> +
> > > >>>> +swpin_fallback_charge
> > > >>>> +       is incremented if a page swapin fails to charge a huge page and
> > > >>>> +       instead falls back to using huge pages with lower orders or
> > > >>>> +       small pages even though the allocation was successful.
> > > >>>> +
> > > >>>>  swpout
> > > >>>>         is incremented every time a huge page is swapped out to a non-zswap
> > > >>>>         swap device in one piece without splitting.
> > > >>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > > >>>> index b94c2e8ee918..93e509b6c00e 100644
> > > >>>> --- a/include/linux/huge_mm.h
> > > >>>> +++ b/include/linux/huge_mm.h
> > > >>>> @@ -121,6 +121,8 @@ enum mthp_stat_item {
> > > >>>>         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> > > >>>>         MTHP_STAT_ZSWPOUT,
> > > >>>>         MTHP_STAT_SWPIN,
> > > >>>> +       MTHP_STAT_SWPIN_FALLBACK,
> > > >>>> +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
> > > >>>>         MTHP_STAT_SWPOUT,
> > > >>>>         MTHP_STAT_SWPOUT_FALLBACK,
> > > >>>>         MTHP_STAT_SHMEM_ALLOC,
> > > >>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > > >>>> index ee335d96fc39..46749dded1c9 100644
> > > >>>> --- a/mm/huge_memory.c
> > > >>>> +++ b/mm/huge_memory.c
> > > >>>> @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> > > >>>>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> > > >>>>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> > > >>>>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> > > >>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> > > >>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> > > >>>>  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
> > > >>>>  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
> > > >>>>  #ifdef CONFIG_SHMEM
> > > >>>> @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
> > > >>>>  #ifndef CONFIG_SHMEM
> > > >>>>         &zswpout_attr.attr,
> > > >>>>         &swpin_attr.attr,
> > > >>>> +       &swpin_fallback_attr.attr,
> > > >>>> +       &swpin_fallback_charge_attr.attr,
> > > >>>>         &swpout_attr.attr,
> > > >>>>         &swpout_fallback_attr.attr,
> > > >>>>  #endif
> > > >>>> @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
> > > >>>>  #ifdef CONFIG_SHMEM
> > > >>>>         &zswpout_attr.attr,
> > > >>>>         &swpin_attr.attr,
> > > >>>> +       &swpin_fallback_attr.attr,
> > > >>>> +       &swpin_fallback_charge_attr.attr,
> > > >>>>         &swpout_attr.attr,
> > > >>>>         &swpout_fallback_attr.attr,
> > > >>>>  #endif
> > > >>>> diff --git a/mm/memory.c b/mm/memory.c
> > > >>>> index 209885a4134f..774dfd309cfe 100644
> > > >>>> --- a/mm/memory.c
> > > >>>> +++ b/mm/memory.c
> > > >>>> @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> > > >>>>                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
> > > >>>>                                                             gfp, entry))
> > > >>>>                                 return folio;
> > > >>>> +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> > > >>>>                         folio_put(folio);
> > > >>>>                 }
> > > >>>> +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
> > > >>>>                 order = next_order(&orders, order);
> > > >>>>         }
> > > >>>>
> > > >>>> --
> > > >>>> 2.45.0
> > > >>>>
> > > >>
>
> Thanks
> Barry
Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Wenchao Hao 1 year, 2 months ago
On 2024/11/24 15:28, Lance Yang wrote:
> On Sun, Nov 24, 2024 at 3:11 PM Barry Song <21cnbao@gmail.com> wrote:
>>
>> On Sun, Nov 24, 2024 at 2:56 PM Lance Yang <ioworker0@gmail.com> wrote:
>>>
>>> On Sat, Nov 23, 2024 at 9:17 PM Wenchao Hao <haowenchao22@gmail.com> wrote:
>>>>
>>>> On 2024/11/23 19:52, Lance Yang wrote:
>>>>> On Sat, Nov 23, 2024 at 6:27 PM Barry Song <21cnbao@gmail.com> wrote:
>>>>>>
>>>>>> On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
>>>>>>>
>>>>>>> Hi Wenchao,
>>>>>>>
>>>>>>> On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
>>>>>>>>
>>>>>>>> Currently, large folio swap-in is supported, but we lack a method to
>>>>>>>> analyze their success ratio. Similar to anon_fault_fallback, we introduce
>>>>>>>> per-order mTHP swpin_fallback and swpin_fallback_charge counters for
>>>>>>>> calculating their success ratio. The new counters are located at:
>>>>>>>>
>>>>>>>> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
>>>>>>>>         swpin_fallback
>>>>>>>>         swpin_fallback_charge
>>>>>>>>
>>>>>>>> Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
>>>>>>>> ---
>>>>>>>> V2:
>>>>>>>>  Introduce swapin_fallback_charge, which increments if it fails to
>>>>>>>>  charge a huge page to memory despite successful allocation.
>>>>>>>>
>>>>>>>>  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
>>>>>>>>  include/linux/huge_mm.h                    |  2 ++
>>>>>>>>  mm/huge_memory.c                           |  6 ++++++
>>>>>>>>  mm/memory.c                                |  2 ++
>>>>>>>>  4 files changed, 20 insertions(+)
>>>>>>>>
>>>>>>>> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
>>>>>>>> index 5034915f4e8e..9c07612281b5 100644
>>>>>>>> --- a/Documentation/admin-guide/mm/transhuge.rst
>>>>>>>> +++ b/Documentation/admin-guide/mm/transhuge.rst
>>>>>>>> @@ -561,6 +561,16 @@ swpin
>>>>>>>>         is incremented every time a huge page is swapped in from a non-zswap
>>>>>>>>         swap device in one piece.
>>>>>>>>
>>>>>>>
>>>>>>> Would the following be better?
>>>>>>>
>>>>>>> +swpin_fallback
>>>>>>> +       is incremented if a huge page swapin fails to allocate or charge
>>>>>>> +       it and instead falls back to using small pages.
>>>>>>>
>>>>>>> +swpin_fallback_charge
>>>>>>> +       is incremented if a huge page swapin fails to charge it and instead
>>>>>>> +       falls back to using small pages even though the allocation was
>>>>>>> +       successful.
>>>>>>
>>>>>> much better, but it is better to align with "huge pages with
>>>>>> lower orders or small pages", not necessarily small pages:
>>>>>>
>>>>>> anon_fault_fallback
>>>>>> is incremented if a page fault fails to allocate or charge
>>>>>> a huge page and instead falls back to using huge pages with
>>>>>> lower orders or small pages.
>>>>>>
>>>>>> anon_fault_fallback_charge
>>>>>> is incremented if a page fault fails to charge a huge page and
>>>>>> instead falls back to using huge pages with lower orders or
>>>>>> small pages even though the allocation was successful.
>>>>>
>>>>> Right, I clearly overlooked that ;)
>>>>>
>>>>
>>>> Hi Lance and Barry,
>>>>
>>>> Do you think the following expression is clear? Compared to my original
>>>> version, I’ve removed the word “huge” from the first line, and it now
>>>> looks almost identical to anon_fault_fallback/anon_fault_fallback_charge.
>>>
>>> Well, that's fine with me. And let's see Barry's opinion as well ;)
>>
>> I still prefer Lance's version. The fallback path in it only needs to
>> be adjusted to
>> include huge pages with lower orders. In contrast, Wenchao's version feels less
>> natural to me because "page swapin" sounds quite odd - we often hear
>> "page fault,"
>> but we have never encountered "page swapin."
> 
> Yeah, it makes sense to me ~
> 
>>
>> So I mean:
>>
>>  swpin_fallback
>>         is incremented if swapin fails to allocate or charge a huge
>> page and instead
>>         falls back to using huge pages with lower orders or small pages.
>>
>>  swpin_fallback_charge
>>         is incremented if swapin fails to charge a huge page and instead
>>         falls back to using  huge pages with lower orders or small
>> pages even though
>>         the allocation was successful.
> 
> IHMO, much better and clearer than before ;)
> 

Hi,

Thank you both very much for your valuable suggestions. I am only
now able to respond to your emails due to a network issue.

I will make the revisions based on your feedback and send the third
version of the patch.

Should I include a "Reviewed-by" or any other tags?

Thanks again,
Wenchao


> Thank,
> Lance
> 
>>
>>>
>>> Thanks,
>>> Lance
>>>
>>>>
>>>> swpin_fallback
>>>>        is incremented if a page swapin fails to allocate or charge
>>>>        a huge page and instead falls back to using huge pages with
>>>>        lower orders or small pages.
>>>>
>>>> swpin_fallback_charge
>>>>        is incremented if a page swapin fails to charge a huge page and
>>>>        instead falls back to using huge pages with lower orders or
>>>>        small pages even though the allocation was successful.
>>>>
>>>> Thanks,
>>>> Wencaho
>>>>
>>>>> Thanks,
>>>>> Lance
>>>>>
>>>>>>
>>>>>>>
>>>>>>> Thanks,
>>>>>>> Lance
>>>>>>>
>>>>>>>> +swpin_fallback
>>>>>>>> +       is incremented if a huge page swapin fails to allocate or charge
>>>>>>>> +       a huge page and instead falls back to using huge pages with
>>>>>>>> +       lower orders or small pages.
>>>>>>>> +
>>>>>>>> +swpin_fallback_charge
>>>>>>>> +       is incremented if a page swapin fails to charge a huge page and
>>>>>>>> +       instead falls back to using huge pages with lower orders or
>>>>>>>> +       small pages even though the allocation was successful.
>>>>>>>> +
>>>>>>>>  swpout
>>>>>>>>         is incremented every time a huge page is swapped out to a non-zswap
>>>>>>>>         swap device in one piece without splitting.
>>>>>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
>>>>>>>> index b94c2e8ee918..93e509b6c00e 100644
>>>>>>>> --- a/include/linux/huge_mm.h
>>>>>>>> +++ b/include/linux/huge_mm.h
>>>>>>>> @@ -121,6 +121,8 @@ enum mthp_stat_item {
>>>>>>>>         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
>>>>>>>>         MTHP_STAT_ZSWPOUT,
>>>>>>>>         MTHP_STAT_SWPIN,
>>>>>>>> +       MTHP_STAT_SWPIN_FALLBACK,
>>>>>>>> +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
>>>>>>>>         MTHP_STAT_SWPOUT,
>>>>>>>>         MTHP_STAT_SWPOUT_FALLBACK,
>>>>>>>>         MTHP_STAT_SHMEM_ALLOC,
>>>>>>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
>>>>>>>> index ee335d96fc39..46749dded1c9 100644
>>>>>>>> --- a/mm/huge_memory.c
>>>>>>>> +++ b/mm/huge_memory.c
>>>>>>>> @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
>>>>>>>>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
>>>>>>>>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
>>>>>>>>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
>>>>>>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
>>>>>>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
>>>>>>>>  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
>>>>>>>>  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
>>>>>>>>  #ifdef CONFIG_SHMEM
>>>>>>>> @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
>>>>>>>>  #ifndef CONFIG_SHMEM
>>>>>>>>         &zswpout_attr.attr,
>>>>>>>>         &swpin_attr.attr,
>>>>>>>> +       &swpin_fallback_attr.attr,
>>>>>>>> +       &swpin_fallback_charge_attr.attr,
>>>>>>>>         &swpout_attr.attr,
>>>>>>>>         &swpout_fallback_attr.attr,
>>>>>>>>  #endif
>>>>>>>> @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
>>>>>>>>  #ifdef CONFIG_SHMEM
>>>>>>>>         &zswpout_attr.attr,
>>>>>>>>         &swpin_attr.attr,
>>>>>>>> +       &swpin_fallback_attr.attr,
>>>>>>>> +       &swpin_fallback_charge_attr.attr,
>>>>>>>>         &swpout_attr.attr,
>>>>>>>>         &swpout_fallback_attr.attr,
>>>>>>>>  #endif
>>>>>>>> diff --git a/mm/memory.c b/mm/memory.c
>>>>>>>> index 209885a4134f..774dfd309cfe 100644
>>>>>>>> --- a/mm/memory.c
>>>>>>>> +++ b/mm/memory.c
>>>>>>>> @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
>>>>>>>>                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
>>>>>>>>                                                             gfp, entry))
>>>>>>>>                                 return folio;
>>>>>>>> +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
>>>>>>>>                         folio_put(folio);
>>>>>>>>                 }
>>>>>>>> +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
>>>>>>>>                 order = next_order(&orders, order);
>>>>>>>>         }
>>>>>>>>
>>>>>>>> --
>>>>>>>> 2.45.0
>>>>>>>>
>>>>>>
>>
>> Thanks
>> Barry

Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Lance Yang 1 year, 2 months ago
On Fri, Nov 29, 2024 at 7:49 PM Wenchao Hao <haowenchao22@gmail.com> wrote:
>
> On 2024/11/24 15:28, Lance Yang wrote:
> > On Sun, Nov 24, 2024 at 3:11 PM Barry Song <21cnbao@gmail.com> wrote:
> >>
> >> On Sun, Nov 24, 2024 at 2:56 PM Lance Yang <ioworker0@gmail.com> wrote:
> >>>
> >>> On Sat, Nov 23, 2024 at 9:17 PM Wenchao Hao <haowenchao22@gmail.com> wrote:
> >>>>
> >>>> On 2024/11/23 19:52, Lance Yang wrote:
> >>>>> On Sat, Nov 23, 2024 at 6:27 PM Barry Song <21cnbao@gmail.com> wrote:
> >>>>>>
> >>>>>> On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
> >>>>>>>
> >>>>>>> Hi Wenchao,
> >>>>>>>
> >>>>>>> On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
> >>>>>>>>
> >>>>>>>> Currently, large folio swap-in is supported, but we lack a method to
> >>>>>>>> analyze their success ratio. Similar to anon_fault_fallback, we introduce
> >>>>>>>> per-order mTHP swpin_fallback and swpin_fallback_charge counters for
> >>>>>>>> calculating their success ratio. The new counters are located at:
> >>>>>>>>
> >>>>>>>> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
> >>>>>>>>         swpin_fallback
> >>>>>>>>         swpin_fallback_charge
> >>>>>>>>
> >>>>>>>> Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
> >>>>>>>> ---
> >>>>>>>> V2:
> >>>>>>>>  Introduce swapin_fallback_charge, which increments if it fails to
> >>>>>>>>  charge a huge page to memory despite successful allocation.
> >>>>>>>>
> >>>>>>>>  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
> >>>>>>>>  include/linux/huge_mm.h                    |  2 ++
> >>>>>>>>  mm/huge_memory.c                           |  6 ++++++
> >>>>>>>>  mm/memory.c                                |  2 ++
> >>>>>>>>  4 files changed, 20 insertions(+)
> >>>>>>>>
> >>>>>>>> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> >>>>>>>> index 5034915f4e8e..9c07612281b5 100644
> >>>>>>>> --- a/Documentation/admin-guide/mm/transhuge.rst
> >>>>>>>> +++ b/Documentation/admin-guide/mm/transhuge.rst
> >>>>>>>> @@ -561,6 +561,16 @@ swpin
> >>>>>>>>         is incremented every time a huge page is swapped in from a non-zswap
> >>>>>>>>         swap device in one piece.
> >>>>>>>>
> >>>>>>>
> >>>>>>> Would the following be better?
> >>>>>>>
> >>>>>>> +swpin_fallback
> >>>>>>> +       is incremented if a huge page swapin fails to allocate or charge
> >>>>>>> +       it and instead falls back to using small pages.
> >>>>>>>
> >>>>>>> +swpin_fallback_charge
> >>>>>>> +       is incremented if a huge page swapin fails to charge it and instead
> >>>>>>> +       falls back to using small pages even though the allocation was
> >>>>>>> +       successful.
> >>>>>>
> >>>>>> much better, but it is better to align with "huge pages with
> >>>>>> lower orders or small pages", not necessarily small pages:
> >>>>>>
> >>>>>> anon_fault_fallback
> >>>>>> is incremented if a page fault fails to allocate or charge
> >>>>>> a huge page and instead falls back to using huge pages with
> >>>>>> lower orders or small pages.
> >>>>>>
> >>>>>> anon_fault_fallback_charge
> >>>>>> is incremented if a page fault fails to charge a huge page and
> >>>>>> instead falls back to using huge pages with lower orders or
> >>>>>> small pages even though the allocation was successful.
> >>>>>
> >>>>> Right, I clearly overlooked that ;)
> >>>>>
> >>>>
> >>>> Hi Lance and Barry,
> >>>>
> >>>> Do you think the following expression is clear? Compared to my original
> >>>> version, I’ve removed the word “huge” from the first line, and it now
> >>>> looks almost identical to anon_fault_fallback/anon_fault_fallback_charge.
> >>>
> >>> Well, that's fine with me. And let's see Barry's opinion as well ;)
> >>
> >> I still prefer Lance's version. The fallback path in it only needs to
> >> be adjusted to
> >> include huge pages with lower orders. In contrast, Wenchao's version feels less
> >> natural to me because "page swapin" sounds quite odd - we often hear
> >> "page fault,"
> >> but we have never encountered "page swapin."
> >
> > Yeah, it makes sense to me ~
> >
> >>
> >> So I mean:
> >>
> >>  swpin_fallback
> >>         is incremented if swapin fails to allocate or charge a huge
> >> page and instead
> >>         falls back to using huge pages with lower orders or small pages.
> >>
> >>  swpin_fallback_charge
> >>         is incremented if swapin fails to charge a huge page and instead
> >>         falls back to using  huge pages with lower orders or small
> >> pages even though
> >>         the allocation was successful.
> >
> > IHMO, much better and clearer than before ;)
> >
>
> Hi,
>
> Thank you both very much for your valuable suggestions. I am only
> now able to respond to your emails due to a network issue.
>
> I will make the revisions based on your feedback and send the third
> version of the patch.

Thanks for following up.

>
> Should I include a "Reviewed-by" or any other tags?

Seems like there is no RB yet. I think we just tweak the doc as Barry
suggested, resend out the new version, and then wait for the folks to
review it ;)

Thanks,
Lance

>
> Thanks again,
> Wenchao
>
>
> > Thank,
> > Lance
> >
> >>
> >>>
> >>> Thanks,
> >>> Lance
> >>>
> >>>>
> >>>> swpin_fallback
> >>>>        is incremented if a page swapin fails to allocate or charge
> >>>>        a huge page and instead falls back to using huge pages with
> >>>>        lower orders or small pages.
> >>>>
> >>>> swpin_fallback_charge
> >>>>        is incremented if a page swapin fails to charge a huge page and
> >>>>        instead falls back to using huge pages with lower orders or
> >>>>        small pages even though the allocation was successful.
> >>>>
> >>>> Thanks,
> >>>> Wencaho
> >>>>
> >>>>> Thanks,
> >>>>> Lance
> >>>>>
> >>>>>>
> >>>>>>>
> >>>>>>> Thanks,
> >>>>>>> Lance
> >>>>>>>
> >>>>>>>> +swpin_fallback
> >>>>>>>> +       is incremented if a huge page swapin fails to allocate or charge
> >>>>>>>> +       a huge page and instead falls back to using huge pages with
> >>>>>>>> +       lower orders or small pages.
> >>>>>>>> +
> >>>>>>>> +swpin_fallback_charge
> >>>>>>>> +       is incremented if a page swapin fails to charge a huge page and
> >>>>>>>> +       instead falls back to using huge pages with lower orders or
> >>>>>>>> +       small pages even though the allocation was successful.
> >>>>>>>> +
> >>>>>>>>  swpout
> >>>>>>>>         is incremented every time a huge page is swapped out to a non-zswap
> >>>>>>>>         swap device in one piece without splitting.
> >>>>>>>> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> >>>>>>>> index b94c2e8ee918..93e509b6c00e 100644
> >>>>>>>> --- a/include/linux/huge_mm.h
> >>>>>>>> +++ b/include/linux/huge_mm.h
> >>>>>>>> @@ -121,6 +121,8 @@ enum mthp_stat_item {
> >>>>>>>>         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> >>>>>>>>         MTHP_STAT_ZSWPOUT,
> >>>>>>>>         MTHP_STAT_SWPIN,
> >>>>>>>> +       MTHP_STAT_SWPIN_FALLBACK,
> >>>>>>>> +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
> >>>>>>>>         MTHP_STAT_SWPOUT,
> >>>>>>>>         MTHP_STAT_SWPOUT_FALLBACK,
> >>>>>>>>         MTHP_STAT_SHMEM_ALLOC,
> >>>>>>>> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> >>>>>>>> index ee335d96fc39..46749dded1c9 100644
> >>>>>>>> --- a/mm/huge_memory.c
> >>>>>>>> +++ b/mm/huge_memory.c
> >>>>>>>> @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> >>>>>>>>  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> >>>>>>>>  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> >>>>>>>>  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> >>>>>>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> >>>>>>>> +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> >>>>>>>>  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
> >>>>>>>>  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
> >>>>>>>>  #ifdef CONFIG_SHMEM
> >>>>>>>> @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
> >>>>>>>>  #ifndef CONFIG_SHMEM
> >>>>>>>>         &zswpout_attr.attr,
> >>>>>>>>         &swpin_attr.attr,
> >>>>>>>> +       &swpin_fallback_attr.attr,
> >>>>>>>> +       &swpin_fallback_charge_attr.attr,
> >>>>>>>>         &swpout_attr.attr,
> >>>>>>>>         &swpout_fallback_attr.attr,
> >>>>>>>>  #endif
> >>>>>>>> @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
> >>>>>>>>  #ifdef CONFIG_SHMEM
> >>>>>>>>         &zswpout_attr.attr,
> >>>>>>>>         &swpin_attr.attr,
> >>>>>>>> +       &swpin_fallback_attr.attr,
> >>>>>>>> +       &swpin_fallback_charge_attr.attr,
> >>>>>>>>         &swpout_attr.attr,
> >>>>>>>>         &swpout_fallback_attr.attr,
> >>>>>>>>  #endif
> >>>>>>>> diff --git a/mm/memory.c b/mm/memory.c
> >>>>>>>> index 209885a4134f..774dfd309cfe 100644
> >>>>>>>> --- a/mm/memory.c
> >>>>>>>> +++ b/mm/memory.c
> >>>>>>>> @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> >>>>>>>>                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
> >>>>>>>>                                                             gfp, entry))
> >>>>>>>>                                 return folio;
> >>>>>>>> +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> >>>>>>>>                         folio_put(folio);
> >>>>>>>>                 }
> >>>>>>>> +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
> >>>>>>>>                 order = next_order(&orders, order);
> >>>>>>>>         }
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>> 2.45.0
> >>>>>>>>
> >>>>>>
> >>
> >> Thanks
> >> Barry
>
Re: [PATCH v2] mm: add per-order mTHP swap-in fallback/fallback_charge counters
Posted by Barry Song 1 year, 2 months ago
On Sat, Nov 23, 2024 at 10:36 AM Lance Yang <ioworker0@gmail.com> wrote:
>
> Hi Wenchao,
>
> On Sat, Nov 23, 2024 at 12:14 AM Wenchao Hao <haowenchao22@gmail.com> wrote:
> >
> > Currently, large folio swap-in is supported, but we lack a method to
> > analyze their success ratio. Similar to anon_fault_fallback, we introduce
> > per-order mTHP swpin_fallback and swpin_fallback_charge counters for
> > calculating their success ratio. The new counters are located at:
> >
> > /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats/
> >         swpin_fallback
> >         swpin_fallback_charge
> >
> > Signed-off-by: Wenchao Hao <haowenchao22@gmail.com>
> > ---
> > V2:
> >  Introduce swapin_fallback_charge, which increments if it fails to
> >  charge a huge page to memory despite successful allocation.
> >
> >  Documentation/admin-guide/mm/transhuge.rst | 10 ++++++++++
> >  include/linux/huge_mm.h                    |  2 ++
> >  mm/huge_memory.c                           |  6 ++++++
> >  mm/memory.c                                |  2 ++
> >  4 files changed, 20 insertions(+)
> >
> > diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> > index 5034915f4e8e..9c07612281b5 100644
> > --- a/Documentation/admin-guide/mm/transhuge.rst
> > +++ b/Documentation/admin-guide/mm/transhuge.rst
> > @@ -561,6 +561,16 @@ swpin
> >         is incremented every time a huge page is swapped in from a non-zswap
> >         swap device in one piece.
> >
>
> Would the following be better?
>
> +swpin_fallback
> +       is incremented if a huge page swapin fails to allocate or charge
> +       it and instead falls back to using small pages.
>
> +swpin_fallback_charge
> +       is incremented if a huge page swapin fails to charge it and instead
> +       falls back to using small pages even though the allocation was
> +       successful.

much better, but it is better to align with "huge pages with
lower orders or small pages."

anon_fault_fallback
is incremented if a page fault fails to allocate or charge
a huge page and instead falls back to using huge pages with
lower orders or small pages.

anon_fault_fallback_charge
is incremented if a page fault fails to charge a huge page and
instead falls back to using huge pages with lower orders or
small pages even though the allocation was successful.


>
> Thanks,
> Lance
>
> > +swpin_fallback
> > +       is incremented if a huge page swapin fails to allocate or charge
> > +       a huge page and instead falls back to using huge pages with
> > +       lower orders or small pages.
> > +
> > +swpin_fallback_charge
> > +       is incremented if a page swapin fails to charge a huge page and
> > +       instead falls back to using huge pages with lower orders or
> > +       small pages even though the allocation was successful.
> > +
> >  swpout
> >         is incremented every time a huge page is swapped out to a non-zswap
> >         swap device in one piece without splitting.
> > diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> > index b94c2e8ee918..93e509b6c00e 100644
> > --- a/include/linux/huge_mm.h
> > +++ b/include/linux/huge_mm.h
> > @@ -121,6 +121,8 @@ enum mthp_stat_item {
> >         MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
> >         MTHP_STAT_ZSWPOUT,
> >         MTHP_STAT_SWPIN,
> > +       MTHP_STAT_SWPIN_FALLBACK,
> > +       MTHP_STAT_SWPIN_FALLBACK_CHARGE,
> >         MTHP_STAT_SWPOUT,
> >         MTHP_STAT_SWPOUT_FALLBACK,
> >         MTHP_STAT_SHMEM_ALLOC,
> > diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> > index ee335d96fc39..46749dded1c9 100644
> > --- a/mm/huge_memory.c
> > +++ b/mm/huge_memory.c
> > @@ -617,6 +617,8 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
> >  DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
> >  DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> >  DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
> > +DEFINE_MTHP_STAT_ATTR(swpin_fallback, MTHP_STAT_SWPIN_FALLBACK);
> > +DEFINE_MTHP_STAT_ATTR(swpin_fallback_charge, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> >  DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
> >  DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
> >  #ifdef CONFIG_SHMEM
> > @@ -637,6 +639,8 @@ static struct attribute *anon_stats_attrs[] = {
> >  #ifndef CONFIG_SHMEM
> >         &zswpout_attr.attr,
> >         &swpin_attr.attr,
> > +       &swpin_fallback_attr.attr,
> > +       &swpin_fallback_charge_attr.attr,
> >         &swpout_attr.attr,
> >         &swpout_fallback_attr.attr,
> >  #endif
> > @@ -669,6 +673,8 @@ static struct attribute *any_stats_attrs[] = {
> >  #ifdef CONFIG_SHMEM
> >         &zswpout_attr.attr,
> >         &swpin_attr.attr,
> > +       &swpin_fallback_attr.attr,
> > +       &swpin_fallback_charge_attr.attr,
> >         &swpout_attr.attr,
> >         &swpout_fallback_attr.attr,
> >  #endif
> > diff --git a/mm/memory.c b/mm/memory.c
> > index 209885a4134f..774dfd309cfe 100644
> > --- a/mm/memory.c
> > +++ b/mm/memory.c
> > @@ -4189,8 +4189,10 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf)
> >                         if (!mem_cgroup_swapin_charge_folio(folio, vma->vm_mm,
> >                                                             gfp, entry))
> >                                 return folio;
> > +                       count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK_CHARGE);
> >                         folio_put(folio);
> >                 }
> > +               count_mthp_stat(order, MTHP_STAT_SWPIN_FALLBACK);
> >                 order = next_order(&orders, order);
> >         }
> >
> > --
> > 2.45.0
> >

Thanks
Barry