[PATCH] mm: add per-order mTHP swpin counters
Posted by Barry Song 1 month ago
From: Barry Song <v-songbaohua@oppo.com>

This helps profile the sizes of folios being swapped in. Currently,
only mTHP swap-out is being counted.

Cc: David Hildenbrand <david@redhat.com>
Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Chris Li <chrisl@kernel.org>
Cc: Yosry Ahmed <yosryahmed@google.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Kairui Song <kasong@tencent.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
Cc: Usama Arif <usamaarif642@gmail.com>
Signed-off-by: Barry Song <v-songbaohua@oppo.com>
---
 Documentation/admin-guide/mm/transhuge.rst | 3 +++
 include/linux/huge_mm.h                    | 1 +
 mm/huge_memory.c                           | 3 +++
 mm/page_io.c                               | 3 +++
 4 files changed, 10 insertions(+)

diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
index 2a171ed5206e..203ba7aaf5fc 100644
--- a/Documentation/admin-guide/mm/transhuge.rst
+++ b/Documentation/admin-guide/mm/transhuge.rst
@@ -533,6 +533,9 @@ anon_fault_fallback_charge
 zswpout
 	is incremented every time a huge page is swapped out to zswap in one
 	piece without splitting.
+swpin
+	is incremented every time a huge page is swapped in from a non-zswap
+	swap device in one piece.
 
 swpout
 	is incremented every time a huge page is swapped out to a non-zswap
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index c59e5aa9b081..b94c2e8ee918 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -120,6 +120,7 @@ enum mthp_stat_item {
 	MTHP_STAT_ANON_FAULT_FALLBACK,
 	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
 	MTHP_STAT_ZSWPOUT,
+	MTHP_STAT_SWPIN,
 	MTHP_STAT_SWPOUT,
 	MTHP_STAT_SWPOUT_FALLBACK,
 	MTHP_STAT_SHMEM_ALLOC,
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 830d6aa5bf97..846c1a43f61c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -616,6 +616,7 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
 DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
 DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
+DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
 DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
 DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
 #ifdef CONFIG_SHMEM
@@ -635,6 +636,7 @@ static struct attribute *anon_stats_attrs[] = {
 	&anon_fault_fallback_charge_attr.attr,
 #ifndef CONFIG_SHMEM
 	&zswpout_attr.attr,
+	&swpin_attr.attr,
 	&swpout_attr.attr,
 	&swpout_fallback_attr.attr,
 #endif
@@ -666,6 +668,7 @@ static struct attribute_group file_stats_attr_grp = {
 static struct attribute *any_stats_attrs[] = {
 #ifdef CONFIG_SHMEM
 	&zswpout_attr.attr,
+	&swpin_attr.attr,
 	&swpout_attr.attr,
 	&swpout_fallback_attr.attr,
 #endif
diff --git a/mm/page_io.c b/mm/page_io.c
index c69fab5060a1..5d9b6e6cf96c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -487,6 +487,7 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
 		for (p = 0; p < sio->pages; p++) {
 			struct folio *folio = page_folio(sio->bvec[p].bv_page);
 
+			count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
 			count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
 			folio_mark_uptodate(folio);
 			folio_unlock(folio);
@@ -573,6 +574,7 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
 	 * attempt to access it in the page fault retry time check.
 	 */
 	get_task_struct(current);
+	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
 	count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
 	count_vm_events(PSWPIN, folio_nr_pages(folio));
 	submit_bio_wait(&bio);
@@ -589,6 +591,7 @@ static void swap_read_folio_bdev_async(struct folio *folio,
 	bio->bi_iter.bi_sector = swap_folio_sector(folio);
 	bio->bi_end_io = end_swap_bio_read;
 	bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
+	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
 	count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
 	count_vm_events(PSWPIN, folio_nr_pages(folio));
 	submit_bio(bio);
-- 
2.39.3 (Apple Git-146)
Re: [PATCH] mm: add per-order mTHP swpin counters
Posted by David Hildenbrand 3 weeks, 5 days ago
On 26.10.24 10:24, Barry Song wrote:
> From: Barry Song <v-songbaohua@oppo.com>
> 
> This helps profile the sizes of folios being swapped in. Currently,
> only mTHP swap-out is being counted.
> 

Acked-by: David Hildenbrand <david@redhat.com>

-- 
Cheers,

David / dhildenb
Re: [PATCH] mm: add per-order mTHP swpin counters
Posted by Huang, Ying 3 weeks, 5 days ago
Barry Song <21cnbao@gmail.com> writes:

> From: Barry Song <v-songbaohua@oppo.com>
>
> This helps profile the sizes of folios being swapped in. Currently,
> only mTHP swap-out is being counted.

Better to describe the user space interface in patch description?

[snip]

--
Best Regards,
Huang, Ying
Re: [PATCH] mm: add per-order mTHP swpin counters
Posted by Barry Song 3 weeks, 5 days ago
On Wed, Oct 30, 2024 at 4:45 PM Huang, Ying <ying.huang@intel.com> wrote:
>
> Barry Song <21cnbao@gmail.com> writes:
>
> > From: Barry Song <v-songbaohua@oppo.com>
> >
> > This helps profile the sizes of folios being swapped in. Currently,
> > only mTHP swap-out is being counted.
>
> Better to describe the user space interface in patch description?
>

Do you mean something like the below?

The new interface can be found at:

/sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
         swpin

> [snip]
>
> --
> Best Regards,
> Huang, Ying
Re: [PATCH] mm: add per-order mTHP swpin counters
Posted by Huang, Ying 3 weeks, 5 days ago
Barry Song <21cnbao@gmail.com> writes:

> On Wed, Oct 30, 2024 at 4:45 PM Huang, Ying <ying.huang@intel.com> wrote:
>>
>> Barry Song <21cnbao@gmail.com> writes:
>>
>> > From: Barry Song <v-songbaohua@oppo.com>
>> >
>> > This helps profile the sizes of folios being swapped in. Currently,
>> > only mTHP swap-out is being counted.
>>
>> Better to describe the user space interface in patch description?
>>
>
> Do you mean something like the below?
>
> The new interface can be found at:
>
> /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
>          swpin

Yes.  Some example output may be even better.

--
Best Regards,
Huang, Ying
Re: [PATCH] mm: add per-order mTHP swpin counters
Posted by Barry Song 3 weeks, 5 days ago
On Wed, Oct 30, 2024 at 6:29 PM Huang, Ying <ying.huang@intel.com> wrote:
>
> Barry Song <21cnbao@gmail.com> writes:
>
> > On Wed, Oct 30, 2024 at 4:45 PM Huang, Ying <ying.huang@intel.com> wrote:
> >>
> >> Barry Song <21cnbao@gmail.com> writes:
> >>
> >> > From: Barry Song <v-songbaohua@oppo.com>
> >> >
> >> > This helps profile the sizes of folios being swapped in. Currently,
> >> > only mTHP swap-out is being counted.
> >>
> >> Better to describe the user space interface in patch description?
> >>
> >
> > Do you mean something like the below?
> >
> > The new interface can be found at:
> >
> > /sys/kernel/mm/transparent_hugepage/hugepages-<size>/stats
> >          swpin
>
> Yes.  Some example output may be even better.

OK, let me add some examples to the changelog:

cat /sys/kernel/mm/transparent_hugepage/hugepages-64kB/stats/swpin
12809

cat /sys/kernel/mm/transparent_hugepage/hugepages-32kB/stats/swpin
4763
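
To dump all orders at once, something like the below should also work
(an untested sketch; which hugepages-* directories exist depends on the
page sizes the kernel exposes):

for d in /sys/kernel/mm/transparent_hugepage/hugepages-*; do
        printf '%s: %s\n' "${d##*/}" "$(cat "$d/stats/swpin")"
done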

>
> --
> Best Regards,
> Huang, Ying
Re: [PATCH] mm: add per-order mTHP swpin counters
Posted by Baolin Wang 4 weeks ago

On 2024/10/26 16:24, Barry Song wrote:
> From: Barry Song <v-songbaohua@oppo.com>
> 
> This helps profile the sizes of folios being swapped in. Currently,
> only mTHP swap-out is being counted.
> 
> Cc: David Hildenbrand <david@redhat.com>
> Cc: Baolin Wang <baolin.wang@linux.alibaba.com>
> Cc: Chris Li <chrisl@kernel.org>
> Cc: Yosry Ahmed <yosryahmed@google.com>
> Cc: "Huang, Ying" <ying.huang@intel.com>
> Cc: Kairui Song <kasong@tencent.com>
> Cc: Ryan Roberts <ryan.roberts@arm.com>
> Cc: Kanchana P Sridhar <kanchana.p.sridhar@intel.com>
> Cc: Usama Arif <usamaarif642@gmail.com>
> Signed-off-by: Barry Song <v-songbaohua@oppo.com>

Looks reasonable to me.
Reviewed-by: Baolin Wang <baolin.wang@linux.alibaba.com>

> ---
>   Documentation/admin-guide/mm/transhuge.rst | 3 +++
>   include/linux/huge_mm.h                    | 1 +
>   mm/huge_memory.c                           | 3 +++
>   mm/page_io.c                               | 3 +++
>   4 files changed, 10 insertions(+)
> 
> diff --git a/Documentation/admin-guide/mm/transhuge.rst b/Documentation/admin-guide/mm/transhuge.rst
> index 2a171ed5206e..203ba7aaf5fc 100644
> --- a/Documentation/admin-guide/mm/transhuge.rst
> +++ b/Documentation/admin-guide/mm/transhuge.rst
> @@ -533,6 +533,9 @@ anon_fault_fallback_charge
>   zswpout
>   	is incremented every time a huge page is swapped out to zswap in one
>   	piece without splitting.
> +swpin
> +	is incremented every time a huge page is swapped in from a non-zswap
> +	swap device in one piece.
>   
>   swpout
>   	is incremented every time a huge page is swapped out to a non-zswap
> diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
> index c59e5aa9b081..b94c2e8ee918 100644
> --- a/include/linux/huge_mm.h
> +++ b/include/linux/huge_mm.h
> @@ -120,6 +120,7 @@ enum mthp_stat_item {
>   	MTHP_STAT_ANON_FAULT_FALLBACK,
>   	MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE,
>   	MTHP_STAT_ZSWPOUT,
> +	MTHP_STAT_SWPIN,
>   	MTHP_STAT_SWPOUT,
>   	MTHP_STAT_SWPOUT_FALLBACK,
>   	MTHP_STAT_SHMEM_ALLOC,
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 830d6aa5bf97..846c1a43f61c 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -616,6 +616,7 @@ DEFINE_MTHP_STAT_ATTR(anon_fault_alloc, MTHP_STAT_ANON_FAULT_ALLOC);
>   DEFINE_MTHP_STAT_ATTR(anon_fault_fallback, MTHP_STAT_ANON_FAULT_FALLBACK);
>   DEFINE_MTHP_STAT_ATTR(anon_fault_fallback_charge, MTHP_STAT_ANON_FAULT_FALLBACK_CHARGE);
>   DEFINE_MTHP_STAT_ATTR(zswpout, MTHP_STAT_ZSWPOUT);
> +DEFINE_MTHP_STAT_ATTR(swpin, MTHP_STAT_SWPIN);
>   DEFINE_MTHP_STAT_ATTR(swpout, MTHP_STAT_SWPOUT);
>   DEFINE_MTHP_STAT_ATTR(swpout_fallback, MTHP_STAT_SWPOUT_FALLBACK);
>   #ifdef CONFIG_SHMEM
> @@ -635,6 +636,7 @@ static struct attribute *anon_stats_attrs[] = {
>   	&anon_fault_fallback_charge_attr.attr,
>   #ifndef CONFIG_SHMEM
>   	&zswpout_attr.attr,
> +	&swpin_attr.attr,
>   	&swpout_attr.attr,
>   	&swpout_fallback_attr.attr,
>   #endif
> @@ -666,6 +668,7 @@ static struct attribute_group file_stats_attr_grp = {
>   static struct attribute *any_stats_attrs[] = {
>   #ifdef CONFIG_SHMEM
>   	&zswpout_attr.attr,
> +	&swpin_attr.attr,
>   	&swpout_attr.attr,
>   	&swpout_fallback_attr.attr,
>   #endif
> diff --git a/mm/page_io.c b/mm/page_io.c
> index c69fab5060a1..5d9b6e6cf96c 100644
> --- a/mm/page_io.c
> +++ b/mm/page_io.c
> @@ -487,6 +487,7 @@ static void sio_read_complete(struct kiocb *iocb, long ret)
>   		for (p = 0; p < sio->pages; p++) {
>   			struct folio *folio = page_folio(sio->bvec[p].bv_page);
>   
> +			count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
>   			count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
>   			folio_mark_uptodate(folio);
>   			folio_unlock(folio);
> @@ -573,6 +574,7 @@ static void swap_read_folio_bdev_sync(struct folio *folio,
>   	 * attempt to access it in the page fault retry time check.
>   	 */
>   	get_task_struct(current);
> +	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
>   	count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
>   	count_vm_events(PSWPIN, folio_nr_pages(folio));
>   	submit_bio_wait(&bio);
> @@ -589,6 +591,7 @@ static void swap_read_folio_bdev_async(struct folio *folio,
>   	bio->bi_iter.bi_sector = swap_folio_sector(folio);
>   	bio->bi_end_io = end_swap_bio_read;
>   	bio_add_folio_nofail(bio, folio, folio_size(folio), 0);
> +	count_mthp_stat(folio_order(folio), MTHP_STAT_SWPIN);
>   	count_memcg_folio_events(folio, PSWPIN, folio_nr_pages(folio));
>   	count_vm_events(PSWPIN, folio_nr_pages(folio));
>   	submit_bio(bio);