include/trace/events/vmscan.h | 41 +++++++++++++++++++++++++++++++++++ mm/vmscan.c | 40 +++++++++++++++++++++++++++++----- 2 files changed, 76 insertions(+), 5 deletions(-)
The reclaim_folio_list uses a dummy reclaim_stat and is not being
used. To know the memory stat, add a new trace event. This is useful how
how many pages are not reclaimed or why.
This is an example.
mm_vmscan_reclaim_pages: nr_scanned=17 nr_reclaimed=17 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0
Currenlty reclaim_folio_list is only called by reclaim_pages, and
reclaim_pages is used by damon and madvise. In the latest Android,
reclaim_pages is also used by shmem to reclaim all pages in a
address_space.
Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com>
---
include/trace/events/vmscan.h | 41 +++++++++++++++++++++++++++++++++++
mm/vmscan.c | 40 +++++++++++++++++++++++++++++-----
2 files changed, 76 insertions(+), 5 deletions(-)
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 1a488c30afa5..509110a12fa5 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -346,6 +346,47 @@ TRACE_EVENT(mm_vmscan_write_folio,
show_reclaim_flags(__entry->reclaim_flags))
);
+TRACE_EVENT(mm_vmscan_reclaim_pages,
+
+ TP_PROTO(unsigned long nr_scanned, unsigned long nr_reclaimed,
+ struct reclaim_stat *stat),
+
+ TP_ARGS(nr_scanned, nr_reclaimed, stat),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, nr_scanned)
+ __field(unsigned long, nr_reclaimed)
+ __field(unsigned long, nr_dirty)
+ __field(unsigned long, nr_writeback)
+ __field(unsigned long, nr_congested)
+ __field(unsigned long, nr_immediate)
+ __field(unsigned int, nr_activate0)
+ __field(unsigned int, nr_activate1)
+ __field(unsigned long, nr_ref_keep)
+ __field(unsigned long, nr_unmap_fail)
+ ),
+
+ TP_fast_assign(
+ __entry->nr_scanned = nr_scanned;
+ __entry->nr_reclaimed = nr_reclaimed;
+ __entry->nr_dirty = stat->nr_dirty;
+ __entry->nr_writeback = stat->nr_writeback;
+ __entry->nr_congested = stat->nr_congested;
+ __entry->nr_immediate = stat->nr_immediate;
+ __entry->nr_activate0 = stat->nr_activate[0];
+ __entry->nr_activate1 = stat->nr_activate[1];
+ __entry->nr_ref_keep = stat->nr_ref_keep;
+ __entry->nr_unmap_fail = stat->nr_unmap_fail;
+ ),
+
+ TP_printk("nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld",
+ __entry->nr_scanned, __entry->nr_reclaimed,
+ __entry->nr_dirty, __entry->nr_writeback,
+ __entry->nr_congested, __entry->nr_immediate,
+ __entry->nr_activate0, __entry->nr_activate1,
+ __entry->nr_ref_keep, __entry->nr_unmap_fail)
+);
+
TRACE_EVENT(mm_vmscan_lru_shrink_inactive,
TP_PROTO(int nid,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 749cdc110c74..4776c42dfd2a 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2126,9 +2126,9 @@ static void shrink_active_list(unsigned long nr_to_scan,
}
static unsigned int reclaim_folio_list(struct list_head *folio_list,
- struct pglist_data *pgdat)
+ struct pglist_data *pgdat,
+ struct reclaim_stat *stat)
{
- struct reclaim_stat dummy_stat;
unsigned int nr_reclaimed;
struct folio *folio;
struct scan_control sc = {
@@ -2139,7 +2139,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
.no_demotion = 1,
};
- nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, true);
+ nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, stat, true);
while (!list_empty(folio_list)) {
folio = lru_to_folio(folio_list);
list_del(&folio->lru);
@@ -2149,16 +2149,40 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list,
return nr_reclaimed;
}
+static void reclaim_stat_add(struct reclaim_stat *stat_from,
+ struct reclaim_stat *stat_to)
+{
+ int type;
+
+ if (!trace_mm_vmscan_reclaim_pages_enabled())
+ return;
+
+ stat_to->nr_dirty += stat_from->nr_dirty;
+ stat_to->nr_unqueued_dirty += stat_from->nr_unqueued_dirty;
+ stat_to->nr_congested += stat_from->nr_congested;
+ stat_to->nr_writeback += stat_from->nr_writeback;
+ stat_to->nr_immediate += stat_from->nr_immediate;
+ stat_to->nr_pageout += stat_from->nr_pageout;
+ for (type = 0; type < ANON_AND_FILE; type++)
+ stat_to->nr_activate[type] += stat_from->nr_activate[type];
+ stat_to->nr_ref_keep += stat_from->nr_ref_keep;
+ stat_to->nr_unmap_fail += stat_from->nr_unmap_fail;
+ stat_to->nr_lazyfree_fail += stat_from->nr_lazyfree_fail;
+}
+
unsigned long reclaim_pages(struct list_head *folio_list)
{
int nid;
+ unsigned int nr_scanned = 0;
unsigned int nr_reclaimed = 0;
LIST_HEAD(node_folio_list);
unsigned int noreclaim_flag;
+ struct reclaim_stat stat_total, stat_one;
if (list_empty(folio_list))
return nr_reclaimed;
+ memset(&stat_total, 0, sizeof(stat_total));
noreclaim_flag = memalloc_noreclaim_save();
nid = folio_nid(lru_to_folio(folio_list));
@@ -2168,14 +2192,20 @@ unsigned long reclaim_pages(struct list_head *folio_list)
if (nid == folio_nid(folio)) {
folio_clear_active(folio);
list_move(&folio->lru, &node_folio_list);
+ nr_scanned += folio_nr_pages(folio);
continue;
}
- nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid));
+ nr_reclaimed += reclaim_folio_list(&node_folio_list,
+ NODE_DATA(nid), &stat_one);
+ reclaim_stat_add(&stat_one, &stat_total);
nid = folio_nid(lru_to_folio(folio_list));
} while (!list_empty(folio_list));
- nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid));
+ nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid),
+ &stat_one);
+ reclaim_stat_add(&stat_one, &stat_total);
+ trace_mm_vmscan_reclaim_pages(nr_scanned, nr_reclaimed, &stat_total);
memalloc_noreclaim_restore(noreclaim_flag);
--
2.25.1
On 10/9/24 11:31 AM, Jaewon Kim wrote: > The reclaim_folio_list uses a dummy reclaim_stat and is not being > used. To know the memory stat, add a new trace event. This is useful how > how many pages are not reclaimed or why. > > This is an example. > mm_vmscan_reclaim_pages: nr_scanned=17 nr_reclaimed=17 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 > > Currenlty reclaim_folio_list is only called by reclaim_pages, and > reclaim_pages is used by damon and madvise. In the latest Android, > reclaim_pages is also used by shmem to reclaim all pages in a > address_space. > > Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com> > --- > include/trace/events/vmscan.h | 41 +++++++++++++++++++++++++++++++++++ > mm/vmscan.c | 40 +++++++++++++++++++++++++++++----- > 2 files changed, 76 insertions(+), 5 deletions(-) > > diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h > index 1a488c30afa5..509110a12fa5 100644 > --- a/include/trace/events/vmscan.h > +++ b/include/trace/events/vmscan.h > @@ -346,6 +346,47 @@ TRACE_EVENT(mm_vmscan_write_folio, > show_reclaim_flags(__entry->reclaim_flags)) > ); > > +TRACE_EVENT(mm_vmscan_reclaim_pages, > + > + TP_PROTO(unsigned long nr_scanned, unsigned long nr_reclaimed, > + struct reclaim_stat *stat), > + > + TP_ARGS(nr_scanned, nr_reclaimed, stat), > + > + TP_STRUCT__entry( > + __field(unsigned long, nr_scanned) > + __field(unsigned long, nr_reclaimed) > + __field(unsigned long, nr_dirty) > + __field(unsigned long, nr_writeback) > + __field(unsigned long, nr_congested) > + __field(unsigned long, nr_immediate) > + __field(unsigned int, nr_activate0) > + __field(unsigned int, nr_activate1) > + __field(unsigned long, nr_ref_keep) > + __field(unsigned long, nr_unmap_fail) > + ), > + > + TP_fast_assign( > + __entry->nr_scanned = nr_scanned; > + __entry->nr_reclaimed = nr_reclaimed; > + __entry->nr_dirty = stat->nr_dirty; > + __entry->nr_writeback = stat->nr_writeback; > + __entry->nr_congested = stat->nr_congested; > + __entry->nr_immediate = stat->nr_immediate; > + __entry->nr_activate0 = stat->nr_activate[0]; > + __entry->nr_activate1 = stat->nr_activate[1]; > + __entry->nr_ref_keep = stat->nr_ref_keep; > + __entry->nr_unmap_fail = stat->nr_unmap_fail; > + ), > + > + TP_printk("nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld", > + __entry->nr_scanned, __entry->nr_reclaimed, > + __entry->nr_dirty, __entry->nr_writeback, > + __entry->nr_congested, __entry->nr_immediate, > + __entry->nr_activate0, __entry->nr_activate1, > + __entry->nr_ref_keep, __entry->nr_unmap_fail) > +); > + > TRACE_EVENT(mm_vmscan_lru_shrink_inactive, > > TP_PROTO(int nid, > diff --git a/mm/vmscan.c b/mm/vmscan.c > index 749cdc110c74..4776c42dfd2a 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -2126,9 +2126,9 @@ static void shrink_active_list(unsigned long nr_to_scan, > } > > static unsigned int reclaim_folio_list(struct list_head *folio_list, > - struct pglist_data *pgdat) > + struct pglist_data *pgdat, > + struct reclaim_stat *stat) > { > - struct reclaim_stat dummy_stat; > unsigned int nr_reclaimed; > struct folio *folio; > struct scan_control sc = { > @@ -2139,7 +2139,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list, > .no_demotion = 1, > }; > > - nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, true); > + nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, stat, true); > while (!list_empty(folio_list)) { > folio = lru_to_folio(folio_list); > list_del(&folio->lru); > @@ -2149,16 +2149,40 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list, > return nr_reclaimed; > } > > +static void reclaim_stat_add(struct reclaim_stat *stat_from, > + struct reclaim_stat *stat_to) > +{ > + int type; > + > + if (!trace_mm_vmscan_reclaim_pages_enabled()) > + return; > + > + stat_to->nr_dirty += stat_from->nr_dirty; > + stat_to->nr_unqueued_dirty += stat_from->nr_unqueued_dirty; > + stat_to->nr_congested += stat_from->nr_congested; > + stat_to->nr_writeback += stat_from->nr_writeback; > + stat_to->nr_immediate += stat_from->nr_immediate; > + stat_to->nr_pageout += stat_from->nr_pageout; > + for (type = 0; type < ANON_AND_FILE; type++) > + stat_to->nr_activate[type] += stat_from->nr_activate[type]; > + stat_to->nr_ref_keep += stat_from->nr_ref_keep; > + stat_to->nr_unmap_fail += stat_from->nr_unmap_fail; > + stat_to->nr_lazyfree_fail += stat_from->nr_lazyfree_fail; > +} Could we avoid this by using a single stat that just accumulates over multiple calls to reclaim_folio_list()? That means shrink_folio_list() would not do the initial memset(0) and it would be caller responsibility. AFAICS shrink_folio_list() only cares about these fields: pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; in order to do count_vm_events(PGACTIVATE, pgactivate); Which could be adjusted to deal with accumulating stat - i.e. take an initial sum of the fields in stat and subtract from the final sum to get the delta. > unsigned long reclaim_pages(struct list_head *folio_list) > { > int nid; > + unsigned int nr_scanned = 0; > unsigned int nr_reclaimed = 0; > LIST_HEAD(node_folio_list); > unsigned int noreclaim_flag; > + struct reclaim_stat stat_total, stat_one; > > if (list_empty(folio_list)) > return nr_reclaimed; > > + memset(&stat_total, 0, sizeof(stat_total)); > noreclaim_flag = memalloc_noreclaim_save(); > > nid = folio_nid(lru_to_folio(folio_list)); > @@ -2168,14 +2192,20 @@ unsigned long reclaim_pages(struct list_head *folio_list) > if (nid == folio_nid(folio)) { > folio_clear_active(folio); > list_move(&folio->lru, &node_folio_list); > + nr_scanned += folio_nr_pages(folio); > continue; > } > > - nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); > + nr_reclaimed += reclaim_folio_list(&node_folio_list, > + NODE_DATA(nid), &stat_one); > + reclaim_stat_add(&stat_one, &stat_total); > nid = folio_nid(lru_to_folio(folio_list)); > } while (!list_empty(folio_list)); > > - nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); > + nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid), > + &stat_one); > + reclaim_stat_add(&stat_one, &stat_total); > + trace_mm_vmscan_reclaim_pages(nr_scanned, nr_reclaimed, &stat_total); > > memalloc_noreclaim_restore(noreclaim_flag); >
>> The reclaim_folio_list uses a dummy reclaim_stat and is not being >> used. To know the memory stat, add a new trace event. This is useful how >> how many pages are not reclaimed or why. >> >> This is an example. >> mm_vmscan_reclaim_pages: nr_scanned=17 nr_reclaimed=17 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 >> >> Currenlty reclaim_folio_list is only called by reclaim_pages, and >> reclaim_pages is used by damon and madvise. In the latest Android, >> reclaim_pages is also used by shmem to reclaim all pages in a >> address_space. >> >> Signed-off-by: Jaewon Kim <jaewon31.kim@samsung.com> >> --- >> include/trace/events/vmscan.h | 41 +++++++++++++++++++++++++++++++++++ >> mm/vmscan.c | 40 +++++++++++++++++++++++++++++----- >> 2 files changed, 76 insertions(+), 5 deletions(-) >> >> diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h >> index 1a488c30afa5..509110a12fa5 100644 >> --- a/include/trace/events/vmscan.h >> +++ b/include/trace/events/vmscan.h >> @@ -346,6 +346,47 @@ TRACE_EVENT(mm_vmscan_write_folio, >> show_reclaim_flags(__entry->reclaim_flags)) >> ); >> >> +TRACE_EVENT(mm_vmscan_reclaim_pages, >> + >> + TP_PROTO(unsigned long nr_scanned, unsigned long nr_reclaimed, >> + struct reclaim_stat *stat), >> + >> + TP_ARGS(nr_scanned, nr_reclaimed, stat), >> + >> + TP_STRUCT__entry( >> + __field(unsigned long, nr_scanned) >> + __field(unsigned long, nr_reclaimed) >> + __field(unsigned long, nr_dirty) >> + __field(unsigned long, nr_writeback) >> + __field(unsigned long, nr_congested) >> + __field(unsigned long, nr_immediate) >> + __field(unsigned int, nr_activate0) >> + __field(unsigned int, nr_activate1) >> + __field(unsigned long, nr_ref_keep) >> + __field(unsigned long, nr_unmap_fail) >> + ), >> + >> + TP_fast_assign( >> + __entry->nr_scanned = nr_scanned; >> + __entry->nr_reclaimed = nr_reclaimed; >> + __entry->nr_dirty = stat->nr_dirty; >> + __entry->nr_writeback = stat->nr_writeback; >> + __entry->nr_congested = stat->nr_congested; >> + __entry->nr_immediate = stat->nr_immediate; >> + __entry->nr_activate0 = stat->nr_activate[0]; >> + __entry->nr_activate1 = stat->nr_activate[1]; >> + __entry->nr_ref_keep = stat->nr_ref_keep; >> + __entry->nr_unmap_fail = stat->nr_unmap_fail; >> + ), >> + >> + TP_printk("nr_scanned=%ld nr_reclaimed=%ld nr_dirty=%ld nr_writeback=%ld nr_congested=%ld nr_immediate=%ld nr_activate_anon=%d nr_activate_file=%d nr_ref_keep=%ld nr_unmap_fail=%ld", >> + __entry->nr_scanned, __entry->nr_reclaimed, >> + __entry->nr_dirty, __entry->nr_writeback, >> + __entry->nr_congested, __entry->nr_immediate, >> + __entry->nr_activate0, __entry->nr_activate1, >> + __entry->nr_ref_keep, __entry->nr_unmap_fail) >> +); >> + >> TRACE_EVENT(mm_vmscan_lru_shrink_inactive, >> >> TP_PROTO(int nid, >> diff --git a/mm/vmscan.c b/mm/vmscan.c >> index 749cdc110c74..4776c42dfd2a 100644 >> --- a/mm/vmscan.c >> +++ b/mm/vmscan.c >> @@ -2126,9 +2126,9 @@ static void shrink_active_list(unsigned long nr_to_scan, >> } >> >> static unsigned int reclaim_folio_list(struct list_head *folio_list, >> - struct pglist_data *pgdat) >> + struct pglist_data *pgdat, >> + struct reclaim_stat *stat) >> { >> - struct reclaim_stat dummy_stat; >> unsigned int nr_reclaimed; >> struct folio *folio; >> struct scan_control sc = { >> @@ -2139,7 +2139,7 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list, >> .no_demotion = 1, >> }; >> >> - nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, &dummy_stat, true); >> + nr_reclaimed = shrink_folio_list(folio_list, pgdat, &sc, stat, true); >> while (!list_empty(folio_list)) { >> folio = lru_to_folio(folio_list); >> list_del(&folio->lru); >> @@ -2149,16 +2149,40 @@ static unsigned int reclaim_folio_list(struct list_head *folio_list, >> return nr_reclaimed; >> } >> >> +static void reclaim_stat_add(struct reclaim_stat *stat_from, >> + struct reclaim_stat *stat_to) >> +{ >> + int type; >> + >> + if (!trace_mm_vmscan_reclaim_pages_enabled()) >> + return; >> + >> + stat_to->nr_dirty += stat_from->nr_dirty; >> + stat_to->nr_unqueued_dirty += stat_from->nr_unqueued_dirty; >> + stat_to->nr_congested += stat_from->nr_congested; >> + stat_to->nr_writeback += stat_from->nr_writeback; >> + stat_to->nr_immediate += stat_from->nr_immediate; >> + stat_to->nr_pageout += stat_from->nr_pageout; >> + for (type = 0; type < ANON_AND_FILE; type++) >> + stat_to->nr_activate[type] += stat_from->nr_activate[type]; >> + stat_to->nr_ref_keep += stat_from->nr_ref_keep; >> + stat_to->nr_unmap_fail += stat_from->nr_unmap_fail; >> + stat_to->nr_lazyfree_fail += stat_from->nr_lazyfree_fail; >> +} > >Could we avoid this by using a single stat that just accumulates over >multiple calls to reclaim_folio_list()? > >That means shrink_folio_list() would not do the initial memset(0) and it >would be caller responsibility. Hi Thank you for your coment. Yes if it is allowed, I can do that way. When I checked, the following functions should do the memset(). reclaim_clean_pages_from_list shrink_inactive_list reclaim_folio_list evict_folios Actually I was planning to move trace_mm_vmscan_reclaim_pages into reclaim_folio_list so that we don't have to sum up and we may be able to print node number, too. As we will see log for each node, if we'd like to know the sum, that would be the post parser's job. Option 1. No change on memset, but print on each node. mm_vmscan_reclaim_pages: nid=0 nr_scanned=112 nr_reclaimed=112 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 mm_vmscan_reclaim_pages: nid=1 ... mm_vmscan_reclaim_pages: nid=2 ... Option 2. Change on memset, but we don't care the stat from each node. mm_vmscan_reclaim_pages: nr_scanned=35 nr_reclaimed=35 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 Would you give me you preference between the two options? Thank you Jaewon Kim > >AFAICS shrink_folio_list() only cares about these fields: > >pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; > >in order to do > >count_vm_events(PGACTIVATE, pgactivate); > >Which could be adjusted to deal with accumulating stat - i.e. take an >initial sum of the fields in stat and subtract from the final sum to get >the delta. > >> unsigned long reclaim_pages(struct list_head *folio_list) >> { >> int nid; >> + unsigned int nr_scanned = 0; >> unsigned int nr_reclaimed = 0; >> LIST_HEAD(node_folio_list); >> unsigned int noreclaim_flag; >> + struct reclaim_stat stat_total, stat_one; >> >> if (list_empty(folio_list)) >> return nr_reclaimed; >> >> + memset(&stat_total, 0, sizeof(stat_total)); >> noreclaim_flag = memalloc_noreclaim_save(); >> >> nid = folio_nid(lru_to_folio(folio_list)); >> @@ -2168,14 +2192,20 @@ unsigned long reclaim_pages(struct list_head *folio_list) >> if (nid == folio_nid(folio)) { >> folio_clear_active(folio); >> list_move(&folio->lru, &node_folio_list); >> + nr_scanned += folio_nr_pages(folio); >> continue; >> } >> >> - nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); >> + nr_reclaimed += reclaim_folio_list(&node_folio_list, >> + NODE_DATA(nid), &stat_one); >> + reclaim_stat_add(&stat_one, &stat_total); >> nid = folio_nid(lru_to_folio(folio_list)); >> } while (!list_empty(folio_list)); >> >> - nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); >> + nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid), >> + &stat_one); >> + reclaim_stat_add(&stat_one, &stat_total); >> + trace_mm_vmscan_reclaim_pages(nr_scanned, nr_reclaimed, &stat_total); >> >> memalloc_noreclaim_restore(noreclaim_flag); >>
On 10/11/24 10:25 AM, Jaewon Kim wrote: > Hi > > Thank you for your coment. Yes if it is allowed, I can do that way. When > I checked, the following functions should do the memset(). > > reclaim_clean_pages_from_list > shrink_inactive_list > reclaim_folio_list > evict_folios > > Actually I was planning to move trace_mm_vmscan_reclaim_pages into > reclaim_folio_list so that we don't have to sum up and we may be able > to print node number, too. As we will see log for each node, if we'd > like to know the sum, that would be the post parser's job. > > Option 1. No change on memset, but print on each node. > mm_vmscan_reclaim_pages: nid=0 nr_scanned=112 nr_reclaimed=112 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 > mm_vmscan_reclaim_pages: nid=1 ... > mm_vmscan_reclaim_pages: nid=2 ... I see. Note it processes a list that might be from multiple nodes and will group consecutive pages from the same node, but if pages come from random nodes, the nodes will repeat and there might be many trace events, each for few pages only. Guess it depends on the workload if it has its pages from the same node. Maybe you can try and see how noisy it is in practice? > Option 2. Change on memset, but we don't care the stat from each node. > mm_vmscan_reclaim_pages: nr_scanned=35 nr_reclaimed=35 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 > > Would you give me you preference between the two options? > > Thank you > Jaewon Kim > >> >> AFAICS shrink_folio_list() only cares about these fields: >> >> pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; >> >> in order to do >> >> count_vm_events(PGACTIVATE, pgactivate); >> >> Which could be adjusted to deal with accumulating stat - i.e. take an >> initial sum of the fields in stat and subtract from the final sum to get >> the delta. >> >>> unsigned long reclaim_pages(struct list_head *folio_list) >>> { >>> int nid; >>> + unsigned int nr_scanned = 0; >>> unsigned int nr_reclaimed = 0; >>> LIST_HEAD(node_folio_list); >>> unsigned int noreclaim_flag; >>> + struct reclaim_stat stat_total, stat_one; >>> >>> if (list_empty(folio_list)) >>> return nr_reclaimed; >>> >>> + memset(&stat_total, 0, sizeof(stat_total)); >>> noreclaim_flag = memalloc_noreclaim_save(); >>> >>> nid = folio_nid(lru_to_folio(folio_list)); >>> @@ -2168,14 +2192,20 @@ unsigned long reclaim_pages(struct list_head *folio_list) >>> if (nid == folio_nid(folio)) { >>> folio_clear_active(folio); >>> list_move(&folio->lru, &node_folio_list); >>> + nr_scanned += folio_nr_pages(folio); >>> continue; >>> } >>> >>> - nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); >>> + nr_reclaimed += reclaim_folio_list(&node_folio_list, >>> + NODE_DATA(nid), &stat_one); >>> + reclaim_stat_add(&stat_one, &stat_total); >>> nid = folio_nid(lru_to_folio(folio_list)); >>> } while (!list_empty(folio_list)); >>> >>> - nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); >>> + nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid), >>> + &stat_one); >>> + reclaim_stat_add(&stat_one, &stat_total); >>> + trace_mm_vmscan_reclaim_pages(nr_scanned, nr_reclaimed, &stat_total); >>> >>> memalloc_noreclaim_restore(noreclaim_flag); >>>
>On 10/11/24 10:25 AM, Jaewon Kim wrote: >> Hi >> >> Thank you for your coment. Yes if it is allowed, I can do that way. When >> I checked, the following functions should do the memset(). >> >> reclaim_clean_pages_from_list >> shrink_inactive_list >> reclaim_folio_list >> evict_folios >> >> Actually I was planning to move trace_mm_vmscan_reclaim_pages into >> reclaim_folio_list so that we don't have to sum up and we may be able >> to print node number, too. As we will see log for each node, if we'd >> like to know the sum, that would be the post parser's job. >> >> Option 1. No change on memset, but print on each node. >> mm_vmscan_reclaim_pages: nid=0 nr_scanned=112 nr_reclaimed=112 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 >> mm_vmscan_reclaim_pages: nid=1 ... >> mm_vmscan_reclaim_pages: nid=2 ... > >I see. Note it processes a list that might be from multiple nodes and >will group consecutive pages from the same node, but if pages come from >random nodes, the nodes will repeat and there might be many trace >events, each for few pages only. > >Guess it depends on the workload if it has its pages from the same node. >Maybe you can try and see how noisy it is in practice? > Hi Actually my Android test device has only one node, so I cannot test several nodes cases. But it shows quite many of mm_vmscan_reclaim_pages log when I do the Android shmem based call to reclaim_pages. I have to do the post parsing or make a fancy ftrace hist command. I think madvise and damon, which are the other caller to reclaim_pages, are not interested in showing trace log for each node. I'm just worried changing policy, memset(0) is the caller responsibility, could be error-prone if we forget someday. So if possible, let me take the option 1. To show a clean code, let me submit the v2 patch. Thank you. >> Option 2. Change on memset, but we don't care the stat from each node. >> mm_vmscan_reclaim_pages: nr_scanned=35 nr_reclaimed=35 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 >> >> Would you give me you preference between the two options? >> >> Thank you >> Jaewon Kim >> >>> >>> AFAICS shrink_folio_list() only cares about these fields: >>> >>> pgactivate = stat->nr_activate[0] + stat->nr_activate[1]; >>> >>> in order to do >>> >>> count_vm_events(PGACTIVATE, pgactivate); >>> >>> Which could be adjusted to deal with accumulating stat - i.e. take an >>> initial sum of the fields in stat and subtract from the final sum to get >>> the delta. >>> >>>> unsigned long reclaim_pages(struct list_head *folio_list) >>>> { >>>> int nid; >>>> + unsigned int nr_scanned = 0; >>>> unsigned int nr_reclaimed = 0; >>>> LIST_HEAD(node_folio_list); >>>> unsigned int noreclaim_flag; >>>> + struct reclaim_stat stat_total, stat_one; >>>> >>>> if (list_empty(folio_list)) >>>> return nr_reclaimed; >>>> >>>> + memset(&stat_total, 0, sizeof(stat_total)); >>>> noreclaim_flag = memalloc_noreclaim_save(); >>>> >>>> nid = folio_nid(lru_to_folio(folio_list)); >>>> @@ -2168,14 +2192,20 @@ unsigned long reclaim_pages(struct list_head *folio_list) >>>> if (nid == folio_nid(folio)) { >>>> folio_clear_active(folio); >>>> list_move(&folio->lru, &node_folio_list); >>>> + nr_scanned += folio_nr_pages(folio); >>>> continue; >>>> } >>>> >>>> - nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); >>>> + nr_reclaimed += reclaim_folio_list(&node_folio_list, >>>> + NODE_DATA(nid), &stat_one); >>>> + reclaim_stat_add(&stat_one, &stat_total); >>>> nid = folio_nid(lru_to_folio(folio_list)); >>>> } while (!list_empty(folio_list)); >>>> >>>> - nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid)); >>>> + nr_reclaimed += reclaim_folio_list(&node_folio_list, NODE_DATA(nid), >>>> + &stat_one); >>>> + reclaim_stat_add(&stat_one, &stat_total); >>>> + trace_mm_vmscan_reclaim_pages(nr_scanned, nr_reclaimed, &stat_total); >>>> >>>> memalloc_noreclaim_restore(noreclaim_flag); >>>>
On Wed, 9 Oct 2024 18:31:24 +0900 Jaewon Kim <jaewon31.kim@samsung.com> wrote: > The reclaim_folio_list uses a dummy reclaim_stat and is not being > used. To know the memory stat, add a new trace event. This is useful how > how many pages are not reclaimed or why. > > This is an example. > mm_vmscan_reclaim_pages: nr_scanned=17 nr_reclaimed=17 nr_dirty=0 nr_writeback=0 nr_congested=0 nr_immediate=0 nr_activate_anon=0 nr_activate_file=0 nr_ref_keep=0 nr_unmap_fail=0 > > Currenlty reclaim_folio_list is only called by reclaim_pages, and > reclaim_pages is used by damon and madvise. In the latest Android, > reclaim_pages is also used by shmem to reclaim all pages in a > address_space. > This looks like it will add some overhead when tracing has been enabled. Has this been measured and is it significant? Also, we're adding a significant amount of code for a simple trace record. Do others think this is justifiable?
© 2016 - 2024 Red Hat, Inc.