[PATCH v1 2/2] mm: memcg: introduce new event to trace shrink_memcg

Posted by Dmitry Rokosov 2 years, 1 month ago
The shrink_memcg flow plays a crucial role in memcg reclamation.
Currently, this point cannot be traced from non-direct reclaim paths;
direct reclaim already has its own tracepoints, so there is no gap
there. When debugging memcg pressure, developers may need to identify
all potential sources of memcg reclamation, including kswapd(). This
patch introduces the tracepoints mm_vmscan_memcg_shrink_{begin|end}()
to address that problem.

Example of output in the kswapd context (non-direct reclaim):
    kswapd0-39      [001] .....   240.356378: mm_vmscan_memcg_shrink_begin: memcg=test order=0 gfp_flags=GFP_KERNEL
    kswapd0-39      [001] .....   240.356396: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=0
    kswapd0-39      [001] .....   240.356420: mm_vmscan_memcg_shrink_begin: memcg=test order=0 gfp_flags=GFP_KERNEL
    kswapd0-39      [001] .....   240.356454: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=1
    kswapd0-39      [001] .....   240.356479: mm_vmscan_memcg_shrink_begin: memcg=test order=0 gfp_flags=GFP_KERNEL
    kswapd0-39      [001] .....   240.356506: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=4
    kswapd0-39      [001] .....   240.356525: mm_vmscan_memcg_shrink_begin: memcg=test order=0 gfp_flags=GFP_KERNEL
    kswapd0-39      [001] .....   240.356593: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=11
    kswapd0-39      [001] .....   240.356614: mm_vmscan_memcg_shrink_begin: memcg=test order=0 gfp_flags=GFP_KERNEL
    kswapd0-39      [001] .....   240.356738: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=25
    kswapd0-39      [001] .....   240.356790: mm_vmscan_memcg_shrink_begin: memcg=test order=0 gfp_flags=GFP_KERNEL
    kswapd0-39      [001] .....   240.357125: mm_vmscan_memcg_shrink_end: memcg=test nr_reclaimed=53
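
Output like the above can be collected by enabling the two new events
through tracefs and reading trace_pipe while memcg reclaim is running.
A minimal sketch in C, assuming tracefs is mounted at
/sys/kernel/tracing (writing "1" to the enable files by hand has the
same effect); only the event names come from this patch, the rest is
standard tracefs layout:

    /*
     * Sketch: enable the new vmscan events and stream the live trace.
     * Assumes tracefs is mounted at /sys/kernel/tracing.
     */
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    #define EVT_DIR "/sys/kernel/tracing/events/vmscan/"

    static void enable_event(const char *path)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0 || write(fd, "1", 1) != 1)
                    perror(path);
            if (fd >= 0)
                    close(fd);
    }

    int main(void)
    {
            char buf[4096];
            ssize_t n;
            int fd;

            enable_event(EVT_DIR "mm_vmscan_memcg_shrink_begin/enable");
            enable_event(EVT_DIR "mm_vmscan_memcg_shrink_end/enable");

            /* Trigger memcg reclaim separately (kswapd, memory pressure, ...). */
            fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
            if (fd < 0) {
                    perror("trace_pipe");
                    return 1;
            }
            while ((n = read(fd, buf, sizeof(buf))) > 0)
                    fwrite(buf, 1, n, stdout);
            close(fd);
            return 0;
    }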

Signed-off-by: Dmitry Rokosov <ddrokosov@salutedevices.com>
---
 include/trace/events/vmscan.h | 14 ++++++++++++++
 mm/vmscan.c                   |  7 +++++++
 2 files changed, 21 insertions(+)

diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 124bc22866c8..518e7232c9eb 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -182,6 +182,13 @@ DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, mm_vmscan_memcg_softlimit_r
 	TP_ARGS(memcg, order, gfp_flags)
 );
 
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_begin_template, mm_vmscan_memcg_shrink_begin,
+
+	TP_PROTO(const struct mem_cgroup *memcg, int order, gfp_t gfp_flags),
+
+	TP_ARGS(memcg, order, gfp_flags)
+);
+
 #endif /* CONFIG_MEMCG */
 
 DECLARE_EVENT_CLASS(mm_vmscan_direct_reclaim_end_template,
@@ -247,6 +254,13 @@ DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, mm_vmscan_memcg_softlimit_rec
 	TP_ARGS(memcg, nr_reclaimed)
 );
 
+DEFINE_EVENT(mm_vmscan_memcg_reclaim_end_template, mm_vmscan_memcg_shrink_end,
+
+	TP_PROTO(const struct mem_cgroup *memcg, unsigned long nr_reclaimed),
+
+	TP_ARGS(memcg, nr_reclaimed)
+);
+
 #endif /* CONFIG_MEMCG */
 
 TRACE_EVENT(mm_shrink_slab_start,
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 4309eaf188b4..6b9619922dfb 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -6461,6 +6461,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 		 */
 		cond_resched();
 
+		trace_mm_vmscan_memcg_shrink_begin(memcg,
+						   sc->order,
+						   sc->gfp_mask);
+
 		mem_cgroup_calculate_protection(target_memcg, memcg);
 
 		if (mem_cgroup_below_min(target_memcg, memcg)) {
@@ -6491,6 +6495,9 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 		shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
 			    sc->priority);
 
+		trace_mm_vmscan_memcg_shrink_end(memcg,
+						 sc->nr_reclaimed - reclaimed);
+
 		/* Record the group's reclaim efficiency */
 		if (!sc->proactive)
 			vmpressure(sc->gfp_mask, memcg, false,
-- 
2.25.1
Re: [PATCH v1 2/2] mm: memcg: introduce new event to trace shrink_memcg
Posted by kernel test robot 2 years, 1 month ago
Hi Dmitry,

kernel test robot noticed the following build errors:

[auto build test ERROR on akpm-mm/mm-everything]

url:    https://github.com/intel-lab-lkp/linux/commits/Dmitry-Rokosov/mm-memcg-print-out-cgroup-name-in-the-memcg-tracepoints/20231101-183040
base:   https://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm.git mm-everything
patch link:    https://lore.kernel.org/r/20231101102837.25205-3-ddrokosov%40salutedevices.com
patch subject: [PATCH v1 2/2] mm: memcg: introduce new event to trace shrink_memcg
config: sh-allnoconfig (https://download.01.org/0day-ci/archive/20231102/202311021126.DNKIAcbq-lkp@intel.com/config)
compiler: sh4-linux-gcc (GCC) 13.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20231102/202311021126.DNKIAcbq-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202311021126.DNKIAcbq-lkp@intel.com/

All errors (new ones prefixed by >>):

   mm/vmscan.c: In function 'shrink_node_memcgs':
>> mm/vmscan.c:5811:17: error: implicit declaration of function 'trace_mm_vmscan_memcg_shrink_begin'; did you mean 'trace_mm_vmscan_lru_shrink_active'? [-Werror=implicit-function-declaration]
    5811 |                 trace_mm_vmscan_memcg_shrink_begin(memcg,
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                 trace_mm_vmscan_lru_shrink_active
   mm/vmscan.c:5845:17: error: implicit declaration of function 'trace_mm_vmscan_memcg_shrink_end'; did you mean 'trace_mm_vmscan_lru_shrink_active'? [-Werror=implicit-function-declaration]
    5845 |                 trace_mm_vmscan_memcg_shrink_end(memcg,
         |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
         |                 trace_mm_vmscan_lru_shrink_active
   cc1: some warnings being treated as errors


vim +5811 mm/vmscan.c

  5791	
  5792	static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
  5793	{
  5794		struct mem_cgroup *target_memcg = sc->target_mem_cgroup;
  5795		struct mem_cgroup *memcg;
  5796	
  5797		memcg = mem_cgroup_iter(target_memcg, NULL, NULL);
  5798		do {
  5799			struct lruvec *lruvec = mem_cgroup_lruvec(memcg, pgdat);
  5800			unsigned long reclaimed;
  5801			unsigned long scanned;
  5802	
  5803			/*
  5804			 * This loop can become CPU-bound when target memcgs
  5805			 * aren't eligible for reclaim - either because they
  5806			 * don't have any reclaimable pages, or because their
  5807			 * memory is explicitly protected. Avoid soft lockups.
  5808			 */
  5809			cond_resched();
  5810	
> 5811			trace_mm_vmscan_memcg_shrink_begin(memcg,
  5812							   sc->order,
  5813							   sc->gfp_mask);
  5814	
  5815			mem_cgroup_calculate_protection(target_memcg, memcg);
  5816	
  5817			if (mem_cgroup_below_min(target_memcg, memcg)) {
  5818				/*
  5819				 * Hard protection.
  5820				 * If there is no reclaimable memory, OOM.
  5821				 */
  5822				continue;
  5823			} else if (mem_cgroup_below_low(target_memcg, memcg)) {
  5824				/*
  5825				 * Soft protection.
  5826				 * Respect the protection only as long as
  5827				 * there is an unprotected supply
  5828				 * of reclaimable memory from other cgroups.
  5829				 */
  5830				if (!sc->memcg_low_reclaim) {
  5831					sc->memcg_low_skipped = 1;
  5832					continue;
  5833				}
  5834				memcg_memory_event(memcg, MEMCG_LOW);
  5835			}
  5836	
  5837			reclaimed = sc->nr_reclaimed;
  5838			scanned = sc->nr_scanned;
  5839	
  5840			shrink_lruvec(lruvec, sc);
  5841	
  5842			shrink_slab(sc->gfp_mask, pgdat->node_id, memcg,
  5843				    sc->priority);
  5844	
  5845			trace_mm_vmscan_memcg_shrink_end(memcg,
  5846							 sc->nr_reclaimed - reclaimed);
  5847	
  5848			/* Record the group's reclaim efficiency */
  5849			if (!sc->proactive)
  5850				vmpressure(sc->gfp_mask, memcg, false,
  5851					   sc->nr_scanned - scanned,
  5852					   sc->nr_reclaimed - reclaimed);
  5853	
  5854		} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
  5855	}
  5856	
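
The error arises because the two new DEFINE_EVENT()s sit inside the
#ifdef CONFIG_MEMCG section of include/trace/events/vmscan.h (note the
#endif /* CONFIG_MEMCG */ lines right after them in the patch), while
shrink_node_memcgs() is built for every configuration, so on a
!CONFIG_MEMCG build such as sh-allnoconfig the
trace_mm_vmscan_memcg_shrink_*() calls have no declaration. One
possible way to keep such builds working, sketched here as an
illustration rather than taken from a later revision of the series, is
to compile the call sites out as well:

    /*
     * Sketch only: the events are defined only when CONFIG_MEMCG is set,
     * so guard the call in shrink_node_memcgs() the same way.
     */
    #ifdef CONFIG_MEMCG
            trace_mm_vmscan_memcg_shrink_begin(memcg, sc->order, sc->gfp_mask);
    #endif

and likewise around the trace_mm_vmscan_memcg_shrink_end() call; moving
the DEFINE_EVENT()s out of the CONFIG_MEMCG block would be another
option.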

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki