[PATCH] Revert "mm: skip CMA pages when they are not available"

[PATCH] Revert "mm: skip CMA pages when they are not available"
Posted by Usama Arif 1 year, 5 months ago
This reverts commit 5da226dbfce3a2f44978c2c7cf88166e69a6788b.

lruvec->lru_lock is highly contended and is held when calling
isolate_lru_folios. If the lru has a large number of CMA folios
consecutively, while the allocation type requested is not
MIGRATE_MOVABLE, isolate_lru_folios can hold the lock for a very long
time while it skips those. For FIO workload, ~150 million order=0
folios were skipped to isolate a few ZONE_DMA folios [1].
This can cause lockups [1] and high memory pressure for extended periods
of time [2].

[1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevGk3Kg@mail.gmail.com/
[2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/

Signed-off-by: Usama Arif <usamaarif642@gmail.com>
---
 mm/vmscan.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5dc96a843466..78ad4a141409 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1690,8 +1690,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx ||
-				skip_cma(folio, sc)) {
+		if (folio_zonenum(folio) > sc->reclaim_idx) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
 			goto move;
-- 
2.43.5
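
For context, a simplified sketch of the loop the commit message describes
(illustrative only, not the exact vmscan.c code; in the kernel the lock is
taken by the caller of isolate_lru_folios()):

	spin_lock_irq(&lruvec->lru_lock);
	while (scan < nr_to_scan && !list_empty(src)) {
		struct folio *folio = lru_to_folio(src);

		if (folio_zonenum(folio) > sc->reclaim_idx ||
		    skip_cma(folio, sc)) {	/* the check this revert removes */
			/* skipped folios do not count against nr_to_scan, so
			 * this branch can be taken ~150 million times in a row,
			 * all while lruvec->lru_lock is held */
			list_move(&folio->lru, &folios_skipped);
			continue;
		}

		scan += folio_nr_pages(folio);
		/* ... try to isolate the folio onto @dst ... */
	}
	spin_unlock_irq(&lruvec->lru_lock);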
Re: [PATCH] Revert "mm: skip CMA pages when they are not available"
Posted by Usama Arif 1 year, 5 months ago

On 21/08/2024 15:35, Usama Arif wrote:
> This reverts commit 5da226dbfce3a2f44978c2c7cf88166e69a6788b.
> 
> lruvec->lru_lock is highly contended and is held when calling
> isolate_lru_folios. If the lru has a large number of CMA folios
> consecutively, while the allocation type requested is not
> MIGRATE_MOVABLE, isolate_lru_folios can hold the lock for a very long
> time while it skips those. For FIO workload, ~150 million order=0
> folios were skipped to isolate a few ZONE_DMA folios [1].
> This can cause lockups [1] and high memory pressure for extended periods
> of time [2].
> 
> [1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevGk3Kg@mail.gmail.com/
> [2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/
> 
> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> ---
>  mm/vmscan.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 5dc96a843466..78ad4a141409 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -1690,8 +1690,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
>  		nr_pages = folio_nr_pages(folio);
>  		total_scan += nr_pages;
>  
> -		if (folio_zonenum(folio) > sc->reclaim_idx ||
> -				skip_cma(folio, sc)) {
> +		if (folio_zonenum(folio) > sc->reclaim_idx) {
>  			nr_skipped[folio_zonenum(folio)] += nr_pages;
>  			move_to = &folios_skipped;
>  			goto move;


Actually, I didn't build test this with other config options before sending this patch.
skip_cma() is only needed by MGLRU after this patch, so the definition of the function
needs to move under the CONFIG_LRU_GEN guard; otherwise we get a
‘skip_cma’ defined but not used error. A small stand-alone illustration of that warning
follows, and the updated patch below it should be good:
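
(Illustrative only, not kernel code: maybe_skip() stands in for skip_cma() and
USE_CALLER for CONFIG_LRU_GEN; compiled with -Wall, which enables
-Wunused-function.)

#include <stdbool.h>

#ifdef USE_CALLER
/* Only caller of maybe_skip(); everything here is compiled out otherwise. */
static bool maybe_skip(int migratetype)
{
	return migratetype == 1;
}

bool sort_item(int migratetype)
{
	return maybe_skip(migratetype);
}
#endif

/*
 * If maybe_skip() were defined outside the #ifdef, building without
 * USE_CALLER would warn: ‘maybe_skip’ defined but not used
 * [-Wunused-function]. Keeping the definition next to its only remaining
 * caller, as the patch below does with skip_cma(), avoids that.
 */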


From 1aae7f04a5cb203ea2c3ede7973dd9eddbbd7a8b Mon Sep 17 00:00:00 2001
From: Usama Arif <usamaarif642@gmail.com>
Date: Wed, 21 Aug 2024 20:26:07 +0100
Subject: [PATCH] Revert "mm: skip CMA pages when they are not available"

This reverts commit 5da226dbfce3a2f44978c2c7cf88166e69a6788b.

lruvec->lru_lock is highly contended and is held when calling
isolate_lru_folios. If the lru has a large number of CMA folios
consecutively, while the allocation type requested is not
MIGRATE_MOVABLE, isolate_lru_folios can hold the lock for a very long
time while it skips those. For FIO workload, ~150 million order=0
folios were skipped to isolate a few ZONE_DMA folios [1].
This can cause lockups [1] and high memory pressure for extended periods
of time [2].

[1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevGk3Kg@mail.gmail.com/
[2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/

Signed-off-by: Usama Arif <usamaarif642@gmail.com>
---
 mm/vmscan.c | 41 ++++++++++++++++++++---------------------
 1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5dc96a843466..53372f726a1b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1625,25 +1625,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 
 }
 
-#ifdef CONFIG_CMA
-/*
- * It is waste of effort to scan and reclaim CMA pages if it is not available
- * for current allocation context. Kswapd can not be enrolled as it can not
- * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL
- */
-static bool skip_cma(struct folio *folio, struct scan_control *sc)
-{
-	return !current_is_kswapd() &&
-			gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
-			folio_migratetype(folio) == MIGRATE_CMA;
-}
-#else
-static bool skip_cma(struct folio *folio, struct scan_control *sc)
-{
-	return false;
-}
-#endif
-
 /*
  * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
  *
@@ -1690,8 +1671,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx ||
-				skip_cma(folio, sc)) {
+		if (folio_zonenum(folio) > sc->reclaim_idx) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
 			goto move;
@@ -4297,6 +4277,25 @@ void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid)
 
 #endif /* CONFIG_MEMCG */
 
+#ifdef CONFIG_CMA
+/*
+ * It is waste of effort to scan and reclaim CMA pages if it is not available
+ * for current allocation context. Kswapd can not be enrolled as it can not
+ * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL
+ */
+static bool skip_cma(struct folio *folio, struct scan_control *sc)
+{
+	return !current_is_kswapd() &&
+			gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
+			folio_migratetype(folio) == MIGRATE_CMA;
+}
+#else
+static bool skip_cma(struct folio *folio, struct scan_control *sc)
+{
+	return false;
+}
+#endif
+
 /******************************************************************************
  *                          the eviction
  ******************************************************************************/
-- 
2.43.5
Re: [PATCH] Revert "mm: skip CMA pages when they are not available"
Posted by Johannes Weiner 1 year, 5 months ago
On Wed, Aug 21, 2024 at 03:53:21PM -0400, Usama Arif wrote:
> From 1aae7f04a5cb203ea2c3ede7973dd9eddbbd7a8b Mon Sep 17 00:00:00 2001
> From: Usama Arif <usamaarif642@gmail.com>
> Date: Wed, 21 Aug 2024 20:26:07 +0100
> Subject: [PATCH] Revert "mm: skip CMA pages when they are not available"
> 
> This reverts commit 5da226dbfce3a2f44978c2c7cf88166e69a6788b.
> 
> lruvec->lru_lock is highly contended and is held when calling
> isolate_lru_folios. If the lru has a large number of CMA folios
> consecutively, while the allocation type requested is not
> MIGRATE_MOVABLE, isolate_lru_folios can hold the lock for a very long
> time while it skips those. For FIO workload, ~150 million order=0
> folios were skipped to isolate a few ZONE_DMA folios [1].
> This can cause lockups [1] and high memory pressure for extended periods
> of time [2].
> 
> [1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevGk3Kg@mail.gmail.com/
> [2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/
> 
> Signed-off-by: Usama Arif <usamaarif642@gmail.com>

Acked-by: Johannes Weiner <hannes@cmpxchg.org>

I think this is the right move for now, until there is a robust
solution for the original issue.

But should b7108d66318abf3e060c7839eabcba52e9461568 be reverted along
with it? From its changelog:

    No observable issue without this patch on MGLRU, but logically it make
    sense to skip the CMA page reclaim when those pages can't be satisfied for
    the current allocation context.

Presumably it has the same risk/reward profile as it does on
conventional reclaim, with long skip runs while holding
lruvec->lru_lock.
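
For reference, the check in question is the "ineligible" test in
sort_folio(), which currently reads roughly:

	/* ineligible */
	if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
		gen = folio_inc_gen(lruvec, folio, false);
		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
		return true;
	}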
Re: [PATCH] Revert "mm: skip CMA pages when they are not available"
Posted by Usama Arif 1 year, 5 months ago

On 22/08/2024 06:43, Johannes Weiner wrote:
> On Wed, Aug 21, 2024 at 03:53:21PM -0400, Usama Arif wrote:
>> From 1aae7f04a5cb203ea2c3ede7973dd9eddbbd7a8b Mon Sep 17 00:00:00 2001
>> From: Usama Arif <usamaarif642@gmail.com>
>> Date: Wed, 21 Aug 2024 20:26:07 +0100
>> Subject: [PATCH] Revert "mm: skip CMA pages when they are not available"
>>
>> This reverts commit 5da226dbfce3a2f44978c2c7cf88166e69a6788b.
>>
>> lruvec->lru_lock is highly contended and is held when calling
>> isolate_lru_folios. If the lru has a large number of CMA folios
>> consecutively, while the allocation type requested is not
>> MIGRATE_MOVABLE, isolate_lru_folios can hold the lock for a very long
>> time while it skips those. For FIO workload, ~150 million order=0
>> folios were skipped to isolate a few ZONE_DMA folios [1].
>> This can cause lockups [1] and high memory pressure for extended periods
>> of time [2].
>>
>> [1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevGk3Kg@mail.gmail.com/
>> [2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/
>>
>> Signed-off-by: Usama Arif <usamaarif642@gmail.com>
> 
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> 
> I think this is the right move for now, until there is a robust
> solution for the original issue.
> 
> But should b7108d66318abf3e060c7839eabcba52e9461568 be reverted along
> with it? From its changelog:
> 
>     No observable issue without this patch on MGLRU, but logically it make
>     sense to skip the CMA page reclaim when those pages can't be satisfied for
>     the current allocation context.
> 
> Presumably it has the same risk/reward profile as it does on
> conventional reclaim, with long skip runs while holding
> lruvec->lru_lock.

Yes, makes sense to remove it from there as well. Doing both in a single commit below:

From 9ad9fb73edf2c04ef932d128fc2729dfd8391c0c Mon Sep 17 00:00:00 2001
From: Usama Arif <usamaarif642@gmail.com>
Date: Wed, 21 Aug 2024 20:26:07 +0100
Subject: [PATCH] Revert "mm: skip CMA pages when they are not available"

This reverts commit 5da226dbfce3a2f44978c2c7cf88166e69a6788b and
b7108d66318abf3e060c7839eabcba52e9461568.

lruvec->lru_lock is highly contended and is held when calling
isolate_lru_folios.  If the lru has a large number of CMA folios
consecutively, while the allocation type requested is not MIGRATE_MOVABLE,
isolate_lru_folios can hold the lock for a very long time while it skips
those.  For FIO workload, ~150 million order=0 folios were skipped to
isolate a few ZONE_DMA folios [1].  This can cause lockups [1] and high
memory pressure for extended periods of time [2].

Remove skipping CMA for MGLRU as well, as it was introduced in
sort_folio() for the same reason as 5da226dbfce3a2f44978c2c7cf88166e69a6788b.

[1] https://lore.kernel.org/all/CAOUHufbkhMZYz20aM_3rHZ3OcK4m2puji2FGpUpn_-DevGk3Kg@mail.gmail.com/
[2] https://lore.kernel.org/all/ZrssOrcJIDy8hacI@gmail.com/

Link: https://lkml.kernel.org/r/9060a32d-b2d7-48c0-8626-1db535653c54@gmail.com
Fixes: 5da226dbfce3 ("mm: skip CMA pages when they are not available")
Signed-off-by: Usama Arif <usamaarif642@gmail.com>
Cc: Bharata B Rao <bharata@amd.com>
Cc: Breno Leitao <leitao@debian.org>
Cc: David Hildenbrand <david@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Rik van Riel <riel@surriel.com>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: Yu Zhao <yuzhao@google.com>
Cc: Zhaoyang Huang <huangzhaoyang@gmail.com>
Cc: Zhaoyang Huang <zhaoyang.huang@unisoc.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
---
 mm/vmscan.c | 24 ++----------------------
 1 file changed, 2 insertions(+), 22 deletions(-)

diff --git a/mm/vmscan.c b/mm/vmscan.c
index cfa839284b92..bd489c1af228 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1604,25 +1604,6 @@ static __always_inline void update_lru_sizes(struct lruvec *lruvec,
 
 }
 
-#ifdef CONFIG_CMA
-/*
- * It is waste of effort to scan and reclaim CMA pages if it is not available
- * for current allocation context. Kswapd can not be enrolled as it can not
- * distinguish this scenario by using sc->gfp_mask = GFP_KERNEL
- */
-static bool skip_cma(struct folio *folio, struct scan_control *sc)
-{
-	return !current_is_kswapd() &&
-			gfp_migratetype(sc->gfp_mask) != MIGRATE_MOVABLE &&
-			folio_migratetype(folio) == MIGRATE_CMA;
-}
-#else
-static bool skip_cma(struct folio *folio, struct scan_control *sc)
-{
-	return false;
-}
-#endif
-
 /*
  * Isolating page from the lruvec to fill in @dst list by nr_to_scan times.
  *
@@ -1669,8 +1650,7 @@ static unsigned long isolate_lru_folios(unsigned long nr_to_scan,
 		nr_pages = folio_nr_pages(folio);
 		total_scan += nr_pages;
 
-		if (folio_zonenum(folio) > sc->reclaim_idx ||
-				skip_cma(folio, sc)) {
+		if (folio_zonenum(folio) > sc->reclaim_idx) {
 			nr_skipped[folio_zonenum(folio)] += nr_pages;
 			move_to = &folios_skipped;
 			goto move;
@@ -4320,7 +4300,7 @@ static bool sort_folio(struct lruvec *lruvec, struct folio *folio, struct scan_c
 	}
 
 	/* ineligible */
-	if (zone > sc->reclaim_idx || skip_cma(folio, sc)) {
+	if (zone > sc->reclaim_idx) {
 		gen = folio_inc_gen(lruvec, folio, false);
 		list_move_tail(&folio->lru, &lrugen->folios[gen][type][zone]);
 		return true;
-- 
2.43.5