[PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock

Qi Zheng posted 30 patches 3 weeks, 4 days ago
There is a newer version of this series
[PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Qi Zheng 3 weeks, 4 days ago
From: Muchun Song <songmuchun@bytedance.com>

The following diagram illustrates how to ensure the safety of the folio
lruvec lock when LRU folios undergo reparenting.

In the folio_lruvec_lock(folio) function:
```
    rcu_read_lock();
retry:
    lruvec = folio_lruvec(folio);
    /* There is a possibility of folio reparenting at this point. */
    spin_lock(&lruvec->lru_lock);
    if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
        /*
         * The wrong lruvec lock was acquired, and a retry is required.
         * This is because the folio resides on the parent memcg lruvec
         * list.
         */
        spin_unlock(&lruvec->lru_lock);
        goto retry;
    }

    /* Reaching here indicates that folio_memcg() is stable. */
```

In the memcg_reparent_objcgs(memcg) function:
```
    spin_lock(&lruvec->lru_lock);
    spin_lock(&lruvec_parent->lru_lock);
    /* Transfer folios from the lruvec list to the parent's. */
    spin_unlock(&lruvec_parent->lru_lock);
    spin_unlock(&lruvec->lru_lock);
```

After acquiring the lruvec lock, it is necessary to verify whether
the folio has been reparented. If reparenting has occurred, the new
lruvec lock must be reacquired. During the LRU folio reparenting
process, the lruvec lock will also be acquired (this will be
implemented in a subsequent patch). Therefore, folio_memcg() remains
unchanged while the lruvec lock is held.

Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
after the lruvec lock is acquired, the lruvec_memcg_debug() check is
redundant. Hence, it is removed.

This patch serves as a preparation for the reparenting of LRU folios.

Signed-off-by: Muchun Song <songmuchun@bytedance.com>
Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/memcontrol.h | 45 +++++++++++++++++++----------
 include/linux/swap.h       |  1 +
 mm/compaction.c            | 29 +++++++++++++++----
 mm/memcontrol.c            | 59 +++++++++++++++++++++-----------------
 mm/swap.c                  |  4 +++
 5 files changed, 91 insertions(+), 47 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 4b6f20dc694ba..26c3c0e375f58 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -742,7 +742,15 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
  * folio_lruvec - return lruvec for isolating/putting an LRU folio
  * @folio: Pointer to the folio.
  *
- * This function relies on folio->mem_cgroup being stable.
+ * Call with rcu_read_lock() held to ensure the lifetime of the returned lruvec.
+ * Note that this alone will NOT guarantee the stability of the folio->lruvec
+ * association; the folio can be reparented to an ancestor if this races with
+ * cgroup deletion.
+ *
+ * Use folio_lruvec_lock() to ensure both lifetime and stability of the binding.
+ * Once a lruvec is locked, folio_lruvec() can be called on other folios, and
+ * their binding is stable if the returned lruvec matches the one the caller has
+ * locked. Useful for lock batching.
  */
 static inline struct lruvec *folio_lruvec(struct folio *folio)
 {
@@ -761,18 +769,15 @@ struct mem_cgroup *get_mem_cgroup_from_current(void);
 struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio);
 
 struct lruvec *folio_lruvec_lock(struct folio *folio);
+	__acquires(&lruvec->lru_lock)
+	__acquires(rcu)
 struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
+	__acquires(&lruvec->lru_lock)
+	__acquires(rcu)
 struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 						unsigned long *flags);
-
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
-#else
-static inline
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-}
-#endif
+	__acquires(&lruvec->lru_lock)
+	__acquires(rcu)
 
 static inline
 struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
@@ -1199,11 +1204,6 @@ static inline struct lruvec *folio_lruvec(struct folio *folio)
 	return &pgdat->__lruvec;
 }
 
-static inline
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-}
-
 static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 {
 	return NULL;
@@ -1262,6 +1262,7 @@ static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
 {
 	struct pglist_data *pgdat = folio_pgdat(folio);
 
+	rcu_read_lock();
 	spin_lock(&pgdat->__lruvec.lru_lock);
 	return &pgdat->__lruvec;
 }
@@ -1270,6 +1271,7 @@ static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
 {
 	struct pglist_data *pgdat = folio_pgdat(folio);
 
+	rcu_read_lock();
 	spin_lock_irq(&pgdat->__lruvec.lru_lock);
 	return &pgdat->__lruvec;
 }
@@ -1279,6 +1281,7 @@ static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 {
 	struct pglist_data *pgdat = folio_pgdat(folio);
 
+	rcu_read_lock();
 	spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
 	return &pgdat->__lruvec;
 }
@@ -1500,24 +1503,36 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
 }
 
 static inline void lruvec_lock_irq(struct lruvec *lruvec)
+	__acquires(&lruvec->lru_lock)
+	__acquires(rcu)
 {
+	rcu_read_lock();
 	spin_lock_irq(&lruvec->lru_lock);
 }
 
 static inline void lruvec_unlock(struct lruvec *lruvec)
+	__releases(&lruvec->lru_lock)
+	__releases(rcu)
 {
 	spin_unlock(&lruvec->lru_lock);
+	rcu_read_unlock();
 }
 
 static inline void lruvec_unlock_irq(struct lruvec *lruvec)
+	__releases(&lruvec->lru_lock)
+	__releases(rcu)
 {
 	spin_unlock_irq(&lruvec->lru_lock);
+	rcu_read_unlock();
 }
 
 static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec,
 		unsigned long flags)
+	__releases(&lruvec->lru_lock)
+	__releases(rcu)
 {
 	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
+	rcu_read_unlock();
 }
 
 /* Test requires a stable folio->memcg binding, see folio_memcg() */
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 62fc7499b4089..e60f45b48e74d 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -330,6 +330,7 @@ extern unsigned long totalreserve_pages;
 void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
 		unsigned int nr_io, unsigned int nr_rotated)
 		__releases(lruvec->lru_lock);
+		__releases(rcu)
 void lru_note_cost_refault(struct folio *);
 void folio_add_lru(struct folio *);
 void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
diff --git a/mm/compaction.c b/mm/compaction.c
index c3e338aaa0ffb..3648ce22c8072 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -518,6 +518,24 @@ static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
 	return true;
 }
 
+static struct lruvec *
+compact_folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags,
+				  struct compact_control *cc)
+{
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
+	compact_lock_irqsave(&lruvec->lru_lock, flags, cc);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+		goto retry;
+	}
+
+	return lruvec;
+}
+
 /*
  * Compaction requires the taking of some coarse locks that are potentially
  * very heavily contended. The lock should be periodically unlocked to avoid
@@ -839,7 +857,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 {
 	pg_data_t *pgdat = cc->zone->zone_pgdat;
 	unsigned long nr_scanned = 0, nr_isolated = 0;
-	struct lruvec *lruvec;
+	struct lruvec *lruvec = NULL;
 	unsigned long flags = 0;
 	struct lruvec *locked = NULL;
 	struct folio *folio = NULL;
@@ -1153,18 +1171,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 		if (!folio_test_clear_lru(folio))
 			goto isolate_fail_put;
 
-		lruvec = folio_lruvec(folio);
+		if (locked)
+			lruvec = folio_lruvec(folio);
 
 		/* If we already hold the lock, we can skip some rechecking */
-		if (lruvec != locked) {
+		if (lruvec != locked || !locked) {
 			if (locked)
 				lruvec_unlock_irqrestore(locked, flags);
 
-			compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
+			lruvec = compact_folio_lruvec_lock_irqsave(folio, &flags, cc);
 			locked = lruvec;
 
-			lruvec_memcg_debug(lruvec, folio);
-
 			/*
 			 * Try get exclusive access under lock. If marked for
 			 * skip, the scan is aborted unless the current context
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 548e67dbf2386..a1573600d4188 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1201,23 +1201,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
 	}
 }
 
-#ifdef CONFIG_DEBUG_VM
-void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
-{
-	struct mem_cgroup *memcg;
-
-	if (mem_cgroup_disabled())
-		return;
-
-	memcg = folio_memcg(folio);
-
-	if (!memcg)
-		VM_BUG_ON_FOLIO(!mem_cgroup_is_root(lruvec_memcg(lruvec)), folio);
-	else
-		VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio);
-}
-#endif
-
 /**
  * folio_lruvec_lock - Lock the lruvec for a folio.
  * @folio: Pointer to the folio.
@@ -1227,14 +1210,22 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
  * - folio_test_lru false
  * - folio frozen (refcount of 0)
  *
- * Return: The lruvec this folio is on with its lock held.
+ * Return: The lruvec this folio is on with its lock held and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock(struct folio *folio)
+	__acquires(&lruvec->lru_lock)
+	__acquires(rcu)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock(&lruvec->lru_lock);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock(&lruvec->lru_lock);
+		goto retry;
+	}
 
 	return lruvec;
 }
@@ -1249,14 +1240,22 @@ struct lruvec *folio_lruvec_lock(struct folio *folio)
  * - folio frozen (refcount of 0)
  *
  * Return: The lruvec this folio is on with its lock held and interrupts
- * disabled.
+ * disabled and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
+	__acquires(&lruvec->lru_lock)
+	__acquires(rcu)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock_irq(&lruvec->lru_lock);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irq(&lruvec->lru_lock);
+		goto retry;
+	}
 
 	return lruvec;
 }
@@ -1272,15 +1271,23 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
  * - folio frozen (refcount of 0)
  *
  * Return: The lruvec this folio is on with its lock held and interrupts
- * disabled.
+ * disabled and rcu read lock held.
  */
 struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
 		unsigned long *flags)
+	__acquires(&lruvec->lru_lock)
+	__acquires(rcu)
 {
-	struct lruvec *lruvec = folio_lruvec(folio);
+	struct lruvec *lruvec;
 
+	rcu_read_lock();
+retry:
+	lruvec = folio_lruvec(folio);
 	spin_lock_irqsave(&lruvec->lru_lock, *flags);
-	lruvec_memcg_debug(lruvec, folio);
+	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
+		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
+		goto retry;
+	}
 
 	return lruvec;
 }
diff --git a/mm/swap.c b/mm/swap.c
index cb1148a92d8ec..7e53479ca1732 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -240,6 +240,7 @@ void folio_rotate_reclaimable(struct folio *folio)
 void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
 		unsigned int nr_io, unsigned int nr_rotated)
 		__releases(lruvec->lru_lock)
+		__releases(rcu)
 {
 	unsigned long cost;
 
@@ -253,6 +254,7 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
 	cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated;
 	if (!cost) {
 		spin_unlock_irq(&lruvec->lru_lock);
+		rcu_read_unlock();
 		return;
 	}
 
@@ -284,9 +286,11 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
 		}
 
 		spin_unlock_irq(&lruvec->lru_lock);
+		rcu_read_unlock();
 		lruvec = parent_lruvec(lruvec);
 		if (!lruvec)
 			break;
+		rcu_read_lock();
 		spin_lock_irq(&lruvec->lru_lock);
 	}
 }
-- 
2.20.1
Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Harry Yoo 2 weeks, 6 days ago
On Wed, Jan 14, 2026 at 07:32:51PM +0800, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> The following diagram illustrates how to ensure the safety of the folio
> lruvec lock when LRU folios undergo reparenting.
> 
> In the folio_lruvec_lock(folio) function:
> ```
>     rcu_read_lock();
> retry:
>     lruvec = folio_lruvec(folio);
>     /* There is a possibility of folio reparenting at this point. */
>     spin_lock(&lruvec->lru_lock);
>     if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>         /*
>          * The wrong lruvec lock was acquired, and a retry is required.
>          * This is because the folio resides on the parent memcg lruvec
>          * list.
>          */
>         spin_unlock(&lruvec->lru_lock);
>         goto retry;
>     }
> 
>     /* Reaching here indicates that folio_memcg() is stable. */
> ```
> 
> In the memcg_reparent_objcgs(memcg) function:
> ```
>     spin_lock(&lruvec->lru_lock);
>     spin_lock(&lruvec_parent->lru_lock);
>     /* Transfer folios from the lruvec list to the parent's. */
>     spin_unlock(&lruvec_parent->lru_lock);
>     spin_unlock(&lruvec->lru_lock);
> ```
> 
> After acquiring the lruvec lock, it is necessary to verify whether
> the folio has been reparented. If reparenting has occurred, the new
> lruvec lock must be reacquired. During the LRU folio reparenting
> process, the lruvec lock will also be acquired (this will be
> implemented in a subsequent patch). Therefore, folio_memcg() remains
> unchanged while the lruvec lock is held.
> 
> Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
> after the lruvec lock is acquired, the lruvec_memcg_debug() check is
> redundant. Hence, it is removed.
> 
> This patch serves as a preparation for the reparenting of LRU folios.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>  include/linux/memcontrol.h | 45 +++++++++++++++++++----------
>  include/linux/swap.h       |  1 +
>  mm/compaction.c            | 29 +++++++++++++++----
>  mm/memcontrol.c            | 59 +++++++++++++++++++++-----------------
>  mm/swap.c                  |  4 +++
>  5 files changed, 91 insertions(+), 47 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 4b6f20dc694ba..26c3c0e375f58 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -742,7 +742,15 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
>   * folio_lruvec - return lruvec for isolating/putting an LRU folio
>   * @folio: Pointer to the folio.
>   *
> - * This function relies on folio->mem_cgroup being stable.
> + * Call with rcu_read_lock() held to ensure the lifetime of the returned lruvec.
> + * Note that this alone will NOT guarantee the stability of the folio->lruvec
> + * association; the folio can be reparented to an ancestor if this races with
> + * cgroup deletion.
> + *
> + * Use folio_lruvec_lock() to ensure both lifetime and stability of the binding.
> + * Once a lruvec is locked, folio_lruvec() can be called on other folios, and
> + * their binding is stable if the returned lruvec matches the one the caller has
> + * locked. Useful for lock batching.
>   */
>  static inline struct lruvec *folio_lruvec(struct folio *folio)
>  {
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 548e67dbf2386..a1573600d4188 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> diff --git a/mm/swap.c b/mm/swap.c
> index cb1148a92d8ec..7e53479ca1732 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -284,9 +286,11 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
>  		}
>  
>  		spin_unlock_irq(&lruvec->lru_lock);
> +		rcu_read_unlock();
>  		lruvec = parent_lruvec(lruvec);

It looks a bit weird to call parent_lruvec(lruvec) outside the RCU read lock
because the reason why it holds the RCU read lock is to prevent release of
the memory cgroup and its lruvec.

I guess this isn't broken (for now) because all callers of
lru_note_cost_unlock_irq() are holding a reference to the memcg?

>  		if (!lruvec)
>  			break;
> +		rcu_read_lock();
>  		spin_lock_irq(&lruvec->lru_lock);
>  	}
>  }

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Qi Zheng 2 weeks, 5 days ago

On 1/20/26 4:21 PM, Harry Yoo wrote:
> On Wed, Jan 14, 2026 at 07:32:51PM +0800, Qi Zheng wrote:
>> From: Muchun Song <songmuchun@bytedance.com>
>>
>> The following diagram illustrates how to ensure the safety of the folio
>> lruvec lock when LRU folios undergo reparenting.
>>
>> In the folio_lruvec_lock(folio) function:
>> ```
>>      rcu_read_lock();
>> retry:
>>      lruvec = folio_lruvec(folio);
>>      /* There is a possibility of folio reparenting at this point. */
>>      spin_lock(&lruvec->lru_lock);
>>      if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>>          /*
>>           * The wrong lruvec lock was acquired, and a retry is required.
>>           * This is because the folio resides on the parent memcg lruvec
>>           * list.
>>           */
>>          spin_unlock(&lruvec->lru_lock);
>>          goto retry;
>>      }
>>
>>      /* Reaching here indicates that folio_memcg() is stable. */
>> ```
>>
>> In the memcg_reparent_objcgs(memcg) function:
>> ```
>>      spin_lock(&lruvec->lru_lock);
>>      spin_lock(&lruvec_parent->lru_lock);
>>      /* Transfer folios from the lruvec list to the parent's. */
>>      spin_unlock(&lruvec_parent->lru_lock);
>>      spin_unlock(&lruvec->lru_lock);
>> ```
>>
>> After acquiring the lruvec lock, it is necessary to verify whether
>> the folio has been reparented. If reparenting has occurred, the new
>> lruvec lock must be reacquired. During the LRU folio reparenting
>> process, the lruvec lock will also be acquired (this will be
>> implemented in a subsequent patch). Therefore, folio_memcg() remains
>> unchanged while the lruvec lock is held.
>>
>> Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
>> after the lruvec lock is acquired, the lruvec_memcg_debug() check is
>> redundant. Hence, it is removed.
>>
>> This patch serves as a preparation for the reparenting of LRU folios.
>>
>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
>> ---
>>   include/linux/memcontrol.h | 45 +++++++++++++++++++----------
>>   include/linux/swap.h       |  1 +
>>   mm/compaction.c            | 29 +++++++++++++++----
>>   mm/memcontrol.c            | 59 +++++++++++++++++++++-----------------
>>   mm/swap.c                  |  4 +++
>>   5 files changed, 91 insertions(+), 47 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 4b6f20dc694ba..26c3c0e375f58 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -742,7 +742,15 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
>>    * folio_lruvec - return lruvec for isolating/putting an LRU folio
>>    * @folio: Pointer to the folio.
>>    *
>> - * This function relies on folio->mem_cgroup being stable.
>> + * Call with rcu_read_lock() held to ensure the lifetime of the returned lruvec.
>> + * Note that this alone will NOT guarantee the stability of the folio->lruvec
>> + * association; the folio can be reparented to an ancestor if this races with
>> + * cgroup deletion.
>> + *
>> + * Use folio_lruvec_lock() to ensure both lifetime and stability of the binding.
>> + * Once a lruvec is locked, folio_lruvec() can be called on other folios, and
>> + * their binding is stable if the returned lruvec matches the one the caller has
>> + * locked. Useful for lock batching.
>>    */
>>   static inline struct lruvec *folio_lruvec(struct folio *folio)
>>   {
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 548e67dbf2386..a1573600d4188 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> diff --git a/mm/swap.c b/mm/swap.c
>> index cb1148a92d8ec..7e53479ca1732 100644
>> --- a/mm/swap.c
>> +++ b/mm/swap.c
>> @@ -284,9 +286,11 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
>>   		}
>>   
>>   		spin_unlock_irq(&lruvec->lru_lock);
>> +		rcu_read_unlock();
>>   		lruvec = parent_lruvec(lruvec);
> 
> It looks a bit weird to call parent_lruvec(lruvec) outside the RCU read lock
> because the reason why it holds the RCU read lock is to prevent release of
> the memory cgroup and its lruvec.
> 
> I guess this isn't broken (for now) because all callers of
> lru_note_cost_unlock_irq() are holding a reference to the memcg?

I checked all the callers again, and they do indeed hold the refcnt
for the memcg, so it's safe for now. But it seems rather fragile;
perhaps we should also include parent_lruvec() within the RCU lock.

> 
>>   		if (!lruvec)
>>   			break;
>> +		rcu_read_lock();
>>   		spin_lock_irq(&lruvec->lru_lock);
>>   	}
>>   }
>
Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Harry Yoo 2 weeks, 5 days ago
On Tue, Jan 20, 2026 at 07:51:29PM +0800, Qi Zheng wrote:
> 
> 
> On 1/20/26 4:21 PM, Harry Yoo wrote:
> > On Wed, Jan 14, 2026 at 07:32:51PM +0800, Qi Zheng wrote:
> > > From: Muchun Song <songmuchun@bytedance.com>
> > > 
> > > The following diagram illustrates how to ensure the safety of the folio
> > > lruvec lock when LRU folios undergo reparenting.
> > > 
> > > In the folio_lruvec_lock(folio) function:
> > > ```
> > >      rcu_read_lock();
> > > retry:
> > >      lruvec = folio_lruvec(folio);
> > >      /* There is a possibility of folio reparenting at this point. */
> > >      spin_lock(&lruvec->lru_lock);
> > >      if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
> > >          /*
> > >           * The wrong lruvec lock was acquired, and a retry is required.
> > >           * This is because the folio resides on the parent memcg lruvec
> > >           * list.
> > >           */
> > >          spin_unlock(&lruvec->lru_lock);
> > >          goto retry;
> > >      }
> > > 
> > >      /* Reaching here indicates that folio_memcg() is stable. */
> > > ```
> > > 
> > > In the memcg_reparent_objcgs(memcg) function:
> > > ```
> > >      spin_lock(&lruvec->lru_lock);
> > >      spin_lock(&lruvec_parent->lru_lock);
> > >      /* Transfer folios from the lruvec list to the parent's. */
> > >      spin_unlock(&lruvec_parent->lru_lock);
> > >      spin_unlock(&lruvec->lru_lock);
> > > ```
> > > 
> > > After acquiring the lruvec lock, it is necessary to verify whether
> > > the folio has been reparented. If reparenting has occurred, the new
> > > lruvec lock must be reacquired. During the LRU folio reparenting
> > > process, the lruvec lock will also be acquired (this will be
> > > implemented in a subsequent patch). Therefore, folio_memcg() remains
> > > unchanged while the lruvec lock is held.
> > > 
> > > Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
> > > after the lruvec lock is acquired, the lruvec_memcg_debug() check is
> > > redundant. Hence, it is removed.
> > > 
> > > This patch serves as a preparation for the reparenting of LRU folios.
> > > 
> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> > > Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> > > ---
> > >   include/linux/memcontrol.h | 45 +++++++++++++++++++----------
> > >   include/linux/swap.h       |  1 +
> > >   mm/compaction.c            | 29 +++++++++++++++----
> > >   mm/memcontrol.c            | 59 +++++++++++++++++++++-----------------
> > >   mm/swap.c                  |  4 +++
> > >   5 files changed, 91 insertions(+), 47 deletions(-)
> > > 
> > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > > index 4b6f20dc694ba..26c3c0e375f58 100644
> > > --- a/include/linux/memcontrol.h
> > > +++ b/include/linux/memcontrol.h
> > > @@ -742,7 +742,15 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
> > >    * folio_lruvec - return lruvec for isolating/putting an LRU folio
> > >    * @folio: Pointer to the folio.
> > >    *
> > > - * This function relies on folio->mem_cgroup being stable.
> > > + * Call with rcu_read_lock() held to ensure the lifetime of the returned lruvec.
> > > + * Note that this alone will NOT guarantee the stability of the folio->lruvec
> > > + * association; the folio can be reparented to an ancestor if this races with
> > > + * cgroup deletion.
> > > + *
> > > + * Use folio_lruvec_lock() to ensure both lifetime and stability of the binding.
> > > + * Once a lruvec is locked, folio_lruvec() can be called on other folios, and
> > > + * their binding is stable if the returned lruvec matches the one the caller has
> > > + * locked. Useful for lock batching.
> > >    */
> > >   static inline struct lruvec *folio_lruvec(struct folio *folio)
> > >   {
> > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > > index 548e67dbf2386..a1573600d4188 100644
> > > --- a/mm/memcontrol.c
> > > +++ b/mm/memcontrol.c
> > > diff --git a/mm/swap.c b/mm/swap.c
> > > index cb1148a92d8ec..7e53479ca1732 100644
> > > --- a/mm/swap.c
> > > +++ b/mm/swap.c
> > > @@ -284,9 +286,11 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
> > >   		}
> > >   		spin_unlock_irq(&lruvec->lru_lock);
> > > +		rcu_read_unlock();
> > >   		lruvec = parent_lruvec(lruvec);
> > 
> > It looks a bit weird to call parent_lruvec(lruvec) outside the RCU read lock
> > because the reason why it holds the RCU read lock is to prevent release of
> > the memory cgroup and its lruvec.
> > 
> > I guess this isn't broken (for now) because all callers of
> > lru_note_cost_unlock_irq() are holding a reference to the memcg?
> 
> I checked all the callers again, and they do indeed hold the refcnt
> for the memcg, so it's safe for now.

Thanks for double checking!

> But it seems rather fragile,

Yeah, it's fragile and

> perhaps we should also include parent_lruvec() within the RCU lock.

that would be much better.

> > 
> > >   		if (!lruvec)
> > >   			break;
> > > +		rcu_read_lock();
> > >   		spin_lock_irq(&lruvec->lru_lock);
> > >   	}
> > >   }

-- 
Cheers,
Harry / Hyeonggon
Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Shakeel Butt 3 weeks, 1 day ago
On Wed, Jan 14, 2026 at 07:32:51PM +0800, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
> 
> The following diagram illustrates how to ensure the safety of the folio
> lruvec lock when LRU folios undergo reparenting.
> 
> In the folio_lruvec_lock(folio) function:
> ```
>     rcu_read_lock();
> retry:
>     lruvec = folio_lruvec(folio);
>     /* There is a possibility of folio reparenting at this point. */
>     spin_lock(&lruvec->lru_lock);
>     if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>         /*
>          * The wrong lruvec lock was acquired, and a retry is required.
>          * This is because the folio resides on the parent memcg lruvec
>          * list.
>          */
>         spin_unlock(&lruvec->lru_lock);
>         goto retry;
>     }
> 
>     /* Reaching here indicates that folio_memcg() is stable. */
> ```
> 
> In the memcg_reparent_objcgs(memcg) function:
> ```
>     spin_lock(&lruvec->lru_lock);
>     spin_lock(&lruvec_parent->lru_lock);
>     /* Transfer folios from the lruvec list to the parent's. */
>     spin_unlock(&lruvec_parent->lru_lock);
>     spin_unlock(&lruvec->lru_lock);
> ```
> 
> After acquiring the lruvec lock, it is necessary to verify whether
> the folio has been reparented. If reparenting has occurred, the new
> lruvec lock must be reacquired. During the LRU folio reparenting
> process, the lruvec lock will also be acquired (this will be
> implemented in a subsequent patch). Therefore, folio_memcg() remains
> unchanged while the lruvec lock is held.
> 
> Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
> after the lruvec lock is acquired, the lruvec_memcg_debug() check is
> redundant. Hence, it is removed.
> 
> This patch serves as a preparation for the reparenting of LRU folios.
> 
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>

Acked-by: Shakeel Butt <shakeel.butt@linux.dev>
Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Muchun Song 3 weeks, 2 days ago

On 2026/1/14 19:32, Qi Zheng wrote:
> From: Muchun Song <songmuchun@bytedance.com>
>
> The following diagram illustrates how to ensure the safety of the folio
> lruvec lock when LRU folios undergo reparenting.
>
> In the folio_lruvec_lock(folio) function:
> ```
>      rcu_read_lock();
> retry:
>      lruvec = folio_lruvec(folio);
>      /* There is a possibility of folio reparenting at this point. */
>      spin_lock(&lruvec->lru_lock);
>      if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>          /*
>           * The wrong lruvec lock was acquired, and a retry is required.
>           * This is because the folio resides on the parent memcg lruvec
>           * list.
>           */
>          spin_unlock(&lruvec->lru_lock);
>          goto retry;
>      }
>
>      /* Reaching here indicates that folio_memcg() is stable. */
> ```
>
> In the memcg_reparent_objcgs(memcg) function:
> ```
>      spin_lock(&lruvec->lru_lock);
>      spin_lock(&lruvec_parent->lru_lock);
>      /* Transfer folios from the lruvec list to the parent's. */
>      spin_unlock(&lruvec_parent->lru_lock);
>      spin_unlock(&lruvec->lru_lock);
> ```
>
> After acquiring the lruvec lock, it is necessary to verify whether
> the folio has been reparented. If reparenting has occurred, the new
> lruvec lock must be reacquired. During the LRU folio reparenting
> process, the lruvec lock will also be acquired (this will be
> implemented in a subsequent patch). Therefore, folio_memcg() remains
> unchanged while the lruvec lock is held.
>
> Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
> after the lruvec lock is acquired, the lruvec_memcg_debug() check is
> redundant. Hence, it is removed.
>
> This patch serves as a preparation for the reparenting of LRU folios.
>
> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>   include/linux/memcontrol.h | 45 +++++++++++++++++++----------
>   include/linux/swap.h       |  1 +
>   mm/compaction.c            | 29 +++++++++++++++----
>   mm/memcontrol.c            | 59 +++++++++++++++++++++-----------------
>   mm/swap.c                  |  4 +++
>   5 files changed, 91 insertions(+), 47 deletions(-)
>
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 4b6f20dc694ba..26c3c0e375f58 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -742,7 +742,15 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
>    * folio_lruvec - return lruvec for isolating/putting an LRU folio
>    * @folio: Pointer to the folio.
>    *
> - * This function relies on folio->mem_cgroup being stable.
> + * Call with rcu_read_lock() held to ensure the lifetime of the returned lruvec.
> + * Note that this alone will NOT guarantee the stability of the folio->lruvec
> + * association; the folio can be reparented to an ancestor if this races with
> + * cgroup deletion.
> + *
> + * Use folio_lruvec_lock() to ensure both lifetime and stability of the binding.
> + * Once a lruvec is locked, folio_lruvec() can be called on other folios, and
> + * their binding is stable if the returned lruvec matches the one the caller has
> + * locked. Useful for lock batching.
>    */
>   static inline struct lruvec *folio_lruvec(struct folio *folio)
>   {
> @@ -761,18 +769,15 @@ struct mem_cgroup *get_mem_cgroup_from_current(void);
>   struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio);
>   
>   struct lruvec *folio_lruvec_lock(struct folio *folio);
> +	__acquires(&lruvec->lru_lock)
> +	__acquires(rcu)
>   struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
> +	__acquires(&lruvec->lru_lock)
> +	__acquires(rcu)
>   struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
>   						unsigned long *flags);
> -
> -#ifdef CONFIG_DEBUG_VM
> -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
> -#else
> -static inline
> -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
> -{
> -}
> -#endif
> +	__acquires(&lruvec->lru_lock)
> +	__acquires(rcu)
>   
>   static inline
>   struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css){
> @@ -1199,11 +1204,6 @@ static inline struct lruvec *folio_lruvec(struct folio *folio)
>   	return &pgdat->__lruvec;
>   }
>   
> -static inline
> -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
> -{
> -}
> -
>   static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
>   {
>   	return NULL;
> @@ -1262,6 +1262,7 @@ static inline struct lruvec *folio_lruvec_lock(struct folio *folio)
>   {
>   	struct pglist_data *pgdat = folio_pgdat(folio);
>   
> +	rcu_read_lock();
>   	spin_lock(&pgdat->__lruvec.lru_lock);
>   	return &pgdat->__lruvec;
>   }
> @@ -1270,6 +1271,7 @@ static inline struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
>   {
>   	struct pglist_data *pgdat = folio_pgdat(folio);
>   
> +	rcu_read_lock();
>   	spin_lock_irq(&pgdat->__lruvec.lru_lock);
>   	return &pgdat->__lruvec;
>   }
> @@ -1279,6 +1281,7 @@ static inline struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
>   {
>   	struct pglist_data *pgdat = folio_pgdat(folio);
>   
> +	rcu_read_lock();
>   	spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
>   	return &pgdat->__lruvec;
>   }
> @@ -1500,24 +1503,36 @@ static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
>   }
>   
>   static inline void lruvec_lock_irq(struct lruvec *lruvec)
> +	__acquires(&lruvec->lru_lock)
> +	__acquires(rcu)

It seems that functions marked as `inline` cannot be decorated with
`__acquires`. The alternative would be to move these little helpers into
`memcontrol.c` and declare them as extern, but they’re so short that it
hardly feels worth the trouble. My own inclination is to drop the
`__acquires` annotations so the helpers can stay inline—mainly for
performance reasons.

>   {
> +	rcu_read_lock();
>   	spin_lock_irq(&lruvec->lru_lock);
>   }
>   
>   static inline void lruvec_unlock(struct lruvec *lruvec)
> +	__releases(&lruvec->lru_lock)
> +	__releases(rcu)
>   {
>   	spin_unlock(&lruvec->lru_lock);
> +	rcu_read_unlock();
>   }
>   
>   static inline void lruvec_unlock_irq(struct lruvec *lruvec)
> +	__releases(&lruvec->lru_lock)
> +	__releases(rcu)
>   {
>   	spin_unlock_irq(&lruvec->lru_lock);
> +	rcu_read_unlock();
>   }
>   
>   static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec,
>   		unsigned long flags)
> +	__releases(&lruvec->lru_lock)
> +	__releases(rcu)
>   {
>   	spin_unlock_irqrestore(&lruvec->lru_lock, flags);
> +	rcu_read_unlock();
>   }
>   
>   /* Test requires a stable folio->memcg binding, see folio_memcg() */
> diff --git a/include/linux/swap.h b/include/linux/swap.h
> index 62fc7499b4089..e60f45b48e74d 100644
> --- a/include/linux/swap.h
> +++ b/include/linux/swap.h
> @@ -330,6 +330,7 @@ extern unsigned long totalreserve_pages;
>   void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
>   		unsigned int nr_io, unsigned int nr_rotated)
>   		__releases(lruvec->lru_lock);
> +		__releases(rcu)

The semicolon is misplaced: it should terminate the declaration after
`__releases(rcu)`, not remain on the `__releases(lruvec->lru_lock)` line
above, which currently leaves the added annotation dangling.

>   void lru_note_cost_refault(struct folio *);
>   void folio_add_lru(struct folio *);
>   void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
> diff --git a/mm/compaction.c b/mm/compaction.c
> index c3e338aaa0ffb..3648ce22c8072 100644
> --- a/mm/compaction.c
> +++ b/mm/compaction.c
> @@ -518,6 +518,24 @@ static bool compact_lock_irqsave(spinlock_t *lock, unsigned long *flags,
>   	return true;
>   }
>   
> +static struct lruvec *
> +compact_folio_lruvec_lock_irqsave(struct folio *folio, unsigned long *flags,
> +				  struct compact_control *cc)
> +{
> +	struct lruvec *lruvec;
> +
> +	rcu_read_lock();
> +retry:
> +	lruvec = folio_lruvec(folio);
> +	compact_lock_irqsave(&lruvec->lru_lock, flags, cc);
> +	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
> +		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
> +		goto retry;
> +	}
> +
> +	return lruvec;
> +}
> +
>   /*
>    * Compaction requires the taking of some coarse locks that are potentially
>    * very heavily contended. The lock should be periodically unlocked to avoid
> @@ -839,7 +857,7 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
>   {
>   	pg_data_t *pgdat = cc->zone->zone_pgdat;
>   	unsigned long nr_scanned = 0, nr_isolated = 0;
> -	struct lruvec *lruvec;
> +	struct lruvec *lruvec = NULL;
>   	unsigned long flags = 0;
>   	struct lruvec *locked = NULL;
>   	struct folio *folio = NULL;
> @@ -1153,18 +1171,17 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
>   		if (!folio_test_clear_lru(folio))
>   			goto isolate_fail_put;
>   
> -		lruvec = folio_lruvec(folio);
> +		if (locked)
> +			lruvec = folio_lruvec(folio);
>   
>   		/* If we already hold the lock, we can skip some rechecking */
> -		if (lruvec != locked) {
> +		if (lruvec != locked || !locked) {
>   			if (locked)
>   				lruvec_unlock_irqrestore(locked, flags);
>   
> -			compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
> +			lruvec = compact_folio_lruvec_lock_irqsave(folio, &flags, cc);
>   			locked = lruvec;
>   
> -			lruvec_memcg_debug(lruvec, folio);
> -
>   			/*
>   			 * Try get exclusive access under lock. If marked for
>   			 * skip, the scan is aborted unless the current context
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 548e67dbf2386..a1573600d4188 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1201,23 +1201,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup *memcg,
>   	}
>   }
>   
> -#ifdef CONFIG_DEBUG_VM
> -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
> -{
> -	struct mem_cgroup *memcg;
> -
> -	if (mem_cgroup_disabled())
> -		return;
> -
> -	memcg = folio_memcg(folio);
> -
> -	if (!memcg)
> -		VM_BUG_ON_FOLIO(!mem_cgroup_is_root(lruvec_memcg(lruvec)), folio);
> -	else
> -		VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio);
> -}
> -#endif
> -
>   /**
>    * folio_lruvec_lock - Lock the lruvec for a folio.
>    * @folio: Pointer to the folio.
> @@ -1227,14 +1210,22 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
>    * - folio_test_lru false
>    * - folio frozen (refcount of 0)
>    *
> - * Return: The lruvec this folio is on with its lock held.
> + * Return: The lruvec this folio is on with its lock held and rcu read lock held.
>    */
>   struct lruvec *folio_lruvec_lock(struct folio *folio)
> +	__acquires(&lruvec->lru_lock)
> +	__acquires(rcu)
>   {
> -	struct lruvec *lruvec = folio_lruvec(folio);
> +	struct lruvec *lruvec;
>   
> +	rcu_read_lock();
> +retry:
> +	lruvec = folio_lruvec(folio);
>   	spin_lock(&lruvec->lru_lock);
> -	lruvec_memcg_debug(lruvec, folio);
> +	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
> +		spin_unlock(&lruvec->lru_lock);
> +		goto retry;
> +	}
>   
>   	return lruvec;
>   }
> @@ -1249,14 +1240,22 @@ struct lruvec *folio_lruvec_lock(struct folio *folio)
>    * - folio frozen (refcount of 0)
>    *
>    * Return: The lruvec this folio is on with its lock held and interrupts
> - * disabled.
> + * disabled and rcu read lock held.
>    */
>   struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
> +	__acquires(&lruvec->lru_lock)
> +	__acquires(rcu)
>   {
> -	struct lruvec *lruvec = folio_lruvec(folio);
> +	struct lruvec *lruvec;
>   
> +	rcu_read_lock();
> +retry:
> +	lruvec = folio_lruvec(folio);
>   	spin_lock_irq(&lruvec->lru_lock);
> -	lruvec_memcg_debug(lruvec, folio);
> +	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
> +		spin_unlock_irq(&lruvec->lru_lock);
> +		goto retry;
> +	}
>   
>   	return lruvec;
>   }
> @@ -1272,15 +1271,23 @@ struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
>    * - folio frozen (refcount of 0)
>    *
>    * Return: The lruvec this folio is on with its lock held and interrupts
> - * disabled.
> + * disabled and rcu read lock held.
>    */
>   struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
>   		unsigned long *flags)
> +	__acquires(&lruvec->lru_lock)
> +	__acquires(rcu)
>   {
> -	struct lruvec *lruvec = folio_lruvec(folio);
> +	struct lruvec *lruvec;
>   
> +	rcu_read_lock();
> +retry:
> +	lruvec = folio_lruvec(folio);
>   	spin_lock_irqsave(&lruvec->lru_lock, *flags);
> -	lruvec_memcg_debug(lruvec, folio);
> +	if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
> +		spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
> +		goto retry;
> +	}
>   
>   	return lruvec;
>   }
> diff --git a/mm/swap.c b/mm/swap.c
> index cb1148a92d8ec..7e53479ca1732 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -240,6 +240,7 @@ void folio_rotate_reclaimable(struct folio *folio)
>   void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
>   		unsigned int nr_io, unsigned int nr_rotated)
>   		__releases(lruvec->lru_lock)
> +		__releases(rcu)
>   {
>   	unsigned long cost;
>   
> @@ -253,6 +254,7 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
>   	cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated;
>   	if (!cost) {
>   		spin_unlock_irq(&lruvec->lru_lock);
> +		rcu_read_unlock();

Better to use lruvec_unlock_irq(lruvec)?

>   		return;
>   	}
>   
> @@ -284,9 +286,11 @@ void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
>   		}
>   
>   		spin_unlock_irq(&lruvec->lru_lock);
> +		rcu_read_unlock();

Ditto.

>   		lruvec = parent_lruvec(lruvec);
>   		if (!lruvec)
>   			break;
> +		rcu_read_lock();
>   		spin_lock_irq(&lruvec->lru_lock);

lruvec_lock_irq(lruvec)?


Thanks.
>   	}
>   }

Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Qi Zheng 3 weeks, 2 days ago

On 1/16/26 5:43 PM, Muchun Song wrote:
> 
> 
> On 2026/1/14 19:32, Qi Zheng wrote:
>> From: Muchun Song <songmuchun@bytedance.com>
>>
>> The following diagram illustrates how to ensure the safety of the folio
>> lruvec lock when LRU folios undergo reparenting.
>>
>> In the folio_lruvec_lock(folio) function:
>> ```
>>      rcu_read_lock();
>> retry:
>>      lruvec = folio_lruvec(folio);
>>      /* There is a possibility of folio reparenting at this point. */
>>      spin_lock(&lruvec->lru_lock);
>>      if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>>          /*
>>           * The wrong lruvec lock was acquired, and a retry is required.
>>           * This is because the folio resides on the parent memcg lruvec
>>           * list.
>>           */
>>          spin_unlock(&lruvec->lru_lock);
>>          goto retry;
>>      }
>>
>>      /* Reaching here indicates that folio_memcg() is stable. */
>> ```
>>
>> In the memcg_reparent_objcgs(memcg) function:
>> ```
>>      spin_lock(&lruvec->lru_lock);
>>      spin_lock(&lruvec_parent->lru_lock);
>>      /* Transfer folios from the lruvec list to the parent's. */
>>      spin_unlock(&lruvec_parent->lru_lock);
>>      spin_unlock(&lruvec->lru_lock);
>> ```
>>
>> After acquiring the lruvec lock, it is necessary to verify whether
>> the folio has been reparented. If reparenting has occurred, the new
>> lruvec lock must be reacquired. During the LRU folio reparenting
>> process, the lruvec lock will also be acquired (this will be
>> implemented in a subsequent patch). Therefore, folio_memcg() remains
>> unchanged while the lruvec lock is held.
>>
>> Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
>> after the lruvec lock is acquired, the lruvec_memcg_debug() check is
>> redundant. Hence, it is removed.
>>
>> This patch serves as a preparation for the reparenting of LRU folios.
>>
>> Signed-off-by: Muchun Song <songmuchun@bytedance.com>
>> Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
>> Acked-by: Johannes Weiner <hannes@cmpxchg.org>
>> ---
>>   include/linux/memcontrol.h | 45 +++++++++++++++++++----------
>>   include/linux/swap.h       |  1 +
>>   mm/compaction.c            | 29 +++++++++++++++----
>>   mm/memcontrol.c            | 59 +++++++++++++++++++++-----------------
>>   mm/swap.c                  |  4 +++
>>   5 files changed, 91 insertions(+), 47 deletions(-)
>>
>> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
>> index 4b6f20dc694ba..26c3c0e375f58 100644
>> --- a/include/linux/memcontrol.h
>> +++ b/include/linux/memcontrol.h
>> @@ -742,7 +742,15 @@ static inline struct lruvec 
>> *mem_cgroup_lruvec(struct mem_cgroup *memcg,
>>    * folio_lruvec - return lruvec for isolating/putting an LRU folio
>>    * @folio: Pointer to the folio.
>>    *
>> - * This function relies on folio->mem_cgroup being stable.
>> + * Call with rcu_read_lock() held to ensure the lifetime of the 
>> returned lruvec.
>> + * Note that this alone will NOT guarantee the stability of the 
>> folio->lruvec
>> + * association; the folio can be reparented to an ancestor if this 
>> races with
>> + * cgroup deletion.
>> + *
>> + * Use folio_lruvec_lock() to ensure both lifetime and stability of 
>> the binding.
>> + * Once a lruvec is locked, folio_lruvec() can be called on other 
>> folios, and
>> + * their binding is stable if the returned lruvec matches the one the 
>> caller has
>> + * locked. Useful for lock batching.
>>    */
>>   static inline struct lruvec *folio_lruvec(struct folio *folio)
>>   {
>> @@ -761,18 +769,15 @@ struct mem_cgroup 
>> *get_mem_cgroup_from_current(void);
>>   struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio);
>>   struct lruvec *folio_lruvec_lock(struct folio *folio);
>> +    __acquires(&lruvec->lru_lock)
>> +    __acquires(rcu)
>>   struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
>> +    __acquires(&lruvec->lru_lock)
>> +    __acquires(rcu)
>>   struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
>>                           unsigned long *flags);
>> -
>> -#ifdef CONFIG_DEBUG_VM
>> -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
>> -#else
>> -static inline
>> -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
>> -{
>> -}
>> -#endif
>> +    __acquires(&lruvec->lru_lock)
>> +    __acquires(rcu)
>>   static inline
>>   struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state 
>> *css){
>> @@ -1199,11 +1204,6 @@ static inline struct lruvec 
>> *folio_lruvec(struct folio *folio)
>>       return &pgdat->__lruvec;
>>   }
>> -static inline
>> -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
>> -{
>> -}
>> -
>>   static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup 
>> *memcg)
>>   {
>>       return NULL;
>> @@ -1262,6 +1262,7 @@ static inline struct lruvec 
>> *folio_lruvec_lock(struct folio *folio)
>>   {
>>       struct pglist_data *pgdat = folio_pgdat(folio);
>> +    rcu_read_lock();
>>       spin_lock(&pgdat->__lruvec.lru_lock);
>>       return &pgdat->__lruvec;
>>   }
>> @@ -1270,6 +1271,7 @@ static inline struct lruvec 
>> *folio_lruvec_lock_irq(struct folio *folio)
>>   {
>>       struct pglist_data *pgdat = folio_pgdat(folio);
>> +    rcu_read_lock();
>>       spin_lock_irq(&pgdat->__lruvec.lru_lock);
>>       return &pgdat->__lruvec;
>>   }
>> @@ -1279,6 +1281,7 @@ static inline struct lruvec 
>> *folio_lruvec_lock_irqsave(struct folio *folio,
>>   {
>>       struct pglist_data *pgdat = folio_pgdat(folio);
>> +    rcu_read_lock();
>>       spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
>>       return &pgdat->__lruvec;
>>   }
>> @@ -1500,24 +1503,36 @@ static inline struct lruvec 
>> *parent_lruvec(struct lruvec *lruvec)
>>   }
>>   static inline void lruvec_lock_irq(struct lruvec *lruvec)
>> +    __acquires(&lruvec->lru_lock)
>> +    __acquires(rcu)
> 
> It seems that functions marked as `inline` cannot be decorated with
> `__acquires`? We’ve had to move these little helpers into `memcontrol.c`
> and declare them as extern, but they’re so short that it hardly feels

Right, I received a compilation error reported by LKP:

All errors (new ones prefixed by >>):

    In file included from crypto/ahash.c:26:
    In file included from include/net/netlink.h:6:
    In file included from include/linux/netlink.h:9:
    In file included from include/net/scm.h:9:
    In file included from include/linux/security.h:35:
    In file included from include/linux/bpf.h:32:
 >> include/linux/memcontrol.h:772:14: error: use of undeclared 
identifier 'lruvec'
      772 |         __acquires(&lruvec->lru_lock)
          |                     ^~~~~~
    include/linux/memcontrol.h:773:13: error: use of undeclared 
identifier 'rcu'
      773 |         __acquires(rcu)
          |                    ^~~
    include/linux/memcontrol.h:775:14: error: use of undeclared 
identifier 'lruvec'
      775 |         __acquires(&lruvec->lru_lock)
          |                     ^~~~~~
    include/linux/memcontrol.h:776:13: error: use of undeclared 
identifier 'rcu'
      776 |         __acquires(rcu)
          |                    ^~~
    include/linux/memcontrol.h:779:14: error: use of undeclared 
identifier 'lruvec'
      779 |         __acquires(&lruvec->lru_lock)
          |                     ^~~~~~
    include/linux/memcontrol.h:780:13: error: use of undeclared 
identifier 'rcu'
      780 |         __acquires(rcu)
          |                    ^~~
    include/linux/memcontrol.h:1507:13: error: use of undeclared 
identifier 'rcu'
     1507 |         __acquires(rcu)
          |                    ^~~
    include/linux/memcontrol.h:1515:13: error: use of undeclared 
identifier 'rcu'
     1515 |         __releases(rcu)
          |                    ^~~
    include/linux/memcontrol.h:1523:13: error: use of undeclared 
identifier 'rcu'
     1523 |         __releases(rcu)
          |                    ^~~
    include/linux/memcontrol.h:1532:13: error: use of undeclared 
identifier 'rcu'
     1532 |         __releases(rcu)

And I reproduced this error with the following configuration:

1. enable CONFIG_WARN_CONTEXT_ANALYSIS_ALL
2. make CC=clang bzImage (clang version >= 22)

> worth the trouble. My own inclination is to drop the `__acquires`
> annotations—mainly for performance reasons.

If no one else objects, I will drop __acquires/__releases in the next
version.

> 
>>   {
>> +    rcu_read_lock();
>>       spin_lock_irq(&lruvec->lru_lock);
>>   }
>>   static inline void lruvec_unlock(struct lruvec *lruvec)
>> +    __releases(&lruvec->lru_lock)
>> +    __releases(rcu)
>>   {
>>       spin_unlock(&lruvec->lru_lock);
>> +    rcu_read_unlock();
>>   }
>>   static inline void lruvec_unlock_irq(struct lruvec *lruvec)
>> +    __releases(&lruvec->lru_lock)
>> +    __releases(rcu)
>>   {
>>       spin_unlock_irq(&lruvec->lru_lock);
>> +    rcu_read_unlock();
>>   }
>>   static inline void lruvec_unlock_irqrestore(struct lruvec *lruvec,
>>           unsigned long flags)
>> +    __releases(&lruvec->lru_lock)
>> +    __releases(rcu)
>>   {
>>       spin_unlock_irqrestore(&lruvec->lru_lock, flags);
>> +    rcu_read_unlock();
>>   }
>>   /* Test requires a stable folio->memcg binding, see folio_memcg() */
>> diff --git a/include/linux/swap.h b/include/linux/swap.h
>> index 62fc7499b4089..e60f45b48e74d 100644
>> --- a/include/linux/swap.h
>> +++ b/include/linux/swap.h
>> @@ -330,6 +330,7 @@ extern unsigned long totalreserve_pages;
>>   void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
>>           unsigned int nr_io, unsigned int nr_rotated)
>>           __releases(lruvec->lru_lock);
>> +        __releases(rcu)
> 
> Missed a semicolon.
> 
>>   void lru_note_cost_refault(struct folio *);
>>   void folio_add_lru(struct folio *);
>>   void folio_add_lru_vma(struct folio *, struct vm_area_struct *);
>> diff --git a/mm/compaction.c b/mm/compaction.c
>> index c3e338aaa0ffb..3648ce22c8072 100644
>> --- a/mm/compaction.c
>> +++ b/mm/compaction.c
>> @@ -518,6 +518,24 @@ static bool compact_lock_irqsave(spinlock_t 
>> *lock, unsigned long *flags,
>>       return true;
>>   }
>> +static struct lruvec *
>> +compact_folio_lruvec_lock_irqsave(struct folio *folio, unsigned long 
>> *flags,
>> +                  struct compact_control *cc)
>> +{
>> +    struct lruvec *lruvec;
>> +
>> +    rcu_read_lock();
>> +retry:
>> +    lruvec = folio_lruvec(folio);
>> +    compact_lock_irqsave(&lruvec->lru_lock, flags, cc);
>> +    if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>> +        spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
>> +        goto retry;
>> +    }
>> +
>> +    return lruvec;
>> +}
>> +
>>   /*
>>    * Compaction requires the taking of some coarse locks that are 
>> potentially
>>    * very heavily contended. The lock should be periodically unlocked 
>> to avoid
>> @@ -839,7 +857,7 @@ isolate_migratepages_block(struct compact_control 
>> *cc, unsigned long low_pfn,
>>   {
>>       pg_data_t *pgdat = cc->zone->zone_pgdat;
>>       unsigned long nr_scanned = 0, nr_isolated = 0;
>> -    struct lruvec *lruvec;
>> +    struct lruvec *lruvec = NULL;
>>       unsigned long flags = 0;
>>       struct lruvec *locked = NULL;
>>       struct folio *folio = NULL;
>> @@ -1153,18 +1171,17 @@ isolate_migratepages_block(struct 
>> compact_control *cc, unsigned long low_pfn,
>>           if (!folio_test_clear_lru(folio))
>>               goto isolate_fail_put;
>> -        lruvec = folio_lruvec(folio);
>> +        if (locked)
>> +            lruvec = folio_lruvec(folio);
>>           /* If we already hold the lock, we can skip some rechecking */
>> -        if (lruvec != locked) {
>> +        if (lruvec != locked || !locked) {
>>               if (locked)
>>                   lruvec_unlock_irqrestore(locked, flags);
>> -            compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
>> +            lruvec = compact_folio_lruvec_lock_irqsave(folio, &flags, 
>> cc);
>>               locked = lruvec;
>> -            lruvec_memcg_debug(lruvec, folio);
>> -
>>               /*
>>                * Try get exclusive access under lock. If marked for
>>                * skip, the scan is aborted unless the current context
>> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
>> index 548e67dbf2386..a1573600d4188 100644
>> --- a/mm/memcontrol.c
>> +++ b/mm/memcontrol.c
>> @@ -1201,23 +1201,6 @@ void mem_cgroup_scan_tasks(struct mem_cgroup 
>> *memcg,
>>       }
>>   }
>> -#ifdef CONFIG_DEBUG_VM
>> -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
>> -{
>> -    struct mem_cgroup *memcg;
>> -
>> -    if (mem_cgroup_disabled())
>> -        return;
>> -
>> -    memcg = folio_memcg(folio);
>> -
>> -    if (!memcg)
>> -        VM_BUG_ON_FOLIO(!mem_cgroup_is_root(lruvec_memcg(lruvec)), 
>> folio);
>> -    else
>> -        VM_BUG_ON_FOLIO(lruvec_memcg(lruvec) != memcg, folio);
>> -}
>> -#endif
>> -
>>   /**
>>    * folio_lruvec_lock - Lock the lruvec for a folio.
>>    * @folio: Pointer to the folio.
>> @@ -1227,14 +1210,22 @@ void lruvec_memcg_debug(struct lruvec *lruvec, 
>> struct folio *folio)
>>    * - folio_test_lru false
>>    * - folio frozen (refcount of 0)
>>    *
>> - * Return: The lruvec this folio is on with its lock held.
>> + * Return: The lruvec this folio is on with its lock held and rcu 
>> read lock held.
>>    */
>>   struct lruvec *folio_lruvec_lock(struct folio *folio)
>> +    __acquires(&lruvec->lru_lock)
>> +    __acquires(rcu)
>>   {
>> -    struct lruvec *lruvec = folio_lruvec(folio);
>> +    struct lruvec *lruvec;
>> +    rcu_read_lock();
>> +retry:
>> +    lruvec = folio_lruvec(folio);
>>       spin_lock(&lruvec->lru_lock);
>> -    lruvec_memcg_debug(lruvec, folio);
>> +    if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>> +        spin_unlock(&lruvec->lru_lock);
>> +        goto retry;
>> +    }
>>       return lruvec;
>>   }
>> @@ -1249,14 +1240,22 @@ struct lruvec *folio_lruvec_lock(struct folio 
>> *folio)
>>    * - folio frozen (refcount of 0)
>>    *
>>    * Return: The lruvec this folio is on with its lock held and 
>> interrupts
>> - * disabled.
>> + * disabled and rcu read lock held.
>>    */
>>   struct lruvec *folio_lruvec_lock_irq(struct folio *folio)
>> +    __acquires(&lruvec->lru_lock)
>> +    __acquires(rcu)
>>   {
>> -    struct lruvec *lruvec = folio_lruvec(folio);
>> +    struct lruvec *lruvec;
>> +    rcu_read_lock();
>> +retry:
>> +    lruvec = folio_lruvec(folio);
>>       spin_lock_irq(&lruvec->lru_lock);
>> -    lruvec_memcg_debug(lruvec, folio);
>> +    if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>> +        spin_unlock_irq(&lruvec->lru_lock);
>> +        goto retry;
>> +    }
>>       return lruvec;
>>   }
>> @@ -1272,15 +1271,23 @@ struct lruvec *folio_lruvec_lock_irq(struct 
>> folio *folio)
>>    * - folio frozen (refcount of 0)
>>    *
>>    * Return: The lruvec this folio is on with its lock held and 
>> interrupts
>> - * disabled.
>> + * disabled and rcu read lock held.
>>    */
>>   struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
>>           unsigned long *flags)
>> +    __acquires(&lruvec->lru_lock)
>> +    __acquires(rcu)
>>   {
>> -    struct lruvec *lruvec = folio_lruvec(folio);
>> +    struct lruvec *lruvec;
>> +    rcu_read_lock();
>> +retry:
>> +    lruvec = folio_lruvec(folio);
>>       spin_lock_irqsave(&lruvec->lru_lock, *flags);
>> -    lruvec_memcg_debug(lruvec, folio);
>> +    if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
>> +        spin_unlock_irqrestore(&lruvec->lru_lock, *flags);
>> +        goto retry;
>> +    }
>>       return lruvec;
>>   }
>> diff --git a/mm/swap.c b/mm/swap.c
>> index cb1148a92d8ec..7e53479ca1732 100644
>> --- a/mm/swap.c
>> +++ b/mm/swap.c
>> @@ -240,6 +240,7 @@ void folio_rotate_reclaimable(struct folio *folio)
>>   void lru_note_cost_unlock_irq(struct lruvec *lruvec, bool file,
>>           unsigned int nr_io, unsigned int nr_rotated)
>>           __releases(lruvec->lru_lock)
>> +        __releases(rcu)
>>   {
>>       unsigned long cost;
>> @@ -253,6 +254,7 @@ void lru_note_cost_unlock_irq(struct lruvec 
>> *lruvec, bool file,
>>       cost = nr_io * SWAP_CLUSTER_MAX + nr_rotated;
>>       if (!cost) {
>>           spin_unlock_irq(&lruvec->lru_lock);
>> +        rcu_read_unlock();
> 
> Better to use lruvec_unlock_irq(lruvec)?
> 
>>           return;
>>       }
>> @@ -284,9 +286,11 @@ void lru_note_cost_unlock_irq(struct lruvec 
>> *lruvec, bool file,
>>           }
>>           spin_unlock_irq(&lruvec->lru_lock);
>> +        rcu_read_unlock();
> 
> Ditto.
> 
>>           lruvec = parent_lruvec(lruvec);
>>           if (!lruvec)
>>               break;
>> +        rcu_read_lock();
>>           spin_lock_irq(&lruvec->lru_lock);
> 
> lruvec_lock_irq(lruvec)?

OK, will do.

Thanks,
Qi

> 
> 
> Thanks.
>>       }
>>   }
> 

Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Shakeel Butt 3 weeks, 1 day ago
On Fri, Jan 16, 2026 at 05:50:22PM +0800, Qi Zheng wrote:
> 
> 
> On 1/16/26 5:43 PM, Muchun Song wrote:
> > 
> > 
> > On 2026/1/14 19:32, Qi Zheng wrote:
> > > From: Muchun Song <songmuchun@bytedance.com>
> > > 
> > > The following diagram illustrates how to ensure the safety of the folio
> > > lruvec lock when LRU folios undergo reparenting.
> > > 
> > > In the folio_lruvec_lock(folio) function:
> > > ```
> > >      rcu_read_lock();
> > > retry:
> > >      lruvec = folio_lruvec(folio);
> > >      /* There is a possibility of folio reparenting at this point. */
> > >      spin_lock(&lruvec->lru_lock);
> > >      if (unlikely(lruvec_memcg(lruvec) != folio_memcg(folio))) {
> > >          /*
> > >           * The wrong lruvec lock was acquired, and a retry is required.
> > >           * This is because the folio resides on the parent memcg lruvec
> > >           * list.
> > >           */
> > >          spin_unlock(&lruvec->lru_lock);
> > >          goto retry;
> > >      }
> > > 
> > >      /* Reaching here indicates that folio_memcg() is stable. */
> > > ```
> > > 
> > > In the memcg_reparent_objcgs(memcg) function:
> > > ```
> > >      spin_lock(&lruvec->lru_lock);
> > >      spin_lock(&lruvec_parent->lru_lock);
> > >      /* Transfer folios from the lruvec list to the parent's. */
> > >      spin_unlock(&lruvec_parent->lru_lock);
> > >      spin_unlock(&lruvec->lru_lock);
> > > ```
> > > 
> > > After acquiring the lruvec lock, it is necessary to verify whether
> > > the folio has been reparented. If reparenting has occurred, the new
> > > lruvec lock must be reacquired. During the LRU folio reparenting
> > > process, the lruvec lock will also be acquired (this will be
> > > implemented in a subsequent patch). Therefore, folio_memcg() remains
> > > unchanged while the lruvec lock is held.
> > > 
> > > Given that lruvec_memcg(lruvec) is always equal to folio_memcg(folio)
> > > after the lruvec lock is acquired, the lruvec_memcg_debug() check is
> > > redundant. Hence, it is removed.
> > > 
> > > This patch serves as a preparation for the reparenting of LRU folios.
> > > 
> > > Signed-off-by: Muchun Song <songmuchun@bytedance.com>
> > > Signed-off-by: Qi Zheng <zhengqi.arch@bytedance.com>
> > > Acked-by: Johannes Weiner <hannes@cmpxchg.org>
> > > ---
> > >   include/linux/memcontrol.h | 45 +++++++++++++++++++----------
> > >   include/linux/swap.h       |  1 +
> > >   mm/compaction.c            | 29 +++++++++++++++----
> > >   mm/memcontrol.c            | 59 +++++++++++++++++++++-----------------
> > >   mm/swap.c                  |  4 +++
> > >   5 files changed, 91 insertions(+), 47 deletions(-)
> > > 
> > > diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> > > index 4b6f20dc694ba..26c3c0e375f58 100644
> > > --- a/include/linux/memcontrol.h
> > > +++ b/include/linux/memcontrol.h
> > > @@ -742,7 +742,15 @@ static inline struct lruvec
> > > *mem_cgroup_lruvec(struct mem_cgroup *memcg,
> > >    * folio_lruvec - return lruvec for isolating/putting an LRU folio
> > >    * @folio: Pointer to the folio.
> > >    *
> > > - * This function relies on folio->mem_cgroup being stable.
> > > + * Call with rcu_read_lock() held to ensure the lifetime of the
> > > returned lruvec.
> > > + * Note that this alone will NOT guarantee the stability of the
> > > folio->lruvec
> > > + * association; the folio can be reparented to an ancestor if this
> > > races with
> > > + * cgroup deletion.
> > > + *
> > > + * Use folio_lruvec_lock() to ensure both lifetime and stability of
> > > the binding.
> > > + * Once a lruvec is locked, folio_lruvec() can be called on other
> > > folios, and
> > > + * their binding is stable if the returned lruvec matches the one
> > > the caller has
> > > + * locked. Useful for lock batching.
> > >    */
> > >   static inline struct lruvec *folio_lruvec(struct folio *folio)
> > >   {
> > > @@ -761,18 +769,15 @@ struct mem_cgroup
> > > *get_mem_cgroup_from_current(void);
> > >   struct mem_cgroup *get_mem_cgroup_from_folio(struct folio *folio);
> > >   struct lruvec *folio_lruvec_lock(struct folio *folio);
> > > +    __acquires(&lruvec->lru_lock)
> > > +    __acquires(rcu)
> > >   struct lruvec *folio_lruvec_lock_irq(struct folio *folio);
> > > +    __acquires(&lruvec->lru_lock)
> > > +    __acquires(rcu)
> > >   struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio,
> > >                           unsigned long *flags);
> > > -
> > > -#ifdef CONFIG_DEBUG_VM
> > > -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio);
> > > -#else
> > > -static inline
> > > -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
> > > -{
> > > -}
> > > -#endif
> > > +    __acquires(&lruvec->lru_lock)
> > > +    __acquires(rcu)
> > >   static inline
> > >   struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state
> > > *css){
> > > @@ -1199,11 +1204,6 @@ static inline struct lruvec
> > > *folio_lruvec(struct folio *folio)
> > >       return &pgdat->__lruvec;
> > >   }
> > > -static inline
> > > -void lruvec_memcg_debug(struct lruvec *lruvec, struct folio *folio)
> > > -{
> > > -}
> > > -
> > >   static inline struct mem_cgroup *parent_mem_cgroup(struct
> > > mem_cgroup *memcg)
> > >   {
> > >       return NULL;
> > > @@ -1262,6 +1262,7 @@ static inline struct lruvec
> > > *folio_lruvec_lock(struct folio *folio)
> > >   {
> > >       struct pglist_data *pgdat = folio_pgdat(folio);
> > > +    rcu_read_lock();
> > >       spin_lock(&pgdat->__lruvec.lru_lock);
> > >       return &pgdat->__lruvec;
> > >   }
> > > @@ -1270,6 +1271,7 @@ static inline struct lruvec
> > > *folio_lruvec_lock_irq(struct folio *folio)
> > >   {
> > >       struct pglist_data *pgdat = folio_pgdat(folio);
> > > +    rcu_read_lock();
> > >       spin_lock_irq(&pgdat->__lruvec.lru_lock);
> > >       return &pgdat->__lruvec;
> > >   }
> > > @@ -1279,6 +1281,7 @@ static inline struct lruvec
> > > *folio_lruvec_lock_irqsave(struct folio *folio,
> > >   {
> > >       struct pglist_data *pgdat = folio_pgdat(folio);
> > > +    rcu_read_lock();
> > >       spin_lock_irqsave(&pgdat->__lruvec.lru_lock, *flagsp);
> > >       return &pgdat->__lruvec;
> > >   }
> > > @@ -1500,24 +1503,36 @@ static inline struct lruvec
> > > *parent_lruvec(struct lruvec *lruvec)
> > >   }
> > >   static inline void lruvec_lock_irq(struct lruvec *lruvec)
> > > +    __acquires(&lruvec->lru_lock)
> > > +    __acquires(rcu)
> > 
> > It seems that functions marked as `inline` cannot be decorated with
> > `__acquires`? We’ve had to move these little helpers into `memcontrol.c`
> > and declare them as extern, but they’re so short that it hardly feels
> 
> Right, I received a compilation error reported by LKP:
> 
> All errors (new ones prefixed by >>):
> 
>    In file included from crypto/ahash.c:26:
>    In file included from include/net/netlink.h:6:
>    In file included from include/linux/netlink.h:9:
>    In file included from include/net/scm.h:9:
>    In file included from include/linux/security.h:35:
>    In file included from include/linux/bpf.h:32:
> >> include/linux/memcontrol.h:772:14: error: use of undeclared identifier
> 'lruvec'
>      772 |         __acquires(&lruvec->lru_lock)
>          |                     ^~~~~~
>    include/linux/memcontrol.h:773:13: error: use of undeclared identifier
> 'rcu'
>      773 |         __acquires(rcu)
>          |                    ^~~
>    include/linux/memcontrol.h:775:14: error: use of undeclared identifier
> 'lruvec'
>      775 |         __acquires(&lruvec->lru_lock)
>          |                     ^~~~~~
>    include/linux/memcontrol.h:776:13: error: use of undeclared identifier
> 'rcu'
>      776 |         __acquires(rcu)
>          |                    ^~~
>    include/linux/memcontrol.h:779:14: error: use of undeclared identifier
> 'lruvec'
>      779 |         __acquires(&lruvec->lru_lock)
>          |                     ^~~~~~
>    include/linux/memcontrol.h:780:13: error: use of undeclared identifier
> 'rcu'
>      780 |         __acquires(rcu)
>          |                    ^~~
>    include/linux/memcontrol.h:1507:13: error: use of undeclared identifier
> 'rcu'
>     1507 |         __acquires(rcu)
>          |                    ^~~
>    include/linux/memcontrol.h:1515:13: error: use of undeclared identifier
> 'rcu'
>     1515 |         __releases(rcu)
>          |                    ^~~
>    include/linux/memcontrol.h:1523:13: error: use of undeclared identifier
> 'rcu'
>     1523 |         __releases(rcu)
>          |                    ^~~
>    include/linux/memcontrol.h:1532:13: error: use of undeclared identifier
> 'rcu'
>     1532 |         __releases(rcu)
> 
> And I reproduced this error with the following configuration:
> 
> 1. enable CONFIG_WARN_CONTEXT_ANALYSIS_ALL
> 2. make CC=clang bzImage (clang version >= 22)
> 
> > worth the trouble. My own inclination is to drop the `__acquires`
> > annotations—mainly for performance reasons.
> 
> If no one else objects, I will drop __acquires/__releases in the next
> version.
> 

If you drop these annotations from the header file and keep them in the C file,
do you still get the compilation error?

Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Qi Zheng 3 weeks ago

On 1/18/26 8:44 AM, Shakeel Butt wrote:
> On Fri, Jan 16, 2026 at 05:50:22PM +0800, Qi Zheng wrote:
>>
>>
>> On 1/16/26 5:43 PM, Muchun Song wrote:
>>>
>>>
>>> On 2026/1/14 19:32, Qi Zheng wrote:
>>>> From: Muchun Song <songmuchun@bytedance.com>
>>>>

[...]

>>>
>>> It seems that functions marked as `inline` cannot be decorated with
>>> `__acquires`? We’ve had to move these little helpers into `memcontrol.c`
>>> and declare them as extern, but they’re so short that it hardly feels
>>
>> Right, I received a compilation error reported by LKP:
>>
>> All errors (new ones prefixed by >>):
>>
>>     In file included from crypto/ahash.c:26:
>>     In file included from include/net/netlink.h:6:
>>     In file included from include/linux/netlink.h:9:
>>     In file included from include/net/scm.h:9:
>>     In file included from include/linux/security.h:35:
>>     In file included from include/linux/bpf.h:32:
>>>> include/linux/memcontrol.h:772:14: error: use of undeclared identifier
>> 'lruvec'
>>       772 |         __acquires(&lruvec->lru_lock)
>>           |                     ^~~~~~
>>     include/linux/memcontrol.h:773:13: error: use of undeclared identifier
>> 'rcu'
>>       773 |         __acquires(rcu)
>>           |                    ^~~
>>     include/linux/memcontrol.h:775:14: error: use of undeclared identifier
>> 'lruvec'
>>       775 |         __acquires(&lruvec->lru_lock)
>>           |                     ^~~~~~
>>     include/linux/memcontrol.h:776:13: error: use of undeclared identifier
>> 'rcu'
>>       776 |         __acquires(rcu)
>>           |                    ^~~
>>     include/linux/memcontrol.h:779:14: error: use of undeclared identifier
>> 'lruvec'
>>       779 |         __acquires(&lruvec->lru_lock)
>>           |                     ^~~~~~
>>     include/linux/memcontrol.h:780:13: error: use of undeclared identifier
>> 'rcu'
>>       780 |         __acquires(rcu)
>>           |                    ^~~
>>     include/linux/memcontrol.h:1507:13: error: use of undeclared identifier
>> 'rcu'
>>      1507 |         __acquires(rcu)
>>           |                    ^~~
>>     include/linux/memcontrol.h:1515:13: error: use of undeclared identifier
>> 'rcu'
>>      1515 |         __releases(rcu)
>>           |                    ^~~
>>     include/linux/memcontrol.h:1523:13: error: use of undeclared identifier
>> 'rcu'
>>      1523 |         __releases(rcu)
>>           |                    ^~~
>>     include/linux/memcontrol.h:1532:13: error: use of undeclared identifier
>> 'rcu'
>>      1532 |         __releases(rcu)
>>
>> And I reproduced this error with the following configuration:
>>
>> 1. enable CONFIG_WARN_CONTEXT_ANALYSIS_ALL
>> 2. make CC=clang bzImage (clang version >= 22)
>>
>>> worth the trouble. My own inclination is to drop the `__acquires`
>>> annotations—mainly for performance reasons.
>>
>> If no one else objects, I will drop __acquires/__releases in the next
>> version.
>>
> 
> If you drop these annotations from the header file and keep them in the C file,
> do you still get the compilation error?

I did test it this way, and it does fix the compilation error, but
Muchun thinks these functions are very simple and there's no need to put
them in a C file.

> 

Re: [PATCH v3 24/30] mm: memcontrol: prepare for reparenting LRU pages for lruvec lock
Posted by Shakeel Butt 2 weeks, 5 days ago
On Mon, Jan 19, 2026 at 11:44:09AM +0800, Qi Zheng wrote:
> 
> 
[...]
> > > 
> > > If no one else objects, I will drop __acquires/__releases in the next
> > > version.
> > > 
> > 
> > If you drop these annotations from header file and keep in the C file,
> > do you still get the compilation error?
> 
> I did test it this way, and it does fix the compilation error, but
> Muchun thinks these functions are very simple and there's no need to put
> them in a C file.
> 

I think we should prefer correctness at this stage. We can always change
these functions later if we see a performance impact from them. So, I would
recommend keeping them in the C file along with these annotations.