[PATCH v2 5/7] mm: list_lru: introduce caller locking for additions and deletions
Posted by Johannes Weiner 3 weeks, 4 days ago
Locking is currently internal to the list_lru API. However, a caller
might want to keep auxiliary state synchronized with the LRU state.

For example, the THP shrinker uses the lock of its custom LRU to keep
PG_partially_mapped and vmstats consistent.

To allow the THP shrinker to switch to list_lru, provide normal and
irqsafe locking primitives as well as caller-locked variants of the
addition and deletion functions.
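
For example, an add that keeps caller state in sync could look like
this (illustrative sketch only; "my_lru", "obj" and its members are
made up, the calls are the ones introduced below):

	unsigned long flags;
	struct list_lru_one *l;

	/* memcg must be kept alive, e.g. via rcu or a css refcount */
	l = list_lru_lock_irqsave(&my_lru, nid, memcg, &flags);
	if (__list_lru_add(&my_lru, l, &obj->entry, nid, memcg))
		obj->on_lru = true;	/* auxiliary state, same lock */
	list_lru_unlock_irqrestore(l, &flags);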

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/list_lru.h |  34 +++++++++++++
 mm/list_lru.c            | 104 +++++++++++++++++++++++++++------------
 2 files changed, 107 insertions(+), 31 deletions(-)

diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index fe739d35a864..4afc02deb44d 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -83,6 +83,40 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
 			 gfp_t gfp);
 void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
 
+/**
+ * list_lru_lock: lock the sublist for the given node and memcg
+ * @lru: the lru pointer
+ * @nid: the node id of the sublist to lock.
+ * @memcg: the cgroup of the sublist to lock.
+ *
+ * Returns the locked list_lru_one sublist. The caller must call
+ * list_lru_unlock() when done.
+ *
+ * You must ensure that the memcg is not freed during this call (e.g., with
+ * rcu or by taking a css refcnt).
+ *
+ * Return: the locked list_lru_one, or NULL on failure
+ */
+struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
+		struct mem_cgroup *memcg);
+
+/**
+ * list_lru_unlock: unlock a sublist locked by list_lru_lock()
+ * @l: the list_lru_one to unlock
+ */
+void list_lru_unlock(struct list_lru_one *l);
+
+struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
+		struct mem_cgroup *memcg, unsigned long *irq_flags);
+void list_lru_unlock_irqrestore(struct list_lru_one *l,
+		unsigned long *irq_flags);
+
+/* Caller-locked variants, see list_lru_add() etc for documentation */
+bool __list_lru_add(struct list_lru *lru, struct list_lru_one *l,
+		struct list_head *item, int nid, struct mem_cgroup *memcg);
+bool __list_lru_del(struct list_lru *lru, struct list_lru_one *l,
+		struct list_head *item, int nid);
+
 /**
  * list_lru_add: add an element to the lru list's tail
  * @lru: the lru pointer
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 4d74c2e9c2a5..779cb26cec84 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -15,17 +15,23 @@
 #include "slab.h"
 #include "internal.h"
 
-static inline void lock_list_lru(struct list_lru_one *l, bool irq)
+static inline void lock_list_lru(struct list_lru_one *l, bool irq,
+				 unsigned long *irq_flags)
 {
-	if (irq)
+	if (irq_flags)
+		spin_lock_irqsave(&l->lock, *irq_flags);
+	else if (irq)
 		spin_lock_irq(&l->lock);
 	else
 		spin_lock(&l->lock);
 }
 
-static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
+static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off,
+				   unsigned long *irq_flags)
 {
-	if (irq_off)
+	if (irq_flags)
+		spin_unlock_irqrestore(&l->lock, *irq_flags);
+	else if (irq_off)
 		spin_unlock_irq(&l->lock);
 	else
 		spin_unlock(&l->lock);
@@ -78,7 +84,7 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
 
 static inline struct list_lru_one *
 lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
-		       bool irq, bool skip_empty)
+		       bool irq, unsigned long *irq_flags, bool skip_empty)
 {
 	struct list_lru_one *l;
 
@@ -86,12 +92,12 @@ lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 again:
 	l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
 	if (likely(l)) {
-		lock_list_lru(l, irq);
+		lock_list_lru(l, irq, irq_flags);
 		if (likely(READ_ONCE(l->nr_items) != LONG_MIN)) {
 			rcu_read_unlock();
 			return l;
 		}
-		unlock_list_lru(l, irq);
+		unlock_list_lru(l, irq, irq_flags);
 	}
 	/*
 	 * Caller may simply bail out if raced with reparenting or
@@ -132,37 +138,79 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
 
 static inline struct list_lru_one *
 lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
-		       bool irq, bool skip_empty)
+		       bool irq, unsigned long *irq_flags, bool skip_empty)
 {
 	struct list_lru_one *l = &lru->node[nid].lru;
 
-	lock_list_lru(l, irq);
+	lock_list_lru(l, irq, irq_flags);
 
 	return l;
 }
 #endif /* CONFIG_MEMCG */
 
-/* The caller must ensure the memcg lifetime. */
-bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
-		  struct mem_cgroup *memcg)
+struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
+				   struct mem_cgroup *memcg)
 {
-	struct list_lru_node *nlru = &lru->node[nid];
-	struct list_lru_one *l;
+	return lock_list_lru_of_memcg(lru, nid, memcg, false, NULL, false);
+}
+
+void list_lru_unlock(struct list_lru_one *l)
+{
+	unlock_list_lru(l, false, NULL);
+}
+
+struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
+					   struct mem_cgroup *memcg,
+					   unsigned long *flags)
+{
+	return lock_list_lru_of_memcg(lru, nid, memcg, true, flags, false);
+}
+
+void list_lru_unlock_irqrestore(struct list_lru_one *l, unsigned long *flags)
+{
+	unlock_list_lru(l, true, flags);
+}
 
-	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
+bool __list_lru_add(struct list_lru *lru, struct list_lru_one *l,
+		    struct list_head *item, int nid,
+		    struct mem_cgroup *memcg)
+{
 	if (list_empty(item)) {
 		list_add_tail(item, &l->list);
 		/* Set shrinker bit if the first element was added */
 		if (!l->nr_items++)
 			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
-		unlock_list_lru(l, false);
-		atomic_long_inc(&nlru->nr_items);
+		atomic_long_inc(&lru->node[nid].nr_items);
+		return true;
+	}
+	return false;
+}
+
+bool __list_lru_del(struct list_lru *lru, struct list_lru_one *l,
+		    struct list_head *item, int nid)
+{
+	if (!list_empty(item)) {
+		list_del_init(item);
+		l->nr_items--;
+		atomic_long_dec(&lru->node[nid].nr_items);
 		return true;
 	}
-	unlock_list_lru(l, false);
 	return false;
 }
 
+/* The caller must ensure the memcg lifetime. */
+bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
+		  struct mem_cgroup *memcg)
+{
+	struct list_lru_one *l;
+	bool ret;
+
+	l = list_lru_lock(lru, nid, memcg);
+	ret = __list_lru_add(lru, l, item, nid, memcg);
+	list_lru_unlock(l);
+	return ret;
+}
+
 bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
 {
 	bool ret;
@@ -184,19 +232,13 @@ EXPORT_SYMBOL_GPL(list_lru_add_obj);
 bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
 		  struct mem_cgroup *memcg)
 {
-	struct list_lru_node *nlru = &lru->node[nid];
 	struct list_lru_one *l;
+	bool ret;
 
-	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
-	if (!list_empty(item)) {
-		list_del_init(item);
-		l->nr_items--;
-		unlock_list_lru(l, false);
-		atomic_long_dec(&nlru->nr_items);
-		return true;
-	}
-	unlock_list_lru(l, false);
-	return false;
+	l = list_lru_lock(lru, nid, memcg);
+	ret = __list_lru_del(lru, l, item, nid);
+	list_lru_unlock(l);
+	return ret;
 }
 
 bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
@@ -269,7 +311,7 @@ __list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 	unsigned long isolated = 0;
 
 restart:
-	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
+	l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, NULL, true);
 	if (!l)
 		return isolated;
 	list_for_each_safe(item, n, &l->list) {
@@ -310,7 +352,7 @@ __list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
 			BUG();
 		}
 	}
-	unlock_list_lru(l, irq_off);
+	unlock_list_lru(l, irq_off, NULL);
 out:
 	return isolated;
 }
-- 
2.53.0
Re: [PATCH v2 5/7] mm: list_lru: introduce caller locking for additions and deletions
Posted by David Hildenbrand (Arm) 3 weeks ago
On 3/12/26 21:51, Johannes Weiner wrote:
> Locking is currently internal to the list_lru API. However, a caller
> might want to keep auxiliary state synchronized with the LRU state.
> 
> For example, the THP shrinker uses the lock of its custom LRU to keep
> PG_partially_mapped and vmstats consistent.
> 
> To allow the THP shrinker to switch to list_lru, provide normal and
> irqsafe locking primitives as well as caller-locked variants of the
> addition and deletion functions.
> 
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
>  include/linux/list_lru.h |  34 +++++++++++++
>  mm/list_lru.c            | 104 +++++++++++++++++++++++++++------------
>  2 files changed, 107 insertions(+), 31 deletions(-)
> 
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index fe739d35a864..4afc02deb44d 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -83,6 +83,40 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
>  			 gfp_t gfp);
>  void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
>  

[...]

>  static inline struct list_lru_one *
>  lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
> -		       bool irq, bool skip_empty)
> +		       bool irq, unsigned long *irq_flags, bool skip_empty)
>  {
>  	struct list_lru_one *l = &lru->node[nid].lru;
>  
> -	lock_list_lru(l, irq);
> +	lock_list_lru(l, irq, irq_flags);
>  
>  	return l;
>  }
>  #endif /* CONFIG_MEMCG */
>  
> -/* The caller must ensure the memcg lifetime. */
> -bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> -		  struct mem_cgroup *memcg)
> +struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
> +				   struct mem_cgroup *memcg)
>  {
> -	struct list_lru_node *nlru = &lru->node[nid];
> -	struct list_lru_one *l;
> +	return lock_list_lru_of_memcg(lru, nid, memcg, false, NULL, false);

The two "bool" parameters really are ugly. Fortunately this is only an
internal function.

The callers are still a bit hard to read; we could add /* skip_empty= */true).

like

return lock_list_lru_of_memcg(lru, nid, memcg, /* irq= */false, NULL,
			      /* skip_empty= */false);

Like we do in other code. But I guess we should do it consistently then
(or better add some proper flags).
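
Rough sketch of the flags idea (untested, just to illustrate):

	#define LRU_LOCK_IRQ		BIT(0)
	#define LRU_LOCK_SKIP_EMPTY	BIT(1)

	static inline struct list_lru_one *
	lock_list_lru_of_memcg(struct list_lru *lru, int nid,
			       struct mem_cgroup *memcg, unsigned int flags,
			       unsigned long *irq_flags);

	l = lock_list_lru_of_memcg(lru, nid, memcg, LRU_LOCK_IRQ, NULL);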

Anyhow, something that could be cleaned up later.

> +}
> +
> +void list_lru_unlock(struct list_lru_one *l)
> +{
> +	unlock_list_lru(l, false, NULL);
> +}
> +
> +struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
> +					   struct mem_cgroup *memcg,
> +					   unsigned long *flags)
> +{
> +	return lock_list_lru_of_memcg(lru, nid, memcg, true, flags, false);

And here it gets really confusing. true false false ... am I reading
binary code?

I guess the second "false" should actually be "NULL" :)

> +}
> +
> +void list_lru_unlock_irqrestore(struct list_lru_one *l, unsigned long *flags)
> +{
> +	unlock_list_lru(l, true, flags);
> +}
>  
> -	l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
> +bool __list_lru_add(struct list_lru *lru, struct list_lru_one *l,
> +		    struct list_head *item, int nid,
> +		    struct mem_cgroup *memcg)
> +{
>  	if (list_empty(item)) {
>  		list_add_tail(item, &l->list);
>  		/* Set shrinker bit if the first element was added */
>  		if (!l->nr_items++)
>  			set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
> -		unlock_list_lru(l, false);
> -		atomic_long_inc(&nlru->nr_items);
> +		atomic_long_inc(&lru->node[nid].nr_items);
> +		return true;
> +	}
> +	return false;
> +}
> +
> +bool __list_lru_del(struct list_lru *lru, struct list_lru_one *l,
> +		    struct list_head *item, int nid)
> +{
> +	if (!list_empty(item)) {
> +		list_del_init(item);
> +		l->nr_items--;
> +		atomic_long_dec(&lru->node[nid].nr_items);
>  		return true;
>  	}
> -	unlock_list_lru(l, false);
>  	return false;
>  }
>  
> +/* The caller must ensure the memcg lifetime. */
> +bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> +		  struct mem_cgroup *memcg)
> +{
> +	struct list_lru_one *l;
> +	bool ret;
> +
> +	l = list_lru_lock(lru, nid, memcg);
> +	ret = __list_lru_add(lru, l, item, nid, memcg);
> +	list_lru_unlock(l);
> +	return ret;
> +}

Nice.


Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>

-- 
Cheers,

David
Re: [PATCH v2 5/7] mm: list_lru: introduce caller locking for additions and deletions
Posted by Johannes Weiner 3 weeks ago
On Tue, Mar 17, 2026 at 11:00:59AM +0100, David Hildenbrand (Arm) wrote:
> On 3/12/26 21:51, Johannes Weiner wrote:
> > -/* The caller must ensure the memcg lifetime. */
> > -bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> > -		  struct mem_cgroup *memcg)
> > +struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
> > +				   struct mem_cgroup *memcg)
> >  {
> > -	struct list_lru_node *nlru = &lru->node[nid];
> > -	struct list_lru_one *l;
> > +	return lock_list_lru_of_memcg(lru, nid, memcg, false, NULL, false);
> 
> The two "bool" parameters really are ugly. Fortunately this is only an
> internal function.

Yeah, I absolutely hate this too. I only didn't look further because
it's internal, but...

> The callers are still a bit hard to read; we could add /* skip_empty= */true).
> 
> like
> 
> return lock_list_lru_of_memcg(lru, nid, memcg, /* irq= */false, NULL,
> 			      /* skip_empty= */false);
> 
> Like we do in other code. But I guess we should do it consistently then
> (or better add some proper flags).
> 
> Anyhow, something that could be cleaned up later.

This is a great idea.

I have to send a v3 for the fix in __folio_freeze_and_split_unmapped()
and the lockdep key, so I'll make this change along with it.

> > +void list_lru_unlock(struct list_lru_one *l)
> > +{
> > +	unlock_list_lru(l, false, NULL);
> > +}
> > +
> > +struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
> > +					   struct mem_cgroup *memcg,
> > +					   unsigned long *flags)
> > +{
> > +	return lock_list_lru_of_memcg(lru, nid, memcg, true, flags, false);
> 
> And here it gets really confusing. true false false ... am I reading
> binary code?
> 
> I guess the second "false" should actually be "NULL" :)

Good catch, I'll fix that.

> > +/* The caller must ensure the memcg lifetime. */
> > +bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> > +		  struct mem_cgroup *memcg)
> > +{
> > +	struct list_lru_one *l;
> > +	bool ret;
> > +
> > +	l = list_lru_lock(lru, nid, memcg);
> > +	ret = __list_lru_add(lru, l, item, nid, memcg);
> > +	list_lru_unlock(l);
> > +	return ret;
> > +}
> 
> Nice.
> 
> Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>

Thanks for your review!
Re: [PATCH v2 5/7] mm: list_lru: introduce caller locking for additions and deletions
Posted by Johannes Weiner 3 weeks ago
On Tue, Mar 17, 2026 at 10:03:08AM -0400, Johannes Weiner wrote:
> On Tue, Mar 17, 2026 at 11:00:59AM +0100, David Hildenbrand (Arm) wrote:
> > On 3/12/26 21:51, Johannes Weiner wrote:
> > > +void list_lru_unlock(struct list_lru_one *l)
> > > +{
> > > +	unlock_list_lru(l, false, NULL);
> > > +}
> > > +
> > > +struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
> > > +					   struct mem_cgroup *memcg,
> > > +					   unsigned long *flags)
> > > +{
> > > +	return lock_list_lru_of_memcg(lru, nid, memcg, true, flags, false);
> > 
> > And here it gets really confusing. true false false ... am I reading
> > binary code?
> > 
> > I guess the second "false" should actually be "NULL" :)
> 
> Good catch, I'll fix that.

That's actually "flags" haha. But it supports your point.
Re: [PATCH v2 5/7] mm: list_lru: introduce caller locking for additions and deletions
Posted by David Hildenbrand (Arm) 2 weeks, 6 days ago
On 3/17/26 15:34, Johannes Weiner wrote:
> On Tue, Mar 17, 2026 at 10:03:08AM -0400, Johannes Weiner wrote:
>> On Tue, Mar 17, 2026 at 11:00:59AM +0100, David Hildenbrand (Arm) wrote:
>>>
>>> And here it gets really confusing. true false false ... am I reading
>>> binary code?
>>>
>>> I guess the second "false" should actually be "NULL" :)
>>
>> Good catch, I'll fix that.
> 
> That's actually "flags" haha. But it supports your point.

Ohhh, haha. Well, at least nothing to fix then :)

-- 
Cheers,

David