Locking is currently internal to the list_lru API. However, a caller
might want to keep auxiliary state synchronized with the LRU state.
For example, the THP shrinker uses the lock of its custom LRU to keep
PG_partially_mapped and vmstats consistent.
To allow the THP shrinker to switch to list_lru, provide normal and
irqsafe locking primitives as well as caller-locked variants of the
addition and deletion functions.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
---
include/linux/list_lru.h | 34 +++++++++++++
mm/list_lru.c | 104 +++++++++++++++++++++++++++------------
2 files changed, 107 insertions(+), 31 deletions(-)
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
index fe739d35a864..4afc02deb44d 100644
--- a/include/linux/list_lru.h
+++ b/include/linux/list_lru.h
@@ -83,6 +83,40 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
gfp_t gfp);
void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
+/**
+ * list_lru_lock: lock the sublist for the given node and memcg
+ * @lru: the lru pointer
+ * @nid: the node id of the sublist to lock.
+ * @memcg: the cgroup of the sublist to lock.
+ *
+ * Returns the locked list_lru_one sublist. The caller must call
+ * list_lru_unlock() when done.
+ *
+ * You must ensure that the memcg is not freed during this call (e.g., with
+ * rcu or by taking a css refcnt).
+ *
+ * Return: the locked list_lru_one, or NULL on failure
+ */
+struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
+ struct mem_cgroup *memcg);
+
+/**
+ * list_lru_unlock: unlock a sublist locked by list_lru_lock()
+ * @l: the list_lru_one to unlock
+ */
+void list_lru_unlock(struct list_lru_one *l);
+
+struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
+ struct mem_cgroup *memcg, unsigned long *irq_flags);
+void list_lru_unlock_irqrestore(struct list_lru_one *l,
+ unsigned long *irq_flags);
+
+/* Caller-locked variants, see list_lru_add() etc for documentation */
+bool __list_lru_add(struct list_lru *lru, struct list_lru_one *l,
+ struct list_head *item, int nid, struct mem_cgroup *memcg);
+bool __list_lru_del(struct list_lru *lru, struct list_lru_one *l,
+ struct list_head *item, int nid);
+
/**
* list_lru_add: add an element to the lru list's tail
* @lru: the lru pointer
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 4d74c2e9c2a5..779cb26cec84 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -15,17 +15,23 @@
#include "slab.h"
#include "internal.h"
-static inline void lock_list_lru(struct list_lru_one *l, bool irq)
+static inline void lock_list_lru(struct list_lru_one *l, bool irq,
+ unsigned long *irq_flags)
{
- if (irq)
+ if (irq_flags)
+ spin_lock_irqsave(&l->lock, *irq_flags);
+ else if (irq)
spin_lock_irq(&l->lock);
else
spin_lock(&l->lock);
}
-static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off)
+static inline void unlock_list_lru(struct list_lru_one *l, bool irq_off,
+ unsigned long *irq_flags)
{
- if (irq_off)
+ if (irq_flags)
+ spin_unlock_irqrestore(&l->lock, *irq_flags);
+ else if (irq_off)
spin_unlock_irq(&l->lock);
else
spin_unlock(&l->lock);
@@ -78,7 +84,7 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
- bool irq, bool skip_empty)
+ bool irq, unsigned long *irq_flags, bool skip_empty)
{
struct list_lru_one *l;
@@ -86,12 +92,12 @@ lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
again:
l = list_lru_from_memcg_idx(lru, nid, memcg_kmem_id(memcg));
if (likely(l)) {
- lock_list_lru(l, irq);
+ lock_list_lru(l, irq, irq_flags);
if (likely(READ_ONCE(l->nr_items) != LONG_MIN)) {
rcu_read_unlock();
return l;
}
- unlock_list_lru(l, irq);
+ unlock_list_lru(l, irq, irq_flags);
}
/*
* Caller may simply bail out if raced with reparenting or
@@ -132,37 +138,79 @@ list_lru_from_memcg_idx(struct list_lru *lru, int nid, int idx)
static inline struct list_lru_one *
lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
- bool irq, bool skip_empty)
+ bool irq, unsigned long *irq_flags, bool skip_empty)
{
struct list_lru_one *l = &lru->node[nid].lru;
- lock_list_lru(l, irq);
+ lock_list_lru(l, irq, irq_flags);
return l;
}
#endif /* CONFIG_MEMCG */
-/* The caller must ensure the memcg lifetime. */
-bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
- struct mem_cgroup *memcg)
+struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
+ struct mem_cgroup *memcg)
{
- struct list_lru_node *nlru = &lru->node[nid];
- struct list_lru_one *l;
+ return lock_list_lru_of_memcg(lru, nid, memcg, false, NULL, false);
+}
+
+void list_lru_unlock(struct list_lru_one *l)
+{
+ unlock_list_lru(l, false, NULL);
+}
+
+struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
+ struct mem_cgroup *memcg,
+ unsigned long *flags)
+{
+ return lock_list_lru_of_memcg(lru, nid, memcg, true, flags, false);
+}
+
+void list_lru_unlock_irqrestore(struct list_lru_one *l, unsigned long *flags)
+{
+ unlock_list_lru(l, true, flags);
+}
- l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
+bool __list_lru_add(struct list_lru *lru, struct list_lru_one *l,
+ struct list_head *item, int nid,
+ struct mem_cgroup *memcg)
+{
if (list_empty(item)) {
list_add_tail(item, &l->list);
/* Set shrinker bit if the first element was added */
if (!l->nr_items++)
set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
- unlock_list_lru(l, false);
- atomic_long_inc(&nlru->nr_items);
+ atomic_long_inc(&lru->node[nid].nr_items);
+ return true;
+ }
+ return false;
+}
+
+bool __list_lru_del(struct list_lru *lru, struct list_lru_one *l,
+ struct list_head *item, int nid)
+{
+ if (!list_empty(item)) {
+ list_del_init(item);
+ l->nr_items--;
+ atomic_long_dec(&lru->node[nid].nr_items);
return true;
}
- unlock_list_lru(l, false);
return false;
}
+/* The caller must ensure the memcg lifetime. */
+bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
+ struct mem_cgroup *memcg)
+{
+ struct list_lru_one *l;
+ bool ret;
+
+ l = list_lru_lock(lru, nid, memcg);
+ ret = __list_lru_add(lru, l, item, nid, memcg);
+ list_lru_unlock(l);
+ return ret;
+}
+
bool list_lru_add_obj(struct list_lru *lru, struct list_head *item)
{
bool ret;
@@ -184,19 +232,13 @@ EXPORT_SYMBOL_GPL(list_lru_add_obj);
bool list_lru_del(struct list_lru *lru, struct list_head *item, int nid,
struct mem_cgroup *memcg)
{
- struct list_lru_node *nlru = &lru->node[nid];
struct list_lru_one *l;
+ bool ret;
- l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
- if (!list_empty(item)) {
- list_del_init(item);
- l->nr_items--;
- unlock_list_lru(l, false);
- atomic_long_dec(&nlru->nr_items);
- return true;
- }
- unlock_list_lru(l, false);
- return false;
+ l = list_lru_lock(lru, nid, memcg);
+ ret = __list_lru_del(lru, l, item, nid);
+ list_lru_unlock(l);
+ return ret;
}
bool list_lru_del_obj(struct list_lru *lru, struct list_head *item)
@@ -269,7 +311,7 @@ __list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
unsigned long isolated = 0;
restart:
- l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, true);
+ l = lock_list_lru_of_memcg(lru, nid, memcg, irq_off, NULL, true);
if (!l)
return isolated;
list_for_each_safe(item, n, &l->list) {
@@ -310,7 +352,7 @@ __list_lru_walk_one(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
BUG();
}
}
- unlock_list_lru(l, irq_off);
+ unlock_list_lru(l, irq_off, NULL);
out:
return isolated;
}
--
2.53.0
On 3/12/26 21:51, Johannes Weiner wrote:
> Locking is currently internal to the list_lru API. However, a caller
> might want to keep auxiliary state synchronized with the LRU state.
>
> For example, the THP shrinker uses the lock of its custom LRU to keep
> PG_partially_mapped and vmstats consistent.
>
> To allow the THP shrinker to switch to list_lru, provide normal and
> irqsafe locking primitives as well as caller-locked variants of the
> addition and deletion functions.
>
> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
> ---
> include/linux/list_lru.h | 34 +++++++++++++
> mm/list_lru.c | 104 +++++++++++++++++++++++++++------------
> 2 files changed, 107 insertions(+), 31 deletions(-)
>
> diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
> index fe739d35a864..4afc02deb44d 100644
> --- a/include/linux/list_lru.h
> +++ b/include/linux/list_lru.h
> @@ -83,6 +83,40 @@ int memcg_list_lru_alloc(struct mem_cgroup *memcg, struct list_lru *lru,
> gfp_t gfp);
> void memcg_reparent_list_lrus(struct mem_cgroup *memcg, struct mem_cgroup *parent);
>
[...]
> static inline struct list_lru_one *
> lock_list_lru_of_memcg(struct list_lru *lru, int nid, struct mem_cgroup *memcg,
> - bool irq, bool skip_empty)
> + bool irq, unsigned long *irq_flags, bool skip_empty)
> {
> struct list_lru_one *l = &lru->node[nid].lru;
>
> - lock_list_lru(l, irq);
> + lock_list_lru(l, irq, irq_flags);
>
> return l;
> }
> #endif /* CONFIG_MEMCG */
>
> -/* The caller must ensure the memcg lifetime. */
> -bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> - struct mem_cgroup *memcg)
> +struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
> + struct mem_cgroup *memcg)
> {
> - struct list_lru_node *nlru = &lru->node[nid];
> - struct list_lru_one *l;
> + return lock_list_lru_of_memcg(lru, nid, memcg, false, NULL, false);
The two "bool" parameters really are ugly. Fortunately this is only an
internal function.
The callers are still a bit hard to read; we could add /*skip=empty=*/true).
like
return lock_list_lru_of_memcg(lru, nid, memcg, /* irq= */false, NULL,
/* skip_empty= */false);
Like we do in other code. But I guess we should do it consistently then
(or better add some proper flags).
Anyhow, something that could be cleaned up later.
> +}
> +
> +void list_lru_unlock(struct list_lru_one *l)
> +{
> + unlock_list_lru(l, false, NULL);
> +}
> +
> +struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
> + struct mem_cgroup *memcg,
> + unsigned long *flags)
> +{
> + return lock_list_lru_of_memcg(lru, nid, memcg, true, flags, false);
And here it gets really confusing. true false false ... am I reading
binary code?
I guess the second "false" should actually be "NULL" :)
> +}
> +
> +void list_lru_unlock_irqrestore(struct list_lru_one *l, unsigned long *flags)
> +{
> + unlock_list_lru(l, true, flags);
> +}
>
> - l = lock_list_lru_of_memcg(lru, nid, memcg, false, false);
> +bool __list_lru_add(struct list_lru *lru, struct list_lru_one *l,
> + struct list_head *item, int nid,
> + struct mem_cgroup *memcg)
> +{
> if (list_empty(item)) {
> list_add_tail(item, &l->list);
> /* Set shrinker bit if the first element was added */
> if (!l->nr_items++)
> set_shrinker_bit(memcg, nid, lru_shrinker_id(lru));
> - unlock_list_lru(l, false);
> - atomic_long_inc(&nlru->nr_items);
> + atomic_long_inc(&lru->node[nid].nr_items);
> + return true;
> + }
> + return false;
> +}
> +
> +bool __list_lru_del(struct list_lru *lru, struct list_lru_one *l,
> + struct list_head *item, int nid)
> +{
> + if (!list_empty(item)) {
> + list_del_init(item);
> + l->nr_items--;
> + atomic_long_dec(&lru->node[nid].nr_items);
> return true;
> }
> - unlock_list_lru(l, false);
> return false;
> }
>
> +/* The caller must ensure the memcg lifetime. */
> +bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> + struct mem_cgroup *memcg)
> +{
> + struct list_lru_one *l;
> + bool ret;
> +
> + l = list_lru_lock(lru, nid, memcg);
> + ret = __list_lru_add(lru, l, item, nid, memcg);
> + list_lru_unlock(l);
> + return ret;
> +}
Nice.
Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
--
Cheers,
David
On Tue, Mar 17, 2026 at 11:00:59AM +0100, David Hildenbrand (Arm) wrote:
> On 3/12/26 21:51, Johannes Weiner wrote:
> > -/* The caller must ensure the memcg lifetime. */
> > -bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> > - struct mem_cgroup *memcg)
> > +struct list_lru_one *list_lru_lock(struct list_lru *lru, int nid,
> > + struct mem_cgroup *memcg)
> > {
> > - struct list_lru_node *nlru = &lru->node[nid];
> > - struct list_lru_one *l;
> > + return lock_list_lru_of_memcg(lru, nid, memcg, false, NULL, false);
>
> The two "bool" parameters really are ugly. Fortunately this is only an
> internal function.
Yeah, I absolutely hate this too. I only didn't look further because
it's internal, but...
> The callers are still a bit hard to read; we could add /*skip=empty=*/true).
>
> like
>
> return lock_list_lru_of_memcg(lru, nid, memcg, /* irq= */false, NULL,
> /* skip_empty= */false);
>
> Like we do in other code. But I guess we should do it consistently then
> (or better add some proper flags).
>
> Anyhow, something that could be cleaned up later.
This is a great idea.
I have to send a v3 for the fix in __folio_freeze_and_split_unmapped()
and the lockdep key, so I'll make this change along with it.
> > +void list_lru_unlock(struct list_lru_one *l)
> > +{
> > + unlock_list_lru(l, false, NULL);
> > +}
> > +
> > +struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
> > + struct mem_cgroup *memcg,
> > + unsigned long *flags)
> > +{
> > + return lock_list_lru_of_memcg(lru, nid, memcg, true, flags, false);
>
> And here it gets really confusing. true false false ... am I reading
> binary code?
>
> I guess the second "false" should actually be "NULL" :)
Good catch, I'll fix that.
> > +/* The caller must ensure the memcg lifetime. */
> > +bool list_lru_add(struct list_lru *lru, struct list_head *item, int nid,
> > + struct mem_cgroup *memcg)
> > +{
> > + struct list_lru_one *l;
> > + bool ret;
> > +
> > + l = list_lru_lock(lru, nid, memcg);
> > + ret = __list_lru_add(lru, l, item, nid, memcg);
> > + list_lru_unlock(l);
> > + return ret;
> > +}
>
> Nice.
>
> Reviewed-by: David Hildenbrand (Arm) <david@kernel.org>
Thanks for your review!
On Tue, Mar 17, 2026 at 10:03:08AM -0400, Johannes Weiner wrote:
> On Tue, Mar 17, 2026 at 11:00:59AM +0100, David Hildenbrand (Arm) wrote:
> > On 3/12/26 21:51, Johannes Weiner wrote:
> > > +void list_lru_unlock(struct list_lru_one *l)
> > > +{
> > > + unlock_list_lru(l, false, NULL);
> > > +}
> > > +
> > > +struct list_lru_one *list_lru_lock_irqsave(struct list_lru *lru, int nid,
> > > + struct mem_cgroup *memcg,
> > > + unsigned long *flags)
> > > +{
> > > + return lock_list_lru_of_memcg(lru, nid, memcg, true, flags, false);
> >
> > And here it gets really confusing. true false false ... am I reading
> > binary code?
> >
> > I guess the second "false" should actually be "NULL" :)
>
> Good catch, I'll fix that.
That's actually "flags" haha. But it supports your point.
On 3/17/26 15:34, Johannes Weiner wrote: > On Tue, Mar 17, 2026 at 10:03:08AM -0400, Johannes Weiner wrote: >> On Tue, Mar 17, 2026 at 11:00:59AM +0100, David Hildenbrand (Arm) wrote: >>> >>> And here it gets really confusing. true false false ... am I reading >>> binary code? >>> >>> I guess the second "false" should actually be "NULL" :) >> >> Good catch, I'll fix that. > > That's actually "flags" haha. But it supports your point. Ohhh, haha. Well, at least nothing to fix then :) -- Cheers, David
© 2016 - 2026 Red Hat, Inc.