fs/nfsd/filecache.c | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-)
The shrinker, GC worker, and fsnotify/lease callbacks can unhash an
nfsd_file from the rhashtable and then call
nfsd_file_dispose_list_delayed() to move it to the per-net dispose list.
If nfsd_file_cache_shutdown_net() runs concurrently, its rhashtable walk
misses the already-unhashed file, and its drain of the per-net dispose
list can run before the file has been queued. The file then sits on
the per-net list with no thread to drain it, leaking both the file and
its associated state.
The GC worker and shrinker already hold nfsd_gc_lock while walking the
LRU, but in the original code they release it before calling
nfsd_file_dispose_list_delayed(). The fsnotify/lease path
(nfsd_file_close_inode) has no synchronisation at all.
Fix this by:
1. Widening nfsd_gc_lock in both nfsd_file_gc() and nfsd_file_lru_scan()
to cover the nfsd_file_dispose_list_delayed() call.
2. Wrapping nfsd_file_close_inode() in nfsd_gc_lock so that all three
callers of nfsd_file_dispose_list_delayed() hold the lock.
3. Adding a spin_lock/unlock(nfsd_gc_lock) barrier in
nfsd_file_cache_shutdown_net() after the purge, so that any
in-progress disposal has fully completed before the per-net list
is drained.
All operations inside the lock are non-sleeping (rhashtable lookups,
atomic bit/refcount ops, list moves, svc_wake_up), so the spinlock is
appropriate.
Fixes: 43fd953fa7e2 ("nfsd: simplify the delayed disposal list code")
Signed-off-by: Jeff Layton <jlayton@kernel.org>
Assisted-by: Claude:claude-opus-4-8
---
This is the remaining patch from the 9 patch series I posted earlier
this week. I'm only sending v4 to fix up a minor contextual diff vs. a
patch in Chuck's nfsd-testing branch. This one should merge cleanly.
---
Changes in v4:
- Fix minor contextual conflict in a comment
- Link to v3: https://lore.kernel.org/r/20260604-nfsd-testing-v3-1-c9c58cc45c71@kernel.org
Changes in v3:
- Drop already-merged patches from series
- Protect against race vs. pernet teardown by using nfsd_gc_lock instead
of taking net references
- Link to v2: https://lore.kernel.org/r/20260602-nfsd-testing-v2-0-e4ea62e3cd5c@kernel.org
Changes in v2:
- rework filecache patch to only take net ref at disposal time
- fix ordering of operations in nfsd4_release_compoundargs()
- add Al's patch to simplify nfsd_cross_mnt() cleanup
- Link to v1: https://lore.kernel.org/r/20260601-nfsd-testing-v1-0-d0f61e536df8@kernel.org
---
fs/nfsd/filecache.c | 35 ++++++++++++++++++++++++++---------
1 file changed, 26 insertions(+), 9 deletions(-)
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index fccef285f47b..0f7d8c85aff3 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -55,6 +55,14 @@
/* We only care about NFSD_MAY_READ/WRITE for this cache */
#define NFSD_FILE_MAY_MASK (NFSD_MAY_READ|NFSD_MAY_WRITE|NFSD_MAY_LOCALIO)
+/* If the shrinker runs between calls to list_lru_walk_node() in
+ * nfsd_file_gc(), the "remaining" count will be wrong. This could
+ * result in premature freeing of some files. This may not matter much
+ * but is easy to fix with this spinlock which temporarily disables
+ * the shrinker.
+ */
+static DEFINE_SPINLOCK(nfsd_gc_lock);
+
static DEFINE_PER_CPU(unsigned long, nfsd_file_cache_hits);
static DEFINE_PER_CPU(unsigned long, nfsd_file_acquisitions);
static DEFINE_PER_CPU(unsigned long, nfsd_file_allocations);
@@ -426,10 +434,16 @@ nfsd_file_dispose_list(struct list_head *dispose)
* Transfers each file to the dispose list in its nfsd_net and wakes an nfsd
* thread to do the actual close. This keeps the cost of fput() in the nfsd
* threads rather than in the shrinker or GC worker.
+ *
+ * All callers must hold nfsd_gc_lock, so that nfsd_file_cache_shutdown_net()
+ * can synchronise against them before draining the per-net dispose list.
+ * This guarantees nf_net is still live when we call net_generic().
*/
static void
nfsd_file_dispose_list_delayed(struct list_head *dispose)
{
+ lockdep_assert_held(&nfsd_gc_lock);
+
while (!list_empty(dispose)) {
struct nfsd_file *nf = list_first_entry(dispose,
struct nfsd_file, nf_gc);
@@ -560,13 +574,6 @@ nfsd_file_gc_cb(struct list_head *item, struct list_lru_one *lru,
return nfsd_file_lru_cb(item, lru, arg);
}
-/* If the shrinker runs between calls to list_lru_walk_node() in
- * nfsd_file_gc(), the "remaining" count will be wrong. This could
- * result in premature freeing of some files. This may not matter much
- * but is easy to fix with this spinlock which temporarily disables
- * the shrinker.
- */
-static DEFINE_SPINLOCK(nfsd_gc_lock);
static void
nfsd_file_gc(void)
{
@@ -589,9 +596,9 @@ nfsd_file_gc(void)
remaining = 0;
}
}
+ nfsd_file_dispose_list_delayed(&dispose);
spin_unlock(&nfsd_gc_lock);
trace_nfsd_file_gc_removed(ret, list_lru_count(&nfsd_file_lru));
- nfsd_file_dispose_list_delayed(&dispose);
}
static void
@@ -619,9 +626,9 @@ nfsd_file_lru_scan(struct shrinker *s, struct shrink_control *sc)
ret = list_lru_shrink_walk(&nfsd_file_lru, sc,
nfsd_file_lru_cb, &dispose);
+ nfsd_file_dispose_list_delayed(&dispose);
spin_unlock(&nfsd_gc_lock);
trace_nfsd_file_shrinker_removed(ret, list_lru_count(&nfsd_file_lru));
- nfsd_file_dispose_list_delayed(&dispose);
return ret;
}
@@ -707,8 +714,10 @@ nfsd_file_close_inode(struct inode *inode)
{
LIST_HEAD(dispose);
+ spin_lock(&nfsd_gc_lock);
nfsd_file_queue_for_close(inode, &dispose);
nfsd_file_dispose_list_delayed(&dispose);
+ spin_unlock(&nfsd_gc_lock);
}
/**
@@ -1012,6 +1021,14 @@ nfsd_file_cache_shutdown_net(struct net *net)
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
nfsd_file_cache_purge(net);
+ /*
+ * Ensure any in-progress shrinker, GC, or fsnotify/lease callback
+ * (all of which hold nfsd_gc_lock while calling
+ * nfsd_file_dispose_list_delayed()) has fully completed before
+ * draining the per-net dispose list.
+ */
+ spin_lock(&nfsd_gc_lock);
+ spin_unlock(&nfsd_gc_lock);
nfsd_file_dispose_list(&nn->fcache_dispose_list);
}
---
base-commit: fb997598bc8c885e9d17b19b809009bd34f39779
change-id: 20260601-nfsd-testing-e3509d5e035e
Best regards,
--
Jeff Layton <jlayton@kernel.org>
From: Chuck Lever <chuck.lever@oracle.com>
On Thu, 04 Jun 2026 10:31:09 -0400, Jeff Layton wrote:
> The shrinker, GC worker, and fsnotify/lease callbacks can unhash an
> nfsd_file from the rhashtable and then call
> nfsd_file_dispose_list_delayed() to move it to the per-net dispose list.
> If nfsd_file_cache_shutdown_net() runs concurrently, its rhashtable walk
> misses the already-unhashed file, and its drain of the per-net dispose
> list can run before the file has been queued. The file then sits on
> the per-net list with no thread to drain it, leaking both the file and
> its associated state.
>
> [...]
Applied to nfsd-testing, thanks!
[1/1] nfsd: close shrinker/GC/fsnotify vs per-net shutdown race in filecache
commit: 2f8b2f5b13f99fdd76a434ab284e09dbc7afb212
--
Chuck Lever <chuck.lever@oracle.com>
© 2016 - 2026 Red Hat, Inc.