fs/lockd/svcsubs.c | 65 +++++++++++++++++++++++++++------------------- 1 file changed, 38 insertions(+), 27 deletions(-)
nlm_traverse_files() pins the current file with f_count++ across
a mutex_unlock for nlm_inspect_file(), but nothing pins the saved
next pointer. A concurrent nlm_release_file() can kfree the next
file during the unlock window, and the iterator dereferences freed
memory on the next loop step.
Pin both current and next before the lock-drop. Advance by
swapping the pinned cursors at the end of each iteration so next
is always held alive across the unlock.
Always call nlm_file_release() after dropping the iteration pin,
regardless of whether the file matched the predicate. Use
nlm_file_inuse(), which does a live walk of the inode lock list,
rather than the cached f_locks field, so skipped files that never
ran nlm_inspect_file() are evaluated correctly.
Cc: stable@vger.kernel.org
Fixes: 01df9c5e918a ("LOCKD: Fix a deadlock in nlm_traverse_files()")
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
---
fs/lockd/svcsubs.c | 65 +++++++++++++++++++++++++++-------------------
1 file changed, 38 insertions(+), 27 deletions(-)
Changes since v2:
- Use nlm_file_inuse() (live inode lock-list walk) for the cleanup
decision instead of the cached f_locks field. v2 skipped cleanup
entirely for non-matching files, leaking dead entries; the v1 fix
used cached f_locks which is stale for skipped files.
nlm_file_inuse() is correct on both paths because it walks the
actual POSIX lock list rather than relying on nlm_inspect_file()
having refreshed the cache. (Chuck Lever.)
Changes since v1:
- Fixed premature kfree of non-matching files.
Reproduced under UML + KASAN with 768 concurrent POSIX holders and
parallel /proc/fs/nfsd/unlock_filesystem writes. Stock kernel trips
KASAN slab-use-after-free in nlm_traverse_files. Patched v3 silent.
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dd0214dcb6950..0ce1e3711f003 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -284,47 +284,58 @@ nlm_file_inuse(struct nlm_file *file)
return 0;
}
-static void nlm_close_files(struct nlm_file *file)
-{
- if (file->f_file[O_RDONLY])
- nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
- if (file->f_file[O_WRONLY])
- nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
-}
-
/*
* Loop over all files in the file table.
*/
+static void nlm_file_release(struct nlm_file *file)
+{
+ if (!nlm_file_inuse(file))
+ nlm_delete_file(file);
+}
+
static int
nlm_traverse_files(void *data, nlm_host_match_fn_t match,
int (*is_failover_file)(void *data, struct nlm_file *file))
{
- struct hlist_node *next;
- struct nlm_file *file;
+ struct nlm_file *file, *next;
int i, ret = 0;
mutex_lock(&nlm_file_mutex);
for (i = 0; i < FILE_NRHASH; i++) {
- hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) {
- if (is_failover_file && !is_failover_file(data, file))
- continue;
+ file = hlist_entry_safe(nlm_files[i].first,
+ struct nlm_file, f_list);
+ if (file)
file->f_count++;
- mutex_unlock(&nlm_file_mutex);
-
- /* Traverse locks, blocks and shares of this file
- * and update file->f_locks count */
- if (nlm_inspect_file(data, file, match))
- ret = 1;
+ while (file) {
+ /*
+ * Pin the next neighbour before we drop the mutex
+ * for nlm_inspect_file(); a concurrent
+ * nlm_release_file() under the same mutex would
+ * otherwise be free to unlink and kfree it during
+ * the unlock window, leaving us to dereference a
+ * freed slab when we walked to next afterwards.
+ */
+ next = hlist_entry_safe(file->f_list.next,
+ struct nlm_file, f_list);
+ if (next)
+ next->f_count++;
+
+ if (!is_failover_file || is_failover_file(data, file)) {
+ mutex_unlock(&nlm_file_mutex);
+
+ /*
+ * Traverse locks, blocks and shares of this
+ * file and update file->f_locks count.
+ */
+ if (nlm_inspect_file(data, file, match))
+ ret = 1;
+
+ mutex_lock(&nlm_file_mutex);
+ }
- mutex_lock(&nlm_file_mutex);
file->f_count--;
- /* No more references to this file. Let go of it. */
- if (list_empty(&file->f_blocks) && !file->f_locks
- && !file->f_shares && !file->f_count) {
- hlist_del(&file->f_list);
- nlm_close_files(file);
- kfree(file);
- }
+ nlm_file_release(file);
+ file = next;
}
}
mutex_unlock(&nlm_file_mutex);
--
2.53.0
From: Chuck Lever <chuck.lever@oracle.com>
On Sun, 24 May 2026 07:55:27 -0400, Michael Bommarito wrote:
> nlm_traverse_files() pins the current file with f_count++ across
> a mutex_unlock for nlm_inspect_file(), but nothing pins the saved
> next pointer. A concurrent nlm_release_file() can kfree the next
> file during the unlock window, and the iterator dereferences freed
> memory on the next loop step.
>
> Pin both current and next before the lock-drop. Advance by
> swapping the pinned cursors at the end of each iteration so next
> is always held alive across the unlock.
>
> [...]
Applied to nfsd-testing, thanks!
[1/1] lockd: pin next file across nlm_inspect_file lock-drop
commit: 925b64e5fb12ad6517ff7c6729bf2fba7485f42c
--
Chuck Lever <chuck.lever@oracle.com>
© 2016 - 2026 Red Hat, Inc.