fs/lockd/svcsubs.c | 64 +++++++++++++++++++++++++++++++--------------- 1 file changed, 44 insertions(+), 20 deletions(-)
nlm_traverse_files() pins the current file with f_count++ across
a mutex_unlock for nlm_inspect_file(), but nothing pins the saved
next pointer. A concurrent nlm_release_file() can kfree the next
file during the unlock window, and the iterator dereferences freed
memory on the next loop step.
Pin both current and next before the lock-drop. Advance by
swapping the pinned cursors at the end of each iteration so next
is always held alive across the unlock.
Only call nlm_file_release() for files that matched the predicate
and were inspected. Skipped files just get f_count-- to undo the
iteration pin; their f_locks is stale and must not drive cleanup.
Cc: stable@vger.kernel.org
Fixes: 01df9c5e918a ("LOCKD: Fix a deadlock in nlm_traverse_files()")
Assisted-by: Claude:claude-opus-4-7
Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
---
fs/lockd/svcsubs.c | 64 +++++++++++++++++++++++++++++++---------------
1 file changed, 44 insertions(+), 20 deletions(-)
Changes since v1:
- Fixed premature kfree of non-matching files: nlm_file_release()
is now called only for files that matched the predicate and were
inspected. Non-matching files just get f_count-- to undo the
iteration pin. (Spotted by sashiko.dev automated review.)
Reproduced under UML + KASAN with 768 concurrent POSIX holders and
parallel /proc/fs/nfsd/unlock_filesystem writes.
Stock kernel:
BUG: KASAN: slab-use-after-free in nlm_traverse_files+0x71d/0x9d0
Allocated by: nlm_lookup_file via nlm4svc_proc_lock
Freed by: another nlm_traverse_files instance
Patched v2 UML kernel ran the same harness silently.
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dd0214dcb6950..0b38125cf86ab 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -295,36 +295,60 @@ static void nlm_close_files(struct nlm_file *file)
/*
* Loop over all files in the file table.
*/
+static void nlm_file_release(struct nlm_file *file)
+{
+ if (list_empty(&file->f_blocks) && !file->f_locks
+ && !file->f_shares && !file->f_count) {
+ hlist_del(&file->f_list);
+ nlm_close_files(file);
+ kfree(file);
+ }
+}
+
static int
nlm_traverse_files(void *data, nlm_host_match_fn_t match,
int (*is_failover_file)(void *data, struct nlm_file *file))
{
- struct hlist_node *next;
- struct nlm_file *file;
+ struct nlm_file *file, *next;
int i, ret = 0;
mutex_lock(&nlm_file_mutex);
for (i = 0; i < FILE_NRHASH; i++) {
- hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) {
- if (is_failover_file && !is_failover_file(data, file))
- continue;
+ file = hlist_entry_safe(nlm_files[i].first,
+ struct nlm_file, f_list);
+ if (file)
file->f_count++;
- mutex_unlock(&nlm_file_mutex);
-
- /* Traverse locks, blocks and shares of this file
- * and update file->f_locks count */
- if (nlm_inspect_file(data, file, match))
- ret = 1;
-
- mutex_lock(&nlm_file_mutex);
- file->f_count--;
- /* No more references to this file. Let go of it. */
- if (list_empty(&file->f_blocks) && !file->f_locks
- && !file->f_shares && !file->f_count) {
- hlist_del(&file->f_list);
- nlm_close_files(file);
- kfree(file);
+ while (file) {
+ /*
+ * Pin the next neighbour before we drop the mutex
+ * for nlm_inspect_file(); a concurrent
+ * nlm_release_file() under the same mutex would
+ * otherwise be free to unlink and kfree it during
+ * the unlock window, leaving us to dereference a
+ * freed slab when we walked to next afterwards.
+ */
+ next = hlist_entry_safe(file->f_list.next,
+ struct nlm_file, f_list);
+ if (next)
+ next->f_count++;
+
+ if (!is_failover_file || is_failover_file(data, file)) {
+ mutex_unlock(&nlm_file_mutex);
+
+ /*
+ * Traverse locks, blocks and shares of this
+ * file and update file->f_locks count.
+ */
+ if (nlm_inspect_file(data, file, match))
+ ret = 1;
+
+ mutex_lock(&nlm_file_mutex);
+ file->f_count--;
+ nlm_file_release(file);
+ } else {
+ file->f_count--;
}
+ file = next;
}
}
mutex_unlock(&nlm_file_mutex);
--
2.53.0
On Sat, May 23, 2026, at 8:30 AM, Michael Bommarito wrote:
> nlm_traverse_files() pins the current file with f_count++ across
> a mutex_unlock for nlm_inspect_file(), but nothing pins the saved
> next pointer. A concurrent nlm_release_file() can kfree the next
> file during the unlock window, and the iterator dereferences freed
> memory on the next loop step.
>
> Pin both current and next before the lock-drop. Advance by
> swapping the pinned cursors at the end of each iteration so next
> is always held alive across the unlock.
>
> Only call nlm_file_release() for files that matched the predicate
> and were inspected. Skipped files just get f_count-- to undo the
> iteration pin; their f_locks is stale and must not drive cleanup.
>
> Cc: stable@vger.kernel.org
> Fixes: 01df9c5e918a ("LOCKD: Fix a deadlock in nlm_traverse_files()")
> Assisted-by: Claude:claude-opus-4-7
> Signed-off-by: Michael Bommarito <michael.bommarito@gmail.com>
> ---
> fs/lockd/svcsubs.c | 64 +++++++++++++++++++++++++++++++---------------
> 1 file changed, 44 insertions(+), 20 deletions(-)
>
>
> Changes since v1:
> - Fixed premature kfree of non-matching files: nlm_file_release()
> is now called only for files that matched the predicate and were
> inspected. Non-matching files just get f_count-- to undo the
> iteration pin. (Spotted by sashiko.dev automated review.)
>
> Reproduced under UML + KASAN with 768 concurrent POSIX holders and
> parallel /proc/fs/nfsd/unlock_filesystem writes.
>
> Stock kernel:
>
> BUG: KASAN: slab-use-after-free in nlm_traverse_files+0x71d/0x9d0
>
> Allocated by: nlm_lookup_file via nlm4svc_proc_lock
> Freed by: another nlm_traverse_files instance
>
> Patched v2 UML kernel ran the same harness silently.
>
> diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
> index dd0214dcb6950..0b38125cf86ab 100644
> --- a/fs/lockd/svcsubs.c
> +++ b/fs/lockd/svcsubs.c
> @@ -295,36 +295,60 @@ static void nlm_close_files(struct nlm_file *file)
> /*
> * Loop over all files in the file table.
> */
> +static void nlm_file_release(struct nlm_file *file)
> +{
> + if (list_empty(&file->f_blocks) && !file->f_locks
> + && !file->f_shares && !file->f_count) {
> + hlist_del(&file->f_list);
> + nlm_close_files(file);
> + kfree(file);
> + }
> +}
> +
> static int
> nlm_traverse_files(void *data, nlm_host_match_fn_t match,
> int (*is_failover_file)(void *data, struct nlm_file *file))
> {
> - struct hlist_node *next;
> - struct nlm_file *file;
> + struct nlm_file *file, *next;
> int i, ret = 0;
>
> mutex_lock(&nlm_file_mutex);
> for (i = 0; i < FILE_NRHASH; i++) {
> - hlist_for_each_entry_safe(file, next, &nlm_files[i], f_list) {
> - if (is_failover_file && !is_failover_file(data, file))
> - continue;
> + file = hlist_entry_safe(nlm_files[i].first,
> + struct nlm_file, f_list);
> + if (file)
> file->f_count++;
> - mutex_unlock(&nlm_file_mutex);
> -
> - /* Traverse locks, blocks and shares of this file
> - * and update file->f_locks count */
> - if (nlm_inspect_file(data, file, match))
> - ret = 1;
> -
> - mutex_lock(&nlm_file_mutex);
> - file->f_count--;
> - /* No more references to this file. Let go of it. */
> - if (list_empty(&file->f_blocks) && !file->f_locks
> - && !file->f_shares && !file->f_count) {
> - hlist_del(&file->f_list);
> - nlm_close_files(file);
> - kfree(file);
> + while (file) {
> + /*
> + * Pin the next neighbour before we drop the mutex
> + * for nlm_inspect_file(); a concurrent
> + * nlm_release_file() under the same mutex would
> + * otherwise be free to unlink and kfree it during
> + * the unlock window, leaving us to dereference a
> + * freed slab when we walked to next afterwards.
> + */
> + next = hlist_entry_safe(file->f_list.next,
> + struct nlm_file, f_list);
> + if (next)
> + next->f_count++;
> +
> + if (!is_failover_file || is_failover_file(data, file)) {
> + mutex_unlock(&nlm_file_mutex);
> +
> + /*
> + * Traverse locks, blocks and shares of this
> + * file and update file->f_locks count.
> + */
> + if (nlm_inspect_file(data, file, match))
> + ret = 1;
> +
> + mutex_lock(&nlm_file_mutex);
> + file->f_count--;
> + nlm_file_release(file);
> + } else {
> + file->f_count--;
> }
> + file = next;
> }
> }
> mutex_unlock(&nlm_file_mutex);
> --
> 2.53.0
Codex (gpt-5.5 xhigh) review reports:
> When nlmsvc_unlock_all_by_sb() skips a file from another superblock,
> this branch can be handling an entry that was already pinned as the
> previous file's saved next. If that entry's last real nlm_release_file()
> ran while the traversal pin was held, it saw f_count > 0 and skipped
> nlm_file_inuse()/deletion; this decrement then takes f_count to zero
> without any cleanup, leaving the nlm_file and its open f_file
> references hashed indefinitely.
It appears that sashiko identified the same issue:
https://sashiko.dev/#/patchset/20260523123053.3480369-1-michael.bommarito@gmail.com?part=1
--
Chuck Lever
© 2016 - 2026 Red Hat, Inc.