[PATCH] fs: move mntput_no_expire() slowpath into a dedicated routine

Mateusz Guzik posted 1 patch 2 months, 3 weeks ago
fs/namespace.c | 38 ++++++++++++++++++++++----------------
1 file changed, 22 insertions(+), 16 deletions(-)
[PATCH] fs: move mntput_no_expire() slowpath into a dedicated routine
Posted by Mateusz Guzik 2 months, 3 weeks ago
In the stock variant the compiler spills several registers on the stack
and employs stack smashing protection, adding even more code + a branch
on exit.

With the slowpath split out, the actual fast path is small enough that
the compiler inlines it for all callers -- the symbol is no longer
emitted.

Forcing noinline on it just for code-measurement purposes shows the fast
path dropping from 111 to 39 bytes.

Signed-off-by: Mateusz Guzik <mjguzik@gmail.com>
---

fast path prior:
    call   ffffffff81374630 <__fentry__>
    push   %r15
    push   %r14
    push   %r13
    push   %r12
    push   %rbp
    push   %rbx
    sub    $0x18,%rsp
    mov    %gs:0x2deef5d(%rip),%rbx        # ffffffff8454f008 <__stack_chk_guard>

    mov    %rbx,0x10(%rsp)
    mov    %rdi,%rbx
    mov    %rsp,(%rsp)
    mov    %rsp,0x8(%rsp)
    call   ffffffff814615f0 <__rcu_read_lock>
    mov    0xe8(%rbx),%rax
    test   %rax,%rax
    je     ffffffff817600ff <mntput_no_expire+0x6f>
    mov    0x58(%rbx),%rax
    decl   %gs:(%rax)
    call   ffffffff81466810 <__rcu_read_unlock>
    mov    0x10(%rsp),%rax
    sub    %gs:0x2deef22(%rip),%rax        # ffffffff8454f008 <__stack_chk_guard>

    jne    ffffffff8176030b <mntput_no_expire+0x27b>
    add    $0x18,%rsp
    pop    %rbx
    pop    %rbp
    pop    %r12
    pop    %r13
    pop    %r14
    pop    %r15
    jmp    ffffffff823091f0 <__pi___x86_return_thunk>

after (when forced to be out-of-line):
    call   ffffffff81374630 <__fentry__>
    push   %rbx
    mov    %rdi,%rbx
    call   ffffffff814615f0 <__rcu_read_lock>
    mov    0xe8(%rbx),%rax
    test   %rax,%rax
    je     ffffffff81760347 <mntput_no_expire+0x27>
    mov    0x58(%rbx),%rax
    decl   %gs:(%rax)
    pop    %rbx
    jmp    ffffffff81466810 <__rcu_read_unlock>

 fs/namespace.c | 38 ++++++++++++++++++++++----------------
 1 file changed, 22 insertions(+), 16 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index e8f1fe4bca06..6af6b082043c 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1346,26 +1346,12 @@ static void delayed_mntput(struct work_struct *unused)
 }
 static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
 
-static void mntput_no_expire(struct mount *mnt)
+static void noinline mntput_no_expire_slowpath(struct mount *mnt)
 {
 	LIST_HEAD(list);
 	int count;
 
-	rcu_read_lock();
-	if (likely(READ_ONCE(mnt->mnt_ns))) {
-		/*
-		 * Since we don't do lock_mount_hash() here,
-		 * ->mnt_ns can change under us.  However, if it's
-		 * non-NULL, then there's a reference that won't
-		 * be dropped until after an RCU delay done after
-		 * turning ->mnt_ns NULL.  So if we observe it
-		 * non-NULL under rcu_read_lock(), the reference
-		 * we are dropping is not the final one.
-		 */
-		mnt_add_count(mnt, -1);
-		rcu_read_unlock();
-		return;
-	}
+	VFS_BUG_ON(mnt->mnt_ns);
 	lock_mount_hash();
 	/*
 	 * make sure that if __legitimize_mnt() has not seen us grab
@@ -1416,6 +1402,26 @@ static void mntput_no_expire(struct mount *mnt)
 	cleanup_mnt(mnt);
 }
 
+static void mntput_no_expire(struct mount *mnt)
+{
+	rcu_read_lock();
+	if (likely(READ_ONCE(mnt->mnt_ns))) {
+		/*
+		 * Since we don't do lock_mount_hash() here,
+		 * ->mnt_ns can change under us.  However, if it's
+		 * non-NULL, then there's a reference that won't
+		 * be dropped until after an RCU delay done after
+		 * turning ->mnt_ns NULL.  So if we observe it
+		 * non-NULL under rcu_read_lock(), the reference
+		 * we are dropping is not the final one.
+		 */
+		mnt_add_count(mnt, -1);
+		rcu_read_unlock();
+		return;
+	}
+	mntput_no_expire_slowpath(mnt);
+}
+
 void mntput(struct vfsmount *mnt)
 {
 	if (mnt) {
-- 
2.48.1
Re: [PATCH] fs: move mntput_no_expire() slowpath into a dedicated routine
Posted by Christian Brauner 2 months, 2 weeks ago
On Fri, 14 Nov 2025 21:18:03 +0100, Mateusz Guzik wrote:
> In the stock variant the compiler spills several registers on the stack
> and employs stack smashing protection, adding even more code + a branch
> on exit..
> 
> The actual fast path is small enough that the compiler inlines it for
> all callers -- the symbol is no longer emitted.
> 
> [...]

Applied to the vfs-6.19.misc branch of the vfs/vfs.git tree.
Patches in the vfs-6.19.misc branch should appear in linux-next soon.

Please report any outstanding bugs that were missed during review in a
new review reply to the original patch series, allowing us to drop the
patch.

It's encouraged to provide Acked-bys and Reviewed-bys even though the
patch has now been applied. If possible, patch trailers will be updated.

Note that commit hashes shown below are subject to change due to rebase,
trailer updates or similar. If in doubt, please check the listed branch.

tree:   https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
branch: vfs-6.19.misc

[1/1] fs: move mntput_no_expire() slowpath into a dedicated routine
      https://git.kernel.org/vfs/vfs/c/bfef6e1f3488