[PATCH v2 18/33] mnt: support ns lookup

Christian Brauner posted 33 patches 2 weeks, 6 days ago
[PATCH v2 18/33] mnt: support ns lookup
Posted by Christian Brauner 2 weeks, 6 days ago
Move the mount namespace to the generic ns lookup infrastructure.
This allows us to drop a bunch of members from struct mnt_namespace.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/mount.h     |  10 +---
 fs/namespace.c | 141 +++++++++++++--------------------------------------------
 fs/nsfs.c      |   4 +-
 3 files changed, 35 insertions(+), 120 deletions(-)

diff --git a/fs/mount.h b/fs/mount.h
index 97737051a8b9..76bf863c9ae2 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -17,11 +17,7 @@ struct mnt_namespace {
 	};
 	struct user_namespace	*user_ns;
 	struct ucounts		*ucounts;
-	u64			seq;	/* Sequence number to prevent loops */
-	union {
-		wait_queue_head_t	poll;
-		struct rcu_head		mnt_ns_rcu;
-	};
+	wait_queue_head_t	poll;
 	u64			seq_origin; /* Sequence number of origin mount namespace */
 	u64 event;
 #ifdef CONFIG_FSNOTIFY
@@ -30,8 +26,6 @@ struct mnt_namespace {
 #endif
 	unsigned int		nr_mounts; /* # of mounts in the namespace */
 	unsigned int		pending_mounts;
-	struct rb_node		mnt_ns_tree_node; /* node in the mnt_ns_tree */
-	struct list_head	mnt_ns_list; /* entry in the sequential list of mounts namespace */
 	refcount_t		passive; /* number references not pinning @mounts */
 } __randomize_layout;
 
@@ -173,7 +167,7 @@ static inline bool is_local_mountpoint(const struct dentry *dentry)
 
 static inline bool is_anon_ns(struct mnt_namespace *ns)
 {
-	return ns->seq == 0;
+	return ns->ns.ns_id == 0;
 }
 
 static inline bool anon_ns_root(const struct mount *m)
diff --git a/fs/namespace.c b/fs/namespace.c
index 14c5cdbdd6e1..40a8d75f6b16 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -33,6 +33,7 @@
 #include <linux/shmem_fs.h>
 #include <linux/mnt_idmapping.h>
 #include <linux/pidfs.h>
+#include <linux/nstree.h>
 
 #include "pnode.h"
 #include "internal.h"
@@ -80,13 +81,10 @@ static DECLARE_RWSEM(namespace_sem);
 static HLIST_HEAD(unmounted);	/* protected by namespace_sem */
 static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
 static struct mnt_namespace *emptied_ns; /* protected by namespace_sem */
-static DEFINE_SEQLOCK(mnt_ns_tree_lock);
 
 #ifdef CONFIG_FSNOTIFY
 LIST_HEAD(notify_list); /* protected by namespace_sem */
 #endif
-static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
-static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */
 
 enum mount_kattr_flags_t {
 	MOUNT_KATTR_RECURSE		= (1 << 0),
@@ -119,53 +117,12 @@ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
 
 static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
 {
+	struct ns_common *ns;
+
 	if (!node)
 		return NULL;
-	return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
-}
-
-static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b)
-{
-	struct mnt_namespace *ns_a = node_to_mnt_ns(a);
-	struct mnt_namespace *ns_b = node_to_mnt_ns(b);
-	u64 seq_a = ns_a->seq;
-	u64 seq_b = ns_b->seq;
-
-	if (seq_a < seq_b)
-		return -1;
-	if (seq_a > seq_b)
-		return 1;
-	return 0;
-}
-
-static inline void mnt_ns_tree_write_lock(void)
-{
-	write_seqlock(&mnt_ns_tree_lock);
-}
-
-static inline void mnt_ns_tree_write_unlock(void)
-{
-	write_sequnlock(&mnt_ns_tree_lock);
-}
-
-static void mnt_ns_tree_add(struct mnt_namespace *ns)
-{
-	struct rb_node *node, *prev;
-
-	mnt_ns_tree_write_lock();
-	node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp);
-	/*
-	 * If there's no previous entry simply add it after the
-	 * head and if there is add it after the previous entry.
-	 */
-	prev = rb_prev(&ns->mnt_ns_tree_node);
-	if (!prev)
-		list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list);
-	else
-		list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list);
-	mnt_ns_tree_write_unlock();
-
-	WARN_ON_ONCE(node);
+	ns = rb_entry(node, struct ns_common, ns_tree_node);
+	return container_of(ns, struct mnt_namespace, ns);
 }
 
 static void mnt_ns_release(struct mnt_namespace *ns)
@@ -181,32 +138,16 @@ DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
 
 static void mnt_ns_release_rcu(struct rcu_head *rcu)
 {
-	mnt_ns_release(container_of(rcu, struct mnt_namespace, mnt_ns_rcu));
+	mnt_ns_release(container_of(rcu, struct mnt_namespace, ns.ns_rcu));
 }
 
 static void mnt_ns_tree_remove(struct mnt_namespace *ns)
 {
 	/* remove from global mount namespace list */
-	if (!is_anon_ns(ns)) {
-		mnt_ns_tree_write_lock();
-		rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
-		list_bidir_del_rcu(&ns->mnt_ns_list);
-		mnt_ns_tree_write_unlock();
-	}
-
-	call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu);
-}
-
-static int mnt_ns_find(const void *key, const struct rb_node *node)
-{
-	const u64 mnt_ns_id = *(u64 *)key;
-	const struct mnt_namespace *ns = node_to_mnt_ns(node);
+	if (!is_anon_ns(ns))
+		ns_tree_remove(ns);
 
-	if (mnt_ns_id < ns->seq)
-		return -1;
-	if (mnt_ns_id > ns->seq)
-		return 1;
-	return 0;
+	call_rcu(&ns->ns.ns_rcu, mnt_ns_release_rcu);
 }
 
 /*
@@ -225,28 +166,21 @@ static int mnt_ns_find(const void *key, const struct rb_node *node)
  */
 static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
 {
-	struct mnt_namespace *ns;
-	struct rb_node *node;
-	unsigned int seq;
+	struct mnt_namespace *mnt_ns;
+	struct ns_common *ns;
 
 	guard(rcu)();
-	do {
-		seq = read_seqbegin(&mnt_ns_tree_lock);
-		node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, mnt_ns_find);
-		if (node)
-			break;
-	} while (read_seqretry(&mnt_ns_tree_lock, seq));
-
-	if (!node)
+	ns = ns_tree_lookup_rcu(mnt_ns_id, CLONE_NEWNS);
+	if (!ns)
 		return NULL;
 
 	/*
 	 * The last reference count is put with RCU delay so we can
 	 * unconditonally acquire a reference here.
 	 */
-	ns = node_to_mnt_ns(node);
-	refcount_inc(&ns->passive);
-	return ns;
+	mnt_ns = container_of(ns, struct mnt_namespace, ns);
+	refcount_inc(&mnt_ns->passive);
+	return mnt_ns;
 }
 
 static inline void lock_mount_hash(void)
@@ -1017,7 +951,7 @@ static inline bool check_anonymous_mnt(struct mount *mnt)
 		return false;
 
 	seq = mnt->mnt_ns->seq_origin;
-	return !seq || (seq == current->nsproxy->mnt_ns->seq);
+	return !seq || (seq == current->nsproxy->mnt_ns->ns.ns_id);
 }
 
 /*
@@ -2155,19 +2089,16 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt)
 
 struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous)
 {
+	struct ns_common *ns;
+
 	guard(rcu)();
 
 	for (;;) {
-		struct list_head *list;
-
-		if (previous)
-			list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list));
-		else
-			list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list));
-		if (list_is_head(list, &mnt_ns_list))
-			return ERR_PTR(-ENOENT);
+		ns = ns_tree_adjoined_rcu(mntns, previous);
+		if (IS_ERR(ns))
+			return ERR_CAST(ns);
 
-		mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list);
+		mntns = to_mnt_ns(ns);
 
 		/*
 		 * The last passive reference count is put with RCU
@@ -2207,7 +2138,7 @@ static bool mnt_ns_loop(struct dentry *dentry)
 	if (!mnt_ns)
 		return false;
 
-	return current->nsproxy->mnt_ns->seq >= mnt_ns->seq;
+	return current->nsproxy->mnt_ns->ns.ns_id >= mnt_ns->ns.ns_id;
 }
 
 struct mount *copy_tree(struct mount *src_root, struct dentry *dentry,
@@ -3070,7 +3001,7 @@ static struct file *open_detached_copy(struct path *path, bool recursive)
 		if (is_anon_ns(src_mnt_ns))
 			ns->seq_origin = src_mnt_ns->seq_origin;
 		else
-			ns->seq_origin = src_mnt_ns->seq;
+			ns->seq_origin = src_mnt_ns->ns.ns_id;
 	}
 
 	mnt = __do_loopback(path, recursive);
@@ -4153,15 +4084,6 @@ static void free_mnt_ns(struct mnt_namespace *ns)
 	mnt_ns_tree_remove(ns);
 }
 
-/*
- * Assign a sequence number so we can detect when we attempt to bind
- * mount a reference to an older mount namespace into the current
- * mount namespace, preventing reference counting loops.  A 64bit
- * number incrementing at 10Ghz will take 12,427 years to wrap which
- * is effectively never, so we can ignore the possibility.
- */
-static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1);
-
 static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool anon)
 {
 	struct mnt_namespace *new_ns;
@@ -4185,11 +4107,11 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
 		return ERR_PTR(ret);
 	}
 	if (!anon)
-		new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
+		ns_tree_gen_id(&new_ns->ns);
+	RB_CLEAR_NODE(&new_ns->ns.ns_tree_node);
+	INIT_LIST_HEAD(&new_ns->ns.ns_list_node);
 	refcount_set(&new_ns->passive, 1);
 	new_ns->mounts = RB_ROOT;
-	INIT_LIST_HEAD(&new_ns->mnt_ns_list);
-	RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
 	init_waitqueue_head(&new_ns->poll);
 	new_ns->user_ns = get_user_ns(user_ns);
 	new_ns->ucounts = ucounts;
@@ -4275,7 +4197,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
 	if (pwdmnt)
 		mntput(pwdmnt);
 
-	mnt_ns_tree_add(new_ns);
+	ns_tree_add_raw(new_ns);
 	return new_ns;
 }
 
@@ -5385,7 +5307,7 @@ static int statmount_sb_source(struct kstatmount *s, struct seq_file *seq)
 static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
 {
 	s->sm.mask |= STATMOUNT_MNT_NS_ID;
-	s->sm.mnt_ns_id = ns->seq;
+	s->sm.mnt_ns_id = ns->ns.ns_id;
 }
 
 static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
@@ -6090,7 +6012,6 @@ static void __init init_mount_tree(void)
 	ns = alloc_mnt_ns(&init_user_ns, true);
 	if (IS_ERR(ns))
 		panic("Can't allocate initial namespace");
-	ns->seq = atomic64_inc_return(&mnt_ns_seq);
 	ns->ns.inum = PROC_MNT_INIT_INO;
 	m = real_mount(mnt);
 	ns->root = m;
@@ -6105,7 +6026,7 @@ static void __init init_mount_tree(void)
 	set_fs_pwd(current->fs, &root);
 	set_fs_root(current->fs, &root);
 
-	mnt_ns_tree_add(ns);
+	ns_tree_add(ns);
 }
 
 void __init mnt_init(void)
diff --git a/fs/nsfs.c b/fs/nsfs.c
index d016d36272d4..80e631aeb3ce 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -139,7 +139,7 @@ static int copy_ns_info_to_user(const struct mnt_namespace *mnt_ns,
 	 * the size value will be set to the size the kernel knows about.
 	 */
 	kinfo->size		= min(usize, sizeof(*kinfo));
-	kinfo->mnt_ns_id	= mnt_ns->seq;
+	kinfo->mnt_ns_id	= mnt_ns->ns.ns_id;
 	kinfo->nr_mounts	= READ_ONCE(mnt_ns->nr_mounts);
 	/* Subtract the root mount of the mount namespace. */
 	if (kinfo->nr_mounts)
@@ -221,7 +221,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
 
 		mnt_ns = container_of(ns, struct mnt_namespace, ns);
 		idp = (__u64 __user *)arg;
-		id = mnt_ns->seq;
+		id = mnt_ns->ns.ns_id;
 		return put_user(id, idp);
 	}
 	case NS_GET_PID_FROM_PIDNS:

-- 
2.47.3
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Al Viro 2 weeks, 2 days ago
On Fri, Sep 12, 2025 at 01:52:41PM +0200, Christian Brauner wrote:
> Move the mount namespace to the generic ns lookup infrastructure.
> This allows us to drop a bunch of members from struct mnt_namespace.

>  static void mnt_ns_tree_remove(struct mnt_namespace *ns)
>  {
>  	/* remove from global mount namespace list */
> -	if (!is_anon_ns(ns)) {
> -		mnt_ns_tree_write_lock();
> -		rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
> -		list_bidir_del_rcu(&ns->mnt_ns_list);
> -		mnt_ns_tree_write_unlock();
> -	}
> -
> -	call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu);
> -}
> -
> -static int mnt_ns_find(const void *key, const struct rb_node *node)
> -{
> -	const u64 mnt_ns_id = *(u64 *)key;
> -	const struct mnt_namespace *ns = node_to_mnt_ns(node);
> +	if (!is_anon_ns(ns))
> +		ns_tree_remove(ns);

Conflicts with "mnt_ns_tree_remove(): DTRT if mnt_ns had never been
added to mnt_ns_list".

FWIW, the right thing to do here is

	if (!RB_EMPTY_NODE(to_ns_common(ns)->ns_tree_node))
		ns_tree_remove(ns);
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Al Viro 2 weeks, 2 days ago
On Tue, Sep 16, 2025 at 04:56:33AM +0100, Al Viro wrote:
> 	if (!RB_EMPTY_NODE(to_ns_common(ns)->ns_tree_node))

 	if (!RB_EMPTY_NODE(&to_ns_common(ns)->ns_tree_node))

obviously...
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Al Viro 2 weeks, 2 days ago
On Tue, Sep 16, 2025 at 04:59:49AM +0100, Al Viro wrote:
> On Tue, Sep 16, 2025 at 04:56:33AM +0100, Al Viro wrote:
> > 	if (!RB_EMPTY_NODE(to_ns_common(ns)->ns_tree_node))
> 
>  	if (!RB_EMPTY_NODE(&to_ns_common(ns)->ns_tree_node))
> 
> obviously...

FWIW, how about the following - I put the commit below into never-rebased
branch, pull it into #work.mount and you do the same to your branch
just prior to 18/33?  The difference from one in #work.mount is that
this variant checks RB_EMPTY_NODE(&ns->mnt_ns_tree_node) instead of
list_empty(&ns->mnt_ns_list).  The reasons why it's safe lockless are
pretty much the same...

Objections?  Does vfs/vfs.git #no-rebase-mnt_ns_tree_remove look sane
to you?

mnt_ns_tree_remove(): DTRT if mnt_ns had never been added to mnt_ns_list
    
Actual removal is done under the lock, but for checking if we need to bother
the lockless RB_EMPTY_NODE() is safe - either that namespace had never
been added to mnt_ns_tree, in which case the node will stay empty, or
whoever had allocated it has called mnt_ns_tree_add() and it has already
run to completion.  After that point RB_EMPTY_NODE() will become false and
will remain false, no matter what we do with other nodes in the tree.
    
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
diff --git a/fs/namespace.c b/fs/namespace.c
index ae6d1312b184..39afeb521a80 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -187,7 +187,7 @@ static void mnt_ns_release_rcu(struct rcu_head *rcu)
 static void mnt_ns_tree_remove(struct mnt_namespace *ns)
 {
 	/* remove from global mount namespace list */
-	if (!is_anon_ns(ns)) {
+	if (!RB_EMPTY_NODE(&ns->mnt_ns_tree_node)) {
 		mnt_ns_tree_write_lock();
 		rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
 		list_bidir_del_rcu(&ns->mnt_ns_list);
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Christian Brauner 2 weeks, 1 day ago
On Tue, Sep 16, 2025 at 05:46:48AM +0100, Al Viro wrote:
> On Tue, Sep 16, 2025 at 04:59:49AM +0100, Al Viro wrote:
> > On Tue, Sep 16, 2025 at 04:56:33AM +0100, Al Viro wrote:
> > > 	if (!RB_EMPTY_NODE(to_ns_common(ns)->ns_tree_node))
> > 
> >  	if (!RB_EMPTY_NODE(&to_ns_common(ns)->ns_tree_node))
> > 
> > obviously...
> 
> FWIW, how about the following - I put the commit below into never-rebased
> branch, pull it into #work.mount and you do the same to your branch
> just prior to 18/33?  The difference from one in #work.mount is that
> this variant checks RB_EMPTY_NODE(&ns->mnt_ns_tree_node) instead of
> list_empty(&ns->mnt_ns_list).  The reasons why it's safe lockless are
> pretty much the same...
> 
> Objections?  Does vfs/vfs.git #no-rebases-mnt_ns_tree_remove look sane
> for you?

Perfect, thank you!

> 
> mnt_ns_tree_remove(): DTRT if mnt_ns had never been added to mnt_ns_list
>     
> Actual removal is done under the lock, but for checking if need to bother
> the lockless RB_EMPTY_NODE() is safe - either that namespace had never
> been added to mnt_ns_tree, in which case the the node will stay empty, or
> whoever had allocated it has called mnt_ns_tree_add() and it has already
> run to completion.  After that point RB_EMPTY_NODE() will become false and
> will remain false, no matter what we do with other nodes in the tree.
>     
> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
> ---
> diff --git a/fs/namespace.c b/fs/namespace.c
> index ae6d1312b184..39afeb521a80 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -187,7 +187,7 @@ static void mnt_ns_release_rcu(struct rcu_head *rcu)
>  static void mnt_ns_tree_remove(struct mnt_namespace *ns)
>  {
>  	/* remove from global mount namespace list */
> -	if (!is_anon_ns(ns)) {
> +	if (!RB_EMPTY_NODE(&ns->mnt_ns_tree_node)) {
>  		mnt_ns_tree_write_lock();
>  		rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
>  		list_bidir_del_rcu(&ns->mnt_ns_list);
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Al Viro 2 weeks ago
On Wed, Sep 17, 2025 at 11:50:29AM +0200, Christian Brauner wrote:

> Perfect, thank you!

All right, #work.mount contains this now:

commit 1b966c4471e6c3862a14f80aeb316ef636d40f84
Merge: 57a7b5b0b6d9 38f4885088fc
Author: Al Viro <viro@zeniv.linux.org.uk>
Date:   Wed Sep 17 15:58:06 2025 -0400
 
    Merge branch 'no-rebase-mnt_ns_tree_remove' into work.mount

and vfs/vfs.git #no-rebase-mnt_ns_tree_remove is 38f4885088fc

IOW, merge it into your branch and do your regular changes on top of that -
should be no conflicts in mnt_ns_tree_remove() after that (diff between
no-rebase-mnt_ns_tree_remove and work.mount has nothing in that function)
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Kuniyuki Iwashima 2 weeks, 2 days ago
Hi,

From: Christian Brauner <brauner@kernel.org>
Date: Fri, 12 Sep 2025 13:52:41 +0200
[...]
> @@ -4275,7 +4197,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
>  	if (pwdmnt)
>  		mntput(pwdmnt);
>  
> -	mnt_ns_tree_add(new_ns);
> +	ns_tree_add_raw(new_ns);

When copy_tree() fails, new_ns's rb tree could be empty,

	guard(namespace_excl)();
...
	new = copy_tree(old, old->mnt.mnt_root, copy_flags);
	if (IS_ERR(new)) {
		emptied_ns = new_ns;
		return ERR_CAST(new);
	}

which seems to trigger this warning in __ns_tree_remove().

	VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node));


stack trace captured by syzbot:

WARNING: kernel/nstree.c:115 at __ns_tree_remove+0x28d/0x330 kernel/nstree.c:115, CPU#0: syz.5.2042/14092
Modules linked in:
CPU: 0 UID: 0 PID: 14092 Comm: syz.5.2042 Not tainted syzkaller #0 PREEMPT(full)
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 08/18/2025
RIP: 0010:__ns_tree_remove+0x28d/0x330 kernel/nstree.c:115
Code: 0f 85 a4 00 00 00 48 8b 04 24 ff 00 48 8b 7c 24 18 48 83 c4 30 5b 41 5c 41 5d 41 5e 41 5f 5d e9 e9 b4 f8 09 e8 94 f1 31 00 90 <0f> 0b 90 e9 cc fd ff ff e8 86 f1 31 00 90 0f 0b 90 e9 ee fd ff ff
RSP: 0018:ffffc9001040faa8 EFLAGS: 00010287
RAX: ffffffff818de18c RBX: ffff8880565d4620 RCX: 0000000000080000
RDX: ffffc9001ca51000 RSI: 00000000000003f8 RDI: 00000000000003f9
RBP: dffffc0000000000 R08: ffffffff8e1eff53 R09: 1ffffffff1c3dfea
R10: dffffc0000000000 R11: fffffbfff1c3dfeb R12: ffff8880565d4620
R13: 1ffff1100acba8c3 R14: ffff8880565d4600 R15: ffffffff8e1ea7c0
FS:  00007fba829d56c0(0000) GS:ffff8881259e0000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000110c3b8de5 CR3: 000000004bb54000 CR4: 00000000003526f0
Call Trace:
 <TASK>
 mnt_ns_tree_remove fs/namespace.c:165 [inline]
 free_mnt_ns+0xd1/0x110 fs/namespace.c:4074
 namespace_unlock+0x529/0x760 fs/namespace.c:1710
 class_namespace_excl_destructor fs/namespace.c:96 [inline]
 copy_mnt_ns+0x6e0/0x870 fs/namespace.c:4180
 create_new_namespaces+0xd1/0x720 kernel/nsproxy.c:78
 unshare_nsproxy_namespaces+0x11c/0x170 kernel/nsproxy.c:218
 ksys_unshare+0x4c8/0x8c0 kernel/fork.c:3144
 __do_sys_unshare kernel/fork.c:3215 [inline]
 __se_sys_unshare kernel/fork.c:3213 [inline]
 __x64_sys_unshare+0x38/0x50 kernel/fork.c:3213
 do_syscall_x64 arch/x86/entry/syscall_64.c:63 [inline]
 do_syscall_64+0xfa/0xfa0 arch/x86/entry/syscall_64.c:94
 entry_SYSCALL_64_after_hwframe+0x77/0x7f
RIP: 0033:0x7fba8478eba9
Code: ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 a8 ff ff ff f7 d8 64 89 01 48
RSP: 002b:00007fba829d5038 EFLAGS: 00000246 ORIG_RAX: 0000000000000110
RAX: ffffffffffffffda RBX: 00007fba849d6180 RCX: 00007fba8478eba9
RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000028060400
RBP: 00007fba84811e19 R08: 0000000000000000 R09: 0000000000000000
R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
R13: 00007fba849d6218 R14: 00007fba849d6180 R15: 00007ffe311606c8
 </TASK>
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Al Viro 2 weeks, 2 days ago
On Tue, Sep 16, 2025 at 03:24:07AM +0000, Kuniyuki Iwashima wrote:

See upthread; basically, that
	if (!is_anon_ns(ns))
		ns_tree_remove(ns);

should've been
        if (!RB_EMPTY_NODE(&to_ns_common(ns)->ns_tree_node))
		ns_tree_remove(ns);
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Jan Kara 2 weeks, 3 days ago
On Fri 12-09-25 13:52:41, Christian Brauner wrote:
> Move the mount namespace to the generic ns lookup infrastructure.
> This allows us to drop a bunch of members from struct mnt_namespace.
>                                                                       t
> Signed-off-by: Christian Brauner <brauner@kernel.org>

Just one nit below. Feel free to add:

Reviewed-by: Jan Kara <jack@suse.cz>

> @@ -4185,11 +4107,11 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
>  		return ERR_PTR(ret);
>  	}
>  	if (!anon)
> -		new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
> +		ns_tree_gen_id(&new_ns->ns);
> +	RB_CLEAR_NODE(&new_ns->ns.ns_tree_node);
> +	INIT_LIST_HEAD(&new_ns->ns.ns_list_node);

Why do you initialize ns_tree_node and ns_list_node here when
ns_init_common() already initialized them?

								Honza
-- 
Jan Kara <jack@suse.com>
SUSE Labs, CR
Re: [PATCH v2 18/33] mnt: support ns lookup
Posted by Christian Brauner 2 weeks, 3 days ago
On Mon, Sep 15, 2025 at 01:48:51PM +0200, Jan Kara wrote:
> On Fri 12-09-25 13:52:41, Christian Brauner wrote:
> > Move the mount namespace to the generic ns lookup infrastructure.
> > This allows us to drop a bunch of members from struct mnt_namespace.
> >                                                                       t
> > Signed-off-by: Christian Brauner <brauner@kernel.org>
> 
> Just one nit below. Feel free to add:
> 
> Reviewed-by: Jan Kara <jack@suse.cz>
> 
> > @@ -4185,11 +4107,11 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
> >  		return ERR_PTR(ret);
> >  	}
> >  	if (!anon)
> > -		new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
> > +		ns_tree_gen_id(&new_ns->ns);
> > +	RB_CLEAR_NODE(&new_ns->ns.ns_tree_node);
> > +	INIT_LIST_HEAD(&new_ns->ns.ns_list_node);
> 
> Why do you initialize ns_tree_node and ns_list_node here when
> ns_init_common() already initialized them?

I forgot to remove that. I should note I did all this while severely sick. :D