This will allow userspace to lookup and stat a namespace simply by its
identifier without having to know what type of namespace it is.
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
include/linux/ns_common.h | 4 ++
kernel/nscommon.c | 1 +
kernel/nstree.c | 114 ++++++++++++++++++++++++++++++++++++----------
3 files changed, 95 insertions(+), 24 deletions(-)
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index adc3542042af..88f27b678b4e 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -109,6 +109,10 @@ struct ns_common {
union {
struct {
u64 ns_id;
+ struct /* global namespace rbtree and list */ {
+ struct rb_node ns_unified_tree_node;
+ struct list_head ns_unified_list_node;
+ };
struct /* per type rbtree and list */ {
struct rb_node ns_tree_node;
struct list_head ns_list_node;
diff --git a/kernel/nscommon.c b/kernel/nscommon.c
index 1935f640f05a..98a237be64bc 100644
--- a/kernel/nscommon.c
+++ b/kernel/nscommon.c
@@ -61,6 +61,7 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_ope
ns->ns_id = 0;
ns->ns_type = ns_type;
RB_CLEAR_NODE(&ns->ns_tree_node);
+ RB_CLEAR_NODE(&ns->ns_unified_tree_node);
INIT_LIST_HEAD(&ns->ns_list_node);
#ifdef CONFIG_DEBUG_VFS
diff --git a/kernel/nstree.c b/kernel/nstree.c
index a231cd2e9368..dbe4fb18f021 100644
--- a/kernel/nstree.c
+++ b/kernel/nstree.c
@@ -4,75 +4,86 @@
#include <linux/proc_ns.h>
#include <linux/vfsdebug.h>
+__cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock);
+static struct rb_root ns_unified_tree = RB_ROOT; /* protected by ns_tree_lock */
+
/**
* struct ns_tree - Namespace tree
* @ns_tree: Rbtree of namespaces of a particular type
* @ns_list: Sequentially walkable list of all namespaces of this type
- * @ns_tree_lock: Seqlock to protect the tree and list
* @type: type of namespaces in this tree
*/
struct ns_tree {
- struct rb_root ns_tree;
- struct list_head ns_list;
- seqlock_t ns_tree_lock;
- int type;
+ struct rb_root ns_tree;
+ struct list_head ns_list;
+#ifdef CONFIG_DEBUG_VFS
+ int type;
+#endif
};
struct ns_tree mnt_ns_tree = {
.ns_tree = RB_ROOT,
.ns_list = LIST_HEAD_INIT(mnt_ns_tree.ns_list),
- .ns_tree_lock = __SEQLOCK_UNLOCKED(mnt_ns_tree.ns_tree_lock),
+#ifdef CONFIG_DEBUG_VFS
.type = CLONE_NEWNS,
+#endif
};
struct ns_tree net_ns_tree = {
.ns_tree = RB_ROOT,
.ns_list = LIST_HEAD_INIT(net_ns_tree.ns_list),
- .ns_tree_lock = __SEQLOCK_UNLOCKED(net_ns_tree.ns_tree_lock),
+#ifdef CONFIG_DEBUG_VFS
.type = CLONE_NEWNET,
+#endif
};
EXPORT_SYMBOL_GPL(net_ns_tree);
struct ns_tree uts_ns_tree = {
.ns_tree = RB_ROOT,
.ns_list = LIST_HEAD_INIT(uts_ns_tree.ns_list),
- .ns_tree_lock = __SEQLOCK_UNLOCKED(uts_ns_tree.ns_tree_lock),
+#ifdef CONFIG_DEBUG_VFS
.type = CLONE_NEWUTS,
+#endif
};
struct ns_tree user_ns_tree = {
.ns_tree = RB_ROOT,
.ns_list = LIST_HEAD_INIT(user_ns_tree.ns_list),
- .ns_tree_lock = __SEQLOCK_UNLOCKED(user_ns_tree.ns_tree_lock),
+#ifdef CONFIG_DEBUG_VFS
.type = CLONE_NEWUSER,
+#endif
};
struct ns_tree ipc_ns_tree = {
.ns_tree = RB_ROOT,
.ns_list = LIST_HEAD_INIT(ipc_ns_tree.ns_list),
- .ns_tree_lock = __SEQLOCK_UNLOCKED(ipc_ns_tree.ns_tree_lock),
+#ifdef CONFIG_DEBUG_VFS
.type = CLONE_NEWIPC,
+#endif
};
struct ns_tree pid_ns_tree = {
.ns_tree = RB_ROOT,
.ns_list = LIST_HEAD_INIT(pid_ns_tree.ns_list),
- .ns_tree_lock = __SEQLOCK_UNLOCKED(pid_ns_tree.ns_tree_lock),
+#ifdef CONFIG_DEBUG_VFS
.type = CLONE_NEWPID,
+#endif
};
struct ns_tree cgroup_ns_tree = {
.ns_tree = RB_ROOT,
.ns_list = LIST_HEAD_INIT(cgroup_ns_tree.ns_list),
- .ns_tree_lock = __SEQLOCK_UNLOCKED(cgroup_ns_tree.ns_tree_lock),
+#ifdef CONFIG_DEBUG_VFS
.type = CLONE_NEWCGROUP,
+#endif
};
struct ns_tree time_ns_tree = {
.ns_tree = RB_ROOT,
.ns_list = LIST_HEAD_INIT(time_ns_tree.ns_list),
- .ns_tree_lock = __SEQLOCK_UNLOCKED(time_ns_tree.ns_tree_lock),
+#ifdef CONFIG_DEBUG_VFS
.type = CLONE_NEWTIME,
+#endif
};
DEFINE_COOKIE(namespace_cookie);
@@ -84,6 +95,13 @@ static inline struct ns_common *node_to_ns(const struct rb_node *node)
return rb_entry(node, struct ns_common, ns_tree_node);
}
+static inline struct ns_common *node_to_ns_unified(const struct rb_node *node)
+{
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct ns_common, ns_unified_tree_node);
+}
+
static inline int ns_cmp(struct rb_node *a, const struct rb_node *b)
{
struct ns_common *ns_a = node_to_ns(a);
@@ -98,15 +116,27 @@ static inline int ns_cmp(struct rb_node *a, const struct rb_node *b)
return 0;
}
+static inline int ns_cmp_unified(struct rb_node *a, const struct rb_node *b)
+{
+ struct ns_common *ns_a = node_to_ns_unified(a);
+ struct ns_common *ns_b = node_to_ns_unified(b);
+ u64 ns_id_a = ns_a->ns_id;
+ u64 ns_id_b = ns_b->ns_id;
+
+ if (ns_id_a < ns_id_b)
+ return -1;
+ if (ns_id_a > ns_id_b)
+ return 1;
+ return 0;
+}
+
void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
{
struct rb_node *node, *prev;
VFS_WARN_ON_ONCE(!ns->ns_id);
- write_seqlock(&ns_tree->ns_tree_lock);
-
- VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
+ write_seqlock(&ns_tree_lock);
node = rb_find_add_rcu(&ns->ns_tree_node, &ns_tree->ns_tree, ns_cmp);
/*
@@ -119,7 +149,8 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
else
list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node);
- write_sequnlock(&ns_tree->ns_tree_lock);
+ rb_find_add_rcu(&ns->ns_unified_tree_node, &ns_unified_tree, ns_cmp_unified);
+ write_sequnlock(&ns_tree_lock);
VFS_WARN_ON_ONCE(node);
@@ -138,11 +169,12 @@ void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree)
VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node));
VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
- write_seqlock(&ns_tree->ns_tree_lock);
+ write_seqlock(&ns_tree_lock);
rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree);
+ rb_erase(&ns->ns_unified_tree_node, &ns_unified_tree);
list_bidir_del_rcu(&ns->ns_list_node);
RB_CLEAR_NODE(&ns->ns_tree_node);
- write_sequnlock(&ns_tree->ns_tree_lock);
+ write_sequnlock(&ns_tree_lock);
}
EXPORT_SYMBOL_GPL(__ns_tree_remove);
@@ -158,6 +190,17 @@ static int ns_find(const void *key, const struct rb_node *node)
return 0;
}
+static int ns_find_unified(const void *key, const struct rb_node *node)
+{
+ const u64 ns_id = *(u64 *)key;
+ const struct ns_common *ns = node_to_ns_unified(node);
+
+ if (ns_id < ns->ns_id)
+ return -1;
+ if (ns_id > ns->ns_id)
+ return 1;
+ return 0;
+}
static struct ns_tree *ns_tree_from_type(int ns_type)
{
@@ -183,28 +226,51 @@ static struct ns_tree *ns_tree_from_type(int ns_type)
return NULL;
}
-struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
+static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id)
{
- struct ns_tree *ns_tree;
struct rb_node *node;
unsigned int seq;
- RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage");
+ do {
+ seq = read_seqbegin(&ns_tree_lock);
+ node = rb_find_rcu(&ns_id, &ns_unified_tree, ns_find_unified);
+ if (node)
+ break;
+ } while (read_seqretry(&ns_tree_lock, seq));
+
+ return node_to_ns_unified(node);
+}
+
+static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type)
+{
+ struct ns_tree *ns_tree;
+ struct rb_node *node;
+ unsigned int seq;
ns_tree = ns_tree_from_type(ns_type);
if (!ns_tree)
return NULL;
do {
- seq = read_seqbegin(&ns_tree->ns_tree_lock);
+ seq = read_seqbegin(&ns_tree_lock);
node = rb_find_rcu(&ns_id, &ns_tree->ns_tree, ns_find);
if (node)
break;
- } while (read_seqretry(&ns_tree->ns_tree_lock, seq));
+ } while (read_seqretry(&ns_tree_lock, seq));
return node_to_ns(node);
}
+struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage");
+
+ if (ns_type)
+ return __ns_tree_lookup_rcu(ns_id, ns_type);
+
+ return __ns_unified_tree_lookup_rcu(ns_id);
+}
+
/**
* ns_tree_adjoined_rcu - find the next/previous namespace in the same
* tree
--
2.47.3