bpf: add a few hooks for sandboxing

[PATCH 3/4] selftests/bpf: add ns hook selftest

Posted by Christian Brauner 1 month, 1 week ago

Add a BPF LSM selftest that implements a "lock on entry" namespace
sandbox policy.

Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/ns_sandbox.c  | 99 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_ns_sandbox.c  | 91 ++++++++++++++++++++
 2 files changed, 190 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/ns_sandbox.c b/tools/testing/selftests/bpf/prog_tests/ns_sandbox.c
new file mode 100644
index 000000000000..0ac2acfb6365
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ns_sandbox.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+
+/*
+ * Test BPF LSM namespace sandbox: once you enter, you stay.
+ *
+ * The parent creates a tracked namespace, then forks a child.
+ * The child enters the tracked namespace (allowed) and is then locked
+ * out of any further setns().
+ */
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include "test_ns_sandbox.skel.h"
+
+/*
+ * Exercise the "lock on entry" namespace sandbox implemented by
+ * progs/test_ns_sandbox.c.
+ *
+ * The parent records its pid in monitor_pid, unshares a UTS namespace so
+ * the alloc hook tracks it, and switches back to the host namespace.  A
+ * forked child then joins the tracked namespace and must get EPERM on its
+ * next setns().
+ *
+ * The child reports through its exit status:
+ *   0 - entered the sandbox and was denied the follow-up setns() with EPERM
+ *   1 - entering the sandbox namespace failed
+ *   2 - the follow-up setns() succeeded or failed with another errno
+ */
+void test_ns_sandbox(void)
+{
+	int orig_utsns = -1, new_utsns = -1;
+	struct test_ns_sandbox *skel = NULL;
+	int err, status;
+	pid_t child;
+
+	/* Save FD to current (host) namespace */
+	orig_utsns = open("/proc/self/ns/uts", O_RDONLY);
+	if (!ASSERT_OK_FD(orig_utsns, "open orig utsns"))
+		goto close_fds;
+
+	skel = test_ns_sandbox__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+		goto close_fds;
+
+	err = test_ns_sandbox__attach(skel);
+	if (!ASSERT_OK(err, "skel attach"))
+		goto destroy;
+
+	skel->bss->monitor_pid = getpid();
+
+	/*
+	 * Create a sandbox namespace.  The alloc hook records its
+	 * inum because this task's pid matches monitor_pid.
+	 */
+	err = unshare(CLONE_NEWUTS);
+	if (!ASSERT_OK(err, "unshare sandbox"))
+		goto destroy;
+
+	new_utsns = open("/proc/self/ns/uts", O_RDONLY);
+	if (!ASSERT_OK_FD(new_utsns, "open sandbox utsns"))
+		goto restore;
+
+	/*
+	 * Return parent to host namespace.  The host namespace is not
+	 * in the map so the install hook lets us through.
+	 */
+	err = setns(orig_utsns, CLONE_NEWUTS);
+	if (!ASSERT_OK(err, "parent setns host utsns"))
+		goto restore;
+
+	/*
+	 * Fork a child that:
+	 *  1. Enters the sandbox UTS namespace — succeeds and locks it.
+	 *  2. Tries to switch to host UTS — denied (locked).
+	 */
+	child = fork();
+	if (child == 0) {
+		/* Enter tracked namespace — allowed, we get locked */
+		if (setns(new_utsns, CLONE_NEWUTS) != 0)
+			_exit(1);
+
+		/* Locked: switching to host must fail */
+		if (setns(orig_utsns, CLONE_NEWUTS) != -1 ||
+		    errno != EPERM)
+			_exit(2);
+
+		_exit(0);
+	}
+	/* The child never gets here, so only a positive pid is a success. */
+	if (!ASSERT_GT(child, 0, "fork child"))
+		goto restore;
+
+	/*
+	 * Only look at status once waitpid() has actually reaped the child;
+	 * on failure it is left uninitialized.
+	 */
+	err = waitpid(child, &status, 0);
+	if (!ASSERT_EQ(err, child, "waitpid child"))
+		goto destroy;
+	/* WEXITSTATUS() is only meaningful for a normal exit. */
+	if (!ASSERT_TRUE(WIFEXITED(status), "child exited"))
+		goto destroy;
+	ASSERT_EQ(WEXITSTATUS(status), 0, "child locked in");
+
+	/* The parent is already back in the host namespace: skip restore. */
+	goto destroy;
+
+restore:
+	setns(orig_utsns, CLONE_NEWUTS);
+destroy:
+	test_ns_sandbox__destroy(skel);
+close_fds:
+	if (new_utsns >= 0)
+		close(new_utsns);
+	if (orig_utsns >= 0)
+		close(orig_utsns);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ns_sandbox.c b/tools/testing/selftests/bpf/progs/test_ns_sandbox.c
new file mode 100644
index 000000000000..75c3493932a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ns_sandbox.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
+
+/*
+ * BPF LSM namespace sandbox: once you enter, you stay.
+ *
+ * A designated process creates namespaces (tracked via alloc).  When
+ * any other process joins one of those namespaces it gets recorded in
+ * locked_tasks.  From that point on that process cannot setns() into
+ * any other namespace — it is locked in.  Task local storage is
+ * automatically freed when the task exits.
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/*
+ * Namespaces created by the monitored process.
+ * Key:   namespace inode number (ns->inum, stored by ns_alloc).
+ * Value: namespace type (CLONE_NEW* flag), copied from ns->ns_type.
+ *
+ * ns_install only tests whether a key is present; ns_free deletes the
+ * entry again.  At most 64 namespaces can be tracked at once.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 64);
+	__type(key, __u32);
+	__type(value, __u32);
+} known_namespaces SEC(".maps");
+
+/*
+ * PID of the process whose namespace creations are tracked.  ns_alloc
+ * compares it against the upper 32 bits of bpf_get_current_pid_tgid();
+ * the userspace test sets it to getpid().
+ */
+int monitor_pid;
+
+/*
+ * Task local storage: marks tasks that have entered a tracked namespace
+ * and are now locked.  Only the existence of an entry is checked; the
+ * __u8 value itself is never read or written by the programs.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, __u8);
+} locked_tasks SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * namespace_alloc hook: when the calling process is the monitored one,
+ * store the namespace's inode number and type in known_namespaces.
+ * Namespaces allocated by any other process are ignored.  Always returns 0.
+ */
+SEC("lsm.s/namespace_alloc")
+int BPF_PROG(ns_alloc, struct ns_common *ns)
+{
+	/* The upper half of pid_tgid is compared against monitor_pid. */
+	__u64 tgid = bpf_get_current_pid_tgid() >> 32;
+	__u32 key, type;
+
+	if (tgid == monitor_pid) {
+		key = ns->inum;
+		type = ns->ns_type;
+		bpf_map_update_elem(&known_namespaces, &key, &type, BPF_ANY);
+	}
+
+	return 0;
+}
+
+/*
+ * Enforce the lock-in policy for all tasks:
+ * - Already locked?  Deny any setns with -EPERM.
+ * - Entering a tracked namespace?  Lock the task and allow.  If the lock
+ *   marker cannot be created, deny with -ENOMEM instead of letting an
+ *   unlocked task into the sandbox.
+ * - Everything else passes through.
+ *
+ * A task counts as locked as soon as it has a locked_tasks entry; the
+ * stored value is irrelevant.
+ */
+SEC("lsm.s/namespace_install")
+int BPF_PROG(ns_install, struct nsset *nsset, struct ns_common *ns)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	__u32 inum = ns->inum;
+
+	/* Lookup only (no create flag): non-NULL means already locked. */
+	if (bpf_task_storage_get(&locked_tasks, task, 0, 0))
+		return -EPERM;
+
+	/* Untracked namespaces are outside the policy. */
+	if (!bpf_map_lookup_elem(&known_namespaces, &inum))
+		return 0;
+
+	/* Creating the entry is what locks the task; fail closed on error. */
+	if (!bpf_task_storage_get(&locked_tasks, task, 0,
+				  BPF_LOCAL_STORAGE_GET_F_CREATE))
+		return -ENOMEM;
+
+	return 0;
+}
+
+/*
+ * namespace_free hook: drop the namespace's inode number from
+ * known_namespaces.  Deleting a key that was never tracked is harmless;
+ * the helper's return value is ignored.
+ */
+SEC("lsm/namespace_free")
+void BPF_PROG(ns_free, struct ns_common *ns)
+{
+	__u32 key;
+
+	key = ns->inum;
+	bpf_map_delete_elem(&known_namespaces, &key);
+}

-- 
2.47.3

Re: [PATCH 3/4] selftests/bpf: add ns hook selftest

Posted by Alan Maguire 4 weeks ago

On 20/02/2026 00:38, Christian Brauner wrote:
> Add a BPF LSM selftest that implements a "lock on entry" namespace
> sandbox policy.
> 
> Signed-off-by: Christian Brauner <brauner@kernel.org>

Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Tested-by: Alan Maguire <alan.maguire@oracle.com>

one small thing below...

> ---
>  .../testing/selftests/bpf/prog_tests/ns_sandbox.c  | 99 ++++++++++++++++++++++
>  .../testing/selftests/bpf/progs/test_ns_sandbox.c  | 91 ++++++++++++++++++++
>  2 files changed, 190 insertions(+)
> 
> diff --git a/tools/testing/selftests/bpf/prog_tests/ns_sandbox.c b/tools/testing/selftests/bpf/prog_tests/ns_sandbox.c
> new file mode 100644
> index 000000000000..0ac2acfb6365
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/ns_sandbox.c
> @@ -0,0 +1,99 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
> +
> +/*
> + * Test BPF LSM namespace sandbox: once you enter, you stay.
> + *
> + * The parent creates a tracked namespace, then forks a child.
> + * The child enters the tracked namespace (allowed) and is then locked
> + * out of any further setns().
> + */
> +
> +#define _GNU_SOURCE
> +#include <test_progs.h>
> +#include <sched.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <sys/wait.h>
> +#include "test_ns_sandbox.skel.h"
> +
> +void test_ns_sandbox(void)
> +{
> +	int orig_utsns = -1, new_utsns = -1;
> +	struct test_ns_sandbox *skel = NULL;
> +	int err, status;
> +	pid_t child;
> +
> +	/* Save FD to current (host) namespace */
> +	orig_utsns = open("/proc/self/ns/uts", O_RDONLY);
> +	if (!ASSERT_OK_FD(orig_utsns, "open orig utsns"))
> +		goto close_fds;
> +
> +	skel = test_ns_sandbox__open_and_load();
> +	if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
> +		goto close_fds;
> +
> +	err = test_ns_sandbox__attach(skel);
> +	if (!ASSERT_OK(err, "skel attach"))
> +		goto destroy;
> +
> +	skel->bss->monitor_pid = getpid();
> +
> +	/*
> +	 * Create a sandbox namespace.  The alloc hook records its
> +	 * inum because this task's pid matches monitor_pid.
> +	 */
> +	err = unshare(CLONE_NEWUTS);
> +	if (!ASSERT_OK(err, "unshare sandbox"))
> +		goto destroy;
> +
> +	new_utsns = open("/proc/self/ns/uts", O_RDONLY);
> +	if (!ASSERT_OK_FD(new_utsns, "open sandbox utsns"))
> +		goto restore;
> +
> +	/*
> +	 * Return parent to host namespace.  The host namespace is not
> +	 * in the map so the install hook lets us through.
> +	 */
> +	err = setns(orig_utsns, CLONE_NEWUTS);
> +	if (!ASSERT_OK(err, "parent setns host utsns"))
> +		goto restore;
> +
> +	/*
> +	 * Fork a child that:
> +	 *  1. Enters the sandbox UTS namespace — succeeds and locks it.
> +	 *  2. Tries to switch to host UTS — denied (locked).
> +	 */
> +	child = fork();
> +	if (child == 0) {
> +		/* Enter tracked namespace — allowed, we get locked */
> +		if (setns(new_utsns, CLONE_NEWUTS) != 0)
> +			_exit(1);
> +
> +		/* Locked: switching to host must fail */
> +		if (setns(orig_utsns, CLONE_NEWUTS) != -1 ||
> +		    errno != EPERM)
> +			_exit(2);
> +
> +		_exit(0);
> +	}
> +	if (!ASSERT_GE(child, 0, "fork child"))

This should be ASSERT_GT(), I think, since the child == 0 path is already handled above.

> +		goto restore;
> +
> +	err = waitpid(child, &status, 0);
> +	ASSERT_GT(err, 0, "waitpid child");
> +	ASSERT_TRUE(WIFEXITED(status), "child exited");
> +	ASSERT_EQ(WEXITSTATUS(status), 0, "child locked in");
> +
> +	goto destroy;
> +
> +restore:
> +	setns(orig_utsns, CLONE_NEWUTS);
> +destroy:
> +	test_ns_sandbox__destroy(skel);
> +close_fds:
> +	if (new_utsns >= 0)
> +		close(new_utsns);
> +	if (orig_utsns >= 0)
> +		close(orig_utsns);
> +}
> diff --git a/tools/testing/selftests/bpf/progs/test_ns_sandbox.c b/tools/testing/selftests/bpf/progs/test_ns_sandbox.c
> new file mode 100644
> index 000000000000..75c3493932a1
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_ns_sandbox.c
> @@ -0,0 +1,91 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2026 Christian Brauner <brauner@kernel.org> */
> +
> +/*
> + * BPF LSM namespace sandbox: once you enter, you stay.
> + *
> + * A designated process creates namespaces (tracked via alloc).  When
> + * any other process joins one of those namespaces it gets recorded in
> + * locked_tasks.  From that point on that process cannot setns() into
> + * any other namespace — it is locked in.  Task local storage is
> + * automatically freed when the task exits.
> + */
> +
> +#include "vmlinux.h"
> +#include <errno.h>
> +#include <bpf/bpf_helpers.h>
> +#include <bpf/bpf_tracing.h>
> +
> +/*
> + * Namespaces created by the monitored process.
> + * Key:   namespace inode number.
> + * Value: namespace type (CLONE_NEW* flag).
> + */
> +struct {
> +	__uint(type, BPF_MAP_TYPE_HASH);
> +	__uint(max_entries, 64);
> +	__type(key, __u32);
> +	__type(value, __u32);
> +} known_namespaces SEC(".maps");
> +
> +/* PID of the process whose namespace creations are tracked. */
> +int monitor_pid;
> +
> +/*
> + * Task local storage: marks tasks that have entered a tracked namespace
> + * and are now locked.
> + */
> +struct {
> +	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
> +	__uint(map_flags, BPF_F_NO_PREALLOC);
> +	__type(key, int);
> +	__type(value, __u8);
> +} locked_tasks SEC(".maps");
> +
> +char _license[] SEC("license") = "GPL";
> +
> +/* Only the monitored process's namespace creations are tracked. */
> +SEC("lsm.s/namespace_alloc")
> +int BPF_PROG(ns_alloc, struct ns_common *ns)
> +{
> +	__u32 inum, ns_type;
> +
> +	if ((bpf_get_current_pid_tgid() >> 32) != monitor_pid)
> +		return 0;
> +
> +	inum = ns->inum;
> +	ns_type = ns->ns_type;
> +	bpf_map_update_elem(&known_namespaces, &inum, &ns_type, BPF_ANY);
> +
> +	return 0;
> +}
> +
> +/*
> + * Enforce the lock-in policy for all tasks:
> + * - Already locked?  Deny any setns.
> + * - Entering a tracked namespace?  Lock the task and allow.
> + * - Everything else passes through.
> + */
> +SEC("lsm.s/namespace_install")
> +int BPF_PROG(ns_install, struct nsset *nsset, struct ns_common *ns)
> +{
> +	struct task_struct *task = bpf_get_current_task_btf();
> +	__u32 inum = ns->inum;
> +
> +	if (bpf_task_storage_get(&locked_tasks, task, 0, 0))
> +		return -EPERM;
> +
> +	if (bpf_map_lookup_elem(&known_namespaces, &inum))
> +		bpf_task_storage_get(&locked_tasks, task, 0,
> +				     BPF_LOCAL_STORAGE_GET_F_CREATE);
> +
> +	return 0;
> +}
> +
> +SEC("lsm/namespace_free")
> +void BPF_PROG(ns_free, struct ns_common *ns)
> +{
> +	__u32 inum = ns->inum;
> +
> +	bpf_map_delete_elem(&known_namespaces, &inum);
> +}
>

[PATCH 1/4] ns: add bpf hooks
[PATCH 2/4] cgroup: add bpf hook for attach
[PATCH 3/4] selftests/bpf: add ns hook selftest
[PATCH 4/4] selftests/bpf: add cgroup attach selftests