From nobody Fri Apr 3 09:52:02 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D3A161EDA32; Fri, 20 Feb 2026 00:38:45 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771547925; cv=none; b=IVkXw3MejERxrrSTzT2AsFzhTWQpgdbjwl8uO9YnIbmtMTSpN734YqXgjjCU0EOIKrD5wLMqhIM25Y+4/4kJjyU7FkvBdv1PO8BYx/SdCU1olsC2vg43/9+0EDf2nYksqWcbIpFQEeiIvMzAkIzHoOPVPi80BI7kN6FjhyiHi7c= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771547925; c=relaxed/simple; bh=QZ3jVMjUhxTz1y23OlYN9+hzPO/kmADiRKgb9P3oOQk=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=LNmppF4jbCqUK6iqTn9sJqq8uRDwws472pw6MxOa01mdt6eXbxvYfvlj+51rSrkW/CY1dH1zIQMMO1a/u3GUs0Uq0SxIY4SlrfvRQUjAW2b+JPCvboXIEd5g54F7Znxn89SQ9WdoIGchYlUDl00aEtdbE+rHwW4JYF/Ug7MwAok= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=B7SyJJwK; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="B7SyJJwK" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 683E5C19423; Fri, 20 Feb 2026 00:38:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771547925; bh=QZ3jVMjUhxTz1y23OlYN9+hzPO/kmADiRKgb9P3oOQk=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=B7SyJJwKnhRUPaYu+LgUJKyURL0kVZHzL19za6v1KFimR6/wHdl/zeGTDrqPuGGSm 3YIedu66KCP/DKORsnKkZOixvK3Dy07zTiiso+A7rvEaHS/omj65GAG6clw3H4Y3qd xoT/AOGesNG2NHOjgvyY5iFy9tVjJnpRlx2RvlsIx47E+RQ/66hIz62MV06BBbZAjC 
AGY90o1hZ8A4IG8O/xwDZvcvLo4t/7MUHT2wzIUzvGCa9Bbhu8mIPDmd+52IsLfn8+ HZjkJ9u87/guHYmnalC6/piU/oNjA7jXzZ2jXwFejB+LWYvt118ghGQT1jRjA4H4tr cZFeoe/jt2bmw== From: Christian Brauner Date: Fri, 20 Feb 2026 01:38:29 +0100 Subject: [PATCH 1/4] ns: add bpf hooks Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260220-work-bpf-namespace-v1-1-866207db7b83@kernel.org> References: <20260220-work-bpf-namespace-v1-0-866207db7b83@kernel.org> In-Reply-To: <20260220-work-bpf-namespace-v1-0-866207db7b83@kernel.org> To: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Tejun Heo Cc: KP Singh , bpf@vger.kernel.org, linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, Lennart Poettering , Christian Brauner X-Mailer: b4 0.15-dev-47773 X-Developer-Signature: v=1; a=openpgp-sha256; l=5458; i=brauner@kernel.org; h=from:subject:message-id; bh=QZ3jVMjUhxTz1y23OlYN9+hzPO/kmADiRKgb9P3oOQk=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWROXyuQd8Nhz52pFZ01n5fa294KYQ9+dOvO2s8yvZoTn E6+W7DAsaOUhUGMi0FWTJHFod0kXG45T8Vmo0wNmDmsTCBDGLg4BWAiCtkM/2PnN52bJ7wnuMjx 6L1FW851nlU989525v3c39+Xn7s9V7OKkWFDeJAE9x5nbk/RYI7vvBZ3tZJYGj0ZFix1lN0gwSt zgRMA X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Add the three namespace lifecycle hooks and make them available to bpf lsm program types. This allows bpf to supervise namespace creation. I'm in the process of adding various "universal truth" bpf programs to systemd that will make use of this. This allows, e.g., locking a program into a given set of namespaces. 
Signed-off-by: Christian Brauner Acked-by: Song Liu --- include/linux/bpf_lsm.h | 21 +++++++++++++++++++++ kernel/bpf/bpf_lsm.c | 25 +++++++++++++++++++++++++ kernel/nscommon.c | 9 ++++++++- kernel/nsproxy.c | 7 +++++++ 4 files changed, 61 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 643809cc78c3..5ae438fdf567 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -12,6 +12,9 @@ #include #include =20 +struct ns_common; +struct nsset; + #ifdef CONFIG_BPF_LSM =20 #define LSM_HOOK(RET, DEFAULT, NAME, ...) \ @@ -48,6 +51,11 @@ void bpf_lsm_find_cgroup_shim(const struct bpf_prog *pro= g, bpf_func_t *bpf_func) =20 int bpf_lsm_get_retval_range(const struct bpf_prog *prog, struct bpf_retval_range *range); + +int bpf_lsm_namespace_alloc(struct ns_common *ns); +void bpf_lsm_namespace_free(struct ns_common *ns); +int bpf_lsm_namespace_install(struct nsset *nsset, struct ns_common *ns); + int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__s= tr, const struct bpf_dynptr *value_p, int flags); int bpf_remove_dentry_xattr_locked(struct dentry *dentry, const char *name= __str); @@ -104,6 +112,19 @@ static inline bool bpf_lsm_has_d_inode_locked(const st= ruct bpf_prog *prog) { return false; } + +static inline int bpf_lsm_namespace_alloc(struct ns_common *ns) +{ + return 0; +} +static inline void bpf_lsm_namespace_free(struct ns_common *ns) +{ +} +static inline int bpf_lsm_namespace_install(struct nsset *nsset, + struct ns_common *ns) +{ + return 0; +} #endif /* CONFIG_BPF_LSM */ =20 #endif /* _LINUX_BPF_LSM_H */ diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index 0c4a0c8e6f70..f6378db46220 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -30,10 +30,32 @@ __weak noinline RET bpf_lsm_##NAME(__VA_ARGS__) \ #include #undef LSM_HOOK =20 +__bpf_hook_start(); + +__weak noinline int bpf_lsm_namespace_alloc(struct ns_common *ns) +{ + return 0; +} + +__weak noinline void 
bpf_lsm_namespace_free(struct ns_common *ns) +{ +} + +__weak noinline int bpf_lsm_namespace_install(struct nsset *nsset, + struct ns_common *ns) +{ + return 0; +} + +__bpf_hook_end(); + #define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME) BTF_SET_START(bpf_lsm_hooks) #include #undef LSM_HOOK +BTF_ID(func, bpf_lsm_namespace_alloc) +BTF_ID(func, bpf_lsm_namespace_free) +BTF_ID(func, bpf_lsm_namespace_install) BTF_SET_END(bpf_lsm_hooks) =20 BTF_SET_START(bpf_lsm_disabled_hooks) @@ -383,6 +405,8 @@ BTF_ID(func, bpf_lsm_task_prctl) BTF_ID(func, bpf_lsm_task_setscheduler) BTF_ID(func, bpf_lsm_task_to_inode) BTF_ID(func, bpf_lsm_userns_create) +BTF_ID(func, bpf_lsm_namespace_alloc) +BTF_ID(func, bpf_lsm_namespace_install) BTF_SET_END(sleepable_lsm_hooks) =20 BTF_SET_START(untrusted_lsm_hooks) @@ -395,6 +419,7 @@ BTF_ID(func, bpf_lsm_sk_alloc_security) BTF_ID(func, bpf_lsm_sk_free_security) #endif /* CONFIG_SECURITY_NETWORK */ BTF_ID(func, bpf_lsm_task_free) +BTF_ID(func, bpf_lsm_namespace_free) BTF_SET_END(untrusted_lsm_hooks) =20 bool bpf_lsm_is_sleepable_hook(u32 btf_id) diff --git a/kernel/nscommon.c b/kernel/nscommon.c index bdc3c86231d3..c3613cab3d41 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (c) 2025 Christian Brauner */ =20 +#include #include #include #include @@ -77,6 +78,7 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, c= onst struct proc_ns_ope ret =3D proc_alloc_inum(&ns->inum); if (ret) return ret; + /* * Tree ref starts at 0. 
It's incremented when namespace enters * active use (installed in nsproxy) and decremented when all @@ -86,11 +88,16 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type,= const struct proc_ns_ope atomic_set(&ns->__ns_ref_active, 1); else atomic_set(&ns->__ns_ref_active, 0); - return 0; + + ret =3D bpf_lsm_namespace_alloc(ns); + if (ret && !inum) + proc_free_inum(ns->inum); + return ret; } =20 void __ns_common_free(struct ns_common *ns) { + bpf_lsm_namespace_free(ns); proc_free_inum(ns->inum); } =20 diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index 259c4b4f1eeb..5742f9664dbb 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -9,6 +9,7 @@ * Pavel Emelianov */ =20 +#include #include #include #include @@ -379,6 +380,12 @@ static int prepare_nsset(unsigned flags, struct nsset = *nsset) =20 static inline int validate_ns(struct nsset *nsset, struct ns_common *ns) { + int ret; + + ret =3D bpf_lsm_namespace_install(nsset, ns); + if (ret) + return ret; + return ns->ops->install(nsset, ns); } =20 --=20 2.47.3 From nobody Fri Apr 3 09:52:02 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BF8951F4180; Fri, 20 Feb 2026 00:38:48 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771547928; cv=none; b=GPmmKUXpAanuiJMfxY0D7WXPeT6LDFRaG3HywmQIrrjEx+zRiX3a0NjW/DjmtihjLmPx8pNQqAe5jXaMZAuWdoqaIIzrxwrOT15S2YEzgxy2y5i2fkb1k/nwWgJEmff2FuuE15WI210Ffasd2Z9tR/x+iKdxCUwQrRPvSypvQBo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771547928; c=relaxed/simple; bh=rQtRX5ohX7vGBGBjzVHcp0rW36b98A2mn7TXYUJHP4Y=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; 
b=A6kVk6caTrR7504iMSGmA0GOYHpsNdTxLkCGwTYcRSquggvGU597cQLUZfVP7/qgCIqyBHv66Y0Z39CPN1GuF8kfmZ8R5vaKhxFn28D3GsV3k56cciX1ZuKRHK38wZ/N2nQQTAl+GOPwGlV4uLQtimWcTgI5HdJZjGxaBOlXYV4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=FL0K6bXF; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="FL0K6bXF" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 330C1C4CEF7; Fri, 20 Feb 2026 00:38:46 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771547928; bh=rQtRX5ohX7vGBGBjzVHcp0rW36b98A2mn7TXYUJHP4Y=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=FL0K6bXFqg4jIllEBmUHk3Xk8hYcNar9d4pesEyDEPuGTWtQCR9e9Rk2vZunQTvht PygePh15IsSKliaXnNJBDC9ZDPJTmMK6kRjPAvDLvfj9FohXS9MFAqhxukiwekfsNn HN0D51SiDyp0FAtm9cmNqS5q3h+XPkD4pKq+eSS9DWcClbUz0DxE2Xbyt/Bh9ilSgh fzCcSpHDHdUT6yGBU6ucfC6tQD9f4Hg40zk7TN4Mu1TdJ0mbBWSsS2auIa8c3Jexfp Hy1uhqcPrKMzoUTvRtDh6qI7jroOZr/K+BJ8/aKnKDX5LjsVGw06O4G9f/USMyc06T lUribCddDssow== From: Christian Brauner Date: Fri, 20 Feb 2026 01:38:30 +0100 Subject: [PATCH 2/4] cgroup: add bpf hook for attach Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260220-work-bpf-namespace-v1-2-866207db7b83@kernel.org> References: <20260220-work-bpf-namespace-v1-0-866207db7b83@kernel.org> In-Reply-To: <20260220-work-bpf-namespace-v1-0-866207db7b83@kernel.org> To: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Tejun Heo Cc: KP Singh , bpf@vger.kernel.org, linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, Lennart Poettering , Christian Brauner X-Mailer: b4 0.15-dev-47773 X-Developer-Signature: v=1; a=openpgp-sha256; l=6350; 
i=brauner@kernel.org; h=from:subject:message-id; bh=rQtRX5ohX7vGBGBjzVHcp0rW36b98A2mn7TXYUJHP4Y=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWROXyvAv7x/5U7GLz9cpMJnX7ruWG/l+r143/L3Gfndy Xd9Gi3dO0pZGMS4GGTFFFkc2k3C5ZbzVGw2ytSAmcPKBDKEgYtTACYiMIORYcMl3gj3l10hazzL m+N2csfxNfksmj1t7aOrAfvd2hJS3zL8lfPc8OjB+lNfd+s8rDzzRtVVQEbiJZd8klX5r7y0vhu b2QE= X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Add a hook to manage attaching tasks to cgroup. I'm in the process of adding various "universal truth" bpf programs to systemd that will make use of this. This has been a long-standing request (cf. [1] and [2]). It will allow us to enforce cgroup migrations and ensure that services can never escape their cgroups. This is just one of many use-cases. Link: https://github.com/systemd/systemd/issues/6356 [1] Link: https://github.com/systemd/systemd/issues/22874 [2] Signed-off-by: Christian Brauner --- include/linux/bpf_lsm.h | 15 +++++++++++++++ kernel/bpf/bpf_lsm.c | 12 ++++++++++++ kernel/cgroup/cgroup.c | 18 +++++++++++------- 3 files changed, 38 insertions(+), 7 deletions(-) diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h index 5ae438fdf567..bc1d35b271f5 100644 --- a/include/linux/bpf_lsm.h +++ b/include/linux/bpf_lsm.h @@ -12,8 +12,11 @@ #include #include =20 +struct cgroup; +struct cgroup_namespace; struct ns_common; struct nsset; +struct super_block; =20 #ifdef CONFIG_BPF_LSM =20 @@ -55,6 +58,9 @@ int bpf_lsm_get_retval_range(const struct bpf_prog *prog, int bpf_lsm_namespace_alloc(struct ns_common *ns); void bpf_lsm_namespace_free(struct ns_common *ns); int bpf_lsm_namespace_install(struct nsset *nsset, struct ns_common *ns); +int bpf_lsm_cgroup_attach(struct task_struct *task, struct cgroup *src_cgr= p, + struct cgroup *dst_cgrp, struct super_block *sb, + bool threadgroup, struct cgroup_namespace *ns); =20 int bpf_set_dentry_xattr_locked(struct dentry *dentry, const char *name__s= tr, const struct bpf_dynptr *value_p, int 
flags); @@ -125,6 +131,15 @@ static inline int bpf_lsm_namespace_install(struct nss= et *nsset, { return 0; } +static inline int bpf_lsm_cgroup_attach(struct task_struct *task, + struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, + struct super_block *sb, + bool threadgroup, + struct cgroup_namespace *ns) +{ + return 0; +} #endif /* CONFIG_BPF_LSM */ =20 #endif /* _LINUX_BPF_LSM_H */ diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index f6378db46220..1da5585082fa 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -47,6 +47,16 @@ __weak noinline int bpf_lsm_namespace_install(struct nss= et *nsset, return 0; } =20 +__weak noinline int bpf_lsm_cgroup_attach(struct task_struct *task, + struct cgroup *src_cgrp, + struct cgroup *dst_cgrp, + struct super_block *sb, + bool threadgroup, + struct cgroup_namespace *ns) +{ + return 0; +} + __bpf_hook_end(); =20 #define LSM_HOOK(RET, DEFAULT, NAME, ...) BTF_ID(func, bpf_lsm_##NAME) @@ -56,6 +66,7 @@ BTF_SET_START(bpf_lsm_hooks) BTF_ID(func, bpf_lsm_namespace_alloc) BTF_ID(func, bpf_lsm_namespace_free) BTF_ID(func, bpf_lsm_namespace_install) +BTF_ID(func, bpf_lsm_cgroup_attach) BTF_SET_END(bpf_lsm_hooks) =20 BTF_SET_START(bpf_lsm_disabled_hooks) @@ -407,6 +418,7 @@ BTF_ID(func, bpf_lsm_task_to_inode) BTF_ID(func, bpf_lsm_userns_create) BTF_ID(func, bpf_lsm_namespace_alloc) BTF_ID(func, bpf_lsm_namespace_install) +BTF_ID(func, bpf_lsm_cgroup_attach) BTF_SET_END(sleepable_lsm_hooks) =20 BTF_SET_START(untrusted_lsm_hooks) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 8af4351536cf..16535349b22f 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -28,6 +28,7 @@ #include "cgroup-internal.h" =20 #include +#include #include #include #include @@ -5334,7 +5335,8 @@ static int cgroup_procs_write_permission(struct cgrou= p *src_cgrp, return 0; } =20 -static int cgroup_attach_permissions(struct cgroup *src_cgrp, +static int cgroup_attach_permissions(struct task_struct *task, + 
struct cgroup *src_cgrp, struct cgroup *dst_cgrp, struct super_block *sb, bool threadgroup, struct cgroup_namespace *ns) @@ -5350,9 +5352,9 @@ static int cgroup_attach_permissions(struct cgroup *s= rc_cgrp, return ret; =20 if (!threadgroup && (src_cgrp->dom_cgrp !=3D dst_cgrp->dom_cgrp)) - ret =3D -EOPNOTSUPP; + return -EOPNOTSUPP; =20 - return ret; + return bpf_lsm_cgroup_attach(task, src_cgrp, dst_cgrp, sb, threadgroup, n= s); } =20 static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, @@ -5384,7 +5386,7 @@ static ssize_t __cgroup_procs_write(struct kernfs_ope= n_file *of, char *buf, * inherited fd attacks. */ scoped_with_creds(of->file->f_cred) - ret =3D cgroup_attach_permissions(src_cgrp, dst_cgrp, + ret =3D cgroup_attach_permissions(task, src_cgrp, dst_cgrp, of->file->f_path.dentry->d_sb, threadgroup, ctx->ns); if (ret) @@ -6669,6 +6671,7 @@ static struct cgroup *cgroup_get_from_file(struct fil= e *f) =20 /** * cgroup_css_set_fork - find or create a css_set for a child process + * @task: the task to be attached * @kargs: the arguments passed to create the child process * * This functions finds or creates a new css_set which the child @@ -6683,7 +6686,8 @@ static struct cgroup *cgroup_get_from_file(struct fil= e *f) * before grabbing cgroup_threadgroup_rwsem and will hold a reference * to the target cgroup. */ -static int cgroup_css_set_fork(struct kernel_clone_args *kargs) +static int cgroup_css_set_fork(struct task_struct *task, + struct kernel_clone_args *kargs) __acquires(&cgroup_mutex) __acquires(&cgroup_threadgroup_rwsem) { int ret; @@ -6752,7 +6756,7 @@ static int cgroup_css_set_fork(struct kernel_clone_ar= gs *kargs) * cgroup.procs of the cgroup indicated by @dfd_cgroup. This allows us * to always use the caller's credentials. 
*/ - ret =3D cgroup_attach_permissions(cset->dfl_cgrp, dst_cgrp, sb, + ret =3D cgroup_attach_permissions(task, cset->dfl_cgrp, dst_cgrp, sb, !(kargs->flags & CLONE_THREAD), current->nsproxy->cgroup_ns); if (ret) @@ -6824,7 +6828,7 @@ int cgroup_can_fork(struct task_struct *child, struct= kernel_clone_args *kargs) struct cgroup_subsys *ss; int i, j, ret; =20 - ret =3D cgroup_css_set_fork(kargs); + ret =3D cgroup_css_set_fork(child, kargs); if (ret) return ret; =20 --=20 2.47.3 From nobody Fri Apr 3 09:52:02 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 06C481F418F; Fri, 20 Feb 2026 00:38:51 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771547932; cv=none; b=CsZz6uijMNyy2k1+VAEgElcGZt9ULLTxgBsra9LRIYR2NKJUF75zRoKkDbDgpu3JcFf1qvYX3GiABvnNyTJARD6rcrNzOv5TUbhxALg6A34h3ofqJmaVKXyNSddZMlMZqtd5ApT2gVJr0IPnnlCpVnoyLNhxGOtkzgPSB6PEvjs= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771547932; c=relaxed/simple; bh=2Y18MqPJiqmQVhKG7YQFspLjTM48CG/DtkSLoIi4Iow=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=j8dlW3Z9Qk7Bp9oGacKobMS0kWe+L/gb+VQmB2qydt6bgTxzfU6rc3wYbMiU+47xOehi1WxpxVpSXowFPuXV01IevokyV9k4Ypnrq7/16vPwKHhu9L52BLuTYbkjtc13H87wYJzHI/4+M5Pq0+RIe0qfk6TJHGDrFytyryAFqqI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=H1moi2wX; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="H1moi2wX" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2E02EC19423; Fri, 20 Feb 
2026 00:38:48 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771547931; bh=2Y18MqPJiqmQVhKG7YQFspLjTM48CG/DtkSLoIi4Iow=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=H1moi2wXruReO4MD5w/4SbDwD21QYAFcHx6YNZTKWco5i242sHeKzYbbrzcTd09in ZP3cmgMfraUiXlNW4V7z7Hzt+EYXEQ0dW2OLD220Jwe3r7TBlVjvFOYAuQhOkjweP8 un7FZgRTFDEWGBy6FJ9G/LoYH1a7wl0B23LStgEg5pOaS0BWQkPzrjtLpzT3z0Km9c HWLMR6QZgf3utEhs0h4Tb8kByqwsZKMDQDQUmt0RBrJvFDq3BN29psa82UAsxaaMC4 QdQolFOK2DP2g8EdnBQ29jxyHzPor0/UNT4k5WLo52JTAIr16eSHJh3CZSMFDzbXra 6O6L7WEX1wPXw== From: Christian Brauner Date: Fri, 20 Feb 2026 01:38:31 +0100 Subject: [PATCH 3/4] selftests/bpf: add ns hook selftest Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260220-work-bpf-namespace-v1-3-866207db7b83@kernel.org> References: <20260220-work-bpf-namespace-v1-0-866207db7b83@kernel.org> In-Reply-To: <20260220-work-bpf-namespace-v1-0-866207db7b83@kernel.org> To: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Tejun Heo Cc: KP Singh , bpf@vger.kernel.org, linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, Lennart Poettering , Christian Brauner X-Mailer: b4 0.15-dev-47773 X-Developer-Signature: v=1; a=openpgp-sha256; l=6123; i=brauner@kernel.org; h=from:subject:message-id; bh=2Y18MqPJiqmQVhKG7YQFspLjTM48CG/DtkSLoIi4Iow=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWROXyuwp1nHZ8kM0/AVap3XHvUH+22dcb9kYcaXSXuub b0ikxP9sKOUhUGMi0FWTJHFod0kXG45T8Vmo0wNmDmsTCBDGLg4BWAiR3Yw/GZNWLDqcLy1hmlT Rvjb95pbl4csXipzWClEiivg0Zs0j15Ghq933/u+3bgqa0sbv6vL1YvZtoEz7nf+8g/SF/eLz8g 6ygoA X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Add a BPF LSM selftest that implements a "lock on entry" namespace sandbox policy. 
Signed-off-by: Christian Brauner Reviewed-by: Alan Maguire Tested-by: Alan Maguire --- .../testing/selftests/bpf/prog_tests/ns_sandbox.c | 99 ++++++++++++++++++= ++++ .../testing/selftests/bpf/progs/test_ns_sandbox.c | 91 ++++++++++++++++++= ++ 2 files changed, 190 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/ns_sandbox.c b/tools/te= sting/selftests/bpf/prog_tests/ns_sandbox.c new file mode 100644 index 000000000000..0ac2acfb6365 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ns_sandbox.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Christian Brauner */ + +/* + * Test BPF LSM namespace sandbox: once you enter, you stay. + * + * The parent creates a tracked namespace, then forks a child. + * The child enters the tracked namespace (allowed) and is then locked + * out of any further setns(). + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include "test_ns_sandbox.skel.h" + +void test_ns_sandbox(void) +{ + int orig_utsns =3D -1, new_utsns =3D -1; + struct test_ns_sandbox *skel =3D NULL; + int err, status; + pid_t child; + + /* Save FD to current (host) namespace */ + orig_utsns =3D open("/proc/self/ns/uts", O_RDONLY); + if (!ASSERT_OK_FD(orig_utsns, "open orig utsns")) + goto close_fds; + + skel =3D test_ns_sandbox__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + goto close_fds; + + err =3D test_ns_sandbox__attach(skel); + if (!ASSERT_OK(err, "skel attach")) + goto destroy; + + skel->bss->monitor_pid =3D getpid(); + + /* + * Create a sandbox namespace. The alloc hook records its + * inum because this task's pid matches monitor_pid. + */ + err =3D unshare(CLONE_NEWUTS); + if (!ASSERT_OK(err, "unshare sandbox")) + goto destroy; + + new_utsns =3D open("/proc/self/ns/uts", O_RDONLY); + if (!ASSERT_OK_FD(new_utsns, "open sandbox utsns")) + goto restore; + + /* + * Return parent to host namespace. 
The host namespace is not + * in the map so the install hook lets us through. + */ + err =3D setns(orig_utsns, CLONE_NEWUTS); + if (!ASSERT_OK(err, "parent setns host utsns")) + goto restore; + + /* + * Fork a child that: + * 1. Enters the sandbox UTS namespace =E2=80=94 succeeds and locks it. + * 2. Tries to switch to host UTS =E2=80=94 denied (locked). + */ + child =3D fork(); + if (child =3D=3D 0) { + /* Enter tracked namespace =E2=80=94 allowed, we get locked */ + if (setns(new_utsns, CLONE_NEWUTS) !=3D 0) + _exit(1); + + /* Locked: switching to host must fail */ + if (setns(orig_utsns, CLONE_NEWUTS) !=3D -1 || + errno !=3D EPERM) + _exit(2); + + _exit(0); + } + if (!ASSERT_GE(child, 0, "fork child")) + goto restore; + + err =3D waitpid(child, &status, 0); + ASSERT_GT(err, 0, "waitpid child"); + ASSERT_TRUE(WIFEXITED(status), "child exited"); + ASSERT_EQ(WEXITSTATUS(status), 0, "child locked in"); + + goto destroy; + +restore: + setns(orig_utsns, CLONE_NEWUTS); +destroy: + test_ns_sandbox__destroy(skel); +close_fds: + if (new_utsns >=3D 0) + close(new_utsns); + if (orig_utsns >=3D 0) + close(orig_utsns); +} diff --git a/tools/testing/selftests/bpf/progs/test_ns_sandbox.c b/tools/te= sting/selftests/bpf/progs/test_ns_sandbox.c new file mode 100644 index 000000000000..75c3493932a1 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ns_sandbox.c @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Christian Brauner */ + +/* + * BPF LSM namespace sandbox: once you enter, you stay. + * + * A designated process creates namespaces (tracked via alloc). When + * any other process joins one of those namespaces it gets recorded in + * locked_tasks. From that point on that process cannot setns() into + * any other namespace =E2=80=94 it is locked in. Task local storage is + * automatically freed when the task exits. + */ + +#include "vmlinux.h" +#include +#include +#include + +/* + * Namespaces created by the monitored process. 
+ * Key: namespace inode number. + * Value: namespace type (CLONE_NEW* flag). + */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u32); +} known_namespaces SEC(".maps"); + +/* PID of the process whose namespace creations are tracked. */ +int monitor_pid; + +/* + * Task local storage: marks tasks that have entered a tracked namespace + * and are now locked. + */ +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, __u8); +} locked_tasks SEC(".maps"); + +char _license[] SEC("license") =3D "GPL"; + +/* Only the monitored process's namespace creations are tracked. */ +SEC("lsm.s/namespace_alloc") +int BPF_PROG(ns_alloc, struct ns_common *ns) +{ + __u32 inum, ns_type; + + if ((bpf_get_current_pid_tgid() >> 32) !=3D monitor_pid) + return 0; + + inum =3D ns->inum; + ns_type =3D ns->ns_type; + bpf_map_update_elem(&known_namespaces, &inum, &ns_type, BPF_ANY); + + return 0; +} + +/* + * Enforce the lock-in policy for all tasks: + * - Already locked? Deny any setns. + * - Entering a tracked namespace? Lock the task and allow. + * - Everything else passes through. 
+ */ +SEC("lsm.s/namespace_install") +int BPF_PROG(ns_install, struct nsset *nsset, struct ns_common *ns) +{ + struct task_struct *task =3D bpf_get_current_task_btf(); + __u32 inum =3D ns->inum; + + if (bpf_task_storage_get(&locked_tasks, task, 0, 0)) + return -EPERM; + + if (bpf_map_lookup_elem(&known_namespaces, &inum)) + bpf_task_storage_get(&locked_tasks, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + + return 0; +} + +SEC("lsm/namespace_free") +void BPF_PROG(ns_free, struct ns_common *ns) +{ + __u32 inum =3D ns->inum; + + bpf_map_delete_elem(&known_namespaces, &inum); +} --=20 2.47.3 From nobody Fri Apr 3 09:52:02 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 9418B1F4180; Fri, 20 Feb 2026 00:38:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771547934; cv=none; b=gta2CEIjNkrzxDNcP0C30ZV31oU9xFSlUZ1awoifL4DhzO6Yhn/DOfmiJ8fAGC2QS18MZROxzNeAg9OqOgAtfLs8aG1ZuohA8H5A50BHJFwOpYr3YMBQe7bwQImBd1paTBZb9d27ZYT822H9Ww+2FWy6pksZ2f4sT9wcj5XvIQQ= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1771547934; c=relaxed/simple; bh=mQc8EiFwm60GFoFhfzxR1JRaHCQHgCBHuPjqBgX857M=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=qp4e7wM0dKsVUipypVL/THmxI40T48V3vKCBH8SKgpTbamNTjQBu+e9bXirSwD3pD0SgVj6z8lXgbFnwj2PeZ78e7dZbC7sei5R8PGRyZ+bY990tJCEvi4+xF4IhmsunkxACKXLizOLQVyYDFlhV2H9VDbvMyEyOmTGAveWNm/k= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=gL+2sGWj; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org 
header.i=@kernel.org header.b="gL+2sGWj" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 064D9C4CEF7; Fri, 20 Feb 2026 00:38:51 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1771547934; bh=mQc8EiFwm60GFoFhfzxR1JRaHCQHgCBHuPjqBgX857M=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=gL+2sGWjZCvJ7vGGBvagZx90u2ZhzAePGvsfep9cOMK7bMd9vSHYwn7OonqenSeKC 67zhpe+gnmZKBfV92Sc4IxCLHJVl4+/ZzAIsQ7+y1R/NxmeoID1btlrHQyFhguou23 AifF4yXZHf/sD9CvjfI0YrM/AkMWPb4Eb9z7h7G6MwNohJxrR5KmMpL4hgwJQmX1jK 5pne5Oz/Z3WXfstYJJtiTW3FhwCms22kk4q/aU5peIw97Gxuz4ysiLMLn09ULj6ojz m8cRXszic3//7ay9h8Pcn/UyP0+edzED/Hu35mYs6lmiS+sfkaxvzI/s0PgVzlCbdR dW5aVtZ9pWvLQ== From: Christian Brauner Date: Fri, 20 Feb 2026 01:38:32 +0100 Subject: [PATCH 4/4] selftests/bpf: add cgroup attach selftests Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20260220-work-bpf-namespace-v1-4-866207db7b83@kernel.org> References: <20260220-work-bpf-namespace-v1-0-866207db7b83@kernel.org> In-Reply-To: <20260220-work-bpf-namespace-v1-0-866207db7b83@kernel.org> To: Alexei Starovoitov , Daniel Borkmann , Andrii Nakryiko , Martin KaFai Lau , Tejun Heo Cc: KP Singh , bpf@vger.kernel.org, linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, Lennart Poettering , Christian Brauner X-Mailer: b4 0.15-dev-47773 X-Developer-Signature: v=1; a=openpgp-sha256; l=13245; i=brauner@kernel.org; h=from:subject:message-id; bh=mQc8EiFwm60GFoFhfzxR1JRaHCQHgCBHuPjqBgX857M=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWROXyvAt7/kxAmdY6zyehaLr1VKcP/TvHm2y36nqxbbG veO60eFO0pZGMS4GGTFFFkc2k3C5ZbzVGw2ytSAmcPKBDKEgYtTACZSzc/IcOjdr5B0H9FZSy4z z633Cl6sxxO5Ru3lxJsfVwvPrHnpH8HIMD9X7/T+xcW3OpIObzLhuKORwNcp9s5wM1NHiFI1x/F bjAA= X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Signed-off-by: 
Christian Brauner Reviewed-by: Alan Maguire Tested-by: Alan Maguire --- .../selftests/bpf/prog_tests/cgroup_attach.c | 362 +++++++++++++++++= ++++ .../selftests/bpf/progs/test_cgroup_attach.c | 85 +++++ 2 files changed, 447 insertions(+) diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach.c b/tools= /testing/selftests/bpf/prog_tests/cgroup_attach.c new file mode 100644 index 000000000000..05addf93af46 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach.c @@ -0,0 +1,362 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Christian Brauner */ + +/* + * Test the bpf_lsm_cgroup_attach hook. + * + * Verifies that a BPF LSM program can supervise cgroup migration + * through both the cgroup.procs write path and the clone3 + + * CLONE_INTO_CGROUP path. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "cgroup_helpers.h" +#include "test_cgroup_attach.skel.h" + +/* Must match the definition in progs/test_cgroup_attach.c */ +struct attach_event { + __u32 task_pid; + __u64 src_cgrp_id; + __u64 dst_cgrp_id; + __u8 threadgroup; + __u32 hook_count; +}; + +#ifndef CLONE_INTO_CGROUP +#define CLONE_INTO_CGROUP 0x200000000ULL +#endif + +#ifndef __NR_clone3 +#define __NR_clone3 435 +#endif + +struct __clone_args { + __aligned_u64 flags; + __aligned_u64 pidfd; + __aligned_u64 child_tid; + __aligned_u64 parent_tid; + __aligned_u64 exit_signal; + __aligned_u64 stack; + __aligned_u64 stack_size; + __aligned_u64 tls; + __aligned_u64 set_tid; + __aligned_u64 set_tid_size; + __aligned_u64 cgroup; +}; + +static pid_t do_clone3(int cgroup_fd) +{ + struct __clone_args args =3D { + .flags =3D CLONE_INTO_CGROUP, + .exit_signal =3D SIGCHLD, + .cgroup =3D cgroup_fd, + }; + + return syscall(__NR_clone3, &args, sizeof(args)); +} + +/* + * Subtest: deny_migration + * + * Verify that the BPF hook can deny cgroup migration through 
cgroup.procs + * and that detaching the BPF program removes enforcement. + */ +static void test_deny_migration(void) +{ + struct test_cgroup_attach *skel =3D NULL; + int allowed_fd =3D -1, denied_fd =3D -1; + unsigned long long denied_cgid; + int err, status; + __u64 key; + __u8 val =3D 1; + pid_t child; + + if (!ASSERT_OK(setup_cgroup_environment(), "setup_cgroup_env")) + return; + + allowed_fd =3D create_and_get_cgroup("/allowed"); + if (!ASSERT_GE(allowed_fd, 0, "create /allowed")) + goto cleanup; + + denied_fd =3D create_and_get_cgroup("/denied"); + if (!ASSERT_GE(denied_fd, 0, "create /denied")) + goto cleanup; + + skel =3D test_cgroup_attach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + goto cleanup; + + err =3D test_cgroup_attach__attach(skel); + if (!ASSERT_OK(err, "skel attach")) + goto cleanup; + + skel->bss->monitored_pid =3D getpid(); + + denied_cgid =3D get_cgroup_id("/denied"); + if (!ASSERT_NEQ(denied_cgid, 0ULL, "get denied cgroup id")) + goto cleanup; + + key =3D denied_cgid; + err =3D bpf_map__update_elem(skel->maps.denied_cgroups, + &key, sizeof(key), &val, sizeof(val), 0); + if (!ASSERT_OK(err, "add denied cgroup")) + goto cleanup; + + /* + * Forked children must use join_parent_cgroup() because the + * cgroup workdir was created under the parent's PID and + * join_cgroup() constructs paths using getpid(). 
+ */ + + /* Child migrating to /allowed should succeed */ + child =3D fork(); + if (!ASSERT_GE(child, 0, "fork child allowed")) + goto cleanup; + if (child =3D=3D 0) { + if (join_parent_cgroup("/allowed")) + _exit(1); + _exit(0); + } + err =3D waitpid(child, &status, 0); + ASSERT_GT(err, 0, "waitpid allowed"); + ASSERT_TRUE(WIFEXITED(status), "allowed child exited"); + ASSERT_EQ(WEXITSTATUS(status), 0, "allowed migration succeeds"); + + /* Child migrating to /denied should fail */ + child =3D fork(); + if (!ASSERT_GE(child, 0, "fork child denied")) + goto cleanup; + if (child =3D=3D 0) { + if (join_parent_cgroup("/denied") =3D=3D 0) + _exit(1); /* Should have failed */ + if (errno !=3D EPERM) + _exit(2); /* Wrong errno */ + _exit(0); + } + err =3D waitpid(child, &status, 0); + ASSERT_GT(err, 0, "waitpid denied"); + ASSERT_TRUE(WIFEXITED(status), "denied child exited"); + ASSERT_EQ(WEXITSTATUS(status), 0, "denied migration blocked"); + + /* Detach BPF =E2=80=94 /denied should now be accessible */ + test_cgroup_attach__detach(skel); + + child =3D fork(); + if (!ASSERT_GE(child, 0, "fork child post-detach")) + goto cleanup; + if (child =3D=3D 0) { + if (join_parent_cgroup("/denied")) + _exit(1); + _exit(0); + } + err =3D waitpid(child, &status, 0); + ASSERT_GT(err, 0, "waitpid post-detach"); + ASSERT_TRUE(WIFEXITED(status), "post-detach child exited"); + ASSERT_EQ(WEXITSTATUS(status), 0, "post-detach migration free"); + +cleanup: + if (skel) + test_cgroup_attach__destroy(skel); + if (allowed_fd >=3D 0) + close(allowed_fd); + if (denied_fd >=3D 0) + close(denied_fd); + cleanup_cgroup_environment(); +} + +/* + * Subtest: verify_hook_args + * + * Verify that the hook receives correct src_cgrp, dst_cgrp, task pid, + * and threadgroup values. 
+ */ +static void test_verify_hook_args(void) +{ + struct test_cgroup_attach *skel =3D NULL; + struct attach_event evt =3D {}; + unsigned long long src_cgid, dst_cgid; + int src_fd =3D -1, dst_fd =3D -1; + __u32 map_key =3D 0; + char pid_str[32]; + int err; + + if (!ASSERT_OK(setup_cgroup_environment(), "setup_cgroup_env")) + return; + + src_fd =3D create_and_get_cgroup("/src"); + if (!ASSERT_GE(src_fd, 0, "create /src")) + goto cleanup; + + dst_fd =3D create_and_get_cgroup("/dst"); + if (!ASSERT_GE(dst_fd, 0, "create /dst")) + goto cleanup; + + /* Move ourselves to /src first */ + if (!ASSERT_OK(join_cgroup("/src"), "join /src")) + goto cleanup; + + skel =3D test_cgroup_attach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + goto cleanup; + + err =3D test_cgroup_attach__attach(skel); + if (!ASSERT_OK(err, "skel attach")) + goto cleanup; + + skel->bss->monitored_pid =3D getpid(); + + src_cgid =3D get_cgroup_id("/src"); + dst_cgid =3D get_cgroup_id("/dst"); + if (!ASSERT_NEQ(src_cgid, 0ULL, "get src cgroup id")) + goto cleanup; + if (!ASSERT_NEQ(dst_cgid, 0ULL, "get dst cgroup id")) + goto cleanup; + + /* Migrate self to /dst via cgroup.procs (threadgroup=3Dtrue) */ + snprintf(pid_str, sizeof(pid_str), "%d", getpid()); + if (!ASSERT_OK(write_cgroup_file("/dst", "cgroup.procs", pid_str), + "migrate to /dst")) + goto cleanup; + + /* Read the recorded event */ + err =3D bpf_map__lookup_elem(skel->maps.last_event, + &map_key, sizeof(map_key), + &evt, sizeof(evt), 0); + if (!ASSERT_OK(err, "read last_event")) + goto cleanup; + + ASSERT_EQ(evt.src_cgrp_id, src_cgid, "src_cgrp_id matches"); + ASSERT_EQ(evt.dst_cgrp_id, dst_cgid, "dst_cgrp_id matches"); + ASSERT_EQ(evt.task_pid, (__u32)getpid(), "task_pid matches"); + ASSERT_EQ(evt.threadgroup, 1, "threadgroup is true for cgroup.procs"); + ASSERT_GE(evt.hook_count, (__u32)1, "hook fired at least once"); + +cleanup: + if (skel) + test_cgroup_attach__destroy(skel); + if (src_fd >=3D 0) + close(src_fd); + 
if (dst_fd >=3D 0) + close(dst_fd); + cleanup_cgroup_environment(); +} + +/* + * Subtest: clone_into_cgroup + * + * Verify the hook fires on the clone3(CLONE_INTO_CGROUP) path and can + * deny spawning a child directly into a cgroup. + */ +static void test_clone_into_cgroup(void) +{ + struct test_cgroup_attach *skel =3D NULL; + int allowed_fd =3D -1, denied_fd =3D -1; + unsigned long long denied_cgid, allowed_cgid; + struct attach_event evt =3D {}; + __u32 map_key =3D 0; + __u64 key; + __u8 val =3D 1; + int err, status; + pid_t child; + + if (!ASSERT_OK(setup_cgroup_environment(), "setup_cgroup_env")) + return; + + allowed_fd =3D create_and_get_cgroup("/clone_allowed"); + if (!ASSERT_GE(allowed_fd, 0, "create /clone_allowed")) + goto cleanup; + + denied_fd =3D create_and_get_cgroup("/clone_denied"); + if (!ASSERT_GE(denied_fd, 0, "create /clone_denied")) + goto cleanup; + + skel =3D test_cgroup_attach__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel open_and_load")) + goto cleanup; + + err =3D test_cgroup_attach__attach(skel); + if (!ASSERT_OK(err, "skel attach")) + goto cleanup; + + skel->bss->monitored_pid =3D getpid(); + + denied_cgid =3D get_cgroup_id("/clone_denied"); + allowed_cgid =3D get_cgroup_id("/clone_allowed"); + if (!ASSERT_NEQ(denied_cgid, 0ULL, "get denied cgroup id")) + goto cleanup; + if (!ASSERT_NEQ(allowed_cgid, 0ULL, "get allowed cgroup id")) + goto cleanup; + + key =3D denied_cgid; + err =3D bpf_map__update_elem(skel->maps.denied_cgroups, + &key, sizeof(key), &val, sizeof(val), 0); + if (!ASSERT_OK(err, "add denied cgroup")) + goto cleanup; + + /* clone3 into denied cgroup should fail */ + child =3D do_clone3(denied_fd); + if (child >=3D 0) { + waitpid(child, NULL, 0); + ASSERT_LT(child, 0, "clone3 into denied should fail"); + goto cleanup; + } + if (errno =3D=3D ENOSYS || errno =3D=3D E2BIG) { + test__skip(); + goto cleanup; + } + ASSERT_EQ(errno, EPERM, "clone3 denied errno"); + + /* clone3 into allowed cgroup should succeed */ + child =3D 
do_clone3(allowed_fd); + if (!ASSERT_GE(child, 0, "clone3 into allowed")) + goto cleanup; + if (child =3D=3D 0) + _exit(0); + + err =3D waitpid(child, &status, 0); + ASSERT_GT(err, 0, "waitpid clone3 allowed"); + ASSERT_TRUE(WIFEXITED(status), "clone3 child exited"); + ASSERT_EQ(WEXITSTATUS(status), 0, "clone3 child ok"); + + /* Verify the hook recorded the allowed clone */ + err =3D bpf_map__lookup_elem(skel->maps.last_event, + &map_key, sizeof(map_key), + &evt, sizeof(evt), 0); + if (!ASSERT_OK(err, "read last_event")) + goto cleanup; + + ASSERT_EQ(evt.dst_cgrp_id, allowed_cgid, "clone3 dst_cgrp_id"); + +cleanup: + if (skel) + test_cgroup_attach__destroy(skel); + if (allowed_fd >=3D 0) + close(allowed_fd); + if (denied_fd >=3D 0) + close(denied_fd); + cleanup_cgroup_environment(); +} + +void test_cgroup_attach(void) +{ + if (test__start_subtest("deny_migration")) + test_deny_migration(); + if (test__start_subtest("verify_hook_args")) + test_verify_hook_args(); + if (test__start_subtest("clone_into_cgroup")) + test_clone_into_cgroup(); +} diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_attach.c b/tools= /testing/selftests/bpf/progs/test_cgroup_attach.c new file mode 100644 index 000000000000..90915d1d7d64 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cgroup_attach.c @@ -0,0 +1,85 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2026 Christian Brauner */ + +/* + * BPF LSM cgroup attach policy: supervise cgroup migration. + * + * A designated process populates a denied_cgroups map with cgroup IDs + * that should reject migration. The cgroup_attach hook checks every + * migration and returns -EPERM when the destination cgroup is denied. + * It also records the last hook invocation into last_event for the + * userspace test to verify arguments. 
+ */ + +#include "vmlinux.h" +#include +#include +#include +#include + +struct attach_event { + __u32 task_pid; + __u64 src_cgrp_id; + __u64 dst_cgrp_id; + __u8 threadgroup; + __u32 hook_count; +}; + +/* + * Cgroups that should reject migration. + * Key: cgroup kn->id (u64). + * Value: unused marker. + */ +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 16); + __type(key, __u64); + __type(value, __u8); +} denied_cgroups SEC(".maps"); + +/* + * Record the last hook invocation for argument verification. + * Key: 0. + * Value: struct attach_event. + */ +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct attach_event); +} last_event SEC(".maps"); + +__u32 monitored_pid; + +char _license[] SEC("license") =3D "GPL"; + +SEC("lsm.s/cgroup_attach") +int BPF_PROG(cgroup_attach, struct task_struct *task, + struct cgroup *src_cgrp, struct cgroup *dst_cgrp, + struct super_block *sb, bool threadgroup, + struct cgroup_namespace *ns) +{ + struct task_struct *current =3D bpf_get_current_task_btf(); + struct attach_event *evt; + __u64 dst_id; + __u32 key =3D 0; + + dst_id =3D BPF_CORE_READ(dst_cgrp, kn, id); + + if (bpf_map_lookup_elem(&denied_cgroups, &dst_id)) + return -EPERM; + + if (!monitored_pid || current->tgid !=3D monitored_pid) + return 0; + + evt =3D bpf_map_lookup_elem(&last_event, &key); + if (evt) { + evt->task_pid =3D task->pid; + evt->src_cgrp_id =3D BPF_CORE_READ(src_cgrp, kn, id); + evt->dst_cgrp_id =3D dst_id; + evt->threadgroup =3D threadgroup ? 1 : 0; + evt->hook_count++; + } + + return 0; +} --=20 2.47.3