From nobody Sun Feb 8 14:23:15 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7293B537E9; Sun, 9 Nov 2025 21:13:23 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722803; cv=none; b=NuCuMUsS56yd56/gNwqykrYKgUGaXBgcMdum0nroFZQHaII4N5KZh2ikSJtPAwJTOeXDTsHpsA8KMduyHs+JkElajvEagjDTxCMKsfcYQX8bXWUJ3Tqjvy9npzLcxOE/crmG6za0FsIclChwPVXmkNQrvV67QUtgA/1tmxj+GLE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722803; c=relaxed/simple; bh=e8o+HFjAfwAZDJFrtqSN9t1c4O21+LqtO4Xq3Z91+Rc=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=DQignIF2RP+h8nAox9TY2yBH/BjRQTq3uvJ7HJLsdGymuINRUkhwd88vdm8CrrK713gvFn/aVCIeEBmAMhT3Da8AEJF6+N2l0avWNKfi0rdCd9KZRFFke8zf/KSeZ6Y6QquaZQ/LpTBedNDfnsEGkhPdgcw0I5ozpzY4+Rg1mZE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=kPnWD0id; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="kPnWD0id" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 7210CC19423; Sun, 9 Nov 2025 21:13:18 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1762722803; bh=e8o+HFjAfwAZDJFrtqSN9t1c4O21+LqtO4Xq3Z91+Rc=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=kPnWD0idXLT923SzBCwSSPqIuJZot0Gh/CaFdnJHfa4h1skMH2hsLNpza/0y+rIPz Ma9LOG2TG/lbG717bwJAMh15bwR6Z2dZb5PsAW+IBoC7oYropAwoaM2o7e3MnMqxyi BzQ7+4fOPCn+RMPsuqk3mrbVput4H4knzFzy5RlqnA9Q+t3bQWRGRE9leu6fnPDlPg 
IedBF7UySDRdDJCZgUsL4GQfQVp6B3sXJM2N37qYrwuDsyI1IVE2p0lQRE/C648zYm n4PeOjF37LF3ehTd97d25coG83YBjKzgOb40MMQQ+UtrRUP6XkTzahxA53vXbZoNXC clAth6UtPy/NQ== From: Christian Brauner Date: Sun, 09 Nov 2025 22:11:22 +0100 Subject: [PATCH 1/8] ns: don't skip active reference count initialization Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20251109-namespace-6-19-fixes-v1-1-ae8a4ad5a3b3@kernel.org> References: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> In-Reply-To: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> To: linux-fsdevel@vger.kernel.org, Josef Bacik , Jeff Layton Cc: Jann Horn , Mike Yuan , =?utf-8?q?Zbigniew_J=C4=99drzejewski-Szmek?= , Lennart Poettering , Daan De Meyer , Aleksa Sarai , Amir Goldstein , Tejun Heo , Johannes Weiner , Thomas Gleixner , Alexander Viro , Jan Kara , linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, bpf@vger.kernel.org, Eric Dumazet , Jakub Kicinski , netdev@vger.kernel.org, Arnd Bergmann , Christian Brauner X-Mailer: b4 0.15-dev-a6db3 X-Developer-Signature: v=1; a=openpgp-sha256; l=1127; i=brauner@kernel.org; h=from:subject:message-id; bh=e8o+HFjAfwAZDJFrtqSN9t1c4O21+LqtO4Xq3Z91+Rc=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWQKMr8Q+245RWauFlvbwZiLqla79Jbe1Vmbblz7Qtyzx M76Q59VRykLgxgXg6yYIotDu0m43HKeis1GmRowc1iZQIYwcHEKwEQm1DL8s7bevlLM/1RjctH3 6eXugrst2/WXLf3x52o5U2j/756L3IwMn1eJLJ1dFV3ZapA1Penh1Ld7PCdHzfSTuJtftz2558I fDgA= X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Don't skip active reference count initialization for initial namespaces. Doing this will break network namespace active reference counting. 
Fixes: 3a18f809184b ("ns: add active reference count") Signed-off-by: Christian Brauner --- kernel/nscommon.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/kernel/nscommon.c b/kernel/nscommon.c index 6fe1c747fa46..d67ae7ad7759 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -54,7 +54,7 @@ static void ns_debug(struct ns_common *ns, const struct p= roc_ns_operations *ops) =20 int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_= ns_operations *ops, int inum) { - int ret; + int ret =3D 0; =20 refcount_set(&ns->__ns_ref, 1); ns->stashed =3D NULL; @@ -74,11 +74,10 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type,= const struct proc_ns_ope ns_debug(ns, ops); #endif =20 - if (inum) { + if (inum) ns->inum =3D inum; - return 0; - } - ret =3D proc_alloc_inum(&ns->inum); + else + ret =3D proc_alloc_inum(&ns->inum); if (ret) return ret; /* --=20 2.47.3 From nobody Sun Feb 8 14:23:15 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id B8483263F38; Sun, 9 Nov 2025 21:13:28 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722808; cv=none; b=P8/zlsMDc5eg0OR0FVLRMRNumCwGGfgemJPwKgWkoLuHxZOrkhxJ1sSM6Dum0wNGrZTy2AK1Leb1KJ1SWsE0VX+GW5ATQapLj4mO2h+Yu+3dnoFYxZh5gLwVmraNJB3zN9GjNShZ3m0UDZ8BA5YmHR6wjomos4F0YK5DZUOfZoM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722808; c=relaxed/simple; bh=mlYOb4HtaVV/70l/JDdVHAwQod1IKKXY70EZg+ntWnk=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; 
b=OiZs5UyXdCRbIoLrjq9PdoClsjFfY4WZiDeMnjR0D3GyRZVrok//bHvi980/uIcgeCCHWF7Jz8YGMYuiM4RXb9Wkv+fbiyzw3CcR/SvJO2Ef7YxH5QbByC8vwkonxHpYSwCo/EanpuR+we11xDjagp2aq9E8AKq/WO46Y/jNxRc= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=l+DrKrzn; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="l+DrKrzn" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 78F24C4CEFB; Sun, 9 Nov 2025 21:13:23 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1762722808; bh=mlYOb4HtaVV/70l/JDdVHAwQod1IKKXY70EZg+ntWnk=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=l+DrKrznMk291oiVMWT2W4NyaJYSkfWs7q3kVFizjoDJqZTWCOS66w7a8g8aNoWdE O7uXo10XIvPe/hN4h+wklQWdPrbg8uR0cQFI46Y4ZIQ7rxWBzkVgkhUpleMeeXD6ua XBouleJ/3pkd23cxOkeDbZKPiqgJ9fcC2c7k/JRHXpDqzzHdsStfbfiEE+sEq5uG9T x8ffhtyTxMc+UwV9n9EenhmCSMWxHqXqqpkkyj6ZFz5gaSXnhZKyfE4QcTdf+YWoc/ EtcXmD/3frLj4LGSESY7u8R58UTP2nJXHX+otc52oSYTT5y8/HCHuZn49cmNnjNW6p gD/Eubs4eqLdA== From: Christian Brauner Date: Sun, 09 Nov 2025 22:11:23 +0100 Subject: [PATCH 2/8] ns: don't increment or decrement initial namespaces Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20251109-namespace-6-19-fixes-v1-2-ae8a4ad5a3b3@kernel.org> References: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> In-Reply-To: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> To: linux-fsdevel@vger.kernel.org, Josef Bacik , Jeff Layton Cc: Jann Horn , Mike Yuan , =?utf-8?q?Zbigniew_J=C4=99drzejewski-Szmek?= , Lennart Poettering , Daan De Meyer , Aleksa Sarai , Amir Goldstein , Tejun Heo , Johannes Weiner , Thomas Gleixner , Alexander Viro , Jan Kara , 
linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, bpf@vger.kernel.org, Eric Dumazet , Jakub Kicinski , netdev@vger.kernel.org, Arnd Bergmann , Christian Brauner X-Mailer: b4 0.15-dev-a6db3 X-Developer-Signature: v=1; a=openpgp-sha256; l=3636; i=brauner@kernel.org; h=from:subject:message-id; bh=mlYOb4HtaVV/70l/JDdVHAwQod1IKKXY70EZg+ntWnk=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWQKMr9Y2vOUpf28kGpY723eh/uP9O8XqmcvZDKIq3VgY f7sEbqko5SFQYyLQVZMkcWh3SRcbjlPxWajTA2YOaxMIEMYuDgFYCL8VxkZjm255bWQI0Pxl2/H gotSs5lCFx4/0JmzPVwy4yiLoGPTDob/bmaKT1YJ3tJI+hvHVmeaMeuq2lapcx4Lj24oq9LIn3y FEQA= X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 There's no need to bump the active reference counts of initial namespaces as they're always active and can simply remain at 1. Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 23 ++++++++++++++++++++--- kernel/nscommon.c | 6 ++++++ 2 files changed, 26 insertions(+), 3 deletions(-) diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index bd4492ef6ffc..791b18dc77d0 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -141,6 +141,12 @@ static __always_inline bool is_initial_namespace(struc= t ns_common *ns) IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1)); } =20 +static __always_inline bool is_ns_init_id(const struct ns_common *ns) +{ + VFS_WARN_ON_ONCE(ns->ns_id =3D=3D 0); + return ns->ns_id <=3D NS_LAST_INIT_ID; +} + #define to_ns_common(__ns) \ _Generic((__ns), \ struct cgroup_namespace *: &(__ns)->ns, \ @@ -285,14 +291,19 @@ void __ns_ref_active_get_owner(struct ns_common *ns); =20 static __always_inline void __ns_ref_active_get(struct ns_common *ns) { - WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active)); - VFS_WARN_ON_ONCE(is_initial_namespace(ns) && __ns_ref_active_read(ns) <= =3D 0); + /* Initial namespaces are always active. 
*/ + if (!is_ns_init_id(ns)) + WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active)); } #define ns_ref_active_get(__ns) \ do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) =20 static __always_inline bool __ns_ref_active_get_not_zero(struct ns_common = *ns) { + /* Initial namespaces are always active. */ + if (is_ns_init_id(ns)) + return true; + if (atomic_inc_not_zero(&ns->__ns_ref_active)) { VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); return true; @@ -307,6 +318,10 @@ void __ns_ref_active_put_owner(struct ns_common *ns); =20 static __always_inline void __ns_ref_active_put(struct ns_common *ns) { + /* Initial namespaces are always active. */ + if (is_ns_init_id(ns)) + return; + if (atomic_dec_and_test(&ns->__ns_ref_active)) { VFS_WARN_ON_ONCE(is_initial_namespace(ns)); VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); @@ -319,8 +334,10 @@ static __always_inline void __ns_ref_active_put(struct= ns_common *ns) static __always_inline struct ns_common *__must_check ns_get_unless_inacti= ve(struct ns_common *ns) { VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) && !__ns_ref_read(ns)); - if (!__ns_ref_active_read(ns)) + if (!__ns_ref_active_read(ns)) { + VFS_WARN_ON_ONCE(is_ns_init_id(ns)); return NULL; + } if (!__ns_ref_get(ns)) return NULL; return ns; diff --git a/kernel/nscommon.c b/kernel/nscommon.c index d67ae7ad7759..70cb66232e4c 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -177,6 +177,7 @@ void __ns_ref_active_put_owner(struct ns_common *ns) ns =3D ns_owner(ns); if (!ns) return; + VFS_WARN_ON_ONCE(is_ns_init_id(ns)); if (!atomic_dec_and_test(&ns->__ns_ref_active)) return; } @@ -276,6 +277,10 @@ void __ns_ref_active_put_owner(struct ns_common *ns) */ void __ns_ref_active_resurrect(struct ns_common *ns) { + /* Initial namespaces are always active. */ + if (is_ns_init_id(ns)) + return; + /* If we didn't resurrect the namespace we're done. 
*/ if (atomic_fetch_add(1, &ns->__ns_ref_active)) return; @@ -289,6 +294,7 @@ void __ns_ref_active_resurrect(struct ns_common *ns) if (!ns) return; =20 + VFS_WARN_ON_ONCE(is_ns_init_id(ns)); if (atomic_fetch_add(1, &ns->__ns_ref_active)) return; } --=20 2.47.3 From nobody Sun Feb 8 14:23:15 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id BD1B82676DE; Sun, 9 Nov 2025 21:13:33 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722813; cv=none; b=rjUcAeDZCwv5PAf51hV58Mf9pM1SmFyaqEakS/1/BpsGzZjD3OfVrnwinctvI6eXF3wriMp5QUUsgwcKmRc8EvlToeOZcG/pmr1y4umQFw+1iDvkMdW0PimsEzSmv83LZvekGbPT6dmKo4FaqkHRhT4/XtZ6uuRH5SHMRDcAMUM= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722813; c=relaxed/simple; bh=Bq9qstsnC44ddSAXmzDi/UX9dfyP5an3pKXgC51EVaA=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=Jj9mtXOzUHgCk1hVCtuyddJN4SFEv5B0Q3CExBMDqoL58P741hW7oqkGjVDnW1OuxMYRp4lpw4hJ4GhLqDv6UgOLON/8wu0oD9hMxSRGz6eUJ6MN4dHTis5En6iQvFbPPHE9el3GeTSBF0igqAHvTpZSpsVgE2zYztr8TwFD8pM= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Ygpmqw5r; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Ygpmqw5r" Received: by smtp.kernel.org (Postfix) with ESMTPSA id B9901C4CEF7; Sun, 9 Nov 2025 21:13:28 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1762722813; bh=Bq9qstsnC44ddSAXmzDi/UX9dfyP5an3pKXgC51EVaA=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; 
b=Ygpmqw5rgH8uzKl0RCQraHSu12wI6iq3rVUv/1D9eklofJqvkR1xOgtEIrLYN4SxB t8yClCwocPlXroMGqujVPyc8jXxg5cog3nfnMPHcPIPegGBh0JQJPPJOESZABJaluR /XWGQz6h2D+9seeFTOUEOTDI4S6WODcd+t2mdWNBmKYnuQ4vJTCs7Q5KJvW5KifrG6 eTwtHtlh4f4Nwm7cq9YTjHgQlVlx5eb7CbEAqhk0fCH2esPy9wk3FqG3Kw9nUR0bE3 L+hO96/nmQj2vZN0sf/QpbxTIFSpSJVFEc9yf7vnx/W5iNvG5/XeE9Ww4S55cAVQZL 4Hw22NWBVxmRw== From: Christian Brauner Date: Sun, 09 Nov 2025 22:11:24 +0100 Subject: [PATCH 3/8] ns: make sure reference are dropped outside of rcu lock Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20251109-namespace-6-19-fixes-v1-3-ae8a4ad5a3b3@kernel.org> References: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> In-Reply-To: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> To: linux-fsdevel@vger.kernel.org, Josef Bacik , Jeff Layton Cc: Jann Horn , Mike Yuan , =?utf-8?q?Zbigniew_J=C4=99drzejewski-Szmek?= , Lennart Poettering , Daan De Meyer , Aleksa Sarai , Amir Goldstein , Tejun Heo , Johannes Weiner , Thomas Gleixner , Alexander Viro , Jan Kara , linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, bpf@vger.kernel.org, Eric Dumazet , Jakub Kicinski , netdev@vger.kernel.org, Arnd Bergmann , Christian Brauner X-Mailer: b4 0.15-dev-a6db3 X-Developer-Signature: v=1; a=openpgp-sha256; l=3665; i=brauner@kernel.org; h=from:subject:message-id; bh=Bq9qstsnC44ddSAXmzDi/UX9dfyP5an3pKXgC51EVaA=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWQKMr8oVG2qbjG9w7WD4YRYwN8oVqOiW69vLPxjul53h +xm+fJDHaUsDGJcDLJiiiwO7Sbhcst5KjYbZWrAzGFlAhnCwMUpABNRWszwT0lFMXEJ5/crhr9u iJxS0NZ+J5f81q45TSWH0Txs/cYv3xgZ1rWUzle/+G1eT/PmB2vSS7Y9FY06dWfyncyoF7lzXjx oZwQA X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 The mount namespace may in fact sleep when putting the last passive reference so we need to drop the namespace reference 
outside of the rcu read lock. Do this by delaying the put until the next iteration where we've already moved on to the next namespace and legitimized it. Once we drop the rcu read lock to call put_user() we will also drop the reference to the previous namespace in the tree. Fixes: 76b6f5dfb3fd ("nstree: add listns()") Signed-off-by: Christian Brauner --- kernel/nstree.c | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/kernel/nstree.c b/kernel/nstree.c index 4a8838683b6b..55b72d4f8de4 100644 --- a/kernel/nstree.c +++ b/kernel/nstree.c @@ -505,13 +505,13 @@ static inline bool __must_check may_list_ns(const str= uct klistns *kls, return false; } =20 -static void __ns_put(struct ns_common *ns) +static inline void ns_put(struct ns_common *ns) { - if (ns->ops) + if (ns && ns->ops) ns->ops->put(ns); } =20 -DEFINE_FREE(ns_put, struct ns_common *, if (!IS_ERR_OR_NULL(_T)) __ns_put(= _T)) +DEFINE_FREE(ns_put, struct ns_common *, if (!IS_ERR_OR_NULL(_T)) ns_put(_T= )) =20 static inline struct ns_common *__must_check legitimize_ns(const struct kl= istns *kls, struct ns_common *candidate) @@ -535,7 +535,7 @@ static ssize_t do_listns_userns(struct klistns *kls) { u64 __user *ns_ids =3D kls->uns_ids; size_t nr_ns_ids =3D kls->nr_ns_ids; - struct ns_common *ns =3D NULL, *first_ns =3D NULL; + struct ns_common *ns =3D NULL, *first_ns =3D NULL, *prev =3D NULL; const struct list_head *head; ssize_t ret; =20 @@ -568,9 +568,10 @@ static ssize_t do_listns_userns(struct klistns *kls) =20 if (!first_ns) first_ns =3D list_entry_rcu(head->next, typeof(*ns), ns_owner_entry); + for (ns =3D first_ns; &ns->ns_owner_entry !=3D head && nr_ns_ids; ns =3D list_entry_rcu(ns->ns_owner_entry.next, typeof(*ns), ns_owner= _entry)) { - struct ns_common *valid __free(ns_put); + struct ns_common *valid; =20 valid =3D legitimize_ns(kls, ns); if (!valid) @@ -578,8 +579,14 @@ static ssize_t do_listns_userns(struct klistns *kls) =20 rcu_read_unlock(); =20 - if 
(put_user(valid->ns_id, ns_ids + ret)) + ns_put(prev); + prev =3D valid; + + if (put_user(valid->ns_id, ns_ids + ret)) { + ns_put(prev); return -EINVAL; + } + nr_ns_ids--; ret++; =20 @@ -587,6 +594,7 @@ static ssize_t do_listns_userns(struct klistns *kls) } =20 rcu_read_unlock(); + ns_put(prev); return ret; } =20 @@ -668,7 +676,7 @@ static ssize_t do_listns(struct klistns *kls) { u64 __user *ns_ids =3D kls->uns_ids; size_t nr_ns_ids =3D kls->nr_ns_ids; - struct ns_common *ns, *first_ns =3D NULL; + struct ns_common *ns, *first_ns =3D NULL, *prev =3D NULL; struct ns_tree *ns_tree =3D NULL; const struct list_head *head; u32 ns_type; @@ -705,7 +713,7 @@ static ssize_t do_listns(struct klistns *kls) =20 for (ns =3D first_ns; !ns_common_is_head(ns, head, ns_tree) && nr_ns_ids; ns =3D next_ns_common(ns, ns_tree)) { - struct ns_common *valid __free(ns_put); + struct ns_common *valid; =20 valid =3D legitimize_ns(kls, ns); if (!valid) @@ -713,8 +721,13 @@ static ssize_t do_listns(struct klistns *kls) =20 rcu_read_unlock(); =20 - if (put_user(valid->ns_id, ns_ids + ret)) + ns_put(prev); + prev =3D valid; + + if (put_user(valid->ns_id, ns_ids + ret)) { + ns_put(prev); return -EINVAL; + } =20 nr_ns_ids--; ret++; @@ -723,6 +736,7 @@ static ssize_t do_listns(struct klistns *kls) } =20 rcu_read_unlock(); + ns_put(prev); return ret; } =20 --=20 2.47.3 From nobody Sun Feb 8 14:23:15 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id D8FB2262FED; Sun, 9 Nov 2025 21:13:38 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722819; cv=none; 
b=kbo15esUrpLAqz0rWdwvHwYM7XryryrfzqVQwJRPOzcJPD3DeR38nTZF9pUYfdYcRZJJmMMsu4sQ8sstWl0cWWAIvR2ggASb2Xnfmtn5BhLd0sjz5hf1Il6olgRfwkvgKGqtj3FdQ6UdrhJE7Pp0BuG9hZcqsVmDy36xhEcc7vw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722819; c=relaxed/simple; bh=4u/o3PQG0aiPE+oADFsMi+pBX0ERWY6h/lrCzyzz43Q=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=GyyrD9cirsdsKhDljT8QroP542QAEjlrk4W0RfcdnxF0adez+gNlOWsUBIEHkFYCCw8HbZ2Qjp0MyOwT6FwGBxRKfbFX+rIbntHk2fzM1c7XPuzE31MxrcH2395gTv61Y+M4bVWmeoUtrTIUynbVYV27+XiEIT/sduncktSNbqw= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=Px4uighz; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="Px4uighz" Received: by smtp.kernel.org (Postfix) with ESMTPSA id BA8A8C4CEFB; Sun, 9 Nov 2025 21:13:33 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1762722818; bh=4u/o3PQG0aiPE+oADFsMi+pBX0ERWY6h/lrCzyzz43Q=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=Px4uighz2xmr5yERGOhFsa04jTkOju8yjAXi3j59loeVbRuXklnY6u2q8//WLboQ4 jjvnVdAowgU3Ijd7KgJAsBcwp3wfwv9Bj7oBi+qGKjz4cY1gUI3Twvyq/zNCz642nD 2I+JelNTdDwzp+MKZlcjaYnKYdigYlxqqia74RpLQ+AFm67nyfDbWG5mjqm5OKNzRA sqZZ45dhn98ypjE2F2d4OIiiIsIYpbN/bGv+nHLB5FEE8aZeA4Gt/BAJTYfa/Yu2ra PgivPGU23OMylZzclt1tpgwzUREuxmeEnXCJchpedGGtXlKfoaqwiKGzqbpN0WAhsB v1gB0XIf2NijA== From: Christian Brauner Date: Sun, 09 Nov 2025 22:11:25 +0100 Subject: [PATCH 4/8] ns: return EFAULT on put_user() error Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20251109-namespace-6-19-fixes-v1-4-ae8a4ad5a3b3@kernel.org> References: 
<20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> In-Reply-To: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> To: linux-fsdevel@vger.kernel.org, Josef Bacik , Jeff Layton Cc: Jann Horn , Mike Yuan , =?utf-8?q?Zbigniew_J=C4=99drzejewski-Szmek?= , Lennart Poettering , Daan De Meyer , Aleksa Sarai , Amir Goldstein , Tejun Heo , Johannes Weiner , Thomas Gleixner , Alexander Viro , Jan Kara , linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, bpf@vger.kernel.org, Eric Dumazet , Jakub Kicinski , netdev@vger.kernel.org, Arnd Bergmann , Christian Brauner X-Mailer: b4 0.15-dev-a6db3 X-Developer-Signature: v=1; a=openpgp-sha256; l=784; i=brauner@kernel.org; h=from:subject:message-id; bh=4u/o3PQG0aiPE+oADFsMi+pBX0ERWY6h/lrCzyzz43Q=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWQKMr9Inpw05dK3y/bZK94LFos2bfX67GSxNOSKO5/3H I2YS3KNHaUsDGJcDLJiiiwO7Sbhcst5KjYbZWrAzGFlAhnCwMUpABOZUMHIcH6efS/vvwlhfrLG yc2q9i+zrl2UsL1/+r3otphtq3me9jP805qvonj7STjzPB+XePOd0Xacj3UWNAZ3KGZE1WmFvGn mBgA= X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Don't return EINVAL, return EFAULT just like we do in other system calls. 
Signed-off-by: Christian Brauner --- kernel/nstree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/nstree.c b/kernel/nstree.c index 55b72d4f8de4..f27f772a6762 100644 --- a/kernel/nstree.c +++ b/kernel/nstree.c @@ -584,7 +584,7 @@ static ssize_t do_listns_userns(struct klistns *kls) =20 if (put_user(valid->ns_id, ns_ids + ret)) { ns_put(prev); - return -EINVAL; + return -EFAULT; } =20 nr_ns_ids--; @@ -726,7 +726,7 @@ static ssize_t do_listns(struct klistns *kls) =20 if (put_user(valid->ns_id, ns_ids + ret)) { ns_put(prev); - return -EINVAL; + return -EFAULT; } =20 nr_ns_ids--; --=20 2.47.3 From nobody Sun Feb 8 14:23:15 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CEACC2690D1; Sun, 9 Nov 2025 21:13:43 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722823; cv=none; b=HI6dUoXzURKsuqkqvxFjlsmauCeM6KNTksC1sKNSstfCForrHTXJgjABR4McvKil2NoOeLbg103b8gyfXR4r6oXc16gfkT//2J3x5Nwm+sWAwZKgQdvAMj10z3cITf/VpahHPWthF2mQ2ghQVis0zUGuVEmNjh4C21iFmqfpYyA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722823; c=relaxed/simple; bh=X4O1l1jh5P5U12QpZJZKtUrS5jtWwPrcPGImd8GD70A=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=ggInucuZz8KZjNIc9pAHLn9jqNPKI0G/NlJ81Rh8cOOb1KOPnZ72MdhbXp+n3RnfnrYr4OsUyjzFN1DfqtGiRTiTjoTSEYuDTJ8jPvrKIAFtH5hmnQOJBR4/KBMroHiIoBxr2kMJ3VVoTdFiS97If29b967TeWQSxfx3LvU0cz8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=pRLGi6wX; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit 
key) header.d=kernel.org header.i=@kernel.org header.b="pRLGi6wX" Received: by smtp.kernel.org (Postfix) with ESMTPSA id DDEB9C19422; Sun, 9 Nov 2025 21:13:38 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1762722823; bh=X4O1l1jh5P5U12QpZJZKtUrS5jtWwPrcPGImd8GD70A=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=pRLGi6wXPGrkaBRp6Yl4XkFhp9sOaFqh90jUfGrLpOmKKCJTtw7DjdZkhAG5sfubh /tagEkZqRrXEtmKPmn9S1Msvbfpe2pGA9bYLuztMQiWo49I8sKINmyWXoTTDLrkcrN AhcLn9MocMoqh4peiCgjdsup8fBac+aIeQRnKj0+VtSJAB1wUFvBKuIQBD0ipmg19c wg78R+NFGtHZZnQDAPvbofw/B0ZUG6gih+JDnsgO4VenqdiOXdkePhwhfxdJ1grXRn S6XxijK5fEo2jpItItR/eZMGLG0DO8PwZc09xOWeybvJOGzbYL10s0MnCleszIr+M9 P+5UwrVi10iuw== From: Christian Brauner Date: Sun, 09 Nov 2025 22:11:26 +0100 Subject: [PATCH 5/8] ns: handle setns(pidfd, ...) cleanly Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20251109-namespace-6-19-fixes-v1-5-ae8a4ad5a3b3@kernel.org> References: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> In-Reply-To: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> To: linux-fsdevel@vger.kernel.org, Josef Bacik , Jeff Layton Cc: Jann Horn , Mike Yuan , =?utf-8?q?Zbigniew_J=C4=99drzejewski-Szmek?= , Lennart Poettering , Daan De Meyer , Aleksa Sarai , Amir Goldstein , Tejun Heo , Johannes Weiner , Thomas Gleixner , Alexander Viro , Jan Kara , linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, bpf@vger.kernel.org, Eric Dumazet , Jakub Kicinski , netdev@vger.kernel.org, Arnd Bergmann , Christian Brauner , syzbot+1957b26299cf3ff7890c@syzkaller.appspotmail.com X-Mailer: b4 0.15-dev-a6db3 X-Developer-Signature: v=1; a=openpgp-sha256; l=8855; i=brauner@kernel.org; h=from:subject:message-id; bh=X4O1l1jh5P5U12QpZJZKtUrS5jtWwPrcPGImd8GD70A=; 
b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWQKMr9oLqsztM1nyOf7+aetJei2/0xeHn2D7yt/FiwPW aZYen59RykLgxgXg6yYIotDu0m43HKeis1GmRowc1iZQIYwcHEKwESsyhj+8Abbr1avPyFdV9fX 9THMefelb6cu77aediVC8OyGDwJxmxn+ipSsdQ64fMdIzd7u8trKHxeEZjxZn1LJ9EDD9mrVuUN X2QA= X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 The setns() system call supports: (1) namespace file descriptors (nsfd) (2) process file descriptors (pidfd) When using nsfds the namespaces will remain active because they are pinned by the vfs. However, when pidfds are used things are more complicated. When the target task exits and passes through exit_nsproxy_namespaces() or is reaped and thus also passes through exit_cred_namespaces() after the setns()'ing task has called prepare_nsset() but before the active reference count of the set of namespaces it wants to setns() to might have been dropped already: P1 P2 pid_p1 =3D clone(CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWNS) pidfd =3D= pidfd_open(pid_p1) setns(pid= fd, CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWNS) prepare_n= sset() exit(0) // ns->__ns_active_ref =3D=3D 1 // parent_ns->__ns_active_ref =3D=3D 1 -> exit_nsproxy_namespaces() -> exit_cred_namespaces() // ns_active_ref_put() will also put // the reference on the owner of the // namespace. If the only reason the // owning namespace was alive was // because it was a parent of @ns // it's active reference count now goes // to zero... -------------------------------- // | // ns->__ns_active_ref =3D=3D 0 | // parent_ns->__ns_active_ref =3D=3D 0 | | commit_ns= set() -----------------> // If set= ns() // now ma= nages to install the namespaces // it wil= l call ns_active_ref_get() // on the= m thus bumping the active reference // count = from zero again but without also // taking= the required reference on the owner. 
// Thus w= e get: // // ns->__= ns_active_ref =3D=3D 1 // parent= _ns->__ns_active_ref =3D=3D 0 When later someone does ns_active_ref_put() on @ns it will underflow parent_ns->__ns_active_ref leading to a splat from our asserts thinking there are still active references when in fact the counter just underflowed. So resurrect the ownership chain if necessary as well. If the caller succeeded in grabbing passive references to the set of namespaces the setns() should simply succeed even if the target task exits or gets reaped in the meantime and thus has dropped all active references to its namespaces. The race is rare and can only be triggered when using pidfds to setns() to namespaces. Also note that active references on initial namespaces are nops. Since we now always handle parent references directly we can drop ns_ref_active_get_owner() when adding a namespace to a namespace tree. This is now all handled uniformly in the places where the new namespaces actually become active. Reported-by: syzbot+1957b26299cf3ff7890c@syzkaller.appspotmail.com Fixes: 3c9820d5c64a ("ns: add active reference count") Signed-off-by: Christian Brauner --- fs/nsfs.c | 2 +- include/linux/ns_common.h | 47 ++++---------------------------------------= ---- kernel/nscommon.c | 21 ++++++++++++--------- kernel/nstree.c | 8 -------- 4 files changed, 17 insertions(+), 61 deletions(-) diff --git a/fs/nsfs.c b/fs/nsfs.c index ba6c8975c82e..a80f8d2a4122 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -430,7 +430,7 @@ static int nsfs_init_inode(struct inode *inode, void *d= ata) * ioctl on such a socket will resurrect the relevant namespace * subtree.
*/ - __ns_ref_active_resurrect(ns); + __ns_ref_active_get(ns); return 0; } =20 diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 791b18dc77d0..3aaba2ca31d7 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -287,47 +287,8 @@ static __always_inline __must_check int __ns_ref_read(= const struct ns_common *ns #define ns_ref_active_read(__ns) \ ((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0) =20 -void __ns_ref_active_get_owner(struct ns_common *ns); +void __ns_ref_active_put(struct ns_common *ns); =20 -static __always_inline void __ns_ref_active_get(struct ns_common *ns) -{ - /* Initial namespaces are always active. */ - if (!is_ns_init_id(ns)) - WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active)); -} -#define ns_ref_active_get(__ns) \ - do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) - -static __always_inline bool __ns_ref_active_get_not_zero(struct ns_common = *ns) -{ - /* Initial namespaces are always active. */ - if (is_ns_init_id(ns)) - return true; - - if (atomic_inc_not_zero(&ns->__ns_ref_active)) { - VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); - return true; - } - return false; -} - -#define ns_ref_active_get_owner(__ns) \ - do { if (__ns) __ns_ref_active_get_owner(to_ns_common(__ns)); } while (0) - -void __ns_ref_active_put_owner(struct ns_common *ns); - -static __always_inline void __ns_ref_active_put(struct ns_common *ns) -{ - /* Initial namespaces are always active. 
*/ - if (is_ns_init_id(ns)) - return; - - if (atomic_dec_and_test(&ns->__ns_ref_active)) { - VFS_WARN_ON_ONCE(is_initial_namespace(ns)); - VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); - __ns_ref_active_put_owner(ns); - } -} #define ns_ref_active_put(__ns) \ do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0) =20 @@ -343,9 +304,9 @@ static __always_inline struct ns_common *__must_check n= s_get_unless_inactive(str return ns; } =20 -void __ns_ref_active_resurrect(struct ns_common *ns); +void __ns_ref_active_get(struct ns_common *ns); =20 -#define ns_ref_active_resurrect(__ns) \ - do { if (__ns) __ns_ref_active_resurrect(to_ns_common(__ns)); } while (0) +#define ns_ref_active_get(__ns) \ + do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0) =20 #endif diff --git a/kernel/nscommon.c b/kernel/nscommon.c index 70cb66232e4c..bfd2d6805776 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -114,13 +114,6 @@ struct ns_common *__must_check ns_owner(struct ns_comm= on *ns) return to_ns_common(owner); } =20 -void __ns_ref_active_get_owner(struct ns_common *ns) -{ - ns =3D ns_owner(ns); - if (ns) - WARN_ON_ONCE(atomic_add_negative(1, &ns->__ns_ref_active)); -} - /* * The active reference count works by having each namespace that gets * created take a single active reference on its owning user namespace. @@ -171,8 +164,18 @@ void __ns_ref_active_get_owner(struct ns_common *ns) * The iteration stops once we reach a namespace that still has active * references. */ -void __ns_ref_active_put_owner(struct ns_common *ns) +void __ns_ref_active_put(struct ns_common *ns) { + /* Initial namespaces are always active. 
*/ + if (is_ns_init_id(ns)) + return; + + if (!atomic_dec_and_test(&ns->__ns_ref_active)) + return; + + VFS_WARN_ON_ONCE(is_ns_init_id(ns)); + VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); + for (;;) { ns =3D ns_owner(ns); if (!ns) @@ -275,7 +278,7 @@ void __ns_ref_active_put_owner(struct ns_common *ns) * it also needs to take another reference on its owning user namespace * and so on. */ -void __ns_ref_active_resurrect(struct ns_common *ns) +void __ns_ref_active_get(struct ns_common *ns) { /* Initial namespaces are always active. */ if (is_ns_init_id(ns)) diff --git a/kernel/nstree.c b/kernel/nstree.c index f27f772a6762..97404fb90749 100644 --- a/kernel/nstree.c +++ b/kernel/nstree.c @@ -173,14 +173,6 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns= _tree *ns_tree) write_sequnlock(&ns_tree_lock); =20 VFS_WARN_ON_ONCE(node); - - /* - * Take an active reference on the owner namespace. This ensures - * that the owner remains visible while any of its child namespaces - * are active. For init namespaces this is a no-op as ns_owner() - * returns NULL for namespaces owned by init_user_ns. 
- */ - __ns_ref_active_get_owner(ns); } =20 void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree) --=20 2.47.3 From nobody Sun Feb 8 14:23:15 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id EBE242737E4; Sun, 9 Nov 2025 21:13:48 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722829; cv=none; b=vC+kq3BeBI4lvTfoZI7uzpTOcn0rCdpXuPgNklzdjs/Us/7SQvQWTS4veYJzLwtTSV6H/UcAMos+Ae+fZHl4Fz96EHb4v3zlVUh3xQ3oKIapN/0GIyTpRJKELBiVuaenDHnNu9Yu925UuPF4gWdvEVw24HYrj78825DSweZD7hA= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722829; c=relaxed/simple; bh=8y4aMICEEV0LiihR+FyZdw88fBca7WRMqCEtGLIdliw=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=n7UQNkFNrNCJbxZ8JpHroLjj05q4J3G//lybCHPYPCvZqGX6MmwAIAjlHUPQfKblJKtx36iDPYZj/K99oNeCLxnh1YM78IoZtOrRlVw5HukrC0Xmi5ZMnBZbJpaoxAAM2cj/1VL6TPlG/rCUXYMk0FkzOUetiQAwYx15KFsfcs4= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=mOopcn5n; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="mOopcn5n" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 2CFCFC4CEF8; Sun, 9 Nov 2025 21:13:43 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1762722828; bh=8y4aMICEEV0LiihR+FyZdw88fBca7WRMqCEtGLIdliw=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=mOopcn5nPKjp9ET/HK1MoGqbKamvY8j+oN3zAbpyOBrjIIQB8BJKgij2MDIouYqqF 1XAzGyLVi51vxjHXvsu+g38vArQ83PceTdh59Fv6tVU2ddt/PPXlCZ/04RUCfnzeRB 
+zHvbqEKft6+NK7r1QdLdFXk54WyAAiV6C0Dygd2iO7kBQJNB61AIusvI27t22Aj6K tV/CxOELIvpP+5bax176AfwrDdScfeZ5LXxl9EClPUgfkyCqnCFOK18REeLRbnkBqK cU1wu1vuz6WUISiDTtW8tlLd0h7OP514y9kyw6T99AKNg77ksgLRgHGZP65XyckwTw bXIbiJAMiV6sg== From: Christian Brauner Date: Sun, 09 Nov 2025 22:11:27 +0100 Subject: [PATCH 6/8] ns: add asserts for active refcount underflow Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20251109-namespace-6-19-fixes-v1-6-ae8a4ad5a3b3@kernel.org> References: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> In-Reply-To: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> To: linux-fsdevel@vger.kernel.org, Josef Bacik , Jeff Layton Cc: Jann Horn , Mike Yuan , =?utf-8?q?Zbigniew_J=C4=99drzejewski-Szmek?= , Lennart Poettering , Daan De Meyer , Aleksa Sarai , Amir Goldstein , Tejun Heo , Johannes Weiner , Thomas Gleixner , Alexander Viro , Jan Kara , linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, bpf@vger.kernel.org, Eric Dumazet , Jakub Kicinski , netdev@vger.kernel.org, Arnd Bergmann , Christian Brauner X-Mailer: b4 0.15-dev-a6db3 X-Developer-Signature: v=1; a=openpgp-sha256; l=2410; i=brauner@kernel.org; h=from:subject:message-id; bh=8y4aMICEEV0LiihR+FyZdw88fBca7WRMqCEtGLIdliw=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWQKMr94l1/5uyM4UfXv9Zy/QcYVfFzZC07+d68/ur7O7 PWfd6m/O0pZGMS4GGTFFFkc2k3C5ZbzVGw2ytSAmcPKBDKEgYtTACby8AUjw8H4pUZdv+6x1BhK qgSXsaVatl3+m54+0Wopv2/5vMCkW4wM6+daXThSYHuq7+cKpkaTPz4+plG9O/ftU7loJ7569uI ULgA= X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Add a few more asserts to detect active reference count underflows.
Signed-off-by: Christian Brauner --- include/linux/ns_common.h | 1 - kernel/nscommon.c | 18 ++++++++++++++---- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 3aaba2ca31d7..66ea09b48377 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -294,7 +294,6 @@ void __ns_ref_active_put(struct ns_common *ns); =20 static __always_inline struct ns_common *__must_check ns_get_unless_inacti= ve(struct ns_common *ns) { - VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) && !__ns_ref_read(ns)); if (!__ns_ref_active_read(ns)) { VFS_WARN_ON_ONCE(is_ns_init_id(ns)); return NULL; diff --git a/kernel/nscommon.c b/kernel/nscommon.c index bfd2d6805776..c910b979e433 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -170,8 +170,10 @@ void __ns_ref_active_put(struct ns_common *ns) if (is_ns_init_id(ns)) return; =20 - if (!atomic_dec_and_test(&ns->__ns_ref_active)) + if (!atomic_dec_and_test(&ns->__ns_ref_active)) { + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) < 0); return; + } =20 VFS_WARN_ON_ONCE(is_ns_init_id(ns)); VFS_WARN_ON_ONCE(!__ns_ref_read(ns)); @@ -181,8 +183,10 @@ void __ns_ref_active_put(struct ns_common *ns) if (!ns) return; VFS_WARN_ON_ONCE(is_ns_init_id(ns)); - if (!atomic_dec_and_test(&ns->__ns_ref_active)) + if (!atomic_dec_and_test(&ns->__ns_ref_active)) { + VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) < 0); return; + } } } =20 @@ -280,12 +284,16 @@ void __ns_ref_active_put(struct ns_common *ns) */ void __ns_ref_active_get(struct ns_common *ns) { + int prev; + /* Initial namespaces are always active. */ if (is_ns_init_id(ns)) return; =20 /* If we didn't resurrect the namespace we're done. 
*/ - if (atomic_fetch_add(1, &ns->__ns_ref_active)) + prev =3D atomic_fetch_add(1, &ns->__ns_ref_active); + VFS_WARN_ON_ONCE(prev < 0); + if (likely(prev)) return; =20 /* @@ -298,7 +306,9 @@ void __ns_ref_active_get(struct ns_common *ns) return; =20 VFS_WARN_ON_ONCE(is_ns_init_id(ns)); - if (atomic_fetch_add(1, &ns->__ns_ref_active)) + prev =3D atomic_fetch_add(1, &ns->__ns_ref_active); + VFS_WARN_ON_ONCE(prev < 0); + if (likely(prev)) return; } } --=20 2.47.3 From nobody Sun Feb 8 14:23:15 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 48D6426CE07; Sun, 9 Nov 2025 21:13:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722834; cv=none; b=Yr4RT05iZ/xVMbKsdjiIaoFFomXaVpzkaGtsY/CYiIQz0RThXJ6ef5DiwV+dcYVI0vJAIKus55doWK8yhb7MJNtzCS4jKBvTTVrR8t+mZs3bNZqymcHPcphWkkfvDPyZNizaLA4ifhFyRe01G7YVK13GW4Yz4qVhcsRARYOfHXE= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722834; c=relaxed/simple; bh=hjIC8MoMy22tTyZzN7VrtnIt0FY8IbJnB2GTuhPLsjc=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=m+dE/415bJCi9FZE7kytUyRc5wNMSvBcgsEDhYqBzP5czfqpqkaB5+COC5s2I4nIvb6xNa1AGMdH+WQhq1ux9jWs+vl6QBs/qknRUJGj5sSkKxF1rDSUdA73JOquEtMJ+NH2dA+2zzQygdDr5u0N8DgYlYcrZsiwUKJZj9sdypo= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b=PdhwGjva; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="PdhwGjva" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 3D6D7C4CEF7; Sun, 9 Nov 2025 21:13:49 +0000 
(UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1762722833; bh=hjIC8MoMy22tTyZzN7VrtnIt0FY8IbJnB2GTuhPLsjc=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=PdhwGjvaxle3BHuwErRwea24DSGUj6VsFwvL/i35en7+On7B8fTQqr3M3VbthI459 TXhpGx2uiqhbkLjHydc5H4QKQVGC8FKp5awqU4DVYcEpdBJKhdD67UWYDzN5JX1yaD SEaL+BkA7aKlasQRWo7oj4Pju3hDeleTgqHXj9/o1MUw9Hkeyj7B8bFWChGmrnZ8Mb la7La2IzdDAmjQDksrKII7oHr9ohYp7PsRYvqnwGFgUoAWFtf/+UXcK4FBcH6QaZMF i6ILu6R/uC8ONonjuy4FzRhTGSHSs65NfHBxQZRC0wbVjGEbTrPP6HQyhwkrakd/YN R9t4i7c3jLt9w== From: Christian Brauner Date: Sun, 09 Nov 2025 22:11:28 +0100 Subject: [PATCH 7/8] selftests/namespaces: add active reference count regression test Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20251109-namespace-6-19-fixes-v1-7-ae8a4ad5a3b3@kernel.org> References: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> In-Reply-To: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> To: linux-fsdevel@vger.kernel.org, Josef Bacik , Jeff Layton Cc: Jann Horn , Mike Yuan , =?utf-8?q?Zbigniew_J=C4=99drzejewski-Szmek?= , Lennart Poettering , Daan De Meyer , Aleksa Sarai , Amir Goldstein , Tejun Heo , Johannes Weiner , Thomas Gleixner , Alexander Viro , Jan Kara , linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, bpf@vger.kernel.org, Eric Dumazet , Jakub Kicinski , netdev@vger.kernel.org, Arnd Bergmann , Christian Brauner X-Mailer: b4 0.15-dev-a6db3 X-Developer-Signature: v=1; a=openpgp-sha256; l=4949; i=brauner@kernel.org; h=from:subject:message-id; bh=hjIC8MoMy22tTyZzN7VrtnIt0FY8IbJnB2GTuhPLsjc=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWQKMr9Ytectw83jb9NCRYVZy571HzMSXv4nqiNNnKv7R ldGWU5IRykLgxgXg6yYIotDu0m43HKeis1GmRowc1iZQIYwcHEKwEROizP84e/ZbChy0G6P3oSl utGvdnYsWmPxOu76zYVqjEwCtZkdnAx/BY8rNccK/y4/167Vf4m3cI2DwwXXC7/W8uq7/Zs328i GFwA= 
X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Add a regression test for setns() with pidfd. Signed-off-by: Christian Brauner --- tools/testing/selftests/namespaces/.gitignore | 1 + tools/testing/selftests/namespaces/Makefile | 4 +- .../namespaces/regression_pidfd_setns_test.c | 113 +++++++++++++++++= ++++ 3 files changed, 117 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/namespaces/.gitignore b/tools/testing/= selftests/namespaces/.gitignore index f4d2209ca4e4..4cb428d77659 100644 --- a/tools/testing/selftests/namespaces/.gitignore +++ b/tools/testing/selftests/namespaces/.gitignore @@ -8,3 +8,4 @@ siocgskns_test cred_change_test stress_test listns_pagination_bug +regression_pidfd_setns_test diff --git a/tools/testing/selftests/namespaces/Makefile b/tools/testing/se= lftests/namespaces/Makefile index 01569e0abbdb..1f36c7bf7728 100644 --- a/tools/testing/selftests/namespaces/Makefile +++ b/tools/testing/selftests/namespaces/Makefile @@ -11,7 +11,8 @@ TEST_GEN_PROGS :=3D nsid_test \ siocgskns_test \ cred_change_test \ stress_test \ - listns_pagination_bug + listns_pagination_bug \ + regression_pidfd_setns_test =20 include ../lib.mk =20 @@ -22,4 +23,5 @@ $(OUTPUT)/siocgskns_test: ../filesystems/utils.c $(OUTPUT)/cred_change_test: ../filesystems/utils.c $(OUTPUT)/stress_test: ../filesystems/utils.c $(OUTPUT)/listns_pagination_bug: ../filesystems/utils.c +$(OUTPUT)/regression_pidfd_setns_test: ../filesystems/utils.c =20 diff --git a/tools/testing/selftests/namespaces/regression_pidfd_setns_test= .c b/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c new file mode 100644 index 000000000000..753fd29dffd8 --- /dev/null +++ b/tools/testing/selftests/namespaces/regression_pidfd_setns_test.c @@ -0,0 +1,113 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include "../pidfd/pidfd.h" +#include 
"../kselftest_harness.h" + +/* + * Regression tests for the setns(pidfd) active reference counting bug. + * + * These tests are based on the reproducers that triggered the race condit= ion + * fixed by commit 1c465d0518dc ("ns: handle setns(pidfd, ...) cleanly"). + * + * The bug: When using setns() with a pidfd, if the target task exits betw= een + * prepare_nsset() and commit_nsset(), the namespaces would become inactiv= e. + * Then ns_ref_active_get() would increment from 0 without properly resurr= ecting + * the owner chain, causing active reference count underflows. + */ + +/* + * Simple pidfd setns test using create_child()+unshare(). + * + * Without the fix, this would trigger active refcount warnings when the + * parent exits after doing setns(pidfd) on a child that has already exite= d. + */ +TEST(simple_pidfd_setns) +{ + pid_t child_pid; + int pidfd =3D -1; + int ret; + int sv[2]; + char c; + + /* Ignore SIGCHLD for autoreap */ + ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR); + + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0); + + /* Create a child process without namespaces initially */ + child_pid =3D create_child(&pidfd, 0); + ASSERT_GE(child_pid, 0); + + if (child_pid =3D=3D 0) { + close(sv[0]); + + if (unshare(CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUSER) = < 0) { + close(sv[1]); + _exit(1); + } + + /* Signal parent that namespaces are ready */ + if (write_nointr(sv[1], "1", 1) < 0) { + close(sv[1]); + _exit(1); + } + + close(sv[1]); + _exit(0); + } + ASSERT_GE(pidfd, 0); + EXPECT_EQ(close(sv[1]), 0); + + ret =3D read_nointr(sv[0], &c, 1); + ASSERT_EQ(ret, 1); + EXPECT_EQ(close(sv[0]), 0); + + /* Set to child's namespaces via pidfd */ + ret =3D setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC); + TH_LOG("setns() returned %d", ret); + close(pidfd); +} + +/* + * Simple pidfd setns test using create_child(). + * + * This variation uses create_child() with namespace flags directly. + * Namespaces are created immediately at clone time. 
+ */ +TEST(simple_pidfd_setns_clone) +{ + pid_t child_pid; + int pidfd =3D -1; + int ret; + + /* Ignore SIGCHLD for autoreap */ + ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR); + + /* Create a child process with new namespaces using create_child() */ + child_pid =3D create_child(&pidfd, CLONE_NEWUSER | CLONE_NEWUTS | CLONE_N= EWIPC | CLONE_NEWNET); + ASSERT_GE(child_pid, 0); + + if (child_pid =3D=3D 0) { + /* Child: sleep for a while so parent can setns to us */ + sleep(2); + _exit(0); + } + + /* Parent: pidfd was already created by create_child() */ + ASSERT_GE(pidfd, 0); + + /* Set to child's namespaces via pidfd */ + ret =3D setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC); + close(pidfd); + TH_LOG("setns() returned %d", ret); +} + +TEST_HARNESS_MAIN --=20 2.47.3 From nobody Sun Feb 8 14:23:15 2026 Received: from smtp.kernel.org (aws-us-west-2-korg-mail-1.web.codeaurora.org [10.30.226.201]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 3483C263F38; Sun, 9 Nov 2025 21:13:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=10.30.226.201 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722839; cv=none; b=myz+wL4YXDsg80rA5/Fzn1bRw1SvfXOBC5/IH5o1TreybJ3RLLdALkGCtcPo5Q07jPEeYav+kAMqnt8IMiWpLY2JD10STkBaJuBlmElGoVgl0TgsfMt1amgTgLHVbQY6xhPYk0oLthQEt76cRqtmDFBoxQS4VrUUoUUtHebaFXc= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1762722839; c=relaxed/simple; bh=6df/qcuFKGvBEaRawblBEmuTMzDVDKNeLPHCBkB1I/Y=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=WQViQ80hDM4Wu2sR2Cmak5jcwUEraoqXDQ6iErRbuDB0Tm78ZvQ0SiMoP0YwyVSCozJgEohYjgUCgExcKzYSJbcezfWREs/0P/f9/ICCeeokVLp+gyA/2GrpdhlaCkIBWltT8MlFn0NPOA4gLGOa5HGv22KFjCGNdwqSS9rXcQE= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org 
header.i=@kernel.org header.b=kPMHRWMn; arc=none smtp.client-ip=10.30.226.201 Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=kernel.org header.i=@kernel.org header.b="kPMHRWMn" Received: by smtp.kernel.org (Postfix) with ESMTPSA id 5A511C19422; Sun, 9 Nov 2025 21:13:54 +0000 (UTC) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=kernel.org; s=k20201202; t=1762722839; bh=6df/qcuFKGvBEaRawblBEmuTMzDVDKNeLPHCBkB1I/Y=; h=From:Date:Subject:References:In-Reply-To:To:Cc:From; b=kPMHRWMn2qNPBQbx3/s6FWTdlMcKqgGs3J2KtkgmM/37ZuGssFcyEeNf4ld7rkol2 tlBY8sODrhh9bhmshSjMnUb/I18514YuDVSe/H5D14sTW7PxEb2NTdnrKrEBVUnByO 2pqQhUkrzngIslRqh5ZIFjCY+wSdrUcAfa3sijXF9DunDLOJIEqKNLIlaWfVuWy4Da YZkSsERfIQ9IYKsXcF/0KDZNNOhiyEOhlaWBS/P+LCKn4PgvDgJk7CeoqAnkhyqCZA QuLvHCVX4dU49zWFV6U+HOxgHt2lbXPUfHHWa5sh3KqnnKF7T8GfvjjxqyFGFlJ8qc C8OpcOmNiAhjQ== From: Christian Brauner Date: Sun, 09 Nov 2025 22:11:29 +0100 Subject: [PATCH 8/8] selftests/namespaces: test for efault Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20251109-namespace-6-19-fixes-v1-8-ae8a4ad5a3b3@kernel.org> References: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> In-Reply-To: <20251109-namespace-6-19-fixes-v1-0-ae8a4ad5a3b3@kernel.org> To: linux-fsdevel@vger.kernel.org, Josef Bacik , Jeff Layton Cc: Jann Horn , Mike Yuan , =?utf-8?q?Zbigniew_J=C4=99drzejewski-Szmek?= , Lennart Poettering , Daan De Meyer , Aleksa Sarai , Amir Goldstein , Tejun Heo , Johannes Weiner , Thomas Gleixner , Alexander Viro , Jan Kara , linux-kernel@vger.kernel.org, cgroups@vger.kernel.org, bpf@vger.kernel.org, Eric Dumazet , Jakub Kicinski , netdev@vger.kernel.org, Arnd Bergmann , Christian Brauner X-Mailer: b4 0.15-dev-a6db3 X-Developer-Signature: v=1; a=openpgp-sha256; l=15334; i=brauner@kernel.org; h=from:subject:message-id; 
bh=6df/qcuFKGvBEaRawblBEmuTMzDVDKNeLPHCBkB1I/Y=; b=owGbwMvMwCU28Zj0gdSKO4sYT6slMWQKMr+IvxG5/LPH+WUSBQa/jikzXV358EH6s8zJv/rrN s9m/6jO01HKwiDGxSArpsji0G4SLrecp2KzUaYGzBxWJpAhDFycAjCRCdyMDH/Td9k1MwoF+X7P 1upgbXm66m5gIsNKrRNJpzgcNq1WmsLwT2vGTf3ad6sj94W2/XdIm//stl7HZ+eP6vMl2c8uPDs 9kAcA X-Developer-Key: i=brauner@kernel.org; a=openpgp; fpr=4880B8C9BD0E5106FC070F4F7B3C391EFEA93624 Ensure that put_user() can fail and that namespace cleanup works correctly. Signed-off-by: Christian Brauner --- tools/testing/selftests/namespaces/.gitignore | 1 + tools/testing/selftests/namespaces/Makefile | 2 + .../selftests/namespaces/listns_efault_test.c | 521 +++++++++++++++++= ++++ 3 files changed, 524 insertions(+) diff --git a/tools/testing/selftests/namespaces/.gitignore b/tools/testing/= selftests/namespaces/.gitignore index 4cb428d77659..0989e80da457 100644 --- a/tools/testing/selftests/namespaces/.gitignore +++ b/tools/testing/selftests/namespaces/.gitignore @@ -4,6 +4,7 @@ init_ino_test ns_active_ref_test listns_test listns_permissions_test +listns_efault_test siocgskns_test cred_change_test stress_test diff --git a/tools/testing/selftests/namespaces/Makefile b/tools/testing/se= lftests/namespaces/Makefile index 1f36c7bf7728..fbb821652c17 100644 --- a/tools/testing/selftests/namespaces/Makefile +++ b/tools/testing/selftests/namespaces/Makefile @@ -8,6 +8,7 @@ TEST_GEN_PROGS :=3D nsid_test \ ns_active_ref_test \ listns_test \ listns_permissions_test \ + listns_efault_test \ siocgskns_test \ cred_change_test \ stress_test \ @@ -19,6 +20,7 @@ include ../lib.mk $(OUTPUT)/ns_active_ref_test: ../filesystems/utils.c $(OUTPUT)/listns_test: ../filesystems/utils.c $(OUTPUT)/listns_permissions_test: ../filesystems/utils.c +$(OUTPUT)/listns_efault_test: ../filesystems/utils.c $(OUTPUT)/siocgskns_test: ../filesystems/utils.c $(OUTPUT)/cred_change_test: ../filesystems/utils.c $(OUTPUT)/stress_test: ../filesystems/utils.c diff --git a/tools/testing/selftests/namespaces/listns_efault_test.c 
b/tool= s/testing/selftests/namespaces/listns_efault_test.c new file mode 100644 index 000000000000..906d8df90ab2 --- /dev/null +++ b/tools/testing/selftests/namespaces/listns_efault_test.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "../kselftest_harness.h" +#include "../filesystems/utils.h" +#include "../pidfd/pidfd.h" +#include "wrappers.h" + +/* + * Test listns() error handling with invalid buffer addresses. + * + * When the buffer pointer is invalid (e.g., crossing page boundaries + * into unmapped memory), listns() returns EFAULT. + * + * This test also creates mount namespaces that get destroyed during + * iteration, testing that namespace cleanup happens outside the RCU + * read lock. + */ +TEST(listns_partial_fault_with_ns_cleanup) +{ + void *map; + __u64 *ns_ids; + ssize_t ret; + long page_size; + pid_t pid, iter_pid; + int pidfds[5]; + int sv[5][2]; + int iter_pidfd; + int i, status; + char c; + + page_size =3D sysconf(_SC_PAGESIZE); + ASSERT_GT(page_size, 0); + + /* + * Map two pages: + * - First page: readable and writable + * - Second page: will be unmapped to trigger EFAULT + */ + map =3D mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + ASSERT_NE(map, MAP_FAILED); + + /* Unmap the second page */ + ret =3D munmap((char *)map + page_size, page_size); + ASSERT_EQ(ret, 0); + + /* + * Position the buffer pointer so there's room for exactly one u64 + * before the page boundary. The second u64 would fall into the + * unmapped page. + */ + ns_ids =3D ((__u64 *)((char *)map + page_size)) - 1; + + /* + * Create a separate process to run listns() in a loop concurrently + * with namespace creation and destruction.
+ */ + iter_pid =3D create_child(&iter_pidfd, 0); + ASSERT_NE(iter_pid, -1); + + if (iter_pid =3D=3D 0) { + struct ns_id_req req =3D { + .size =3D sizeof(req), + .spare =3D 0, + .ns_id =3D 0, + .ns_type =3D 0, /* All types */ + .spare2 =3D 0, + .user_ns_id =3D 0, /* Global listing */ + }; + int iter_ret; + + /* + * Loop calling listns() until killed. + * The kernel should: + * 1. Successfully write the first namespace ID (within valid page) + * 2. Fail with EFAULT when trying to write the second ID (unmapped page) + * 3. Handle concurrent namespace destruction without deadlock + */ + while (1) { + iter_ret =3D sys_listns(&req, ns_ids, 2, 0); + + if (iter_ret =3D=3D -1 && errno =3D=3D ENOSYS) + _exit(PIDFD_SKIP); + } + } + + /* Small delay to let iterator start looping */ + usleep(50000); + + /* + * Create several child processes, each in its own mount namespace. + * These will be destroyed while the iterator is running listns(). + */ + for (i =3D 0; i < 5; i++) { + /* Create socketpair for synchronization */ + ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0); + + pid =3D create_child(&pidfds[i], CLONE_NEWNS); + ASSERT_NE(pid, -1); + + if (pid =3D=3D 0) { + close(sv[i][0]); /* Close parent end */ + + /* Child: create a couple of tmpfs mounts */ + if (mkdir("/tmp/test_mnt1", 0755) =3D=3D -1 && errno !=3D EEXIST) + _exit(1); + if (mkdir("/tmp/test_mnt2", 0755) =3D=3D -1 && errno !=3D EEXIST) + _exit(1); + + if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) =3D=3D -1) + _exit(1); + if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) =3D=3D -1) + _exit(1); + + /* Signal parent that setup is complete */ + if (write_nointr(sv[i][1], "R", 1) !=3D 1) + _exit(1); + + /* Wait for parent to signal us to exit */ + if (read_nointr(sv[i][1], &c, 1) !=3D 1) + _exit(1); + + close(sv[i][1]); + _exit(0); + } + + close(sv[i][1]); /* Close child end */ + } + + /* Wait for all children to finish setup */ + for (i =3D 0; i < 5; i++) { + ret =3D read_nointr(sv[i][0], &c, 
1); + ASSERT_EQ(ret, 1); + ASSERT_EQ(c, 'R'); + } + + /* + * Signal children to exit. This will destroy their mount namespaces + * while listns() is iterating the namespace tree. + * This tests that cleanup happens outside the RCU read lock. + */ + for (i =3D 0; i < 5; i++) + write_nointr(sv[i][0], "X", 1); + + /* Wait for all mount namespace children to exit and cleanup */ + for (i =3D 0; i < 5; i++) { + waitpid(-1, NULL, 0); + close(sv[i][0]); + close(pidfds[i]); + } + + /* Kill iterator and wait for it */ + sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0); + ret =3D waitpid(iter_pid, &status, 0); + ASSERT_EQ(ret, iter_pid); + close(iter_pidfd); + + /* Should have been killed */ + ASSERT_TRUE(WIFSIGNALED(status)); + ASSERT_EQ(WTERMSIG(status), SIGKILL); + + /* Clean up */ + munmap(map, page_size); +} + +/* + * Test listns() error handling when the entire buffer is invalid. + * This is a sanity check that basic invalid pointer detection works. + */ +TEST(listns_complete_fault) +{ + struct ns_id_req req =3D { + .size =3D sizeof(req), + .spare =3D 0, + .ns_id =3D 0, + .ns_type =3D 0, + .spare2 =3D 0, + .user_ns_id =3D 0, + }; + __u64 *ns_ids; + ssize_t ret; + + /* Use a clearly invalid pointer */ + ns_ids =3D (__u64 *)0xdeadbeef; + + ret =3D sys_listns(&req, ns_ids, 10, 0); + + if (ret =3D=3D -1 && errno =3D=3D ENOSYS) + SKIP(return, "listns() not supported"); + + /* Should fail with EFAULT */ + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EFAULT); +} + +/* + * Test listns() error handling when the buffer is NULL. 
+ */
+TEST(listns_null_buffer)
+{
+	struct ns_id_req req =3D {
+		.size =3D sizeof(req),
+		.spare =3D 0,
+		.ns_id =3D 0,
+		.ns_type =3D 0,
+		.spare2 =3D 0,
+		.user_ns_id =3D 0,
+	};
+	ssize_t ret;
+
+	/* NULL buffer with non-zero count should fail */
+	ret =3D sys_listns(&req, NULL, 10, 0);
+
+	if (ret =3D=3D -1 && errno =3D=3D ENOSYS)
+		SKIP(return, "listns() not supported");
+
+	/* Should fail with EFAULT */
+	ASSERT_EQ(ret, -1);
+	ASSERT_EQ(errno, EFAULT);
+}
+
+/*
+ * Test listns() with a buffer that becomes invalid mid-iteration
+ * (after several successful writes), combined with mount namespace
+ * destruction to test RCU cleanup logic.
+ *
+ * An iterator process calls listns() in a loop on a buffer that has room
+ * for 5 IDs before an unmapped page, while 10 children living in their
+ * own mount namespaces are told to exit in two batches. The test is
+ * skipped when the iterator reports that listns() is not supported.
+ */
+TEST(listns_late_fault_with_ns_cleanup)
+{
+	void *map;
+	__u64 *ns_ids;
+	ssize_t ret;
+	long page_size;
+	pid_t pid, iter_pid;
+	pid_t pids[10];
+	int pidfds[10];
+	int sv[10][2];
+	int iter_pidfd;
+	int i, status;
+	char c;
+
+	page_size =3D sysconf(_SC_PAGESIZE);
+	ASSERT_GT(page_size, 0);
+
+	/* Map two pages */
+	map =3D mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(map, MAP_FAILED);
+
+	/* Unmap the second page */
+	ret =3D munmap((char *)map + page_size, page_size);
+	ASSERT_EQ(ret, 0);
+
+	/*
+	 * Position buffer so we can write several u64s successfully
+	 * before hitting the page boundary.
+	 */
+	ns_ids =3D ((__u64 *)((char *)map + page_size)) - 5;
+
+	/*
+	 * Create a separate process to run listns() concurrently.
+	 */
+	iter_pid =3D create_child(&iter_pidfd, 0);
+	ASSERT_NE(iter_pid, -1);
+
+	if (iter_pid =3D=3D 0) {
+		struct ns_id_req req =3D {
+			.size =3D sizeof(req),
+			.spare =3D 0,
+			.ns_id =3D 0,
+			.ns_type =3D 0,
+			.spare2 =3D 0,
+			.user_ns_id =3D 0,
+		};
+		int iter_ret;
+
+		/*
+		 * Loop calling listns() until killed.
+		 * Request 10 namespace IDs while namespaces are being destroyed.
+		 * This tests:
+		 * 1. EFAULT handling when buffer becomes invalid
+		 * 2. Namespace cleanup outside RCU read lock during iteration
+		 */
+		while (1) {
+			iter_ret =3D sys_listns(&req, ns_ids, 10, 0);
+
+			if (iter_ret =3D=3D -1 && errno =3D=3D ENOSYS)
+				_exit(PIDFD_SKIP);
+		}
+	}
+
+	/* Small delay to let iterator start looping */
+	usleep(50000);
+
+	/*
+	 * Create more children with mount namespaces to increase the
+	 * likelihood that namespace cleanup happens during iteration.
+	 */
+	for (i =3D 0; i < 10; i++) {
+		/* Create socketpair for synchronization */
+		ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+		pid =3D create_child(&pidfds[i], CLONE_NEWNS);
+		ASSERT_NE(pid, -1);
+
+		if (pid =3D=3D 0) {
+			close(sv[i][0]); /* Close parent end */
+
+			/* Child: create tmpfs mounts */
+			if (mkdir("/tmp/test_mnt1", 0755) =3D=3D -1 && errno !=3D EEXIST)
+				_exit(1);
+			if (mkdir("/tmp/test_mnt2", 0755) =3D=3D -1 && errno !=3D EEXIST)
+				_exit(1);
+
+			if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) =3D=3D -1)
+				_exit(1);
+			if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) =3D=3D -1)
+				_exit(1);
+
+			/* Signal parent that setup is complete */
+			if (write_nointr(sv[i][1], "R", 1) !=3D 1)
+				_exit(1);
+
+			/* Wait for parent to signal us to exit */
+			if (read_nointr(sv[i][1], &c, 1) !=3D 1)
+				_exit(1);
+
+			close(sv[i][1]);
+			_exit(0);
+		}
+
+		/* Remember the child so it can be reaped by pid later */
+		pids[i] =3D pid;
+		close(sv[i][1]); /* Close child end */
+	}
+
+	/* Wait for all children to finish setup */
+	for (i =3D 0; i < 10; i++) {
+		ret =3D read_nointr(sv[i][0], &c, 1);
+		ASSERT_EQ(ret, 1);
+		ASSERT_EQ(c, 'R');
+	}
+
+	/* Tell the first half of the children to exit */
+	for (i =3D 0; i < 5; i++)
+		write_nointr(sv[i][0], "X", 1);
+
+	/* Small delay to let some exit */
+	usleep(10000);
+
+	/* Tell the remaining children to exit */
+	for (i =3D 5; i < 10; i++)
+		write_nointr(sv[i][0], "X", 1);
+
+	/*
+	 * Wait for all children and cleanup. Reap each child by pid:
+	 * waitpid(-1) could pick up the iterator instead if it already
+	 * exited because listns() is not supported.
+	 */
+	for (i =3D 0; i < 10; i++) {
+		waitpid(pids[i], NULL, 0);
+		close(sv[i][0]);
+		close(pidfds[i]);
+	}
+
+	/* Kill iterator and wait for it */
+	sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+	ret =3D waitpid(iter_pid, &status, 0);
+	ASSERT_EQ(ret, iter_pid);
+	close(iter_pidfd);
+
+	/* Clean up */
+	munmap(map, page_size);
+
+	/* The iterator only exits on its own when listns() is missing */
+	if (WIFEXITED(status) && WEXITSTATUS(status) =3D=3D PIDFD_SKIP)
+		SKIP(return, "listns() not supported");
+
+	/* Should have been killed */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGKILL);
+}
+
+/*
+ * Test specifically focused on mount namespace cleanup during EFAULT.
+ * Filter for mount namespaces only.
+ *
+ * The iterator buffer has room for 3 IDs before an unmapped page and 8
+ * children in their own mount namespaces are told to exit while the
+ * iterator loops. The test is skipped when the iterator reports that
+ * listns() is not supported.
+ */
+TEST(listns_mnt_ns_cleanup_on_fault)
+{
+	void *map;
+	__u64 *ns_ids;
+	ssize_t ret;
+	long page_size;
+	pid_t pid, iter_pid;
+	pid_t pids[8];
+	int pidfds[8];
+	int sv[8][2];
+	int iter_pidfd;
+	int i, status;
+	char c;
+
+	page_size =3D sysconf(_SC_PAGESIZE);
+	ASSERT_GT(page_size, 0);
+
+	/* Set up partial fault buffer */
+	map =3D mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
+		   MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	ASSERT_NE(map, MAP_FAILED);
+
+	ret =3D munmap((char *)map + page_size, page_size);
+	ASSERT_EQ(ret, 0);
+
+	/* Position for 3 successful writes, then fault */
+	ns_ids =3D ((__u64 *)((char *)map + page_size)) - 3;
+
+	/*
+	 * Create a separate process to run listns() concurrently.
+	 */
+	iter_pid =3D create_child(&iter_pidfd, 0);
+	ASSERT_NE(iter_pid, -1);
+
+	if (iter_pid =3D=3D 0) {
+		struct ns_id_req req =3D {
+			.size =3D sizeof(req),
+			.spare =3D 0,
+			.ns_id =3D 0,
+			.ns_type =3D CLONE_NEWNS, /* Only mount namespaces */
+			.spare2 =3D 0,
+			.user_ns_id =3D 0,
+		};
+		int iter_ret;
+
+		/*
+		 * Loop calling listns() until killed.
+		 * Call listns() to race with namespace destruction.
+		 */
+		while (1) {
+			iter_ret =3D sys_listns(&req, ns_ids, 10, 0);
+
+			if (iter_ret =3D=3D -1 && errno =3D=3D ENOSYS)
+				_exit(PIDFD_SKIP);
+		}
+	}
+
+	/* Small delay to let iterator start looping */
+	usleep(50000);
+
+	/* Create children with mount namespaces */
+	for (i =3D 0; i < 8; i++) {
+		/* Create socketpair for synchronization */
+		ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
+
+		pid =3D create_child(&pidfds[i], CLONE_NEWNS);
+		ASSERT_NE(pid, -1);
+
+		if (pid =3D=3D 0) {
+			close(sv[i][0]); /* Close parent end */
+
+			/* Do some mount operations to make cleanup more interesting */
+			if (mkdir("/tmp/test_mnt1", 0755) =3D=3D -1 && errno !=3D EEXIST)
+				_exit(1);
+			if (mkdir("/tmp/test_mnt2", 0755) =3D=3D -1 && errno !=3D EEXIST)
+				_exit(1);
+
+			if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) =3D=3D -1)
+				_exit(1);
+			if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) =3D=3D -1)
+				_exit(1);
+
+			/* Signal parent that setup is complete */
+			if (write_nointr(sv[i][1], "R", 1) !=3D 1)
+				_exit(1);
+
+			/* Wait for parent to signal us to exit */
+			if (read_nointr(sv[i][1], &c, 1) !=3D 1)
+				_exit(1);
+
+			close(sv[i][1]);
+			_exit(0);
+		}
+
+		/* Remember the child so it can be reaped by pid later */
+		pids[i] =3D pid;
+		close(sv[i][1]); /* Close child end */
+	}
+
+	/* Wait for all children to finish setup */
+	for (i =3D 0; i < 8; i++) {
+		ret =3D read_nointr(sv[i][0], &c, 1);
+		ASSERT_EQ(ret, 1);
+		ASSERT_EQ(c, 'R');
+	}
+
+	/* Tell children to exit to trigger namespace destruction during iteration */
+	for (i =3D 0; i < 8; i++)
+		write_nointr(sv[i][0], "X", 1);
+
+	/*
+	 * Wait for children and cleanup. Reap each child by pid:
+	 * waitpid(-1) could pick up the iterator instead if it already
+	 * exited because listns() is not supported.
+	 */
+	for (i =3D 0; i < 8; i++) {
+		waitpid(pids[i], NULL, 0);
+		close(sv[i][0]);
+		close(pidfds[i]);
+	}
+
+	/* Kill iterator and wait for it */
+	sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
+	ret =3D waitpid(iter_pid, &status, 0);
+	ASSERT_EQ(ret, iter_pid);
+	close(iter_pidfd);
+
+	munmap(map, page_size);
+
+	/* The iterator only exits on its own when listns() is missing */
+	if (WIFEXITED(status) && WEXITSTATUS(status) =3D=3D PIDFD_SKIP)
+		SKIP(return, "listns() not supported");
+
+	/* Should have been killed */
+	ASSERT_TRUE(WIFSIGNALED(status));
+	ASSERT_EQ(WTERMSIG(status), SIGKILL);
+}
+
+TEST_HARNESS_MAIN --=20 2.47.3