From nobody Sun Oct 5 10:44:54 2025 Received: from mout-p-102.mailbox.org (mout-p-102.mailbox.org [80.241.56.152]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id CBE6F2C190; Tue, 5 Aug 2025 05:45:38 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=80.241.56.152 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754372740; cv=none; b=MHUUNwJTnoiNuaX2wJUfyU6FZar02A6aPbaJQg/u/ZyqZRQ6KW+WK2JOeEJ9HcfsR+7zlKOTm1st0H6qmoJV+OTMN8eFNRuMzxAN9keBAtZi6qD8aPZZo9D/EioqpN6XIeNQ+agU4L+wDbaYjg2hfDiT5hyQSk6EGZPcl5jfg54= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754372740; c=relaxed/simple; bh=p0CI35ogoHgUlRWLHKPkYHs040kuapK3lWwQMhtdaMU=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=VyhTC11x7C+LVSA1JjrVEVPhukfhAlQka5uKSRQe9k1ZHHvJX5OEn1IcbyqusEpxn8EWug0QkdkDzmeIOB4zjYYRCXvHPC9uxT7VMWVQE3Av6v9CKjdNrjMhjB9Sx7Vxgc2ldcftk6swAgWWZQQjWleuOPrb+OffvWIE/Ol0ORs= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com; spf=pass smtp.mailfrom=cyphar.com; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b=aJUalihm; arc=none smtp.client-ip=80.241.56.152 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=cyphar.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b="aJUalihm" Received: from smtp2.mailbox.org (smtp2.mailbox.org [10.196.197.2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-102.mailbox.org (Postfix) with ESMTPS id 
4bx2TC06ZSz9t3b; Tue, 5 Aug 2025 07:45:35 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cyphar.com; s=MBO0001; t=1754372735; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=nYYuPVtguJfcotsXuI5EAsnUJxvEUQg+kC38k/XfePw=; b=aJUalihmBKwkqsdQ+VG5ModXOvpMiIxLgIKL+TgcwkZ4901IFJFQbKKgpywbwE1KZQnnjg PGu/ySxFLHhDMuIYEud02KdpxApj1Eh+17IEO9gPVOQ+ooMNvTn+ly/tZSStpYwPHxyggJ bo7ILnBHZcSb3sHAs91AqYFkZp8O+t0UNqoIO7pVCJu7jym1y1tOrRwjFXCy4IIf+WF+JX p6uDEcdB4gZsZz3SatMimGxDoGV/bdHHmNa/KNHw/79JzfuRnnD+NOBI+MwXPmwRGgeFdT AkPqsm4IG9cpSeGn6b+LvwJuLRMKW3a+TLhDIQggP5irJWhQeJR8tcdyp0k9SQ== From: Aleksa Sarai Date: Tue, 05 Aug 2025 15:45:08 +1000 Subject: [PATCH v4 1/4] pidns: move is-ancestor logic to helper Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20250805-procfs-pidns-api-v4-1-705f984940e7@cyphar.com> References: <20250805-procfs-pidns-api-v4-0-705f984940e7@cyphar.com> In-Reply-To: <20250805-procfs-pidns-api-v4-0-705f984940e7@cyphar.com> To: Alexander Viro , Christian Brauner , Jan Kara , Jonathan Corbet , Shuah Khan Cc: Andy Lutomirski , linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Aleksa Sarai X-Developer-Signature: v=1; a=openpgp-sha256; l=2708; i=cyphar@cyphar.com; h=from:subject:message-id; bh=p0CI35ogoHgUlRWLHKPkYHs040kuapK3lWwQMhtdaMU=; b=owGbwMvMwCWmMf3Xpe0vXfIZT6slMWRMnJWnvnOpV8KXhUuWsZ3U2yTemfjtiN4P58UvRY9Iy e2MiQ9Y2VHKwiDGxSArpsiyzc8zdNP8xVeSP61kg5nDygQyhIGLUwAmklbAyLDSPL3equ7lo6Wa 3Qcf1Dj5+aTd/yaTPFVhu7DDd6UF3HsYGT5PYk7jcNjzI2jJSnmLvX8zD/2zX5X45UGkyHt+lWP rk9kB X-Developer-Key: 
i=cyphar@cyphar.com; a=openpgp; fpr=C9C370B246B09F6DBCFC744C34401015D1D2D386 This check will be needed in later patches, and there's no point open-coding it each time. Signed-off-by: Aleksa Sarai --- include/linux/pid_namespace.h | 9 +++++++++ kernel/pid_namespace.c | 22 ++++++++++++++-------- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..17fdc059f8da 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -84,6 +84,9 @@ extern void zap_pid_ns_processes(struct pid_namespace *pi= d_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); extern void put_pid_ns(struct pid_namespace *ns); =20 +extern bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor); + #else /* !CONFIG_PID_NS */ #include =20 @@ -118,6 +121,12 @@ static inline int reboot_pid_ns(struct pid_namespace *= pid_ns, int cmd) { return 0; } + +static inline bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor) +{ + return false; +} #endif /* CONFIG_PID_NS */ =20 extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7098ed44e717..b7b45c2597ec 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -390,11 +390,23 @@ static void pidns_put(struct ns_common *ns) put_pid_ns(to_pid_ns(ns)); } =20 +bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor) +{ + struct pid_namespace *ns; + + if (child->level < ancestor->level) + return false; + for (ns =3D child; ns->level > ancestor->level; ns =3D ns->parent) + ; + return ns =3D=3D ancestor; +} + static int pidns_install(struct nsset *nsset, struct ns_common *ns) { struct nsproxy *nsproxy =3D nsset->nsproxy; struct pid_namespace *active =3D task_active_pid_ns(current); - struct pid_namespace *ancestor, *new =3D to_pid_ns(ns); + struct 
pid_namespace *new =3D to_pid_ns(ns); =20 if (!ns_capable(new->user_ns, CAP_SYS_ADMIN) || !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN)) @@ -408,13 +420,7 @@ static int pidns_install(struct nsset *nsset, struct n= s_common *ns) * this maintains the property that processes and their * children can not escape their current pid namespace. */ - if (new->level < active->level) - return -EINVAL; - - ancestor =3D new; - while (ancestor->level > active->level) - ancestor =3D ancestor->parent; - if (ancestor !=3D active) + if (!pidns_is_ancestor(new, active)) return -EINVAL; =20 put_pid_ns(nsproxy->pid_ns_for_children); --=20 2.50.1 From nobody Sun Oct 5 10:44:54 2025 Received: from mout-p-102.mailbox.org (mout-p-102.mailbox.org [80.241.56.152]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 2D7532C190; Tue, 5 Aug 2025 05:45:43 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=80.241.56.152 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754372746; cv=none; b=pae/mBBfqjSCmSiItEfBnh73aozuDICElFq+JWR/fEreZmaCEEklXdxSQrj19Tqh/x4fF5Z8talafzMNX2DN/99wMASzLKPxg9iarjRjYOqjDnhMGRMv/i52edMLcfbO78BtwMTttZOuNAbpdt/CGzQ3wfjzlEdiK/arU0A180k= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754372746; c=relaxed/simple; bh=7x0mPvQs3PnM6hnHwlsK0YkdyZQeEL2pqrRNyBv2YRw=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=o/Mbu1VNph6755EQVQUF4oJ0ENAK35ov51hL3CfqPFZtxRv2LAicwRU38iGOOE6uJomCmxCtHX8To/y5ACfXXBopxxDDxS4b+seekOW7lGB2gjtGl6eb5PSmGjmr3KpGTVetzukYMAX+qEWZGHMAUdzwCpwOWIfUIiu4+JhOPm8= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com; spf=pass smtp.mailfrom=cyphar.com; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b=RaeI5DqD; arc=none 
smtp.client-ip=80.241.56.152 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=cyphar.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b="RaeI5DqD" Received: from smtp2.mailbox.org (smtp2.mailbox.org [IPv6:2001:67c:2050:b231:465::2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-102.mailbox.org (Postfix) with ESMTPS id 4bx2TJ48fzz9spD; Tue, 5 Aug 2025 07:45:40 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cyphar.com; s=MBO0001; t=1754372740; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=c5r3LUoIJotYAOtHbyXu8/Q2mXiFa0pZX7ipMCHCVe0=; b=RaeI5DqDynnMWoB9Pyeut6Ssh45JIVgC0CCIJCsu4UDcS6toKQGEwjDLEnp/8YSPgahq4f y7pAsl1IytM63yCsnglN9AA0dHhbZByvRwuODGiSUPfHya4/F0RBP3IyOAN2iSkfiHduC7 cPcHPOJimiS7/rBorAmYOdCYmduoMYVA5Gqo+86+kGihsQxikwyfratztx+H2BWDkkahQd 5sjcvfULX1bIqxnS3SnlOcKOgZTOUm27A2KngRCzh7PWd3w0NnI4Oa4qZFJiE0/N1t1+Sr oKsZq0SCGiC7m3WFQvuoaWEvcdRSrEtn/+UE1HYd4u3XMwvvT3njBIRtnK4Vzw== Authentication-Results: outgoing_mbo_mout; dkim=none; spf=pass (outgoing_mbo_mout: domain of cyphar@cyphar.com designates 2001:67c:2050:b231:465::2 as permitted sender) smtp.mailfrom=cyphar@cyphar.com From: Aleksa Sarai Date: Tue, 05 Aug 2025 15:45:09 +1000 Subject: [PATCH v4 2/4] procfs: add "pidns" mount option Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: 
<20250805-procfs-pidns-api-v4-2-705f984940e7@cyphar.com> References: <20250805-procfs-pidns-api-v4-0-705f984940e7@cyphar.com> In-Reply-To: <20250805-procfs-pidns-api-v4-0-705f984940e7@cyphar.com> To: Alexander Viro , Christian Brauner , Jan Kara , Jonathan Corbet , Shuah Khan Cc: Andy Lutomirski , linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Aleksa Sarai X-Developer-Signature: v=1; a=openpgp-sha256; l=7569; i=cyphar@cyphar.com; h=from:subject:message-id; bh=7x0mPvQs3PnM6hnHwlsK0YkdyZQeEL2pqrRNyBv2YRw=; b=owGbwMvMwCWmMf3Xpe0vXfIZT6slMWRMnJWnyC8rJRpQOvXnRiuzhSy1l1R8tj2UULn8aNLJt EPHYorfdJSyMIhxMciKKbJs8/MM3TR/8ZXkTyvZYOawMoEMYeDiFICJvLrI8N9nT9rnT+8ZpXco X7xa0rdjvqG0bnPlAXW+VrW11dXX2uoZGU5OMj35pWsC88n3/z7cXiDMaTF32WvGx+2fXbRSHvY IbuMBAA== X-Developer-Key: i=cyphar@cyphar.com; a=openpgp; fpr=C9C370B246B09F6DBCFC744C34401015D1D2D386 X-Rspamd-Queue-Id: 4bx2TJ48fzz9spD Since the introduction of pid namespaces, their interaction with procfs has been entirely implicit in ways that require a lot of dancing around by programs that need to construct sandboxes with different PID namespaces. Being able to explicitly specify the pid namespace to use when constructing a procfs super block will allow programs to no longer need to fork off a process which then does unshare(2) / setns(2) and forks again in order to construct a procfs in a pidns. So, provide a "pidns" mount option which allows such users to just explicitly state which pid namespace they want that procfs instance to use. 
This interface can be used with fsconfig(2) either with a file descriptor or a path: fsconfig(procfd, FSCONFIG_SET_FD, "pidns", NULL, nsfd); fsconfig(procfd, FSCONFIG_SET_STRING, "pidns", "/proc/self/ns/pid", 0); or with classic mount(2) / mount(8): // mount -t proc -o pidns=3D/proc/self/ns/pid proc /tmp/proc mount("proc", "/tmp/proc", "proc", MS_..., "pidns=3D/proc/self/ns/pid"); As this new API is effectively shorthand for setns(2) followed by mount(2), the permission model for this mirrors pidns_install() to avoid opening up new attack surfaces by loosening the existing permission model. In order to avoid having to RCU-protect all users of proc_pid_ns() (to avoid UAFs), attempting to reconfigure an existing procfs instance's pid namespace will error out with -EBUSY. Creating new procfs instances is quite cheap, so this should not be an impediment to most users, and lets us avoid a lot of churn in fs/proc/* for a feature that it seems unlikely userspace would use. Signed-off-by: Aleksa Sarai --- Documentation/filesystems/proc.rst | 8 ++++ fs/proc/root.c | 98 ++++++++++++++++++++++++++++++++++= +--- 2 files changed, 100 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems= /proc.rst index 5236cb52e357..5a157dadea0b 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -2360,6 +2360,7 @@ The following mount options are supported: hidepid=3D Set /proc// access mode. gid=3D Set the group authorized to learn processes information. subset=3D Show only the specified subset of procfs. + pidns=3D Specify the pid namespace used by this procfs. 
=3D=3D=3D=3D=3D=3D=3D=3D=3D =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =20 hidepid=3Doff or hidepid=3D0 means classic mode - everybody may access all @@ -2392,6 +2393,13 @@ information about processes information, just add id= entd to this group. subset=3Dpid hides all top level files and directories in the procfs that are not related to tasks. =20 +pidns=3D specifies a pid namespace (either as a string path to something l= ike +`/proc/$pid/ns/pid`, or a file descriptor when using `FSCONFIG_SET_FD`) th= at +will be used by the procfs instance when translating pids. By default, pro= cfs +will use the calling process's active pid namespace. Note that the pid +namespace of an existing procfs instance cannot be modified (attempting to= do +so will give an `-EBUSY` error). + Chapter 5: Filesystem behavior =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D =20 diff --git a/fs/proc/root.c b/fs/proc/root.c index ed86ac710384..fd1f1c8a939a 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -38,12 +38,14 @@ enum proc_param { Opt_gid, Opt_hidepid, Opt_subset, + Opt_pidns, }; =20 static const struct fs_parameter_spec proc_fs_parameters[] =3D { - fsparam_u32("gid", Opt_gid), + fsparam_u32("gid", Opt_gid), fsparam_string("hidepid", Opt_hidepid), fsparam_string("subset", Opt_subset), + fsparam_file_or_string("pidns", Opt_pidns), {} }; =20 @@ -109,11 +111,66 @@ static int proc_parse_subset_param(struct fs_context = *fc, char *value) return 0; } =20 +#ifdef CONFIG_PID_NS +static int proc_parse_pidns_param(struct fs_context *fc, + struct fs_parameter *param, + struct fs_parse_result *result) +{ + struct proc_fs_context *ctx =3D fc->fs_private; + struct pid_namespace *target, *active =3D task_active_pid_ns(current); + struct ns_common *ns; + struct file *ns_filp __free(fput) =3D NULL; + + switch (param->type) { + 
case fs_value_is_file: + /* came through fsconfig, steal the file reference */ + ns_filp =3D no_free_ptr(param->file); + break; + case fs_value_is_string: + ns_filp =3D filp_open(param->string, O_RDONLY, 0); + break; + default: + WARN_ON_ONCE(true); + break; + } + if (!ns_filp) + ns_filp =3D ERR_PTR(-EBADF); + if (IS_ERR(ns_filp)) { + errorfc(fc, "could not get file from pidns argument"); + return PTR_ERR(ns_filp); + } + + if (!proc_ns_file(ns_filp)) + return invalfc(fc, "pidns argument is not an nsfs file"); + ns =3D get_proc_ns(file_inode(ns_filp)); + if (ns->ops->type !=3D CLONE_NEWPID) + return invalfc(fc, "pidns argument is not a pidns file"); + target =3D container_of(ns, struct pid_namespace, ns); + + /* + * pidns=3D is shorthand for joining the pidns to get a fsopen fd, so the + * permission model should be the same as pidns_install(). + */ + if (!ns_capable(target->user_ns, CAP_SYS_ADMIN)) { + errorfc(fc, "insufficient permissions to set pidns"); + return -EPERM; + } + if (!pidns_is_ancestor(target, active)) + return invalfc(fc, "cannot set pidns to non-descendant pidns"); + + put_pid_ns(ctx->pid_ns); + ctx->pid_ns =3D get_pid_ns(target); + put_user_ns(fc->user_ns); + fc->user_ns =3D get_user_ns(ctx->pid_ns->user_ns); + return 0; +} +#endif /* CONFIG_PID_NS */ + static int proc_parse_param(struct fs_context *fc, struct fs_parameter *pa= ram) { struct proc_fs_context *ctx =3D fc->fs_private; struct fs_parse_result result; - int opt; + int opt, err; =20 opt =3D fs_parse(fc, proc_fs_parameters, param, &result); if (opt < 0) @@ -125,14 +182,38 @@ static int proc_parse_param(struct fs_context *fc, st= ruct fs_parameter *param) break; =20 case Opt_hidepid: - if (proc_parse_hidepid_param(fc, param)) - return -EINVAL; + err =3D proc_parse_hidepid_param(fc, param); + if (err) + return err; break; =20 case Opt_subset: - if (proc_parse_subset_param(fc, param->string) < 0) - return -EINVAL; + err =3D proc_parse_subset_param(fc, param->string); + if (err) + return err; 
+ break; + + case Opt_pidns: +#ifdef CONFIG_PID_NS + /* + * We would have to RCU-protect every proc_pid_ns() or + * proc_sb_info() access if we allowed this to be reconfigured + * for an existing procfs instance. Luckily, procfs instances + * are cheap to create, and mount-beneath would let you + * atomically replace an instance even with overmounts. + */ + if (fc->purpose =3D=3D FS_CONTEXT_FOR_RECONFIGURE) { + errorfc(fc, "cannot reconfigure pidns for existing procfs"); + return -EBUSY; + } + err =3D proc_parse_pidns_param(fc, param, &result); + if (err) + return err; break; +#else + errorfc(fc, "pidns mount flag not supported on this system"); + return -EOPNOTSUPP; +#endif =20 default: return -EINVAL; @@ -154,6 +235,11 @@ static void proc_apply_options(struct proc_fs_info *fs= _info, fs_info->hide_pid =3D ctx->hidepid; if (ctx->mask & (1 << Opt_subset)) fs_info->pidonly =3D ctx->pidonly; + if (ctx->mask & (1 << Opt_pidns) && + !WARN_ON_ONCE(fc->purpose =3D=3D FS_CONTEXT_FOR_RECONFIGURE)) { + put_pid_ns(fs_info->pid_ns); + fs_info->pid_ns =3D get_pid_ns(ctx->pid_ns); + } } =20 static int proc_fill_super(struct super_block *s, struct fs_context *fc) --=20 2.50.1 From nobody Sun Oct 5 10:44:54 2025 Received: from mout-p-202.mailbox.org (mout-p-202.mailbox.org [80.241.56.172]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 825262C190; Tue, 5 Aug 2025 05:45:50 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=80.241.56.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754372752; cv=none; b=S4YNkFEFRwNEaV+VUNFd9IStQ3WrkXOU6pwLTPqtbavVX6fLEAdo+eWcxWiDZHiuOpplboqKtrfeI0y/BTeGmpGqsA5n1LX37IcRyGHJz+YXJoq34FrkvakP5kDTBKdXyXY8Gs4N2YQT4S1pu3mikXdmERq5xYRakLlMl8hEh6g= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754372752; c=relaxed/simple; 
bh=/C2qq021xjStGJSu48HiSZ6087KYAeI74YlFmkObQ3M=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=rfZYPhzoLA4GQJiuWD2tqIFH2VJXfJmzoPelBwcSiUmxTDmF0DoDBlpiMOe4RqNx1GdUIToX6b/EamCUdT6eiBFaJSpN+R09MC0gPn36iE/q3C8vWNU0fx4kMysS1i4NbUVuDQmLwoELSGIIHh9yaLxyvS8aFiry9E//4OOPpMg= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com; spf=pass smtp.mailfrom=cyphar.com; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b=jng5FF6m; arc=none smtp.client-ip=80.241.56.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=cyphar.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b="jng5FF6m" Received: from smtp2.mailbox.org (smtp2.mailbox.org [IPv6:2001:67c:2050:b231:465::2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-202.mailbox.org (Postfix) with ESMTPS id 4bx2TQ02zQz9t5k; Tue, 5 Aug 2025 07:45:46 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cyphar.com; s=MBO0001; t=1754372746; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=pxh09IEl4Hu0nkwkl0408Z833h/l8jerTqbtoqOiqXE=; b=jng5FF6mcfs6j9WuoI0ngkbm+BiE+nBYPxA0tG+4/XKBC4k0M+/LJZTac/G8vPuhwDv2T5 GYjG9aNiWkRizdmVpmz5b2iv2JYwJylMD/fJh1wwHCG0g4I52ptJpKJ5yHJvpVAmdeaDGs vKzTBDzPOPa6y7T4ej+Wjy4XJS3hQeCJ3MRZRKx3AuCbLWbnwSsqWIGROfIYPUGV4pneM6 ruotpSPck9eKzgAh88hZqweXigkXSOATlHddpE/lvN3VO4csSdkcJZovOK01leE/xFKtDx 
6sYoPtXHytOkORwObg66OsppSByt6KpgxEs07DjpCJ7geKPN/uNyj/3iIfBAHw== Authentication-Results: outgoing_mbo_mout; dkim=none; spf=pass (outgoing_mbo_mout: domain of cyphar@cyphar.com designates 2001:67c:2050:b231:465::2 as permitted sender) smtp.mailfrom=cyphar@cyphar.com From: Aleksa Sarai Date: Tue, 05 Aug 2025 15:45:10 +1000 Subject: [PATCH v4 3/4] procfs: add PROCFS_GET_PID_NAMESPACE ioctl Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20250805-procfs-pidns-api-v4-3-705f984940e7@cyphar.com> References: <20250805-procfs-pidns-api-v4-0-705f984940e7@cyphar.com> In-Reply-To: <20250805-procfs-pidns-api-v4-0-705f984940e7@cyphar.com> To: Alexander Viro , Christian Brauner , Jan Kara , Jonathan Corbet , Shuah Khan Cc: Andy Lutomirski , linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Aleksa Sarai X-Developer-Signature: v=1; a=openpgp-sha256; l=6728; i=cyphar@cyphar.com; h=from:subject:message-id; bh=/C2qq021xjStGJSu48HiSZ6087KYAeI74YlFmkObQ3M=; b=owGbwMvMwCWmMf3Xpe0vXfIZT6slMWRMnJXvsWmHmjTTrkUTt0szTJr+56zM1Rm3vi4QzkxbW fva5Ge0bkcpC4MYF4OsmCLLNj/P0E3zF19J/rSSDWYOKxPIEAYuTgGYSJINw/+04Ai5cjP5xfPE HrTYBnjm/F7d9eZlpYf3hrRl3asdej0YGZ4K/ckSOD0zpmj/N59DlSeMW3a4Tfy3+umH7/zT/wn +/cwKAA== X-Developer-Key: i=cyphar@cyphar.com; a=openpgp; fpr=C9C370B246B09F6DBCFC744C34401015D1D2D386 X-Rspamd-Queue-Id: 4bx2TQ02zQz9t5k /proc has historically had very opaque semantics about PID namespaces, which is a little unfortunate for container runtimes and other programs that deal with switching namespaces very often. One common issue is that of converting between PIDs in the process's namespace and PIDs in the namespace of /proc. 
In principle, it is possible to do this today by opening a pidfd with pidfd_open(2) and then looking at /proc/self/fdinfo/$n (which will contain a PID value translated to the pid namespace associated with that procfs superblock). However, allocating a new file for each PID to be converted is less than ideal for programs that may need to scan procfs, and it is generally useful for userspace to be able to finally get this information from procfs. So, add a new API to get the pid namespace of a procfs instance, in the form of an ioctl(2) you can call on the root directory of said procfs. The returned file descriptor will have O_CLOEXEC set. This acts as a sister feature to the new "pidns" mount option, finally allowing userspace full control of the pid namespaces associated with procfs instances. The permission model for this is a bit looser than that of the "pidns" mount option (and also setns(2)) because /proc/1/ns/pid provides the same information, so as long as you have access to that magic-link (or something equivalently reasonable such as being in an ancestor pid namespace) it makes sense to allow userspace to grab a handle. Ideally we would check for ptrace-read access against all processes in the pidns (which is very likely to be true for at least one process, as SUID_DUMP_DISABLE is cleared on exec(2) and is rarely set by most programs), but this would obviously not scale. setns(2) will still have its own permission checks, so being able to open a pidns handle doesn't really provide too many other capabilities. 
Signed-off-by: Aleksa Sarai --- Documentation/filesystems/proc.rst | 4 +++ fs/proc/root.c | 68 ++++++++++++++++++++++++++++++++++= ++-- include/uapi/linux/fs.h | 4 +++ 3 files changed, 74 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems= /proc.rst index 5a157dadea0b..840f820fb467 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -2400,6 +2400,10 @@ will use the calling process's active pid namespace.= Note that the pid namespace of an existing procfs instance cannot be modified (attempting to= do so will give an `-EBUSY` error). =20 +Processes can check which pid namespace is used by a procfs instance by us= ing +the `PROCFS_GET_PID_NAMESPACE` ioctl() on the root directory of the procfs +instance. + Chapter 5: Filesystem behavior =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D =20 diff --git a/fs/proc/root.c b/fs/proc/root.c index fd1f1c8a939a..ac9b115fad7b 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -23,8 +23,10 @@ #include #include #include +#include =20 #include "internal.h" +#include "../internal.h" =20 struct proc_fs_context { struct pid_namespace *pid_ns; @@ -426,15 +428,77 @@ static int proc_root_readdir(struct file *file, struc= t dir_context *ctx) return proc_pid_readdir(file, ctx); } =20 +static long int proc_root_ioctl(struct file *filp, unsigned int cmd, unsig= ned long arg) +{ + switch (cmd) { + case PROCFS_GET_PID_NAMESPACE: { +#ifdef CONFIG_PID_NS + struct pid_namespace *active =3D task_active_pid_ns(current); + struct pid_namespace *ns =3D proc_pid_ns(file_inode(filp)->i_sb); + bool can_access_pidns =3D false; + + /* + * Having a handle to a pidns is not sufficient to do anything + * particularly harmful, as setns(2) has its own separate + * privilege checks. So, we can loosen the privilege + * requirements here a little to make this more ergonomic. 
+ * + * If we are in an ancestor pidns of the pidns, then we can + * already address any process in the pidns. From a setns(2) + * privileges perspective, we can create a pidfd which setns(2) + * would also accept (pending any privilege checks). + * + * If we are not in an ancestor pidns, because this operation + * is being done on the root of the /proc instance, the caller + * can try to access /proc/1/ns/pid which is equivalent to this + * ioctl and so we should copy the PTRACE_MODE_READ_FSCREDS + * permission model used by proc_ns_get_link(). Ideally we would + * check for ptrace-read access against all processes in the + * pidns (which is very likely to be true for at least one + * process, as SUID_DUMP_DISABLE is cleared on exec(2) and is + * rarely set by most programs), but this would obviously not + * scale. + * + * If there is no root process, then there is no real downside + * to unprivileged users to open a handle to it. + */ + can_access_pidns =3D pidns_is_ancestor(ns, active); + if (!can_access_pidns) { + bool cannot_ptrace_pid1 =3D false; + + read_lock(&tasklist_lock); + if (ns->child_reaper) + cannot_ptrace_pid1 =3D !ptrace_may_access(ns->child_reaper, + PTRACE_MODE_READ_FSCREDS); + read_unlock(&tasklist_lock); + can_access_pidns =3D !cannot_ptrace_pid1; + } + if (!can_access_pidns) + return -EPERM; + + /* open_namespace() unconditionally consumes the reference. */ + get_pid_ns(ns); + return open_namespace(to_ns_common(ns)); +#else + return -EOPNOTSUPP; +#endif + } + default: + return -ENOIOCTLCMD; + } +} + /* * The root /proc directory is special, as it has the * directories. Thus we don't use the generic * directory handling functions for that.. 
*/ static const struct file_operations proc_root_operations =3D { - .read =3D generic_read_dir, - .iterate_shared =3D proc_root_readdir, + .read =3D generic_read_dir, + .iterate_shared =3D proc_root_readdir, .llseek =3D generic_file_llseek, + .unlocked_ioctl =3D proc_root_ioctl, + .compat_ioctl =3D compat_ptr_ioctl, }; =20 /* diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 0bd678a4a10e..68e65e6d7d6b 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -435,8 +435,12 @@ typedef int __bitwise __kernel_rwf_t; RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC |\ RWF_DONTCACHE) =20 +/* This matches XSDFEC_MAGIC, so we need to allocate subvalues carefully. = */ #define PROCFS_IOCTL_MAGIC 'f' =20 +/* procfs root ioctls */ +#define PROCFS_GET_PID_NAMESPACE _IO(PROCFS_IOCTL_MAGIC, 32) + /* Pagemap ioctl */ #define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) =20 --=20 2.50.1 From nobody Sun Oct 5 10:44:54 2025 Received: from mout-p-101.mailbox.org (mout-p-101.mailbox.org [80.241.56.151]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 1B2F621CA08; Tue, 5 Aug 2025 05:45:54 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=80.241.56.151 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754372757; cv=none; b=aOLg058OOB5eFIrsiTceA4Uk20NNfHbtvS95yT/dmNDDzrx/HJ2bpvY7+9JaDfVhBCZgNSgk/w9au7fwD8xH6Ak9zf3+s6jYy5hxX+f7fVK0bZ6pRG4+CA1FwGoSuFEU27lB3tA/Z1nUIdg4HpGkAupK3cv7YRNIdCqBZxHR+7s= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1754372757; c=relaxed/simple; bh=40vUJ0jC1FNCskPUMlIRHQw/7AqXNiyfzz/oz1EbBMs=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; 
b=E9q3IExcb6EPrFmAsL0nf3ozNmqQJNJv/PTaLrIA9c4dQ5z/vc5wSPkYwo86hTKQDPvpjirx2K5Pl2NJ/Q6woiYXd1v7rl2AqzuqyU4FNiy7uZ0IOgbJ0LKDJ7JnoxtS4skEn+/YRYM00O1bjCbHcUq5MXmYwhL2QijKqhHFVY0= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com; spf=pass smtp.mailfrom=cyphar.com; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b=bU24rnvI; arc=none smtp.client-ip=80.241.56.151 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=cyphar.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b="bU24rnvI" Received: from smtp2.mailbox.org (smtp2.mailbox.org [10.196.197.2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-101.mailbox.org (Postfix) with ESMTPS id 4bx2TW344Vz9sWC; Tue, 5 Aug 2025 07:45:51 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cyphar.com; s=MBO0001; t=1754372751; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=M0UA9Hr24iLGhfKjvVdc9u7HZA7KbxdOYa3vKq7R6ls=; b=bU24rnvI85XF+YJCV59nAXeWYAZyfR7nAy3ygq+GTJgIOWWxdUV46c0rElSImrZxrtON9Q uH2GT2V/ML7Il5EmYJwFepX+iQFZUB86Fc/ZjZo/6qiaxu84fqOJSk0rqqCfFpepsPIjys yFtjq8zvwvvQTPloBm6ehE4DqeJONf1l204r7YGGHN50rGoYOTvvToDEXJ9LUbZr1Pa9Sm t4sVQYcEpVMTwo6CNGgrQNv2Qg96nAw0Ugeq6OP7ps9cutuhZvJILwckMQaLsG3ZPixP7J 1p/ODrpFPlmYNBO6HfWJD4wgxZ9txNlLxXn3S0yh8l/gUqFXsh/NvxbRtJcV3Q== From: Aleksa Sarai Date: Tue, 05 Aug 2025 15:45:11 +1000 Subject: [PATCH v4 4/4] selftests/proc: add tests for new pidns APIs Precedence: bulk 
X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20250805-procfs-pidns-api-v4-4-705f984940e7@cyphar.com> References: <20250805-procfs-pidns-api-v4-0-705f984940e7@cyphar.com> In-Reply-To: <20250805-procfs-pidns-api-v4-0-705f984940e7@cyphar.com> To: Alexander Viro , Christian Brauner , Jan Kara , Jonathan Corbet , Shuah Khan Cc: Andy Lutomirski , linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Aleksa Sarai X-Developer-Signature: v=1; a=openpgp-sha256; l=11255; i=cyphar@cyphar.com; h=from:subject:message-id; bh=40vUJ0jC1FNCskPUMlIRHQw/7AqXNiyfzz/oz1EbBMs=; b=owGbwMvMwCWmMf3Xpe0vXfIZT6slMWRMnJUv/TapQPDr8huGIcv+zPZ78MXLqb7zBMN9dY9D9 06Zsxd/6ChlYRDjYpAVU2TZ5ucZumn+4ivJn1aywcxhZQIZwsDFKQATWXKa4X/Yfp8nWUHMbQtv ihcd3LJVePbEtxeXXnsnFJO+zPn8Tu6vDP+TV4jGNzfGil3ZsD5gWejiqjj2VoYj9R/u1ErnXpd vY2UFAA== X-Developer-Key: i=cyphar@cyphar.com; a=openpgp; fpr=C9C370B246B09F6DBCFC744C34401015D1D2D386 Signed-off-by: Aleksa Sarai --- tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/proc/Makefile | 1 + tools/testing/selftests/proc/proc-pidns.c | 315 ++++++++++++++++++++++++++= ++++ 3 files changed, 317 insertions(+) diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selfte= sts/proc/.gitignore index 973968f45bba..2dced03e9e0e 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -17,6 +17,7 @@ /proc-tid0 /proc-uptime-001 /proc-uptime-002 +/proc-pidns /read /self /setns-dcache diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftest= s/proc/Makefile index b12921b9794b..c6f7046b9860 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -27,5 +27,6 @@ TEST_GEN_PROGS +=3D setns-sysvipc 
TEST_GEN_PROGS +=3D thread-self TEST_GEN_PROGS +=3D proc-multiple-procfs TEST_GEN_PROGS +=3D proc-fsconfig-hidepid +TEST_GEN_PROGS +=3D proc-pidns =20 include ../lib.mk diff --git a/tools/testing/selftests/proc/proc-pidns.c b/tools/testing/self= tests/proc/proc-pidns.c new file mode 100644 index 000000000000..f7dd80a2c150 --- /dev/null +++ b/tools/testing/selftests/proc/proc-pidns.c @@ -0,0 +1,315 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai + * Copyright (C) 2025 SUSE LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +#define ASSERT_ERRNO(expected, _t, seen) \ + __EXPECT(expected, #expected, \ + ({__typeof__(seen) _tmp_seen =3D (seen); \ + _tmp_seen >=3D 0 ? _tmp_seen : -errno; }), #seen, _t, 1) + +#define ASSERT_ERRNO_EQ(expected, seen) \ + ASSERT_ERRNO(expected, =3D=3D, seen) + +#define ASSERT_SUCCESS(seen) \ + ASSERT_ERRNO(0, <=3D, seen) + +static int touch(char *path) +{ + int fd =3D open(path, O_WRONLY|O_CREAT|O_CLOEXEC, 0644); + if (fd < 0) + return -1; + return close(fd); +} + +FIXTURE(ns) +{ + int host_mntns, host_pidns; + int dummy_pidns; +}; + +FIXTURE_SETUP(ns) +{ + /* Stash the old mntns. */ + self->host_mntns =3D open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_mntns); + + /* Create a new mount namespace and make it private. */ + ASSERT_SUCCESS(unshare(CLONE_NEWNS)); + ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); + + /* + * Create a proper tmpfs that we can use and will disappear once we + * leave this mntns. + */ + ASSERT_SUCCESS(mount("tmpfs", "/tmp", "tmpfs", 0, NULL)); + + /* + * Create a pidns we can use for later tests. We need to fork off a + * child so that we get a usable nsfd that we can bind-mount and open. 
+ */ + ASSERT_SUCCESS(mkdir("/tmp/dummy", 0755)); + ASSERT_SUCCESS(touch("/tmp/dummy/pidns")); + ASSERT_SUCCESS(mkdir("/tmp/dummy/proc", 0755)); + + self->host_pidns =3D open("/proc/self/ns/pid", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_pidns); + ASSERT_SUCCESS(unshare(CLONE_NEWPID)); + + pid_t pid =3D fork(); + ASSERT_SUCCESS(pid); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGKILL); + ASSERT_SUCCESS(mount("/proc/self/ns/pid", "/tmp/dummy/pidns", NULL, MS_B= IND, NULL)); + ASSERT_SUCCESS(mount("proc", "/tmp/dummy/proc", "proc", 0, NULL)); + exit(0); + } + + int wstatus; + ASSERT_EQ(waitpid(pid, &wstatus, 0), pid); + ASSERT_TRUE(WIFEXITED(wstatus)); + ASSERT_EQ(WEXITSTATUS(wstatus), 0); + + ASSERT_SUCCESS(setns(self->host_pidns, CLONE_NEWPID)); + + self->dummy_pidns =3D open("/tmp/dummy/pidns", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->dummy_pidns); +} + +FIXTURE_TEARDOWN(ns) +{ + ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS)); + ASSERT_SUCCESS(close(self->host_mntns)); + + ASSERT_SUCCESS(close(self->host_pidns)); + ASSERT_SUCCESS(close(self->dummy_pidns)); +} + +TEST_F(ns, pidns_mount_string_path) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc-host", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-host", "proc", 0, "pidns=3D/proc/= self/ns/pid")); + ASSERT_SUCCESS(access("/tmp/proc-host/self/", X_OK)); + + ASSERT_SUCCESS(mkdir("/tmp/proc-dummy", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-dummy", "proc", 0, "pidns=3D/tmp/= dummy/pidns")); + ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/1/", X_OK)); + ASSERT_ERRNO_EQ(-ENOENT, access("/tmp/proc-dummy/self/", X_OK)); +} + +TEST_F(ns, pidns_fsconfig_string_path) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy/p= idns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_ERRNO_EQ(-ENOENT, 
faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_fsconfig_fd) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy= _pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_ERRNO_EQ(-ENOENT, faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_reconfigure_remount) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc", "proc", 0, "")); + + ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK)); + ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK)); + + ASSERT_ERRNO_EQ(-EBUSY, mount(NULL, "/tmp/proc", NULL, MS_REMOUNT, "pidns= =3D/tmp/dummy/pidns")); + + ASSERT_SUCCESS(access("/tmp/proc/1/", X_OK)); + ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK)); +} + +TEST_F(ns, pidns_reconfigure_fsconfig_string_path) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tm= p/dummy/pidns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); = /* noop */ + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_reconfigure_fsconfig_fd) +{ + int fsfd 
=3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_ERRNO_EQ(-EBUSY, fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, se= lf->dummy_pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); = /* noop */ + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +int is_same_inode(int fd1, int fd2) +{ + struct stat stat1, stat2; + + assert(fstat(fd1, &stat1) =3D=3D 0); + assert(fstat(fd2, &stat2) =3D=3D 0); + + return stat1.st_ino =3D=3D stat2.st_ino && stat1.st_dev =3D=3D stat2.st_d= ev; +} + +#define PROCFS_IOCTL_MAGIC 'f' +#define PROCFS_GET_PID_NAMESPACE _IO(PROCFS_IOCTL_MAGIC, 32) + +TEST_F(ns, host_get_pidns_ioctl) +{ + int procfs =3D open("/proc", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(procfs); + + int procfs_pidns =3D ioctl(procfs, PROCFS_GET_PID_NAMESPACE); + ASSERT_SUCCESS(procfs_pidns); + + ASSERT_TRUE(is_same_inode(self->host_pidns, procfs_pidns)); + ASSERT_FALSE(is_same_inode(self->dummy_pidns, procfs_pidns)); + + ASSERT_SUCCESS(close(procfs)); + ASSERT_SUCCESS(close(procfs_pidns)); +} + +TEST_F(ns, mount_implicit_get_pidns_ioctl) +{ + int procfs =3D open("/tmp/dummy/proc", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(procfs); + + int procfs_pidns =3D ioctl(procfs, PROCFS_GET_PID_NAMESPACE); + ASSERT_SUCCESS(procfs_pidns); + + ASSERT_FALSE(is_same_inode(self->host_pidns, procfs_pidns)); + ASSERT_TRUE(is_same_inode(self->dummy_pidns, procfs_pidns)); + + ASSERT_SUCCESS(close(procfs)); + ASSERT_SUCCESS(close(procfs_pidns)); +} + +TEST_F(ns, mount_pidns_get_pidns_ioctl) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc-host", 0755)); + 
ASSERT_SUCCESS(mount("proc", "/tmp/proc-host", "proc", 0, "pidns=3D/proc/= self/ns/pid")); + + int host_procfs =3D open("/tmp/proc-host", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(host_procfs); + int host_procfs_pidns =3D ioctl(host_procfs, PROCFS_GET_PID_NAMESPACE); + ASSERT_SUCCESS(host_procfs_pidns); + + ASSERT_TRUE(is_same_inode(self->host_pidns, host_procfs_pidns)); + ASSERT_FALSE(is_same_inode(self->dummy_pidns, host_procfs_pidns)); + + ASSERT_SUCCESS(mkdir("/tmp/proc-dummy", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-dummy", "proc", 0, "pidns=3D/tmp/= dummy/pidns")); + + int dummy_procfs =3D open("/tmp/proc-dummy", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(dummy_procfs); + int dummy_procfs_pidns =3D ioctl(dummy_procfs, PROCFS_GET_PID_NAMESPACE); + ASSERT_SUCCESS(dummy_procfs_pidns); + + ASSERT_FALSE(is_same_inode(self->host_pidns, dummy_procfs_pidns)); + ASSERT_TRUE(is_same_inode(self->dummy_pidns, dummy_procfs_pidns)); + + ASSERT_SUCCESS(close(host_procfs)); + ASSERT_SUCCESS(close(host_procfs_pidns)); + ASSERT_SUCCESS(close(dummy_procfs)); + ASSERT_SUCCESS(close(dummy_procfs_pidns)); +} + +TEST_F(ns, fsconfig_pidns_get_pidns_ioctl) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy= _pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_SUCCESS(mountfd); + + /* fsmount returns an O_PATH, which ioctl(2) doesn't accept. 
*/ + int new_mountfd =3D openat(mountfd, ".", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + ASSERT_SUCCESS(new_mountfd); + + ASSERT_SUCCESS(close(mountfd)); + mountfd =3D -EBADF; + + int procfs_pidns =3D ioctl(new_mountfd, PROCFS_GET_PID_NAMESPACE); + ASSERT_SUCCESS(procfs_pidns); + + ASSERT_NE(self->dummy_pidns, procfs_pidns); + ASSERT_FALSE(is_same_inode(self->host_pidns, procfs_pidns)); + ASSERT_TRUE(is_same_inode(self->dummy_pidns, procfs_pidns)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(new_mountfd)); + ASSERT_SUCCESS(close(procfs_pidns)); +} + +TEST_HARNESS_MAIN --=20 2.50.1