From nobody Mon Oct 6 13:37:48 2025 Received: from mout-p-201.mailbox.org (mout-p-201.mailbox.org [80.241.56.171]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 29D21288536; Mon, 21 Jul 2025 08:44:52 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=80.241.56.171 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087494; cv=none; b=H/rL9xDoIQTHuBTfopAxavNbV92dzS/qGytgJclhCRn4xyhfdOVrpnESp04A8RK7+osY+U1CnVvFANKT1qdq1RI0ZDUfK9RRsHeOeIx2sIJvOqGwhXwIBiUnD261/v6o8XR6vnnGhEwflVL3FhSezXIYcrFJUwctihydDbQxcyo= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087494; c=relaxed/simple; bh=vNxZDH7Bhbz1a0HBJOn6Dz7tHFiiCT6i8sc1DF47H0Y=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=d9ek3NXr4L4TbT7ZboMboOFDojJOyB9AtbmZW09CIZ2hjlI58NVH7EOOlpcHQ9K5ASC9eY6c+6sg6HVq5f+YVQ1S+kk170426kJM78Vka7u6PLxje3HbTDBXCON63967L8mRkebmRJzlPO1TRChLD3NABp4azHJ6BvKGIgRmhmQ= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com; spf=pass smtp.mailfrom=cyphar.com; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b=Si3AZFYG; arc=none smtp.client-ip=80.241.56.171 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=cyphar.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b="Si3AZFYG" Received: from smtp2.mailbox.org (smtp2.mailbox.org [IPv6:2001:67c:2050:b231:465::2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-201.mailbox.org (Postfix) 
with ESMTPS id 4blv8x3nYLz9smq; Mon, 21 Jul 2025 10:44:49 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cyphar.com; s=MBO0001; t=1753087489; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=KlhX/YlPjk57dYLUv3DmyL/xzfHmw74JI5i75aK49y8=; b=Si3AZFYGrm1OjV4Ha7ykFtgaBuOJi/xuI71oagydQoofK76nG264wF8DKS9CZvhO4uFtvN iUMnVZRY2S1LuObRArvLGyMca2zNsPoZlMbSA3srPabFTnFDhvdfPRJdb70+N8RjAByCkg BGpO69oP8VebPY/AdRjPxeMhdluhlvPKPf9OkkTBClZFbwpautuoVCiq3s8UciuuMTJLKb gwp7sbflF/mSbsqp2Xvt66A0ZAb/SulazazcUBAM24wxyrSPeAodDQQ3Fh1FIVwnJFKY/P mieQzmH0t3HPQJ4zJ8JlOWtpIGMm5usUpTDuYVSiMsgE8EoN0CpU/6Pci0Ypiw== Authentication-Results: outgoing_mbo_mout; dkim=none; spf=pass (outgoing_mbo_mout: domain of cyphar@cyphar.com designates 2001:67c:2050:b231:465::2 as permitted sender) smtp.mailfrom=cyphar@cyphar.com From: Aleksa Sarai Date: Mon, 21 Jul 2025 18:44:11 +1000 Subject: [PATCH RFC 1/4] pidns: move is-ancestor logic to helper Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20250721-procfs-pidns-api-v1-1-5cd9007e512d@cyphar.com> References: <20250721-procfs-pidns-api-v1-0-5cd9007e512d@cyphar.com> In-Reply-To: <20250721-procfs-pidns-api-v1-0-5cd9007e512d@cyphar.com> To: Alexander Viro , Christian Brauner , Jan Kara , Jonathan Corbet , Shuah Khan Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Aleksa Sarai X-Developer-Signature: v=1; a=openpgp-sha256; l=2455; i=cyphar@cyphar.com; h=from:subject:message-id; bh=vNxZDH7Bhbz1a0HBJOn6Dz7tHFiiCT6i8sc1DF47H0Y=; 
b=owGbwMvMwCWmMf3Xpe0vXfIZT6slMWTU/n1foi34orx7cV2r49lTNu4Otaf66wK4PzCffa5Sv ldXcc+ljlIWBjEuBlkxRZZtfp6hm+YvvpL8aSUbzBxWJpAhDFycAjCRAD5GhklVUyLtxeVznI4X eL9O+FztmCZScvbTh2/KMdnqp+4xyjMyTNTmdHb4ueXQr+ri5Z7njqUqJL9NOuy5yEzg3ofnJ+o /MAMA X-Developer-Key: i=cyphar@cyphar.com; a=openpgp; fpr=C9C370B246B09F6DBCFC744C34401015D1D2D386 X-Rspamd-Queue-Id: 4blv8x3nYLz9smq This check will be needed in later patches, and there's no point open-coding it each time. Signed-off-by: Aleksa Sarai --- include/linux/pid_namespace.h | 9 +++++++++ kernel/pid_namespace.c | 21 ++++++++++++++------- 2 files changed, 23 insertions(+), 7 deletions(-) diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 7c67a5811199..df421a1e3e0b 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -84,6 +84,9 @@ extern void zap_pid_ns_processes(struct pid_namespace *pi= d_ns); extern int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd); extern void put_pid_ns(struct pid_namespace *ns); =20 +extern bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor); + #else /* !CONFIG_PID_NS */ #include =20 @@ -118,6 +121,12 @@ static inline int reboot_pid_ns(struct pid_namespace *= pid_ns, int cmd) { return 0; } + +bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor) +{ + return false; +} #endif /* CONFIG_PID_NS */ =20 extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 7098ed44e717..5719b1f679ad 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -390,6 +390,19 @@ static void pidns_put(struct ns_common *ns) put_pid_ns(to_pid_ns(ns)); } =20 +bool pidns_is_ancestor(struct pid_namespace *child, + struct pid_namespace *ancestor) +{ + struct pid_namespace *ns; + + if (child->level < ancestor->level) + return false; + for (ns =3D child; ns->level > ancestor->level; ns =3D ns->parent) + ; + return ns 
=3D=3D ancestor; +} +EXPORT_SYMBOL_GPL(pidns_is_ancestor); + static int pidns_install(struct nsset *nsset, struct ns_common *ns) { struct nsproxy *nsproxy =3D nsset->nsproxy; @@ -408,13 +421,7 @@ static int pidns_install(struct nsset *nsset, struct n= s_common *ns) * this maintains the property that processes and their * children can not escape their current pid namespace. */ - if (new->level < active->level) - return -EINVAL; - - ancestor =3D new; - while (ancestor->level > active->level) - ancestor =3D ancestor->parent; - if (ancestor !=3D active) + if (!pidns_is_ancestor(new, active)) return -EINVAL; =20 put_pid_ns(nsproxy->pid_ns_for_children); --=20 2.50.0 From nobody Mon Oct 6 13:37:48 2025 Received: from mout-p-201.mailbox.org (mout-p-201.mailbox.org [80.241.56.171]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 56F4628725A; Mon, 21 Jul 2025 08:44:59 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=80.241.56.171 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087501; cv=none; b=NjBCGYQLuRvwsWwrCl8nWZBn/ZimTBpwGerHry/0lNgBCNucx01QNEws2THKpH2DwLyRiGop+qSVU2E6EL82J7cGAwQ+kqP9xMbO+4Am4N1Mc5iz92s0LuKZqgHQ3ufHOwOPrsljUttC7t79t2InclJKQtLM5kt5zm6DbVzGPLI= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087501; c=relaxed/simple; bh=AEvujwBUjRTmHReBUwGHeNu6tYPvFq7/igA3xCswtkY=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=gq9Ys+lQWjBSsiOoDwxX/+5QTHQLRL384x8SNZfFp3TgszK1RRQKRPeZasS6hEUFb3hWvIKpc4wmVUBUbofi4MLBW5vZlZeQMP4rOlf+wjdtIUykrSVcSLZiKETpyqMVYMyFSPYysbK6LW3GO+dhpM6JAZK7sTr1wJaLrT4X0sU= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com; spf=pass smtp.mailfrom=cyphar.com; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com 
header.b=C8pHkJ+A; arc=none smtp.client-ip=80.241.56.171 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=cyphar.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b="C8pHkJ+A" Received: from smtp2.mailbox.org (smtp2.mailbox.org [IPv6:2001:67c:2050:b231:465::2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-201.mailbox.org (Postfix) with ESMTPS id 4blv93734kz9st4; Mon, 21 Jul 2025 10:44:55 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cyphar.com; s=MBO0001; t=1753087496; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=sKANVQjFFkeVoyvMJ4VBA7OL+5rWrKofQzXQlRR9R/0=; b=C8pHkJ+Awd/FYZ4yOxQAcf1Uv6XS8s9brBayCd143sYWU9yMPhnFhPNq3Un980FvQNQUeR k27hbfOLAquSu57Xattp7LplP1UouOpcYpW36IV5REkJBIEQtBT6cIidNd5OXZ8jiUAZay /mw3D0X23+fqOq0Pzh8zQqZW+KlAt9MJsdCsG8hMe2SccsSFdWomJ0uJ0IVUSOTClv9Dp4 xKUPvpCFj4winaycviD1ZtDKzuYXipjtOfGb5Xkmo0ZZj3Jy4FFSm/uGCcLZ5mJMhhhtHx PzZuA+fiH3nuCLJwrfjbubBCMNJsIcBGvhu8//Og+dEeoIBGUAQE+6EYvqHENg== Authentication-Results: outgoing_mbo_mout; dkim=none; spf=pass (outgoing_mbo_mout: domain of cyphar@cyphar.com designates 2001:67c:2050:b231:465::2 as permitted sender) smtp.mailfrom=cyphar@cyphar.com From: Aleksa Sarai Date: Mon, 21 Jul 2025 18:44:12 +1000 Subject: [PATCH RFC 2/4] procfs: add pidns= mount option Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: 
quoted-printable Message-Id: <20250721-procfs-pidns-api-v1-2-5cd9007e512d@cyphar.com> References: <20250721-procfs-pidns-api-v1-0-5cd9007e512d@cyphar.com> In-Reply-To: <20250721-procfs-pidns-api-v1-0-5cd9007e512d@cyphar.com> To: Alexander Viro , Christian Brauner , Jan Kara , Jonathan Corbet , Shuah Khan Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Aleksa Sarai X-Developer-Signature: v=1; a=openpgp-sha256; l=5544; i=cyphar@cyphar.com; h=from:subject:message-id; bh=AEvujwBUjRTmHReBUwGHeNu6tYPvFq7/igA3xCswtkY=; b=owGbwMvMwCWmMf3Xpe0vXfIZT6slMWTU/v2Qr/PwJhd71N87f5L+lsvnWLpqt/35VVizVv8P/ 6YaubSLHaUsDGJcDLJiiizb/DxDN81ffCX500o2mDmsTCBDGLg4BWAi9e8YGZ60FxnIBt+8U6wZ s/vUmy3pjn2mV7Z9M3IqKxBhEOL1/snwP3BHGUvThJAbj0tVNkpMCRV9N7V3v8D1E58jAudOip8 yhQkA X-Developer-Key: i=cyphar@cyphar.com; a=openpgp; fpr=C9C370B246B09F6DBCFC744C34401015D1D2D386 X-Rspamd-Queue-Id: 4blv93734kz9st4 Since the introduction of pid namespaces, their interaction with procfs has been entirely implicit in ways that require a lot of dancing around by programs that need to construct sandboxes with different PID namespaces. Being able to explicitly specify the pid namespace to use when constructing a procfs super block will allow programs to no longer need to fork off a process which then does unshare(2) / setns(2) and forks again in order to construct a procfs in a pidns. Signed-off-by: Aleksa Sarai --- Documentation/filesystems/proc.rst | 6 +++ fs/proc/root.c | 80 ++++++++++++++++++++++++++++++++++= +--- 2 files changed, 80 insertions(+), 6 deletions(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems= /proc.rst index 5236cb52e357..c520b9f8a3fd 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -2360,6 +2360,7 @@ The following mount options are supported: hidepid=3D Set /proc/<pid>/ access mode. 
gid=3D Set the group authorized to learn processes information. subset=3D Show only the specified subset of procfs. + pidns=3D Specify the pid namespace used by this procfs. =3D=3D=3D=3D=3D=3D=3D=3D=3D =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D =20 hidepid=3Doff or hidepid=3D0 means classic mode - everybody may access all @@ -2392,6 +2393,11 @@ information about processes information, just add id= entd to this group. subset=3Dpid hides all top level files and directories in the procfs that are not related to tasks. =20 +pidns=3D specifies a pid namespace (either as a string path to something l= ike +`/proc/$pid/ns/pid`, or a file descriptor when using `FSCONFIG_SET_FD`) th= at +will be used by the procfs instance when translating pids. By default, pro= cfs +will use the calling process's active pid namespace. + Chapter 5: Filesystem behavior =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D =20 diff --git a/fs/proc/root.c b/fs/proc/root.c index ed86ac710384..10ca94be0eef 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -38,12 +38,14 @@ enum proc_param { Opt_gid, Opt_hidepid, Opt_subset, + Opt_pidns, }; =20 static const struct fs_parameter_spec proc_fs_parameters[] =3D { - fsparam_u32("gid", Opt_gid), + fsparam_u32("gid", Opt_gid), fsparam_string("hidepid", Opt_hidepid), fsparam_string("subset", Opt_subset), + fsparam_file_or_string("pidns", Opt_pidns), {} }; =20 @@ -109,11 +111,65 @@ static int proc_parse_subset_param(struct fs_context = *fc, char *value) return 0; } =20 +static int proc_parse_pidns_param(struct fs_context *fc, + struct fs_parameter *param, + struct fs_parse_result *result) +{ + struct proc_fs_context *ctx =3D fc->fs_private; + struct pid_namespace *target, *active =3D task_active_pid_ns(current); + struct ns_common *ns; + struct file *ns_filp __free(fput) =3D NULL; + + switch 
(param->type) { + case fs_value_is_file: + /* came through fsconfig, steal the file reference */ + ns_filp =3D param->file; + param->file =3D NULL; + break; + case fs_value_is_string: + ns_filp =3D filp_open(param->string, O_RDONLY, 0); + break; + default: + WARN_ON_ONCE(true); + break; + } + if (!ns_filp) + ns_filp =3D ERR_PTR(-EBADF); + if (IS_ERR(ns_filp)) { + errorfc(fc, "could not get file from pidns argument"); + return PTR_ERR(ns_filp); + } + + if (!proc_ns_file(ns_filp)) + return invalfc(fc, "pidns argument is not an nsfs file"); + ns =3D get_proc_ns(file_inode(ns_filp)); + if (ns->ops->type !=3D CLONE_NEWPID) + return invalfc(fc, "pidns argument is not a pidns file"); + target =3D container_of(ns, struct pid_namespace, ns); + + /* + * pidns=3D is shorthand for joining the pidns to get a fsopen fd, so the + * permission model should be the same as pidns_install(). + */ + if (!ns_capable(target->user_ns, CAP_SYS_ADMIN)) { + errorfc(fc, "insufficient permissions to set pidns"); + return -EPERM; + } + if (!pidns_is_ancestor(target, active)) + return invalfc(fc, "cannot set pidns to non-descendant pidns"); + + put_pid_ns(ctx->pid_ns); + ctx->pid_ns =3D get_pid_ns(target); + put_user_ns(fc->user_ns); + fc->user_ns =3D get_user_ns(ctx->pid_ns->user_ns); + return 0; +} + static int proc_parse_param(struct fs_context *fc, struct fs_parameter *pa= ram) { struct proc_fs_context *ctx =3D fc->fs_private; struct fs_parse_result result; - int opt; + int opt, err; =20 opt =3D fs_parse(fc, proc_fs_parameters, param, &result); if (opt < 0) @@ -125,13 +181,21 @@ static int proc_parse_param(struct fs_context *fc, st= ruct fs_parameter *param) break; =20 case Opt_hidepid: - if (proc_parse_hidepid_param(fc, param)) - return -EINVAL; + err =3D proc_parse_hidepid_param(fc, param); + if (err) + return err; break; =20 case Opt_subset: - if (proc_parse_subset_param(fc, param->string) < 0) - return -EINVAL; + err =3D proc_parse_subset_param(fc, param->string); + if (err) + return err; 
+ break; + + case Opt_pidns: + err =3D proc_parse_pidns_param(fc, param, &result); + if (err) + return err; break; =20 default: @@ -154,6 +218,10 @@ static void proc_apply_options(struct proc_fs_info *fs= _info, fs_info->hide_pid =3D ctx->hidepid; if (ctx->mask & (1 << Opt_subset)) fs_info->pidonly =3D ctx->pidonly; + if (ctx->mask & (1 << Opt_pidns)) { + put_pid_ns(fs_info->pid_ns); + fs_info->pid_ns =3D get_pid_ns(ctx->pid_ns); + } } =20 static int proc_fill_super(struct super_block *s, struct fs_context *fc) --=20 2.50.0 From nobody Mon Oct 6 13:37:48 2025 Received: from mout-p-202.mailbox.org (mout-p-202.mailbox.org [80.241.56.172]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 7119428983B; Mon, 21 Jul 2025 08:45:05 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=80.241.56.172 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087507; cv=none; b=Sczv/hn1AAGNwLSD3XMrZPMysHKQG5R0UFmG/gTHkcDqJIt00lFaeAF1BPU744oUTkKsQh5zyORzjzLESGdv7r/Dc+5TZRDdCxMIz1Q/2zyzZ8j5qYSYot2HUnpaXr//IbR16vmchToP9Bv1ZKnuejaBqPoUCEumv4w3nUqlH2o= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087507; c=relaxed/simple; bh=VDUvhCyC74lM9Wn9zyyY7zzmasB5eWfkNk+iC9mxqfM=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=EY6DEFjH22544usgMEb1zeyqojFA+a3XsLnG3Tjbu20vyaCh+t730Xyj/ePUL7GAU8x6yg52NC47I5AsshWWt1cPIalZQHx1jGa9mTdMfZ5//PpiQZh9yeuD7x/HVuXpeyMW+GFC/Jsgprxy4wOz/5nwwy103Tc5LStX/U2+ApI= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com; spf=pass smtp.mailfrom=cyphar.com; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b=kv5YexTN; arc=none smtp.client-ip=80.241.56.172 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) 
header.from=cyphar.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=cyphar.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b="kv5YexTN" Received: from smtp2.mailbox.org (smtp2.mailbox.org [IPv6:2001:67c:2050:b231:465::2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-202.mailbox.org (Postfix) with ESMTPS id 4blv995h4Sz9spn; Mon, 21 Jul 2025 10:45:01 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cyphar.com; s=MBO0001; t=1753087501; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=VeipYVnusBOwdX61rjwa47m9ibpcdMacHHxPihfaDm8=; b=kv5YexTNwCM3tRGGO3uyUB6GteAnuDJPur89GZ4fdCP3deIYQYnzvC0KLwkLtwTUuec36i OPb09CP346CRcoaJRIUC+gpXN1Bo28HW34W/JVXPaZu3dwIqKNx9cKpMERosga84s/rfHQ k7akG9KVlp4HzH1cMJK3fT+axMkXksu5lAWiL9p0hA6oeKH4l7i4KoQrCtbIrfnrhVeHL/ 4jOyXoTL6sIKT81zdOfj1W9BeNfoo7nxxxfJj37AKQ+XT01MypQLkZG6atTrzhd36LTiHW CNXCbzn12IgP9dD7sgLxLJhvssLP+AgTQhWC3krdtjKT/piMUJmqXePyVWy3Rw== Authentication-Results: outgoing_mbo_mout; dkim=none; spf=pass (outgoing_mbo_mout: domain of cyphar@cyphar.com designates 2001:67c:2050:b231:465::2 as permitted sender) smtp.mailfrom=cyphar@cyphar.com From: Aleksa Sarai Date: Mon, 21 Jul 2025 18:44:13 +1000 Subject: [PATCH RFC 3/4] procfs: add PROCFS_GET_PID_NAMESPACE ioctl Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20250721-procfs-pidns-api-v1-3-5cd9007e512d@cyphar.com> References: 
<20250721-procfs-pidns-api-v1-0-5cd9007e512d@cyphar.com> In-Reply-To: <20250721-procfs-pidns-api-v1-0-5cd9007e512d@cyphar.com> To: Alexander Viro , Christian Brauner , Jan Kara , Jonathan Corbet , Shuah Khan Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Aleksa Sarai X-Developer-Signature: v=1; a=openpgp-sha256; l=4941; i=cyphar@cyphar.com; h=from:subject:message-id; bh=VDUvhCyC74lM9Wn9zyyY7zzmasB5eWfkNk+iC9mxqfM=; b=owGbwMvMwCWmMf3Xpe0vXfIZT6slMWTU/v0wj31npZ+X6u/Kuszn3cGfLLyjV2hc5wpc9cHt+ 80Ke42+jlIWBjEuBlkxRZZtfp6hm+YvvpL8aSUbzBxWJpAhDFycAjCRX/sY/rtk6Mb+sTm5JTjY RO9E6pRGGaXzMzkkepOmhb+/N0n5uxHD/+SWuWkT2HdELL5wj2HS859M0p7vbggJL3utVSiV8uK jBy8A X-Developer-Key: i=cyphar@cyphar.com; a=openpgp; fpr=C9C370B246B09F6DBCFC744C34401015D1D2D386 X-Rspamd-Queue-Id: 4blv995h4Sz9spn /proc has historically had very opaque semantics about PID namespaces, which is a little unfortunate for container runtimes and other programs that deal with switching namespaces very often. One common issue is that of converting between PIDs in the process's namespace and PIDs in the namespace of /proc. In principle, it is possible to do this today by opening a pidfd with pidfd_open(2) and then looking at /proc/self/fdinfo/$n (which will contain a PID value translated to the pid namespace associated with that procfs superblock). However, allocating a new file for each PID to be converted is less than ideal for programs that may need to scan procfs, and it is generally useful for userspace to be able to finally get this information from procfs. This also acts as a sister feature to the pidns=3D mount option, finally allowing userspace full control of the pid namespaces associated with /proc instances. 
Signed-off-by: Aleksa Sarai --- Documentation/filesystems/proc.rst | 4 +++ fs/proc/root.c | 52 ++++++++++++++++++++++++++++++++++= ++-- include/uapi/linux/fs.h | 3 +++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems= /proc.rst index c520b9f8a3fd..506383273c9d 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -2398,6 +2398,10 @@ pidns=3D specifies a pid namespace (either as a stri= ng path to something like will be used by the procfs instance when translating pids. By default, pro= cfs will use the calling process's active pid namespace. =20 +Processes can check which pid namespace is used by a procfs instance by us= ing +the `PROCFS_GET_PID_NAMESPACE` ioctl() on the root directory of the procfs +instance. + Chapter 5: Filesystem behavior =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D= =3D=3D=3D=3D=3D=3D =20 diff --git a/fs/proc/root.c b/fs/proc/root.c index 10ca94be0eef..ee90749ccd8e 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -23,8 +23,10 @@ #include #include #include +#include =20 #include "internal.h" +#include "../internal.h" =20 struct proc_fs_context { struct pid_namespace *pid_ns; @@ -408,15 +410,61 @@ static int proc_root_readdir(struct file *file, struc= t dir_context *ctx) return proc_pid_readdir(file, ctx); } =20 +static long int proc_root_ioctl(struct file *filp, unsigned int cmd, unsig= ned long arg) +{ + switch (cmd) { + case PROCFS_GET_PID_NAMESPACE: { + struct pid_namespace *active =3D task_active_pid_ns(current); + struct pid_namespace *ns =3D proc_pid_ns(file_inode(filp)->i_sb); + bool can_access_pidns =3D false; + + /* + * If we are in an ancestor of the pidns, or have join + * privileges (CAP_SYS_ADMIN), then it makes sense that we + * would be able to grab a handle to the pidns. 
+ * + * Otherwise, if there is a root process, then being able to + * access /proc/$pid/ns/pid is equivalent to this ioctl and so + * we should probably match the permission model. For empty + * namespaces it seems unlikely for there to be a downside to + * allowing unprivileged users to open a handle to it (setns + * will fail for unprivileged users anyway). + */ + can_access_pidns =3D pidns_is_ancestor(ns, active) || + ns_capable(ns->user_ns, CAP_SYS_ADMIN); + if (!can_access_pidns) { + bool cannot_ptrace_pid1 =3D false; + + read_lock(&tasklist_lock); + if (ns->child_reaper) + cannot_ptrace_pid1 =3D ptrace_may_access(ns->child_reaper, + PTRACE_MODE_READ_FSCREDS); + read_unlock(&tasklist_lock); + can_access_pidns =3D !cannot_ptrace_pid1; + } + if (!can_access_pidns) + return -EPERM; + + /* open_namespace() unconditionally consumes the reference. */ + get_pid_ns(ns); + return open_namespace(to_ns_common(ns)); + } + default: + return -ENOIOCTLCMD; + } +} + /* * The root /proc directory is special, as it has the * <pid> directories. Thus we don't use the generic * directory handling functions for that.. 
*/ static const struct file_operations proc_root_operations =3D { - .read =3D generic_read_dir, - .iterate_shared =3D proc_root_readdir, + .read =3D generic_read_dir, + .iterate_shared =3D proc_root_readdir, .llseek =3D generic_file_llseek, + .unlocked_ioctl =3D proc_root_ioctl, + .compat_ioctl =3D compat_ptr_ioctl, }; =20 /* diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 0bd678a4a10e..aa642cb48feb 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -437,6 +437,9 @@ typedef int __bitwise __kernel_rwf_t; =20 #define PROCFS_IOCTL_MAGIC 'f' =20 +/* procfs root ioctls */ +#define PROCFS_GET_PID_NAMESPACE _IO(PROCFS_IOCTL_MAGIC, 1) + /* Pagemap ioctl */ #define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg) =20 --=20 2.50.0 From nobody Mon Oct 6 13:37:48 2025 Received: from mout-p-102.mailbox.org (mout-p-102.mailbox.org [80.241.56.152]) (using TLSv1.2 with cipher ECDHE-RSA-AES256-GCM-SHA384 (256/256 bits)) (No client certificate requested) by smtp.subspace.kernel.org (Postfix) with ESMTPS id 8B6021ABED9; Mon, 21 Jul 2025 08:45:11 +0000 (UTC) Authentication-Results: smtp.subspace.kernel.org; arc=none smtp.client-ip=80.241.56.152 ARC-Seal: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087513; cv=none; b=MeIBihIk3wFEve5dbuP3u4+VkQBrhuDxBQYRA/S24/EsYoX3KoC0+OTwB7coTMsfrfUwRUNIlbDLqG0Rc4hH3auGQ9KwRsYjorWZkCHQyiWGjCcJCASajNhlZ6OdGovbh6P/wOJ7XIkuXVWL1GE4FZDbEfrGZYO9jiPZNW3iWUw= ARC-Message-Signature: i=1; a=rsa-sha256; d=subspace.kernel.org; s=arc-20240116; t=1753087513; c=relaxed/simple; bh=iERN6Xzk9sHzsB4pLCPzg+cd0rkOPiJ6NJagUizg4D0=; h=From:Date:Subject:MIME-Version:Content-Type:Message-Id:References: In-Reply-To:To:Cc; b=u/ROAqnRgOUL3e0fGP/PB8epJ+FetmpBSnYHN2+y2NyNL1nbazIk3UBUXrvUg4AWgiNvVAX86+mOKMggHI/T1jjjJSGkkHfSEtAWeFAVQpwu3rsz7srpg4PKw2Tu47tHLM+/DdBh3l2D/2IrqnLgyH4sVaEQK+/nEcV3Mu3Kcto= ARC-Authentication-Results: i=1; smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) 
header.from=cyphar.com; spf=pass smtp.mailfrom=cyphar.com; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b=Uv1zY+Mw; arc=none smtp.client-ip=80.241.56.152 Authentication-Results: smtp.subspace.kernel.org; dmarc=pass (p=reject dis=none) header.from=cyphar.com Authentication-Results: smtp.subspace.kernel.org; spf=pass smtp.mailfrom=cyphar.com Authentication-Results: smtp.subspace.kernel.org; dkim=pass (2048-bit key) header.d=cyphar.com header.i=@cyphar.com header.b="Uv1zY+Mw" Received: from smtp2.mailbox.org (smtp2.mailbox.org [IPv6:2001:67c:2050:b231:465::2]) (using TLSv1.3 with cipher TLS_AES_256_GCM_SHA384 (256/256 bits) key-exchange X25519 server-signature RSA-PSS (4096 bits) server-digest SHA256) (No client certificate requested) by mout-p-102.mailbox.org (Postfix) with ESMTPS id 4blv9H6kGwz9tJm; Mon, 21 Jul 2025 10:45:07 +0200 (CEST) DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=cyphar.com; s=MBO0001; t=1753087508; h=from:from:reply-to:subject:subject:date:date:message-id:message-id: to:to:cc:cc:mime-version:mime-version:content-type:content-type: content-transfer-encoding:content-transfer-encoding: in-reply-to:in-reply-to:references:references; bh=Ga146VfUnbYMyYkmA+aailWJBUlJlGizk/gmO5Hyyfg=; b=Uv1zY+MwXnDcLR14+LPmtIhODIUR+30RApR7k08bh1MtOoRhHr9SDbGVboFMLkQ6cDQN5m 32aSS/m74x+blPhDqFrd45G63FIqUwul6jnrW1eOe2jDXDzeXp6TS+Aay2/8xiB1f4Cogh bOf/P3PO5dSTUDyqDlN6hPH7YmzQq+hYfvyfvH285ZqfQxj0idhPi+N46BHpyfpjfljRQ3 x2ZYZ1j/RZi9iDKOlqYUdxfEDGZcw6fqYjl3HernEsYNyQSDVnErsKb77sRrnZ+xjSqKox lSc85pTV4YzLLyozYLApWG3t5nOXzrvbVTh8fTGVk/CH/hjfRoLThV+AZjS1/Q== Authentication-Results: outgoing_mbo_mout; dkim=none; spf=pass (outgoing_mbo_mout: domain of cyphar@cyphar.com designates 2001:67c:2050:b231:465::2 as permitted sender) smtp.mailfrom=cyphar@cyphar.com From: Aleksa Sarai Date: Mon, 21 Jul 2025 18:44:14 +1000 Subject: [PATCH RFC 4/4] selftests/proc: add tests for new pidns APIs Precedence: bulk X-Mailing-List: linux-kernel@vger.kernel.org 
List-Id: List-Subscribe: List-Unsubscribe: MIME-Version: 1.0 Content-Type: text/plain; charset="utf-8" Content-Transfer-Encoding: quoted-printable Message-Id: <20250721-procfs-pidns-api-v1-4-5cd9007e512d@cyphar.com> References: <20250721-procfs-pidns-api-v1-0-5cd9007e512d@cyphar.com> In-Reply-To: <20250721-procfs-pidns-api-v1-0-5cd9007e512d@cyphar.com> To: Alexander Viro , Christian Brauner , Jan Kara , Jonathan Corbet , Shuah Khan Cc: linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org, linux-api@vger.kernel.org, linux-doc@vger.kernel.org, linux-kselftest@vger.kernel.org, Aleksa Sarai X-Developer-Signature: v=1; a=openpgp-sha256; l=9962; i=cyphar@cyphar.com; h=from:subject:message-id; bh=iERN6Xzk9sHzsB4pLCPzg+cd0rkOPiJ6NJagUizg4D0=; b=owGbwMvMwCWmMf3Xpe0vXfIZT6slMWTU/v3AyfntrMi9mMzomFoPHcHe5KXLfzZs14s9+/aOd +uryrRLHaUsDGJcDLJiiizb/DxDN81ffCX500o2mDmsTCBDGLg4BWAiIpsZGb66x8puMvv3XVhS Ys153pjrq2ab3H08YW9uWo1TleS+3XWMDGeWPav+U9qoy+zgcTXU4UGdxp2fvmsulBr+dzaeOo1 DhQMA X-Developer-Key: i=cyphar@cyphar.com; a=openpgp; fpr=C9C370B246B09F6DBCFC744C34401015D1D2D386 X-Rspamd-Queue-Id: 4blv9H6kGwz9tJm Signed-off-by: Aleksa Sarai --- tools/testing/selftests/proc/.gitignore | 1 + tools/testing/selftests/proc/Makefile | 1 + tools/testing/selftests/proc/proc-pidns.c | 286 ++++++++++++++++++++++++++= ++++ 3 files changed, 288 insertions(+) diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selfte= sts/proc/.gitignore index 973968f45bba..2dced03e9e0e 100644 --- a/tools/testing/selftests/proc/.gitignore +++ b/tools/testing/selftests/proc/.gitignore @@ -17,6 +17,7 @@ /proc-tid0 /proc-uptime-001 /proc-uptime-002 +/proc-pidns /read /self /setns-dcache diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftest= s/proc/Makefile index b12921b9794b..c6f7046b9860 100644 --- a/tools/testing/selftests/proc/Makefile +++ b/tools/testing/selftests/proc/Makefile @@ -27,5 +27,6 @@ TEST_GEN_PROGS +=3D setns-sysvipc TEST_GEN_PROGS +=3D thread-self 
TEST_GEN_PROGS +=3D proc-multiple-procfs TEST_GEN_PROGS +=3D proc-fsconfig-hidepid +TEST_GEN_PROGS +=3D proc-pidns =20 include ../lib.mk diff --git a/tools/testing/selftests/proc/proc-pidns.c b/tools/testing/self= tests/proc/proc-pidns.c new file mode 100644 index 000000000000..e7e34c78d383 --- /dev/null +++ b/tools/testing/selftests/proc/proc-pidns.c @@ -0,0 +1,286 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai + * Copyright (C) 2025 SUSE LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +#define bail(fmt, ...) \ + do { \ + fprintf(stderr, fmt ": %m", __VA_ARGS__); \ + exit(1); \ + } while (0) + +#define ASSERT_SUCCESS ASSERT_FALSE +#define ASSERT_FAIL ASSERT_TRUE + +int touch(char *path) +{ + int fd =3D open(path, O_WRONLY|O_CREAT|O_CLOEXEC, 0644); + if (fd < 0 || close(fd) < 0) + return -errno; + return 0; +} + +FIXTURE(ns) +{ + int host_mntns, host_pidns; + int dummy_pidns; +}; + +FIXTURE_SETUP(ns) +{ + /* Stash the old mntns. */ + self->host_mntns =3D open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC); + ASSERT_GE(self->host_mntns, 0); + + /* Create a new mount namespace and make it private. */ + ASSERT_SUCCESS(unshare(CLONE_NEWNS)); + ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); + + /* + * Create a proper tmpfs that we can use and will disappear once we + * leave this mntns. + */ + ASSERT_SUCCESS(mount("tmpfs", "/tmp", "tmpfs", 0, NULL)); + + /* + * Create a pidns we can use for later tests. We need to fork off a + * child so that we get a usable nsfd that we can bind-mount and open. 
+ */ + ASSERT_SUCCESS(touch("/tmp/dummy-pidns")); + + self->host_pidns =3D open("/proc/self/ns/pid", O_RDONLY|O_CLOEXEC); + ASSERT_GE(self->host_pidns, 0); + ASSERT_SUCCESS(unshare(CLONE_NEWPID)); + + pid_t pid =3D fork(); + ASSERT_GE(pid, 0); + if (!pid) { + prctl(PR_SET_PDEATHSIG, SIGKILL); + ASSERT_SUCCESS(mount("/proc/self/ns/pid", "/tmp/dummy-pidns", NULL, MS_B= IND, 0)); + exit(0); + } + + int wstatus; + ASSERT_EQ(waitpid(pid, &wstatus, 0), pid); + ASSERT_TRUE(WIFEXITED(wstatus)); + ASSERT_EQ(WEXITSTATUS(wstatus), 0); + + ASSERT_SUCCESS(setns(self->host_pidns, CLONE_NEWPID)); + + self->dummy_pidns =3D open("/tmp/dummy-pidns", O_RDONLY|O_CLOEXEC); + ASSERT_GE(self->dummy_pidns, 0); +} + +FIXTURE_TEARDOWN(ns) +{ + ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS)); + ASSERT_SUCCESS(close(self->host_mntns)); + + ASSERT_SUCCESS(close(self->host_pidns)); + ASSERT_SUCCESS(close(self->dummy_pidns)); +} + +TEST_F(ns, pidns_mount_string_path) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc-host", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-host", "proc", 0, "pidns=3D/proc/= self/ns/pid")); + ASSERT_SUCCESS(access("/tmp/proc-host/self/", X_OK)); + + ASSERT_SUCCESS(mkdir("/tmp/proc-dummy", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc-dummy", "proc", 0, "pidns=3D/tmp/= dummy-pidns")); + ASSERT_FAIL(access("/tmp/proc-dummy/1/", X_OK)); + ASSERT_FAIL(access("/tmp/proc-dummy/self/", X_OK)); +} + +TEST_F(ns, pidns_fsconfig_string_path) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_GE(fsfd, 0); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy-p= idns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_GE(mountfd, 0); + + ASSERT_FAIL(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_FAIL(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_fsconfig_fd) +{ + int fsfd =3D 
fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_GE(fsfd, 0); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy= _pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_GE(mountfd, 0); + + ASSERT_FAIL(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_FAIL(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_reconfigure_remount) +{ + ASSERT_SUCCESS(mkdir("/tmp/proc", 0755)); + ASSERT_SUCCESS(mount("proc", "/tmp/proc", "proc", 0, "")); + ASSERT_SUCCESS(access("/tmp/proc/self/", X_OK)); + + ASSERT_SUCCESS(mount(NULL, "/tmp/proc", NULL, MS_REMOUNT, "pidns=3D/tmp/d= ummy-pidns")); + ASSERT_FAIL(access("/tmp/proc/self/", X_OK)); +} + +TEST_F(ns, pidns_reconfigure_fsconfig_string_path) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_GE(fsfd, 0); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_GE(mountfd, 0); + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy-p= idns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); + + ASSERT_FAIL(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_FAIL(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +TEST_F(ns, pidns_reconfigure_fsconfig_fd) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_GE(fsfd, 0); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_GE(mountfd, 0); + + ASSERT_SUCCESS(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_SUCCESS(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, 
self->dummy= _pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); + + ASSERT_FAIL(faccessat(mountfd, "1/", X_OK, 0)); + ASSERT_FAIL(faccessat(mountfd, "self/", X_OK, 0)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(mountfd)); +} + +int is_same_inode(int fd1, int fd2) +{ + struct stat stat1, stat2; + + assert(fstat(fd1, &stat1) =3D=3D 0); + assert(fstat(fd2, &stat2) =3D=3D 0); + + return stat1.st_ino =3D=3D stat2.st_ino && stat1.st_dev =3D=3D stat2.st_d= ev; +} + +#define PROCFS_IOCTL_MAGIC 'f' +#define PROCFS_GET_PID_NAMESPACE _IO(PROCFS_IOCTL_MAGIC, 1) + +TEST_F(ns, get_pidns_ioctl) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_GE(fsfd, 0); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_FD, "pidns", NULL, self->dummy= _pidns)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_GE(mountfd, 0); + + /* fsmount returns an O_PATH, which ioctl(2) doesn't accept. */ + int new_mountfd =3D openat(mountfd, ".", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + ASSERT_GE(new_mountfd, 0); + + ASSERT_SUCCESS(close(mountfd)); + mountfd =3D -EBADF; + + int procfs_pidns =3D ioctl(new_mountfd, PROCFS_GET_PID_NAMESPACE); + ASSERT_GE(procfs_pidns, 0); + + ASSERT_NE(self->dummy_pidns, procfs_pidns); + ASSERT_FALSE(is_same_inode(self->host_pidns, procfs_pidns)); + ASSERT_TRUE(is_same_inode(self->dummy_pidns, procfs_pidns)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(new_mountfd)); + ASSERT_SUCCESS(close(procfs_pidns)); +} + +TEST_F(ns, reconfigure_get_pidns_ioctl) +{ + int fsfd =3D fsopen("proc", FSOPEN_CLOEXEC); + ASSERT_GE(fsfd, 0); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + + int mountfd =3D fsmount(fsfd, FSMOUNT_CLOEXEC, 0); + ASSERT_GE(mountfd, 0); + + /* fsmount returns an O_PATH, which ioctl(2) doesn't accept. 
*/ + int new_mountfd =3D openat(mountfd, ".", O_RDONLY|O_DIRECTORY|O_CLOEXEC); + ASSERT_GE(new_mountfd, 0); + + ASSERT_SUCCESS(close(mountfd)); + mountfd =3D -EBADF; + + int procfs_pidns1 =3D ioctl(new_mountfd, PROCFS_GET_PID_NAMESPACE); + ASSERT_GE(procfs_pidns1, 0); + + ASSERT_NE(self->dummy_pidns, procfs_pidns1); + ASSERT_TRUE(is_same_inode(self->host_pidns, procfs_pidns1)); + ASSERT_FALSE(is_same_inode(self->dummy_pidns, procfs_pidns1)); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_SET_STRING, "pidns", "/tmp/dummy-p= idns", 0)); + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0)); + + int procfs_pidns2 =3D ioctl(new_mountfd, PROCFS_GET_PID_NAMESPACE); + ASSERT_GE(procfs_pidns2, 0); + + ASSERT_NE(self->dummy_pidns, procfs_pidns2); + ASSERT_FALSE(is_same_inode(self->host_pidns, procfs_pidns2)); + ASSERT_TRUE(is_same_inode(self->dummy_pidns, procfs_pidns2)); + + ASSERT_SUCCESS(close(fsfd)); + ASSERT_SUCCESS(close(new_mountfd)); + ASSERT_SUCCESS(close(procfs_pidns1)); + ASSERT_SUCCESS(close(procfs_pidns2)); +} + +TEST_HARNESS_MAIN --=20 2.50.0