[PATCH] pidfd: add ioctl to retrieve pid info

luca.boccassi@gmail.com posted 1 patch 1 month, 3 weeks ago
There is a newer version of this series
fs/pidfs.c                                    | 61 ++++++++++++++++++-
include/uapi/linux/pidfd.h                    | 17 ++++++
.../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++-
3 files changed, 126 insertions(+), 2 deletions(-)
[PATCH] pidfd: add ioctl to retrieve pid info
Posted by luca.boccassi@gmail.com 1 month, 3 weeks ago
From: Luca Boccassi <bluca@debian.org>

A common pattern when using pid fds is having to get information
about the process, which currently requires /proc being mounted,
resolving the fd to a pid, and then do manual string parsing of
/proc/N/status and friends. This needs to be reimplemented over
and over in all userspace projects (e.g.: I have reimplemented
resolving in systemd, dbus, dbus-daemon, polkit so far), and
requires additional care in checking that the fd is still valid
after having parsed the data, to avoid races.

Having a programmatic API that can be used directly removes all
these requirements, including having /proc mounted.

As discussed at LPC24, add an ioctl with an extensible struct
so that more parameters can be added later if needed. Start with
exposing: pid, uid, gid, groupid, security label (the latter was
requested by the LSM maintainer).

Signed-off-by: Luca Boccassi <bluca@debian.org>
---
 fs/pidfs.c                                    | 61 ++++++++++++++++++-
 include/uapi/linux/pidfd.h                    | 17 ++++++
 .../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++-
 3 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/fs/pidfs.c b/fs/pidfs.c
index 7ffdc88dfb52..dd386d37309c 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -114,6 +114,62 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
 	return poll_flags;
 }
 
+static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg)
+{
+	struct pidfd_info uinfo = {}, info = {};
+
+	if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info)))
+		return -EFAULT;
+	if (uinfo.size > sizeof(struct pidfd_info))
+		return -E2BIG;
+	if (uinfo.size < sizeof(struct pidfd_info))
+		return -EINVAL; /* First version, no smaller struct possible */
+
+	if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT))
+		return -EINVAL;
+
+	memcpy(&info, &uinfo, uinfo.size);
+
+	if (uinfo.request_mask & PIDFD_INFO_PID)
+		info.pid = pid_nr_ns(pid, task_active_pid_ns(task));
+
+	if (uinfo.request_mask & PIDFD_INFO_CREDS) {
+		const struct cred *c = get_task_cred(task);
+		if (!c)
+			return -ESRCH;
+
+		info.uid = from_kuid_munged(current_user_ns(), c->uid);
+		info.gid = from_kgid_munged(current_user_ns(), c->gid);
+	}
+
+	if (uinfo.request_mask & PIDFD_INFO_CGROUPID) {
+		struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
+		if (!cgrp)
+			return -ENODEV;
+
+		info.cgroupid = cgroup_id(cgrp);
+	}
+
+	if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) {
+		char *secctx;
+		u32 secid, secctx_len;
+		const struct cred *c = get_task_cred(task);
+		if (!c)
+			return -ESRCH;
+
+		security_cred_getsecid(c, &secid);
+		if (security_secid_to_secctx(secid, &secctx, &secctx_len))
+			return -EFAULT;
+
+		memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1));
+	}
+
+	if (copy_to_user((void __user *)arg, &info, uinfo.size))
+		return -EFAULT;
+
+	return 0;
+}
+
 static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct task_struct *task __free(put_task) = NULL;
@@ -121,13 +177,16 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	struct pid *pid = pidfd_pid(file);
 	struct ns_common *ns_common = NULL;
 
-	if (arg)
+	if (!!arg != (cmd == PIDFD_GET_INFO))
 		return -EINVAL;
 
 	task = get_pid_task(pid, PIDTYPE_PID);
 	if (!task)
 		return -ESRCH;
 
+	if (cmd == PIDFD_GET_INFO)
+		return pidfd_info(task, pid, arg);
+
 	scoped_guard(task_lock, task) {
 		nsp = task->nsproxy;
 		if (nsp)
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 565fc0629fff..bfd0965e01f3 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -16,6 +16,22 @@
 #define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
 #define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)
 
+/* Flags for pidfd_info. */
+#define PIDFD_INFO_PID	    	        (1UL << 0)
+#define PIDFD_INFO_CREDS	            (1UL << 1)
+#define PIDFD_INFO_CGROUPID	            (1UL << 2)
+#define PIDFD_INFO_SECURITY_CONTEXT	    (1UL << 3)
+
+struct pidfd_info {
+        __u64 request_mask;
+        __u32 size;
+        uint pid;
+        uint uid;
+        uint gid;
+        __u64 cgroupid;
+        char security_context[NAME_MAX];
+} __packed;
+
 #define PIDFS_IOCTL_MAGIC 0xFF
 
 #define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
@@ -28,5 +44,6 @@
 #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
 #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
 #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
+#define PIDFD_GET_INFO                        _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
 
 #endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index c62564c264b1..929588c7e0f0 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <syscall.h>
+#include <sys/ioctl.h>
 #include <sys/mount.h>
 #include <sys/prctl.h>
 #include <sys/wait.h>
@@ -21,6 +22,28 @@
 #include "pidfd.h"
 #include "../kselftest.h"
 
+#ifndef PIDFS_IOCTL_MAGIC
+#define PIDFS_IOCTL_MAGIC 0xFF
+#endif
+
+#ifndef PIDFD_GET_INFO
+#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
+#define PIDFD_INFO_PID                  (1UL << 0)
+#define PIDFD_INFO_CREDS                (1UL << 1)
+#define PIDFD_INFO_CGROUPID	            (1UL << 2)
+#define PIDFD_INFO_SECURITY_CONTEXT	    (1UL << 3)
+
+struct pidfd_info {
+        __u64 request_mask;
+        __u32 size;
+        uint pid;
+        uint uid;
+        uint gid;
+        __u64 cgroupid;
+        char security_context[NAME_MAX];
+} __attribute__((__packed__));
+#endif
+
 static int safe_int(const char *numstr, int *converted)
 {
 	char *err = NULL;
@@ -120,10 +143,14 @@ static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
 
 int main(int argc, char **argv)
 {
+	struct pidfd_info info = {
+		.size = sizeof(struct pidfd_info),
+		.request_mask = PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID,
+	};
 	int pidfd = -1, ret = 1;
 	pid_t pid;
 
-	ksft_set_plan(3);
+	ksft_set_plan(4);
 
 	pidfd = sys_pidfd_open(-1, 0);
 	if (pidfd >= 0) {
@@ -153,6 +180,27 @@ int main(int argc, char **argv)
 	pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1);
 	ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid);
 
+	if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) {
+		ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno));
+		goto on_error;
+	}
+	if (info.pid != pid) {
+		ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n",
+			       pid, info.pid);
+		goto on_error;
+	}
+	if (info.uid != getuid()) {
+		ksft_print_msg("uid %d does not match uid from info %d\n",
+			       getuid(), info.uid);
+		goto on_error;
+	}
+	if (info.gid != getgid()) {
+		ksft_print_msg("gid %d does not match gid from info %d\n",
+			       getgid(), info.gid);
+		goto on_error;
+	}
+	ksft_test_result_pass("get info from pidfd test: passed\n");
+
 	ret = 0;
 
 on_error:
-- 
2.45.2
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Christian Brauner 1 month, 3 weeks ago
On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote:
> From: Luca Boccassi <bluca@debian.org>
> 
> A common pattern when using pid fds is having to get information
> about the process, which currently requires /proc being mounted,
> resolving the fd to a pid, and then do manual string parsing of
> /proc/N/status and friends. This needs to be reimplemented over
> and over in all userspace projects (e.g.: I have reimplemented
> resolving in systemd, dbus, dbus-daemon, polkit so far), and
> requires additional care in checking that the fd is still valid
> after having parsed the data, to avoid races.
> 
> Having a programmatic API that can be used directly removes all
> these requirements, including having /proc mounted.

Yes, thanks for working on that.

> 
> As discussed at LPC24, add an ioctl with an extensible struct
> so that more parameters can be added later if needed. Start with
> exposing: pid, uid, gid, groupid, security label (the latter was
> requested by the LSM maintainer).
> 
> Signed-off-by: Luca Boccassi <bluca@debian.org>
> ---
>  fs/pidfs.c                                    | 61 ++++++++++++++++++-
>  include/uapi/linux/pidfd.h                    | 17 ++++++
>  .../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++-
>  3 files changed, 126 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/pidfs.c b/fs/pidfs.c
> index 7ffdc88dfb52..dd386d37309c 100644
> --- a/fs/pidfs.c
> +++ b/fs/pidfs.c
> @@ -114,6 +114,62 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
>  	return poll_flags;
>  }
>  
> +static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg)
> +{
> +	struct pidfd_info uinfo = {}, info = {};
> +
> +	if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info)))
> +		return -EFAULT;
> +	if (uinfo.size > sizeof(struct pidfd_info))
> +		return -E2BIG;

We want to allow for the struct passed down to be bigger than what the
kernel knows so older kernels can handle newer structs just fine. So
this check needs to go.

> +	if (uinfo.size < sizeof(struct pidfd_info))
> +		return -EINVAL; /* First version, no smaller struct possible */
> +
> +	if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT))
> +		return -EINVAL;

This means you'll fail hard when requesting information that older
kernels don't provide. For a request mask based interface the correct
protocol is to answer in a result_mask what information the kernel was
actually able to provide. This way you get seamless forward and backward
compatibility without having the caller to guess what extension the
kernel doesn't support.

> +
> +	memcpy(&info, &uinfo, uinfo.size);
> +
> +	if (uinfo.request_mask & PIDFD_INFO_PID)

You should drop this and return basic pid information unconditionally.

> +		info.pid = pid_nr_ns(pid, task_active_pid_ns(task));

I think this is wrong what this should return is the pid of the process
as seen from the caller's pid namespace. Otherwise you'll report
meaningless pid numbers to the caller when the caller's pid namespace is
outside of the pid namespace hierarchy of the process that pidfd refers
to. This can easily happen when you receive pidfds via SCM_RIGHTS.

The other question is what you want to return here. What you did above
is equivalent to what pid_vnr() does. IIRC, then this will yield the
thread-group id in pidfd_info->pid if the pidfd is a thread-group leader
pidfd and the thread-id if the pidfd is a PIDFD_THREAD. While the caller
should be able to infer that from the type of pidfd I think we should
just have a unique meaning for the pid field.

It would probably even make sense to have:

(1) thread_id
(2) thread_group_id
(3) parent_id

> +	if (uinfo.request_mask & PIDFD_INFO_CREDS) {

You should drop this and return basic uid/gid information unconditionally.

Also the uid and gid field maybe should be named ruid and rgid because
sooner or later someone will want euid/egid and suid/sgid and
fsuid/fsgid.

In fact, we might want to probably return ruid/rgid, euid/egid,
suid/sgid, fsuid/fsgid unconditionally.

I suspect that stuff like supplementary groups should rather be reported
through an extra ioctl instead of having a variable sized array in the
struct.

> +		const struct cred *c = get_task_cred(task);
> +		if (!c)
> +			return -ESRCH;
> +
> +		info.uid = from_kuid_munged(current_user_ns(), c->uid);
> +		info.gid = from_kgid_munged(current_user_ns(), c->gid);
> +	}
> +
> +	if (uinfo.request_mask & PIDFD_INFO_CGROUPID) {
> +		struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
> +		if (!cgrp)
> +			return -ENODEV;
> +
> +		info.cgroupid = cgroup_id(cgrp);
> +	}
> +
> +	if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) {

It would make sense for security information to get a separate ioctl so
that struct pidfd_info just return simple and fast information and the
security stuff can include things such as seccomp, caps etc pp.

> +		char *secctx;
> +		u32 secid, secctx_len;
> +		const struct cred *c = get_task_cred(task);
> +		if (!c)
> +			return -ESRCH;
> +
> +		security_cred_getsecid(c, &secid);
> +		if (security_secid_to_secctx(secid, &secctx, &secctx_len))
> +			return -EFAULT;
> +
> +		memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1));
> +	}
> +
> +	if (copy_to_user((void __user *)arg, &info, uinfo.size))
> +		return -EFAULT;
> +
> +	return 0;
> +}
> +
>  static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  {
>  	struct task_struct *task __free(put_task) = NULL;
> @@ -121,13 +177,16 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
>  	struct pid *pid = pidfd_pid(file);
>  	struct ns_common *ns_common = NULL;
>  
> -	if (arg)
> +	if (!!arg != (cmd == PIDFD_GET_INFO))
>  		return -EINVAL;
>  
>  	task = get_pid_task(pid, PIDTYPE_PID);
>  	if (!task)
>  		return -ESRCH;
>  
> +	if (cmd == PIDFD_GET_INFO)
> +		return pidfd_info(task, pid, arg);

So, please take a look at nsfs or look at seccomp. The gist is that in
order to keep this extensible we don't match on the ioctl command
directly because it encodes the size of the argument instead we do:

        /* extensible ioctls */
        switch (_IOC_NR(cmd)) {
        case _IOC_NR(PIDFD_GET_INFO): {
                struct pidfd_info kinfo = {};
                struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
                size_t usize = _IOC_SIZE(cmd);
 
                if (!uinfo)
                        return -EINVAL;
 
                if (usize < PIDFD_INFO_SIZE_VER0)
                        return -EINVAL;
 
        /* fill in kinfo information */
        kinfo->ruid ...
        kinfo->cgroup_id ...
        kinfo->result_mask ...
 
        /*
         * If userspace and the kernel have the same struct size it can just
         * be copied. If userspace provides an older struct, only the bits that
         * userspace knows about will be copied. If userspace provides a new
         * struct, only the bits that the kernel knows about will be copied and
         * the size value will be set to the size the kernel knows about.
         */
        if (copy_to_user(uinfo, kinfo, min(usize, sizeof(*kinfo))))
                return -EFAULT;

> +
>  	scoped_guard(task_lock, task) {
>  		nsp = task->nsproxy;
>  		if (nsp)
> diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
> index 565fc0629fff..bfd0965e01f3 100644
> --- a/include/uapi/linux/pidfd.h
> +++ b/include/uapi/linux/pidfd.h
> @@ -16,6 +16,22 @@
>  #define PIDFD_SIGNAL_THREAD_GROUP	(1UL << 1)
>  #define PIDFD_SIGNAL_PROCESS_GROUP	(1UL << 2)
>  
> +/* Flags for pidfd_info. */
> +#define PIDFD_INFO_PID	    	        (1UL << 0)
> +#define PIDFD_INFO_CREDS	            (1UL << 1)
> +#define PIDFD_INFO_CGROUPID	            (1UL << 2)
> +#define PIDFD_INFO_SECURITY_CONTEXT	    (1UL << 3)
> +
> +struct pidfd_info {
> +        __u64 request_mask;
> +        __u32 size;
> +        uint pid;

The size is unnecessary because it is directly encoded into the ioctl
command.

> +        uint uid;
> +        uint gid;
> +        __u64 cgroupid;
> +        char security_context[NAME_MAX];
> +} __packed;

The packed attribute should be unnecessary. The structure should simply
be correctly padded and should use explicitly sized types:

struct pidfd_info {
	/* Let userspace request expensive stuff explictly. */
	__u64 request_mask;
	/* And let the kernel indicate whether it knows about it. */
	__u64 result_mask;
	__u32 pid;
	__u32 uid;
	__u32 gid;
	__u64 cgroup_id;
	__u32 spare0[1];
};

I'm not sure what LSM info to be put in there and we can just do it as
an extension.

> +
>  #define PIDFS_IOCTL_MAGIC 0xFF
>  
>  #define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
> @@ -28,5 +44,6 @@
>  #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
>  #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
>  #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
> +#define PIDFD_GET_INFO                        _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
>  
>  #endif /* _UAPI_LINUX_PIDFD_H */
> diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
> index c62564c264b1..929588c7e0f0 100644
> --- a/tools/testing/selftests/pidfd/pidfd_open_test.c
> +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
> @@ -13,6 +13,7 @@
>  #include <stdlib.h>
>  #include <string.h>
>  #include <syscall.h>
> +#include <sys/ioctl.h>
>  #include <sys/mount.h>
>  #include <sys/prctl.h>
>  #include <sys/wait.h>
> @@ -21,6 +22,28 @@
>  #include "pidfd.h"
>  #include "../kselftest.h"
>  
> +#ifndef PIDFS_IOCTL_MAGIC
> +#define PIDFS_IOCTL_MAGIC 0xFF
> +#endif
> +
> +#ifndef PIDFD_GET_INFO
> +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
> +#define PIDFD_INFO_PID                  (1UL << 0)
> +#define PIDFD_INFO_CREDS                (1UL << 1)
> +#define PIDFD_INFO_CGROUPID	            (1UL << 2)
> +#define PIDFD_INFO_SECURITY_CONTEXT	    (1UL << 3)
> +
> +struct pidfd_info {
> +        __u64 request_mask;
> +        __u32 size;
> +        uint pid;
> +        uint uid;
> +        uint gid;
> +        __u64 cgroupid;
> +        char security_context[NAME_MAX];
> +} __attribute__((__packed__));
> +#endif
> +
>  static int safe_int(const char *numstr, int *converted)
>  {
>  	char *err = NULL;
> @@ -120,10 +143,14 @@ static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
>  
>  int main(int argc, char **argv)
>  {
> +	struct pidfd_info info = {
> +		.size = sizeof(struct pidfd_info),
> +		.request_mask = PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID,
> +	};
>  	int pidfd = -1, ret = 1;
>  	pid_t pid;
>  
> -	ksft_set_plan(3);
> +	ksft_set_plan(4);
>  
>  	pidfd = sys_pidfd_open(-1, 0);
>  	if (pidfd >= 0) {
> @@ -153,6 +180,27 @@ int main(int argc, char **argv)
>  	pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1);
>  	ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid);
>  
> +	if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) {
> +		ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno));
> +		goto on_error;
> +	}
> +	if (info.pid != pid) {
> +		ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n",
> +			       pid, info.pid);
> +		goto on_error;
> +	}
> +	if (info.uid != getuid()) {
> +		ksft_print_msg("uid %d does not match uid from info %d\n",
> +			       getuid(), info.uid);
> +		goto on_error;
> +	}
> +	if (info.gid != getgid()) {
> +		ksft_print_msg("gid %d does not match gid from info %d\n",
> +			       getgid(), info.gid);
> +		goto on_error;
> +	}
> +	ksft_test_result_pass("get info from pidfd test: passed\n");
> +
>  	ret = 0;
>  
>  on_error:
> -- 
> 2.45.2
>
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Josh Triplett 1 month, 3 weeks ago
Christian Brauner wrote:
> struct pidfd_info {
>	/* Let userspace request expensive stuff explictly. */
>	__u64 request_mask;
>	/* And let the kernel indicate whether it knows about it. */
>	__u64 result_mask;

I don't think it's necessary to have these two fields separate. The kernel should write to the same mask field userspace used.

In theory there could be an operation to probe for *everything* the kernel understands, but in practice with a binary structure there's little point finding out about flags you don't know the corresponding structure bits for.
RE: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by David Laight 1 month, 3 weeks ago
...
> It would make sense for security information to get a separate ioctl so
> that struct pidfd_info just return simple and fast information and the
> security stuff can include things such as seccomp, caps etc pp.

Which also means it is pointless having the two bitmasks.
They just complicate things unnecessarily.

What you might want to do is have the kernel return the size
of the structure it would fill in (probably as the first field).
Also have the kernel fill (probably with zeros) the end of the
user buffer it didn't write to.
The ioctl is then an IOR() one.

Mind you, if you dig into the history (possibly of SYSV and/or BSD)
you'll find a structure the kernel filled with the info 'ps' needs.
The text based code that Linux uses is probably more extensible.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Luca Boccassi 1 month, 3 weeks ago
On Fri, 4 Oct 2024 at 10:29, Christian Brauner <brauner@kernel.org> wrote:
>
> On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote:
> > +             info.pid = pid_nr_ns(pid, task_active_pid_ns(task));
>
> I think this is wrong what this should return is the pid of the process
> as seen from the caller's pid namespace. Otherwise you'll report
> meaningless pid numbers to the caller when the caller's pid namespace is
> outside of the pid namespace hierarchy of the process that pidfd refers
> to. This can easily happen when you receive pidfds via SCM_RIGHTS.

Thanks for the review, I applied the rest of the comments in v2 (I
think at least), but for this one I can't tell, how should I do it?
Naively I thought that task_active_pid_ns(task) would already fulfil
this requirement and resolve it using the correct pid namespace, is
that not the case? If not, what else should I do here?
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Oleg Nesterov 1 month, 3 weeks ago
I wasn't CC'ed, so I didn't see the patch, but looking at Christian's
reply ...

On 10/04, Luca Boccassi wrote:
> On Fri, 4 Oct 2024 at 10:29, Christian Brauner <brauner@kernel.org> wrote:
> >
> > On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote:
> > > +             info.pid = pid_nr_ns(pid, task_active_pid_ns(task));
> >
> > I think this is wrong what this should return is the pid of the process
> > as seen from the caller's pid namespace.

Agreed,

> Thanks for the review, I applied the rest of the comments in v2 (I
> think at least), but for this one I can't tell, how should I do it?

I guess Christian meant you should simply use

		info.pid = task_pid_vnr(task);

task_pid_vnr(task) returns the task's pid in the caller's namespace.

Oleg.
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Luca Boccassi 1 month, 3 weeks ago
On Fri, 4 Oct 2024 at 20:30, Oleg Nesterov <oleg@redhat.com> wrote:
>
> I wasn't CC'ed, so I didn't see the patch, but looking at Christian's
> reply ...
>
> On 10/04, Luca Boccassi wrote:
> > On Fri, 4 Oct 2024 at 10:29, Christian Brauner <brauner@kernel.org> wrote:
> > >
> > > On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote:
> > > > +             info.pid = pid_nr_ns(pid, task_active_pid_ns(task));
> > >
> > > I think this is wrong what this should return is the pid of the process
> > > as seen from the caller's pid namespace.
>
> Agreed,
>
> > Thanks for the review, I applied the rest of the comments in v2 (I
> > think at least), but for this one I can't tell, how should I do it?
>
> I guess Christian meant you should simply use
>
>                 info.pid = task_pid_vnr(task);
>
> task_pid_vnr(task) returns the task's pid in the caller's namespace.

Ah I see, I didn't realize there was a difference, sent v3 with the
suggested change just now, thanks.
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Oleg Nesterov 1 month, 3 weeks ago
On 10/04, Luca Boccassi wrote:
>
> On Fri, 4 Oct 2024 at 20:30, Oleg Nesterov <oleg@redhat.com> wrote:
> >
> > I guess Christian meant you should simply use
> >
> >                 info.pid = task_pid_vnr(task);
> >
> > task_pid_vnr(task) returns the task's pid in the caller's namespace.
>
> Ah I see, I didn't realize there was a difference, sent v3 with the
> suggested change just now, thanks.

I didn't get v3, I guess I wasn't cc'ed again.

So, just in case, let me add that task_pid_vnr(task) can return 0 if
this task exits after get_pid_task().

Perhaps this is fine, I do not know. But perhaps you should actually
use pid_vnr(pid).

Oleg.
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Luca Boccassi 1 month, 3 weeks ago
On Sat, 5 Oct 2024 at 12:29, Oleg Nesterov <oleg@redhat.com> wrote:
>
> On 10/04, Luca Boccassi wrote:
> >
> > On Fri, 4 Oct 2024 at 20:30, Oleg Nesterov <oleg@redhat.com> wrote:
> > >
> > > I guess Christian meant you should simply use
> > >
> > >                 info.pid = task_pid_vnr(task);
> > >
> > > task_pid_vnr(task) returns the task's pid in the caller's namespace.
> >
> > Ah I see, I didn't realize there was a difference, sent v3 with the
> > suggested change just now, thanks.
>
> I didn't get v3, I guess I wasn't cc'ed again.
>
> So, just in case, let me add that task_pid_vnr(task) can return 0 if
> this task exits after get_pid_task().
>
> Perhaps this is fine, I do not know. But perhaps you should actually
> use pid_vnr(pid).
>
> Oleg.

I have just sent v5 CC'ing you and adding a final check before the
copy to userspace, that returns ESRCH if the task has exited. This
should solve that issue, and also be future-proof against potential
additions that might slow down processing due to gathering more data
or so.
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Paul Moore 1 month, 3 weeks ago
On Fri, Oct 4, 2024 at 5:29 AM Christian Brauner <brauner@kernel.org> wrote:
> On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote:
> > From: Luca Boccassi <bluca@debian.org>
> >
> > A common pattern when using pid fds is having to get information
> > about the process, which currently requires /proc being mounted,
> > resolving the fd to a pid, and then do manual string parsing of
> > /proc/N/status and friends. This needs to be reimplemented over
> > and over in all userspace projects (e.g.: I have reimplemented
> > resolving in systemd, dbus, dbus-daemon, polkit so far), and
> > requires additional care in checking that the fd is still valid
> > after having parsed the data, to avoid races.
> >
> > Having a programmatic API that can be used directly removes all
> > these requirements, including having /proc mounted.

...

> > +             const struct cred *c = get_task_cred(task);
> > +             if (!c)
> > +                     return -ESRCH;
> > +
> > +             info.uid = from_kuid_munged(current_user_ns(), c->uid);
> > +             info.gid = from_kgid_munged(current_user_ns(), c->gid);
> > +     }
> > +
> > +     if (uinfo.request_mask & PIDFD_INFO_CGROUPID) {
> > +             struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
> > +             if (!cgrp)
> > +                     return -ENODEV;
> > +
> > +             info.cgroupid = cgroup_id(cgrp);
> > +     }
> > +
> > +     if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) {
>
> It would make sense for security information to get a separate ioctl so
> that struct pidfd_info just return simple and fast information and the
> security stuff can include things such as seccomp, caps etc pp.

I'm okay with moving the security related info to a separate ioctl,
but I'd like to strongly request that it be merged at the same time as
the process ID related info.  It can be a separate patch as part of a
single patchset if you want to make the ID patch backport friendly for
distros, but I think we should treat the security related info with
the same importance as the ID info.

> > +struct pidfd_info {
> > +        __u64 request_mask;
> > +        __u32 size;
> > +        uint pid;
>
> The size is unnecessary because it is directly encoded into the ioctl
> command.
>
> > +        uint uid;
> > +        uint gid;
> > +        __u64 cgroupid;
> > +        char security_context[NAME_MAX];
> > +} __packed;
>
> The packed attribute should be unnecessary. The structure should simply
> be correctly padded and should use explicitly sized types:
>
> struct pidfd_info {
>         /* Let userspace request expensive stuff explictly. */
>         __u64 request_mask;
>         /* And let the kernel indicate whether it knows about it. */
>         __u64 result_mask;
>         __u32 pid;
>         __u32 uid;
>         __u32 gid;
>         __u64 cgroup_id;
>         __u32 spare0[1];
> };
>
> I'm not sure what LSM info to be put in there and we can just do it as
> an extension.

See my original response to Luca on October 2nd, you were on the To/CC line.

-- 
paul-moore.com
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by kernel test robot 1 month, 3 weeks ago
Hi,

kernel test robot noticed the following build warnings:

[auto build test WARNING on shuah-kselftest/next]
[also build test WARNING on shuah-kselftest/fixes linus/master v6.12-rc1 next-20241003]
[cannot apply to brauner-vfs/vfs.all]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/luca-boccassi-gmail-com/pidfd-add-ioctl-to-retrieve-pid-info/20241002-223302
base:   https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
patch link:    https://lore.kernel.org/r/20241002142516.110567-1-luca.boccassi%40gmail.com
patch subject: [PATCH] pidfd: add ioctl to retrieve pid info
config: x86_64-randconfig-123-20241004 (https://download.01.org/0day-ci/archive/20241004/202410041128.tLVDbeJB-lkp@intel.com/config)
compiler: gcc-12 (Debian 12.2.0-14) 12.2.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241004/202410041128.tLVDbeJB-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410041128.tLVDbeJB-lkp@intel.com/

sparse warnings: (new ones prefixed by >>)
>> fs/pidfs.c:121:37: sparse: sparse: incorrect type in argument 2 (different address spaces) @@     expected void const [noderef] __user *from @@     got struct pidfd_info * @@
   fs/pidfs.c:121:37: sparse:     expected void const [noderef] __user *from
   fs/pidfs.c:121:37: sparse:     got struct pidfd_info *

vim +121 fs/pidfs.c

   116	
   117	static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg)
   118	{
   119		struct pidfd_info uinfo = {}, info = {};
   120	
 > 121		if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info)))
   122			return -EFAULT;
   123		if (uinfo.size > sizeof(struct pidfd_info))
   124			return -E2BIG;
   125		if (uinfo.size < sizeof(struct pidfd_info))
   126			return -EINVAL; /* First version, no smaller struct possible */
   127	
   128		if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT))
   129			return -EINVAL;
   130	
   131		memcpy(&info, &uinfo, uinfo.size);
   132	
   133		if (uinfo.request_mask & PIDFD_INFO_PID)
   134			info.pid = pid_nr_ns(pid, task_active_pid_ns(task));
   135	
   136		if (uinfo.request_mask & PIDFD_INFO_CREDS) {
   137			const struct cred *c = get_task_cred(task);
   138			if (!c)
   139				return -ESRCH;
   140	
   141			info.uid = from_kuid_munged(current_user_ns(), c->uid);
   142			info.gid = from_kgid_munged(current_user_ns(), c->gid);
   143		}
   144	
   145		if (uinfo.request_mask & PIDFD_INFO_CGROUPID) {
   146			struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
   147			if (!cgrp)
   148				return -ENODEV;
   149	
   150			info.cgroupid = cgroup_id(cgrp);
   151		}
   152	
   153		if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) {
   154			char *secctx;
   155			u32 secid, secctx_len;
   156			const struct cred *c = get_task_cred(task);
   157			if (!c)
   158				return -ESRCH;
   159	
   160			security_cred_getsecid(c, &secid);
   161			if (security_secid_to_secctx(secid, &secctx, &secctx_len))
   162				return -EFAULT;
   163	
   164			memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1));
   165		}
   166	
   167		if (copy_to_user((void __user *)arg, &info, uinfo.size))
   168			return -EFAULT;
   169	
   170		return 0;
   171	}
   172	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by kernel test robot 1 month, 3 weeks ago
Hi,

kernel test robot noticed the following build errors:

[auto build test ERROR on shuah-kselftest/next]
[also build test ERROR on shuah-kselftest/fixes linus/master v6.12-rc1 next-20241003]
[cannot apply to brauner-vfs/vfs.all]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/luca-boccassi-gmail-com/pidfd-add-ioctl-to-retrieve-pid-info/20241002-223302
base:   https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
patch link:    https://lore.kernel.org/r/20241002142516.110567-1-luca.boccassi%40gmail.com
patch subject: [PATCH] pidfd: add ioctl to retrieve pid info
config: openrisc-allnoconfig (https://download.01.org/0day-ci/archive/20241004/202410040559.LZnpGpVU-lkp@intel.com/config)
compiler: or1k-linux-gcc (GCC) 14.1.0
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241004/202410040559.LZnpGpVU-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410040559.LZnpGpVU-lkp@intel.com/

All errors (new ones prefixed by >>):

   fs/pidfs.c: In function 'pidfd_info':
>> fs/pidfs.c:146:39: error: implicit declaration of function 'task_css_check' [-Wimplicit-function-declaration]
     146 |                 struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
         |                                       ^~~~~~~~~~~~~~
>> fs/pidfs.c:146:60: error: 'pids_cgrp_id' undeclared (first use in this function)
     146 |                 struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
         |                                                            ^~~~~~~~~~~~
   fs/pidfs.c:146:60: note: each undeclared identifier is reported only once for each function it appears in


vim +/task_css_check +146 fs/pidfs.c

   116	
   117	static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg)
   118	{
   119		struct pidfd_info uinfo = {}, info = {};
   120	
   121		if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info)))
   122			return -EFAULT;
   123		if (uinfo.size > sizeof(struct pidfd_info))
   124			return -E2BIG;
   125		if (uinfo.size < sizeof(struct pidfd_info))
   126			return -EINVAL; /* First version, no smaller struct possible */
   127	
   128		if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT))
   129			return -EINVAL;
   130	
   131		memcpy(&info, &uinfo, uinfo.size);
   132	
   133		if (uinfo.request_mask & PIDFD_INFO_PID)
   134			info.pid = pid_nr_ns(pid, task_active_pid_ns(task));
   135	
   136		if (uinfo.request_mask & PIDFD_INFO_CREDS) {
   137			const struct cred *c = get_task_cred(task);
   138			if (!c)
   139				return -ESRCH;
   140	
   141			info.uid = from_kuid_munged(current_user_ns(), c->uid);
   142			info.gid = from_kgid_munged(current_user_ns(), c->gid);
   143		}
   144	
   145		if (uinfo.request_mask & PIDFD_INFO_CGROUPID) {
 > 146			struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
   147			if (!cgrp)
   148				return -ENODEV;
   149	
   150			info.cgroupid = cgroup_id(cgrp);
   151		}
   152	
   153		if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) {
   154			char *secctx;
   155			u32 secid, secctx_len;
   156			const struct cred *c = get_task_cred(task);
   157			if (!c)
   158				return -ESRCH;
   159	
   160			security_cred_getsecid(c, &secid);
   161			if (security_secid_to_secctx(secid, &secctx, &secctx_len))
   162				return -EFAULT;
   163	
   164			memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1));
   165		}
   166	
   167		if (copy_to_user((void __user *)arg, &info, uinfo.size))
   168			return -EFAULT;
   169	
   170		return 0;
   171	}
   172	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by kernel test robot 1 month, 3 weeks ago
Hi,

kernel test robot noticed the following build errors:

[auto build test ERROR on shuah-kselftest/next]
[also build test ERROR on shuah-kselftest/fixes linus/master v6.12-rc1 next-20241003]
[cannot apply to brauner-vfs/vfs.all]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch#_base_tree_information]

url:    https://github.com/intel-lab-lkp/linux/commits/luca-boccassi-gmail-com/pidfd-add-ioctl-to-retrieve-pid-info/20241002-223302
base:   https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next
patch link:    https://lore.kernel.org/r/20241002142516.110567-1-luca.boccassi%40gmail.com
patch subject: [PATCH] pidfd: add ioctl to retrieve pid info
config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20241004/202410040539.j2SZpABt-lkp@intel.com/config)
compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff)
reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241004/202410040539.j2SZpABt-lkp@intel.com/reproduce)

If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <lkp@intel.com>
| Closes: https://lore.kernel.org/oe-kbuild-all/202410040539.j2SZpABt-lkp@intel.com/

All errors (new ones prefixed by >>):

>> fs/pidfs.c:146:25: error: call to undeclared function 'task_css_check'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
     146 |                 struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
         |                                       ^
>> fs/pidfs.c:146:46: error: use of undeclared identifier 'pids_cgrp_id'
     146 |                 struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
         |                                                            ^
   2 errors generated.


vim +/task_css_check +146 fs/pidfs.c

   116	
   117	static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg)
   118	{
   119		struct pidfd_info uinfo = {}, info = {};
   120	
   121		if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info)))
   122			return -EFAULT;
   123		if (uinfo.size > sizeof(struct pidfd_info))
   124			return -E2BIG;
   125		if (uinfo.size < sizeof(struct pidfd_info))
   126			return -EINVAL; /* First version, no smaller struct possible */
   127	
   128		if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT))
   129			return -EINVAL;
   130	
   131		memcpy(&info, &uinfo, uinfo.size);
   132	
   133		if (uinfo.request_mask & PIDFD_INFO_PID)
   134			info.pid = pid_nr_ns(pid, task_active_pid_ns(task));
   135	
   136		if (uinfo.request_mask & PIDFD_INFO_CREDS) {
   137			const struct cred *c = get_task_cred(task);
   138			if (!c)
   139				return -ESRCH;
   140	
   141			info.uid = from_kuid_munged(current_user_ns(), c->uid);
   142			info.gid = from_kgid_munged(current_user_ns(), c->gid);
   143		}
   144	
   145		if (uinfo.request_mask & PIDFD_INFO_CGROUPID) {
 > 146			struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
   147			if (!cgrp)
   148				return -ENODEV;
   149	
   150			info.cgroupid = cgroup_id(cgrp);
   151		}
   152	
   153		if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) {
   154			char *secctx;
   155			u32 secid, secctx_len;
   156			const struct cred *c = get_task_cred(task);
   157			if (!c)
   158				return -ESRCH;
   159	
   160			security_cred_getsecid(c, &secid);
   161			if (security_secid_to_secctx(secid, &secctx, &secctx_len))
   162				return -EFAULT;
   163	
   164			memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1));
   165		}
   166	
   167		if (copy_to_user((void __user *)arg, &info, uinfo.size))
   168			return -EFAULT;
   169	
   170		return 0;
   171	}
   172	

-- 
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Paul Moore 1 month, 3 weeks ago
On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote:
>
> From: Luca Boccassi <bluca@debian.org>
>
> A common pattern when using pid fds is having to get information
> about the process, which currently requires /proc being mounted,
> resolving the fd to a pid, and then do manual string parsing of
> /proc/N/status and friends. This needs to be reimplemented over
> and over in all userspace projects (e.g.: I have reimplemented
> resolving in systemd, dbus, dbus-daemon, polkit so far), and
> requires additional care in checking that the fd is still valid
> after having parsed the data, to avoid races.
>
> Having a programmatic API that can be used directly removes all
> these requirements, including having /proc mounted.
>
> As discussed at LPC24, add an ioctl with an extensible struct
> so that more parameters can be added later if needed. Start with
> exposing: pid, uid, gid, groupid, security label (the latter was
> requested by the LSM maintainer).
>
> Signed-off-by: Luca Boccassi <bluca@debian.org>
> ---
>  fs/pidfs.c                                    | 61 ++++++++++++++++++-
>  include/uapi/linux/pidfd.h                    | 17 ++++++
>  .../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++-
>  3 files changed, 126 insertions(+), 2 deletions(-)

...

> diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
> index 565fc0629fff..bfd0965e01f3 100644
> --- a/include/uapi/linux/pidfd.h
> +++ b/include/uapi/linux/pidfd.h
> @@ -16,6 +16,22 @@
>  #define PIDFD_SIGNAL_THREAD_GROUP      (1UL << 1)
>  #define PIDFD_SIGNAL_PROCESS_GROUP     (1UL << 2)
>
> +/* Flags for pidfd_info. */
> +#define PIDFD_INFO_PID                 (1UL << 0)
> +#define PIDFD_INFO_CREDS                   (1UL << 1)
> +#define PIDFD_INFO_CGROUPID                (1UL << 2)
> +#define PIDFD_INFO_SECURITY_CONTEXT        (1UL << 3)
> +
> +struct pidfd_info {
> +        __u64 request_mask;
> +        __u32 size;
> +        uint pid;
> +        uint uid;
> +        uint gid;
> +        __u64 cgroupid;
> +        char security_context[NAME_MAX];

[NOTE: please CC the LSM list on changes like this]

Thanks Luca :)

With the addition of the LSM syscalls we've created a lsm_ctx struct
(see include/uapi/linux/lsm.h) that properly supports multiple LSMs.
The original char ptr "secctx" approach worked back when only a single
LSM was supported at any given time, but now that multiple LSMs are
supported we need something richer, and it would be good to use this
new struct in any new userspace API.

See the lsm_get_self_attr(2) syscall for an example (defined in
security/lsm_syscalls.c but effectively implemented via
security_getselfattr() in security/security.c).

> +} __packed;
> +
>  #define PIDFS_IOCTL_MAGIC 0xFF
>
>  #define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
> @@ -28,5 +44,6 @@
>  #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
>  #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
>  #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
> +#define PIDFD_GET_INFO                        _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
>
>  #endif /* _UAPI_LINUX_PIDFD_H */

-- 
paul-moore.com
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Luca Boccassi 1 month, 3 weeks ago
On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote:
>
> On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote:
> >
> > From: Luca Boccassi <bluca@debian.org>
> >
> > A common pattern when using pid fds is having to get information
> > about the process, which currently requires /proc being mounted,
> > resolving the fd to a pid, and then do manual string parsing of
> > /proc/N/status and friends. This needs to be reimplemented over
> > and over in all userspace projects (e.g.: I have reimplemented
> > resolving in systemd, dbus, dbus-daemon, polkit so far), and
> > requires additional care in checking that the fd is still valid
> > after having parsed the data, to avoid races.
> >
> > Having a programmatic API that can be used directly removes all
> > these requirements, including having /proc mounted.
> >
> > As discussed at LPC24, add an ioctl with an extensible struct
> > so that more parameters can be added later if needed. Start with
> > exposing: pid, uid, gid, groupid, security label (the latter was
> > requested by the LSM maintainer).
> >
> > Signed-off-by: Luca Boccassi <bluca@debian.org>
> > ---
> >  fs/pidfs.c                                    | 61 ++++++++++++++++++-
> >  include/uapi/linux/pidfd.h                    | 17 ++++++
> >  .../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++-
> >  3 files changed, 126 insertions(+), 2 deletions(-)
>
> ...
>
> > diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
> > index 565fc0629fff..bfd0965e01f3 100644
> > --- a/include/uapi/linux/pidfd.h
> > +++ b/include/uapi/linux/pidfd.h
> > @@ -16,6 +16,22 @@
> >  #define PIDFD_SIGNAL_THREAD_GROUP      (1UL << 1)
> >  #define PIDFD_SIGNAL_PROCESS_GROUP     (1UL << 2)
> >
> > +/* Flags for pidfd_info. */
> > +#define PIDFD_INFO_PID                 (1UL << 0)
> > +#define PIDFD_INFO_CREDS                   (1UL << 1)
> > +#define PIDFD_INFO_CGROUPID                (1UL << 2)
> > +#define PIDFD_INFO_SECURITY_CONTEXT        (1UL << 3)
> > +
> > +struct pidfd_info {
> > +        __u64 request_mask;
> > +        __u32 size;
> > +        uint pid;
> > +        uint uid;
> > +        uint gid;
> > +        __u64 cgroupid;
> > +        char security_context[NAME_MAX];
>
> [NOTE: please CC the LSM list on changes like this]
>
> Thanks Luca :)
>
> With the addition of the LSM syscalls we've created a lsm_ctx struct
> (see include/uapi/linux/lsm.h) that properly supports multiple LSMs.
> The original char ptr "secctx" approach worked back when only a single
> LSM was supported at any given time, but now that multiple LSMs are
> supported we need something richer, and it would be good to use this
> new struct in any new userspace API.
>
> See the lsm_get_self_attr(2) syscall for an example (defined in
> security/lsm_syscalls.c but effectively implemented via
> security_getselfattr() in security/security.c).

Thanks for the review, makes sense to me - I had a look at those
examples but unfortunately it is getting a bit beyond my (very low)
kernel skills, so I've dropped the string-based security_context from
v2 but without adding something else, is there someone more familiar
with the LSM world that could help implementing that side?
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Paul Moore 1 month, 3 weeks ago
On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote:
> On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote:
> > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote:

...

> > [NOTE: please CC the LSM list on changes like this]
> >
> > Thanks Luca :)
> >
> > With the addition of the LSM syscalls we've created a lsm_ctx struct
> > (see include/uapi/linux/lsm.h) that properly supports multiple LSMs.
> > The original char ptr "secctx" approach worked back when only a single
> > LSM was supported at any given time, but now that multiple LSMs are
> > supported we need something richer, and it would be good to use this
> > new struct in any new userspace API.
> >
> > See the lsm_get_self_attr(2) syscall for an example (defined in
> > security/lsm_syscalls.c but effectively implemented via
> > security_getselfattr() in security/security.c).
>
> Thanks for the review, makes sense to me - I had a look at those
> examples but unfortunately it is getting a bit beyond my (very low)
> kernel skills, so I've dropped the string-based security_context from
> v2 but without adding something else, is there someone more familiar
> with the LSM world that could help implementing that side?

We are running a little short on devs/time in LSM land right now so I
guess I'm the only real option (not that I have any time, but you know
how it goes).  If you can put together the ioctl side of things I can
likely put together the LSM side fairly quickly - sound good?

-- 
paul-moore.com
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by luca.boccassi@gmail.com 1 month ago
On Sat, 5 Oct 2024 at 17:06, Paul Moore <paul@paul-moore.com> wrote:
>
> On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote:
> > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote:
> > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote:
>
> ...
>
> > > [NOTE: please CC the LSM list on changes like this]
> > >
> > > Thanks Luca :)
> > >
> > > With the addition of the LSM syscalls we've created a lsm_ctx struct
> > > (see include/uapi/linux/lsm.h) that properly supports multiple LSMs.
> > > The original char ptr "secctx" approach worked back when only a single
> > > LSM was supported at any given time, but now that multiple LSMs are
> > > supported we need something richer, and it would be good to use this
> > > new struct in any new userspace API.
> > >
> > > See the lsm_get_self_attr(2) syscall for an example (defined in
> > > security/lsm_syscalls.c but effectively implemented via
> > > security_getselfattr() in security/security.c).
> >
> > Thanks for the review, makes sense to me - I had a look at those
> > examples but unfortunately it is getting a bit beyond my (very low)
> > kernel skills, so I've dropped the string-based security_context from
> > v2 but without adding something else, is there someone more familiar
> > with the LSM world that could help implementing that side?
>
> We are running a little short on devs/time in LSM land right now so I
> guess I'm the only real option (not that I have any time, but you know
> how it goes).  If you can put together the ioctl side of things I can
> likely put together the LSM side fairly quickly - sound good?

Here's a skeleton ioctl, needs lsm-specific fields to be added to the new struct, and filled in the new function:

diff --git a/fs/pidfs.c b/fs/pidfs.c
index 4eaf30873105..30310489f5e1 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -115,6 +115,35 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
        return poll_flags;
 }
 
+static long pidfd_security(struct task_struct *task, unsigned int cmd, unsigned long arg)
+{
+       struct pidfd_security __user *usecurity = (struct pidfd_security __user *)arg;
+       size_t usize = _IOC_SIZE(cmd);
+       struct pidfd_security ksecurity = {};
+       __u64 mask;
+
+       if (!usecurity)
+               return -EINVAL;
+       if (usize < PIDFD_SECURITY_SIZE_VER0)
+               return -EINVAL; /* First version, no smaller struct possible */
+
+       if (copy_from_user(&mask, &usecurity->mask, sizeof(mask)))
+               return -EFAULT;
+
+       // TODO: fill in ksecurity
+
+       /*
+        * If userspace and the kernel have the same struct size it can just
+        * be copied. If userspace provides an older struct, only the bits that
+        * userspace knows about will be copied. If userspace provides a new
+        * struct, only the bits that the kernel knows about will be copied.
+        */
+       if (copy_to_user(usecurity, &ksecurity, min(usize, sizeof(ksecurity))))
+               return -EFAULT;
+
+       return 0;
+}
+
 static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg)
 {
        struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg;
@@ -160,7 +189,7 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long
         * the flag only if we can still access it.
         */
        rcu_read_lock();
-       cgrp = task_dfl_cgroup(current);
+       cgrp = task_dfl_cgroup(task);
        if (cgrp) {
                kinfo.cgroupid = cgroup_id(cgrp);
                kinfo.mask |= PIDFD_INFO_CGROUPID;
@@ -209,9 +238,11 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        if (!task)
                return -ESRCH;
 
-       /* Extensible IOCTL that does not open namespace FDs, take a shortcut */
+       /* Extensible IOCTLs that do not open namespace FDs, take a shortcut */
        if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO))
                return pidfd_info(task, cmd, arg);
+       if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_SECURITY))
+               return pidfd_security(task, cmd, arg);
 
        if (arg)
                return -EINVAL;
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 4540f6301b8c..0658880a9089 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -65,6 +65,14 @@ struct pidfd_info {
        __u32 spare0[1];
 };
 
+#define PIDFD_SECURITY_SIZE_VER0               8 /* sizeof first published struct */
+
+struct pidfd_security {
+       /* This mask follows the same rules as pidfd_info.mask. */
+       __u64 mask;
+       // TODO: add data fields and flags and increase size defined above
+};
+
 #define PIDFS_IOCTL_MAGIC 0xFF
 
 #define PIDFD_GET_CGROUP_NAMESPACE            _IO(PIDFS_IOCTL_MAGIC, 1)
@@ -78,5 +86,6 @@ struct pidfd_info {
 #define PIDFD_GET_USER_NAMESPACE              _IO(PIDFS_IOCTL_MAGIC, 9)
 #define PIDFD_GET_UTS_NAMESPACE               _IO(PIDFS_IOCTL_MAGIC, 10)
 #define PIDFD_GET_INFO                        _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
+#define PIDFD_GET_SECURITY                    _IOWR(PIDFS_IOCTL_MAGIC, 12, struct pidfd_security)
 
 #endif /* _UAPI_LINUX_PIDFD_H */
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Luca Boccassi 1 month ago
On Wed, 23 Oct 2024 at 00:45, <luca.boccassi@gmail.com> wrote:
>
> On Sat, 5 Oct 2024 at 17:06, Paul Moore <paul@paul-moore.com> wrote:
> >
> > On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote:
> > > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote:
> > > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote:
> >
> > ...
> >
> > > > [NOTE: please CC the LSM list on changes like this]
> > > >
> > > > Thanks Luca :)
> > > >
> > > > With the addition of the LSM syscalls we've created a lsm_ctx struct
> > > > (see include/uapi/linux/lsm.h) that properly supports multiple LSMs.
> > > > The original char ptr "secctx" approach worked back when only a single
> > > > LSM was supported at any given time, but now that multiple LSMs are
> > > > supported we need something richer, and it would be good to use this
> > > > new struct in any new userspace API.
> > > >
> > > > See the lsm_get_self_attr(2) syscall for an example (defined in
> > > > security/lsm_syscalls.c but effectively implemented via
> > > > security_getselfattr() in security/security.c).
> > >
> > > Thanks for the review, makes sense to me - I had a look at those
> > > examples but unfortunately it is getting a bit beyond my (very low)
> > > kernel skills, so I've dropped the string-based security_context from
> > > v2 but without adding something else, is there someone more familiar
> > > with the LSM world that could help implementing that side?
> >
> > We are running a little short on devs/time in LSM land right now so I
> > guess I'm the only real option (not that I have any time, but you know
> > how it goes).  If you can put together the ioctl side of things I can
> > likely put together the LSM side fairly quickly - sound good?
>
> Here's a skeleton ioctl, needs lsm-specific fields to be added to the new struct, and filled in the new function:

Forgot to mention, this is based on the vfs.pidfs branch of
https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Paul Moore 1 month ago
On Tue, Oct 22, 2024 at 7:56 PM Luca Boccassi <luca.boccassi@gmail.com> wrote:
> On Wed, 23 Oct 2024 at 00:45, <luca.boccassi@gmail.com> wrote:
> > On Sat, 5 Oct 2024 at 17:06, Paul Moore <paul@paul-moore.com> wrote:
> > > On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote:
> > > > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote:
> > > > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote:

...

> > > We are running a little short on devs/time in LSM land right now so I
> > > guess I'm the only real option (not that I have any time, but you know
> > > how it goes).  If you can put together the ioctl side of things I can
> > > likely put together the LSM side fairly quickly - sound good?
> >
> > Here's a skeleton ioctl, needs lsm-specific fields to be added to the new struct, and filled in the new function:
>
> Forgot to mention, this is based on the vfs.pidfs branch of
> https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git

Thanks.  I'll take a closer look at this next week.  In the meantime,
do you have this patch in a tree somewhere publicly fetchable?

-- 
paul-moore.com
Re: [PATCH] pidfd: add ioctl to retrieve pid info
Posted by Luca Boccassi 1 month ago
On Fri, 25 Oct 2024 at 00:14, Paul Moore <paul@paul-moore.com> wrote:
>
> On Tue, Oct 22, 2024 at 7:56 PM Luca Boccassi <luca.boccassi@gmail.com> wrote:
> > On Wed, 23 Oct 2024 at 00:45, <luca.boccassi@gmail.com> wrote:
> > > On Sat, 5 Oct 2024 at 17:06, Paul Moore <paul@paul-moore.com> wrote:
> > > > On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote:
> > > > > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote:
> > > > > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote:
>
> ...
>
> > > > We are running a little short on devs/time in LSM land right now so I
> > > > guess I'm the only real option (not that I have any time, but you know
> > > > how it goes).  If you can put together the ioctl side of things I can
> > > > likely put together the LSM side fairly quickly - sound good?
> > >
> > > Here's a skeleton ioctl, needs lsm-specific fields to be added to the new struct, and filled in the new function:
> >
> > Forgot to mention, this is based on the vfs.pidfs branch of
> > https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
>
> Thanks.  I'll take a closer look at this next week.  In the meantime,
> do you have this patch in a tree somewhere publicly fetchable?

I've pushed it here: https://github.com/bluca/linux/tree/pidfd_ioctl_lsm