fs/pidfs.c | 61 ++++++++++++++++++- include/uapi/linux/pidfd.h | 17 ++++++ .../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++- 3 files changed, 126 insertions(+), 2 deletions(-)
From: Luca Boccassi <bluca@debian.org>
A common pattern when using pid fds is having to get information
about the process, which currently requires /proc being mounted,
resolving the fd to a pid, and then do manual string parsing of
/proc/N/status and friends. This needs to be reimplemented over
and over in all userspace projects (e.g.: I have reimplemented
resolving in systemd, dbus, dbus-daemon, polkit so far), and
requires additional care in checking that the fd is still valid
after having parsed the data, to avoid races.
Having a programmatic API that can be used directly removes all
these requirements, including having /proc mounted.
As discussed at LPC24, add an ioctl with an extensible struct
so that more parameters can be added later if needed. Start with
exposing: pid, uid, gid, groupid, security label (the latter was
requested by the LSM maintainer).
Signed-off-by: Luca Boccassi <bluca@debian.org>
---
fs/pidfs.c | 61 ++++++++++++++++++-
include/uapi/linux/pidfd.h | 17 ++++++
.../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++-
3 files changed, 126 insertions(+), 2 deletions(-)
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 7ffdc88dfb52..dd386d37309c 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -114,6 +114,62 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts)
return poll_flags;
}
+static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg)
+{
+ struct pidfd_info uinfo = {}, info = {};
+
+ if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info)))
+ return -EFAULT;
+ if (uinfo.size > sizeof(struct pidfd_info))
+ return -E2BIG;
+ if (uinfo.size < sizeof(struct pidfd_info))
+ return -EINVAL; /* First version, no smaller struct possible */
+
+ if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT))
+ return -EINVAL;
+
+ memcpy(&info, &uinfo, uinfo.size);
+
+ if (uinfo.request_mask & PIDFD_INFO_PID)
+ info.pid = pid_nr_ns(pid, task_active_pid_ns(task));
+
+ if (uinfo.request_mask & PIDFD_INFO_CREDS) {
+ const struct cred *c = get_task_cred(task);
+ if (!c)
+ return -ESRCH;
+
+ info.uid = from_kuid_munged(current_user_ns(), c->uid);
+ info.gid = from_kgid_munged(current_user_ns(), c->gid);
+ }
+
+ if (uinfo.request_mask & PIDFD_INFO_CGROUPID) {
+ struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup;
+ if (!cgrp)
+ return -ENODEV;
+
+ info.cgroupid = cgroup_id(cgrp);
+ }
+
+ if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) {
+ char *secctx;
+ u32 secid, secctx_len;
+ const struct cred *c = get_task_cred(task);
+ if (!c)
+ return -ESRCH;
+
+ security_cred_getsecid(c, &secid);
+ if (security_secid_to_secctx(secid, &secctx, &secctx_len))
+ return -EFAULT;
+
+ memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1));
+ }
+
+ if (copy_to_user((void __user *)arg, &info, uinfo.size))
+ return -EFAULT;
+
+ return 0;
+}
+
static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
struct task_struct *task __free(put_task) = NULL;
@@ -121,13 +177,16 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
struct pid *pid = pidfd_pid(file);
struct ns_common *ns_common = NULL;
- if (arg)
+ if (!!arg != (cmd == PIDFD_GET_INFO))
return -EINVAL;
task = get_pid_task(pid, PIDTYPE_PID);
if (!task)
return -ESRCH;
+ if (cmd == PIDFD_GET_INFO)
+ return pidfd_info(task, pid, arg);
+
scoped_guard(task_lock, task) {
nsp = task->nsproxy;
if (nsp)
diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h
index 565fc0629fff..bfd0965e01f3 100644
--- a/include/uapi/linux/pidfd.h
+++ b/include/uapi/linux/pidfd.h
@@ -16,6 +16,22 @@
#define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1)
#define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2)
+/* Flags for pidfd_info. */
+#define PIDFD_INFO_PID (1UL << 0)
+#define PIDFD_INFO_CREDS (1UL << 1)
+#define PIDFD_INFO_CGROUPID (1UL << 2)
+#define PIDFD_INFO_SECURITY_CONTEXT (1UL << 3)
+
+struct pidfd_info {
+ __u64 request_mask;
+ __u32 size;
+ uint pid;
+ uint uid;
+ uint gid;
+ __u64 cgroupid;
+ char security_context[NAME_MAX];
+} __packed;
+
#define PIDFS_IOCTL_MAGIC 0xFF
#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
@@ -28,5 +44,6 @@
#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
+#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
#endif /* _UAPI_LINUX_PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index c62564c264b1..929588c7e0f0 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -13,6 +13,7 @@
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
+#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/wait.h>
@@ -21,6 +22,28 @@
#include "pidfd.h"
#include "../kselftest.h"
+#ifndef PIDFS_IOCTL_MAGIC
+#define PIDFS_IOCTL_MAGIC 0xFF
+#endif
+
+#ifndef PIDFD_GET_INFO
+#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info)
+#define PIDFD_INFO_PID (1UL << 0)
+#define PIDFD_INFO_CREDS (1UL << 1)
+#define PIDFD_INFO_CGROUPID (1UL << 2)
+#define PIDFD_INFO_SECURITY_CONTEXT (1UL << 3)
+
+struct pidfd_info {
+ __u64 request_mask;
+ __u32 size;
+ uint pid;
+ uint uid;
+ uint gid;
+ __u64 cgroupid;
+ char security_context[NAME_MAX];
+} __attribute__((__packed__));
+#endif
+
static int safe_int(const char *numstr, int *converted)
{
char *err = NULL;
@@ -120,10 +143,14 @@ static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
int main(int argc, char **argv)
{
+ struct pidfd_info info = {
+ .size = sizeof(struct pidfd_info),
+ .request_mask = PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID,
+ };
int pidfd = -1, ret = 1;
pid_t pid;
- ksft_set_plan(3);
+ ksft_set_plan(4);
pidfd = sys_pidfd_open(-1, 0);
if (pidfd >= 0) {
@@ -153,6 +180,27 @@ int main(int argc, char **argv)
pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1);
ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid);
+ if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) {
+ ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno));
+ goto on_error;
+ }
+ if (info.pid != pid) {
+ ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n",
+ pid, info.pid);
+ goto on_error;
+ }
+ if (info.uid != getuid()) {
+ ksft_print_msg("uid %d does not match uid from info %d\n",
+ getuid(), info.uid);
+ goto on_error;
+ }
+ if (info.gid != getgid()) {
+ ksft_print_msg("gid %d does not match gid from info %d\n",
+ getgid(), info.gid);
+ goto on_error;
+ }
+ ksft_test_result_pass("get info from pidfd test: passed\n");
+
ret = 0;
on_error:
--
2.45.2
On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote: > From: Luca Boccassi <bluca@debian.org> > > A common pattern when using pid fds is having to get information > about the process, which currently requires /proc being mounted, > resolving the fd to a pid, and then do manual string parsing of > /proc/N/status and friends. This needs to be reimplemented over > and over in all userspace projects (e.g.: I have reimplemented > resolving in systemd, dbus, dbus-daemon, polkit so far), and > requires additional care in checking that the fd is still valid > after having parsed the data, to avoid races. > > Having a programmatic API that can be used directly removes all > these requirements, including having /proc mounted. Yes, thanks for working on that. > > As discussed at LPC24, add an ioctl with an extensible struct > so that more parameters can be added later if needed. Start with > exposing: pid, uid, gid, groupid, security label (the latter was > requested by the LSM maintainer). > > Signed-off-by: Luca Boccassi <bluca@debian.org> > --- > fs/pidfs.c | 61 ++++++++++++++++++- > include/uapi/linux/pidfd.h | 17 ++++++ > .../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++- > 3 files changed, 126 insertions(+), 2 deletions(-) > > diff --git a/fs/pidfs.c b/fs/pidfs.c > index 7ffdc88dfb52..dd386d37309c 100644 > --- a/fs/pidfs.c > +++ b/fs/pidfs.c > @@ -114,6 +114,62 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) > return poll_flags; > } > > +static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg) > +{ > + struct pidfd_info uinfo = {}, info = {}; > + > + if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info))) > + return -EFAULT; > + if (uinfo.size > sizeof(struct pidfd_info)) > + return -E2BIG; We want to allow for the struct passed down to be bigger than what the kernel knows so older kernels can handle newer structs just fine. So this check needs to go. > + if (uinfo.size < sizeof(struct pidfd_info)) > + return -EINVAL; /* First version, no smaller struct possible */ > + > + if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT)) > + return -EINVAL; This means you'll fail hard when requesting information that older kernels don't provide. For a request mask based interface the correct protocol is to answer in a result_mask what information the kernel was actually able to provide. This way you get seamless forward and backward compatibility without having the caller to guess what extension the kernel doesn't support. > + > + memcpy(&info, &uinfo, uinfo.size); > + > + if (uinfo.request_mask & PIDFD_INFO_PID) You should drop this and return basic pid information unconditionally. > + info.pid = pid_nr_ns(pid, task_active_pid_ns(task)); I think this is wrong what this should return is the pid of the process as seen from the caller's pid namespace. Otherwise you'll report meaningless pid numbers to the caller when the caller's pid namespace is outside of the pid namespace hierarchy of the process that pidfd refers to. This can easily happen when you receive pidfds via SCM_RIGHTS. The other question is what you want to return here. What you did above is equivalent to what pid_vnr() does. IIRC, then this will yield the thread-group id in pidfd_info->pid if the pidfd is a thread-group leader pidfd and the thread-id if the pidfd is a PIDFD_THREAD. While the caller should be able to infer that from the type of pidfd I think we should just have a unique meaning for the pid field. It would probably even make sense to have: (1) thread_id (2) thread_group_id (3) parent_id > + if (uinfo.request_mask & PIDFD_INFO_CREDS) { You should drop this and return basic uid/gid information unconditionally. Also the uid and gid field maybe should be named ruid and rgid because sooner or later someone will want euid/egid and suid/sgid and fsuid/fsgid. In fact, we might want to probably return ruid/rgid, euid/egid, suid/sgid, fsuid/fsgid unconditionally. I suspect that stuff like supplementary groups should rather be reported through an extra ioctl instead of having a variable sized array in the struct. > + const struct cred *c = get_task_cred(task); > + if (!c) > + return -ESRCH; > + > + info.uid = from_kuid_munged(current_user_ns(), c->uid); > + info.gid = from_kgid_munged(current_user_ns(), c->gid); > + } > + > + if (uinfo.request_mask & PIDFD_INFO_CGROUPID) { > + struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; > + if (!cgrp) > + return -ENODEV; > + > + info.cgroupid = cgroup_id(cgrp); > + } > + > + if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) { It would make sense for security information to get a separate ioctl so that struct pidfd_info just return simple and fast information and the security stuff can include things such as seccomp, caps etc pp. > + char *secctx; > + u32 secid, secctx_len; > + const struct cred *c = get_task_cred(task); > + if (!c) > + return -ESRCH; > + > + security_cred_getsecid(c, &secid); > + if (security_secid_to_secctx(secid, &secctx, &secctx_len)) > + return -EFAULT; > + > + memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1)); > + } > + > + if (copy_to_user((void __user *)arg, &info, uinfo.size)) > + return -EFAULT; > + > + return 0; > +} > + > static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) > { > struct task_struct *task __free(put_task) = NULL; > @@ -121,13 +177,16 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) > struct pid *pid = pidfd_pid(file); > struct ns_common *ns_common = NULL; > > - if (arg) > + if (!!arg != (cmd == PIDFD_GET_INFO)) > return -EINVAL; > > task = get_pid_task(pid, PIDTYPE_PID); > if (!task) > return -ESRCH; > > + if (cmd == PIDFD_GET_INFO) > + return pidfd_info(task, pid, arg); So, please take a look at nsfs or look at seccomp. The gist is that in order to keep this extensible we don't match on the ioctl command directly because it encodes the size of the argument instead we do: /* extensible ioctls */ switch (_IOC_NR(cmd)) { case _IOC_NR(PIDFD_GET_INFO): { struct pidfd_info kinfo = {}; struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; size_t usize = _IOC_SIZE(cmd); if (!uinfo) return -EINVAL; if (usize < PIDFD_INFO_SIZE_VER0) return -EINVAL; /* fill in kinfo information */ kinfo->ruid ... kinfo->cgroup_id ... kinfo->result_mask ... /* * If userspace and the kernel have the same struct size it can just * be copied. If userspace provides an older struct, only the bits that * userspace knows about will be copied. If userspace provides a new * struct, only the bits that the kernel knows about will be copied and * the size value will be set to the size the kernel knows about. */ if (copy_to_user(uinfo, kinfo, min(usize, sizeof(*kinfo)))) return -EFAULT; > + > scoped_guard(task_lock, task) { > nsp = task->nsproxy; > if (nsp) > diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h > index 565fc0629fff..bfd0965e01f3 100644 > --- a/include/uapi/linux/pidfd.h > +++ b/include/uapi/linux/pidfd.h > @@ -16,6 +16,22 @@ > #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) > #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) > > +/* Flags for pidfd_info. */ > +#define PIDFD_INFO_PID (1UL << 0) > +#define PIDFD_INFO_CREDS (1UL << 1) > +#define PIDFD_INFO_CGROUPID (1UL << 2) > +#define PIDFD_INFO_SECURITY_CONTEXT (1UL << 3) > + > +struct pidfd_info { > + __u64 request_mask; > + __u32 size; > + uint pid; The size is unnecessary because it is directly encoded into the ioctl command. > + uint uid; > + uint gid; > + __u64 cgroupid; > + char security_context[NAME_MAX]; > +} __packed; The packed attribute should be unnecessary. The structure should simply be correctly padded and should use explicitly sized types: struct pidfd_info { /* Let userspace request expensive stuff explictly. */ __u64 request_mask; /* And let the kernel indicate whether it knows about it. */ __u64 result_mask; __u32 pid; __u32 uid; __u32 gid; __u64 cgroup_id; __u32 spare0[1]; }; I'm not sure what LSM info to be put in there and we can just do it as an extension. > + > #define PIDFS_IOCTL_MAGIC 0xFF > > #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) > @@ -28,5 +44,6 @@ > #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) > #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) > #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) > +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) > > #endif /* _UAPI_LINUX_PIDFD_H */ > diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c > index c62564c264b1..929588c7e0f0 100644 > --- a/tools/testing/selftests/pidfd/pidfd_open_test.c > +++ b/tools/testing/selftests/pidfd/pidfd_open_test.c > @@ -13,6 +13,7 @@ > #include <stdlib.h> > #include <string.h> > #include <syscall.h> > +#include <sys/ioctl.h> > #include <sys/mount.h> > #include <sys/prctl.h> > #include <sys/wait.h> > @@ -21,6 +22,28 @@ > #include "pidfd.h" > #include "../kselftest.h" > > +#ifndef PIDFS_IOCTL_MAGIC > +#define PIDFS_IOCTL_MAGIC 0xFF > +#endif > + > +#ifndef PIDFD_GET_INFO > +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) > +#define PIDFD_INFO_PID (1UL << 0) > +#define PIDFD_INFO_CREDS (1UL << 1) > +#define PIDFD_INFO_CGROUPID (1UL << 2) > +#define PIDFD_INFO_SECURITY_CONTEXT (1UL << 3) > + > +struct pidfd_info { > + __u64 request_mask; > + __u32 size; > + uint pid; > + uint uid; > + uint gid; > + __u64 cgroupid; > + char security_context[NAME_MAX]; > +} __attribute__((__packed__)); > +#endif > + > static int safe_int(const char *numstr, int *converted) > { > char *err = NULL; > @@ -120,10 +143,14 @@ static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen) > > int main(int argc, char **argv) > { > + struct pidfd_info info = { > + .size = sizeof(struct pidfd_info), > + .request_mask = PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID, > + }; > int pidfd = -1, ret = 1; > pid_t pid; > > - ksft_set_plan(3); > + ksft_set_plan(4); > > pidfd = sys_pidfd_open(-1, 0); > if (pidfd >= 0) { > @@ -153,6 +180,27 @@ int main(int argc, char **argv) > pid = get_pid_from_fdinfo_file(pidfd, "Pid:", sizeof("Pid:") - 1); > ksft_print_msg("pidfd %d refers to process with pid %d\n", pidfd, pid); > > + if (ioctl(pidfd, PIDFD_GET_INFO, &info) < 0) { > + ksft_print_msg("%s - failed to get info from pidfd\n", strerror(errno)); > + goto on_error; > + } > + if (info.pid != pid) { > + ksft_print_msg("pid from fdinfo file %d does not match pid from ioctl %d\n", > + pid, info.pid); > + goto on_error; > + } > + if (info.uid != getuid()) { > + ksft_print_msg("uid %d does not match uid from info %d\n", > + getuid(), info.uid); > + goto on_error; > + } > + if (info.gid != getgid()) { > + ksft_print_msg("gid %d does not match gid from info %d\n", > + getgid(), info.gid); > + goto on_error; > + } > + ksft_test_result_pass("get info from pidfd test: passed\n"); > + > ret = 0; > > on_error: > -- > 2.45.2 >
Christian Brauner wrote: > struct pidfd_info { > /* Let userspace request expensive stuff explictly. */ > __u64 request_mask; > /* And let the kernel indicate whether it knows about it. */ > __u64 result_mask; I don't think it's necessary to have these two fields separate. The kernel should write to the same mask field userspace used. In theory there could be an operation to probe for *everything* the kernel understands, but in practice with a binary structure there's little point finding out about flags you don't know the corresponding structure bits for.
... > It would make sense for security information to get a separate ioctl so > that struct pidfd_info just return simple and fast information and the > security stuff can include things such as seccomp, caps etc pp. Which also means it is pointless having the two bitmasks. They just complicate things unnecessarily. What you might want to do is have the kernel return the size of the structure it would fill in (probably as the first field). Also have the kernel fill (probably with zeros) the end of the user buffer it didn't write to. The ioctl is then an IOR() one. Mind you, if you dig into the history (possibly of SYSV and/or BSD) you'll find a structure the kernel filled with the info 'ps' needs. The text based code that Linux uses is probably more extensible. David - Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK Registration No: 1397386 (Wales)
On Fri, 4 Oct 2024 at 10:29, Christian Brauner <brauner@kernel.org> wrote: > > On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote: > > + info.pid = pid_nr_ns(pid, task_active_pid_ns(task)); > > I think this is wrong what this should return is the pid of the process > as seen from the caller's pid namespace. Otherwise you'll report > meaningless pid numbers to the caller when the caller's pid namespace is > outside of the pid namespace hierarchy of the process that pidfd refers > to. This can easily happen when you receive pidfds via SCM_RIGHTS. Thanks for the review, I applied the rest of the comments in v2 (I think at least), but for this one I can't tell, how should I do it? Naively I thought that task_active_pid_ns(task) would already fulfil this requirement and resolve it using the correct pid namespace, is that not the case? If not, what else should I do here?
I wasn't CC'ed, so I didn't see the patch, but looking at Christian's reply ... On 10/04, Luca Boccassi wrote: > On Fri, 4 Oct 2024 at 10:29, Christian Brauner <brauner@kernel.org> wrote: > > > > On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote: > > > + info.pid = pid_nr_ns(pid, task_active_pid_ns(task)); > > > > I think this is wrong what this should return is the pid of the process > > as seen from the caller's pid namespace. Agreed, > Thanks for the review, I applied the rest of the comments in v2 (I > think at least), but for this one I can't tell, how should I do it? I guess Christian meant you should simply use info.pid = task_pid_vnr(task); task_pid_vnr(task) returns the task's pid in the caller's namespace. Oleg.
On Fri, 4 Oct 2024 at 20:30, Oleg Nesterov <oleg@redhat.com> wrote: > > I wasn't CC'ed, so I didn't see the patch, but looking at Christian's > reply ... > > On 10/04, Luca Boccassi wrote: > > On Fri, 4 Oct 2024 at 10:29, Christian Brauner <brauner@kernel.org> wrote: > > > > > > On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote: > > > > + info.pid = pid_nr_ns(pid, task_active_pid_ns(task)); > > > > > > I think this is wrong what this should return is the pid of the process > > > as seen from the caller's pid namespace. > > Agreed, > > > Thanks for the review, I applied the rest of the comments in v2 (I > > think at least), but for this one I can't tell, how should I do it? > > I guess Christian meant you should simply use > > info.pid = task_pid_vnr(task); > > task_pid_vnr(task) returns the task's pid in the caller's namespace. Ah I see, I didn't realize there was a difference, sent v3 with the suggested change just now, thanks.
On 10/04, Luca Boccassi wrote: > > On Fri, 4 Oct 2024 at 20:30, Oleg Nesterov <oleg@redhat.com> wrote: > > > > I guess Christian meant you should simply use > > > > info.pid = task_pid_vnr(task); > > > > task_pid_vnr(task) returns the task's pid in the caller's namespace. > > Ah I see, I didn't realize there was a difference, sent v3 with the > suggested change just now, thanks. I didn't get v3, I guess I wasn't cc'ed again. So, just in case, let me add that task_pid_vnr(task) can return 0 if this task exits after get_pid_task(). Perhaps this is fine, I do not know. But perhaps you should actually use pid_vnr(pid). Oleg.
On Sat, 5 Oct 2024 at 12:29, Oleg Nesterov <oleg@redhat.com> wrote: > > On 10/04, Luca Boccassi wrote: > > > > On Fri, 4 Oct 2024 at 20:30, Oleg Nesterov <oleg@redhat.com> wrote: > > > > > > I guess Christian meant you should simply use > > > > > > info.pid = task_pid_vnr(task); > > > > > > task_pid_vnr(task) returns the task's pid in the caller's namespace. > > > > Ah I see, I didn't realize there was a difference, sent v3 with the > > suggested change just now, thanks. > > I didn't get v3, I guess I wasn't cc'ed again. > > So, just in case, let me add that task_pid_vnr(task) can return 0 if > this task exits after get_pid_task(). > > Perhaps this is fine, I do not know. But perhaps you should actually > use pid_vnr(pid). > > Oleg. I have just sent v5 CC'ing you and adding a final check before the copy to userspace, that returns ESRCH if the task has exited. This should solve that issue, and also be future-proof against potential additions that might slow down processing due to gathering more data or so.
On Fri, Oct 4, 2024 at 5:29 AM Christian Brauner <brauner@kernel.org> wrote: > On Wed, Oct 02, 2024 at 03:24:33PM GMT, luca.boccassi@gmail.com wrote: > > From: Luca Boccassi <bluca@debian.org> > > > > A common pattern when using pid fds is having to get information > > about the process, which currently requires /proc being mounted, > > resolving the fd to a pid, and then do manual string parsing of > > /proc/N/status and friends. This needs to be reimplemented over > > and over in all userspace projects (e.g.: I have reimplemented > > resolving in systemd, dbus, dbus-daemon, polkit so far), and > > requires additional care in checking that the fd is still valid > > after having parsed the data, to avoid races. > > > > Having a programmatic API that can be used directly removes all > > these requirements, including having /proc mounted. ... > > + const struct cred *c = get_task_cred(task); > > + if (!c) > > + return -ESRCH; > > + > > + info.uid = from_kuid_munged(current_user_ns(), c->uid); > > + info.gid = from_kgid_munged(current_user_ns(), c->gid); > > + } > > + > > + if (uinfo.request_mask & PIDFD_INFO_CGROUPID) { > > + struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; > > + if (!cgrp) > > + return -ENODEV; > > + > > + info.cgroupid = cgroup_id(cgrp); > > + } > > + > > + if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) { > > It would make sense for security information to get a separate ioctl so > that struct pidfd_info just return simple and fast information and the > security stuff can include things such as seccomp, caps etc pp. I'm okay with moving the security related info to a separate ioctl, but I'd like to strongly request that it be merged at the same time as the process ID related info. It can be a separate patch as part of a single patchset if you want to make the ID patch backport friendly for distros, but I think we should treat the security related info with the same importance as the ID info. > > +struct pidfd_info { > > + __u64 request_mask; > > + __u32 size; > > + uint pid; > > The size is unnecessary because it is directly encoded into the ioctl > command. > > > + uint uid; > > + uint gid; > > + __u64 cgroupid; > > + char security_context[NAME_MAX]; > > +} __packed; > > The packed attribute should be unnecessary. The structure should simply > be correctly padded and should use explicitly sized types: > > struct pidfd_info { > /* Let userspace request expensive stuff explictly. */ > __u64 request_mask; > /* And let the kernel indicate whether it knows about it. */ > __u64 result_mask; > __u32 pid; > __u32 uid; > __u32 gid; > __u64 cgroup_id; > __u32 spare0[1]; > }; > > I'm not sure what LSM info to be put in there and we can just do it as > an extension. See my original response to Luca on October 2nd, you were on the To/CC line. -- paul-moore.com
Hi, kernel test robot noticed the following build warnings: [auto build test WARNING on shuah-kselftest/next] [also build test WARNING on shuah-kselftest/fixes linus/master v6.12-rc1 next-20241003] [cannot apply to brauner-vfs/vfs.all] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/luca-boccassi-gmail-com/pidfd-add-ioctl-to-retrieve-pid-info/20241002-223302 base: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next patch link: https://lore.kernel.org/r/20241002142516.110567-1-luca.boccassi%40gmail.com patch subject: [PATCH] pidfd: add ioctl to retrieve pid info config: x86_64-randconfig-123-20241004 (https://download.01.org/0day-ci/archive/20241004/202410041128.tLVDbeJB-lkp@intel.com/config) compiler: gcc-12 (Debian 12.2.0-14) 12.2.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241004/202410041128.tLVDbeJB-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410041128.tLVDbeJB-lkp@intel.com/ sparse warnings: (new ones prefixed by >>) >> fs/pidfs.c:121:37: sparse: sparse: incorrect type in argument 2 (different address spaces) @@ expected void const [noderef] __user *from @@ got struct pidfd_info * @@ fs/pidfs.c:121:37: sparse: expected void const [noderef] __user *from fs/pidfs.c:121:37: sparse: got struct pidfd_info * vim +121 fs/pidfs.c 116 117 static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg) 118 { 119 struct pidfd_info uinfo = {}, info = {}; 120 > 121 if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info))) 122 return -EFAULT; 123 if (uinfo.size > sizeof(struct pidfd_info)) 124 return -E2BIG; 125 if (uinfo.size < sizeof(struct pidfd_info)) 126 return -EINVAL; /* First version, no smaller struct possible */ 127 128 if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT)) 129 return -EINVAL; 130 131 memcpy(&info, &uinfo, uinfo.size); 132 133 if (uinfo.request_mask & PIDFD_INFO_PID) 134 info.pid = pid_nr_ns(pid, task_active_pid_ns(task)); 135 136 if (uinfo.request_mask & PIDFD_INFO_CREDS) { 137 const struct cred *c = get_task_cred(task); 138 if (!c) 139 return -ESRCH; 140 141 info.uid = from_kuid_munged(current_user_ns(), c->uid); 142 info.gid = from_kgid_munged(current_user_ns(), c->gid); 143 } 144 145 if (uinfo.request_mask & PIDFD_INFO_CGROUPID) { 146 struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; 147 if (!cgrp) 148 return -ENODEV; 149 150 info.cgroupid = cgroup_id(cgrp); 151 } 152 153 if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) { 154 char *secctx; 155 u32 secid, secctx_len; 156 const struct cred *c = get_task_cred(task); 157 if (!c) 158 return -ESRCH; 159 160 security_cred_getsecid(c, &secid); 161 if (security_secid_to_secctx(secid, &secctx, &secctx_len)) 162 return -EFAULT; 163 164 memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1)); 165 } 166 167 if (copy_to_user((void __user *)arg, &info, uinfo.size)) 168 return -EFAULT; 169 170 return 0; 171 } 172 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi, kernel test robot noticed the following build errors: [auto build test ERROR on shuah-kselftest/next] [also build test ERROR on shuah-kselftest/fixes linus/master v6.12-rc1 next-20241003] [cannot apply to brauner-vfs/vfs.all] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/luca-boccassi-gmail-com/pidfd-add-ioctl-to-retrieve-pid-info/20241002-223302 base: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next patch link: https://lore.kernel.org/r/20241002142516.110567-1-luca.boccassi%40gmail.com patch subject: [PATCH] pidfd: add ioctl to retrieve pid info config: openrisc-allnoconfig (https://download.01.org/0day-ci/archive/20241004/202410040559.LZnpGpVU-lkp@intel.com/config) compiler: or1k-linux-gcc (GCC) 14.1.0 reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241004/202410040559.LZnpGpVU-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410040559.LZnpGpVU-lkp@intel.com/ All errors (new ones prefixed by >>): fs/pidfs.c: In function 'pidfd_info': >> fs/pidfs.c:146:39: error: implicit declaration of function 'task_css_check' [-Wimplicit-function-declaration] 146 | struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; | ^~~~~~~~~~~~~~ >> fs/pidfs.c:146:60: error: 'pids_cgrp_id' undeclared (first use in this function) 146 | struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; | ^~~~~~~~~~~~ fs/pidfs.c:146:60: note: each undeclared identifier is reported only once for each function it appears in vim +/task_css_check +146 fs/pidfs.c 116 117 static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg) 118 { 119 struct pidfd_info uinfo = {}, info = {}; 120 121 if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info))) 122 return -EFAULT; 123 if (uinfo.size > sizeof(struct pidfd_info)) 124 return -E2BIG; 125 if (uinfo.size < sizeof(struct pidfd_info)) 126 return -EINVAL; /* First version, no smaller struct possible */ 127 128 if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT)) 129 return -EINVAL; 130 131 memcpy(&info, &uinfo, uinfo.size); 132 133 if (uinfo.request_mask & PIDFD_INFO_PID) 134 info.pid = pid_nr_ns(pid, task_active_pid_ns(task)); 135 136 if (uinfo.request_mask & PIDFD_INFO_CREDS) { 137 const struct cred *c = get_task_cred(task); 138 if (!c) 139 return -ESRCH; 140 141 info.uid = from_kuid_munged(current_user_ns(), c->uid); 142 info.gid = from_kgid_munged(current_user_ns(), c->gid); 143 } 144 145 if (uinfo.request_mask & PIDFD_INFO_CGROUPID) { > 146 struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; 147 if (!cgrp) 148 return -ENODEV; 149 150 info.cgroupid = cgroup_id(cgrp); 151 } 152 153 if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) { 154 char *secctx; 155 u32 secid, secctx_len; 156 const struct cred *c = get_task_cred(task); 157 if (!c) 158 return -ESRCH; 159 160 security_cred_getsecid(c, &secid); 161 if (security_secid_to_secctx(secid, &secctx, &secctx_len)) 162 return -EFAULT; 163 164 memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1)); 165 } 166 167 if (copy_to_user((void __user *)arg, &info, uinfo.size)) 168 return -EFAULT; 169 170 return 0; 171 } 172 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
Hi, kernel test robot noticed the following build errors: [auto build test ERROR on shuah-kselftest/next] [also build test ERROR on shuah-kselftest/fixes linus/master v6.12-rc1 next-20241003] [cannot apply to brauner-vfs/vfs.all] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch#_base_tree_information] url: https://github.com/intel-lab-lkp/linux/commits/luca-boccassi-gmail-com/pidfd-add-ioctl-to-retrieve-pid-info/20241002-223302 base: https://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git next patch link: https://lore.kernel.org/r/20241002142516.110567-1-luca.boccassi%40gmail.com patch subject: [PATCH] pidfd: add ioctl to retrieve pid info config: x86_64-allnoconfig (https://download.01.org/0day-ci/archive/20241004/202410040539.j2SZpABt-lkp@intel.com/config) compiler: clang version 18.1.8 (https://github.com/llvm/llvm-project 3b5b5c1ec4a3095ab096dd780e84d7ab81f3d7ff) reproduce (this is a W=1 build): (https://download.01.org/0day-ci/archive/20241004/202410040539.j2SZpABt-lkp@intel.com/reproduce) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <lkp@intel.com> | Closes: https://lore.kernel.org/oe-kbuild-all/202410040539.j2SZpABt-lkp@intel.com/ All errors (new ones prefixed by >>): >> fs/pidfs.c:146:25: error: call to undeclared function 'task_css_check'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration] 146 | struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; | ^ >> fs/pidfs.c:146:46: error: use of undeclared identifier 'pids_cgrp_id' 146 | struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; | ^ 2 errors generated. vim +/task_css_check +146 fs/pidfs.c 116 117 static long pidfd_info(struct task_struct *task, struct pid *pid, unsigned long arg) 118 { 119 struct pidfd_info uinfo = {}, info = {}; 120 121 if (copy_from_user(&uinfo, (struct pidfd_info *)arg, sizeof(struct pidfd_info))) 122 return -EFAULT; 123 if (uinfo.size > sizeof(struct pidfd_info)) 124 return -E2BIG; 125 if (uinfo.size < sizeof(struct pidfd_info)) 126 return -EINVAL; /* First version, no smaller struct possible */ 127 128 if (uinfo.request_mask & ~(PIDFD_INFO_PID | PIDFD_INFO_CREDS | PIDFD_INFO_CGROUPID | PIDFD_INFO_SECURITY_CONTEXT)) 129 return -EINVAL; 130 131 memcpy(&info, &uinfo, uinfo.size); 132 133 if (uinfo.request_mask & PIDFD_INFO_PID) 134 info.pid = pid_nr_ns(pid, task_active_pid_ns(task)); 135 136 if (uinfo.request_mask & PIDFD_INFO_CREDS) { 137 const struct cred *c = get_task_cred(task); 138 if (!c) 139 return -ESRCH; 140 141 info.uid = from_kuid_munged(current_user_ns(), c->uid); 142 info.gid = from_kgid_munged(current_user_ns(), c->gid); 143 } 144 145 if (uinfo.request_mask & PIDFD_INFO_CGROUPID) { > 146 struct cgroup *cgrp = task_css_check(task, pids_cgrp_id, 1)->cgroup; 147 if (!cgrp) 148 return -ENODEV; 149 150 info.cgroupid = cgroup_id(cgrp); 151 } 152 153 if (uinfo.request_mask & PIDFD_INFO_SECURITY_CONTEXT) { 154 char *secctx; 155 u32 secid, secctx_len; 156 const struct cred *c = get_task_cred(task); 157 if (!c) 158 return -ESRCH; 159 160 security_cred_getsecid(c, &secid); 161 if (security_secid_to_secctx(secid, &secctx, &secctx_len)) 162 return -EFAULT; 163 164 memcpy(info.security_context, secctx, min_t(u32, secctx_len, NAME_MAX-1)); 165 } 166 167 if (copy_to_user((void __user *)arg, &info, uinfo.size)) 168 return -EFAULT; 169 170 return 0; 171 } 172 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote: > > From: Luca Boccassi <bluca@debian.org> > > A common pattern when using pid fds is having to get information > about the process, which currently requires /proc being mounted, > resolving the fd to a pid, and then do manual string parsing of > /proc/N/status and friends. This needs to be reimplemented over > and over in all userspace projects (e.g.: I have reimplemented > resolving in systemd, dbus, dbus-daemon, polkit so far), and > requires additional care in checking that the fd is still valid > after having parsed the data, to avoid races. > > Having a programmatic API that can be used directly removes all > these requirements, including having /proc mounted. > > As discussed at LPC24, add an ioctl with an extensible struct > so that more parameters can be added later if needed. Start with > exposing: pid, uid, gid, groupid, security label (the latter was > requested by the LSM maintainer). > > Signed-off-by: Luca Boccassi <bluca@debian.org> > --- > fs/pidfs.c | 61 ++++++++++++++++++- > include/uapi/linux/pidfd.h | 17 ++++++ > .../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++- > 3 files changed, 126 insertions(+), 2 deletions(-) ... > diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h > index 565fc0629fff..bfd0965e01f3 100644 > --- a/include/uapi/linux/pidfd.h > +++ b/include/uapi/linux/pidfd.h > @@ -16,6 +16,22 @@ > #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) > #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) > > +/* Flags for pidfd_info. */ > +#define PIDFD_INFO_PID (1UL << 0) > +#define PIDFD_INFO_CREDS (1UL << 1) > +#define PIDFD_INFO_CGROUPID (1UL << 2) > +#define PIDFD_INFO_SECURITY_CONTEXT (1UL << 3) > + > +struct pidfd_info { > + __u64 request_mask; > + __u32 size; > + uint pid; > + uint uid; > + uint gid; > + __u64 cgroupid; > + char security_context[NAME_MAX]; [NOTE: please CC the LSM list on changes like this] Thanks Luca :) With the addition of the LSM syscalls we've created a lsm_ctx struct (see include/uapi/linux/lsm.h) that properly supports multiple LSMs. The original char ptr "secctx" approach worked back when only a single LSM was supported at any given time, but now that multiple LSMs are supported we need something richer, and it would be good to use this new struct in any new userspace API. See the lsm_get_self_attr(2) syscall for an example (defined in security/lsm_syscalls.c but effectively implemented via security_getselfattr() in security/security.c). > +} __packed; > + > #define PIDFS_IOCTL_MAGIC 0xFF > > #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) > @@ -28,5 +44,6 @@ > #define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) > #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) > #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) > +#define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) > > #endif /* _UAPI_LINUX_PIDFD_H */ -- paul-moore.com
On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote: > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote: > > > > From: Luca Boccassi <bluca@debian.org> > > > > A common pattern when using pid fds is having to get information > > about the process, which currently requires /proc being mounted, > > resolving the fd to a pid, and then do manual string parsing of > > /proc/N/status and friends. This needs to be reimplemented over > > and over in all userspace projects (e.g.: I have reimplemented > > resolving in systemd, dbus, dbus-daemon, polkit so far), and > > requires additional care in checking that the fd is still valid > > after having parsed the data, to avoid races. > > > > Having a programmatic API that can be used directly removes all > > these requirements, including having /proc mounted. > > > > As discussed at LPC24, add an ioctl with an extensible struct > > so that more parameters can be added later if needed. Start with > > exposing: pid, uid, gid, groupid, security label (the latter was > > requested by the LSM maintainer). > > > > Signed-off-by: Luca Boccassi <bluca@debian.org> > > --- > > fs/pidfs.c | 61 ++++++++++++++++++- > > include/uapi/linux/pidfd.h | 17 ++++++ > > .../testing/selftests/pidfd/pidfd_open_test.c | 50 ++++++++++++++- > > 3 files changed, 126 insertions(+), 2 deletions(-) > > ... > > > diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h > > index 565fc0629fff..bfd0965e01f3 100644 > > --- a/include/uapi/linux/pidfd.h > > +++ b/include/uapi/linux/pidfd.h > > @@ -16,6 +16,22 @@ > > #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) > > #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) > > > > +/* Flags for pidfd_info. */ > > +#define PIDFD_INFO_PID (1UL << 0) > > +#define PIDFD_INFO_CREDS (1UL << 1) > > +#define PIDFD_INFO_CGROUPID (1UL << 2) > > +#define PIDFD_INFO_SECURITY_CONTEXT (1UL << 3) > > + > > +struct pidfd_info { > > + __u64 request_mask; > > + __u32 size; > > + uint pid; > > + uint uid; > > + uint gid; > > + __u64 cgroupid; > > + char security_context[NAME_MAX]; > > [NOTE: please CC the LSM list on changes like this] > > Thanks Luca :) > > With the addition of the LSM syscalls we've created a lsm_ctx struct > (see include/uapi/linux/lsm.h) that properly supports multiple LSMs. > The original char ptr "secctx" approach worked back when only a single > LSM was supported at any given time, but now that multiple LSMs are > supported we need something richer, and it would be good to use this > new struct in any new userspace API. > > See the lsm_get_self_attr(2) syscall for an example (defined in > security/lsm_syscalls.c but effectively implemented via > security_getselfattr() in security/security.c). Thanks for the review, makes sense to me - I had a look at those examples but unfortunately it is getting a bit beyond my (very low) kernel skills, so I've dropped the string-based security_context from v2 but without adding something else, is there someone more familiar with the LSM world that could help implementing that side?
On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote: > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote: > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote: ... > > [NOTE: please CC the LSM list on changes like this] > > > > Thanks Luca :) > > > > With the addition of the LSM syscalls we've created a lsm_ctx struct > > (see include/uapi/linux/lsm.h) that properly supports multiple LSMs. > > The original char ptr "secctx" approach worked back when only a single > > LSM was supported at any given time, but now that multiple LSMs are > > supported we need something richer, and it would be good to use this > > new struct in any new userspace API. > > > > See the lsm_get_self_attr(2) syscall for an example (defined in > > security/lsm_syscalls.c but effectively implemented via > > security_getselfattr() in security/security.c). > > Thanks for the review, makes sense to me - I had a look at those > examples but unfortunately it is getting a bit beyond my (very low) > kernel skills, so I've dropped the string-based security_context from > v2 but without adding something else, is there someone more familiar > with the LSM world that could help implementing that side? We are running a little short on devs/time in LSM land right now so I guess I'm the only real option (not that I have any time, but you know how it goes). If you can put together the ioctl side of things I can likely put together the LSM side fairly quickly - sound good? -- paul-moore.com
On Sat, 5 Oct 2024 at 17:06, Paul Moore <paul@paul-moore.com> wrote: > > On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote: > > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote: > > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote: > > ... > > > > [NOTE: please CC the LSM list on changes like this] > > > > > > Thanks Luca :) > > > > > > With the addition of the LSM syscalls we've created a lsm_ctx struct > > > (see include/uapi/linux/lsm.h) that properly supports multiple LSMs. > > > The original char ptr "secctx" approach worked back when only a single > > > LSM was supported at any given time, but now that multiple LSMs are > > > supported we need something richer, and it would be good to use this > > > new struct in any new userspace API. > > > > > > See the lsm_get_self_attr(2) syscall for an example (defined in > > > security/lsm_syscalls.c but effectively implemented via > > > security_getselfattr() in security/security.c). > > > > Thanks for the review, makes sense to me - I had a look at those > > examples but unfortunately it is getting a bit beyond my (very low) > > kernel skills, so I've dropped the string-based security_context from > > v2 but without adding something else, is there someone more familiar > > with the LSM world that could help implementing that side? > > We are running a little short on devs/time in LSM land right now so I > guess I'm the only real option (not that I have any time, but you know > how it goes). If you can put together the ioctl side of things I can > likely put together the LSM side fairly quickly - sound good? Here's a skeleton ioctl, needs lsm-specific fields to be added to the new struct, and filled in the new function: diff --git a/fs/pidfs.c b/fs/pidfs.c index 4eaf30873105..30310489f5e1 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -115,6 +115,35 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) return poll_flags; } +static long pidfd_security(struct task_struct *task, unsigned int cmd, unsigned long arg) +{ + struct pidfd_security __user *usecurity = (struct pidfd_security __user *)arg; + size_t usize = _IOC_SIZE(cmd); + struct pidfd_security ksecurity = {}; + __u64 mask; + + if (!usecurity) + return -EINVAL; + if (usize < PIDFD_SECURITY_SIZE_VER0) + return -EINVAL; /* First version, no smaller struct possible */ + + if (copy_from_user(&mask, &usecurity->mask, sizeof(mask))) + return -EFAULT; + + // TODO: fill in ksecurity + + /* + * If userspace and the kernel have the same struct size it can just + * be copied. If userspace provides an older struct, only the bits that + * userspace knows about will be copied. If userspace provides a new + * struct, only the bits that the kernel knows about will be copied. + */ + if (copy_to_user(usecurity, &ksecurity, min(usize, sizeof(ksecurity)))) + return -EFAULT; + + return 0; +} + static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long arg) { struct pidfd_info __user *uinfo = (struct pidfd_info __user *)arg; @@ -160,7 +189,7 @@ static long pidfd_info(struct task_struct *task, unsigned int cmd, unsigned long * the flag only if we can still access it. */ rcu_read_lock(); - cgrp = task_dfl_cgroup(current); + cgrp = task_dfl_cgroup(task); if (cgrp) { kinfo.cgroupid = cgroup_id(cgrp); kinfo.mask |= PIDFD_INFO_CGROUPID; @@ -209,9 +238,11 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (!task) return -ESRCH; - /* Extensible IOCTL that does not open namespace FDs, take a shortcut */ + /* Extensible IOCTLs that do not open namespace FDs, take a shortcut */ if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_INFO)) return pidfd_info(task, cmd, arg); + if (_IOC_NR(cmd) == _IOC_NR(PIDFD_GET_SECURITY)) + return pidfd_security(task, cmd, arg); if (arg) return -EINVAL; diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 4540f6301b8c..0658880a9089 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -65,6 +65,14 @@ struct pidfd_info { __u32 spare0[1]; }; +#define PIDFD_SECURITY_SIZE_VER0 8 /* sizeof first published struct */ + +struct pidfd_security { + /* This mask follows the same rules as pidfd_info.mask. */ + __u64 mask; + // TODO: add data fields and flags and increase size defined above +}; + #define PIDFS_IOCTL_MAGIC 0xFF #define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) @@ -78,5 +86,6 @@ struct pidfd_info { #define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) #define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) #define PIDFD_GET_INFO _IOWR(PIDFS_IOCTL_MAGIC, 11, struct pidfd_info) +#define PIDFD_GET_SECURITY _IOWR(PIDFS_IOCTL_MAGIC, 12, struct pidfd_security) #endif /* _UAPI_LINUX_PIDFD_H */
On Wed, 23 Oct 2024 at 00:45, <luca.boccassi@gmail.com> wrote: > > On Sat, 5 Oct 2024 at 17:06, Paul Moore <paul@paul-moore.com> wrote: > > > > On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote: > > > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote: > > > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote: > > > > ... > > > > > > [NOTE: please CC the LSM list on changes like this] > > > > > > > > Thanks Luca :) > > > > > > > > With the addition of the LSM syscalls we've created a lsm_ctx struct > > > > (see include/uapi/linux/lsm.h) that properly supports multiple LSMs. > > > > The original char ptr "secctx" approach worked back when only a single > > > > LSM was supported at any given time, but now that multiple LSMs are > > > > supported we need something richer, and it would be good to use this > > > > new struct in any new userspace API. > > > > > > > > See the lsm_get_self_attr(2) syscall for an example (defined in > > > > security/lsm_syscalls.c but effectively implemented via > > > > security_getselfattr() in security/security.c). > > > > > > Thanks for the review, makes sense to me - I had a look at those > > > examples but unfortunately it is getting a bit beyond my (very low) > > > kernel skills, so I've dropped the string-based security_context from > > > v2 but without adding something else, is there someone more familiar > > > with the LSM world that could help implementing that side? > > > > We are running a little short on devs/time in LSM land right now so I > > guess I'm the only real option (not that I have any time, but you know > > how it goes). If you can put together the ioctl side of things I can > > likely put together the LSM side fairly quickly - sound good? > > Here's a skeleton ioctl, needs lsm-specific fields to be added to the new struct, and filled in the new function: Forgot to mention, this is based on the vfs.pidfs branch of https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git
On Tue, Oct 22, 2024 at 7:56 PM Luca Boccassi <luca.boccassi@gmail.com> wrote: > On Wed, 23 Oct 2024 at 00:45, <luca.boccassi@gmail.com> wrote: > > On Sat, 5 Oct 2024 at 17:06, Paul Moore <paul@paul-moore.com> wrote: > > > On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote: > > > > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote: > > > > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote: ... > > > We are running a little short on devs/time in LSM land right now so I > > > guess I'm the only real option (not that I have any time, but you know > > > how it goes). If you can put together the ioctl side of things I can > > > likely put together the LSM side fairly quickly - sound good? > > > > Here's a skeleton ioctl, needs lsm-specific fields to be added to the new struct, and filled in the new function: > > Forgot to mention, this is based on the vfs.pidfs branch of > https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git Thanks. I'll take a closer look at this next week. In the meantime, do you have this patch in a tree somewhere publicly fetchable? -- paul-moore.com
On Fri, 25 Oct 2024 at 00:14, Paul Moore <paul@paul-moore.com> wrote: > > On Tue, Oct 22, 2024 at 7:56 PM Luca Boccassi <luca.boccassi@gmail.com> wrote: > > On Wed, 23 Oct 2024 at 00:45, <luca.boccassi@gmail.com> wrote: > > > On Sat, 5 Oct 2024 at 17:06, Paul Moore <paul@paul-moore.com> wrote: > > > > On Fri, Oct 4, 2024 at 2:48 PM Luca Boccassi <luca.boccassi@gmail.com> wrote: > > > > > On Wed, 2 Oct 2024 at 15:48, Paul Moore <paul@paul-moore.com> wrote: > > > > > > On Wed, Oct 2, 2024 at 10:25 AM <luca.boccassi@gmail.com> wrote: > > ... > > > > > We are running a little short on devs/time in LSM land right now so I > > > > guess I'm the only real option (not that I have any time, but you know > > > > how it goes). If you can put together the ioctl side of things I can > > > > likely put together the LSM side fairly quickly - sound good? > > > > > > Here's a skeleton ioctl, needs lsm-specific fields to be added to the new struct, and filled in the new function: > > > > Forgot to mention, this is based on the vfs.pidfs branch of > > https://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs.git > > Thanks. I'll take a closer look at this next week. In the meantime, > do you have this patch in a tree somewhere publicly fetchable? I've pushed it here: https://github.com/bluca/linux/tree/pidfd_ioctl_lsm
© 2016 - 2024 Red Hat, Inc.