The means by which a pid is determined from a pidfd is duplicated, with
some callers holding a reference to the (pid)fd, and others explicitly
pinning the pid.
Introduce __pidfd_get_pid() which abstracts both approaches and provide
optional output parameters for file->f_flags and the fd (the latter of
which, if provided, prevents the function from decrementing the fd's
refernce count).
Additionally, allow the ability to open a pidfd by opening a /proc/<pid>
directory, utilised by the pidfd_send_signal() system call, providing a
pidfd_get_pid_proc() helper function to do so.
Doing this allows us to eliminate open-coded pidfd pid lookup and to
consistently handle this in one place.
This lays the groundwork for a subsequent patch which adds a new sentinel
pidfd to explicitly reference the current process (i.e. thread group
leader) without the need for a pidfd.
Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
---
include/linux/pid.h | 42 +++++++++++++++++++++++++++++++-
kernel/pid.c | 58 ++++++++++++++++++++++++++++++---------------
kernel/signal.c | 22 ++++-------------
3 files changed, 84 insertions(+), 38 deletions(-)
diff --git a/include/linux/pid.h b/include/linux/pid.h
index a3aad9b4074c..68b02eab7509 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -2,6 +2,7 @@
#ifndef _LINUX_PID_H
#define _LINUX_PID_H
+#include <linux/file.h>
#include <linux/pid_types.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -72,8 +73,47 @@ extern struct pid init_struct_pid;
struct file;
+
+/**
+ * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd.
+ *
+ * @pidfd: The pidfd whose pid we want, or the fd of a /proc/<pid> file if
+ * @alloc_proc is also set.
+ * @pin_pid: If set, then the reference counter of the returned pid is
+ * incremented. If not set, then @fd should be provided to pin the
+ * pidfd.
+ * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead
+ * of a pidfd, and this will be used to determine the pid.
+ * @flags: Output variable, if non-NULL, then the file->f_flags of the
+ * pidfd will be set here.
+ * @fd: Output variable, if non-NULL, then the pidfd reference will
+ * remain elevated and the caller will need to decrement it
+ * themselves.
+ *
+ * Returns: If successful, the pid associated with the pidfd, otherwise an
+ * error.
+ */
+struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid,
+ bool allow_proc, unsigned int *flags,
+ struct fd *fd);
+
+static inline struct pid *pidfd_get_pid(unsigned int pidfd, unsigned int *flags)
+{
+ return __pidfd_get_pid(pidfd, /* pin_pid = */ true,
+ /* allow_proc = */ false,
+ flags, /* fd = */ NULL);
+}
+
+static inline struct pid *pidfd_to_pid_proc(unsigned int pidfd,
+ unsigned int *flags,
+ struct fd *fd)
+{
+ return __pidfd_get_pid(pidfd, /* pin_pid = */ false,
+ /* allow_proc = */ true,
+ flags, fd);
+}
+
struct pid *pidfd_pid(const struct file *file);
-struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags);
struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags);
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret);
void do_notify_pidfd(struct task_struct *task);
diff --git a/kernel/pid.c b/kernel/pid.c
index 2715afb77eab..25cc1c36a1b1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -36,6 +36,7 @@
#include <linux/pid_namespace.h>
#include <linux/init_task.h>
#include <linux/syscalls.h>
+#include <linux/proc_fs.h>
#include <linux/proc_ns.h>
#include <linux/refcount.h>
#include <linux/anon_inodes.h>
@@ -534,22 +535,46 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
}
EXPORT_SYMBOL_GPL(find_ge_pid);
-struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
+struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid,
+ bool allow_proc, unsigned int *flags,
+ struct fd *fd)
{
- struct fd f;
+ struct file *file;
struct pid *pid;
+ struct fd f = fdget(pidfd);
- f = fdget(fd);
- if (!fd_file(f))
+ file = fd_file(f);
+ if (!file)
return ERR_PTR(-EBADF);
- pid = pidfd_pid(fd_file(f));
- if (!IS_ERR(pid)) {
- get_pid(pid);
- *flags = fd_file(f)->f_flags;
+ pid = pidfd_pid(file);
+ /* If we allow opening a pidfd via /proc/<pid>, do so. */
+ if (IS_ERR(pid) && allow_proc)
+ pid = tgid_pidfd_to_pid(file);
+
+ if (IS_ERR(pid)) {
+ fdput(f);
+ return pid;
}
- fdput(f);
+ if (pin_pid)
+ get_pid(pid);
+ else
+ WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */
+
+ if (flags)
+ *flags = file->f_flags;
+
+ /*
+ * If the user provides an fd output then it will handle decrementing
+ * its reference counter.
+ */
+ if (fd)
+ *fd = f;
+ else
+ /* Otherwise we release it. */
+ fdput(f);
+
return pid;
}
@@ -747,23 +772,18 @@ SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd,
unsigned int, flags)
{
struct pid *pid;
- struct fd f;
int ret;
/* flags is currently unused - make sure it's unset */
if (flags)
return -EINVAL;
- f = fdget(pidfd);
- if (!fd_file(f))
- return -EBADF;
-
- pid = pidfd_pid(fd_file(f));
+ pid = pidfd_get_pid(pidfd, NULL);
if (IS_ERR(pid))
- ret = PTR_ERR(pid);
- else
- ret = pidfd_getfd(pid, fd);
+ return PTR_ERR(pid);
- fdput(f);
+ ret = pidfd_getfd(pid, fd);
+
+ put_pid(pid);
return ret;
}
diff --git a/kernel/signal.c b/kernel/signal.c
index 4344860ffcac..868bfa674c62 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3875,17 +3875,6 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo,
return copy_siginfo_from_user(kinfo, info);
}
-static struct pid *pidfd_to_pid(const struct file *file)
-{
- struct pid *pid;
-
- pid = pidfd_pid(file);
- if (!IS_ERR(pid))
- return pid;
-
- return tgid_pidfd_to_pid(file);
-}
-
#define PIDFD_SEND_SIGNAL_FLAGS \
(PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \
PIDFD_SIGNAL_PROCESS_GROUP)
@@ -3908,10 +3897,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
siginfo_t __user *, info, unsigned int, flags)
{
int ret;
- struct fd f;
struct pid *pid;
kernel_siginfo_t kinfo;
enum pid_type type;
+ unsigned int f_flags;
+ struct fd f;
/* Enforce flags be set to 0 until we add an extension. */
if (flags & ~PIDFD_SEND_SIGNAL_FLAGS)
@@ -3921,12 +3911,8 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1)
return -EINVAL;
- f = fdget(pidfd);
- if (!fd_file(f))
- return -EBADF;
-
/* Is this a pidfd? */
- pid = pidfd_to_pid(fd_file(f));
+ pid = pidfd_to_pid_proc(pidfd, &f_flags, &f);
if (IS_ERR(pid)) {
ret = PTR_ERR(pid);
goto err;
@@ -3939,7 +3925,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig,
switch (flags) {
case 0:
/* Infer scope from the type of pidfd. */
- if (fd_file(f)->f_flags & PIDFD_THREAD)
+ if (f_flags & PIDFD_THREAD)
type = PIDTYPE_PID;
else
type = PIDTYPE_TGID;
--
2.46.2
On Fri, Oct 11, 2024 at 12:05:55PM +0100, Lorenzo Stoakes wrote: > The means by which a pid is determined from a pidfd is duplicated, with > some callers holding a reference to the (pid)fd, and others explicitly > pinning the pid. > > Introduce __pidfd_get_pid() which abstracts both approaches and provide > optional output parameters for file->f_flags and the fd (the latter of > which, if provided, prevents the function from decrementing the fd's > refernce count). > > Additionally, allow the ability to open a pidfd by opening a /proc/<pid> > directory, utilised by the pidfd_send_signal() system call, providing a > pidfd_get_pid_proc() helper function to do so. > > Doing this allows us to eliminate open-coded pidfd pid lookup and to > consistently handle this in one place. > > This lays the groundwork for a subsequent patch which adds a new sentinel > pidfd to explicitly reference the current process (i.e. thread group > leader) without the need for a pidfd. > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > --- > include/linux/pid.h | 42 +++++++++++++++++++++++++++++++- > kernel/pid.c | 58 ++++++++++++++++++++++++++++++--------------- > kernel/signal.c | 22 ++++------------- > 3 files changed, 84 insertions(+), 38 deletions(-) > > diff --git a/include/linux/pid.h b/include/linux/pid.h > index a3aad9b4074c..68b02eab7509 100644 > --- a/include/linux/pid.h > +++ b/include/linux/pid.h > @@ -2,6 +2,7 @@ > #ifndef _LINUX_PID_H > #define _LINUX_PID_H > > +#include <linux/file.h> > #include <linux/pid_types.h> > #include <linux/rculist.h> > #include <linux/rcupdate.h> > @@ -72,8 +73,47 @@ extern struct pid init_struct_pid; > > struct file; > > + > +/** > + * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd. > + * > + * @pidfd: The pidfd whose pid we want, or the fd of a /proc/<pid> file if > + * @alloc_proc is also set. > + * @pin_pid: If set, then the reference counter of the returned pid is > + * incremented. If not set, then @fd should be provided to pin the > + * pidfd. > + * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead > + * of a pidfd, and this will be used to determine the pid. > + * @flags: Output variable, if non-NULL, then the file->f_flags of the > + * pidfd will be set here. > + * @fd: Output variable, if non-NULL, then the pidfd reference will > + * remain elevated and the caller will need to decrement it > + * themselves. > + * > + * Returns: If successful, the pid associated with the pidfd, otherwise an > + * error. > + */ > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > + bool allow_proc, unsigned int *flags, > + struct fd *fd); > + > +static inline struct pid *pidfd_get_pid(unsigned int pidfd, unsigned int *flags) > +{ > + return __pidfd_get_pid(pidfd, /* pin_pid = */ true, > + /* allow_proc = */ false, > + flags, /* fd = */ NULL); > +} > + > +static inline struct pid *pidfd_to_pid_proc(unsigned int pidfd, > + unsigned int *flags, > + struct fd *fd) > +{ > + return __pidfd_get_pid(pidfd, /* pin_pid = */ false, > + /* allow_proc = */ true, > + flags, fd); > +} > + > struct pid *pidfd_pid(const struct file *file); > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); > struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); > int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); > void do_notify_pidfd(struct task_struct *task); > diff --git a/kernel/pid.c b/kernel/pid.c > index 2715afb77eab..25cc1c36a1b1 100644 > --- a/kernel/pid.c > +++ b/kernel/pid.c > @@ -36,6 +36,7 @@ > #include <linux/pid_namespace.h> > #include <linux/init_task.h> > #include <linux/syscalls.h> > +#include <linux/proc_fs.h> > #include <linux/proc_ns.h> > #include <linux/refcount.h> > #include <linux/anon_inodes.h> > @@ -534,22 +535,46 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) > } > EXPORT_SYMBOL_GPL(find_ge_pid); > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > + bool allow_proc, unsigned int *flags, > + struct fd *fd) Hm, we should never return a struct fd. A struct fd is an inherently scoped-bound concept - or at least aims to be. Simply put, we always want to have the fdget() and the fdput() in the same scope as the file pointer you can access via fd_file() is only valid as long as we're in the syscall. Ideally we mostly use CLASS(fd/fd_raw) and nearly never fdget(). The point is that this is the wrong api to expose. It would probably be wiser if you added a pidfd based fdget() inspired primitive.
On Wed, Oct 16, 2024 at 03:00:55PM +0200, Christian Brauner wrote: > On Fri, Oct 11, 2024 at 12:05:55PM +0100, Lorenzo Stoakes wrote: > > The means by which a pid is determined from a pidfd is duplicated, with > > some callers holding a reference to the (pid)fd, and others explicitly > > pinning the pid. > > > > Introduce __pidfd_get_pid() which abstracts both approaches and provide > > optional output parameters for file->f_flags and the fd (the latter of > > which, if provided, prevents the function from decrementing the fd's > > refernce count). > > > > Additionally, allow the ability to open a pidfd by opening a /proc/<pid> > > directory, utilised by the pidfd_send_signal() system call, providing a > > pidfd_get_pid_proc() helper function to do so. > > > > Doing this allows us to eliminate open-coded pidfd pid lookup and to > > consistently handle this in one place. > > > > This lays the groundwork for a subsequent patch which adds a new sentinel > > pidfd to explicitly reference the current process (i.e. thread group > > leader) without the need for a pidfd. > > > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > > --- > > include/linux/pid.h | 42 +++++++++++++++++++++++++++++++- > > kernel/pid.c | 58 ++++++++++++++++++++++++++++++--------------- > > kernel/signal.c | 22 ++++------------- > > 3 files changed, 84 insertions(+), 38 deletions(-) > > > > diff --git a/include/linux/pid.h b/include/linux/pid.h > > index a3aad9b4074c..68b02eab7509 100644 > > --- a/include/linux/pid.h > > +++ b/include/linux/pid.h > > @@ -2,6 +2,7 @@ > > #ifndef _LINUX_PID_H > > #define _LINUX_PID_H > > > > +#include <linux/file.h> > > #include <linux/pid_types.h> > > #include <linux/rculist.h> > > #include <linux/rcupdate.h> > > @@ -72,8 +73,47 @@ extern struct pid init_struct_pid; > > > > struct file; > > > > + > > +/** > > + * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd. > > + * > > + * @pidfd: The pidfd whose pid we want, or the fd of a /proc/<pid> file if > > + * @alloc_proc is also set. > > + * @pin_pid: If set, then the reference counter of the returned pid is > > + * incremented. If not set, then @fd should be provided to pin the > > + * pidfd. > > + * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead > > + * of a pidfd, and this will be used to determine the pid. > > + * @flags: Output variable, if non-NULL, then the file->f_flags of the > > + * pidfd will be set here. > > + * @fd: Output variable, if non-NULL, then the pidfd reference will > > + * remain elevated and the caller will need to decrement it > > + * themselves. > > + * > > + * Returns: If successful, the pid associated with the pidfd, otherwise an > > + * error. > > + */ > > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > > + bool allow_proc, unsigned int *flags, > > + struct fd *fd); > > + > > +static inline struct pid *pidfd_get_pid(unsigned int pidfd, unsigned int *flags) > > +{ > > + return __pidfd_get_pid(pidfd, /* pin_pid = */ true, > > + /* allow_proc = */ false, > > + flags, /* fd = */ NULL); > > +} > > + > > +static inline struct pid *pidfd_to_pid_proc(unsigned int pidfd, > > + unsigned int *flags, > > + struct fd *fd) > > +{ > > + return __pidfd_get_pid(pidfd, /* pin_pid = */ false, > > + /* allow_proc = */ true, > > + flags, fd); > > +} > > + > > struct pid *pidfd_pid(const struct file *file); > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); > > struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); > > int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); > > void do_notify_pidfd(struct task_struct *task); > > diff --git a/kernel/pid.c b/kernel/pid.c > > index 2715afb77eab..25cc1c36a1b1 100644 > > --- a/kernel/pid.c > > +++ b/kernel/pid.c > > @@ -36,6 +36,7 @@ > > #include <linux/pid_namespace.h> > > #include <linux/init_task.h> > > #include <linux/syscalls.h> > > +#include <linux/proc_fs.h> > > #include <linux/proc_ns.h> > > #include <linux/refcount.h> > > #include <linux/anon_inodes.h> > > @@ -534,22 +535,46 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) > > } > > EXPORT_SYMBOL_GPL(find_ge_pid); > > > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) > > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > > + bool allow_proc, unsigned int *flags, > > + struct fd *fd) > > Hm, we should never return a struct fd. A struct fd is an inherently > scoped-bound concept - or at least aims to be. Simply put, we always > want to have the fdget() and the fdput() in the same scope as the file > pointer you can access via fd_file() is only valid as long as we're in > the syscall. > > Ideally we mostly use CLASS(fd/fd_raw) and nearly never fdget(). The > point is that this is the wrong api to expose. > > It would probably be wiser if you added a pidfd based fdget() inspired > primitive. I think we can actually probably just avoid passing it back and pin the pid instead of the fd, which keeps the scope as before and simplifies things generally. Let me experiment with that!
Hello, kernel test robot noticed "BUG:unable_to_handle_page_fault_for_address" on: commit: e65dbb5c9051a4da2305787fd558e1d60de2275a ("[PATCH v2 1/3] pidfd: extend pidfd_get_pid() and de-duplicate pid lookup") url: https://github.com/intel-lab-lkp/linux/commits/Lorenzo-Stoakes/pidfd-extend-pidfd_get_pid-and-de-duplicate-pid-lookup/20241011-191241 base: https://git.kernel.org/cgit/linux/kernel/git/shuah/linux-kselftest.git next patch link: https://lore.kernel.org/all/8e7edaf2f648fb01a71def749f17f76c0502dee1.1728643714.git.lorenzo.stoakes@oracle.com/ patch subject: [PATCH v2 1/3] pidfd: extend pidfd_get_pid() and de-duplicate pid lookup in testcase: trinity version: trinity-i386-abe9de86-1_20230429 with following parameters: runtime: 600s config: x86_64-randconfig-072-20241015 compiler: gcc-12 test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202410161634.abca3854-lkp@intel.com [ 416.054386][ T1959] BUG: unable to handle page fault for address: ffffffff8fed9474 [ 416.055651][ T1959] #PF: supervisor write access in kernel mode [ 416.056550][ T1959] #PF: error_code(0x0003) - permissions violation [ 416.057502][ T1959] PGD 3e90f5067 P4D 3e90f5067 PUD 3e90f6063 PMD 3e50001a1 [ 416.058587][ T1959] Oops: Oops: 0003 [#1] PREEMPT SMP KASAN [ 416.059414][ T1959] CPU: 1 UID: 65534 PID: 1959 Comm: trinity-c3 Not tainted 6.12.0-rc1-00004-ge65dbb5c9051 #1 d7a38916ac9252f968706afc2c77f70fbdabe689 [ 416.061328][ T1959] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 [ 416.062850][ T1959] RIP: 0010:fput (arch/x86/include/asm/atomic64_64.h:61 include/linux/atomic/atomic-arch-fallback.h:4404 include/linux/atomic/atomic-long.h:1571 include/linux/atomic/atomic-instrumented.h:4540 fs/file_table.c:482) [ 416.063578][ T1959] Code: ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 55 48 89 e5 41 55 41 54 53 48 89 fb be 08 00 00 00 e8 96 c6 f7 ff <f0> 48 ff 0b 0f 85 dd 00 00 00 65 4c 8b 25 04 ff 0e 70 4c 8d 6b 48 All code ======== 0: ff (bad) 1: ff 66 66 jmp *0x66(%rsi) 4: 2e 0f 1f 84 00 00 00 cs nopl 0x0(%rax,%rax,1) b: 00 00 d: 0f 1f 00 nopl (%rax) 10: f3 0f 1e fa endbr64 14: 55 push %rbp 15: 48 89 e5 mov %rsp,%rbp 18: 41 55 push %r13 1a: 41 54 push %r12 1c: 53 push %rbx 1d: 48 89 fb mov %rdi,%rbx 20: be 08 00 00 00 mov $0x8,%esi 25: e8 96 c6 f7 ff call 0xfffffffffff7c6c0 2a:* f0 48 ff 0b lock decq (%rbx) <-- trapping instruction 2e: 0f 85 dd 00 00 00 jne 0x111 34: 65 4c 8b 25 04 ff 0e mov %gs:0x700eff04(%rip),%r12 # 0x700eff40 3b: 70 3c: 4c 8d 6b 48 lea 0x48(%rbx),%r13 Code starting with the faulting instruction =========================================== 0: f0 48 ff 0b lock decq (%rbx) 4: 0f 85 dd 00 00 00 jne 0xe7 a: 65 4c 8b 25 04 ff 0e mov %gs:0x700eff04(%rip),%r12 # 0x700eff16 11: 70 12: 4c 8d 6b 48 lea 0x48(%rbx),%r13 [ 416.066250][ T1959] RSP: 0018:ffffc9000299fa70 EFLAGS: 00010246 [ 416.067156][ T1959] RAX: 0000000000000001 RBX: ffffffff8fed9474 RCX: 0000000000000000 [ 416.068377][ T1959] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000 [ 416.069091][ T1980] module: module-autoload: duplicate request for module net-pf-12 [ 416.069532][ T1959] RBP: ffffc9000299fa88 R08: 0000000000000000 R09: 0000000000000000 [ 416.069538][ T1959] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000 [ 416.069541][ T1959] R13: fffffffffffffff7 R14: ffffc9000299fb70 R15: dffffc0000000000 [ 416.078460][ T1959] FS: 0000000000000000(0000) GS:ffff8883a8500000(0063) knlGS:00000000f7ef8280 [ 416.079775][ T1959] CS: 0010 DS: 002b ES: 002b CR0: 0000000080050033 [ 416.080740][ T1959] CR2: ffffffff8fed9474 CR3: 0000000120fe6000 CR4: 00000000000406f0 [ 416.081938][ T1959] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 416.083156][ T1959] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 416.084359][ T1959] Call Trace: [ 416.084939][ T1959] <TASK> [ 416.085461][ T1959] ? show_regs (arch/x86/kernel/dumpstack.c:479) [ 416.088241][ T1964] module: module-autoload: duplicate request for module net-pf-32 [ 416.089149][ T1959] ? __die (arch/x86/kernel/dumpstack.c:421 arch/x86/kernel/dumpstack.c:434) [ 416.089165][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:31) [ 416.089175][ T1959] ? page_fault_oops (arch/x86/mm/fault.c:710) [ 416.092872][ T1959] ? fput (arch/x86/include/asm/atomic64_64.h:61 include/linux/atomic/atomic-arch-fallback.h:4404 include/linux/atomic/atomic-long.h:1571 include/linux/atomic/atomic-instrumented.h:4540 fs/file_table.c:482) [ 416.093516][ T1959] ? show_fault_oops (arch/x86/mm/fault.c:643) [ 416.094304][ T1959] ? fput (arch/x86/include/asm/atomic64_64.h:61 include/linux/atomic/atomic-arch-fallback.h:4404 include/linux/atomic/atomic-long.h:1571 include/linux/atomic/atomic-instrumented.h:4540 fs/file_table.c:482) [ 416.094957][ T1959] ? search_exception_tables (kernel/extable.c:64) [ 416.095760][ T1959] ? fixup_exception (arch/x86/mm/extable.c:320) [ 416.096496][ T1959] ? validate_chain (arch/x86/include/asm/bitops.h:227 arch/x86/include/asm/bitops.h:239 include/asm-generic/bitops/instrumented-non-atomic.h:142 kernel/locking/lockdep.c:228 kernel/locking/lockdep.c:3816 kernel/locking/lockdep.c:3872) [ 416.097269][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:31) [ 416.098069][ T1959] ? kernelmode_fixup_or_oops+0x84/0xb0 [ 416.099036][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:31) [ 416.099822][ T1959] ? __bad_area_nosemaphore (arch/x86/mm/fault.c:828) [ 416.100680][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:32) [ 416.101464][ T1959] ? check_prev_add (kernel/locking/lockdep.c:3860) [ 416.102255][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:31) [ 416.103036][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:31) [ 416.103805][ T1959] ? bad_area_nosemaphore (arch/x86/mm/fault.c:835) [ 416.104574][ T1959] ? do_kern_addr_fault (arch/x86/mm/fault.c:862 arch/x86/mm/fault.c:881) [ 416.105445][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:31) [ 416.106242][ T1959] ? do_kern_addr_fault (arch/x86/mm/fault.c:1199) [ 416.107023][ T1959] ? exc_page_fault (arch/x86/mm/fault.c:1479 arch/x86/mm/fault.c:1539) [ 416.107781][ T1959] ? asm_exc_page_fault (arch/x86/include/asm/idtentry.h:623) [ 416.108538][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:31) [ 416.109332][ T1959] ? fput (arch/x86/include/asm/atomic64_64.h:61 include/linux/atomic/atomic-arch-fallback.h:4404 include/linux/atomic/atomic-long.h:1571 include/linux/atomic/atomic-instrumented.h:4540 fs/file_table.c:482) [ 416.110003][ T1959] __do_sys_pidfd_send_signal (kernel/signal.c:3968) [ 416.110881][ T1959] ? copy_siginfo_from_user32 (kernel/signal.c:3898) [ 416.111737][ T1959] ? __kasan_check_read (mm/kasan/shadow.c:32) [ 416.112533][ T1959] ? check_prev_add (kernel/locking/lockdep.c:3860) [ 416.113327][ T1959] __ia32_sys_pidfd_send_signal (kernel/signal.c:3896) [ 416.115877][ T1959] ? trace_hardirqs_on (kernel/trace/trace_preemptirq.c:63 (discriminator 22)) [ 416.116669][ T1959] ia32_sys_call (arch/x86/entry/syscall_32.c:44) [ 416.117417][ T1959] __do_fast_syscall_32 (arch/x86/entry/common.c:165 arch/x86/entry/common.c:386) [ 416.118224][ T1959] ? __lock_acquire (kernel/locking/lockdep.c:5202) [ 416.119009][ T1959] ? __task_pid_nr_ns (include/linux/rcupdate.h:337 include/linux/rcupdate.h:849 kernel/pid.c:511) [ 416.119778][ T1959] ? lock_acquire (include/trace/events/lock.h:24 kernel/locking/lockdep.c:5796) [ 416.120563][ T1959] ? __task_pid_nr_ns (include/linux/rcupdate.h:337 include/linux/rcupdate.h:849 kernel/pid.c:511) [ 416.121348][ T1959] ? find_held_lock (kernel/locking/lockdep.c:5315) [ 416.122114][ T1959] ? __lock_release+0x100/0x530 [ 416.122949][ T1959] ? __task_pid_nr_ns (include/linux/rcupdate.h:347 include/linux/rcupdate.h:880 kernel/pid.c:515) [ 416.123737][ T1959] ? reacquire_held_locks (kernel/locking/lockdep.c:5476) [ 416.124570][ T1959] ? __task_pid_nr_ns (include/linux/rcupdate.h:347 include/linux/rcupdate.h:880 kernel/pid.c:515) [ 416.125359][ T1959] ? syscall_exit_to_user_mode (include/linux/entry-common.h:321 kernel/entry/common.c:207 kernel/entry/common.c:218) [ 416.126234][ T1959] ? syscall_exit_to_user_mode (kernel/entry/common.c:221) [ 416.127090][ T1959] ? __do_fast_syscall_32 (arch/x86/entry/common.c:390) [ 416.127922][ T1959] ? syscall_exit_to_user_mode (kernel/entry/common.c:221) [ 416.128760][ T1959] ? __do_fast_syscall_32 (arch/x86/entry/common.c:390) [ 416.129599][ T1959] ? __do_fast_syscall_32 (arch/x86/entry/common.c:390) [ 416.130419][ T1959] do_fast_syscall_32 (arch/x86/entry/common.c:411) [ 416.131158][ T1959] do_SYSENTER_32 (arch/x86/entry/common.c:450) [ 416.131840][ T1959] entry_SYSENTER_compat_after_hwframe (arch/x86/entry/entry_64_compat.S:127) [ 416.132788][ T1959] RIP: 0023:0xf7efd579 [ 416.133446][ T1959] Code: b8 01 10 06 03 74 b4 01 10 07 03 74 b0 01 10 08 03 74 d8 01 00 00 00 00 00 00 00 00 00 00 00 00 00 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90 90 90 90 8d b4 26 00 00 00 00 8d b4 26 00 00 00 00 All code ======== 0: b8 01 10 06 03 mov $0x3061001,%eax 5: 74 b4 je 0xffffffffffffffbb 7: 01 10 add %edx,(%rax) 9: 07 (bad) a: 03 74 b0 01 add 0x1(%rax,%rsi,4),%esi e: 10 08 adc %cl,(%rax) 10: 03 74 d8 01 add 0x1(%rax,%rbx,8),%esi ... 20: 00 51 52 add %dl,0x52(%rcx) 23: 55 push %rbp 24:* 89 e5 mov %esp,%ebp <-- trapping instruction 26: 0f 34 sysenter 28: cd 80 int $0x80 2a: 5d pop %rbp 2b: 5a pop %rdx 2c: 59 pop %rcx 2d: c3 ret 2e: 90 nop 2f: 90 nop 30: 90 nop 31: 90 nop 32: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi 39: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi Code starting with the faulting instruction =========================================== 0: 5d pop %rbp 1: 5a pop %rdx 2: 59 pop %rcx 3: c3 ret 4: 90 nop 5: 90 nop 6: 90 nop 7: 90 nop 8: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi f: 8d b4 26 00 00 00 00 lea 0x0(%rsi,%riz,1),%esi The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20241016/202410161634.abca3854-lkp@intel.com -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki
On Wed, Oct 16, 2024 at 04:50:56PM +0800, kernel test robot wrote: > > > Hello, > > kernel test robot noticed "BUG:unable_to_handle_page_fault_for_address" on: Thanks, see below for analysis. > > commit: e65dbb5c9051a4da2305787fd558e1d60de2275a ("[PATCH v2 1/3] pidfd: extend pidfd_get_pid() and de-duplicate pid lookup") > url: https://github.com/intel-lab-lkp/linux/commits/Lorenzo-Stoakes/pidfd-extend-pidfd_get_pid-and-de-duplicate-pid-lookup/20241011-191241 > base: https://git.kernel.org/cgit/linux/kernel/git/shuah/linux-kselftest.git next > patch link: https://lore.kernel.org/all/8e7edaf2f648fb01a71def749f17f76c0502dee1.1728643714.git.lorenzo.stoakes@oracle.com/ > patch subject: [PATCH v2 1/3] pidfd: extend pidfd_get_pid() and de-duplicate pid lookup > > in testcase: trinity > version: trinity-i386-abe9de86-1_20230429 > with following parameters: > > runtime: 600s > > > > config: x86_64-randconfig-072-20241015 > compiler: gcc-12 > test machine: qemu-system-x86_64 -enable-kvm -cpu SandyBridge -smp 2 -m 16G > > (please refer to attached dmesg/kmsg for entire log/backtrace) > > > > If you fix the issue in a separate patch/commit (i.e. not just a new version of > the same patch/commit), kindly add following tags > | Reported-by: kernel test robot <oliver.sang@intel.com> > | Closes: https://lore.kernel.org/oe-lkp/202410161634.abca3854-lkp@intel.com > > > [ 416.054386][ T1959] BUG: unable to handle page fault for address: ffffffff8fed9474 > [ 416.055651][ T1959] #PF: supervisor write access in kernel mode > [ 416.056550][ T1959] #PF: error_code(0x0003) - permissions violation > [ 416.057502][ T1959] PGD 3e90f5067 P4D 3e90f5067 PUD 3e90f6063 PMD 3e50001a1 > [ 416.058587][ T1959] Oops: Oops: 0003 [#1] PREEMPT SMP KASAN > [ 416.059414][ T1959] CPU: 1 UID: 65534 PID: 1959 Comm: trinity-c3 Not tainted 6.12.0-rc1-00004-ge65dbb5c9051 #1 d7a38916ac9252f968706afc2c77f70fbdabe689 > [ 416.061328][ T1959] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.2-debian-1.16.2-1 04/01/2014 > [ 416.062850][ T1959] RIP: 0010:fput (arch/x86/include/asm/atomic64_64.h:61 include/linux/atomic/atomic-arch-fallback.h:4404 include/linux/atomic/atomic-long.h:1571 include/linux/atomic/atomic-instrumented.h:4540 fs/file_table.c:482) > [ 416.063578][ T1959] Code: ff ff 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 00 f3 0f 1e fa 55 48 89 e5 41 55 41 54 53 48 89 fb be 08 00 00 00 e8 96 c6 f7 ff <f0> 48 ff 0b 0f 85 dd 00 00 00 65 4c 8b 25 04 ff 0e 70 4c 8d 6b 48 > All code > ======== > 0: ff (bad) > 1: ff 66 66 jmp *0x66(%rsi) > 4: 2e 0f 1f 84 00 00 00 cs nopl 0x0(%rax,%rax,1) > b: 00 00 > d: 0f 1f 00 nopl (%rax) > 10: f3 0f 1e fa endbr64 > 14: 55 push %rbp > 15: 48 89 e5 mov %rsp,%rbp > 18: 41 55 push %r13 > 1a: 41 54 push %r12 > 1c: 53 push %rbx > 1d: 48 89 fb mov %rdi,%rbx > 20: be 08 00 00 00 mov $0x8,%esi > 25: e8 96 c6 f7 ff call 0xfffffffffff7c6c0 > 2a:* f0 48 ff 0b lock decq (%rbx) <-- trapping instruction OK so this looks like the fput() invoking atomic_long_dec_and_test() on an invalid &file->f_count. It looks like 0xffffffff8fed9474 in RBX is the file... And that's because I'm not setting f in SYSCALL_DEFINE4(pidfd_send_signal, ...) at: pidfd_to_pid_proc(pidfd, &f_flags, &f); On error and yet then jump to err: fdput(f); return ret; Which is trying to fdput() (thus fput()) the f, ugh. OK I will fix this + respin, thanks for the report! [snip]
On Fri, Oct 11, 2024 at 4:06 AM Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote: > > The means by which a pid is determined from a pidfd is duplicated, with > some callers holding a reference to the (pid)fd, and others explicitly > pinning the pid. > > Introduce __pidfd_get_pid() which abstracts both approaches and provide > optional output parameters for file->f_flags and the fd (the latter of > which, if provided, prevents the function from decrementing the fd's > refernce count). > > Additionally, allow the ability to open a pidfd by opening a /proc/<pid> > directory, utilised by the pidfd_send_signal() system call, providing a > pidfd_get_pid_proc() helper function to do so. > > Doing this allows us to eliminate open-coded pidfd pid lookup and to > consistently handle this in one place. > > This lays the groundwork for a subsequent patch which adds a new sentinel > pidfd to explicitly reference the current process (i.e. thread group > leader) without the need for a pidfd. > > Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> > --- > include/linux/pid.h | 42 +++++++++++++++++++++++++++++++- > kernel/pid.c | 58 ++++++++++++++++++++++++++++++--------------- > kernel/signal.c | 22 ++++------------- > 3 files changed, 84 insertions(+), 38 deletions(-) > > diff --git a/include/linux/pid.h b/include/linux/pid.h > index a3aad9b4074c..68b02eab7509 100644 > --- a/include/linux/pid.h > +++ b/include/linux/pid.h > @@ -2,6 +2,7 @@ > #ifndef _LINUX_PID_H > #define _LINUX_PID_H > > +#include <linux/file.h> > #include <linux/pid_types.h> > #include <linux/rculist.h> > #include <linux/rcupdate.h> > @@ -72,8 +73,47 @@ extern struct pid init_struct_pid; > > struct file; > > + > +/** > + * __pidfd_get_pid() - Retrieve a pid associated with the specified pidfd. > + * > + * @pidfd: The pidfd whose pid we want, or the fd of a /proc/<pid> file if > + * @alloc_proc is also set. > + * @pin_pid: If set, then the reference counter of the returned pid is > + * incremented. If not set, then @fd should be provided to pin the > + * pidfd. > + * @allow_proc: If set, then an fd of a /proc/<pid> file can be passed instead > + * of a pidfd, and this will be used to determine the pid. > + * @flags: Output variable, if non-NULL, then the file->f_flags of the > + * pidfd will be set here. > + * @fd: Output variable, if non-NULL, then the pidfd reference will > + * remain elevated and the caller will need to decrement it > + * themselves. > + * > + * Returns: If successful, the pid associated with the pidfd, otherwise an > + * error. > + */ > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > + bool allow_proc, unsigned int *flags, > + struct fd *fd); > + > +static inline struct pid *pidfd_get_pid(unsigned int pidfd, unsigned int *flags) > +{ > + return __pidfd_get_pid(pidfd, /* pin_pid = */ true, > + /* allow_proc = */ false, > + flags, /* fd = */ NULL); > +} > + > +static inline struct pid *pidfd_to_pid_proc(unsigned int pidfd, > + unsigned int *flags, > + struct fd *fd) > +{ > + return __pidfd_get_pid(pidfd, /* pin_pid = */ false, > + /* allow_proc = */ true, > + flags, fd); > +} > + > struct pid *pidfd_pid(const struct file *file); > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); > struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); > int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); > void do_notify_pidfd(struct task_struct *task); > diff --git a/kernel/pid.c b/kernel/pid.c > index 2715afb77eab..25cc1c36a1b1 100644 > --- a/kernel/pid.c > +++ b/kernel/pid.c > @@ -36,6 +36,7 @@ > #include <linux/pid_namespace.h> > #include <linux/init_task.h> > #include <linux/syscalls.h> > +#include <linux/proc_fs.h> > #include <linux/proc_ns.h> > #include <linux/refcount.h> > #include <linux/anon_inodes.h> > @@ -534,22 +535,46 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns) > } > EXPORT_SYMBOL_GPL(find_ge_pid); > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > + bool allow_proc, unsigned int *flags, > + struct fd *fd) > { > - struct fd f; > + struct file *file; > struct pid *pid; > + struct fd f = fdget(pidfd); > > - f = fdget(fd); > - if (!fd_file(f)) > + file = fd_file(f); > + if (!file) > return ERR_PTR(-EBADF); > > - pid = pidfd_pid(fd_file(f)); > - if (!IS_ERR(pid)) { > - get_pid(pid); > - *flags = fd_file(f)->f_flags; > + pid = pidfd_pid(file); > + /* If we allow opening a pidfd via /proc/<pid>, do so. */ > + if (IS_ERR(pid) && allow_proc) > + pid = tgid_pidfd_to_pid(file); > + > + if (IS_ERR(pid)) { > + fdput(f); > + return pid; > } > > - fdput(f); > + if (pin_pid) > + get_pid(pid); > + else > + WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */ > + > + if (flags) > + *flags = file->f_flags; > + > + /* > + * If the user provides an fd output then it will handle decrementing > + * its reference counter. > + */ > + if (fd) > + *fd = f; > + else > + /* Otherwise we release it. */ > + fdput(f); > + > return pid; > } There is an EXPORT_SYMBOL_GPL(pidfd_get_pid) right after this line. It should also be changed to EXPORT_SYMBOL_GPL(__pidfd_get_pid), otherwise __pidfd_get_pid() will not be exported. A module calling pidfd_get_pid() now inlined in the header file will try to call __pidfd_get_pid() and will have trouble resolving this symbol. > > @@ -747,23 +772,18 @@ SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd, > unsigned int, flags) > { > struct pid *pid; > - struct fd f; > int ret; > > /* flags is currently unused - make sure it's unset */ > if (flags) > return -EINVAL; > > - f = fdget(pidfd); > - if (!fd_file(f)) > - return -EBADF; > - > - pid = pidfd_pid(fd_file(f)); > + pid = pidfd_get_pid(pidfd, NULL); > if (IS_ERR(pid)) > - ret = PTR_ERR(pid); > - else > - ret = pidfd_getfd(pid, fd); > + return PTR_ERR(pid); > > - fdput(f); > + ret = pidfd_getfd(pid, fd); > + > + put_pid(pid); > return ret; > } > diff --git a/kernel/signal.c b/kernel/signal.c > index 4344860ffcac..868bfa674c62 100644 > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -3875,17 +3875,6 @@ static int copy_siginfo_from_user_any(kernel_siginfo_t *kinfo, > return copy_siginfo_from_user(kinfo, info); > } > > -static struct pid *pidfd_to_pid(const struct file *file) > -{ > - struct pid *pid; > - > - pid = pidfd_pid(file); > - if (!IS_ERR(pid)) > - return pid; > - > - return tgid_pidfd_to_pid(file); > -} > - > #define PIDFD_SEND_SIGNAL_FLAGS \ > (PIDFD_SIGNAL_THREAD | PIDFD_SIGNAL_THREAD_GROUP | \ > PIDFD_SIGNAL_PROCESS_GROUP) > @@ -3908,10 +3897,11 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, > siginfo_t __user *, info, unsigned int, flags) > { > int ret; > - struct fd f; > struct pid *pid; > kernel_siginfo_t kinfo; > enum pid_type type; > + unsigned int f_flags; > + struct fd f; > > /* Enforce flags be set to 0 until we add an extension. */ > if (flags & ~PIDFD_SEND_SIGNAL_FLAGS) > @@ -3921,12 +3911,8 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, > if (hweight32(flags & PIDFD_SEND_SIGNAL_FLAGS) > 1) > return -EINVAL; > > - f = fdget(pidfd); > - if (!fd_file(f)) > - return -EBADF; > - > /* Is this a pidfd? */ > - pid = pidfd_to_pid(fd_file(f)); > + pid = pidfd_to_pid_proc(pidfd, &f_flags, &f); > if (IS_ERR(pid)) { > ret = PTR_ERR(pid); > goto err; > @@ -3939,7 +3925,7 @@ SYSCALL_DEFINE4(pidfd_send_signal, int, pidfd, int, sig, > switch (flags) { > case 0: > /* Infer scope from the type of pidfd. */ > - if (fd_file(f)->f_flags & PIDFD_THREAD) > + if (f_flags & PIDFD_THREAD) > type = PIDTYPE_PID; > else > type = PIDTYPE_TGID; > -- > 2.46.2 >
On Tue, Oct 15, 2024 at 12:40:41PM -0700, Suren Baghdasaryan wrote: [snip] > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) > > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > > + bool allow_proc, unsigned int *flags, > > + struct fd *fd) > > { > > - struct fd f; > > + struct file *file; > > struct pid *pid; > > + struct fd f = fdget(pidfd); > > > > - f = fdget(fd); > > - if (!fd_file(f)) > > + file = fd_file(f); > > + if (!file) > > return ERR_PTR(-EBADF); > > > > - pid = pidfd_pid(fd_file(f)); > > - if (!IS_ERR(pid)) { > > - get_pid(pid); > > - *flags = fd_file(f)->f_flags; > > + pid = pidfd_pid(file); > > + /* If we allow opening a pidfd via /proc/<pid>, do so. */ > > + if (IS_ERR(pid) && allow_proc) > > + pid = tgid_pidfd_to_pid(file); > > + > > + if (IS_ERR(pid)) { > > + fdput(f); > > + return pid; > > } > > > > - fdput(f); > > + if (pin_pid) > > + get_pid(pid); > > + else > > + WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */ > > + > > + if (flags) > > + *flags = file->f_flags; > > + > > + /* > > + * If the user provides an fd output then it will handle decrementing > > + * its reference counter. > > + */ > > + if (fd) > > + *fd = f; > > + else > > + /* Otherwise we release it. */ > > + fdput(f); > > + > > return pid; > > } > > There is an EXPORT_SYMBOL_GPL(pidfd_get_pid) right after this line. It > should also be changed to EXPORT_SYMBOL_GPL(__pidfd_get_pid), > otherwise __pidfd_get_pid() will not be exported. A module calling > pidfd_get_pid() now inlined in the header file will try to call > __pidfd_get_pid() and will have trouble resolving this symbol. Hmm hang on not there isn't? I don't see that anywhere? [snip]
On Tue, Oct 15, 2024 at 11:05 PM Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote: > > On Tue, Oct 15, 2024 at 12:40:41PM -0700, Suren Baghdasaryan wrote: > [snip] > > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) > > > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > > > + bool allow_proc, unsigned int *flags, > > > + struct fd *fd) > > > { > > > - struct fd f; > > > + struct file *file; > > > struct pid *pid; > > > + struct fd f = fdget(pidfd); > > > > > > - f = fdget(fd); > > > - if (!fd_file(f)) > > > + file = fd_file(f); > > > + if (!file) > > > return ERR_PTR(-EBADF); > > > > > > - pid = pidfd_pid(fd_file(f)); > > > - if (!IS_ERR(pid)) { > > > - get_pid(pid); > > > - *flags = fd_file(f)->f_flags; > > > + pid = pidfd_pid(file); > > > + /* If we allow opening a pidfd via /proc/<pid>, do so. */ > > > + if (IS_ERR(pid) && allow_proc) > > > + pid = tgid_pidfd_to_pid(file); > > > + > > > + if (IS_ERR(pid)) { > > > + fdput(f); > > > + return pid; > > > } > > > > > > - fdput(f); > > > + if (pin_pid) > > > + get_pid(pid); > > > + else > > > + WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */ > > > + > > > + if (flags) > > > + *flags = file->f_flags; > > > + > > > + /* > > > + * If the user provides an fd output then it will handle decrementing > > > + * its reference counter. > > > + */ > > > + if (fd) > > > + *fd = f; > > > + else > > > + /* Otherwise we release it. */ > > > + fdput(f); > > > + > > > return pid; > > > } > > > > There is an EXPORT_SYMBOL_GPL(pidfd_get_pid) right after this line. It > > should also be changed to EXPORT_SYMBOL_GPL(__pidfd_get_pid), > > otherwise __pidfd_get_pid() will not be exported. A module calling > > pidfd_get_pid() now inlined in the header file will try to call > > __pidfd_get_pid() and will have trouble resolving this symbol. > > Hmm hang on not there isn't? I don't see that anywhere? Doh! Sorry, I didn't realize the export was an out-of-tree Android change. Never mind... > > [snip]
On Wed, Oct 16, 2024 at 01:16:15AM -0700, Suren Baghdasaryan wrote: > On Tue, Oct 15, 2024 at 11:05 PM Lorenzo Stoakes > <lorenzo.stoakes@oracle.com> wrote: > > > > On Tue, Oct 15, 2024 at 12:40:41PM -0700, Suren Baghdasaryan wrote: > > [snip] > > > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) > > > > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > > > > + bool allow_proc, unsigned int *flags, > > > > + struct fd *fd) > > > > { > > > > - struct fd f; > > > > + struct file *file; > > > > struct pid *pid; > > > > + struct fd f = fdget(pidfd); > > > > > > > > - f = fdget(fd); > > > > - if (!fd_file(f)) > > > > + file = fd_file(f); > > > > + if (!file) > > > > return ERR_PTR(-EBADF); > > > > > > > > - pid = pidfd_pid(fd_file(f)); > > > > - if (!IS_ERR(pid)) { > > > > - get_pid(pid); > > > > - *flags = fd_file(f)->f_flags; > > > > + pid = pidfd_pid(file); > > > > + /* If we allow opening a pidfd via /proc/<pid>, do so. */ > > > > + if (IS_ERR(pid) && allow_proc) > > > > + pid = tgid_pidfd_to_pid(file); > > > > + > > > > + if (IS_ERR(pid)) { > > > > + fdput(f); > > > > + return pid; > > > > } > > > > > > > > - fdput(f); > > > > + if (pin_pid) > > > > + get_pid(pid); > > > > + else > > > > + WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */ > > > > + > > > > + if (flags) > > > > + *flags = file->f_flags; > > > > + > > > > + /* > > > > + * If the user provides an fd output then it will handle decrementing > > > > + * its reference counter. > > > > + */ > > > > + if (fd) > > > > + *fd = f; > > > > + else > > > > + /* Otherwise we release it. */ > > > > + fdput(f); > > > > + > > > > return pid; > > > > } > > > > > > There is an EXPORT_SYMBOL_GPL(pidfd_get_pid) right after this line. It > > > should also be changed to EXPORT_SYMBOL_GPL(__pidfd_get_pid), > > > otherwise __pidfd_get_pid() will not be exported. A module calling > > > pidfd_get_pid() now inlined in the header file will try to call > > > __pidfd_get_pid() and will have trouble resolving this symbol. > > > > Hmm hang on not there isn't? I don't see that anywhere? > > Doh! Sorry, I didn't realize the export was an out-of-tree Android > change. Never mind... No probs :P just glad I didn't miss something in this series! Hey maybe a motivation to upstream some of this? ;) > > > > > [snip]
On Wed, Oct 16, 2024 at 1:22 AM Lorenzo Stoakes <lorenzo.stoakes@oracle.com> wrote: > > On Wed, Oct 16, 2024 at 01:16:15AM -0700, Suren Baghdasaryan wrote: > > On Tue, Oct 15, 2024 at 11:05 PM Lorenzo Stoakes > > <lorenzo.stoakes@oracle.com> wrote: > > > > > > On Tue, Oct 15, 2024 at 12:40:41PM -0700, Suren Baghdasaryan wrote: > > > [snip] > > > > > -struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags) > > > > > +struct pid *__pidfd_get_pid(unsigned int pidfd, bool pin_pid, > > > > > + bool allow_proc, unsigned int *flags, > > > > > + struct fd *fd) > > > > > { > > > > > - struct fd f; > > > > > + struct file *file; > > > > > struct pid *pid; > > > > > + struct fd f = fdget(pidfd); > > > > > > > > > > - f = fdget(fd); > > > > > - if (!fd_file(f)) > > > > > + file = fd_file(f); > > > > > + if (!file) > > > > > return ERR_PTR(-EBADF); > > > > > > > > > > - pid = pidfd_pid(fd_file(f)); > > > > > - if (!IS_ERR(pid)) { > > > > > - get_pid(pid); > > > > > - *flags = fd_file(f)->f_flags; > > > > > + pid = pidfd_pid(file); > > > > > + /* If we allow opening a pidfd via /proc/<pid>, do so. */ > > > > > + if (IS_ERR(pid) && allow_proc) > > > > > + pid = tgid_pidfd_to_pid(file); > > > > > + > > > > > + if (IS_ERR(pid)) { > > > > > + fdput(f); > > > > > + return pid; > > > > > } > > > > > > > > > > - fdput(f); > > > > > + if (pin_pid) > > > > > + get_pid(pid); > > > > > + else > > > > > + WARN_ON_ONCE(!fd); /* Nothing to keep pid/pidfd around? */ > > > > > + > > > > > + if (flags) > > > > > + *flags = file->f_flags; > > > > > + > > > > > + /* > > > > > + * If the user provides an fd output then it will handle decrementing > > > > > + * its reference counter. > > > > > + */ > > > > > + if (fd) > > > > > + *fd = f; > > > > > + else > > > > > + /* Otherwise we release it. */ > > > > > + fdput(f); > > > > > + > > > > > return pid; > > > > > } > > > > > > > > There is an EXPORT_SYMBOL_GPL(pidfd_get_pid) right after this line. It > > > > should also be changed to EXPORT_SYMBOL_GPL(__pidfd_get_pid), > > > > otherwise __pidfd_get_pid() will not be exported. A module calling > > > > pidfd_get_pid() now inlined in the header file will try to call > > > > __pidfd_get_pid() and will have trouble resolving this symbol. > > > > > > Hmm hang on not there isn't? I don't see that anywhere? > > > > Doh! Sorry, I didn't realize the export was an out-of-tree Android > > change. Never mind... > > No probs :P just glad I didn't miss something in this series! > > Hey maybe a motivation to upstream some of this? ;) I wish... Without an upstream user the exports are not accepted upstream and unfortunately Android vendors often resist upstreaming their modules. > > > > > > > > > [snip]
© 2016 - 2024 Red Hat, Inc.