rust/kernel/task.rs | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-)
(Note: This is not a bugfix, it just cleans up incorrect assumptions.)
Task::pid() and Task::group_leader() assume that task::pid and
task::group_leader remain constant until the task refcount drops to zero.
However, Linux has a special quirk where, when execve() is called by a
thread other than the thread group leader (the main thread), the thread
calling execve() swaps its identity with the thread group leader's,
becoming the new thread group leader. This means task::pid and
task::group_leader can't be assumed to be immutable for non-current tasks.
(The actual swapping of PIDs is implemented in exchange_tids(); the change
of leadership is in de_thread().)
For reference, you can see that accessing the ->group_leader of some random
task requires extra caution in the prlimit64() syscall, which grabs the
tasklist_lock and has a comment explaining that this is done to prevent
races with de_thread().
Signed-off-by: Jann Horn <jannh@google.com>
---
rust/kernel/task.rs | 34 ++++++++++++++++++----------------
1 file changed, 18 insertions(+), 16 deletions(-)
diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
index 49fad6de0674..989165116278 100644
--- a/rust/kernel/task.rs
+++ b/rust/kernel/task.rs
@@ -103,7 +103,7 @@ macro_rules! current {
unsafe impl Send for Task {}
// SAFETY: It's OK to access `Task` through shared references from other threads because we're
-// either accessing properties that don't change (e.g., `pid`, `group_leader`) or that are properly
+// either accessing properties that don't change or that are properly
// synchronised by C code (e.g., `signal_pending`).
unsafe impl Sync for Task {}
@@ -204,23 +204,13 @@ pub fn as_ptr(&self) -> *mut bindings::task_struct {
self.0.get()
}
- /// Returns the group leader of the given task.
- pub fn group_leader(&self) -> &Task {
- // SAFETY: The group leader of a task never changes after initialization, so reading this
- // field is not a data race.
- let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) };
-
- // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
- // and given that a task has a reference to its group leader, we know it must be valid for
- // the lifetime of the returned task reference.
- unsafe { &*ptr.cast() }
- }
-
/// Returns the PID of the given task.
pub fn pid(&self) -> Pid {
- // SAFETY: The pid of a task never changes after initialization, so reading this field is
- // not a data race.
- unsafe { *ptr::addr_of!((*self.as_ptr()).pid) }
+ // SAFETY: The pid of a task almost never changes after initialization,
+ // so reading this field is usually not a data race.
+ // The exception is a race where the task is part of a process that
+ // goes through execve(), see exchange_tids().
+ unsafe { ptr::addr_of!((*self.as_ptr()).pid).read_volatile() }
}
/// Returns the UID of the given task.
@@ -345,6 +335,18 @@ pub fn active_pid_ns(&self) -> Option<&PidNamespace> {
// `release_task()` call.
Some(unsafe { PidNamespace::from_ptr(active_ns) })
}
+
+ /// Returns the group leader of the current task.
+ pub fn group_leader(&self) -> &Task {
+ // SAFETY: The group leader of the current task never changes in syscall
+ // context (except in the implementation of execve()).
+ let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) };
+
+ // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
+ // and given that a task has a reference to its group leader, we know it must be valid for
+ // the lifetime of the returned task reference.
+ unsafe { &*ptr.cast() }
+ }
}
// SAFETY: The type invariants guarantee that `Task` is always refcounted.
---
base-commit: 192c0159402e6bfbe13de6f8379546943297783d
change-id: 20260212-rust-de_thread-0ad9154aedb0
--
Jann Horn <jannh@google.com>
On Thu, Feb 12, 2026 at 05:44:15PM +0100, Jann Horn wrote:
> (Note: This is not a bugfix, it just cleans up incorrect assumptions.)
>
> Task::pid() and Task::group_leader() assume that task::pid and
> task::group_leader remain constant until the task refcount drops to zero.
>
> However, Linux has a special quirk where, when execve() is called by a
> thread other than the thread group leader (the main thread), the thread
> calling execve() swaps its identity with the thread group leader's,
> becoming the new thread group leader. This means task::pid and
> task::group_leader can't be assumed to be immutable for non-current tasks.
> (The actual swapping of PIDs is implemented in exchange_tids(); the change
> of leadership is in de_thread().)
>
> For reference, you can see that accessing the ->group_leader of some random
> task requires extra caution in the prlimit64() syscall, which grabs the
> tasklist_lock and has a comment explaining that this is done to prevent
> races with de_thread().
>
Thank you for the patch. I think it's actually a fix to the API, so we
need to Cc: stable here. Could you also split the patches into two? One
is moving the `group_leader()` into `CurrentTask` and the other is using
*atomic load* to read `pid` (see below)?
> Signed-off-by: Jann Horn <jannh@google.com>
> ---
> rust/kernel/task.rs | 34 ++++++++++++++++++----------------
> 1 file changed, 18 insertions(+), 16 deletions(-)
>
> diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
> index 49fad6de0674..989165116278 100644
> --- a/rust/kernel/task.rs
> +++ b/rust/kernel/task.rs
> @@ -103,7 +103,7 @@ macro_rules! current {
> unsafe impl Send for Task {}
>
> // SAFETY: It's OK to access `Task` through shared references from other threads because we're
> -// either accessing properties that don't change (e.g., `pid`, `group_leader`) or that are properly
> +// either accessing properties that don't change or that are properly
> // synchronised by C code (e.g., `signal_pending`).
> unsafe impl Sync for Task {}
>
> @@ -204,23 +204,13 @@ pub fn as_ptr(&self) -> *mut bindings::task_struct {
> self.0.get()
> }
>
> - /// Returns the group leader of the given task.
> - pub fn group_leader(&self) -> &Task {
> - // SAFETY: The group leader of a task never changes after initialization, so reading this
> - // field is not a data race.
> - let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) };
> -
> - // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
> - // and given that a task has a reference to its group leader, we know it must be valid for
> - // the lifetime of the returned task reference.
> - unsafe { &*ptr.cast() }
> - }
> -
> /// Returns the PID of the given task.
> pub fn pid(&self) -> Pid {
> - // SAFETY: The pid of a task never changes after initialization, so reading this field is
> - // not a data race.
> - unsafe { *ptr::addr_of!((*self.as_ptr()).pid) }
> + // SAFETY: The pid of a task almost never changes after initialization,
> + // so reading this field is usually not a data race.
> + // The exception is a race where the task is part of a process that
> + // goes through execve(), see exchange_tids().
> + unsafe { ptr::addr_of!((*self.as_ptr()).pid).read_volatile() }
Please use
Atomic::from_ptr(&raw const (*self.as_ptr()).pid).load(Relaxed)
here. Or maybe you want to use `atomic_load()` [1]? We should avoid
using arbitrary `read_volatile()`.
[1]: https://lore.kernel.org/rust-for-linux/20260120115207.55318-3-boqun.feng@gmail.com/
Regards,
Boqun
> }
>
> /// Returns the UID of the given task.
> @@ -345,6 +335,18 @@ pub fn active_pid_ns(&self) -> Option<&PidNamespace> {
> // `release_task()` call.
> Some(unsafe { PidNamespace::from_ptr(active_ns) })
> }
> +
> + /// Returns the group leader of the current task.
> + pub fn group_leader(&self) -> &Task {
> + // SAFETY: The group leader of the current task never changes in syscall
> + // context (except in the implementation of execve()).
> + let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) };
> +
> + // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
> + // and given that a task has a reference to its group leader, we know it must be valid for
> + // the lifetime of the returned task reference.
> + unsafe { &*ptr.cast() }
> + }
> }
>
> // SAFETY: The type invariants guarantee that `Task` is always refcounted.
>
> ---
> base-commit: 192c0159402e6bfbe13de6f8379546943297783d
> change-id: 20260212-rust-de_thread-0ad9154aedb0
>
> --
> Jann Horn <jannh@google.com>
>
On Thu, Feb 12, 2026 at 6:13 PM Boqun Feng <boqun@kernel.org> wrote:
> On Thu, Feb 12, 2026 at 05:44:15PM +0100, Jann Horn wrote:
> > (Note: This is not a bugfix, it just cleans up incorrect assumptions.)
> >
> > Task::pid() and Task::group_leader() assume that task::pid and
> > task::group_leader remain constant until the task refcount drops to zero.
> >
> > However, Linux has a special quirk where, when execve() is called by a
> > thread other than the thread group leader (the main thread), the thread
> > calling execve() swaps its identity with the thread group leader's,
> > becoming the new thread group leader. This means task::pid and
> > task::group_leader can't be assumed to be immutable for non-current tasks.
> > (The actual swapping of PIDs is implemented in exchange_tids(); the change
> > of leadership is in de_thread().)
> >
> > For reference, you can see that accessing the ->group_leader of some random
> > task requires extra caution in the prlimit64() syscall, which grabs the
> > tasklist_lock and has a comment explaining that this is done to prevent
> > races with de_thread().
> >
>
> Thank you for the patch. I think it's actually a fix to the API, so we
> need to Cc: stable here.
I don't think "a fix to the API" (when talking to a kernel-internal
API) is a reason why a patch has to go into stable.
https://docs.kernel.org/process/stable-kernel-rules.html says: "It
must either fix a real bug that bothers people or just add a device
ID."
In this case, I don't think the change will cause any actual change in
userspace-visible behavior; the group_leader() change clearly has no
effect on behavior because all existing users use it on `current`, and
the pid() change also seems very unlikely to actually change compiler
output in a way that makes things less safe.
> Could you also split the patches into two? One
> is moving the `group_leader()` into `CurrentTask` and the other is using
> *atomic load* to read `pid` (see below)?
I figured these two changes were semantically related enough to go in
one patch, but sure, I'll split it up.
> > Signed-off-by: Jann Horn <jannh@google.com>
> > ---
> > rust/kernel/task.rs | 34 ++++++++++++++++++----------------
> > 1 file changed, 18 insertions(+), 16 deletions(-)
> >
> > diff --git a/rust/kernel/task.rs b/rust/kernel/task.rs
> > index 49fad6de0674..989165116278 100644
> > --- a/rust/kernel/task.rs
> > +++ b/rust/kernel/task.rs
> > @@ -103,7 +103,7 @@ macro_rules! current {
> > unsafe impl Send for Task {}
> >
> > // SAFETY: It's OK to access `Task` through shared references from other threads because we're
> > -// either accessing properties that don't change (e.g., `pid`, `group_leader`) or that are properly
> > +// either accessing properties that don't change or that are properly
> > // synchronised by C code (e.g., `signal_pending`).
> > unsafe impl Sync for Task {}
> >
> > @@ -204,23 +204,13 @@ pub fn as_ptr(&self) -> *mut bindings::task_struct {
> > self.0.get()
> > }
> >
> > - /// Returns the group leader of the given task.
> > - pub fn group_leader(&self) -> &Task {
> > - // SAFETY: The group leader of a task never changes after initialization, so reading this
> > - // field is not a data race.
> > - let ptr = unsafe { *ptr::addr_of!((*self.as_ptr()).group_leader) };
> > -
> > - // SAFETY: The lifetime of the returned task reference is tied to the lifetime of `self`,
> > - // and given that a task has a reference to its group leader, we know it must be valid for
> > - // the lifetime of the returned task reference.
> > - unsafe { &*ptr.cast() }
> > - }
> > -
> > /// Returns the PID of the given task.
> > pub fn pid(&self) -> Pid {
> > - // SAFETY: The pid of a task never changes after initialization, so reading this field is
> > - // not a data race.
> > - unsafe { *ptr::addr_of!((*self.as_ptr()).pid) }
> > + // SAFETY: The pid of a task almost never changes after initialization,
> > + // so reading this field is usually not a data race.
> > + // The exception is a race where the task is part of a process that
> > + // goes through execve(), see exchange_tids().
> > + unsafe { ptr::addr_of!((*self.as_ptr()).pid).read_volatile() }
>
> Please use
>
> Atomic::from_ptr(&raw const (*self.as_ptr()).pid).load(Relaxed)
>
> here. Or maybe you want to use `atomic_load()` [1]? We should avoid
> using arbitrary `read_volatile()`.
Ack, I'll change it to a relaxed load.
> [1]: https://lore.kernel.org/rust-for-linux/20260120115207.55318-3-boqun.feng@gmail.com/
© 2016 - 2026 Red Hat, Inc.