[patch V3 16/18] posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held

Thomas Gleixner posted 18 patches 9 months, 2 weeks ago
[patch V3 16/18] posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held
Posted by Thomas Gleixner 9 months, 2 weeks ago
The readout of /proc/$PID/timers holds sighand::siglock with interrupts
disabled. That is required to protect against concurrent modifications of
the task::signal::posix_timers list because the list is not RCU safe.

With the conversion of the timer storage to a RCU protected hlist, this is
not longer required.

The only requirement is to protect the returned entry against a concurrent
free, which is trivial as the timers are RCU protected.

Removing the trylock of sighand::siglock is benign because the life time of
task_struct::signal is bound to the life time of the task_struct itself.

There are two scenarios where this matters:

  1) The process is life and not about to be checkpointed

  2) The process is stopped via ptrace for checkpointing

#1 is a racy snapshot of the armed timers and nothing can rely on it. It's
   not more than debug information and it has been that way before because
   sighand lock is dropped when the buffer is full and the restart of
   the iteration might find a completely different set of timers.

   The task and therefore task::signal cannot be freed as timers_start()
   acquired a reference count via get_pid_task().

#2 the process is stopped for checkpointing so nothing can delete or create
   timers at this point. Neither can the process exit during the traversal.

   If CRIU fails to observe an exit in progress prior to the dissimination
   of the timers, then there are more severe problems to solve in the CRIU
   mechanics as they can't rely on posix timers being enabled in the first
   place.

Therefore replace the lock acquisition with rcu_read_lock() and switch the
timer storage traversal over to seq_hlist_*_rcu().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

---
 fs/proc/base.c |   48 ++++++++++++++++++++----------------------------
 1 file changed, 20 insertions(+), 28 deletions(-)

--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2497,11 +2497,9 @@ static const struct file_operations proc
 
 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
 struct timers_private {
-	struct pid *pid;
-	struct task_struct *task;
-	struct sighand_struct *sighand;
-	struct pid_namespace *ns;
-	unsigned long flags;
+	struct pid		*pid;
+	struct task_struct	*task;
+	struct pid_namespace	*ns;
 };
 
 static void *timers_start(struct seq_file *m, loff_t *pos)
@@ -2512,54 +2510,48 @@ static void *timers_start(struct seq_fil
 	if (!tp->task)
 		return ERR_PTR(-ESRCH);
 
-	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
-	if (!tp->sighand)
-		return ERR_PTR(-ESRCH);
-
-	return seq_hlist_start(&tp->task->signal->posix_timers, *pos);
+	rcu_read_lock();
+	return seq_hlist_start_rcu(&tp->task->signal->posix_timers, *pos);
 }
 
 static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct timers_private *tp = m->private;
-	return seq_hlist_next(v, &tp->task->signal->posix_timers, pos);
+
+	return seq_hlist_next_rcu(v, &tp->task->signal->posix_timers, pos);
 }
 
 static void timers_stop(struct seq_file *m, void *v)
 {
 	struct timers_private *tp = m->private;
 
-	if (tp->sighand) {
-		unlock_task_sighand(tp->task, &tp->flags);
-		tp->sighand = NULL;
-	}
-
 	if (tp->task) {
 		put_task_struct(tp->task);
 		tp->task = NULL;
+		rcu_read_unlock();
 	}
 }
 
 static int show_timer(struct seq_file *m, void *v)
 {
-	struct k_itimer *timer;
-	struct timers_private *tp = m->private;
-	int notify;
 	static const char * const nstr[] = {
-		[SIGEV_SIGNAL] = "signal",
-		[SIGEV_NONE] = "none",
-		[SIGEV_THREAD] = "thread",
+		[SIGEV_SIGNAL]	= "signal",
+		[SIGEV_NONE]	= "none",
+		[SIGEV_THREAD]	= "thread",
 	};
 
-	timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list);
-	notify = timer->it_sigev_notify;
+	struct k_itimer *timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list);
+	struct timers_private *tp = m->private;
+	int notify = timer->it_sigev_notify;
+
+	guard(spinlock_irq)(&timer->it_lock);
+	if (!posixtimer_valid(timer))
+		return 0;
 
 	seq_printf(m, "ID: %d\n", timer->it_id);
-	seq_printf(m, "signal: %d/%px\n",
-		   timer->sigq.info.si_signo,
+	seq_printf(m, "signal: %d/%px\n", timer->sigq.info.si_signo,
 		   timer->sigq.info.si_value.sival_ptr);
-	seq_printf(m, "notify: %s/%s.%d\n",
-		   nstr[notify & ~SIGEV_THREAD_ID],
+	seq_printf(m, "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID],
 		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
 		   pid_nr_ns(timer->it_pid, tp->ns));
 	seq_printf(m, "ClockID: %d\n", timer->it_clock);
Re: [patch V3 16/18] posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held
Posted by Frederic Weisbecker 9 months, 1 week ago
Le Sat, Mar 08, 2025 at 05:48:45PM +0100, Thomas Gleixner a écrit :
> The readout of /proc/$PID/timers holds sighand::siglock with interrupts
> disabled. That is required to protect against concurrent modifications of
> the task::signal::posix_timers list because the list is not RCU safe.
> 
> With the conversion of the timer storage to a RCU protected hlist, this is
> not longer required.
> 
> The only requirement is to protect the returned entry against a concurrent
> free, which is trivial as the timers are RCU protected.
> 
> Removing the trylock of sighand::siglock is benign because the life time of
> task_struct::signal is bound to the life time of the task_struct itself.
> 
> There are two scenarios where this matters:
> 
>   1) The process is life and not about to be checkpointed
> 
>   2) The process is stopped via ptrace for checkpointing
> 
> #1 is a racy snapshot of the armed timers and nothing can rely on it. It's
>    not more than debug information and it has been that way before because
>    sighand lock is dropped when the buffer is full and the restart of
>    the iteration might find a completely different set of timers.
> 
>    The task and therefore task::signal cannot be freed as timers_start()
>    acquired a reference count via get_pid_task().
> 
> #2 the process is stopped for checkpointing so nothing can delete or create
>    timers at this point. Neither can the process exit during the traversal.
> 
>    If CRIU fails to observe an exit in progress prior to the dissimination
>    of the timers, then there are more severe problems to solve in the CRIU
>    mechanics as they can't rely on posix timers being enabled in the first
>    place.
> 
> Therefore replace the lock acquisition with rcu_read_lock() and switch the
> timer storage traversal over to seq_hlist_*_rcu().
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Re: [patch V3 16/18] posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held
Posted by Cyrill Gorcunov 9 months, 2 weeks ago
On Sat, Mar 08, 2025 at 05:48:45PM +0100, Thomas Gleixner wrote:
...
>  
>  static int show_timer(struct seq_file *m, void *v)
>  {
> -	struct k_itimer *timer;
> -	struct timers_private *tp = m->private;
> -	int notify;
>  	static const char * const nstr[] = {
> +		[SIGEV_SIGNAL]	= "signal",
> +		[SIGEV_NONE]	= "none",
> +		[SIGEV_THREAD]	= "thread",
>  	};
>  
...
> -	seq_printf(m, "notify: %s/%s.%d\n",
> -		   nstr[notify & ~SIGEV_THREAD_ID],
> +	seq_printf(m, "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID],
>  		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
>  		   pid_nr_ns(timer->it_pid, tp->ns));
...

Btw this nstr[notify & ~SIGEV_THREAD_ID] has been always fishy since ~SIGEV_THREAD_ID
doesn't give a proper mask over nstr size :-) It just happen to work but if for some
reason ::it_sigev_notify get screwed we will get a surprise. I think later (not in this
series) we better provide an explicit bitwise mask here.

	Cyrill
[tip: timers/core] posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held
Posted by tip-bot2 for Thomas Gleixner 9 months, 1 week ago
The following commit has been merged into the timers/core branch of tip:

Commit-ID:     2dc4dbf89cf186639c25c1b04a07c11496f060ad
Gitweb:        https://git.kernel.org/tip/2dc4dbf89cf186639c25c1b04a07c11496f060ad
Author:        Thomas Gleixner <tglx@linutronix.de>
AuthorDate:    Sat, 08 Mar 2025 17:48:45 +01:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Thu, 13 Mar 2025 12:07:18 +01:00

posix-timers: Dont iterate /proc/$PID/timers with sighand:: Siglock held

The readout of /proc/$PID/timers holds sighand::siglock with interrupts
disabled. That is required to protect against concurrent modifications of
the task::signal::posix_timers list because the list is not RCU safe.

With the conversion of the timer storage to a RCU protected hlist, this is
not longer required.

The only requirement is to protect the returned entry against a concurrent
free, which is trivial as the timers are RCU protected.

Removing the trylock of sighand::siglock is benign because the life time of
task_struct::signal is bound to the life time of the task_struct itself.

There are two scenarios where this matters:

  1) The process is life and not about to be checkpointed

  2) The process is stopped via ptrace for checkpointing

#1 is a racy snapshot of the armed timers and nothing can rely on it. It's
   not more than debug information and it has been that way before because
   sighand lock is dropped when the buffer is full and the restart of
   the iteration might find a completely different set of timers.

   The task and therefore task::signal cannot be freed as timers_start()
   acquired a reference count via get_pid_task().

#2 the process is stopped for checkpointing so nothing can delete or create
   timers at this point. Neither can the process exit during the traversal.

   If CRIU fails to observe an exit in progress prior to the dissimination
   of the timers, then there are more severe problems to solve in the CRIU
   mechanics as they can't rely on posix timers being enabled in the first
   place.

Therefore replace the lock acquisition with rcu_read_lock() and switch the
timer storage traversal over to seq_hlist_*_rcu().

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
Link: https://lore.kernel.org/all/20250308155624.465175807@linutronix.de


---
 fs/proc/base.c | 48 ++++++++++++++++++++----------------------------
 1 file changed, 20 insertions(+), 28 deletions(-)

diff --git a/fs/proc/base.c b/fs/proc/base.c
index cd89e95..5a1d682 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2497,11 +2497,9 @@ static const struct file_operations proc_map_files_operations = {
 
 #if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS)
 struct timers_private {
-	struct pid *pid;
-	struct task_struct *task;
-	struct sighand_struct *sighand;
-	struct pid_namespace *ns;
-	unsigned long flags;
+	struct pid		*pid;
+	struct task_struct	*task;
+	struct pid_namespace	*ns;
 };
 
 static void *timers_start(struct seq_file *m, loff_t *pos)
@@ -2512,54 +2510,48 @@ static void *timers_start(struct seq_file *m, loff_t *pos)
 	if (!tp->task)
 		return ERR_PTR(-ESRCH);
 
-	tp->sighand = lock_task_sighand(tp->task, &tp->flags);
-	if (!tp->sighand)
-		return ERR_PTR(-ESRCH);
-
-	return seq_hlist_start(&tp->task->signal->posix_timers, *pos);
+	rcu_read_lock();
+	return seq_hlist_start_rcu(&tp->task->signal->posix_timers, *pos);
 }
 
 static void *timers_next(struct seq_file *m, void *v, loff_t *pos)
 {
 	struct timers_private *tp = m->private;
-	return seq_hlist_next(v, &tp->task->signal->posix_timers, pos);
+
+	return seq_hlist_next_rcu(v, &tp->task->signal->posix_timers, pos);
 }
 
 static void timers_stop(struct seq_file *m, void *v)
 {
 	struct timers_private *tp = m->private;
 
-	if (tp->sighand) {
-		unlock_task_sighand(tp->task, &tp->flags);
-		tp->sighand = NULL;
-	}
-
 	if (tp->task) {
 		put_task_struct(tp->task);
 		tp->task = NULL;
+		rcu_read_unlock();
 	}
 }
 
 static int show_timer(struct seq_file *m, void *v)
 {
-	struct k_itimer *timer;
-	struct timers_private *tp = m->private;
-	int notify;
 	static const char * const nstr[] = {
-		[SIGEV_SIGNAL] = "signal",
-		[SIGEV_NONE] = "none",
-		[SIGEV_THREAD] = "thread",
+		[SIGEV_SIGNAL]	= "signal",
+		[SIGEV_NONE]	= "none",
+		[SIGEV_THREAD]	= "thread",
 	};
 
-	timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list);
-	notify = timer->it_sigev_notify;
+	struct k_itimer *timer = hlist_entry((struct hlist_node *)v, struct k_itimer, list);
+	struct timers_private *tp = m->private;
+	int notify = timer->it_sigev_notify;
+
+	guard(spinlock_irq)(&timer->it_lock);
+	if (!posixtimer_valid(timer))
+		return 0;
 
 	seq_printf(m, "ID: %d\n", timer->it_id);
-	seq_printf(m, "signal: %d/%px\n",
-		   timer->sigq.info.si_signo,
+	seq_printf(m, "signal: %d/%px\n", timer->sigq.info.si_signo,
 		   timer->sigq.info.si_value.sival_ptr);
-	seq_printf(m, "notify: %s/%s.%d\n",
-		   nstr[notify & ~SIGEV_THREAD_ID],
+	seq_printf(m, "notify: %s/%s.%d\n", nstr[notify & ~SIGEV_THREAD_ID],
 		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
 		   pid_nr_ns(timer->it_pid, tp->ns));
 	seq_printf(m, "ClockID: %d\n", timer->it_clock);