[patch V5 17/26] posix-timers: Embed sigqueue in struct k_itimer

Thomas Gleixner posted 26 patches 1 month, 4 weeks ago
There is a newer version of this series
[patch V5 17/26] posix-timers: Embed sigqueue in struct k_itimer
Posted by Thomas Gleixner 1 month, 4 weeks ago
From: Thomas Gleixner <tglx@linutronix.de>

To cure the SIG_IGN handling for posix interval timers, the preallocated
sigqueue needs to be embedded into struct k_itimer to prevent lifetime
races of all sorts.

Now that the prerequisites are in place, embed the sigqueue into struct
k_itimer and fix up the relevant usage sites.

Aside from preparing for proper SIG_IGN handling, this spares an extra
allocation.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 fs/proc/base.c               |    4 +--
 include/linux/posix-timers.h |   23 +++++++++++++++--
 kernel/signal.c              |    9 +++++-
 kernel/time/posix-timers.c   |   57 ++++++++++++++++++++++++++-----------------
 4 files changed, 65 insertions(+), 28 deletions(-)
---
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2553,8 +2553,8 @@ static int show_timer(struct seq_file *m
 
 	seq_printf(m, "ID: %d\n", timer->it_id);
 	seq_printf(m, "signal: %d/%px\n",
-		   timer->sigq->info.si_signo,
-		   timer->sigq->info.si_value.sival_ptr);
+		   timer->sigq.info.si_signo,
+		   timer->sigq.info.si_value.sival_ptr);
 	seq_printf(m, "notify: %s/%s.%d\n",
 		   nstr[notify & ~SIGEV_THREAD_ID],
 		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -39,6 +39,8 @@ static inline int clockid_to_fd(const cl
 
 #ifdef CONFIG_POSIX_TIMERS
 
+#include <linux/signal_types.h>
+
 /**
  * cpu_timer - Posix CPU timer representation for k_itimer
  * @node:	timerqueue node to queue in the task/sig
@@ -166,7 +168,7 @@ static inline void posix_cputimers_init_
  * @it_pid:		The pid of the process/task targeted by the signal
  * @it_process:		The task to wakeup on clock_nanosleep (CPU timers)
  * @rcuref:		Reference count for life time management
- * @sigq:		Pointer to preallocated sigqueue
+ * @sigq:		Embedded sigqueue
  * @it:			Union representing the various posix timer type
  *			internals.
  * @rcu:		RCU head for freeing the timer.
@@ -190,7 +192,7 @@ struct k_itimer {
 		struct pid		*it_pid;
 		struct task_struct	*it_process;
 	};
-	struct sigqueue		*sigq;
+	struct sigqueue		sigq;
 	rcuref_t		rcuref;
 	union {
 		struct {
@@ -218,6 +220,23 @@ static inline void posixtimer_putref(str
 	if (rcuref_put(&tmr->rcuref))
 		posixtimer_free_timer(tmr);
 }
+
+static inline void posixtimer_sigqueue_getref(struct sigqueue *q)
+{
+	struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+
+	WARN_ON_ONCE(!rcuref_get(&tmr->rcuref));
+}
+
+static inline void posixtimer_sigqueue_putref(struct sigqueue *q)
+{
+	struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+
+	posixtimer_putref(tmr);
+}
+#else  /* CONFIG_POSIX_TIMERS */
+static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
+static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
 #endif /* !CONFIG_POSIX_TIMERS */
 
 #endif
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -460,8 +460,10 @@ static struct sigqueue *__sigqueue_alloc
 
 static void __sigqueue_free(struct sigqueue *q)
 {
-	if (q->flags & SIGQUEUE_PREALLOC)
+	if (q->flags & SIGQUEUE_PREALLOC) {
+		posixtimer_sigqueue_putref(q);
 		return;
+	}
 	if (q->ucounts) {
 		dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
 		q->ucounts = NULL;
@@ -1981,7 +1983,7 @@ static inline struct task_struct *posixt
 
 int posixtimer_send_sigqueue(struct k_itimer *tmr)
 {
-	struct sigqueue *q = tmr->sigq;
+	struct sigqueue *q = &tmr->sigq;
 	int sig = q->info.si_signo;
 	struct task_struct *t;
 	unsigned long flags;
@@ -2040,9 +2042,12 @@ int posixtimer_send_sigqueue(struct k_it
 
 	ret = 0;
 	if (unlikely(!list_empty(&q->list))) {
+		/* This holds a reference count already */
 		result = TRACE_SIGNAL_ALREADY_PENDING;
 		goto out;
 	}
+
+	posixtimer_sigqueue_getref(q);
 	posixtimer_queue_sigqueue(q, t, tmr->it_pid_type);
 	result = TRACE_SIGNAL_DELIVERED;
 out:
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -251,12 +251,13 @@ static void common_hrtimer_rearm(struct
 
 /*
  * This function is called from the signal delivery code. It decides
- * whether the signal should be dropped and rearms interval timers.
+ * whether the signal should be dropped and rearms interval timers.  The
+ * timer can be unconditionally accessed as there is a reference held on
+ * it.
  */
 bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq)
 {
-	struct k_itimer *timr;
-	unsigned long flags;
+	struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq);
 	bool ret = false;
 
 	/*
@@ -264,12 +265,14 @@ bool posixtimer_deliver_signal(struct ke
 	 * timr::it_lock. Keep interrupts disabled.
 	 */
 	spin_unlock(&current->sighand->siglock);
+	spin_lock(&timr->it_lock);
 
-	timr = lock_timer(info->si_tid, &flags);
-	if (!timr)
-		goto out;
-
-	if (timr->it_signal_seq != info->si_sys_private)
+	/*
+	 * Check if the timer is still alive or whether it got modified
+	 * since the signal was queued. In either case, don't rearm and
+	 * drop the signal.
+	 */
+	if (!timr->it_signal || timr->it_signal_seq != info->si_sys_private)
 		goto out_unlock;
 
 	if (timr->it_interval && timr->it_status == POSIX_TIMER_REQUEUE_PENDING) {
@@ -285,8 +288,10 @@ bool posixtimer_deliver_signal(struct ke
 	ret = true;
 
 out_unlock:
-	unlock_timer(timr, flags);
-out:
+	spin_unlock(&timr->it_lock);
+	/* Drop the reference which was acquired when the signal was queued */
+	posixtimer_putref(timr);
+
 	spin_lock(&current->sighand->siglock);
 
 	/* Don't expose the si_sys_private value to userspace */
@@ -404,17 +409,17 @@ static struct pid *good_sigevent(sigeven
 	}
 }
 
-static struct k_itimer * alloc_posix_timer(void)
+static struct k_itimer *alloc_posix_timer(void)
 {
 	struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
 
 	if (!tmr)
 		return tmr;
-	if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
+
+	if (unlikely(!posixtimer_init_sigqueue(&tmr->sigq))) {
 		kmem_cache_free(posix_timers_cache, tmr);
 		return NULL;
 	}
-	clear_siginfo(&tmr->sigq->info);
 	rcuref_init(&tmr->rcuref, 1);
 	return tmr;
 }
@@ -429,7 +434,8 @@ static void k_itimer_rcu_free(struct rcu
 void posixtimer_free_timer(struct k_itimer *tmr)
 {
 	put_pid(tmr->it_pid);
-	sigqueue_free(tmr->sigq);
+	if (tmr->sigq.ucounts)
+		dec_rlimit_put_ucounts(tmr->sigq.ucounts, UCOUNT_RLIMIT_SIGPENDING);
 	call_rcu(&tmr->rcu, k_itimer_rcu_free);
 }
 
@@ -491,13 +497,13 @@ static int do_timer_create(clockid_t whi
 			goto out;
 		}
 		new_timer->it_sigev_notify     = event->sigev_notify;
-		new_timer->sigq->info.si_signo = event->sigev_signo;
-		new_timer->sigq->info.si_value = event->sigev_value;
+		new_timer->sigq.info.si_signo = event->sigev_signo;
+		new_timer->sigq.info.si_value = event->sigev_value;
 	} else {
 		new_timer->it_sigev_notify     = SIGEV_SIGNAL;
-		new_timer->sigq->info.si_signo = SIGALRM;
-		memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t));
-		new_timer->sigq->info.si_value.sival_int = new_timer->it_id;
+		new_timer->sigq.info.si_signo = SIGALRM;
+		memset(&new_timer->sigq.info.si_value, 0, sizeof(sigval_t));
+		new_timer->sigq.info.si_value.sival_int = new_timer->it_id;
 		new_timer->it_pid = get_pid(task_tgid(current));
 	}
 
@@ -506,8 +512,8 @@ static int do_timer_create(clockid_t whi
 	else
 		new_timer->it_pid_type = PIDTYPE_TGID;
 
-	new_timer->sigq->info.si_tid   = new_timer->it_id;
-	new_timer->sigq->info.si_code  = SI_TIMER;
+	new_timer->sigq.info.si_tid = new_timer->it_id;
+	new_timer->sigq.info.si_code = SI_TIMER;
 
 	if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) {
 		error = -EFAULT;
@@ -591,7 +597,14 @@ static struct k_itimer *__lock_timer(tim
 	 *  1) Set timr::it_signal to NULL with timr::it_lock held
 	 *  2) Release timr::it_lock
 	 *  3) Remove from the hash under hash_lock
-	 *  4) Call RCU for removal after the grace period
+	 *  4) Put the reference count.
+	 *
+	 * The reference count might not drop to zero if timr::sigq is
+	 * queued. In that case the signal delivery or flush will put the
+	 * last reference count.
+	 *
+	 * When the reference count reaches zero, the timer is scheduled
+	 * for RCU removal after the grace period.
 	 *
 	 * Holding rcu_read_lock() accross the lookup ensures that
 	 * the timer cannot be freed.
Re: [patch V5 17/26] posix-timers: Embed sigqueue in struct k_itimer
Posted by Frederic Weisbecker 4 weeks ago
Le Tue, Oct 01, 2024 at 10:42:23AM +0200, Thomas Gleixner a écrit :
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> To cure the SIG_IGN handling for posix interval timers, the preallocated
> sigqueue needs to be embedded into struct k_itimer to prevent life time
> races of all sorts.
> 
> Now that the prerequisites are in place, embed the sigqueue into struct
> k_itimer and fixup the relevant usage sites.
> 
> Aside of preparing for proper SIG_IGN handling, this spares an extra
> allocation.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
> ---
>  fs/proc/base.c               |    4 +--
>  include/linux/posix-timers.h |   23 +++++++++++++++--
>  kernel/signal.c              |    9 +++++-
>  kernel/time/posix-timers.c   |   57 ++++++++++++++++++++++++++-----------------
>  4 files changed, 65 insertions(+), 28 deletions(-)
> ---
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2553,8 +2553,8 @@ static int show_timer(struct seq_file *m
>  
>  	seq_printf(m, "ID: %d\n", timer->it_id);
>  	seq_printf(m, "signal: %d/%px\n",
> -		   timer->sigq->info.si_signo,
> -		   timer->sigq->info.si_value.sival_ptr);
> +		   timer->sigq.info.si_signo,
> +		   timer->sigq.info.si_value.sival_ptr);
>  	seq_printf(m, "notify: %s/%s.%d\n",
>  		   nstr[notify & ~SIGEV_THREAD_ID],
>  		   (notify & SIGEV_THREAD_ID) ? "tid" : "pid",
> --- a/include/linux/posix-timers.h
> +++ b/include/linux/posix-timers.h
> @@ -39,6 +39,8 @@ static inline int clockid_to_fd(const cl
>  
>  #ifdef CONFIG_POSIX_TIMERS
>  
> +#include <linux/signal_types.h>
> +
>  /**
>   * cpu_timer - Posix CPU timer representation for k_itimer
>   * @node:	timerqueue node to queue in the task/sig
> @@ -166,7 +168,7 @@ static inline void posix_cputimers_init_
>   * @it_pid:		The pid of the process/task targeted by the signal
>   * @it_process:		The task to wakeup on clock_nanosleep (CPU timers)
>   * @rcuref:		Reference count for life time management
> - * @sigq:		Pointer to preallocated sigqueue
> + * @sigq:		Embedded sigqueue
>   * @it:			Union representing the various posix timer type
>   *			internals.
>   * @rcu:		RCU head for freeing the timer.
> @@ -190,7 +192,7 @@ struct k_itimer {
>  		struct pid		*it_pid;
>  		struct task_struct	*it_process;
>  	};
> -	struct sigqueue		*sigq;
> +	struct sigqueue		sigq;
>  	rcuref_t		rcuref;
>  	union {
>  		struct {
> @@ -218,6 +220,23 @@ static inline void posixtimer_putref(str
>  	if (rcuref_put(&tmr->rcuref))
>  		posixtimer_free_timer(tmr);
>  }
> +
> +static inline void posixtimer_sigqueue_getref(struct sigqueue *q)
> +{
> +	struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
> +
> +	WARN_ON_ONCE(!rcuref_get(&tmr->rcuref));
> +}
> +
> +static inline void posixtimer_sigqueue_putref(struct sigqueue *q)
> +{
> +	struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
> +
> +	posixtimer_putref(tmr);
> +}
> +#else  /* CONFIG_POSIX_TIMERS */
> +static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
> +static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
>  #endif /* !CONFIG_POSIX_TIMERS */
>  
>  #endif
> --- a/kernel/signal.c
> +++ b/kernel/signal.c
> @@ -460,8 +460,10 @@ static struct sigqueue *__sigqueue_alloc
>  
>  static void __sigqueue_free(struct sigqueue *q)
>  {
> -	if (q->flags & SIGQUEUE_PREALLOC)
> +	if (q->flags & SIGQUEUE_PREALLOC) {
> +		posixtimer_sigqueue_putref(q);
>  		return;
> +	}
>  	if (q->ucounts) {
>  		dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
>  		q->ucounts = NULL;
> @@ -1981,7 +1983,7 @@ static inline struct task_struct *posixt
>  
>  int posixtimer_send_sigqueue(struct k_itimer *tmr)
>  {
> -	struct sigqueue *q = tmr->sigq;
> +	struct sigqueue *q = &tmr->sigq;
>  	int sig = q->info.si_signo;
>  	struct task_struct *t;
>  	unsigned long flags;
> @@ -2040,9 +2042,12 @@ int posixtimer_send_sigqueue(struct k_it
>  
>  	ret = 0;
>  	if (unlikely(!list_empty(&q->list))) {
> +		/* This holds a reference count already */
>  		result = TRACE_SIGNAL_ALREADY_PENDING;
>  		goto out;
>  	}
> +
> +	posixtimer_sigqueue_getref(q);
>  	posixtimer_queue_sigqueue(q, t, tmr->it_pid_type);
>  	result = TRACE_SIGNAL_DELIVERED;
>  out:
> --- a/kernel/time/posix-timers.c
> +++ b/kernel/time/posix-timers.c
> @@ -251,12 +251,13 @@ static void common_hrtimer_rearm(struct
>  
>  /*
>   * This function is called from the signal delivery code. It decides
> - * whether the signal should be dropped and rearms interval timers.
> + * whether the signal should be dropped and rearms interval timers.  The
> + * timer can be unconditionally accessed as there is a reference held on
> + * it.
>   */
>  bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq)
>  {
> -	struct k_itimer *timr;
> -	unsigned long flags;
> +	struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq);
>  	bool ret = false;
>  
>  	/*
> @@ -264,12 +265,14 @@ bool posixtimer_deliver_signal(struct ke
>  	 * timr::it_lock. Keep interrupts disabled.
>  	 */
>  	spin_unlock(&current->sighand->siglock);
> +	spin_lock(&timr->it_lock);
>  
> -	timr = lock_timer(info->si_tid, &flags);
> -	if (!timr)
> -		goto out;
> -
> -	if (timr->it_signal_seq != info->si_sys_private)
> +	/*
> +	 * Check if the timer is still alive or whether it got modified
> +	 * since the signal was queued. In either case, don't rearm and
> +	 * drop the signal.
> +	 */
> +	if (!timr->it_signal || timr->it_signal_seq != info->si_sys_private)
>  		goto out_unlock;

It could be:

if (timr->it_signal_seq != info->si_sys_private ||
    WARN_ON_ONCE(!timr->it_signal))
   goto out_unlock;

Because if the timer has been deleted, the current seq should be different from
the queued seq.

Thanks.

>  
>  	if (timr->it_interval && timr->it_status == POSIX_TIMER_REQUEUE_PENDING) {
> @@ -285,8 +288,10 @@ bool posixtimer_deliver_signal(struct ke
>  	ret = true;
>  
>  out_unlock:
> -	unlock_timer(timr, flags);
> -out:
> +	spin_unlock(&timr->it_lock);
> +	/* Drop the reference which was acquired when the signal was queued */
> +	posixtimer_putref(timr);
> +
>  	spin_lock(&current->sighand->siglock);
>  
>  	/* Don't expose the si_sys_private value to userspace */
> @@ -404,17 +409,17 @@ static struct pid *good_sigevent(sigeven
>  	}
>  }
>  
> -static struct k_itimer * alloc_posix_timer(void)
> +static struct k_itimer *alloc_posix_timer(void)
>  {
>  	struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
>  
>  	if (!tmr)
>  		return tmr;
> -	if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
> +
> +	if (unlikely(!posixtimer_init_sigqueue(&tmr->sigq))) {
>  		kmem_cache_free(posix_timers_cache, tmr);
>  		return NULL;
>  	}
> -	clear_siginfo(&tmr->sigq->info);
>  	rcuref_init(&tmr->rcuref, 1);
>  	return tmr;
>  }
> @@ -429,7 +434,8 @@ static void k_itimer_rcu_free(struct rcu
>  void posixtimer_free_timer(struct k_itimer *tmr)
>  {
>  	put_pid(tmr->it_pid);
> -	sigqueue_free(tmr->sigq);
> +	if (tmr->sigq.ucounts)
> +		dec_rlimit_put_ucounts(tmr->sigq.ucounts, UCOUNT_RLIMIT_SIGPENDING);
>  	call_rcu(&tmr->rcu, k_itimer_rcu_free);
>  }
>  
> @@ -491,13 +497,13 @@ static int do_timer_create(clockid_t whi
>  			goto out;
>  		}
>  		new_timer->it_sigev_notify     = event->sigev_notify;
> -		new_timer->sigq->info.si_signo = event->sigev_signo;
> -		new_timer->sigq->info.si_value = event->sigev_value;
> +		new_timer->sigq.info.si_signo = event->sigev_signo;
> +		new_timer->sigq.info.si_value = event->sigev_value;
>  	} else {
>  		new_timer->it_sigev_notify     = SIGEV_SIGNAL;
> -		new_timer->sigq->info.si_signo = SIGALRM;
> -		memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t));
> -		new_timer->sigq->info.si_value.sival_int = new_timer->it_id;
> +		new_timer->sigq.info.si_signo = SIGALRM;
> +		memset(&new_timer->sigq.info.si_value, 0, sizeof(sigval_t));
> +		new_timer->sigq.info.si_value.sival_int = new_timer->it_id;
>  		new_timer->it_pid = get_pid(task_tgid(current));
>  	}
>  
> @@ -506,8 +512,8 @@ static int do_timer_create(clockid_t whi
>  	else
>  		new_timer->it_pid_type = PIDTYPE_TGID;
>  
> -	new_timer->sigq->info.si_tid   = new_timer->it_id;
> -	new_timer->sigq->info.si_code  = SI_TIMER;
> +	new_timer->sigq.info.si_tid = new_timer->it_id;
> +	new_timer->sigq.info.si_code = SI_TIMER;
>  
>  	if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) {
>  		error = -EFAULT;
> @@ -591,7 +597,14 @@ static struct k_itimer *__lock_timer(tim
>  	 *  1) Set timr::it_signal to NULL with timr::it_lock held
>  	 *  2) Release timr::it_lock
>  	 *  3) Remove from the hash under hash_lock
> -	 *  4) Call RCU for removal after the grace period
> +	 *  4) Put the reference count.
> +	 *
> +	 * The reference count might not drop to zero if timr::sigq is
> +	 * queued. In that case the signal delivery or flush will put the
> +	 * last reference count.
> +	 *
> +	 * When the reference count reaches zero, the timer is scheduled
> +	 * for RCU removal after the grace period.
>  	 *
>  	 * Holding rcu_read_lock() accross the lookup ensures that
>  	 * the timer cannot be freed.
> 
Re: [patch V5 17/26] posix-timers: Embed sigqueue in struct k_itimer
Posted by Thomas Gleixner 4 weeks ago
On Wed, Oct 30 2024 at 14:55, Frederic Weisbecker wrote:
> Le Tue, Oct 01, 2024 at 10:42:23AM +0200, Thomas Gleixner a écrit :
>> -	if (timr->it_signal_seq != info->si_sys_private)
>> +	/*
>> +	 * Check if the timer is still alive or whether it got modified
>> +	 * since the signal was queued. In either case, don't rearm and
>> +	 * drop the signal.
>> +	 */
>> +	if (!timr->it_signal || timr->it_signal_seq != info->si_sys_private)
>>  		goto out_unlock;
>
> It could be:
>
> if (timr->it_signal_seq != info->si_sys_private ||
>     WARN_ON_ONCE(!timr->it_signal))
>    goto out_unlock;
>
> Because if the timer has been deleted, the current seq should be different from
> the queued seq.

Indeed
Re: [patch V5 17/26] posix-timers: Embed sigqueue in struct k_itimer
Posted by Frederic Weisbecker 4 weeks, 1 day ago
Le Tue, Oct 01, 2024 at 10:42:23AM +0200, Thomas Gleixner a écrit :
> From: Thomas Gleixner <tglx@linutronix.de>
> 
> To cure the SIG_IGN handling for posix interval timers, the preallocated
> sigqueue needs to be embedded into struct k_itimer to prevent life time
> races of all sorts.
> 
> Now that the prerequisites are in place, embed the sigqueue into struct
> k_itimer and fixup the relevant usage sites.
> 
> Aside of preparing for proper SIG_IGN handling, this spares an extra
> allocation.
> 
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>