From: Thomas Gleixner <tglx@linutronix.de>
To cure the SIG_IGN handling for posix interval timers, the preallocated
sigqueue needs to be embedded into struct k_itimer to prevent lifetime
races of all sorts.
Now that the prerequisites are in place, embed the sigqueue into struct
k_itimer and fix up the relevant usage sites.
Aside from preparing for proper SIG_IGN handling, this spares an extra
allocation.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
fs/proc/base.c | 4 +--
include/linux/posix-timers.h | 23 +++++++++++++++--
kernel/signal.c | 9 +++++-
kernel/time/posix-timers.c | 57 ++++++++++++++++++++++++++-----------------
4 files changed, 65 insertions(+), 28 deletions(-)
---
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2553,8 +2553,8 @@ static int show_timer(struct seq_file *m
seq_printf(m, "ID: %d\n", timer->it_id);
seq_printf(m, "signal: %d/%px\n",
- timer->sigq->info.si_signo,
- timer->sigq->info.si_value.sival_ptr);
+ timer->sigq.info.si_signo,
+ timer->sigq.info.si_value.sival_ptr);
seq_printf(m, "notify: %s/%s.%d\n",
nstr[notify & ~SIGEV_THREAD_ID],
(notify & SIGEV_THREAD_ID) ? "tid" : "pid",
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -39,6 +39,8 @@ static inline int clockid_to_fd(const cl
#ifdef CONFIG_POSIX_TIMERS
+#include <linux/signal_types.h>
+
/**
* cpu_timer - Posix CPU timer representation for k_itimer
* @node: timerqueue node to queue in the task/sig
@@ -166,7 +168,7 @@ static inline void posix_cputimers_init_
* @it_pid: The pid of the process/task targeted by the signal
* @it_process: The task to wakeup on clock_nanosleep (CPU timers)
* @rcuref: Reference count for life time management
- * @sigq: Pointer to preallocated sigqueue
+ * @sigq: Embedded sigqueue
* @it: Union representing the various posix timer type
* internals.
* @rcu: RCU head for freeing the timer.
@@ -190,7 +192,7 @@ struct k_itimer {
struct pid *it_pid;
struct task_struct *it_process;
};
- struct sigqueue *sigq;
+ struct sigqueue sigq;
rcuref_t rcuref;
union {
struct {
@@ -218,6 +220,23 @@ static inline void posixtimer_putref(str
if (rcuref_put(&tmr->rcuref))
posixtimer_free_timer(tmr);
}
+
+static inline void posixtimer_sigqueue_getref(struct sigqueue *q)
+{
+ struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+
+ WARN_ON_ONCE(!rcuref_get(&tmr->rcuref));
+}
+
+static inline void posixtimer_sigqueue_putref(struct sigqueue *q)
+{
+ struct k_itimer *tmr = container_of(q, struct k_itimer, sigq);
+
+ posixtimer_putref(tmr);
+}
+#else /* CONFIG_POSIX_TIMERS */
+static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { }
+static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { }
#endif /* !CONFIG_POSIX_TIMERS */
#endif
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -460,8 +460,10 @@ static struct sigqueue *__sigqueue_alloc
static void __sigqueue_free(struct sigqueue *q)
{
- if (q->flags & SIGQUEUE_PREALLOC)
+ if (q->flags & SIGQUEUE_PREALLOC) {
+ posixtimer_sigqueue_putref(q);
return;
+ }
if (q->ucounts) {
dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING);
q->ucounts = NULL;
@@ -1981,7 +1983,7 @@ static inline struct task_struct *posixt
int posixtimer_send_sigqueue(struct k_itimer *tmr)
{
- struct sigqueue *q = tmr->sigq;
+ struct sigqueue *q = &tmr->sigq;
int sig = q->info.si_signo;
struct task_struct *t;
unsigned long flags;
@@ -2040,9 +2042,12 @@ int posixtimer_send_sigqueue(struct k_it
ret = 0;
if (unlikely(!list_empty(&q->list))) {
+ /* This holds a reference count already */
result = TRACE_SIGNAL_ALREADY_PENDING;
goto out;
}
+
+ posixtimer_sigqueue_getref(q);
posixtimer_queue_sigqueue(q, t, tmr->it_pid_type);
result = TRACE_SIGNAL_DELIVERED;
out:
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -251,12 +251,13 @@ static void common_hrtimer_rearm(struct
/*
* This function is called from the signal delivery code. It decides
- * whether the signal should be dropped and rearms interval timers.
+ * whether the signal should be dropped and rearms interval timers. The
+ * timer can be unconditionally accessed as there is a reference held on
+ * it.
*/
bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq)
{
- struct k_itimer *timr;
- unsigned long flags;
+ struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq);
bool ret = false;
/*
@@ -264,12 +265,14 @@ bool posixtimer_deliver_signal(struct ke
* timr::it_lock. Keep interrupts disabled.
*/
spin_unlock(&current->sighand->siglock);
+ spin_lock(&timr->it_lock);
- timr = lock_timer(info->si_tid, &flags);
- if (!timr)
- goto out;
-
- if (timr->it_signal_seq != info->si_sys_private)
+ /*
+ * Check if the timer is still alive or whether it got modified
+ * since the signal was queued. In either case, don't rearm and
+ * drop the signal.
+ */
+ if (!timr->it_signal || timr->it_signal_seq != info->si_sys_private)
goto out_unlock;
if (timr->it_interval && timr->it_status == POSIX_TIMER_REQUEUE_PENDING) {
@@ -285,8 +288,10 @@ bool posixtimer_deliver_signal(struct ke
ret = true;
out_unlock:
- unlock_timer(timr, flags);
-out:
+ spin_unlock(&timr->it_lock);
+ /* Drop the reference which was acquired when the signal was queued */
+ posixtimer_putref(timr);
+
spin_lock(&current->sighand->siglock);
/* Don't expose the si_sys_private value to userspace */
@@ -404,17 +409,17 @@ static struct pid *good_sigevent(sigeven
}
}
-static struct k_itimer * alloc_posix_timer(void)
+static struct k_itimer *alloc_posix_timer(void)
{
struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL);
if (!tmr)
return tmr;
- if (unlikely(!(tmr->sigq = sigqueue_alloc()))) {
+
+ if (unlikely(!posixtimer_init_sigqueue(&tmr->sigq))) {
kmem_cache_free(posix_timers_cache, tmr);
return NULL;
}
- clear_siginfo(&tmr->sigq->info);
rcuref_init(&tmr->rcuref, 1);
return tmr;
}
@@ -429,7 +434,8 @@ static void k_itimer_rcu_free(struct rcu
void posixtimer_free_timer(struct k_itimer *tmr)
{
put_pid(tmr->it_pid);
- sigqueue_free(tmr->sigq);
+ if (tmr->sigq.ucounts)
+ dec_rlimit_put_ucounts(tmr->sigq.ucounts, UCOUNT_RLIMIT_SIGPENDING);
call_rcu(&tmr->rcu, k_itimer_rcu_free);
}
@@ -491,13 +497,13 @@ static int do_timer_create(clockid_t whi
goto out;
}
new_timer->it_sigev_notify = event->sigev_notify;
- new_timer->sigq->info.si_signo = event->sigev_signo;
- new_timer->sigq->info.si_value = event->sigev_value;
+ new_timer->sigq.info.si_signo = event->sigev_signo;
+ new_timer->sigq.info.si_value = event->sigev_value;
} else {
new_timer->it_sigev_notify = SIGEV_SIGNAL;
- new_timer->sigq->info.si_signo = SIGALRM;
- memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t));
- new_timer->sigq->info.si_value.sival_int = new_timer->it_id;
+ new_timer->sigq.info.si_signo = SIGALRM;
+ memset(&new_timer->sigq.info.si_value, 0, sizeof(sigval_t));
+ new_timer->sigq.info.si_value.sival_int = new_timer->it_id;
new_timer->it_pid = get_pid(task_tgid(current));
}
@@ -506,8 +512,8 @@ static int do_timer_create(clockid_t whi
else
new_timer->it_pid_type = PIDTYPE_TGID;
- new_timer->sigq->info.si_tid = new_timer->it_id;
- new_timer->sigq->info.si_code = SI_TIMER;
+ new_timer->sigq.info.si_tid = new_timer->it_id;
+ new_timer->sigq.info.si_code = SI_TIMER;
if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) {
error = -EFAULT;
@@ -591,7 +597,14 @@ static struct k_itimer *__lock_timer(tim
* 1) Set timr::it_signal to NULL with timr::it_lock held
* 2) Release timr::it_lock
* 3) Remove from the hash under hash_lock
- * 4) Call RCU for removal after the grace period
+ * 4) Put the reference count.
+ *
+ * The reference count might not drop to zero if timr::sigq is
+ * queued. In that case the signal delivery or flush will put the
+ * last reference count.
+ *
+ * When the reference count reaches zero, the timer is scheduled
+ * for RCU removal after the grace period.
*
* Holding rcu_read_lock() accross the lookup ensures that
* the timer cannot be freed.
Le Tue, Oct 01, 2024 at 10:42:23AM +0200, Thomas Gleixner a écrit : > From: Thomas Gleixner <tglx@linutronix.de> > > To cure the SIG_IGN handling for posix interval timers, the preallocated > sigqueue needs to be embedded into struct k_itimer to prevent life time > races of all sorts. > > Now that the prerequisites are in place, embed the sigqueue into struct > k_itimer and fixup the relevant usage sites. > > Aside of preparing for proper SIG_IGN handling, this spares an extra > allocation. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> > --- > fs/proc/base.c | 4 +-- > include/linux/posix-timers.h | 23 +++++++++++++++-- > kernel/signal.c | 9 +++++- > kernel/time/posix-timers.c | 57 ++++++++++++++++++++++++++----------------- > 4 files changed, 65 insertions(+), 28 deletions(-) > --- > --- a/fs/proc/base.c > +++ b/fs/proc/base.c > @@ -2553,8 +2553,8 @@ static int show_timer(struct seq_file *m > > seq_printf(m, "ID: %d\n", timer->it_id); > seq_printf(m, "signal: %d/%px\n", > - timer->sigq->info.si_signo, > - timer->sigq->info.si_value.sival_ptr); > + timer->sigq.info.si_signo, > + timer->sigq.info.si_value.sival_ptr); > seq_printf(m, "notify: %s/%s.%d\n", > nstr[notify & ~SIGEV_THREAD_ID], > (notify & SIGEV_THREAD_ID) ? 
"tid" : "pid", > --- a/include/linux/posix-timers.h > +++ b/include/linux/posix-timers.h > @@ -39,6 +39,8 @@ static inline int clockid_to_fd(const cl > > #ifdef CONFIG_POSIX_TIMERS > > +#include <linux/signal_types.h> > + > /** > * cpu_timer - Posix CPU timer representation for k_itimer > * @node: timerqueue node to queue in the task/sig > @@ -166,7 +168,7 @@ static inline void posix_cputimers_init_ > * @it_pid: The pid of the process/task targeted by the signal > * @it_process: The task to wakeup on clock_nanosleep (CPU timers) > * @rcuref: Reference count for life time management > - * @sigq: Pointer to preallocated sigqueue > + * @sigq: Embedded sigqueue > * @it: Union representing the various posix timer type > * internals. > * @rcu: RCU head for freeing the timer. > @@ -190,7 +192,7 @@ struct k_itimer { > struct pid *it_pid; > struct task_struct *it_process; > }; > - struct sigqueue *sigq; > + struct sigqueue sigq; > rcuref_t rcuref; > union { > struct { > @@ -218,6 +220,23 @@ static inline void posixtimer_putref(str > if (rcuref_put(&tmr->rcuref)) > posixtimer_free_timer(tmr); > } > + > +static inline void posixtimer_sigqueue_getref(struct sigqueue *q) > +{ > + struct k_itimer *tmr = container_of(q, struct k_itimer, sigq); > + > + WARN_ON_ONCE(!rcuref_get(&tmr->rcuref)); > +} > + > +static inline void posixtimer_sigqueue_putref(struct sigqueue *q) > +{ > + struct k_itimer *tmr = container_of(q, struct k_itimer, sigq); > + > + posixtimer_putref(tmr); > +} > +#else /* CONFIG_POSIX_TIMERS */ > +static inline void posixtimer_sigqueue_getref(struct sigqueue *q) { } > +static inline void posixtimer_sigqueue_putref(struct sigqueue *q) { } > #endif /* !CONFIG_POSIX_TIMERS */ > > #endif > --- a/kernel/signal.c > +++ b/kernel/signal.c > @@ -460,8 +460,10 @@ static struct sigqueue *__sigqueue_alloc > > static void __sigqueue_free(struct sigqueue *q) > { > - if (q->flags & SIGQUEUE_PREALLOC) > + if (q->flags & SIGQUEUE_PREALLOC) { > + posixtimer_sigqueue_putref(q); > 
return; > + } > if (q->ucounts) { > dec_rlimit_put_ucounts(q->ucounts, UCOUNT_RLIMIT_SIGPENDING); > q->ucounts = NULL; > @@ -1981,7 +1983,7 @@ static inline struct task_struct *posixt > > int posixtimer_send_sigqueue(struct k_itimer *tmr) > { > - struct sigqueue *q = tmr->sigq; > + struct sigqueue *q = &tmr->sigq; > int sig = q->info.si_signo; > struct task_struct *t; > unsigned long flags; > @@ -2040,9 +2042,12 @@ int posixtimer_send_sigqueue(struct k_it > > ret = 0; > if (unlikely(!list_empty(&q->list))) { > + /* This holds a reference count already */ > result = TRACE_SIGNAL_ALREADY_PENDING; > goto out; > } > + > + posixtimer_sigqueue_getref(q); > posixtimer_queue_sigqueue(q, t, tmr->it_pid_type); > result = TRACE_SIGNAL_DELIVERED; > out: > --- a/kernel/time/posix-timers.c > +++ b/kernel/time/posix-timers.c > @@ -251,12 +251,13 @@ static void common_hrtimer_rearm(struct > > /* > * This function is called from the signal delivery code. It decides > - * whether the signal should be dropped and rearms interval timers. > + * whether the signal should be dropped and rearms interval timers. The > + * timer can be unconditionally accessed as there is a reference held on > + * it. > */ > bool posixtimer_deliver_signal(struct kernel_siginfo *info, struct sigqueue *timer_sigq) > { > - struct k_itimer *timr; > - unsigned long flags; > + struct k_itimer *timr = container_of(timer_sigq, struct k_itimer, sigq); > bool ret = false; > > /* > @@ -264,12 +265,14 @@ bool posixtimer_deliver_signal(struct ke > * timr::it_lock. Keep interrupts disabled. > */ > spin_unlock(&current->sighand->siglock); > + spin_lock(&timr->it_lock); > > - timr = lock_timer(info->si_tid, &flags); > - if (!timr) > - goto out; > - > - if (timr->it_signal_seq != info->si_sys_private) > + /* > + * Check if the timer is still alive or whether it got modified > + * since the signal was queued. In either case, don't rearm and > + * drop the signal. 
> + */ > + if (!timr->it_signal || timr->it_signal_seq != info->si_sys_private) > goto out_unlock; It could be: if (timr->it_signal_seq != info->si_sys_private || WARN_ON_ONCE(!timr->it_signal)) goto out_unlock; Because if the timer has been deleted, the current seq should be different from the queued seq. Thanks. > > if (timr->it_interval && timr->it_status == POSIX_TIMER_REQUEUE_PENDING) { > @@ -285,8 +288,10 @@ bool posixtimer_deliver_signal(struct ke > ret = true; > > out_unlock: > - unlock_timer(timr, flags); > -out: > + spin_unlock(&timr->it_lock); > + /* Drop the reference which was acquired when the signal was queued */ > + posixtimer_putref(timr); > + > spin_lock(&current->sighand->siglock); > > /* Don't expose the si_sys_private value to userspace */ > @@ -404,17 +409,17 @@ static struct pid *good_sigevent(sigeven > } > } > > -static struct k_itimer * alloc_posix_timer(void) > +static struct k_itimer *alloc_posix_timer(void) > { > struct k_itimer *tmr = kmem_cache_zalloc(posix_timers_cache, GFP_KERNEL); > > if (!tmr) > return tmr; > - if (unlikely(!(tmr->sigq = sigqueue_alloc()))) { > + > + if (unlikely(!posixtimer_init_sigqueue(&tmr->sigq))) { > kmem_cache_free(posix_timers_cache, tmr); > return NULL; > } > - clear_siginfo(&tmr->sigq->info); > rcuref_init(&tmr->rcuref, 1); > return tmr; > } > @@ -429,7 +434,8 @@ static void k_itimer_rcu_free(struct rcu > void posixtimer_free_timer(struct k_itimer *tmr) > { > put_pid(tmr->it_pid); > - sigqueue_free(tmr->sigq); > + if (tmr->sigq.ucounts) > + dec_rlimit_put_ucounts(tmr->sigq.ucounts, UCOUNT_RLIMIT_SIGPENDING); > call_rcu(&tmr->rcu, k_itimer_rcu_free); > } > > @@ -491,13 +497,13 @@ static int do_timer_create(clockid_t whi > goto out; > } > new_timer->it_sigev_notify = event->sigev_notify; > - new_timer->sigq->info.si_signo = event->sigev_signo; > - new_timer->sigq->info.si_value = event->sigev_value; > + new_timer->sigq.info.si_signo = event->sigev_signo; > + new_timer->sigq.info.si_value = event->sigev_value; > } 
else { > new_timer->it_sigev_notify = SIGEV_SIGNAL; > - new_timer->sigq->info.si_signo = SIGALRM; > - memset(&new_timer->sigq->info.si_value, 0, sizeof(sigval_t)); > - new_timer->sigq->info.si_value.sival_int = new_timer->it_id; > + new_timer->sigq.info.si_signo = SIGALRM; > + memset(&new_timer->sigq.info.si_value, 0, sizeof(sigval_t)); > + new_timer->sigq.info.si_value.sival_int = new_timer->it_id; > new_timer->it_pid = get_pid(task_tgid(current)); > } > > @@ -506,8 +512,8 @@ static int do_timer_create(clockid_t whi > else > new_timer->it_pid_type = PIDTYPE_TGID; > > - new_timer->sigq->info.si_tid = new_timer->it_id; > - new_timer->sigq->info.si_code = SI_TIMER; > + new_timer->sigq.info.si_tid = new_timer->it_id; > + new_timer->sigq.info.si_code = SI_TIMER; > > if (copy_to_user(created_timer_id, &new_timer_id, sizeof (new_timer_id))) { > error = -EFAULT; > @@ -591,7 +597,14 @@ static struct k_itimer *__lock_timer(tim > * 1) Set timr::it_signal to NULL with timr::it_lock held > * 2) Release timr::it_lock > * 3) Remove from the hash under hash_lock > - * 4) Call RCU for removal after the grace period > + * 4) Put the reference count. > + * > + * The reference count might not drop to zero if timr::sigq is > + * queued. In that case the signal delivery or flush will put the > + * last reference count. > + * > + * When the reference count reaches zero, the timer is scheduled > + * for RCU removal after the grace period. > * > * Holding rcu_read_lock() accross the lookup ensures that > * the timer cannot be freed. >
On Wed, Oct 30 2024 at 14:55, Frederic Weisbecker wrote: > Le Tue, Oct 01, 2024 at 10:42:23AM +0200, Thomas Gleixner a écrit : >> - if (timr->it_signal_seq != info->si_sys_private) >> + /* >> + * Check if the timer is still alive or whether it got modified >> + * since the signal was queued. In either case, don't rearm and >> + * drop the signal. >> + */ >> + if (!timr->it_signal || timr->it_signal_seq != info->si_sys_private) >> goto out_unlock; > > It could be: > > if (timr->it_signal_seq != info->si_sys_private || > WARN_ON_ONCE(!timr->it_signal)) > goto out_unlock; > > Because if the timer has been deleted, the current seq should be different from > the queued seq. Indeed
Le Tue, Oct 01, 2024 at 10:42:23AM +0200, Thomas Gleixner a écrit : > From: Thomas Gleixner <tglx@linutronix.de> > > To cure the SIG_IGN handling for posix interval timers, the preallocated > sigqueue needs to be embedded into struct k_itimer to prevent life time > races of all sorts. > > Now that the prerequisites are in place, embed the sigqueue into struct > k_itimer and fixup the relevant usage sites. > > Aside of preparing for proper SIG_IGN handling, this spares an extra > allocation. > > Signed-off-by: Thomas Gleixner <tglx@linutronix.de> > Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
© 2016 - 2024 Red Hat, Inc.