[RFC PATCH v2 06/12] sched: Adapt sched tracepoints for RV task model

Gabriele Monaco posted 12 patches 9 months ago
There is a newer version of this series
[RFC PATCH v2 06/12] sched: Adapt sched tracepoints for RV task model
Posted by Gabriele Monaco 9 months ago
Add the following tracepoints:
* sched_set_need_resched(tsk, cpu, tif)
    Called when the need resched [lazy] flag is set on a task
* sched_switch_vain(preempt, tsk, tsk_state)
    Called when a task is selected again during __schedule
    i.e. prev == next == tsk : no real context switch

Add a new parameter to sched_set_state to identify whether the state
change was due to an explicit call or a signal pending while scheduling.
We now also trace from try_to_block_task in case a signal was pending
and the task is set to runnable.

These tracepoints are useful to describe the Linux task model and are
adapted from the patches by Daniel Bristot de Oliveira
(https://bristot.me/linux-task-model/).

Signed-off-by: Gabriele Monaco <gmonaco@redhat.com>
---
 include/linux/sched.h        |  7 ++++++-
 include/trace/events/sched.h | 17 +++++++++++++++--
 kernel/sched/core.c          | 10 +++++++++-
 3 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 04f808ab8888..4d9da32330bc 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -340,9 +340,11 @@ extern void io_schedule_finish(int token);
 extern long io_schedule_timeout(long timeout);
 extern void io_schedule(void);
 
-/* wrapper function to trace from this header file */
+/* wrapper functions to trace from this header file */
 DECLARE_TRACEPOINT(sched_set_state_tp);
 extern void __trace_set_current_state(int state_value);
+DECLARE_TRACEPOINT(sched_set_need_resched_tp);
+extern void __trace_set_need_resched(struct task_struct *curr, int tif);
 
 /**
  * struct prev_cputime - snapshot of system and user cputime
@@ -2065,6 +2067,9 @@ static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
 
 static inline void set_tsk_need_resched(struct task_struct *tsk)
 {
+	if (tracepoint_enabled(sched_set_need_resched_tp) &&
+	    !test_tsk_thread_flag(tsk, TIF_NEED_RESCHED))
+		__trace_set_need_resched(tsk, TIF_NEED_RESCHED);
 	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
 }
 
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index 2390818b139b..158b9c504fab 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -889,11 +889,24 @@ DECLARE_TRACE(sched_exit_tp,
 	TP_PROTO(bool is_switch, unsigned long ip),
 	TP_ARGS(is_switch, ip));
 
+/*
+ * Tracepoint called when setting the state of a task;
+ * this tracepoint is guaranteed to be called from the waking context of the
+ * task setting the state.
+ */
 DECLARE_TRACE_CONDITION(sched_set_state_tp,
-	TP_PROTO(struct task_struct *tsk, int state),
-	TP_ARGS(tsk, state),
+	TP_PROTO(struct task_struct *tsk, int state, bool from_signal),
+	TP_ARGS(tsk, state, from_signal),
 	TP_CONDITION(!!(tsk->__state) != !!state));
 
+DECLARE_TRACE(sched_set_need_resched_tp,
+	TP_PROTO(struct task_struct *tsk, int cpu, int tif),
+	TP_ARGS(tsk, cpu, tif));
+
+DECLARE_TRACE(sched_switch_vain_tp,
+	TP_PROTO(bool preempt, struct task_struct *tsk, unsigned int prev_state),
+	TP_ARGS(preempt, tsk, prev_state));
+
 #endif /* _TRACE_SCHED_H */
 
 /* This part must be outside protection */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5f844bae1a14..89e81fc7f393 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -494,7 +494,7 @@ EXPORT_TRACEPOINT_SYMBOL(sched_set_state_tp);
 /* Call via the helper macro trace_set_current_state. */
 void __trace_set_current_state(int state_value)
 {
-	trace_sched_set_state_tp(current, state_value);
+	trace_sched_set_state_tp(current, state_value, false);
 }
 EXPORT_SYMBOL(__trace_set_current_state);
 
@@ -1109,6 +1109,7 @@ static void __resched_curr(struct rq *rq, int tif)
 
 	cpu = cpu_of(rq);
 
+	trace_sched_set_need_resched_tp(curr, cpu, tif);
 	if (cpu == smp_processor_id()) {
 		set_ti_thread_flag(cti, tif);
 		if (tif == TIF_NEED_RESCHED)
@@ -1124,6 +1125,11 @@ static void __resched_curr(struct rq *rq, int tif)
 	}
 }
 
+void __trace_set_need_resched(struct task_struct *curr, int tif)
+{
+	trace_sched_set_need_resched_tp(curr, smp_processor_id(), tif);
+}
+
 void resched_curr(struct rq *rq)
 {
 	__resched_curr(rq, TIF_NEED_RESCHED);
@@ -6587,6 +6593,7 @@ static bool try_to_block_task(struct rq *rq, struct task_struct *p,
 	int flags = DEQUEUE_NOCLOCK;
 
 	if (signal_pending_state(task_state, p)) {
+		trace_sched_set_state_tp(p, TASK_RUNNING, true);
 		WRITE_ONCE(p->__state, TASK_RUNNING);
 		*task_state_p = TASK_RUNNING;
 		return false;
@@ -6779,6 +6786,7 @@ static void __sched notrace __schedule(int sched_mode)
 		rq = context_switch(rq, prev, next, &rf);
 	} else {
 		rq_unpin_lock(rq, &rf);
+		trace_sched_switch_vain_tp(preempt, prev, prev_state);
 		__balance_callbacks(rq);
 		raw_spin_rq_unlock_irq(rq);
 	}
-- 
2.49.0
Re: [RFC PATCH v2 06/12] sched: Adapt sched tracepoints for RV task model
Posted by Nam Cao 8 months, 3 weeks ago
On Wed, May 14, 2025 at 10:43:08AM +0200, Gabriele Monaco wrote:
>  DECLARE_TRACE_CONDITION(sched_set_state_tp,
> -	TP_PROTO(struct task_struct *tsk, int state),
> -	TP_ARGS(tsk, state),
> +	TP_PROTO(struct task_struct *tsk, int state, bool from_signal),
> +	TP_ARGS(tsk, state, from_signal),
>  	TP_CONDITION(!!(tsk->__state) != !!state));

Doesn't this break the build? Because the monitors still use the old
signatures?

I understand you adapt the monitor to this new signature in a follow-up
patch. But every commit in the series should be buildable, otherwise you
break "git bisect".

Best regards,
Nam
Re: [RFC PATCH v2 06/12] sched: Adapt sched tracepoints for RV task model
Posted by Gabriele Monaco 8 months, 3 weeks ago
On Mon, 2025-05-19 at 10:29 +0200, Nam Cao wrote:
> On Wed, May 14, 2025 at 10:43:08AM +0200, Gabriele Monaco wrote:
> >  DECLARE_TRACE_CONDITION(sched_set_state_tp,
> > -	TP_PROTO(struct task_struct *tsk, int state),
> > -	TP_ARGS(tsk, state),
> > +	TP_PROTO(struct task_struct *tsk, int state, bool
> > from_signal),
> > +	TP_ARGS(tsk, state, from_signal),
> >  	TP_CONDITION(!!(tsk->__state) != !!state));
> 
> Doesn't this break the build? Because the monitors still use the old
> signatures?
> 
> I understand you adapt the monitor to this new signature in a follow-
> up
> patch. But every commit in the series should be buildable, otherwise
> you
> break "git bisect".
> 

Yeah good point, do you suggest at least fixing signatures in monitors
inside this commit?
I can keep the other commits to actually fix/adapt monitors but at
least allow building from here.

Thanks,
Gabriele
Re: [RFC PATCH v2 06/12] sched: Adapt sched tracepoints for RV task model
Posted by Nam Cao 8 months, 3 weeks ago
On Mon, May 19, 2025 at 10:41:47AM +0200, Gabriele Monaco wrote:
> On Mon, 2025-05-19 at 10:29 +0200, Nam Cao wrote:
> > On Wed, May 14, 2025 at 10:43:08AM +0200, Gabriele Monaco wrote:
> > >  DECLARE_TRACE_CONDITION(sched_set_state_tp,
> > > -	TP_PROTO(struct task_struct *tsk, int state),
> > > -	TP_ARGS(tsk, state),
> > > +	TP_PROTO(struct task_struct *tsk, int state, bool
> > > from_signal),
> > > +	TP_ARGS(tsk, state, from_signal),
> > >  	TP_CONDITION(!!(tsk->__state) != !!state));
> > 
> > Doesn't this break the build? Because the monitors still use the old
> > signatures?
> > 
> > I understand you adapt the monitor to this new signature in a follow-
> > up
> > > patch. But every commit in the series should be buildable, otherwise
> > you
> > break "git bisect".
> > 
> 
> Yeah good point, do you suggest at least fixing signatures in monitors
> inside this commit?

Yes.

> I can keep the other commits to actually fix/adapt monitors but at
> least allow building from here.

 Also yes, I would only change the signature in this commit.

Best regards,
Nam