[PATCH v8 2/4] sched: Don't account irq time if sched_clock_irqtime is disabled

Yafang Shao posted 4 patches 1 year, 1 month ago
There is a newer version of this series
[PATCH v8 2/4] sched: Don't account irq time if sched_clock_irqtime is disabled
Posted by Yafang Shao 1 year, 1 month ago
sched_clock_irqtime may be disabled due to the clock source, in which case
IRQ time should not be accounted. Guard the IRQ time accounting in
update_rq_clock_task() with irqtime_enabled() so that the irq_delta
computation is skipped when it is disabled.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
---
 kernel/sched/core.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 84902936a620..22dfcd3e92ed 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -740,29 +740,31 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 	s64 __maybe_unused steal = 0, irq_delta = 0;
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+	if (irqtime_enabled()) {
+		irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
 
-	/*
-	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
-	 * this case when a previous update_rq_clock() happened inside a
-	 * {soft,}IRQ region.
-	 *
-	 * When this happens, we stop ->clock_task and only update the
-	 * prev_irq_time stamp to account for the part that fit, so that a next
-	 * update will consume the rest. This ensures ->clock_task is
-	 * monotonic.
-	 *
-	 * It does however cause some slight miss-attribution of {soft,}IRQ
-	 * time, a more accurate solution would be to update the irq_time using
-	 * the current rq->clock timestamp, except that would require using
-	 * atomic ops.
-	 */
-	if (irq_delta > delta)
-		irq_delta = delta;
+		/*
+		 * Since irq_time is only updated on {soft,}irq_exit, we might run into
+		 * this case when a previous update_rq_clock() happened inside a
+		 * {soft,}IRQ region.
+		 *
+		 * When this happens, we stop ->clock_task and only update the
+		 * prev_irq_time stamp to account for the part that fit, so that a next
+		 * update will consume the rest. This ensures ->clock_task is
+		 * monotonic.
+		 *
+		 * It does however cause some slight miss-attribution of {soft,}IRQ
+		 * time, a more accurate solution would be to update the irq_time using
+		 * the current rq->clock timestamp, except that would require using
+		 * atomic ops.
+		 */
+		if (irq_delta > delta)
+			irq_delta = delta;
 
-	rq->prev_irq_time += irq_delta;
-	delta -= irq_delta;
-	delayacct_irq(rq->curr, irq_delta);
+		rq->prev_irq_time += irq_delta;
+		delta -= irq_delta;
+		delayacct_irq(rq->curr, irq_delta);
+	}
 #endif
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
 	if (static_key_false((&paravirt_steal_rq_enabled))) {
-- 
2.43.5

Re: [PATCH v8 2/4] sched: Don't account irq time if sched_clock_irqtime is disabled
Posted by Vincent Guittot 1 year, 1 month ago
On Fri, 3 Jan 2025 at 03:24, Yafang Shao <laoar.shao@gmail.com> wrote:
>
> sched_clock_irqtime may be disabled due to the clock source, in which case
> IRQ time should not be accounted. Let's add a conditional check to avoid
> unnecessary logic.
>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> Reviewed-by: Michal Koutný <mkoutny@suse.com>

Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>


> ---
>  kernel/sched/core.c | 44 +++++++++++++++++++++++---------------------
>  1 file changed, 23 insertions(+), 21 deletions(-)
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 84902936a620..22dfcd3e92ed 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -740,29 +740,31 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
>         s64 __maybe_unused steal = 0, irq_delta = 0;
>
>  #ifdef CONFIG_IRQ_TIME_ACCOUNTING
> -       irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
> +       if (irqtime_enabled()) {
> +               irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
>
> -       /*
> -        * Since irq_time is only updated on {soft,}irq_exit, we might run into
> -        * this case when a previous update_rq_clock() happened inside a
> -        * {soft,}IRQ region.
> -        *
> -        * When this happens, we stop ->clock_task and only update the
> -        * prev_irq_time stamp to account for the part that fit, so that a next
> -        * update will consume the rest. This ensures ->clock_task is
> -        * monotonic.
> -        *
> -        * It does however cause some slight miss-attribution of {soft,}IRQ
> -        * time, a more accurate solution would be to update the irq_time using
> -        * the current rq->clock timestamp, except that would require using
> -        * atomic ops.
> -        */
> -       if (irq_delta > delta)
> -               irq_delta = delta;
> +               /*
> +                * Since irq_time is only updated on {soft,}irq_exit, we might run into
> +                * this case when a previous update_rq_clock() happened inside a
> +                * {soft,}IRQ region.
> +                *
> +                * When this happens, we stop ->clock_task and only update the
> +                * prev_irq_time stamp to account for the part that fit, so that a next
> +                * update will consume the rest. This ensures ->clock_task is
> +                * monotonic.
> +                *
> +                * It does however cause some slight miss-attribution of {soft,}IRQ
> +                * time, a more accurate solution would be to update the irq_time using
> +                * the current rq->clock timestamp, except that would require using
> +                * atomic ops.
> +                */
> +               if (irq_delta > delta)
> +                       irq_delta = delta;
>
> -       rq->prev_irq_time += irq_delta;
> -       delta -= irq_delta;
> -       delayacct_irq(rq->curr, irq_delta);
> +               rq->prev_irq_time += irq_delta;
> +               delta -= irq_delta;
> +               delayacct_irq(rq->curr, irq_delta);
> +       }
>  #endif
>  #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
>         if (static_key_false((&paravirt_steal_rq_enabled))) {
> --
> 2.43.5
>
[tip: sched/core] sched: Don't account irq time if sched_clock_irqtime is disabled
Posted by tip-bot2 for Yafang Shao 1 year ago
The following commit has been merged into the sched/core branch of tip:

Commit-ID:     763a744e24a8cfbcc13f699dcdae13a627b8588e
Gitweb:        https://git.kernel.org/tip/763a744e24a8cfbcc13f699dcdae13a627b8588e
Author:        Yafang Shao <laoar.shao@gmail.com>
AuthorDate:    Fri, 03 Jan 2025 10:24:07 +08:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Mon, 13 Jan 2025 14:10:25 +01:00

sched: Don't account irq time if sched_clock_irqtime is disabled

sched_clock_irqtime may be disabled due to the clock source, in which case
IRQ time should not be accounted. Guard the IRQ time accounting in
update_rq_clock_task() with irqtime_enabled() so that the irq_delta
computation is skipped when it is disabled.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Michal Koutný <mkoutny@suse.com>
Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lore.kernel.org/r/20250103022409.2544-3-laoar.shao@gmail.com
---
 kernel/sched/core.c | 44 +++++++++++++++++++++++---------------------
 1 file changed, 23 insertions(+), 21 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 8490293..22dfcd3 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -740,29 +740,31 @@ static void update_rq_clock_task(struct rq *rq, s64 delta)
 	s64 __maybe_unused steal = 0, irq_delta = 0;
 
 #ifdef CONFIG_IRQ_TIME_ACCOUNTING
-	irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
+	if (irqtime_enabled()) {
+		irq_delta = irq_time_read(cpu_of(rq)) - rq->prev_irq_time;
 
-	/*
-	 * Since irq_time is only updated on {soft,}irq_exit, we might run into
-	 * this case when a previous update_rq_clock() happened inside a
-	 * {soft,}IRQ region.
-	 *
-	 * When this happens, we stop ->clock_task and only update the
-	 * prev_irq_time stamp to account for the part that fit, so that a next
-	 * update will consume the rest. This ensures ->clock_task is
-	 * monotonic.
-	 *
-	 * It does however cause some slight miss-attribution of {soft,}IRQ
-	 * time, a more accurate solution would be to update the irq_time using
-	 * the current rq->clock timestamp, except that would require using
-	 * atomic ops.
-	 */
-	if (irq_delta > delta)
-		irq_delta = delta;
+		/*
+		 * Since irq_time is only updated on {soft,}irq_exit, we might run into
+		 * this case when a previous update_rq_clock() happened inside a
+		 * {soft,}IRQ region.
+		 *
+		 * When this happens, we stop ->clock_task and only update the
+		 * prev_irq_time stamp to account for the part that fit, so that a next
+		 * update will consume the rest. This ensures ->clock_task is
+		 * monotonic.
+		 *
+		 * It does however cause some slight miss-attribution of {soft,}IRQ
+		 * time, a more accurate solution would be to update the irq_time using
+		 * the current rq->clock timestamp, except that would require using
+		 * atomic ops.
+		 */
+		if (irq_delta > delta)
+			irq_delta = delta;
 
-	rq->prev_irq_time += irq_delta;
-	delta -= irq_delta;
-	delayacct_irq(rq->curr, irq_delta);
+		rq->prev_irq_time += irq_delta;
+		delta -= irq_delta;
+		delayacct_irq(rq->curr, irq_delta);
+	}
 #endif
 #ifdef CONFIG_PARAVIRT_TIME_ACCOUNTING
 	if (static_key_false((&paravirt_steal_rq_enabled))) {