[PATCH v10 03/20] timers: Move marking timer bases idle into tick_nohz_stop_tick()

Anna-Maria Behnsen posted 20 patches 1 year, 11 months ago
There is a newer version of this series
[PATCH v10 03/20] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Anna-Maria Behnsen 1 year, 11 months ago
 tick_nohz_stop_tick()

The timer base is marked idle when get_next_timer_interrupt() is
executed. But the decision whether the tick will be stopped and whether the
system is able to go idle is done later. When the timer base is marked
idle and a new first timer is enqueued remotely, an IPI is raised, even if
it is not required because the tick is not stopped and the timer base is
evaluated again at the next tick.

To prevent this, the timer base is marked idle in tick_nohz_stop_tick() and
get_next_timer_interrupt() is streamlined by only looking for the next
timer interrupt. All other work is postponed to
timer_base_try_to_set_idle() which is called by tick_nohz_stop_tick().

With this, tick_sched::tick_stopped and timer_base::is_idle are always in
sync. So there is no longer any need to execute timer_clear_idle() in
tick_nohz_idle_retain_tick(). This was required before, as
tick_nohz_next_event() set timer_base::is_idle even if the tick would not
be stopped. As a result, timer_clear_idle() is now only executed when the
timer base is idle, so the check whether the timer base is idle is no
longer required either.

While at it fix some nearby whitespace damage as well.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
 kernel/time/tick-internal.h |  1 +
 kernel/time/tick-sched.c    | 47 +++++++++++++++++++++--------
 kernel/time/timer.c         | 60 ++++++++++++++++++++++++++-----------
 3 files changed, 78 insertions(+), 30 deletions(-)

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 481b7ab65e2c..47df30b871e4 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -163,6 +163,7 @@ static inline void timers_update_nohz(void) { }
 DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
 
 extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle);
 void timer_clear_idle(void);
 
 #define CLOCK_SET_WALL							\
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index a17d26002831..c6223afc801f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -849,11 +849,6 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 */
 	delta = next_tick - basemono;
 	if (delta <= (u64)TICK_NSEC) {
-		/*
-		 * Tell the timer code that the base is not idle, i.e. undo
-		 * the effect of get_next_timer_interrupt():
-		 */
-		timer_clear_idle();
 		/*
 		 * We've not stopped the tick yet, and there's a timer in the
 		 * next period, so no point in stopping it either, bail.
@@ -889,12 +884,41 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 {
 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
+	unsigned long basejiff = ts->last_jiffies;
 	u64 basemono = ts->timer_expires_base;
-	u64 expires = ts->timer_expires;
+	bool timer_idle;
+	u64 expires;
 
 	/* Make sure we won't be trying to stop it twice in a row. */
 	ts->timer_expires_base = 0;
 
+	/*
+	 * Now the tick should be stopped definitely - so the timer base needs
+	 * to be marked idle as well to not miss a newly queued timer.
+	 */
+	expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle);
+	if (!timer_idle) {
+		/*
+		 * Do not clear tick_stopped here when it was already set - it
+		 * will be retained on the next idle iteration when the tick
+		 * expired earlier than expected.
+		 */
+		expires = basemono + TICK_NSEC;
+	} else if (expires > ts->timer_expires) {
+		/*
+		 * This path could only happen when the first timer was removed
+		 * between calculating the possible sleep length and now (when
+		 * high resolution mode is not active, timer could also be a
+		 * hrtimer).
+		 *
+		 * We have to stick to the original calculated expiry value to
+		 * not stop the tick for too long with a shallow C-state (which
+		 * was programmed by cpuidle because of an early next expiration
+		 * value).
+		 */
+		expires = ts->timer_expires;
+	}
+
 	/*
 	 * If this CPU is the one which updates jiffies, then give up
 	 * the assignment and let it be taken by the CPU which runs
@@ -930,6 +954,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	 * scheduler tick in tick_nohz_restart_sched_tick().
 	 */
 	if (!ts->tick_stopped) {
+		/* If the timer base is not idle, retain the tick. */
+		if (!timer_idle)
+			return;
+
 		calc_load_nohz_start();
 		quiet_vmstat();
 
@@ -991,7 +1019,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	touch_softlockup_watchdog_sched();
 
 	/* Cancel the scheduled timer and restore the tick: */
-	ts->tick_stopped  = 0;
+	ts->tick_stopped = 0;
 	tick_nohz_restart(ts, now);
 }
 
@@ -1147,11 +1175,6 @@ void tick_nohz_idle_stop_tick(void)
 void tick_nohz_idle_retain_tick(void)
 {
 	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
-	/*
-	 * Undo the effect of get_next_timer_interrupt() called from
-	 * tick_nohz_next_event().
-	 */
-	timer_clear_idle();
 }
 
 /**
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2aea55d53416..3a668060692e 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1911,19 +1911,22 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
 	return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
 }
 
-static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
+static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
+					     bool *idle)
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 	unsigned long nextevt = basej + NEXT_TIMER_MAX_DELTA;
 	u64 expires = KTIME_MAX;
-	bool was_idle;
 
 	/*
 	 * Pretend that there is no timer pending if the cpu is offline.
 	 * Possible pending timers will be migrated later to an active cpu.
 	 */
-	if (cpu_is_offline(smp_processor_id()))
+	if (cpu_is_offline(smp_processor_id())) {
+		if (idle)
+			*idle = true;
 		return expires;
+	}
 
 	raw_spin_lock(&base->lock);
 	if (base->next_expiry_recalc)
@@ -1953,17 +1956,26 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
 	__forward_timer_base(base, basej);
 
 	/*
-	 * Base is idle if the next event is more than a tick away.
-	 *
-	 * If the base is marked idle then any timer add operation must forward
-	 * the base clk itself to keep granularity small. This idle logic is
-	 * only maintained for the BASE_STD base, deferrable timers may still
-	 * see large granularity skew (by design).
+	 * Set base->is_idle only when caller is timer_base_try_to_set_idle()
 	 */
-	was_idle = base->is_idle;
-	base->is_idle = time_after(nextevt, basej + 1);
-	if (was_idle != base->is_idle)
-		trace_timer_base_idle(base->is_idle, base->cpu);
+	if (idle) {
+		/*
+		 * Base is idle if the next event is more than a tick away.
+		 *
+		 * If the base is marked idle then any timer add operation must
+		 * forward the base clk itself to keep granularity small. This
+		 * idle logic is only maintained for the BASE_STD base,
+		 * deferrable timers may still see large granularity skew (by
+		 * design).
+		 */
+		if (!base->is_idle) {
+			if (time_after(nextevt, basej + 1)) {
+				base->is_idle = true;
+				trace_timer_base_idle(true, base->cpu);
+			}
+		}
+		*idle = base->is_idle;
+	}
 
 	raw_spin_unlock(&base->lock);
 
@@ -1980,7 +1992,21 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
  */
 u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 {
-	return __get_next_timer_interrupt(basej, basem);
+	return __get_next_timer_interrupt(basej, basem, NULL);
+}
+
+/**
+ * timer_base_try_to_set_idle() - Try to set the idle state of the timer bases
+ * @basej:	base time jiffies
+ * @basem:	base time clock monotonic
+ * @idle:	pointer to store the value of timer_base->is_idle
+ *
+ * Returns the tick aligned clock monotonic time of the next pending
+ * timer or KTIME_MAX if no timer is pending.
+ */
+u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle)
+{
+	return __get_next_timer_interrupt(basej, basem, idle);
 }
 
 /**
@@ -1998,10 +2024,8 @@ void timer_clear_idle(void)
 	 * sending the IPI a few instructions smaller for the cost of taking
 	 * the lock in the exit from idle path.
 	 */
-	if (base->is_idle) {
-		base->is_idle = false;
-		trace_timer_base_idle(false, smp_processor_id());
-	}
+	base->is_idle = false;
+	trace_timer_base_idle(false, smp_processor_id());
 }
 #endif
 
-- 
2.39.2
Re: [PATCH v10 03/20] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Frederic Weisbecker 1 year, 11 months ago
Le Mon, Jan 15, 2024 at 03:37:26PM +0100, Anna-Maria Behnsen a écrit :
> @@ -889,12 +884,41 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
>  static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>  {
>  	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
> +	unsigned long basejiff = ts->last_jiffies;
>  	u64 basemono = ts->timer_expires_base;
> -	u64 expires = ts->timer_expires;
> +	bool timer_idle;
> +	u64 expires;
>  
>  	/* Make sure we won't be trying to stop it twice in a row. */
>  	ts->timer_expires_base = 0;
>  
> +	/*
> +	 * Now the tick should be stopped definitely - so the timer base needs
> +	 * to be marked idle as well to not miss a newly queued timer.
> +	 */
> +	expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle);
> +	if (!timer_idle) {
> +		/*
> +		 * Do not clear tick_stopped here when it was already set - it

Can that really happen? Looking at __get_next_timer_interrupt(), you're making a
behavioural change: if base->is_idle was previously set and the next timer is
now below/equal a jiffy, base->is_idle is not going to be cleared by
__get_next_timer_interrupt().

Therefore you shouldn't observe ts->tick_stopped && !timer_idle

But I'm assuming that behavioural change wasn't intended?

> +		 * will be retained on the next idle iteration when the tick
> +		 * expired earlier than expected.

I'm a bit confused by this sentence.

> +		 */
> +		expires = basemono + TICK_NSEC;

Do you need this line?

> @@ -1147,11 +1175,6 @@ void tick_nohz_idle_stop_tick(void)
>  void tick_nohz_idle_retain_tick(void)
>  {
>  	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));

Looks like the content of tick_nohz_retain_tick() can move here now.

> -	/*
> -	 * Undo the effect of get_next_timer_interrupt() called from
> -	 * tick_nohz_next_event().
> -	 */
> -	timer_clear_idle();
>  }

Thanks.
Re: [PATCH v10 03/20] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Anna-Maria Behnsen 1 year, 10 months ago
Frederic Weisbecker <frederic@kernel.org> writes:

> Le Mon, Jan 15, 2024 at 03:37:26PM +0100, Anna-Maria Behnsen a écrit :
>> @@ -889,12 +884,41 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
>>  static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>>  {
>>  	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
>> +	unsigned long basejiff = ts->last_jiffies;
>>  	u64 basemono = ts->timer_expires_base;
>> -	u64 expires = ts->timer_expires;
>> +	bool timer_idle;
>> +	u64 expires;
>>  
>>  	/* Make sure we won't be trying to stop it twice in a row. */
>>  	ts->timer_expires_base = 0;
>>  
>> +	/*
>> +	 * Now the tick should be stopped definitely - so the timer base needs
>> +	 * to be marked idle as well to not miss a newly queued timer.
>> +	 */
>> +	expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle);
>> +	if (!timer_idle) {
>> +		/*
>> +		 * Do not clear tick_stopped here when it was already set - it
>
> Can that really happen? Looking at __get_next_timer_interrupt(), you're making a
> behavioural change: if base->is_idle was previously set and the next timer is
> now below/equal a jiffy, base->is_idle is not going to be cleared by
> __get_next_timer_interrupt().
>
> Therefore you shouldn't observe ts->tick_stopped && !timer_idle
>
> But I'm assuming that behavioural change wasn't intended?

It was intended to keep tick_stopped and base->is_idle in sync. So when
tick_stopped is set also base->is_idle needs to be set and dropping it
before tick_stopped is dropped will break the plan to keep it in sync.

>> +		 * will be retained on the next idle iteration when the tick
>> +		 * expired earlier than expected.
>
> I'm a bit confused by this sentence.

Me too :) It is there because of a previous version and I didn't clean
it up properly.

>> +		 */
>> +		expires = basemono + TICK_NSEC;
>
> Do you need this line?

No. After revisiting it once more, it is not required, as it should be
set properly by the return value of timer_base_try_to_set_idle(). So I
should be able to completely drop this first part of the if statement.

>
>> @@ -1147,11 +1175,6 @@ void tick_nohz_idle_stop_tick(void)
>>  void tick_nohz_idle_retain_tick(void)
>>  {
>>  	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
>
> Looks like the content of tick_nohz_retain_tick() can move here now.

I can do this.

>> -	/*
>> -	 * Undo the effect of get_next_timer_interrupt() called from
>> -	 * tick_nohz_next_event().
>> -	 */
>> -	timer_clear_idle();
>>  }
>
> Thanks.

Thanks,

	Anna-Maria
Re: [PATCH v10 03/20] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Frederic Weisbecker 1 year, 10 months ago
Le Mon, Jan 22, 2024 at 12:45:03PM +0100, Anna-Maria Behnsen a écrit :
> Frederic Weisbecker <frederic@kernel.org> writes:
> 
> > Le Mon, Jan 15, 2024 at 03:37:26PM +0100, Anna-Maria Behnsen a écrit :
> >> @@ -889,12 +884,41 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
> >>  static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
> >>  {
> >>  	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
> >> +	unsigned long basejiff = ts->last_jiffies;
> >>  	u64 basemono = ts->timer_expires_base;
> >> -	u64 expires = ts->timer_expires;
> >> +	bool timer_idle;
> >> +	u64 expires;
> >>  
> >>  	/* Make sure we won't be trying to stop it twice in a row. */
> >>  	ts->timer_expires_base = 0;
> >>  
> >> +	/*
> >> +	 * Now the tick should be stopped definitely - so the timer base needs
> >> +	 * to be marked idle as well to not miss a newly queued timer.
> >> +	 */
> >> +	expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle);
> >> +	if (!timer_idle) {
> >> +		/*
> >> +		 * Do not clear tick_stopped here when it was already set - it
> >
> > Can that really happen? Looking at __get_next_timer_interrupt(), you're making a
> > behavioural change: if base->is_idle was previously set and the next timer is
> > now below/equal a jiffy, base->is_idle is not going to be cleared by
> > __get_next_timer_interrupt().
> >
> > Therefore you shouldn't observe ts->tick_stopped && !timer_idle
> >
> > But I'm assuming that behavioural change wasn't intended?
> 
> It was intended to keep tick_stopped and base->is_idle in sync. So when
> tick_stopped is set also base->is_idle needs to be set and dropping it
> before tick_stopped is dropped will break the plan to keep it in sync.

Ok that sounds good.

Thanks!
[PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Anna-Maria Behnsen 1 year, 10 months ago
The timer base is marked idle when get_next_timer_interrupt() is
executed. But the decision whether the tick will be stopped and whether the
system is able to go idle is done later. When the timer base is marked
idle and a new first timer is enqueued remotely, an IPI is raised, even if
it is not required because the tick is not stopped and the timer base is
evaluated again at the next tick.

To prevent this, the timer base is marked idle in tick_nohz_stop_tick() and
get_next_timer_interrupt() is streamlined by only looking for the next timer
interrupt. All other work is postponed to timer_base_try_to_set_idle() which is
called by tick_nohz_stop_tick(). timer_base_try_to_set_idle() never resets
timer_base::is_idle state. This is done when the tick is restarted via
tick_nohz_restart_sched_tick().

With this, tick_sched::tick_stopped and timer_base::is_idle are always in
sync. So there is no longer any need to execute timer_clear_idle() in
tick_nohz_idle_retain_tick(). This was required before, as
tick_nohz_next_event() set timer_base::is_idle even if the tick would not be
stopped. As a result, timer_clear_idle() is now only executed when the timer
base is idle, so the check whether the timer base is idle is no longer
required either.

While at it fix some nearby whitespace damage as well.

Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
---
v10a:
 - Drop the unnecessary if branch which handles return value of
   timer_base_try_to_set_idle()
 - Do not open code 'tick_nohz_retain_tick()' and keep
   tick_nohz_idle_retain_tick() as is.
---
 kernel/time/tick-internal.h |  1 +
 kernel/time/tick-sched.c    | 40 +++++++++++++++++--------
 kernel/time/timer.c         | 60 ++++++++++++++++++++++++++-----------
 3 files changed, 71 insertions(+), 30 deletions(-)

diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 481b7ab65e2c..47df30b871e4 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -163,6 +163,7 @@ static inline void timers_update_nohz(void) { }
 DECLARE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases);
 
 extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
+u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle);
 void timer_clear_idle(void);
 
 #define CLOCK_SET_WALL							\
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 01fb50c1b17e..4c7ccb1c9307 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -849,11 +849,6 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 	 */
 	delta = next_tick - basemono;
 	if (delta <= (u64)TICK_NSEC) {
-		/*
-		 * Tell the timer code that the base is not idle, i.e. undo
-		 * the effect of get_next_timer_interrupt():
-		 */
-		timer_clear_idle();
 		/*
 		 * We've not stopped the tick yet, and there's a timer in the
 		 * next period, so no point in stopping it either, bail.
@@ -889,12 +884,34 @@ static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
 static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 {
 	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);
+	unsigned long basejiff = ts->last_jiffies;
 	u64 basemono = ts->timer_expires_base;
-	u64 expires = ts->timer_expires;
+	bool timer_idle;
+	u64 expires;
 
 	/* Make sure we won't be trying to stop it twice in a row. */
 	ts->timer_expires_base = 0;
 
+	/*
+	 * Now the tick should be stopped definitely - so the timer base needs
+	 * to be marked idle as well to not miss a newly queued timer.
+	 */
+	expires = timer_base_try_to_set_idle(basejiff, basemono, &timer_idle);
+	if (expires > ts->timer_expires) {
+		/*
+		 * This path could only happen when the first timer was removed
+		 * between calculating the possible sleep length and now (when
+		 * high resolution mode is not active, timer could also be a
+		 * hrtimer).
+		 *
+		 * We have to stick to the original calculated expiry value to
+		 * not stop the tick for too long with a shallow C-state (which
+		 * was programmed by cpuidle because of an early next expiration
+		 * value).
+		 */
+		expires = ts->timer_expires;
+	}
+
 	/*
 	 * If this CPU is the one which updates jiffies, then give up
 	 * the assignment and let it be taken by the CPU which runs
@@ -930,6 +947,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	 * scheduler tick in tick_nohz_restart_sched_tick().
 	 */
 	if (!ts->tick_stopped) {
+		/* If the timer base is not idle, retain the tick. */
+		if (!timer_idle)
+			return;
+
 		calc_load_nohz_start();
 		quiet_vmstat();
 
@@ -991,7 +1012,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now)
 	touch_softlockup_watchdog_sched();
 
 	/* Cancel the scheduled timer and restore the tick: */
-	ts->tick_stopped  = 0;
+	ts->tick_stopped = 0;
 	tick_nohz_restart(ts, now);
 }
 
@@ -1147,11 +1168,6 @@ void tick_nohz_idle_stop_tick(void)
 void tick_nohz_idle_retain_tick(void)
 {
 	tick_nohz_retain_tick(this_cpu_ptr(&tick_cpu_sched));
-	/*
-	 * Undo the effect of get_next_timer_interrupt() called from
-	 * tick_nohz_next_event().
-	 */
-	timer_clear_idle();
 }
 
 /**
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 2aea55d53416..3a668060692e 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1911,19 +1911,22 @@ static u64 cmp_next_hrtimer_event(u64 basem, u64 expires)
 	return DIV_ROUND_UP_ULL(nextevt, TICK_NSEC) * TICK_NSEC;
 }
 
-static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
+static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem,
+					     bool *idle)
 {
 	struct timer_base *base = this_cpu_ptr(&timer_bases[BASE_STD]);
 	unsigned long nextevt = basej + NEXT_TIMER_MAX_DELTA;
 	u64 expires = KTIME_MAX;
-	bool was_idle;
 
 	/*
 	 * Pretend that there is no timer pending if the cpu is offline.
 	 * Possible pending timers will be migrated later to an active cpu.
 	 */
-	if (cpu_is_offline(smp_processor_id()))
+	if (cpu_is_offline(smp_processor_id())) {
+		if (idle)
+			*idle = true;
 		return expires;
+	}
 
 	raw_spin_lock(&base->lock);
 	if (base->next_expiry_recalc)
@@ -1953,17 +1956,26 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
 	__forward_timer_base(base, basej);
 
 	/*
-	 * Base is idle if the next event is more than a tick away.
-	 *
-	 * If the base is marked idle then any timer add operation must forward
-	 * the base clk itself to keep granularity small. This idle logic is
-	 * only maintained for the BASE_STD base, deferrable timers may still
-	 * see large granularity skew (by design).
+	 * Set base->is_idle only when caller is timer_base_try_to_set_idle()
 	 */
-	was_idle = base->is_idle;
-	base->is_idle = time_after(nextevt, basej + 1);
-	if (was_idle != base->is_idle)
-		trace_timer_base_idle(base->is_idle, base->cpu);
+	if (idle) {
+		/*
+		 * Base is idle if the next event is more than a tick away.
+		 *
+		 * If the base is marked idle then any timer add operation must
+		 * forward the base clk itself to keep granularity small. This
+		 * idle logic is only maintained for the BASE_STD base,
+		 * deferrable timers may still see large granularity skew (by
+		 * design).
+		 */
+		if (!base->is_idle) {
+			if (time_after(nextevt, basej + 1)) {
+				base->is_idle = true;
+				trace_timer_base_idle(true, base->cpu);
+			}
+		}
+		*idle = base->is_idle;
+	}
 
 	raw_spin_unlock(&base->lock);
 
@@ -1980,7 +1992,21 @@ static inline u64 __get_next_timer_interrupt(unsigned long basej, u64 basem)
  */
 u64 get_next_timer_interrupt(unsigned long basej, u64 basem)
 {
-	return __get_next_timer_interrupt(basej, basem);
+	return __get_next_timer_interrupt(basej, basem, NULL);
+}
+
+/**
+ * timer_base_try_to_set_idle() - Try to set the idle state of the timer bases
+ * @basej:	base time jiffies
+ * @basem:	base time clock monotonic
+ * @idle:	pointer to store the value of timer_base->is_idle
+ *
+ * Returns the tick aligned clock monotonic time of the next pending
+ * timer or KTIME_MAX if no timer is pending.
+ */
+u64 timer_base_try_to_set_idle(unsigned long basej, u64 basem, bool *idle)
+{
+	return __get_next_timer_interrupt(basej, basem, idle);
 }
 
 /**
@@ -1998,10 +2024,8 @@ void timer_clear_idle(void)
 	 * sending the IPI a few instructions smaller for the cost of taking
 	 * the lock in the exit from idle path.
 	 */
-	if (base->is_idle) {
-		base->is_idle = false;
-		trace_timer_base_idle(false, smp_processor_id());
-	}
+	base->is_idle = false;
+	trace_timer_base_idle(false, smp_processor_id());
 }
 #endif
 
-- 
2.39.2
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Frederic Weisbecker 1 year, 10 months ago
Le Mon, Feb 19, 2024 at 09:52:36AM +0100, Anna-Maria Behnsen a écrit :
> The timer base is marked idle when get_next_timer_interrupt() is
> executed. But the decision whether the tick will be stopped and whether the
> system is able to go idle is done later. When the timer bases is marked
> idle and a new first timer is enqueued remote an IPI is raised. Even if it
> is not required because the tick is not stopped and the timer base is
> evaluated again at the next tick.
> 
> To prevent this, the timer base is marked idle in tick_nohz_stop_tick() and
> get_next_timer_interrupt() is streamlined by only looking for the next timer
> interrupt. All other work is postponed to timer_base_try_to_set_idle() which is
> called by tick_nohz_stop_tick(). timer_base_try_to_set_idle() never resets
> timer_base::is_idle state. This is done when the tick is restarted via
> tick_nohz_restart_sched_tick().
> 
> With this, tick_sched::tick_stopped and timer_base::is_idle are always in
> sync. So there is no longer the need to execute timer_clear_idle() in
> tick_nohz_idle_retain_tick(). This was required before, as
> tick_nohz_next_event() set timer_base::is_idle even if the tick would not be
> stopped. So timer_clear_idle() is only executed, when timer base is idle. So the
> check whether timer base is idle, is now no longer required as well.
> 
> While at it fix some nearby whitespace damage as well.
> 
> Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>

Reviewed-by: Frederic Weisbecker <frederic@kernel.org>

Just a small detail below that can be fixed in a further patch:

> @@ -930,6 +947,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>  	 * scheduler tick in tick_nohz_restart_sched_tick().
>  	 */
>  	if (!ts->tick_stopped) {
> +		/* If the timer base is not idle, retain the tick. */
> +		if (!timer_idle)
> +			return;

This happens after tick_do_timer_cpu has been set to TICK_DO_TIMER_NONE. Ideally
it would be better to do it before. Not that it hurts in practice: another CPU
or this one will take the duty. But it looks weird to stop halfway.

Thanks!
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Anna-Maria Behnsen 1 year, 10 months ago
Frederic Weisbecker <frederic@kernel.org> writes:

> Le Mon, Feb 19, 2024 at 09:52:36AM +0100, Anna-Maria Behnsen a écrit :
>> The timer base is marked idle when get_next_timer_interrupt() is
>> executed. But the decision whether the tick will be stopped and whether the
>> system is able to go idle is done later. When the timer bases is marked
>> idle and a new first timer is enqueued remote an IPI is raised. Even if it
>> is not required because the tick is not stopped and the timer base is
>> evaluated again at the next tick.
>> 
>> To prevent this, the timer base is marked idle in tick_nohz_stop_tick() and
>> get_next_timer_interrupt() is streamlined by only looking for the next timer
>> interrupt. All other work is postponed to timer_base_try_to_set_idle() which is
>> called by tick_nohz_stop_tick(). timer_base_try_to_set_idle() never resets
>> timer_base::is_idle state. This is done when the tick is restarted via
>> tick_nohz_restart_sched_tick().
>> 
>> With this, tick_sched::tick_stopped and timer_base::is_idle are always in
>> sync. So there is no longer the need to execute timer_clear_idle() in
>> tick_nohz_idle_retain_tick(). This was required before, as
>> tick_nohz_next_event() set timer_base::is_idle even if the tick would not be
>> stopped. So timer_clear_idle() is only executed, when timer base is idle. So the
>> check whether timer base is idle, is now no longer required as well.
>> 
>> While at it fix some nearby whitespace damage as well.
>> 
>> Signed-off-by: Anna-Maria Behnsen <anna-maria@linutronix.de>
>
> Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
>
> Just a small detail below that can be fixed in a further patch:
>
>> @@ -930,6 +947,10 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>>  	 * scheduler tick in tick_nohz_restart_sched_tick().
>>  	 */
>>  	if (!ts->tick_stopped) {
>> +		/* If the timer base is not idle, retain the tick. */
>> +		if (!timer_idle)
>> +			return;
>
> This happens after tick_do_timer_cpu has been set to TICK_DO_TIMER_NONE. Ideally
> it would be better to do it before. Not that it hurts in practice: another CPU
> or this one will take the duty. But it looks weird to stop halfway.
>

Yes, you are right. I would prefer to clean it up directly and add
another patch before this one which simply moves the
TICK_DO_TIMER_NONE related block after the !ts->tick_stopped
block. The changed order shouldn't be a problem at the moment either,
or am I wrong?

Thanks,

	Anna-Maria

---8<----
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 01fb50c1b17e..b93f0e6f273f 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -895,21 +895,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 	/* Make sure we won't be trying to stop it twice in a row. */
 	ts->timer_expires_base = 0;
 
-	/*
-	 * If this CPU is the one which updates jiffies, then give up
-	 * the assignment and let it be taken by the CPU which runs
-	 * the tick timer next, which might be this CPU as well. If we
-	 * don't drop this here, the jiffies might be stale and
-	 * do_timer() never gets invoked. Keep track of the fact that it
-	 * was the one which had the do_timer() duty last.
-	 */
-	if (cpu == tick_do_timer_cpu) {
-		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
-		ts->do_timer_last = 1;
-	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
-		ts->do_timer_last = 0;
-	}
-
 	/* Skip reprogram of event if it's not changed */
 	if (ts->tick_stopped && (expires == ts->next_tick)) {
 		/* Sanity check: make sure clockevent is actually programmed */
@@ -938,6 +923,21 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 		trace_tick_stop(1, TICK_DEP_MASK_NONE);
 	}
 
+	/*
+	 * If this CPU is the one which updates jiffies, then give up
+	 * the assignment and let it be taken by the CPU which runs
+	 * the tick timer next, which might be this CPU as well. If we
+	 * don't drop this here, the jiffies might be stale and
+	 * do_timer() never gets invoked. Keep track of the fact that it
+	 * was the one which had the do_timer() duty last.
+	 */
+	if (cpu == tick_do_timer_cpu) {
+		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+		ts->do_timer_last = 1;
+	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
+		ts->do_timer_last = 0;
+	}
+
 	ts->next_tick = expires;
 
 	/*
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Frederic Weisbecker 1 year, 10 months ago
Le Tue, Feb 20, 2024 at 11:48:19AM +0100, Anna-Maria Behnsen a écrit :
> Frederic Weisbecker <frederic@kernel.org> writes:
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index 01fb50c1b17e..b93f0e6f273f 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -895,21 +895,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>  	/* Make sure we won't be trying to stop it twice in a row. */
>  	ts->timer_expires_base = 0;
>  
> -	/*
> -	 * If this CPU is the one which updates jiffies, then give up
> -	 * the assignment and let it be taken by the CPU which runs
> -	 * the tick timer next, which might be this CPU as well. If we
> -	 * don't drop this here, the jiffies might be stale and
> -	 * do_timer() never gets invoked. Keep track of the fact that it
> -	 * was the one which had the do_timer() duty last.
> -	 */
> -	if (cpu == tick_do_timer_cpu) {
> -		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> -		ts->do_timer_last = 1;
> -	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> -		ts->do_timer_last = 0;
> -	}
> -
>  	/* Skip reprogram of event if it's not changed */
>  	if (ts->tick_stopped && (expires == ts->next_tick)) {
>  		/* Sanity check: make sure clockevent is actually programmed */

That should work but then you lose the optimization that resets
ts->do_timer_last even if the next timer hasn't changed.

Thanks.



> @@ -938,6 +923,21 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>  		trace_tick_stop(1, TICK_DEP_MASK_NONE);
>  	}
>  
> +	/*
> +	 * If this CPU is the one which updates jiffies, then give up
> +	 * the assignment and let it be taken by the CPU which runs
> +	 * the tick timer next, which might be this CPU as well. If we
> +	 * don't drop this here, the jiffies might be stale and
> +	 * do_timer() never gets invoked. Keep track of the fact that it
> +	 * was the one which had the do_timer() duty last.
> +	 */
> +	if (cpu == tick_do_timer_cpu) {
> +		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> +		ts->do_timer_last = 1;
> +	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> +		ts->do_timer_last = 0;
> +	}
> +
>  	ts->next_tick = expires;
>  
>  	/*
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Anna-Maria Behnsen 1 year, 9 months ago
Frederic Weisbecker <frederic@kernel.org> writes:

> Le Tue, Feb 20, 2024 at 11:48:19AM +0100, Anna-Maria Behnsen a écrit :
>> Frederic Weisbecker <frederic@kernel.org> writes:
>> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
>> index 01fb50c1b17e..b93f0e6f273f 100644
>> --- a/kernel/time/tick-sched.c
>> +++ b/kernel/time/tick-sched.c
>> @@ -895,21 +895,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>>  	/* Make sure we won't be trying to stop it twice in a row. */
>>  	ts->timer_expires_base = 0;
>>  
>> -	/*
>> -	 * If this CPU is the one which updates jiffies, then give up
>> -	 * the assignment and let it be taken by the CPU which runs
>> -	 * the tick timer next, which might be this CPU as well. If we
>> -	 * don't drop this here, the jiffies might be stale and
>> -	 * do_timer() never gets invoked. Keep track of the fact that it
>> -	 * was the one which had the do_timer() duty last.
>> -	 */
>> -	if (cpu == tick_do_timer_cpu) {
>> -		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
>> -		ts->do_timer_last = 1;
>> -	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
>> -		ts->do_timer_last = 0;
>> -	}
>> -
>>  	/* Skip reprogram of event if it's not changed */
>>  	if (ts->tick_stopped && (expires == ts->next_tick)) {
>>  		/* Sanity check: make sure clockevent is actually programmed */
>
> That should work but then you lose the optimization that resets
> ts->do_timer_last even if the next timer hasn't changed.
>

Besides this optimization, I see another problem. But I'm not
sure if I understood it correctly: When the CPU drops the
tick_do_timer_cpu assignment and stops the tick, is it possible that
this CPU nevertheless executes tick_sched_do_timer() and then takes
the tick_do_timer_cpu assignment again?

Then it is mandatory that we also drop the assignment in the
path where the tick is already stopped. Otherwise the problem described
in the comment could happen with stale jiffies, no?

Thanks

> Thanks.
>
>
>
>> @@ -938,6 +923,21 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>>  		trace_tick_stop(1, TICK_DEP_MASK_NONE);
>>  	}
>>  
>> +	/*
>> +	 * If this CPU is the one which updates jiffies, then give up
>> +	 * the assignment and let it be taken by the CPU which runs
>> +	 * the tick timer next, which might be this CPU as well. If we
>> +	 * don't drop this here, the jiffies might be stale and
>> +	 * do_timer() never gets invoked. Keep track of the fact that it
>> +	 * was the one which had the do_timer() duty last.
>> +	 */
>> +	if (cpu == tick_do_timer_cpu) {
>> +		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
>> +		ts->do_timer_last = 1;
>> +	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
>> +		ts->do_timer_last = 0;
>> +	}
>> +
>>  	ts->next_tick = expires;
>>  
>>  	/*
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Frederic Weisbecker 1 year, 9 months ago
Le Tue, Feb 20, 2024 at 01:02:18PM +0100, Anna-Maria Behnsen a écrit :
> Frederic Weisbecker <frederic@kernel.org> writes:
> 
> > Le Tue, Feb 20, 2024 at 11:48:19AM +0100, Anna-Maria Behnsen a écrit :
> >> Frederic Weisbecker <frederic@kernel.org> writes:
> >> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> >> index 01fb50c1b17e..b93f0e6f273f 100644
> >> --- a/kernel/time/tick-sched.c
> >> +++ b/kernel/time/tick-sched.c
> >> @@ -895,21 +895,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
> >>  	/* Make sure we won't be trying to stop it twice in a row. */
> >>  	ts->timer_expires_base = 0;
> >>  
> >> -	/*
> >> -	 * If this CPU is the one which updates jiffies, then give up
> >> -	 * the assignment and let it be taken by the CPU which runs
> >> -	 * the tick timer next, which might be this CPU as well. If we
> >> -	 * don't drop this here, the jiffies might be stale and
> >> -	 * do_timer() never gets invoked. Keep track of the fact that it
> >> -	 * was the one which had the do_timer() duty last.
> >> -	 */
> >> -	if (cpu == tick_do_timer_cpu) {
> >> -		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> >> -		ts->do_timer_last = 1;
> >> -	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> >> -		ts->do_timer_last = 0;
> >> -	}
> >> -
> >>  	/* Skip reprogram of event if it's not changed */
> >>  	if (ts->tick_stopped && (expires == ts->next_tick)) {
> >>  		/* Sanity check: make sure clockevent is actually programmed */
> >
> > That should work but then you lose the optimization that resets
> > ts->do_timer_last even if the next timer hasn't changed.
> >
> 
> Beside of this optimization thing, I see onther problem. But I'm not
> sure, if I understood it correctly: When the CPU drops the
> tick_do_timer_cpu assignment and stops the tick, it is possible, that
> this CPU nevertheless executes tick_sched_do_timer() and then reassigns
> to tick_do_timer_cpu?

Yes, but in this case a timer interrupt has executed and ts->next_tick
is cleared, so the skip-reprogram branch above is not taken.

Thanks.

> 
> Then it is mandatory that we have this drop the assignment also in the
> path when the tick is already stopped. Otherwise the problem described
> in the comment could happen with stale jiffies, no?
> 
> Thanks
> 
> > Thanks.
> >
> >
> >
> >> @@ -938,6 +923,21 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
> >>  		trace_tick_stop(1, TICK_DEP_MASK_NONE);
> >>  	}
> >>  
> >> +	/*
> >> +	 * If this CPU is the one which updates jiffies, then give up
> >> +	 * the assignment and let it be taken by the CPU which runs
> >> +	 * the tick timer next, which might be this CPU as well. If we
> >> +	 * don't drop this here, the jiffies might be stale and
> >> +	 * do_timer() never gets invoked. Keep track of the fact that it
> >> +	 * was the one which had the do_timer() duty last.
> >> +	 */
> >> +	if (cpu == tick_do_timer_cpu) {
> >> +		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> >> +		ts->do_timer_last = 1;
> >> +	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> >> +		ts->do_timer_last = 0;
> >> +	}
> >> +
> >>  	ts->next_tick = expires;
> >>  
> >>  	/*
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Anna-Maria Behnsen 1 year, 9 months ago
Frederic Weisbecker <frederic@kernel.org> writes:

> Le Tue, Feb 20, 2024 at 01:02:18PM +0100, Anna-Maria Behnsen a écrit :
>> Frederic Weisbecker <frederic@kernel.org> writes:
>> 
>> > Le Tue, Feb 20, 2024 at 11:48:19AM +0100, Anna-Maria Behnsen a écrit :
>> >> Frederic Weisbecker <frederic@kernel.org> writes:
>> >> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
>> >> index 01fb50c1b17e..b93f0e6f273f 100644
>> >> --- a/kernel/time/tick-sched.c
>> >> +++ b/kernel/time/tick-sched.c
>> >> @@ -895,21 +895,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>> >>  	/* Make sure we won't be trying to stop it twice in a row. */
>> >>  	ts->timer_expires_base = 0;
>> >>  
>> >> -	/*
>> >> -	 * If this CPU is the one which updates jiffies, then give up
>> >> -	 * the assignment and let it be taken by the CPU which runs
>> >> -	 * the tick timer next, which might be this CPU as well. If we
>> >> -	 * don't drop this here, the jiffies might be stale and
>> >> -	 * do_timer() never gets invoked. Keep track of the fact that it
>> >> -	 * was the one which had the do_timer() duty last.
>> >> -	 */
>> >> -	if (cpu == tick_do_timer_cpu) {
>> >> -		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
>> >> -		ts->do_timer_last = 1;
>> >> -	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
>> >> -		ts->do_timer_last = 0;
>> >> -	}
>> >> -
>> >>  	/* Skip reprogram of event if it's not changed */
>> >>  	if (ts->tick_stopped && (expires == ts->next_tick)) {
>> >>  		/* Sanity check: make sure clockevent is actually programmed */
>> >
>> > That should work but then you lose the optimization that resets
>> > ts->do_timer_last even if the next timer hasn't changed.
>> >
>> 
>> Beside of this optimization thing, I see onther problem. But I'm not
>> sure, if I understood it correctly: When the CPU drops the
>> tick_do_timer_cpu assignment and stops the tick, it is possible, that
>> this CPU nevertheless executes tick_sched_do_timer() and then reassigns
>> to tick_do_timer_cpu?
>
> Yes but in this case a timer interrupt has executed and ts->next_tick
> is cleared, so the above skip reprogramm branch is not taken.
>

Yes... So I need to change it without dropping the
optimization. Otherwise someone might complain about it.

Two possible solutions:

a) Split out the if/else block that drops the tick_do_timer_cpu
   assignment into a separate function and call it:
   - before the return in the skip-reprogram branch
   - and after the if clause which contains stopping the tick (where it
     is executed in the current proposal)

b) Take my current proposal and add the following lines before the
   return in the skip-reprogram branch:

   if (tick_do_timer_cpu != TICK_DO_TIMER_NONE)
   	ts->do_timer_last = 0;

   as the first part of the tick_do_timer_cpu/do_timer_last logic
   shouldn't be required there (because in that case ts->next_tick is
   already cleared).

What do you prefer? Or do you prefer something else?

Thanks

> Thanks.
>
>> 
>> Then it is mandatory that we have this drop the assignment also in the
>> path when the tick is already stopped. Otherwise the problem described
>> in the comment could happen with stale jiffies, no?
>> 
>> Thanks
>> 
>> > Thanks.
>> >
>> >
>> >
>> >> @@ -938,6 +923,21 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>> >>  		trace_tick_stop(1, TICK_DEP_MASK_NONE);
>> >>  	}
>> >>  
>> >> +	/*
>> >> +	 * If this CPU is the one which updates jiffies, then give up
>> >> +	 * the assignment and let it be taken by the CPU which runs
>> >> +	 * the tick timer next, which might be this CPU as well. If we
>> >> +	 * don't drop this here, the jiffies might be stale and
>> >> +	 * do_timer() never gets invoked. Keep track of the fact that it
>> >> +	 * was the one which had the do_timer() duty last.
>> >> +	 */
>> >> +	if (cpu == tick_do_timer_cpu) {
>> >> +		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
>> >> +		ts->do_timer_last = 1;
>> >> +	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
>> >> +		ts->do_timer_last = 0;
>> >> +	}
>> >> +
>> >>  	ts->next_tick = expires;
>> >>  
>> >>  	/*
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Frederic Weisbecker 1 year, 9 months ago
On Tue, Feb 20, 2024 at 03:00:57PM +0100, Anna-Maria Behnsen wrote:
> Frederic Weisbecker <frederic@kernel.org> writes:
> 
> > Le Tue, Feb 20, 2024 at 01:02:18PM +0100, Anna-Maria Behnsen a écrit :
> >> Frederic Weisbecker <frederic@kernel.org> writes:
> >> 
> >> > Le Tue, Feb 20, 2024 at 11:48:19AM +0100, Anna-Maria Behnsen a écrit :
> >> >> Frederic Weisbecker <frederic@kernel.org> writes:
> >> >> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> >> >> index 01fb50c1b17e..b93f0e6f273f 100644
> >> >> --- a/kernel/time/tick-sched.c
> >> >> +++ b/kernel/time/tick-sched.c
> >> >> @@ -895,21 +895,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
> >> >>  	/* Make sure we won't be trying to stop it twice in a row. */
> >> >>  	ts->timer_expires_base = 0;
> >> >>  
> >> >> -	/*
> >> >> -	 * If this CPU is the one which updates jiffies, then give up
> >> >> -	 * the assignment and let it be taken by the CPU which runs
> >> >> -	 * the tick timer next, which might be this CPU as well. If we
> >> >> -	 * don't drop this here, the jiffies might be stale and
> >> >> -	 * do_timer() never gets invoked. Keep track of the fact that it
> >> >> -	 * was the one which had the do_timer() duty last.
> >> >> -	 */
> >> >> -	if (cpu == tick_do_timer_cpu) {
> >> >> -		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> >> >> -		ts->do_timer_last = 1;
> >> >> -	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> >> >> -		ts->do_timer_last = 0;
> >> >> -	}
> >> >> -
> >> >>  	/* Skip reprogram of event if it's not changed */
> >> >>  	if (ts->tick_stopped && (expires == ts->next_tick)) {
> >> >>  		/* Sanity check: make sure clockevent is actually programmed */
> >> >
> >> > That should work but then you lose the optimization that resets
> >> > ts->do_timer_last even if the next timer hasn't changed.
> >> >
> >> 
> >> Beside of this optimization thing, I see onther problem. But I'm not
> >> sure, if I understood it correctly: When the CPU drops the
> >> tick_do_timer_cpu assignment and stops the tick, it is possible, that
> >> this CPU nevertheless executes tick_sched_do_timer() and then reassigns
> >> to tick_do_timer_cpu?
> >
> > Yes but in this case a timer interrupt has executed and ts->next_tick
> > is cleared, so the above skip reprogramm branch is not taken.
> >
> 
> Yes... So I need to change it without dropping the
> optimization. Otherwise someone might complain about it.
> 
> Two possible solutions:
> 
> a) split out this if/else thing for dropping the tick_do_timer_cpu
>    assignment into a separate function and call it:
>    - before the return in the skip reprogramm branch
>    - and after the if clause which contains stopping the tick (where it
>      is executed in the current proposal)
> 
> b) Take my current proposal and add before the return in the skip
>    reprogramm branch the following lines:
> 
>    if (tick_do_timer_cpu != TICK_DO_TIMER_NONE)
>    	ts->do_timer_last = 0;
> 
>    as the first part of the tick_do_timer_cpu/last logic shouldn't be
>    required (because then also ts->next_tick is already cleared).
> 
> What do you prefere? Or do you prefere something else?

Wouldn't the following work? If timer_idle is false, then the tick isn't
even stopped and there is nothing to do. So you can return early.

diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index fdd57f1af1d7..1b2984acafbd 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -924,6 +924,9 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
 		expires = ts->timer_expires;
 	}
 
+	if (!timer_idle)
+		return;
+
 	/*
 	 * If this CPU is the one which updates jiffies, then give up
 	 * the assignment and let it be taken by the CPU which runs
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Anna-Maria Behnsen 1 year, 9 months ago
Frederic Weisbecker <frederic@kernel.org> writes:

> On Tue, Feb 20, 2024 at 03:00:57PM +0100, Anna-Maria Behnsen wrote:
>> Frederic Weisbecker <frederic@kernel.org> writes:
>> 
>> > Le Tue, Feb 20, 2024 at 01:02:18PM +0100, Anna-Maria Behnsen a écrit :
>> >> Frederic Weisbecker <frederic@kernel.org> writes:
>> >> 
>> >> > Le Tue, Feb 20, 2024 at 11:48:19AM +0100, Anna-Maria Behnsen a écrit :
>> >> >> Frederic Weisbecker <frederic@kernel.org> writes:
>> >> >> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
>> >> >> index 01fb50c1b17e..b93f0e6f273f 100644
>> >> >> --- a/kernel/time/tick-sched.c
>> >> >> +++ b/kernel/time/tick-sched.c
>> >> >> @@ -895,21 +895,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>> >> >>  	/* Make sure we won't be trying to stop it twice in a row. */
>> >> >>  	ts->timer_expires_base = 0;
>> >> >>  
>> >> >> -	/*
>> >> >> -	 * If this CPU is the one which updates jiffies, then give up
>> >> >> -	 * the assignment and let it be taken by the CPU which runs
>> >> >> -	 * the tick timer next, which might be this CPU as well. If we
>> >> >> -	 * don't drop this here, the jiffies might be stale and
>> >> >> -	 * do_timer() never gets invoked. Keep track of the fact that it
>> >> >> -	 * was the one which had the do_timer() duty last.
>> >> >> -	 */
>> >> >> -	if (cpu == tick_do_timer_cpu) {
>> >> >> -		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
>> >> >> -		ts->do_timer_last = 1;
>> >> >> -	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
>> >> >> -		ts->do_timer_last = 0;
>> >> >> -	}
>> >> >> -
>> >> >>  	/* Skip reprogram of event if it's not changed */
>> >> >>  	if (ts->tick_stopped && (expires == ts->next_tick)) {
>> >> >>  		/* Sanity check: make sure clockevent is actually programmed */
>> >> >
>> >> > That should work but then you lose the optimization that resets
>> >> > ts->do_timer_last even if the next timer hasn't changed.
>> >> >
>> >> 
>> >> Beside of this optimization thing, I see onther problem. But I'm not
>> >> sure, if I understood it correctly: When the CPU drops the
>> >> tick_do_timer_cpu assignment and stops the tick, it is possible, that
>> >> this CPU nevertheless executes tick_sched_do_timer() and then reassigns
>> >> to tick_do_timer_cpu?
>> >
>> > Yes but in this case a timer interrupt has executed and ts->next_tick
>> > is cleared, so the above skip reprogramm branch is not taken.
>> >
>> 
>> Yes... So I need to change it without dropping the
>> optimization. Otherwise someone might complain about it.
>> 
>> Two possible solutions:
>> 
>> a) split out this if/else thing for dropping the tick_do_timer_cpu
>>    assignment into a separate function and call it:
>>    - before the return in the skip reprogramm branch
>>    - and after the if clause which contains stopping the tick (where it
>>      is executed in the current proposal)
>> 
>> b) Take my current proposal and add before the return in the skip
>>    reprogramm branch the following lines:
>> 
>>    if (tick_do_timer_cpu != TICK_DO_TIMER_NONE)
>>    	ts->do_timer_last = 0;
>> 
>>    as the first part of the tick_do_timer_cpu/last logic shouldn't be
>>    required (because then also ts->next_tick is already cleared).
>> 
>> What do you prefere? Or do you prefere something else?
>
> Wouldn't the following work? If timer_idle is false, then the tick isn't
> even stopped and there is nothing to do? So you can early return.
>
> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> index fdd57f1af1d7..1b2984acafbd 100644
> --- a/kernel/time/tick-sched.c
> +++ b/kernel/time/tick-sched.c
> @@ -924,6 +924,9 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
>  		expires = ts->timer_expires;
>  	}
>  
> +	if (!timer_idle)
> +		return;
> +
>  	/*
>  	 * If this CPU is the one which updates jiffies, then give up
>  	 * the assignment and let it be taken by the CPU which runs

Yes... And then I can drop the if (!timer_idle) check inside the
!ts->tick_stopped branch.
Re: [PATCH v10a] timers: Move marking timer bases idle into tick_nohz_stop_tick()
Posted by Frederic Weisbecker 1 year, 9 months ago
On Tue, Feb 20, 2024 at 04:23:26PM +0100, Anna-Maria Behnsen wrote:
> Frederic Weisbecker <frederic@kernel.org> writes:
> 
> > On Tue, Feb 20, 2024 at 03:00:57PM +0100, Anna-Maria Behnsen wrote:
> >> Frederic Weisbecker <frederic@kernel.org> writes:
> >> 
> >> > Le Tue, Feb 20, 2024 at 01:02:18PM +0100, Anna-Maria Behnsen a écrit :
> >> >> Frederic Weisbecker <frederic@kernel.org> writes:
> >> >> 
> >> >> > Le Tue, Feb 20, 2024 at 11:48:19AM +0100, Anna-Maria Behnsen a écrit :
> >> >> >> Frederic Weisbecker <frederic@kernel.org> writes:
> >> >> >> diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> >> >> >> index 01fb50c1b17e..b93f0e6f273f 100644
> >> >> >> --- a/kernel/time/tick-sched.c
> >> >> >> +++ b/kernel/time/tick-sched.c
> >> >> >> @@ -895,21 +895,6 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
> >> >> >>  	/* Make sure we won't be trying to stop it twice in a row. */
> >> >> >>  	ts->timer_expires_base = 0;
> >> >> >>  
> >> >> >> -	/*
> >> >> >> -	 * If this CPU is the one which updates jiffies, then give up
> >> >> >> -	 * the assignment and let it be taken by the CPU which runs
> >> >> >> -	 * the tick timer next, which might be this CPU as well. If we
> >> >> >> -	 * don't drop this here, the jiffies might be stale and
> >> >> >> -	 * do_timer() never gets invoked. Keep track of the fact that it
> >> >> >> -	 * was the one which had the do_timer() duty last.
> >> >> >> -	 */
> >> >> >> -	if (cpu == tick_do_timer_cpu) {
> >> >> >> -		tick_do_timer_cpu = TICK_DO_TIMER_NONE;
> >> >> >> -		ts->do_timer_last = 1;
> >> >> >> -	} else if (tick_do_timer_cpu != TICK_DO_TIMER_NONE) {
> >> >> >> -		ts->do_timer_last = 0;
> >> >> >> -	}
> >> >> >> -
> >> >> >>  	/* Skip reprogram of event if it's not changed */
> >> >> >>  	if (ts->tick_stopped && (expires == ts->next_tick)) {
> >> >> >>  		/* Sanity check: make sure clockevent is actually programmed */
> >> >> >
> >> >> > That should work but then you lose the optimization that resets
> >> >> > ts->do_timer_last even if the next timer hasn't changed.
> >> >> >
> >> >> 
> >> >> Beside of this optimization thing, I see onther problem. But I'm not
> >> >> sure, if I understood it correctly: When the CPU drops the
> >> >> tick_do_timer_cpu assignment and stops the tick, it is possible, that
> >> >> this CPU nevertheless executes tick_sched_do_timer() and then reassigns
> >> >> to tick_do_timer_cpu?
> >> >
> >> > Yes but in this case a timer interrupt has executed and ts->next_tick
> >> > is cleared, so the above skip reprogramm branch is not taken.
> >> >
> >> 
> >> Yes... So I need to change it without dropping the
> >> optimization. Otherwise someone might complain about it.
> >> 
> >> Two possible solutions:
> >> 
> >> a) split out this if/else thing for dropping the tick_do_timer_cpu
> >>    assignment into a separate function and call it:
> >>    - before the return in the skip reprogramm branch
> >>    - and after the if clause which contains stopping the tick (where it
> >>      is executed in the current proposal)
> >> 
> >> b) Take my current proposal and add before the return in the skip
> >>    reprogramm branch the following lines:
> >> 
> >>    if (tick_do_timer_cpu != TICK_DO_TIMER_NONE)
> >>    	ts->do_timer_last = 0;
> >> 
> >>    as the first part of the tick_do_timer_cpu/last logic shouldn't be
> >>    required (because then also ts->next_tick is already cleared).
> >> 
> >> What do you prefere? Or do you prefere something else?
> >
> > Wouldn't the following work? If timer_idle is false, then the tick isn't
> > even stopped and there is nothing to do? So you can early return.
> >
> > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
> > index fdd57f1af1d7..1b2984acafbd 100644
> > --- a/kernel/time/tick-sched.c
> > +++ b/kernel/time/tick-sched.c
> > @@ -924,6 +924,9 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)
> >  		expires = ts->timer_expires;
> >  	}
> >  
> > +	if (!timer_idle)
> > +		return;
> > +
> >  	/*
> >  	 * If this CPU is the one which updates jiffies, then give up
> >  	 * the assignment and let it be taken by the CPU which runs
> 
> Yes... And then I can drop the if (!timer_idle) thing inside
> !ts->tick_stopped branch.
> 

Right!