[PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()

Oleg Nesterov posted 1 patch 1 year, 6 months ago
kernel/time/tick-common.c | 39 +++++++++++++--------------------------
1 file changed, 13 insertions(+), 26 deletions(-)
[PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Oleg Nesterov 1 year, 6 months ago
After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot
crash when the boot CPU is nohz_full") the kernel no longer crashes, but
there is another problem.

In this case tick_setup_device() calls tick_take_do_timer_from_boot() to
update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled)
in smp_call_function_single().

Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new
comment tries to explain why this is safe (thanks Thomas!).

Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full")
Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/time/tick-common.c | 39 +++++++++++++--------------------------
 1 file changed, 13 insertions(+), 26 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d88b13076b79..27d0018c8b05 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -178,26 +178,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 	}
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static void giveup_do_timer(void *info)
-{
-	int cpu = *(unsigned int *)info;
-
-	WARN_ON(tick_do_timer_cpu != smp_processor_id());
-
-	tick_do_timer_cpu = cpu;
-}
-
-static void tick_take_do_timer_from_boot(void)
-{
-	int cpu = smp_processor_id();
-	int from = tick_do_timer_boot_cpu;
-
-	if (from >= 0 && from != cpu)
-		smp_call_function_single(from, giveup_do_timer, &cpu, 1);
-}
-#endif
-
 /*
  * Setup the tick device
  */
@@ -221,19 +201,26 @@ static void tick_setup_device(struct tick_device *td,
 			tick_next_period = ktime_get();
 #ifdef CONFIG_NO_HZ_FULL
 			/*
-			 * The boot CPU may be nohz_full, in which case set
-			 * tick_do_timer_boot_cpu so the first housekeeping
-			 * secondary that comes up will take do_timer from
-			 * us.
+			 * The boot CPU may be nohz_full, in which case the
+			 * first housekeeping secondary will take do_timer()
+			 * from us.
 			 */
 			if (tick_nohz_full_cpu(cpu))
 				tick_do_timer_boot_cpu = cpu;
 
 		} else if (tick_do_timer_boot_cpu != -1 &&
 						!tick_nohz_full_cpu(cpu)) {
-			tick_take_do_timer_from_boot();
 			tick_do_timer_boot_cpu = -1;
-			WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
+			/*
+			 * The boot CPU will stay in periodic (NOHZ disabled)
+			 * mode until clocksource_done_booting() called after
+			 * smp_init() selects a high resolution clocksource and
+			 * timekeeping_notify() kicks the NOHZ stuff alive.
+			 *
+			 * So this WRITE_ONCE can only race with the READ_ONCE
+			 * check in tick_periodic() but this race is harmless.
+			 */
+			WRITE_ONCE(tick_do_timer_cpu, cpu);
 #endif
 		}
 
-- 
2.25.1.362.g51ebf55
[PING ;)] Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Oleg Nesterov 1 year, 6 months ago
Thomas, et al,

should I redo and/or resend this patch (and the next one on top of it) ?

Or what else can I do to fix this problem?

Thanks,

Oleg.

On 05/28, Oleg Nesterov wrote:
>
> After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot
> crash when the boot CPU is nohz_full") the kernel no longer crashes, but
> there is another problem.
> 
> In this case tick_setup_device() calls tick_take_do_timer_from_boot() to
> update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled)
> in smp_call_function_single().
> 
> Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new
> comment tries to explain why this is safe (thanks Thomas!).
> 
> Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full")
> Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  kernel/time/tick-common.c | 39 +++++++++++++--------------------------
>  1 file changed, 13 insertions(+), 26 deletions(-)
> 
> diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
> index d88b13076b79..27d0018c8b05 100644
> --- a/kernel/time/tick-common.c
> +++ b/kernel/time/tick-common.c
> @@ -178,26 +178,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
>  	}
>  }
>  
> -#ifdef CONFIG_NO_HZ_FULL
> -static void giveup_do_timer(void *info)
> -{
> -	int cpu = *(unsigned int *)info;
> -
> -	WARN_ON(tick_do_timer_cpu != smp_processor_id());
> -
> -	tick_do_timer_cpu = cpu;
> -}
> -
> -static void tick_take_do_timer_from_boot(void)
> -{
> -	int cpu = smp_processor_id();
> -	int from = tick_do_timer_boot_cpu;
> -
> -	if (from >= 0 && from != cpu)
> -		smp_call_function_single(from, giveup_do_timer, &cpu, 1);
> -}
> -#endif
> -
>  /*
>   * Setup the tick device
>   */
> @@ -221,19 +201,26 @@ static void tick_setup_device(struct tick_device *td,
>  			tick_next_period = ktime_get();
>  #ifdef CONFIG_NO_HZ_FULL
>  			/*
> -			 * The boot CPU may be nohz_full, in which case set
> -			 * tick_do_timer_boot_cpu so the first housekeeping
> -			 * secondary that comes up will take do_timer from
> -			 * us.
> +			 * The boot CPU may be nohz_full, in which case the
> +			 * first housekeeping secondary will take do_timer()
> +			 * from us.
>  			 */
>  			if (tick_nohz_full_cpu(cpu))
>  				tick_do_timer_boot_cpu = cpu;
>  
>  		} else if (tick_do_timer_boot_cpu != -1 &&
>  						!tick_nohz_full_cpu(cpu)) {
> -			tick_take_do_timer_from_boot();
>  			tick_do_timer_boot_cpu = -1;
> -			WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
> +			/*
> +			 * The boot CPU will stay in periodic (NOHZ disabled)
> +			 * mode until clocksource_done_booting() called after
> +			 * smp_init() selects a high resolution clocksource and
> +			 * timekeeping_notify() kicks the NOHZ stuff alive.
> +			 *
> +			 * So this WRITE_ONCE can only race with the READ_ONCE
> +			 * check in tick_periodic() but this race is harmless.
> +			 */
> +			WRITE_ONCE(tick_do_timer_cpu, cpu);
>  #endif
>  		}
>  
> -- 
> 2.25.1.362.g51ebf55
>
Re: [PING ;)] Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Thomas Gleixner 1 year, 6 months ago
On Mon, Jun 10 2024 at 17:55, Oleg Nesterov wrote:
> should I redo and/or resend this patch (and the next one on top of it) ?

I lost track of the maze in this discussion. As the patch is obviously
correct, I just apply it as is and any cosmetic changes can go on top.

Thanks,

        tglx
Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Frederic Weisbecker 1 year, 6 months ago
Le Tue, May 28, 2024 at 02:20:19PM +0200, Oleg Nesterov a écrit :
> After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot
> crash when the boot CPU is nohz_full") the kernel no longer crashes, but
> there is another problem.
> 
> In this case tick_setup_device() calls tick_take_do_timer_from_boot() to
> update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled)
> in smp_call_function_single().
> 
> Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new
> comment tries to explain why this is safe (thanks Thomas!).
> 
> Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full")
> Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  kernel/time/tick-common.c | 39 +++++++++++++--------------------------
>  1 file changed, 13 insertions(+), 26 deletions(-)
> 
> diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
> index d88b13076b79..27d0018c8b05 100644
> --- a/kernel/time/tick-common.c
> +++ b/kernel/time/tick-common.c
> @@ -178,26 +178,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
>  	}
>  }
>  
> -#ifdef CONFIG_NO_HZ_FULL
> -static void giveup_do_timer(void *info)
> -{
> -	int cpu = *(unsigned int *)info;
> -
> -	WARN_ON(tick_do_timer_cpu != smp_processor_id());
> -
> -	tick_do_timer_cpu = cpu;
> -}
> -
> -static void tick_take_do_timer_from_boot(void)
> -{
> -	int cpu = smp_processor_id();
> -	int from = tick_do_timer_boot_cpu;
> -
> -	if (from >= 0 && from != cpu)
> -		smp_call_function_single(from, giveup_do_timer, &cpu, 1);
> -}
> -#endif
> -
>  /*
>   * Setup the tick device
>   */
> @@ -221,19 +201,26 @@ static void tick_setup_device(struct tick_device *td,
>  			tick_next_period = ktime_get();
>  #ifdef CONFIG_NO_HZ_FULL
>  			/*
> -			 * The boot CPU may be nohz_full, in which case set
> -			 * tick_do_timer_boot_cpu so the first housekeeping
> -			 * secondary that comes up will take do_timer from
> -			 * us.
> +			 * The boot CPU may be nohz_full, in which case the
> +			 * first housekeeping secondary will take do_timer()
> +			 * from us.
>  			 */
>  			if (tick_nohz_full_cpu(cpu))
>  				tick_do_timer_boot_cpu = cpu;
>  
>  		} else if (tick_do_timer_boot_cpu != -1 &&
>  						!tick_nohz_full_cpu(cpu)) {
> -			tick_take_do_timer_from_boot();
>  			tick_do_timer_boot_cpu = -1;
> -			WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
> +			/*
> +			 * The boot CPU will stay in periodic (NOHZ disabled)
> +			 * mode until clocksource_done_booting() called after
> +			 * smp_init() selects a high resolution clocksource and
> +			 * timekeeping_notify() kicks the NOHZ stuff alive.
> +			 *
> +			 * So this WRITE_ONCE can only race with the READ_ONCE
> +			 * check in tick_periodic() but this race is harmless.
> +			 */
> +			WRITE_ONCE(tick_do_timer_cpu, cpu);

Looks good, but can we have a WARN_ONCE(tick_do_timer_cpu != tick_do_timer_boot_cpu)
right before that, just to make sure our assumptions above are right forever and
the boot CPU hasn't stopped the tick up to that point?

And after all, pushing a bit further your subsequent patch, can we get rid of
tick_do_timer_boot_cpu and ifdefery altogether? Such as:

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index fb0fdec8719a..63a7bd405de7 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -48,14 +48,6 @@ ktime_t tick_next_period;
  *    procedure also covers cpu hotplug.
  */
 int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
-#ifdef CONFIG_NO_HZ_FULL
-/*
- * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
- * tick_do_timer_cpu and it should be taken over by an eligible secondary
- * when one comes online.
- */
-static int tick_do_timer_boot_cpu __read_mostly = -1;
-#endif
 
 /*
  * Debugging: see timer_list.c
@@ -177,26 +169,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 	}
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static void giveup_do_timer(void *info)
-{
-	int cpu = *(unsigned int *)info;
-
-	WARN_ON(tick_do_timer_cpu != smp_processor_id());
-
-	tick_do_timer_cpu = cpu;
-}
-
-static void tick_take_do_timer_from_boot(void)
-{
-	int cpu = smp_processor_id();
-	int from = tick_do_timer_boot_cpu;
-
-	if (from >= 0 && from != cpu)
-		smp_call_function_single(from, giveup_do_timer, &cpu, 1);
-}
-#endif
-
 /*
  * Setup the tick device
  */
@@ -211,29 +183,28 @@ static void tick_setup_device(struct tick_device *td,
 	 * First device setup ?
 	 */
 	if (!td->evtdev) {
+		int timekeeper = READ_ONCE(tick_do_timer_cpu);
 		/*
 		 * If no cpu took the do_timer update, assign it to
 		 * this cpu:
 		 */
-		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
+		if (timekeeper == TICK_DO_TIMER_BOOT) {
 			tick_do_timer_cpu = cpu;
 			tick_next_period = ktime_get();
-#ifdef CONFIG_NO_HZ_FULL
+		} else if (timekeeper == TICK_DO_TIMER_NONE) {
+			if (WARN_ON_ONCE(tick_nohz_full_enabled()))
+				WRITE_ONCE(tick_do_timer_cpu, cpu);
+		} else if (tick_nohz_full_cpu(timekeeper) && !tick_nohz_full_cpu(cpu)) {
 			/*
-			 * The boot CPU may be nohz_full, in which case set
-			 * tick_do_timer_boot_cpu so the first housekeeping
-			 * secondary that comes up will take do_timer from
-			 * us.
+			 * The boot CPU will stay in periodic (NOHZ disabled)
+			 * mode until clocksource_done_booting() called after
+			 * smp_init() selects a high resolution clocksource and
+			 * timekeeping_notify() kicks the NOHZ stuff alive.
+			 *
+			 * So this WRITE_ONCE can only race with the READ_ONCE
+			 * check in tick_periodic() but this race is harmless.
 			 */
-			if (tick_nohz_full_cpu(cpu))
-				tick_do_timer_boot_cpu = cpu;
-
-		} else if (tick_do_timer_boot_cpu != -1 &&
-						!tick_nohz_full_cpu(cpu)) {
-			tick_take_do_timer_from_boot();
-			tick_do_timer_boot_cpu = -1;
-			WARN_ON(tick_do_timer_cpu != cpu);
-#endif
+			WRITE_ONCE(tick_do_timer_cpu, cpu);
 		}
 
 		/*
Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Oleg Nesterov 1 year, 6 months ago
Hi Frederic,

First of all, can we please make the additional changes you suggest on top of
this patch? I'd prefer to keep it as simple as possible, I will need to backport
it and I'd like to simplify the internal review.

On 05/30, Frederic Weisbecker wrote:
>
> And after all, pushing a bit further your subsequent patch, can we get rid of
> tick_do_timer_boot_cpu and ifdefery altogether? Such as:

Sure, I thought about this from the very beginning, see
https://lore.kernel.org/all/20240525135120.GA24152@redhat.com/
and the changelog in
[PATCH] tick/nohz_full: turn tick_do_timer_boot_cpu into boot_cpu_is_nohz_full
https://lore.kernel.org/all/20240530124032.GA26833@redhat.com/
on top of this patch.

And yes, in this case it is better to check that tick_do_timer_cpu != _NONE to
ensure that tick_nohz_full_cpu(tick_cpu) can't crash.

So I considered the change which is very close to yours, except

> +		} else if (timekeeper == TICK_DO_TIMER_NONE) {
> +			if (WARN_ON_ONCE(tick_nohz_full_enabled()))
> +				WRITE_ONCE(tick_do_timer_cpu, cpu);

I don't think we need to change tick_do_timer_cpu in this case.
And I am not sure we need to check tick_nohz_full_enabled() here.
IOW, I was thinking about

	if (!td->evtdev) {
		int tick_cpu = READ_ONCE(tick_do_timer_cpu);
		/*
		 * If no cpu took the do_timer update, assign it to
		 * this cpu:
		 */
		if (tick_cpu == TICK_DO_TIMER_BOOT) {
			WRITE_ONCE(tick_do_timer_cpu, cpu);
			tick_next_period = ktime_get();
			/*
			 * The boot CPU may be nohz_full, in which case the
			 * first housekeeping secondary will take do_timer()
			 * from us.
			 */
		} else if (!WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE)) &&
			   tick_nohz_full_cpu(tick_cpu) &&
			   !tick_nohz_full_cpu(cpu)) {
			/*
			 * The boot CPU will stay in periodic (NOHZ disabled)
			 * mode until clocksource_done_booting() called after
			 * smp_init() selects a high resolution clocksource and
			 * timekeeping_notify() kicks the NOHZ stuff alive.
			 *
			 * So this WRITE_ONCE can only race with the READ_ONCE
			 * check in tick_periodic() but this race is harmless.
			 */
			WRITE_ONCE(tick_do_timer_cpu, cpu);
		}

But you know, somehow I like
[PATCH] tick/nohz_full: turn tick_do_timer_boot_cpu into boot_cpu_is_nohz_full
https://lore.kernel.org/all/20240530124032.GA26833@redhat.com/
a bit more, to me the code looks more understandable this way.

Note that this patch doesn't really need to keep #ifdef CONFIG_NO_HZ_FULL,

	if (!td->evtdev) {
		static bool boot_cpu_is_nohz_full;
		/*
		 * If no cpu took the do_timer update, assign it to
		 * this cpu:
		 */
		if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
			WRITE_ONCE(tick_do_timer_cpu, cpu);
			tick_next_period = ktime_get();
			/*
			 * The boot CPU may be nohz_full, in which case the
			 * first housekeeping secondary will take do_timer()
			 * from us.
			 */
			boot_cpu_is_nohz_full = tick_nohz_full_cpu(cpu);
		} else if (boot_cpu_is_nohz_full && !tick_nohz_full_cpu(cpu)) {
			boot_cpu_is_nohz_full = false;
			/*
			 * The boot CPU will stay in periodic (NOHZ disabled)
			 * mode until clocksource_done_booting() called after
			 * smp_init() selects a high resolution clocksource and
			 * timekeeping_notify() kicks the NOHZ stuff alive.
			 *
			 * So this WRITE_ONCE can only race with the READ_ONCE
			 * check in tick_periodic() but this race is harmless.
			 */
			WRITE_ONCE(tick_do_timer_cpu, cpu);
		}

should work without #ifdef.

In this case I don't think we need the _NONE check, tick_sched_do_timer() will
complain.

But I won't argue. I will be happy to make V2 which follows your recommendations
but again, can I do this on top of this patch?

Oleg.
Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Frederic Weisbecker 1 year, 6 months ago
Le Sat, Jun 01, 2024 at 04:03:22PM +0200, Oleg Nesterov a écrit :
> Hi Frederic,
> 
> First of all, can we please make the additional changes you suggest on top of
> this patch? I'd prefer to keep it as simple as possible, I will need to backport
> it and I'd like to simplify the internal review.

Sure!

> 
> On 05/30, Frederic Weisbecker wrote:
> >
> > And after all, pushing a bit further your subsequent patch, can we get rid of
> > tick_do_timer_boot_cpu and ifdefery altogether? Such as:
> 
> Sure, I thought about this from the very beginning, see
> https://lore.kernel.org/all/20240525135120.GA24152@redhat.com/
> and the changelog in
> [PATCH] tick/nohz_full: turn tick_do_timer_boot_cpu into boot_cpu_is_nohz_full
> https://lore.kernel.org/all/20240530124032.GA26833@redhat.com/
> on top of this patch.
> 
> And yes, in this case it is better to check that tick_do_timer_cpu != _NONE to
> ensure that tick_nohz_full_cpu(tick_cpu) can't crash.
> 
> So I considered the change which is very close to yours, except
> 
> > +		} else if (timekeeper == TICK_DO_TIMER_NONE) {
> > +			if (WARN_ON_ONCE(tick_nohz_full_enabled()))
> > +				WRITE_ONCE(tick_do_timer_cpu, cpu);
> 
> I don't think we need to change tick_do_timer_cpu in this case.
> And I am not sure we need to check tick_nohz_full_enabled() here.
> IOW, I was thinking about

Hmm, in case of cpu-hotplug operations (that is after boot), we may be
past nohz enablement and therefore it might be TICK_DO_TIMER_NONE.

> 
> 	if (!td->evtdev) {
> 		int tick_cpu = READ_ONCE(tick_do_timer_cpu);
> 		/*
> 		 * If no cpu took the do_timer update, assign it to
> 		 * this cpu:
> 		 */
> 		if (tick_cpu == TICK_DO_TIMER_BOOT) {
> 			WRITE_ONCE(tick_do_timer_cpu, cpu);
> 			tick_next_period = ktime_get();
> 			/*
> 			 * The boot CPU may be nohz_full, in which case the
> 			 * first housekeeping secondary will take do_timer()
> 			 * from us.
> 			 */
> 		} else if (!WARN_ON_ONCE(tick_cpu == TICK_DO_TIMER_NONE)) &&
> 			   tick_nohz_full_cpu(tick_cpu) &&
> 			   !tick_nohz_full_cpu(cpu)) {
> 			/*
> 			 * The boot CPU will stay in periodic (NOHZ disabled)
> 			 * mode until clocksource_done_booting() called after
> 			 * smp_init() selects a high resolution clocksource and
> 			 * timekeeping_notify() kicks the NOHZ stuff alive.
> 			 *
> 			 * So this WRITE_ONCE can only race with the READ_ONCE
> 			 * check in tick_periodic() but this race is harmless.
> 			 */
> 			WRITE_ONCE(tick_do_timer_cpu, cpu);
> 		}
> 
> But you know, somehow I like
> [PATCH] tick/nohz_full: turn tick_do_timer_boot_cpu into boot_cpu_is_nohz_full
> https://lore.kernel.org/all/20240530124032.GA26833@redhat.com/
> a bit more, to me the code looks more understandable this way.
> 
> Note that this patch doesn't really need to keep #ifdef CONFIG_NO_HZ_FULL,
> 
> 	if (!td->evtdev) {
> 		static bool boot_cpu_is_nohz_full;
> 		/*
> 		 * If no cpu took the do_timer update, assign it to
> 		 * this cpu:
> 		 */
> 		if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
> 			WRITE_ONCE(tick_do_timer_cpu, cpu);
> 			tick_next_period = ktime_get();
> 			/*
> 			 * The boot CPU may be nohz_full, in which case the
> 			 * first housekeeping secondary will take do_timer()
> 			 * from us.
> 			 */
> 			boot_cpu_is_nohz_full = tick_nohz_full_cpu(cpu);
> 		} else if (boot_cpu_is_nohz_full && !tick_nohz_full_cpu(cpu)) {
> 			boot_cpu_is_nohz_full = false;
> 			/*
> 			 * The boot CPU will stay in periodic (NOHZ disabled)
> 			 * mode until clocksource_done_booting() called after
> 			 * smp_init() selects a high resolution clocksource and
> 			 * timekeeping_notify() kicks the NOHZ stuff alive.
> 			 *
> 			 * So this WRITE_ONCE can only race with the READ_ONCE
> 			 * check in tick_periodic() but this race is harmless.
> 			 */
> 			WRITE_ONCE(tick_do_timer_cpu, cpu);
> 		}
> 
> should work without #ifdef.
> 
> In this case I don't think we need the _NONE check, tick_sched_do_timer() will
> complain.

Right...

> 
> But I won't argue. I will be happy to make V2 which follows your recommendations
> but again, can I do this on top of this patch?

I guess the static version above should work to remove the ifdef. And yes on top is fine.

Thanks!
Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Oleg Nesterov 1 year, 6 months ago
On 06/02, Frederic Weisbecker wrote:
>
> I guess the static version above should work to remove the ifdef. And yes on top is fine.

OK, I've sent v2.

But again, I won't argue if you prefer to keep tick_do_timer_boot_cpu and add
WARN_ON_ONCE(tick_cpu != tick_do_timer_boot_cpu) before WRITE_ONCE(tick_do_timer_cpu).
In this case another patch makes no sense, I'll update this one.

Just tell me what you like more. Sorry for the chaotic emails.

Oleg.
Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Frederic Weisbecker 1 year, 6 months ago
Le Mon, Jun 03, 2024 at 05:41:33PM +0200, Oleg Nesterov a écrit :
> On 06/02, Frederic Weisbecker wrote:
> >
> > I guess the static version above should work to remove the ifdef. And yes on top is fine.
> 
> OK, I've sent v2.
> 
> But again, I won't argue if you prefer to keep tick_do_timer_boot_cpu and add
> WARN_ON_ONCE(tick_cpu != tick_do_timer_boot_cpu) before WRITE_ONCE(tick_do_timer_cpu).
> In this case another patch makes no sense, I'll update this one.
> 
> Just tell me what you like more. Sorry for the chaotic emails.

I'm fine with the last one posted :-)

Thanks.
Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Oleg Nesterov 1 year, 6 months ago
On 05/30, Frederic Weisbecker wrote:
>
> Looks good, but can we have a WARN_ONCE(tick_do_timer_cpu != tick_do_timer_boot_cpu)
> right before that, just to make sure our assumptions above are right forever and
> the boot CPU hasn't stopped the tick up to that point?

Sure, I thought about the additional sanity checks too. Although I had something
different in mind.

Frederic, et al, I am on private trip again without my working laptop, can't read
the code. I'll reply on Saturday, OK?

Thanks for review!

Oleg.
Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Oleg Nesterov 1 year, 6 months ago
Frederic,

Thanks for review.

On 05/30, Frederic Weisbecker wrote:
>
> Looks good, but can we have a WARN_ONCE(tick_do_timer_cpu != tick_do_timer_boot_cpu)
> right before that, just to make sure our assumptions above are right forever and
> the boot CPU hasn't stopped the tick up to that point?

Sure, I thought about the additional sanity checks too. Although I had something
different in mind.

Frederic, et al, I am on private trip again without my working laptop, can't read
the code. I'll reply on Saturday, OK?

Oleg.

> 
> And after all, pushing a bit further your subsequent patch, can we get rid of
> tick_do_timer_boot_cpu and ifdefery altogether? Such as:
> 
> diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
> index fb0fdec8719a..63a7bd405de7 100644
> --- a/kernel/time/tick-common.c
> +++ b/kernel/time/tick-common.c
> @@ -48,14 +48,6 @@ ktime_t tick_next_period;
>   *    procedure also covers cpu hotplug.
>   */
>  int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
> -#ifdef CONFIG_NO_HZ_FULL
> -/*
> - * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
> - * tick_do_timer_cpu and it should be taken over by an eligible secondary
> - * when one comes online.
> - */
> -static int tick_do_timer_boot_cpu __read_mostly = -1;
> -#endif
>  
>  /*
>   * Debugging: see timer_list.c
> @@ -177,26 +169,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
>  	}
>  }
>  
> -#ifdef CONFIG_NO_HZ_FULL
> -static void giveup_do_timer(void *info)
> -{
> -	int cpu = *(unsigned int *)info;
> -
> -	WARN_ON(tick_do_timer_cpu != smp_processor_id());
> -
> -	tick_do_timer_cpu = cpu;
> -}
> -
> -static void tick_take_do_timer_from_boot(void)
> -{
> -	int cpu = smp_processor_id();
> -	int from = tick_do_timer_boot_cpu;
> -
> -	if (from >= 0 && from != cpu)
> -		smp_call_function_single(from, giveup_do_timer, &cpu, 1);
> -}
> -#endif
> -
>  /*
>   * Setup the tick device
>   */
> @@ -211,29 +183,28 @@ static void tick_setup_device(struct tick_device *td,
>  	 * First device setup ?
>  	 */
>  	if (!td->evtdev) {
> +		int timekeeper = READ_ONCE(tick_do_timer_cpu);
>  		/*
>  		 * If no cpu took the do_timer update, assign it to
>  		 * this cpu:
>  		 */
> -		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
> +		if (timekeeper == TICK_DO_TIMER_BOOT) {
>  			tick_do_timer_cpu = cpu;
>  			tick_next_period = ktime_get();
> -#ifdef CONFIG_NO_HZ_FULL
> +		} else if (timekeeper == TICK_DO_TIMER_NONE) {
> +			if (WARN_ON_ONCE(tick_nohz_full_enabled()))
> +				WRITE_ONCE(tick_do_timer_cpu, cpu);
> +		} else if (tick_nohz_full_cpu(timekeeper) && !tick_nohz_full_cpu(cpu)) {
>  			/*
> -			 * The boot CPU may be nohz_full, in which case set
> -			 * tick_do_timer_boot_cpu so the first housekeeping
> -			 * secondary that comes up will take do_timer from
> -			 * us.
> +			 * The boot CPU will stay in periodic (NOHZ disabled)
> +			 * mode until clocksource_done_booting() called after
> +			 * smp_init() selects a high resolution clocksource and
> +			 * timekeeping_notify() kicks the NOHZ stuff alive.
> +			 *
> +			 * So this WRITE_ONCE can only race with the READ_ONCE
> +			 * check in tick_periodic() but this race is harmless.
>  			 */
> -			if (tick_nohz_full_cpu(cpu))
> -				tick_do_timer_boot_cpu = cpu;
> -
> -		} else if (tick_do_timer_boot_cpu != -1 &&
> -						!tick_nohz_full_cpu(cpu)) {
> -			tick_take_do_timer_from_boot();
> -			tick_do_timer_boot_cpu = -1;
> -			WARN_ON(tick_do_timer_cpu != cpu);
> -#endif
> +			WRITE_ONCE(tick_do_timer_cpu, cpu);
>  		}
>  
>  		/*
>
Re: [PATCH] tick/nohz_full: don't abuse smp_call_function_single() in tick_setup_device()
Posted by Oleg Nesterov 1 year, 6 months ago
If this patch/changelog/comment is fine, I'll send another trivial
one which turns tick_do_timer_boot_cpu into "bool boot_cpu_is_nohz_full".

On 05/28, Oleg Nesterov wrote:
>
> After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot
> crash when the boot CPU is nohz_full") the kernel no longer crashes, but
> there is another problem.
> 
> In this case tick_setup_device() calls tick_take_do_timer_from_boot() to
> update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled)
> in smp_call_function_single().
> 
> Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new
> comment tries to explain why this is safe (thanks Thomas!).
> 
> Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full")
> Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> ---
>  kernel/time/tick-common.c | 39 +++++++++++++--------------------------
>  1 file changed, 13 insertions(+), 26 deletions(-)
> 
> diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
> index d88b13076b79..27d0018c8b05 100644
> --- a/kernel/time/tick-common.c
> +++ b/kernel/time/tick-common.c
> @@ -178,26 +178,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
>  	}
>  }
>  
> -#ifdef CONFIG_NO_HZ_FULL
> -static void giveup_do_timer(void *info)
> -{
> -	int cpu = *(unsigned int *)info;
> -
> -	WARN_ON(tick_do_timer_cpu != smp_processor_id());
> -
> -	tick_do_timer_cpu = cpu;
> -}
> -
> -static void tick_take_do_timer_from_boot(void)
> -{
> -	int cpu = smp_processor_id();
> -	int from = tick_do_timer_boot_cpu;
> -
> -	if (from >= 0 && from != cpu)
> -		smp_call_function_single(from, giveup_do_timer, &cpu, 1);
> -}
> -#endif
> -
>  /*
>   * Setup the tick device
>   */
> @@ -221,19 +201,26 @@ static void tick_setup_device(struct tick_device *td,
>  			tick_next_period = ktime_get();
>  #ifdef CONFIG_NO_HZ_FULL
>  			/*
> -			 * The boot CPU may be nohz_full, in which case set
> -			 * tick_do_timer_boot_cpu so the first housekeeping
> -			 * secondary that comes up will take do_timer from
> -			 * us.
> +			 * The boot CPU may be nohz_full, in which case the
> +			 * first housekeeping secondary will take do_timer()
> +			 * from us.
>  			 */
>  			if (tick_nohz_full_cpu(cpu))
>  				tick_do_timer_boot_cpu = cpu;
>  
>  		} else if (tick_do_timer_boot_cpu != -1 &&
>  						!tick_nohz_full_cpu(cpu)) {
> -			tick_take_do_timer_from_boot();
>  			tick_do_timer_boot_cpu = -1;
> -			WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
> +			/*
> +			 * The boot CPU will stay in periodic (NOHZ disabled)
> +			 * mode until clocksource_done_booting() called after
> +			 * smp_init() selects a high resolution clocksource and
> +			 * timekeeping_notify() kicks the NOHZ stuff alive.
> +			 *
> +			 * So this WRITE_ONCE can only race with the READ_ONCE
> +			 * check in tick_periodic() but this race is harmless.
> +			 */
> +			WRITE_ONCE(tick_do_timer_cpu, cpu);
>  #endif
>  		}
>  
> -- 
> 2.25.1.362.g51ebf55
>
[tip: timers/urgent] tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device()
Posted by tip-bot2 for Oleg Nesterov 1 year, 6 months ago
The following commit has been merged into the timers/urgent branch of tip:

Commit-ID:     07c54cc5988f19c9642fd463c2dbdac7fc52f777
Gitweb:        https://git.kernel.org/tip/07c54cc5988f19c9642fd463c2dbdac7fc52f777
Author:        Oleg Nesterov <oleg@redhat.com>
AuthorDate:    Tue, 28 May 2024 14:20:19 +02:00
Committer:     Thomas Gleixner <tglx@linutronix.de>
CommitterDate: Mon, 10 Jun 2024 20:18:13 +02:00

tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device()

After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash
when the boot CPU is nohz_full") the kernel no longer crashes, but there is
another problem.

In this case tick_setup_device() calls tick_take_do_timer_from_boot() to
update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled)
in smp_call_function_single().

Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new
comment explains why this is safe (thanks Thomas!).

Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full")
Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com
Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com
---
 kernel/time/tick-common.c | 42 ++++++++++++--------------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index d88b130..a47bcf7 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -178,26 +178,6 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 	}
 }
 
-#ifdef CONFIG_NO_HZ_FULL
-static void giveup_do_timer(void *info)
-{
-	int cpu = *(unsigned int *)info;
-
-	WARN_ON(tick_do_timer_cpu != smp_processor_id());
-
-	tick_do_timer_cpu = cpu;
-}
-
-static void tick_take_do_timer_from_boot(void)
-{
-	int cpu = smp_processor_id();
-	int from = tick_do_timer_boot_cpu;
-
-	if (from >= 0 && from != cpu)
-		smp_call_function_single(from, giveup_do_timer, &cpu, 1);
-}
-#endif
-
 /*
  * Setup the tick device
  */
@@ -221,19 +201,25 @@ static void tick_setup_device(struct tick_device *td,
 			tick_next_period = ktime_get();
 #ifdef CONFIG_NO_HZ_FULL
 			/*
-			 * The boot CPU may be nohz_full, in which case set
-			 * tick_do_timer_boot_cpu so the first housekeeping
-			 * secondary that comes up will take do_timer from
-			 * us.
+			 * The boot CPU may be nohz_full, in which case the
+			 * first housekeeping secondary will take do_timer()
+			 * from it.
 			 */
 			if (tick_nohz_full_cpu(cpu))
 				tick_do_timer_boot_cpu = cpu;
 
-		} else if (tick_do_timer_boot_cpu != -1 &&
-						!tick_nohz_full_cpu(cpu)) {
-			tick_take_do_timer_from_boot();
+		} else if (tick_do_timer_boot_cpu != -1 && !tick_nohz_full_cpu(cpu)) {
 			tick_do_timer_boot_cpu = -1;
-			WARN_ON(READ_ONCE(tick_do_timer_cpu) != cpu);
+			/*
+			 * The boot CPU will stay in periodic (NOHZ disabled)
+			 * mode until clocksource_done_booting() called after
+			 * smp_init() selects a high resolution clocksource and
+			 * timekeeping_notify() kicks the NOHZ stuff alive.
+			 *
+			 * So this WRITE_ONCE can only race with the READ_ONCE
+			 * check in tick_periodic() but this race is harmless.
+			 */
+			WRITE_ONCE(tick_do_timer_cpu, cpu);
 #endif
 		}
Re: [tip: timers/urgent] tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device()
Posted by Frederic Weisbecker 1 year, 6 months ago
Le Mon, Jun 10, 2024 at 06:26:21PM -0000, tip-bot2 for Oleg Nesterov a écrit :
> The following commit has been merged into the timers/urgent branch of tip:
> 
> Commit-ID:     07c54cc5988f19c9642fd463c2dbdac7fc52f777
> Gitweb:        https://git.kernel.org/tip/07c54cc5988f19c9642fd463c2dbdac7fc52f777
> Author:        Oleg Nesterov <oleg@redhat.com>
> AuthorDate:    Tue, 28 May 2024 14:20:19 +02:00
> Committer:     Thomas Gleixner <tglx@linutronix.de>
> CommitterDate: Mon, 10 Jun 2024 20:18:13 +02:00
> 
> tick/nohz_full: Don't abuse smp_call_function_single() in tick_setup_device()
> 
> After the recent commit 5097cbcb38e6 ("sched/isolation: Prevent boot crash
> when the boot CPU is nohz_full") the kernel no longer crashes, but there is
> another problem.
> 
> In this case tick_setup_device() calls tick_take_do_timer_from_boot() to
> update tick_do_timer_cpu and this triggers the WARN_ON_ONCE(irqs_disabled)
> in smp_call_function_single().
> 
> Kill tick_take_do_timer_from_boot() and just use WRITE_ONCE(), the new
> comment explains why this is safe (thanks Thomas!).
> 
> Fixes: 08ae95f4fd3b ("nohz_full: Allow the boot CPU to be nohz_full")
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>
> Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
> Cc: stable@vger.kernel.org
> Link: https://lore.kernel.org/r/20240528122019.GA28794@redhat.com
> Link: https://lore.kernel.org/all/20240522151742.GA10400@redhat.com

I think we agreed on that version actually:

https://lore.kernel.org/all/20240603153557.GA8311@redhat.com/

Thanks.
[PATCH] tick/nohz_full: turn tick_do_timer_boot_cpu into boot_cpu_is_nohz_full
Posted by Oleg Nesterov 1 year, 6 months ago
We don't need to record the cpu number of the nohz_full boot CPU, a simple
boolean is enough.

We could even kill it along with #ifdef, the "else if" branch could check
tick_nohz_full_cpu(tick_do_timer_cpu) && !tick_nohz_full_cpu(cpu).

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/time/tick-common.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 27d0018c8b05..9829a2cda072 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -49,15 +49,6 @@ ktime_t tick_next_period;
  *    procedure also covers cpu hotplug.
  */
 int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
-#ifdef CONFIG_NO_HZ_FULL
-/*
- * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
- * tick_do_timer_cpu and it should be taken over by an eligible secondary
- * when one comes online.
- */
-static int tick_do_timer_boot_cpu __read_mostly = -1;
-#endif
-
 /*
  * Debugging: see timer_list.c
  */
@@ -178,9 +169,13 @@ void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
 	}
 }
 
+#ifdef CONFIG_NO_HZ_FULL
 /*
- * Setup the tick device
+ * Iindicates that the boot CPU temporarily owns tick_do_timer_cpu.
  */
+static bool boot_cpu_is_nohz_full __read_mostly;
+#endif
+
 static void tick_setup_device(struct tick_device *td,
 			      struct clock_event_device *newdev, int cpu,
 			      const struct cpumask *cpumask)
@@ -205,12 +200,9 @@ static void tick_setup_device(struct tick_device *td,
 			 * first housekeeping secondary will take do_timer()
 			 * from us.
 			 */
-			if (tick_nohz_full_cpu(cpu))
-				tick_do_timer_boot_cpu = cpu;
-
-		} else if (tick_do_timer_boot_cpu != -1 &&
-						!tick_nohz_full_cpu(cpu)) {
-			tick_do_timer_boot_cpu = -1;
+			boot_cpu_is_nohz_full = tick_nohz_full_cpu(cpu);
+		} else if (boot_cpu_is_nohz_full && !tick_nohz_full_cpu(cpu)) {
+			boot_cpu_is_nohz_full = false;
 			/*
 			 * The boot CPU will stay in periodic (NOHZ disabled)
 			 * mode until clocksource_done_booting() called after
-- 
2.25.1.362.g51ebf55
[PATCH v2] tick/nohz_full: turn tick_do_timer_boot_cpu into boot_cpu_is_nohz_full
Posted by Oleg Nesterov 1 year, 6 months ago
We don't need to record the cpu number of the nohz_full boot CPU, a simple
boolean is enough. We could even kill it, the "else if" branch could check
tick_nohz_full_cpu(tick_do_timer_cpu) && !tick_nohz_full_cpu(cpu).

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
---
 kernel/time/tick-common.c | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 27d0018c8b05..82c1b2206631 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -49,15 +49,6 @@ ktime_t tick_next_period;
  *    procedure also covers cpu hotplug.
  */
 int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
-#ifdef CONFIG_NO_HZ_FULL
-/*
- * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
- * tick_do_timer_cpu and it should be taken over by an eligible secondary
- * when one comes online.
- */
-static int tick_do_timer_boot_cpu __read_mostly = -1;
-#endif
-
 /*
  * Debugging: see timer_list.c
  */
@@ -192,6 +183,7 @@ static void tick_setup_device(struct tick_device *td,
 	 * First device setup ?
 	 */
 	if (!td->evtdev) {
+		static bool boot_cpu_is_nohz_full __read_mostly;
 		/*
 		 * If no cpu took the do_timer update, assign it to
 		 * this cpu:
@@ -199,18 +191,14 @@ static void tick_setup_device(struct tick_device *td,
 		if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
 			WRITE_ONCE(tick_do_timer_cpu, cpu);
 			tick_next_period = ktime_get();
-#ifdef CONFIG_NO_HZ_FULL
 			/*
 			 * The boot CPU may be nohz_full, in which case the
 			 * first housekeeping secondary will take do_timer()
 			 * from us.
 			 */
-			if (tick_nohz_full_cpu(cpu))
-				tick_do_timer_boot_cpu = cpu;
-
-		} else if (tick_do_timer_boot_cpu != -1 &&
-						!tick_nohz_full_cpu(cpu)) {
-			tick_do_timer_boot_cpu = -1;
+			boot_cpu_is_nohz_full = tick_nohz_full_cpu(cpu);
+		} else if (boot_cpu_is_nohz_full && !tick_nohz_full_cpu(cpu)) {
+			boot_cpu_is_nohz_full = false;
 			/*
 			 * The boot CPU will stay in periodic (NOHZ disabled)
 			 * mode until clocksource_done_booting() called after
@@ -221,7 +209,6 @@ static void tick_setup_device(struct tick_device *td,
 			 * check in tick_periodic() but this race is harmless.
 			 */
 			WRITE_ONCE(tick_do_timer_cpu, cpu);
-#endif
 		}
 
 		/*
-- 
2.25.1.362.g51ebf55
Re: [PATCH v2] tick/nohz_full: turn tick_do_timer_boot_cpu into boot_cpu_is_nohz_full
Posted by Nicholas Piggin 1 year, 6 months ago
On Tue Jun 4, 2024 at 1:35 AM AEST, Oleg Nesterov wrote:
> We don't need to record the cpu number of the nohz_full boot CPU, a simple
> boolean is enough. We could even kill it, the "else if" branch could check
> tick_nohz_full_cpu(tick_do_timer_cpu) && !tick_nohz_full_cpu(cpu).
>
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Reviewed-by: Nicholas Piggin <npiggin@gmail.com>

> ---
>  kernel/time/tick-common.c | 21 ++++-----------------
>  1 file changed, 4 insertions(+), 17 deletions(-)
>
> diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
> index 27d0018c8b05..82c1b2206631 100644
> --- a/kernel/time/tick-common.c
> +++ b/kernel/time/tick-common.c
> @@ -49,15 +49,6 @@ ktime_t tick_next_period;
>   *    procedure also covers cpu hotplug.
>   */
>  int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
> -#ifdef CONFIG_NO_HZ_FULL
> -/*
> - * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
> - * tick_do_timer_cpu and it should be taken over by an eligible secondary
> - * when one comes online.
> - */
> -static int tick_do_timer_boot_cpu __read_mostly = -1;
> -#endif
> -
>  /*
>   * Debugging: see timer_list.c
>   */
> @@ -192,6 +183,7 @@ static void tick_setup_device(struct tick_device *td,
>  	 * First device setup ?
>  	 */
>  	if (!td->evtdev) {
> +		static bool boot_cpu_is_nohz_full __read_mostly;
>  		/*
>  		 * If no cpu took the do_timer update, assign it to
>  		 * this cpu:
> @@ -199,18 +191,14 @@ static void tick_setup_device(struct tick_device *td,
>  		if (READ_ONCE(tick_do_timer_cpu) == TICK_DO_TIMER_BOOT) {
>  			WRITE_ONCE(tick_do_timer_cpu, cpu);
>  			tick_next_period = ktime_get();
> -#ifdef CONFIG_NO_HZ_FULL
>  			/*
>  			 * The boot CPU may be nohz_full, in which case the
>  			 * first housekeeping secondary will take do_timer()
>  			 * from us.
>  			 */
> -			if (tick_nohz_full_cpu(cpu))
> -				tick_do_timer_boot_cpu = cpu;
> -
> -		} else if (tick_do_timer_boot_cpu != -1 &&
> -						!tick_nohz_full_cpu(cpu)) {
> -			tick_do_timer_boot_cpu = -1;
> +			boot_cpu_is_nohz_full = tick_nohz_full_cpu(cpu);
> +		} else if (boot_cpu_is_nohz_full && !tick_nohz_full_cpu(cpu)) {
> +			boot_cpu_is_nohz_full = false;
>  			/*
>  			 * The boot CPU will stay in periodic (NOHZ disabled)
>  			 * mode until clocksource_done_booting() called after
> @@ -221,7 +209,6 @@ static void tick_setup_device(struct tick_device *td,
>  			 * check in tick_periodic() but this race is harmless.
>  			 */
>  			WRITE_ONCE(tick_do_timer_cpu, cpu);
> -#endif
>  		}
>  
>  		/*
Re: [PATCH v2] tick/nohz_full: turn tick_do_timer_boot_cpu into boot_cpu_is_nohz_full
Posted by Frederic Weisbecker 1 year, 6 months ago
Le Mon, Jun 03, 2024 at 05:35:57PM +0200, Oleg Nesterov a écrit :
> We don't need to record the cpu number of the nohz_full boot CPU, a simple
> boolean is enough. We could even kill it, the "else if" branch could check
> tick_nohz_full_cpu(tick_do_timer_cpu) && !tick_nohz_full_cpu(cpu).
> 
> Signed-off-by: Oleg Nesterov <oleg@redhat.com>

Acked-by: Frederic Weisbecker <frederic@kernel.org>