include/linux/sched/idle.h | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
In architectures that use the polling bit, current_clr_polling() employs
smp_mb() to ensure that the clearing of the polling bit is visible to
other cores before checking TIF_NEED_RESCHED.
However, smp_mb() can be costly. Given that clear_bit() is an atomic
operation, replacing smp_mb() with smp_mb__after_atomic() is appropriate.
Many architectures implement smp_mb__after_atomic() as a lighter-weight
barrier compared to smp_mb(), leading to performance improvements.
For instance, on x86, smp_mb__after_atomic() is a no-op. This change
eliminates a smp_mb() instruction in the cpuidle wake-up path, saving
several CPU cycles and thereby reducing wake-up latency.
Architectures that do not use the polling bit will retain the original
smp_mb() behavior to ensure that existing dependencies remain unaffected.
Signed-off-by: Yujun Dong <yujundong@pascal-lab.net>
---
include/linux/sched/idle.h | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index e670ac282333..439f6029d3b9 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -79,6 +79,21 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
return unlikely(tif_need_resched());
}
+static __always_inline void current_clr_polling(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
+ * Once the bit is cleared, we'll get IPIs with every new
+ * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
+ * fold.
+ */
+ smp_mb__after_atomic(); /* paired with resched_curr() */
+
+ preempt_fold_need_resched();
+}
+
#else
static inline void __current_set_polling(void) { }
static inline void __current_clr_polling(void) { }
@@ -91,21 +106,15 @@ static inline bool __must_check current_clr_polling_and_test(void)
{
return unlikely(tif_need_resched());
}
-#endif
static __always_inline void current_clr_polling(void)
{
__current_clr_polling();
- /*
- * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
- * Once the bit is cleared, we'll get IPIs with every new
- * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
- * fold.
- */
smp_mb(); /* paired with resched_curr() */
preempt_fold_need_resched();
}
+#endif
#endif /* _LINUX_SCHED_IDLE_H */
--
2.47.1
[ Sorry about the belated reply, found this in my TODO pile ... ]
* Yujun Dong <yujundong@pascal-lab.net> wrote:
> In architectures that use the polling bit, current_clr_polling() employs
> smp_mb() to ensure that the clearing of the polling bit is visible to
> other cores before checking TIF_NEED_RESCHED.
>
> However, smp_mb() can be costly. Given that clear_bit() is an atomic
> operation, replacing smp_mb() with smp_mb__after_atomic() is appropriate.
>
> Many architectures implement smp_mb__after_atomic() as a lighter-weight
> barrier compared to smp_mb(), leading to performance improvements.
> For instance, on x86, smp_mb__after_atomic() is a no-op. This change
> eliminates a smp_mb() instruction in the cpuidle wake-up path, saving
> several CPU cycles and thereby reducing wake-up latency.
>
> Architectures that do not use the polling bit will retain the original
> smp_mb() behavior to ensure that existing dependencies remain unaffected.
>
> Signed-off-by: Yujun Dong <yujundong@pascal-lab.net>
> ---
> include/linux/sched/idle.h | 23 ++++++++++++++++-------
> 1 file changed, 16 insertions(+), 7 deletions(-)
>
> diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
> index e670ac282333..439f6029d3b9 100644
> --- a/include/linux/sched/idle.h
> +++ b/include/linux/sched/idle.h
> @@ -79,6 +79,21 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
> return unlikely(tif_need_resched());
> }
>
> +static __always_inline void current_clr_polling(void)
> +{
> + __current_clr_polling();
> +
> + /*
> + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
> + * Once the bit is cleared, we'll get IPIs with every new
> + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
> + * fold.
> + */
> + smp_mb__after_atomic(); /* paired with resched_curr() */
> +
> + preempt_fold_need_resched();
> +}
> +
> #else
> static inline void __current_set_polling(void) { }
> static inline void __current_clr_polling(void) { }
> @@ -91,21 +106,15 @@ static inline bool __must_check current_clr_polling_and_test(void)
> {
> return unlikely(tif_need_resched());
> }
> -#endif
>
> static __always_inline void current_clr_polling(void)
> {
> __current_clr_polling();
>
> - /*
> - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
> - * Once the bit is cleared, we'll get IPIs with every new
> - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
> - * fold.
> - */
> smp_mb(); /* paired with resched_curr() */
So this part is weird: you remove the comment that justifies the
smp_mb(), but you leave the smp_mb() in place. Why?
Thanks,
Ingo
Hi Ingo,
* Ingo Molnar <mingo@kernel.org> wrote:
>
> [ Sorry about the belated reply, found this in my TODO pile ... ]
>
> * Yujun Dong <yujundong@pascal-lab.net> wrote:
>
>> In architectures that use the polling bit, current_clr_polling() employs
>> smp_mb() to ensure that the clearing of the polling bit is visible to
>> other cores before checking TIF_NEED_RESCHED.
>>
>> However, smp_mb() can be costly. Given that clear_bit() is an atomic
>> operation, replacing smp_mb() with smp_mb__after_atomic() is appropriate.
>>
>> Many architectures implement smp_mb__after_atomic() as a lighter-weight
>> barrier compared to smp_mb(), leading to performance improvements.
>> For instance, on x86, smp_mb__after_atomic() is a no-op. This change
>> eliminates a smp_mb() instruction in the cpuidle wake-up path, saving
>> several CPU cycles and thereby reducing wake-up latency.
>>
>> Architectures that do not use the polling bit will retain the original
>> smp_mb() behavior to ensure that existing dependencies remain unaffected.
>>
>> Signed-off-by: Yujun Dong <yujundong@pascal-lab.net>
>> ---
>> include/linux/sched/idle.h | 23 ++++++++++++++++-------
>> 1 file changed, 16 insertions(+), 7 deletions(-)
>>
>> diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
>> index e670ac282333..439f6029d3b9 100644
>> --- a/include/linux/sched/idle.h
>> +++ b/include/linux/sched/idle.h
>> @@ -79,6 +79,21 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
>> return unlikely(tif_need_resched());
>> }
>>
>> +static __always_inline void current_clr_polling(void)
>> +{
>> + __current_clr_polling();
>> +
>> + /*
>> + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
>> + * Once the bit is cleared, we'll get IPIs with every new
>> + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
>> + * fold.
>> + */
>> + smp_mb__after_atomic(); /* paired with resched_curr() */
>> +
>> + preempt_fold_need_resched();
>> +}
>> +
>> #else
>> static inline void __current_set_polling(void) { }
>> static inline void __current_clr_polling(void) { }
>> @@ -91,21 +106,15 @@ static inline bool __must_check current_clr_polling_and_test(void)
>> {
>> return unlikely(tif_need_resched());
>> }
>> -#endif
>>
>> static __always_inline void current_clr_polling(void)
>> {
>> __current_clr_polling();
>>
>> - /*
>> - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
>> - * Once the bit is cleared, we'll get IPIs with every new
>> - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
>> - * fold.
>> - */
>> smp_mb(); /* paired with resched_curr() */
>
> So this part is weird: you remove the comment that justifies the
> smp_mb(), but you leave the smp_mb() in place. Why?
>
> Thanks,
>
> Ingo
Thanks for pointing that out. The comment removal in the non-polling
branch was intentional, but my original explanation was unclear. Let
me rephrase:
Polling architectures (with the TIF_POLLING flag):
1. __current_clr_polling() performs atomic ops ->
Use smp_mb__after_atomic()
2. Keep original "clear polling bit" comment as it directly explains
the barrier's purpose.
Non-polling architectures (#else branch):
1. __current_clr_polling() is a no-op -> Original comment about
"clearing the bit" becomes misleading.
2. However, the smp_mb() must remain to preserve pre-existing memory
ordering guarantees. And explicitly documenting it requires new
wording to avoid confusion.
Proposed approaches:
Option A: Add a comment for non-polling smp_mb() like "Paired with
resched_curr(), as per pre-existing memory ordering guarantees"
Option B: Leave code as-is (no comment) and elaborate in the commit
message: "For non-polling architectures, retain smp_mb() to avoid
subtle regressions, while intentionally omitting the bit-specific
comment that no longer applies."
Which direction would you consider most maintainable? Your insight
would be greatly appreciated.
Best regards,
Yujun
* Yujun Dong <yujundong@pascal-lab.net> wrote:
> Hi Ingo,
>
> * Ingo Molnar <mingo@kernel.org> wrote:
> >
> > [ Sorry about the belated reply, found this in my TODO pile ... ]
> >
> > * Yujun Dong <yujundong@pascal-lab.net> wrote:
> >
> >> In architectures that use the polling bit, current_clr_polling() employs
> >> smp_mb() to ensure that the clearing of the polling bit is visible to
> >> other cores before checking TIF_NEED_RESCHED.
> >>
> >> However, smp_mb() can be costly. Given that clear_bit() is an atomic
> >> operation, replacing smp_mb() with smp_mb__after_atomic() is appropriate.
> >>
> >> Many architectures implement smp_mb__after_atomic() as a lighter-weight
> >> barrier compared to smp_mb(), leading to performance improvements.
> >> For instance, on x86, smp_mb__after_atomic() is a no-op. This change
> >> eliminates a smp_mb() instruction in the cpuidle wake-up path, saving
> >> several CPU cycles and thereby reducing wake-up latency.
> >>
> >> Architectures that do not use the polling bit will retain the original
> >> smp_mb() behavior to ensure that existing dependencies remain unaffected.
> >>
> >> Signed-off-by: Yujun Dong <yujundong@pascal-lab.net>
> >> ---
> >> include/linux/sched/idle.h | 23 ++++++++++++++++-------
> >> 1 file changed, 16 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
> >> index e670ac282333..439f6029d3b9 100644
> >> --- a/include/linux/sched/idle.h
> >> +++ b/include/linux/sched/idle.h
> >> @@ -79,6 +79,21 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
> >> return unlikely(tif_need_resched());
> >> }
> >>
> >> +static __always_inline void current_clr_polling(void)
> >> +{
> >> + __current_clr_polling();
> >> +
> >> + /*
> >> + * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
> >> + * Once the bit is cleared, we'll get IPIs with every new
> >> + * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
> >> + * fold.
> >> + */
> >> + smp_mb__after_atomic(); /* paired with resched_curr() */
> >> +
> >> + preempt_fold_need_resched();
> >> +}
> >> +
> >> #else
> >> static inline void __current_set_polling(void) { }
> >> static inline void __current_clr_polling(void) { }
> >> @@ -91,21 +106,15 @@ static inline bool __must_check current_clr_polling_and_test(void)
> >> {
> >> return unlikely(tif_need_resched());
> >> }
> >> -#endif
> >>
> >> static __always_inline void current_clr_polling(void)
> >> {
> >> __current_clr_polling();
> >>
> >> - /*
> >> - * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
> >> - * Once the bit is cleared, we'll get IPIs with every new
> >> - * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
> >> - * fold.
> >> - */
> >> smp_mb(); /* paired with resched_curr() */
> >
> > So this part is weird: you remove the comment that justifies the
> > smp_mb(), but you leave the smp_mb() in place. Why?
> >
> > Thanks,
> >
> > Ingo
>
> Thanks for pointing that out. The comment removal in the non-polling
> branch was intentional, but my original explanation was unclear. Let
> me rephrase:
>
> Polling architectures (with the TIF_POLLING flag):
> 1. __current_clr_polling() performs atomic ops ->
> Use smp_mb__after_atomic()
> 2. Keep original "clear polling bit" comment as it directly explains
> the barrier's purpose.
>
> Non-polling architectures (#else branch):
> 1. __current_clr_polling() is a no-op -> Original comment about
> "clearing the bit" becomes misleading.
> 2. However, the smp_mb() must remain to preserve pre-existing memory
> ordering guarantees. And explicitly documenting it requires new
> wording to avoid confusion.
Thanks for the explanation, on a second reading that makes a lot of
sense.
> Proposed approaches:
> Option A: Add a comment for non-polling smp_mb() like "Paired with
> resched_curr(), as per pre-existing memory ordering guarantees"
> Option B: Leave code as-is (no comment) and elaborate in the commit
> message: "For non-polling architectures, retain smp_mb() to avoid
> subtle regressions, while intentionally omitting the bit-specific
> comment that no longer applies."
>
> Which direction would you consider most maintainable? Your insight
> would be greatly appreciated.
No action needed on your side: it was really just me being dense and
not understanding that the comment was moved rather than eliminated,
because it no longer applied in its original place. The 'paired with
resched_curr()' comment on smp_mb() is the operative one, and it
remained intact.
I applied your optimization to the scheduler tree and it should go
upstream in the v6.15 merge window if all goes well in testing.
Thanks,
Ingo
The following commit has been merged into the sched/core branch of tip:
Commit-ID: 3785c7dbae0f733f13f8857beaaada5d7dc63e02
Gitweb: https://git.kernel.org/tip/3785c7dbae0f733f13f8857beaaada5d7dc63e02
Author: Yujun Dong <yujundong@pascal-lab.net>
AuthorDate: Mon, 30 Dec 2024 22:16:24 +08:00
Committer: Ingo Molnar <mingo@kernel.org>
CommitterDate: Thu, 20 Mar 2025 10:03:52 +01:00
cpuidle, sched: Use smp_mb__after_atomic() in current_clr_polling()
In architectures that use the polling bit, current_clr_polling() employs
smp_mb() to ensure that the clearing of the polling bit is visible to
other cores before checking TIF_NEED_RESCHED.
However, smp_mb() can be costly. Given that clear_bit() is an atomic
operation, replacing smp_mb() with smp_mb__after_atomic() is appropriate.
Many architectures implement smp_mb__after_atomic() as a lighter-weight
barrier compared to smp_mb(), leading to performance improvements.
For instance, on x86, smp_mb__after_atomic() is a no-op. This change
eliminates a smp_mb() instruction in the cpuidle wake-up path, saving
several CPU cycles and thereby reducing wake-up latency.
Architectures that do not use the polling bit will retain the original
smp_mb() behavior to ensure that existing dependencies remain unaffected.
Signed-off-by: Yujun Dong <yujundong@pascal-lab.net>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/20241230141624.155356-1-yujundong@pascal-lab.net
---
include/linux/sched/idle.h | 23 ++++++++++++++++-------
1 file changed, 16 insertions(+), 7 deletions(-)
diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h
index e670ac2..439f602 100644
--- a/include/linux/sched/idle.h
+++ b/include/linux/sched/idle.h
@@ -79,6 +79,21 @@ static __always_inline bool __must_check current_clr_polling_and_test(void)
return unlikely(tif_need_resched());
}
+static __always_inline void current_clr_polling(void)
+{
+ __current_clr_polling();
+
+ /*
+ * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
+ * Once the bit is cleared, we'll get IPIs with every new
+ * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
+ * fold.
+ */
+ smp_mb__after_atomic(); /* paired with resched_curr() */
+
+ preempt_fold_need_resched();
+}
+
#else
static inline void __current_set_polling(void) { }
static inline void __current_clr_polling(void) { }
@@ -91,21 +106,15 @@ static inline bool __must_check current_clr_polling_and_test(void)
{
return unlikely(tif_need_resched());
}
-#endif
static __always_inline void current_clr_polling(void)
{
__current_clr_polling();
- /*
- * Ensure we check TIF_NEED_RESCHED after we clear the polling bit.
- * Once the bit is cleared, we'll get IPIs with every new
- * TIF_NEED_RESCHED and the IPI handler, scheduler_ipi(), will also
- * fold.
- */
smp_mb(); /* paired with resched_curr() */
preempt_fold_need_resched();
}
+#endif
#endif /* _LINUX_SCHED_IDLE_H */
© 2016 - 2026 Red Hat, Inc.