kernel/sched/core.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+)
When sched_core_enabled(), we sometimes need to call update_rq_clock()
to update the rq clock of sibling CPUs on the same core. Before doing
so, we need to clear RQCF_UPDATED in rq->clock_update_flags to avoid
the WARN_DOUBLE_CLOCK warning, because at this point the
rq->clock_update_flags of the sibling CPUs may still have RQCF_UPDATED
set. Since sched_core_enabled() means we hold the core-wide rq->lock,
it is safe at this point to clear RQCF_UPDATED in the
rq->clock_update_flags of all CPUs on this core.
We cannot clear rq->clock_update_flags of the other CPUs on the same
core in rq_pin_lock(), because some functions temporarily drop the
core-wide rq->lock and then reacquire it with raw_spin_rq_lock(),
for example newidle_balance() and _double_lock_balance().
Steps to reproduce:
1. Enable CONFIG_SCHED_DEBUG and CONFIG_SCHED_CORE when compiling
the kernel
2. echo 1 > /sys/kernel/debug/clear_warn_once
echo "WARN_DOUBLE_CLOCK" > /sys/kernel/debug/sched/features
3. Run the linux/tools/testing/selftests/sched/cs_prctl_test test
Signed-off-by: Hao Jia <jiahao.os@bytedance.com>
---
- Adapt the WARN_DOUBLE_CLOCK machinery for core scheduling instead of
clearing the WARN_DOUBLE_CLOCK warning at each call site one by one.
- Modify commit information
[v1] https://lore.kernel.org/all/20221206070550.31763-1-jiahao.os@bytedance.com
kernel/sched/core.c | 17 +++++++++++++++++
1 file changed, 17 insertions(+)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index e838feb6adc5..16a33e5adb77 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -427,11 +427,27 @@ void sched_core_put(void)
schedule_work(&_work);
}
+static inline void sched_core_rq_clock_clear_update(struct rq *rq)
+{
+#ifdef CONFIG_SCHED_DEBUG
+ const struct cpumask *smt_mask;
+ int i;
+
+ if (rq->core_enabled) {
+ smt_mask = cpu_smt_mask(rq->cpu);
+ for_each_cpu(i, smt_mask) {
+ if (rq->cpu != i)
+ cpu_rq(i)->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP);
+ }
+ }
+#endif
+}
#else /* !CONFIG_SCHED_CORE */
static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { }
static inline void
sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { }
+static inline void sched_core_rq_clock_clear_update(struct rq *rq) { }
#endif /* CONFIG_SCHED_CORE */
@@ -546,6 +562,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass)
if (likely(lock == __rq_lockp(rq))) {
/* preempt_count *MUST* be > 1 */
preempt_enable_no_resched();
+ sched_core_rq_clock_clear_update(rq);
return;
}
raw_spin_unlock(lock);
--
2.37.0 (Apple Git-136)
On Wed, Feb 15, 2023 at 03:39:27PM +0800, Hao Jia wrote: > When sched_core_enabled(), we sometimes need to call update_rq_clock() > to update the rq clock of sibling CPUs on the same core, before that we > need to clear RQCF_UPDATED of rq->clock_update_flags to avoid the > WARN_DOUBLE_CLOCK warning. Because at this time the rq->clock_update_flags > of sibling CPUs may be RQCF_UPDATED. If sched_core_enabled(), we will get > a core wide rq->lock, so at this point we can safely clear RQCF_UPDATED of > rq->clock_update_flags of all CPUs on this core to avoid the > WARN_DOUBLE_CLOCK warning. > > We cannot clear rq->clock_update_flags of other cpus on the same core in > rq_pin_lock(). Because in some functions, we will temporarily give up > core wide rq->lock, and then use raw_spin_rq_lock() to obtain core wide > rq->lock, such as newidle_balance() and _double_lock_balance(). > > Steps to reproduce: > 1. Enable CONFIG_SCHED_DEBUG and CONFIG_SCHED_CORE when compiling > the kernel > 2. echo 1 > /sys/kernel/debug/clear_warn_once > echo "WARN_DOUBLE_CLOCK" > /sys/kernel/debug/sched/features > 3. Run the linux/tools/testing/selftests/sched/cs_prctl_test test > > Signed-off-by: Hao Jia <jiahao.os@bytedance.com> > --- > - Adapt WARN_DOUBLE_CLOCK machinery for core-sched instead of clearing > WARN_DOUBLE_CLOCK warning one by one. 
> - Modify commit information > [v1] https://lore.kernel.org/all/20221206070550.31763-1-jiahao.os@bytedance.com > > kernel/sched/core.c | 17 +++++++++++++++++ > 1 file changed, 17 insertions(+) > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c > index e838feb6adc5..16a33e5adb77 100644 > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -427,11 +427,27 @@ void sched_core_put(void) > schedule_work(&_work); > } > > +static inline void sched_core_rq_clock_clear_update(struct rq *rq) > +{ > +#ifdef CONFIG_SCHED_DEBUG > + const struct cpumask *smt_mask; > + int i; > + > + if (rq->core_enabled) { > + smt_mask = cpu_smt_mask(rq->cpu); > + for_each_cpu(i, smt_mask) { > + if (rq->cpu != i) > + cpu_rq(i)->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); > + } > + } > +#endif So sort of ok, but that function name.... so long :/ > +} > #else /* !CONFIG_SCHED_CORE */ > > static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { } > static inline void > sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { } > +static inline void sched_core_rq_clock_clear_update(struct rq *rq) { } > > #endif /* CONFIG_SCHED_CORE */ > > @@ -546,6 +562,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass) > if (likely(lock == __rq_lockp(rq))) { > /* preempt_count *MUST* be > 1 */ > preempt_enable_no_resched(); > + sched_core_rq_clock_clear_update(rq); > return; > } > raw_spin_unlock(lock); This otoh don't make much sense. Why put it here and not extend rq_pin_lock()? That is, what's wrong with something like so? 
--- diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 771f8ddb7053..c1a92eced930 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1571,11 +1571,18 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) rf->cookie = lockdep_pin_lock(__rq_lockp(rq)); #ifdef CONFIG_SCHED_DEBUG - rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); - rf->clock_update_flags = 0; #ifdef CONFIG_SMP SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback); #endif + rf->clock_update_flags = 0; + if (sched_core_enabled()) { + int i; + + for_each_cpu(i, cpu_smt_mask(rq->cpu)) + cpu_rq(i)->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); + } else { + rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); + } #endif }
On 2023/2/20 Peter Zijlstra wrote: > On Wed, Feb 15, 2023 at 03:39:27PM +0800, Hao Jia wrote: >> When sched_core_enabled(), we sometimes need to call update_rq_clock() >> to update the rq clock of sibling CPUs on the same core, before that we >> need to clear RQCF_UPDATED of rq->clock_update_flags to avoid the >> WARN_DOUBLE_CLOCK warning. Because at this time the rq->clock_update_flags >> of sibling CPUs may be RQCF_UPDATED. If sched_core_enabled(), we will get >> a core wide rq->lock, so at this point we can safely clear RQCF_UPDATED of >> rq->clock_update_flags of all CPUs on this core to avoid the >> WARN_DOUBLE_CLOCK warning. >> >> We cannot clear rq->clock_update_flags of other cpus on the same core in >> rq_pin_lock(). Because in some functions, we will temporarily give up >> core wide rq->lock, and then use raw_spin_rq_lock() to obtain core wide >> rq->lock, such as newidle_balance() and _double_lock_balance(). >> >> Steps to reproduce: >> 1. Enable CONFIG_SCHED_DEBUG and CONFIG_SCHED_CORE when compiling >> the kernel >> 2. echo 1 > /sys/kernel/debug/clear_warn_once >> echo "WARN_DOUBLE_CLOCK" > /sys/kernel/debug/sched/features >> 3. Run the linux/tools/testing/selftests/sched/cs_prctl_test test >> >> Signed-off-by: Hao Jia <jiahao.os@bytedance.com> >> --- >> - Adapt WARN_DOUBLE_CLOCK machinery for core-sched instead of clearing >> WARN_DOUBLE_CLOCK warning one by one. 
>> - Modify commit information >> [v1] https://lore.kernel.org/all/20221206070550.31763-1-jiahao.os@bytedance.com >> >> kernel/sched/core.c | 17 +++++++++++++++++ >> 1 file changed, 17 insertions(+) >> >> diff --git a/kernel/sched/core.c b/kernel/sched/core.c >> index e838feb6adc5..16a33e5adb77 100644 >> --- a/kernel/sched/core.c >> +++ b/kernel/sched/core.c >> @@ -427,11 +427,27 @@ void sched_core_put(void) >> schedule_work(&_work); >> } >> >> +static inline void sched_core_rq_clock_clear_update(struct rq *rq) >> +{ >> +#ifdef CONFIG_SCHED_DEBUG >> + const struct cpumask *smt_mask; >> + int i; >> + >> + if (rq->core_enabled) { >> + smt_mask = cpu_smt_mask(rq->cpu); >> + for_each_cpu(i, smt_mask) { >> + if (rq->cpu != i) >> + cpu_rq(i)->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); >> + } >> + } >> +#endif > > So sort of ok, but that function name.... so long :/ What about clear_core_clock_updated()? Indeed, a good function name is difficult. Thanks, Hao > >> +} >> #else /* !CONFIG_SCHED_CORE */ >> >> static inline void sched_core_enqueue(struct rq *rq, struct task_struct *p) { } >> static inline void >> sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags) { } >> +static inline void sched_core_rq_clock_clear_update(struct rq *rq) { } >> >> #endif /* CONFIG_SCHED_CORE */ >> >> @@ -546,6 +562,7 @@ void raw_spin_rq_lock_nested(struct rq *rq, int subclass) >> if (likely(lock == __rq_lockp(rq))) { >> /* preempt_count *MUST* be > 1 */ >> preempt_enable_no_resched(); >> + sched_core_rq_clock_clear_update(rq); >> return; >> } >> raw_spin_unlock(lock); > > This otoh don't make much sense. Why put it here and not extend > rq_pin_lock()? > > That is, what's wrong with something like so? We sometimes use rq_pin_lock() and raw_spin_rq_lock() separately. in some functions(), we will temporarily give up core wide rq->lock, and then use raw_spin_rq_lock() to obtain core wide rq->lock, at this time the rq->clock_update_flags may be RQCF_UPDATED. 
such as newidle_balance() and _double_lock_balance(). Perhaps it is more flexible to use rq_pin_lock() and raw_spin_rq_lock() separately? such as: newidle_balance() rq_unpin_lock() raw_spin_rq_unlock() update_blocked_averages() update_rq_clock(rq) <-- RQCF_UPDATED raw_spin_rq_lock(); (1) rq_repin_lock() <-- we need *restore* to RQCF_UPDATED We cannot replace raw_spin_rq_lock() in (1) with rq_lock(), because this will set rf->clock_update_flags to 0, This may cause assert_clock_updated() to be triggered later. Thinks, Hao > > --- > > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h > index 771f8ddb7053..c1a92eced930 100644 > --- a/kernel/sched/sched.h > +++ b/kernel/sched/sched.h > @@ -1571,11 +1571,18 @@ static inline void rq_pin_lock(struct rq *rq, struct rq_flags *rf) > rf->cookie = lockdep_pin_lock(__rq_lockp(rq)); > > #ifdef CONFIG_SCHED_DEBUG > - rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); > - rf->clock_update_flags = 0; > #ifdef CONFIG_SMP > SCHED_WARN_ON(rq->balance_callback && rq->balance_callback != &balance_push_callback); > #endif > + rf->clock_update_flags = 0; > + if (sched_core_enabled()) { > + int i; > + > + for_each_cpu(i, cpu_smt_mask(rq->cpu)) > + cpu_rq(i)->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); > + } else { > + rq->clock_update_flags &= (RQCF_REQ_SKIP|RQCF_ACT_SKIP); > + } > #endif > } >
© 2016 - 2025 Red Hat, Inc.