Given a util_avg value, the new function will return the future one
given a runtime delta.
This will be useful in later patches to help replace some magic margins
with more deterministic behavior.
Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
---
kernel/sched/pelt.c | 22 +++++++++++++++++++++-
kernel/sched/sched.h | 3 +++
2 files changed, 24 insertions(+), 1 deletion(-)
diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
index 0f310768260c..50322005a0ae 100644
--- a/kernel/sched/pelt.c
+++ b/kernel/sched/pelt.c
@@ -466,4 +466,24 @@ int update_irq_load_avg(struct rq *rq, u64 running)
return ret;
}
-#endif
+#endif /* CONFIG_HAVE_SCHED_AVG_IRQ */
+
+/*
+ * Approximate the new util_avg value assuming an entity has continued to run
+ * for @delta us.
+ */
+unsigned long approximate_util_avg(unsigned long util, u64 delta)
+{
+ struct sched_avg sa = {
+ .util_sum = util * PELT_MIN_DIVIDER,
+ .util_avg = util,
+ };
+
+ if (unlikely(!delta))
+ return util;
+
+ accumulate_sum(delta, &sa, 0, 0, 1);
+ ___update_load_avg(&sa, 0);
+
+ return sa.util_avg;
+}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 56eeb5b05b50..5f76b8a75a9f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2997,6 +2997,9 @@ enum cpu_util_type {
unsigned long effective_cpu_util(int cpu, unsigned long util_cfs,
enum cpu_util_type type,
struct task_struct *p);
+
+unsigned long approximate_util_avg(unsigned long util, u64 delta);
+
/*
* DVFS decision are made at discrete points. If CPU stays busy, the util will
* continue to grow, which means it could need to run at a higher frequency
--
2.34.1
On 28/08/2023 01:31, Qais Yousef wrote:
> Given a util_avg value, the new function will return the future one
> given a runtime delta.
>
> This will be useful in later patches to help replace some magic margins
> with more deterministic behavior.
>
> Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
> ---
> kernel/sched/pelt.c | 22 +++++++++++++++++++++-
> kernel/sched/sched.h | 3 +++
> 2 files changed, 24 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
> index 0f310768260c..50322005a0ae 100644
> --- a/kernel/sched/pelt.c
> +++ b/kernel/sched/pelt.c
> @@ -466,4 +466,24 @@ int update_irq_load_avg(struct rq *rq, u64 running)
>
> return ret;
> }
> -#endif
> +#endif /* CONFIG_HAVE_SCHED_AVG_IRQ */
> +
> +/*
> + * Approximate the new util_avg value assuming an entity has continued to run
> + * for @delta us.
> + */
> +unsigned long approximate_util_avg(unsigned long util, u64 delta)
> +{
> + struct sched_avg sa = {
> + .util_sum = util * PELT_MIN_DIVIDER,
> + .util_avg = util,
> + };
> +
> + if (unlikely(!delta))
> + return util;
> +
> + accumulate_sum(delta, &sa, 0, 0, 1);
IMHO, you miss the handling of `periods != 0`. load = 0 eclipses this
code in accumulate_sum().
> + ___update_load_avg(&sa, 0);
> +
> + return sa.util_avg;
> +}
We already discussed something similar like this in Nov 22, the so
called UTIL_EST_FASTER thing.
https://lkml.kernel.org/r/Y2kLA8x40IiBEPYg@hirez.programming.kicks-ass.net
+/*
+ * Compute a pelt util_avg assuming no history and @delta runtime.
+ */
+unsigned long faster_est_approx(u64 delta)
+{
+ unsigned long contrib = (unsigned long)delta; /* p == 0 -> delta < 1024 */
+ u64 periods = delta / 1024;
+
+ if (periods) {
+ delta %= 1024;
+ contrib = __accumulate_pelt_segments(periods, 1024, delta);
+ }
+
+ return (contrib << SCHED_CAPACITY_SHIFT) / PELT_MIN_DIVIDER;
+}
+
[...]
On 09/06/23 14:56, Dietmar Eggemann wrote:
> On 28/08/2023 01:31, Qais Yousef wrote:
> > Given a util_avg value, the new function will return the future one
> > given a runtime delta.
> >
> > This will be useful in later patches to help replace some magic margins
> > with more deterministic behavior.
> >
> > Signed-off-by: Qais Yousef (Google) <qyousef@layalina.io>
> > ---
> > kernel/sched/pelt.c | 22 +++++++++++++++++++++-
> > kernel/sched/sched.h | 3 +++
> > 2 files changed, 24 insertions(+), 1 deletion(-)
> >
> > diff --git a/kernel/sched/pelt.c b/kernel/sched/pelt.c
> > index 0f310768260c..50322005a0ae 100644
> > --- a/kernel/sched/pelt.c
> > +++ b/kernel/sched/pelt.c
> > @@ -466,4 +466,24 @@ int update_irq_load_avg(struct rq *rq, u64 running)
> >
> > return ret;
> > }
> > -#endif
> > +#endif /* CONFIG_HAVE_SCHED_AVG_IRQ */
> > +
> > +/*
> > + * Approximate the new util_avg value assuming an entity has continued to run
> > + * for @delta us.
> > + */
> > +unsigned long approximate_util_avg(unsigned long util, u64 delta)
> > +{
> > + struct sched_avg sa = {
> > + .util_sum = util * PELT_MIN_DIVIDER,
> > + .util_avg = util,
> > + };
> > +
> > + if (unlikely(!delta))
> > + return util;
> > +
> > + accumulate_sum(delta, &sa, 0, 0, 1);
>
> IMHO, you miss the handling of `periods != 0`. load = 0 eclipses this
> code in accumulate_sum().
Yes. For some reason I got blank registered when I saw if this codepath can
impact util_avg..
>
> > + ___update_load_avg(&sa, 0);
> > +
> > + return sa.util_avg;
> > +}
>
> We already discussed something similar like this in Nov 22, the so
> called UTIL_EST_FASTER thing.
>
> https://lkml.kernel.org/r/Y2kLA8x40IiBEPYg@hirez.programming.kicks-ass.net
>
> +/*
> + * Compute a pelt util_avg assuming no history and @delta runtime.
> + */
> +unsigned long faster_est_approx(u64 delta)
> +{
> + unsigned long contrib = (unsigned long)delta; /* p == 0 -> delta < 1024 */
> + u64 periods = delta / 1024;
> +
> + if (periods) {
> + delta %= 1024;
> + contrib = __accumulate_pelt_segments(periods, 1024, delta);
> + }
> +
> + return (contrib << SCHED_CAPACITY_SHIFT) / PELT_MIN_DIVIDER;
> +}
> +
I could look at using this version instead. This misses the decay part though?
Thanks!
--
Qais Yousef
On 06/09/2023 23:19, Qais Yousef wrote:
> On 09/06/23 14:56, Dietmar Eggemann wrote:
>> On 28/08/2023 01:31, Qais Yousef wrote:
[...]
>>> +/*
>>> + * Approximate the new util_avg value assuming an entity has continued to run
>>> + * for @delta us.
>>> + */
>>> +unsigned long approximate_util_avg(unsigned long util, u64 delta)
>>> +{
>>> + struct sched_avg sa = {
>>> + .util_sum = util * PELT_MIN_DIVIDER,
>>> + .util_avg = util,
>>> + };
>>> +
>>> + if (unlikely(!delta))
>>> + return util;
>>> +
>>> + accumulate_sum(delta, &sa, 0, 0, 1);
>>
>> IMHO, you miss the handling of `periods != 0`. load = 0 eclipses this
>> code in accumulate_sum().
You could call accumulate_sum(delta, &sa, 1, 0, 1);
>
> Yes. For some reason I got blank registered when I saw if this codepath can
> impact util_avg..
Another thing ... I guess if you call accumulate_sum with delta the PELT
machinery assumes `delta = now - sa->last_update_time` which means you
would have to use `clock_pelt + TICK_USEC` as delta.
>>
>>> + ___update_load_avg(&sa, 0);
>>> +
>>> + return sa.util_avg;
>>> +}
>>
>> We already discussed something similar like this in Nov 22, the so
>> called UTIL_EST_FASTER thing.
>>
>> https://lkml.kernel.org/r/Y2kLA8x40IiBEPYg@hirez.programming.kicks-ass.net
>>
>> +/*
>> + * Compute a pelt util_avg assuming no history and @delta runtime.
>> + */
>> +unsigned long faster_est_approx(u64 delta)
>> +{
>> + unsigned long contrib = (unsigned long)delta; /* p == 0 -> delta < 1024 */
>> + u64 periods = delta / 1024;
>> +
>> + if (periods) {
>> + delta %= 1024;
>> + contrib = __accumulate_pelt_segments(periods, 1024, delta);
>> + }
>> +
>> + return (contrib << SCHED_CAPACITY_SHIFT) / PELT_MIN_DIVIDER;
>> +}
>> +
>
> I could look at using this version instead. This misses the decay part though?
__accumulate_pelt_segments(periods, ...) decays the periods. But
obviously not the util you pass into approximate_util_avg().
On 09/07/23 13:12, Dietmar Eggemann wrote:
> On 06/09/2023 23:19, Qais Yousef wrote:
> > On 09/06/23 14:56, Dietmar Eggemann wrote:
> >> On 28/08/2023 01:31, Qais Yousef wrote:
>
> [...]
>
> >>> +/*
> >>> + * Approximate the new util_avg value assuming an entity has continued to run
> >>> + * for @delta us.
> >>> + */
> >>> +unsigned long approximate_util_avg(unsigned long util, u64 delta)
> >>> +{
> >>> + struct sched_avg sa = {
> >>> + .util_sum = util * PELT_MIN_DIVIDER,
> >>> + .util_avg = util,
> >>> + };
> >>> +
> >>> + if (unlikely(!delta))
> >>> + return util;
> >>> +
> >>> + accumulate_sum(delta, &sa, 0, 0, 1);
> >>
> >> IMHO, you miss the handling of `periods != 0`. load = 0 eclipses this
> >> code in accumulate_sum().
>
> You could call accumulate_sum(delta, &sa, 1, 0, 1);
Yes. I initially thought the load was not necessary, but good catch. I didn't
get a chance to rerun to see the numbers, but hopefully this should fix the
wrong numbers I was seeing. Thanks!
>
> >
> > Yes. For some reason I got blank registered when I saw if this codepath can
> > impact util_avg..
>
> Another thing ... I guess if you call accumulate_sum with delta the PELT
> machinery assumes `delta = now - sa->last_update_time` which means you
> would have to use `clock_pelt + TICK_USEC` as delta.
Right.
The way I understood it is that at TICK we should do update_load_avg() which
would call __update_load_sum() which uses
delta = now - sa->last_update_time
which passes this delta to accumulate_sum()
I can see we are not very accurate since there will be a small additional time
besides TICK_USEC that we are not accounting for. But I can't see how this can
cause a big error.
predicted (assumed) tick time/delta
sa->last_update_time = now
tick_time = TICK_USEC + now
delta = tick_time - sa->last_update_time
delta = TICK_USEC + now - now
delta = TICK_USEC
but actual tick time/delta
sa->last_update_time = now - x
tick_time = TICK_USEC + now
delta = tick_time - sa->last_update_time
delta = TICK_USEC + now - (now - x)
delta = TICK_USEC + x
So the delta I am using might be slightly shorter than it should be.
IIUC, what you're saying that the `x` in my equation above is clock_pelt,
right?
Thanks!
--
Qais Yousef
On 10/09/2023 21:58, Qais Yousef wrote: > On 09/07/23 13:12, Dietmar Eggemann wrote: >> On 06/09/2023 23:19, Qais Yousef wrote: >>> On 09/06/23 14:56, Dietmar Eggemann wrote: >>>> On 28/08/2023 01:31, Qais Yousef wrote: [...] >> Another thing ... I guess if you call accumulate_sum with delta the PELT >> machinery assumes `delta = now - sa->last_update_time` which means you >> would have to use `clock_pelt + TICK_USEC` as delta. > > Right. > > The way I understood it is that at TICK we should do update_load_avg() which > would call __update_load_sum() which uses > > delta = now - sa->last_update_time > > which passes this delta to accumulate_sum() > > I can see we are not very accurate since there will be a small additional time > besides TICK_USEC that we are not accounting for. But I can't see how this can > cause a big error. > > predicted (assumed) tick time/delta > > sa->last_update_time = now > tick_time = TICK_USEC + now > > delta = tick_time - sa->last_update_time > delta = TICK_USEC + now - now > delta = TICK_USEC > > but actual tick time/delta > > sa->last_update_time = now - x > tick_time = TICK_USEC + now > > delta = tick_time - sa->last_update_time > delta = TICK_USEC + now - (now - x) > delta = TICK_USEC + x > > So the delta I am using might be slightly shorter than it should be. > > IIUC, what you're saying that the `x` in my equation above is clock_pelt, > right? No, I was wrong here. Calling accumulate_sum with `delta = TICK_USEC` is fine. accumulate_sum() will accrue `sa->util.sum` and ___update_load_avg() will then adjust `sa->util_avg` accordingly. delta should be 4000 on Arm64 boards so you will cross period boundaries. In case `delta < 1024` you might want to not call ___update_load_avg() to be in pair with __update_load_avg_cfs_rq().
On 09/13/23 19:22, Dietmar Eggemann wrote: > On 10/09/2023 21:58, Qais Yousef wrote: > > On 09/07/23 13:12, Dietmar Eggemann wrote: > >> On 06/09/2023 23:19, Qais Yousef wrote: > >>> On 09/06/23 14:56, Dietmar Eggemann wrote: > >>>> On 28/08/2023 01:31, Qais Yousef wrote: > > [...] > > >> Another thing ... I guess if you call accumulate_sum with delta the PELT > >> machinery assumes `delta = now - sa->last_update_time` which means you > >> would have to use `clock_pelt + TICK_USEC` as delta. > > > > Right. > > > > The way I understood it is that at TICK we should do update_load_avg() which > > would call __update_load_sum() which uses > > > > delta = now - sa->last_update_time > > > > which passes this delta to accumulate_sum() > > > > I can see we are not very accurate since there will be a small additional time > > besides TICK_USEC that we are not accounting for. But I can't see how this can > > cause a big error. > > > > predicted (assumed) tick time/delta > > > > sa->last_update_time = now > > tick_time = TICK_USEC + now > > > > delta = tick_time - sa->last_update_time > > delta = TICK_USEC + now - now > > delta = TICK_USEC > > > > but actual tick time/delta > > > > sa->last_update_time = now - x > > tick_time = TICK_USEC + now > > > > delta = tick_time - sa->last_update_time > > delta = TICK_USEC + now - (now - x) > > delta = TICK_USEC + x > > > > So the delta I am using might be slightly shorter than it should be. > > > > IIUC, what you're saying that the `x` in my equation above is clock_pelt, > > right? > > No, I was wrong here. Calling accumulate_sum with `delta = TICK_USEC` is > fine. > > accumulate_sum() will accrue `sa->util.sum` and ___update_load_avg() > will then adjust `sa->util_avg` accordingly. > > delta should be 4000 on Arm64 boards so you will cross period > boundaries. In case `delta < 1024` you might want to not call > ___update_load_avg() to be in pair with __update_load_avg_cfs_rq(). 
You mean *not* call, or actually *do* call ___update_load_avg() if delta < 1024? I am certainly not calling it now and I think you're suggesting to actually call it when period is less than 1024. This area is not my strength, so I sure do appreciate any suggestion to make it better! :-) I will look into that for next version. Many thanks! -- Qais Yousef
On 09/16/23 20:49, Qais Yousef wrote: > On 09/13/23 19:22, Dietmar Eggemann wrote: > > On 10/09/2023 21:58, Qais Yousef wrote: > > > On 09/07/23 13:12, Dietmar Eggemann wrote: > > >> On 06/09/2023 23:19, Qais Yousef wrote: > > >>> On 09/06/23 14:56, Dietmar Eggemann wrote: > > >>>> On 28/08/2023 01:31, Qais Yousef wrote: > > > > [...] > > > > >> Another thing ... I guess if you call accumulate_sum with delta the PELT > > >> machinery assumes `delta = now - sa->last_update_time` which means you > > >> would have to use `clock_pelt + TICK_USEC` as delta. > > > > > > Right. > > > > > > The way I understood it is that at TICK we should do update_load_avg() which > > > would call __update_load_sum() which uses > > > > > > delta = now - sa->last_update_time > > > > > > which passes this delta to accumulate_sum() > > > > > > I can see we are not very accurate since there will be a small additional time > > > besides TICK_USEC that we are not accounting for. But I can't see how this can > > > cause a big error. > > > > > > predicted (assumed) tick time/delta > > > > > > sa->last_update_time = now > > > tick_time = TICK_USEC + now > > > > > > delta = tick_time - sa->last_update_time > > > delta = TICK_USEC + now - now > > > delta = TICK_USEC > > > > > > but actual tick time/delta > > > > > > sa->last_update_time = now - x > > > tick_time = TICK_USEC + now > > > > > > delta = tick_time - sa->last_update_time > > > delta = TICK_USEC + now - (now - x) > > > delta = TICK_USEC + x > > > > > > So the delta I am using might be slightly shorter than it should be. > > > > > > IIUC, what you're saying that the `x` in my equation above is clock_pelt, > > > right? > > > > No, I was wrong here. Calling accumulate_sum with `delta = TICK_USEC` is > > fine. > > > > accumulate_sum() will accrue `sa->util.sum` and ___update_load_avg() > > will then adjust `sa->util_avg` accordingly. > > > > delta should be 4000 on Arm64 boards so you will cross period > > boundaries. 
In case `delta < 1024` you might want to not call > > ___update_load_avg() to be in pair with __update_load_avg_cfs_rq(). > > You mean *not* call, or actually *do* call ___update_load_avg() if delta > < 1024? I am certainly not calling it now and I think you're suggesting to > actually call it when period is less than 1024. Oops my bad, I got confused. I am calling it. Ignore me! Cheers -- Qais Yousef
© 2016 - 2025 Red Hat, Inc.