[PATCH 3/4] sched/psi: extract update_triggers side effect

Domenico Cerasuolo posted 4 patches 3 years, 1 month ago
There is a newer version of this series
[PATCH 3/4] sched/psi: extract update_triggers side effect
Posted by Domenico Cerasuolo 3 years, 1 month ago
The update of rtpoll_total inside update_triggers can be moved out of
the function since changed_states has the same information as the
update_total flag used in the function. Besides the simplification of
the function, with the next patch it would become an unwanted side
effect needed only for PSI_POLL.

Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
---
 kernel/sched/psi.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index a3d0b5cf797a..476941c1cbea 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -433,7 +433,6 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
 static u64 update_triggers(struct psi_group *group, u64 now)
 {
 	struct psi_trigger *t;
-	bool update_total = false;
 	u64 *total = group->total[PSI_POLL];
 
 	/*
@@ -456,14 +455,6 @@ static u64 update_triggers(struct psi_group *group, u64 now)
 		 * events without dropping any).
 		 */
 		if (new_stall) {
-			/*
-			 * Multiple triggers might be looking at the same state,
-			 * remember to update group->polling_total[] once we've
-			 * been through all of them. Also remember to extend the
-			 * polling time if we see new stall activity.
-			 */
-			update_total = true;
-
 			/* Calculate growth since last update */
 			growth = window_update(&t->win, now, total[t->state]);
 			if (!t->pending_event) {
@@ -484,11 +475,6 @@ static u64 update_triggers(struct psi_group *group, u64 now)
 		/* Reset threshold breach flag once event got generated */
 		t->pending_event = false;
 	}
-
-	if (update_total)
-		memcpy(group->rtpoll_total, total,
-				sizeof(group->rtpoll_total));
-
 	return now + group->rtpoll_min_period;
 }
 
@@ -686,8 +672,12 @@ static void psi_rtpoll_work(struct psi_group *group)
 		goto out;
 	}
 
-	if (now >= group->rtpoll_next_update)
+	if (now >= group->rtpoll_next_update) {
 		group->rtpoll_next_update = update_triggers(group, now);
+		if (changed_states & group->rtpoll_states)
+			memcpy(group->rtpoll_total, group->total[PSI_POLL],
+				   sizeof(group->rtpoll_total));
+	}
 
 	psi_schedule_rtpoll_work(group,
 		nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
-- 
2.34.1
Re: [PATCH 3/4] sched/psi: extract update_triggers side effect
Posted by Suren Baghdasaryan 3 years ago
On Thu, Mar 9, 2023 at 9:08 AM Domenico Cerasuolo
<cerasuolodomenico@gmail.com> wrote:
>
> The update of rtpoll_total inside update_triggers can be moved out of
> the function since changed_states has the same information as the
> update_total flag used in the function. Besides the simplification of
> the function, with the next patch it would become an unwanted side
> effect needed only for PSI_POLL.

(changed_states & group->rtpoll_states) and update_total flag are not
really equivalent. update_total flag depends on the difference between
group->polling_total[state] and group->total[PSI_POLL][state] while
changed_states depends on the difference between groupc->times and
groupc->times_prev. groupc->times_prev is updated every time
collect_percpu_times() is called and there are 3 places where that
happens: from psi_avgs_work(), from psi_poll_work() and from
psi_show(). group->polling_total[state] is updated only from
psi_poll_work(). Therefore the deltas between these values might not
always be in-sync.

Consider the following sequence as an example:

psi_poll_work()
...
psi_avgs_work()/psi_show()
  collect_percpu_times() // we detect a change in a monitored state
...
psi_poll_work()
  collect_percpu_times() // this time no change in monitored states
  update_triggers() // group->polling_total[state] !=
group->total[PSI_POLL][state]

In the last psi_poll_work() collect_percpu_times() recorded no change
in monitored states, so (changed_states & group->rtpoll_states) == 0,
however since the last time psi_poll_work() was called there was
actually a change in monitored states recorded by the first
collect_percpu_times(), therefore (group->polling_total[t->state] !=
total[t->state]) and we should update the totals. With your change we
will miss that update.

I think you can easily fix that by introducing update_triggers as an
output parameter in window_update() like this:

static u64 window_update(struct psi_window *win, u64 now, u64 value,
bool *update_triggers) {
    *update_total = false;
...
    if (new_stall) {
        *update_total = true;
...
}

static void psi_rtpoll_work(struct psi_group *group) {
+       bool update_triggers;
...
-       if (now >= group->rtpoll_next_update)
+       if (now >= group->rtpoll_next_update) {
                group->rtpoll_next_update = update_triggers(group,
now, &update_triggers);
+               if (update_triggers)
+                       memcpy(group->rtpoll_total, group->total[PSI_POLL],
+                                  sizeof(group->rtpoll_total));
+       }
}


>
> Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
> Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
> ---
>  kernel/sched/psi.c | 20 +++++---------------
>  1 file changed, 5 insertions(+), 15 deletions(-)
>
> diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
> index a3d0b5cf797a..476941c1cbea 100644
> --- a/kernel/sched/psi.c
> +++ b/kernel/sched/psi.c
> @@ -433,7 +433,6 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
>  static u64 update_triggers(struct psi_group *group, u64 now)
>  {
>         struct psi_trigger *t;
> -       bool update_total = false;
>         u64 *total = group->total[PSI_POLL];
>
>         /*
> @@ -456,14 +455,6 @@ static u64 update_triggers(struct psi_group *group, u64 now)
>                  * events without dropping any).
>                  */
>                 if (new_stall) {
> -                       /*
> -                        * Multiple triggers might be looking at the same state,
> -                        * remember to update group->polling_total[] once we've
> -                        * been through all of them. Also remember to extend the
> -                        * polling time if we see new stall activity.
> -                        */
> -                       update_total = true;
> -
>                         /* Calculate growth since last update */
>                         growth = window_update(&t->win, now, total[t->state]);
>                         if (!t->pending_event) {
> @@ -484,11 +475,6 @@ static u64 update_triggers(struct psi_group *group, u64 now)
>                 /* Reset threshold breach flag once event got generated */
>                 t->pending_event = false;
>         }
> -
> -       if (update_total)
> -               memcpy(group->rtpoll_total, total,
> -                               sizeof(group->rtpoll_total));
> -
>         return now + group->rtpoll_min_period;
>  }
>
> @@ -686,8 +672,12 @@ static void psi_rtpoll_work(struct psi_group *group)
>                 goto out;
>         }
>
> -       if (now >= group->rtpoll_next_update)
> +       if (now >= group->rtpoll_next_update) {
>                 group->rtpoll_next_update = update_triggers(group, now);
> +               if (changed_states & group->rtpoll_states)
> +                       memcpy(group->rtpoll_total, group->total[PSI_POLL],
> +                                  sizeof(group->rtpoll_total));
> +       }
>
>         psi_schedule_rtpoll_work(group,
>                 nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
> --
> 2.34.1
>