[PATCH v2] sched/fair: Replace random newidle_balance with Bresenham accumulator

Qing Wang posted 1 patch 5 days, 21 hours ago
include/linux/sched/topology.h |  1 +
kernel/sched/core.c            |  3 ---
kernel/sched/fair.c            | 18 +++++++++++++-----
kernel/sched/sched.h           |  6 ------
kernel/sched/topology.c        |  1 +
5 files changed, 15 insertions(+), 14 deletions(-)
[PATCH v2] sched/fair: Replace random newidle_balance with Bresenham accumulator
Posted by Qing Wang 5 days, 21 hours ago
The current NI_RANDOM implementation uses a random dice roll to allow
newidle_balance attempts according to the success rate. There is a better
way to implememte it.

Replace the random dice with a Bresenham accumulator that distributes the
allowed attempts with uniformly spaced evenly across a 1024-step window:

Each step do those:
  - Accumulate (1 + newidle_ratio) into newidle_window_pos.
  - If the accumulator reaches 1024, allow the balance attempt and
    subtract 1024.

This guarantees exactly (1 + newidle_ratio) newidle_balance per 1024 steps
and per newidle_balance with uniformly spaced for any ratio in [0, 1023].

Signed-off-by: Qing Wang <wangqing7171@gmail.com>
---

V2:
1. Remove sched_rnd_state and sched_rng() since they are no longer used.

 include/linux/sched/topology.h |  1 +
 kernel/sched/core.c            |  3 ---
 kernel/sched/fair.c            | 18 +++++++++++++-----
 kernel/sched/sched.h           |  6 ------
 kernel/sched/topology.c        |  1 +
 5 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 36553e14866d..fd6de240fb2b 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -95,6 +95,7 @@ struct sched_domain {
 	unsigned int newidle_call;
 	unsigned int newidle_success;
 	unsigned int newidle_ratio;
+	unsigned int newidle_window_pos;
 	u64 newidle_stamp;
 	u64 max_newidle_lb_cost;
 	unsigned long last_decay_max_lb_cost;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index b8871449d3c6..4c0f613326d4 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -129,7 +129,6 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(sched_dl_server_start_tp);
 EXPORT_TRACEPOINT_SYMBOL_GPL(sched_dl_server_stop_tp);
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-DEFINE_PER_CPU(struct rnd_state, sched_rnd_state);
 
 #ifdef CONFIG_SCHED_PROXY_EXEC
 DEFINE_STATIC_KEY_TRUE(__sched_proxy_exec);
@@ -8820,8 +8819,6 @@ void __init sched_init_smp(void)
 {
 	sched_init_numa(NUMA_NO_NODE);
 
-	prandom_init_once(&sched_rnd_state);
-
 	/*
 	 * There's no userspace yet to cause hotplug operations; hence all the
 	 * CPU masks are stable and all blatant races in the below code cannot
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3ebec186f982..c48f092dc5d4 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -12545,6 +12545,7 @@ static inline void update_newidle_stats(struct sched_domain *sd, unsigned int su
 		ratio += sd->newidle_success;
 
 		sd->newidle_ratio = min(1024, ratio);
+		sd->newidle_window_pos = 0;
 		sd->newidle_call /= 2;
 		sd->newidle_success /= 2;
 	}
@@ -13253,16 +13254,23 @@ static int sched_balance_newidle(struct rq *this_rq, struct rq_flags *rf)
 
 			if (sched_feat(NI_RANDOM) && sd->newidle_ratio < 1024) {
 				/*
-				 * Throw a 1k sided dice; and only run
-				 * newidle_balance according to the success
-				 * rate.
+				 * Base on accumulator of Bresenham's line algorithm
+				 *
+				 * Accumulate ratio each time and trigger balance
+				 * when sum ≥ 1024.
+				 *
+				 * Ensures exactly ratio balances per 1024 calls
+				 * and distributes ratio balance operations uniformly
+				 * across every 1024 invocations.
 				 */
-				u32 d1k = sched_rng() % 1024;
 				weight = 1 + sd->newidle_ratio;
-				if (d1k > weight) {
+
+				sd->newidle_window_pos += weight;
+				if (sd->newidle_window_pos < 1024) {
 					update_newidle_stats(sd, 0);
 					continue;
 				}
+				sd->newidle_window_pos -= 1024;
 				weight = (1024 + weight/2) / weight;
 			}
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9f63b15d309d..b407835ae9a8 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1384,12 +1384,6 @@ static inline bool is_migration_disabled(struct task_struct *p)
 }
 
 DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-DECLARE_PER_CPU(struct rnd_state, sched_rnd_state);
-
-static inline u32 sched_rng(void)
-{
-	return prandom_u32_state(this_cpu_ptr(&sched_rnd_state));
-}
 
 static __always_inline struct rq *__this_rq(void)
 {
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 5847b83d9d55..0d8fa413f66c 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -1708,6 +1708,7 @@ sd_init(struct sched_domain_topology_level *tl,
 		.newidle_call		= 512,
 		.newidle_success	= 256,
 		.newidle_ratio		= 512,
+		.newidle_window_pos	= 0,
 		.newidle_stamp		= now,
 
 		.max_newidle_lb_cost	= 0,
-- 
2.34.1