[tip: sched/hrtick] hrtimer: Rework next event evaluation

tip-bot2 for Thomas Gleixner posted 1 patch 1 month, 2 weeks ago
kernel/time/hrtimer.c | 120 +++++++++++++++++++++++------------------
1 file changed, 69 insertions(+), 51 deletions(-)
[tip: sched/hrtick] hrtimer: Rework next event evaluation
Posted by tip-bot2 for Thomas Gleixner 1 month, 2 weeks ago
The following commit has been merged into the sched/hrtick branch of tip:

Commit-ID:     2bd1cc24fafc84be844c9ef66aa819d7dec285bf
Gitweb:        https://git.kernel.org/tip/2bd1cc24fafc84be844c9ef66aa819d7dec285bf
Author:        Thomas Gleixner <tglx@kernel.org>
AuthorDate:    Tue, 24 Feb 2026 17:38:33 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 27 Feb 2026 16:40:15 +01:00

hrtimer: Rework next event evaluation

The per clock base cached expiry time allows a more efficient evaluation
of the next expiry on a CPU.

Separate the reprogramming evaluation from the NOHZ idle evaluation which
needs to exclude the NOHZ timer to keep the reprogramming path lean and
clean.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.468186893@kernel.org
---
 kernel/time/hrtimer.c | 120 +++++++++++++++++++++++------------------
 1 file changed, 69 insertions(+), 51 deletions(-)

diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index d70899a..aa1cb4f 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -546,49 +546,67 @@ __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
 #define for_each_active_base(base, cpu_base, active)		\
 	while ((base = __next_base((cpu_base), &(active))))
 
-static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
-					 const struct hrtimer *exclude,
-					 unsigned int active, ktime_t expires_next)
+#if defined(CONFIG_NO_HZ_COMMON)
+/*
+ * Same as hrtimer_bases_next_event() below, but skips the excluded timer and
+ * does not update cpu_base->next_timer/expires.
+ */
+static ktime_t hrtimer_bases_next_event_without(struct hrtimer_cpu_base *cpu_base,
+						const struct hrtimer *exclude,
+						unsigned int active, ktime_t expires_next)
 {
 	struct hrtimer_clock_base *base;
 	ktime_t expires;
 
+	lockdep_assert_held(&cpu_base->lock);
+
 	for_each_active_base(base, cpu_base, active) {
-		struct timerqueue_node *next;
-		struct hrtimer *timer;
+		expires = ktime_sub(base->expires_next, base->offset);
+		if (expires >= expires_next)
+			continue;
 
-		next = timerqueue_getnext(&base->active);
-		timer = container_of(next, struct hrtimer, node);
-		if (timer == exclude) {
-			/* Get to the next timer in the queue. */
-			next = timerqueue_iterate_next(next);
-			if (!next)
-				continue;
+		/*
+		 * If the excluded timer is the first on this base evaluate the
+		 * next timer.
+		 */
+		struct timerqueue_node *node = timerqueue_getnext(&base->active);
 
-			timer = container_of(next, struct hrtimer, node);
+		if (unlikely(&exclude->node == node)) {
+			node = timerqueue_iterate_next(node);
+			if (!node)
+				continue;
+			expires = ktime_sub(node->expires, base->offset);
+			if (expires >= expires_next)
+				continue;
 		}
-		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
-		if (expires < expires_next) {
-			expires_next = expires;
+		expires_next = expires;
+	}
+	/* If base->offset changed, the result might be negative */
+	return max(expires_next, 0);
+}
+#endif
 
-			/* Skip cpu_base update if a timer is being excluded. */
-			if (exclude)
-				continue;
+static __always_inline struct hrtimer *clock_base_next_timer(struct hrtimer_clock_base *base)
+{
+	struct timerqueue_node *next = timerqueue_getnext(&base->active);
+
+	return container_of(next, struct hrtimer, node);
+}
 
-			if (timer->is_soft)
-				cpu_base->softirq_next_timer = timer;
-			else
-				cpu_base->next_timer = timer;
+/* Find the base with the earliest expiry */
+static void hrtimer_bases_first(struct hrtimer_cpu_base *cpu_base,unsigned int active,
+				ktime_t *expires_next, struct hrtimer **next_timer)
+{
+	struct hrtimer_clock_base *base;
+	ktime_t expires;
+
+	for_each_active_base(base, cpu_base, active) {
+		expires = ktime_sub(base->expires_next, base->offset);
+		if (expires < *expires_next) {
+			*expires_next = expires;
+			*next_timer = clock_base_next_timer(base);
 		}
 	}
-	/*
-	 * clock_was_set() might have changed base->offset of any of
-	 * the clock bases so the result might be negative. Fix it up
-	 * to prevent a false positive in clockevents_program_event().
-	 */
-	if (expires_next < 0)
-		expires_next = 0;
-	return expires_next;
 }
 
 /*
@@ -617,19 +635,22 @@ static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsig
 	ktime_t expires_next = KTIME_MAX;
 	unsigned int active;
 
+	lockdep_assert_held(&cpu_base->lock);
+
 	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
 		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
-		cpu_base->softirq_next_timer = NULL;
-		expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, KTIME_MAX);
-		next_timer = cpu_base->softirq_next_timer;
+		if (active)
+			hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer);
+		cpu_base->softirq_next_timer = next_timer;
 	}
 
 	if (active_mask & HRTIMER_ACTIVE_HARD) {
 		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+		if (active)
+			hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer);
 		cpu_base->next_timer = next_timer;
-		expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, expires_next);
 	}
-	return expires_next;
+	return max(expires_next, 0);
 }
 
 static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
@@ -724,11 +745,7 @@ static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtime
 	hrtimer_rearm_event(expires_next, false);
 }
 
-/*
- * Reprogram the event source with checking both queues for the
- * next event
- * Called with interrupts disabled and base->lock held
- */
+/* Reprogram the event source with an evaluation of all clock bases */
 static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, bool skip_equal)
 {
 	ktime_t expires_next = hrtimer_update_next_event(cpu_base);
@@ -1662,19 +1679,20 @@ u64 hrtimer_next_event_without(const struct hrtimer *exclude)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
 	u64 expires = KTIME_MAX;
+	unsigned int active;
 
 	guard(raw_spinlock_irqsave)(&cpu_base->lock);
-	if (hrtimer_hres_active(cpu_base)) {
-		unsigned int active;
+	if (!hrtimer_hres_active(cpu_base))
+		return expires;
 
-		if (!cpu_base->softirq_activated) {
-			active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
-			expires = __hrtimer_next_event_base(cpu_base, exclude, active, KTIME_MAX);
-		}
-		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
-		expires = __hrtimer_next_event_base(cpu_base, exclude, active, expires);
-	}
-	return expires;
+	active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
+	if (active && !cpu_base->softirq_activated)
+		expires = hrtimer_bases_next_event_without(cpu_base, exclude, active, KTIME_MAX);
+
+	active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
+	if (!active)
+		return expires;
+	return hrtimer_bases_next_event_without(cpu_base, exclude, active, expires);
 }
 #endif