[tip: sched/hrtick] hrtimer: Avoid re-evaluation when nothing changed

tip-bot2 for Thomas Gleixner posted 1 patch 1 month, 2 weeks ago
include/linux/hrtimer_defs.h | 53 +++++++++++++++++------------------
kernel/time/hrtimer.c        | 45 ++++++++++++++++++++----------
2 files changed, 58 insertions(+), 40 deletions(-)
[tip: sched/hrtick] hrtimer: Avoid re-evaluation when nothing changed
Posted by tip-bot2 for Thomas Gleixner 1 month, 2 weeks ago
The following commit has been merged into the sched/hrtick branch of tip:

Commit-ID:     b95c4442b02162904e9012e670b602ebeb3c6c1b
Gitweb:        https://git.kernel.org/tip/b95c4442b02162904e9012e670b602ebeb3c6c1b
Author:        Thomas Gleixner <tglx@kernel.org>
AuthorDate:    Tue, 24 Feb 2026 17:38:23 +01:00
Committer:     Peter Zijlstra <peterz@infradead.org>
CommitterDate: Fri, 27 Feb 2026 16:40:14 +01:00

hrtimer: Avoid re-evaluation when nothing changed

Most times there is no change between hrtimer_interrupt() deferring the rearm
and the invocation of hrtimer_rearm_deferred(). In those cases it's a pointless
exercise to re-evaluate the next expiring timer.

Cache the required data and use it if nothing changed.

Signed-off-by: Thomas Gleixner <tglx@kernel.org>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://patch.msgid.link/20260224163431.338569372@kernel.org
---
 include/linux/hrtimer_defs.h | 53 +++++++++++++++++------------------
 kernel/time/hrtimer.c        | 45 ++++++++++++++++++++----------
 2 files changed, 58 insertions(+), 40 deletions(-)

diff --git a/include/linux/hrtimer_defs.h b/include/linux/hrtimer_defs.h
index 2c3bdbd..b6846ef 100644
--- a/include/linux/hrtimer_defs.h
+++ b/include/linux/hrtimer_defs.h
@@ -47,32 +47,31 @@ enum  hrtimer_base_type {
 
 /**
  * struct hrtimer_cpu_base - the per cpu clock bases
- * @lock:		lock protecting the base and associated clock bases
- *			and timers
- * @cpu:		cpu number
- * @active_bases:	Bitfield to mark bases with active timers
- * @clock_was_set_seq:	Sequence counter of clock was set events
- * @hres_active:	State of high resolution mode
- * @deferred_rearm:	A deferred rearm is pending
- * @hang_detected:	The last hrtimer interrupt detected a hang
- * @softirq_activated:	displays, if the softirq is raised - update of softirq
- *			related settings is not required then.
- * @nr_events:		Total number of hrtimer interrupt events
- * @nr_retries:		Total number of hrtimer interrupt retries
- * @nr_hangs:		Total number of hrtimer interrupt hangs
- * @max_hang_time:	Maximum time spent in hrtimer_interrupt
- * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
- *			 expired
- * @online:		CPU is online from an hrtimers point of view
- * @timer_waiters:	A hrtimer_cancel() invocation waits for the timer
- *			callback to finish.
- * @expires_next:	absolute time of the next event, is required for remote
- *			hrtimer enqueue; it is the total first expiry time (hard
- *			and soft hrtimer are taken into account)
- * @next_timer:		Pointer to the first expiring timer
- * @softirq_expires_next: Time to check, if soft queues needs also to be expired
- * @softirq_next_timer: Pointer to the first expiring softirq based timer
- * @clock_base:		array of clock bases for this cpu
+ * @lock:			lock protecting the base and associated clock bases and timers
+ * @cpu:			cpu number
+ * @active_bases:		Bitfield to mark bases with active timers
+ * @clock_was_set_seq:		Sequence counter of clock was set events
+ * @hres_active:		State of high resolution mode
+ * @deferred_rearm:		A deferred rearm is pending
+ * @deferred_needs_update:	The deferred rearm must re-evaluate the first timer
+ * @hang_detected:		The last hrtimer interrupt detected a hang
+ * @softirq_activated:		Indicates whether the softirq is raised - update of softirq
+ *				related settings is not required then.
+ * @nr_events:			Total number of hrtimer interrupt events
+ * @nr_retries:			Total number of hrtimer interrupt retries
+ * @nr_hangs:			Total number of hrtimer interrupt hangs
+ * @max_hang_time:		Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock:	Lock which is taken while softirq based hrtimers are expired
+ * @online:			CPU is online from an hrtimers point of view
+ * @timer_waiters:		A hrtimer_cancel() invocation waits for the timer callback to finish.
+ * @expires_next:		Absolute time of the next event, is required for remote
+ *				hrtimer enqueue; it is the total first expiry time (hard
+ *				and soft hrtimer are taken into account)
+ * @next_timer:			Pointer to the first expiring timer
+ * @softirq_expires_next:	Time to check whether soft queues also need to be expired
+ * @softirq_next_timer:		Pointer to the first expiring softirq based timer
+ * @deferred_expires_next:	Cached expires next value for deferred rearm
+ * @clock_base:			Array of clock bases for this cpu
  *
  * Note: next_timer is just an optimization for __remove_hrtimer().
  *	 Do not dereference the pointer because it is not reliable on
@@ -85,6 +84,7 @@ struct hrtimer_cpu_base {
 	unsigned int			clock_was_set_seq;
 	bool				hres_active;
 	bool				deferred_rearm;
+	bool				deferred_needs_update;
 	bool				hang_detected;
 	bool				softirq_activated;
 	bool				online;
@@ -102,6 +102,7 @@ struct hrtimer_cpu_base {
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
 	struct hrtimer			*softirq_next_timer;
+	ktime_t				deferred_expires_next;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 	call_single_data_t		csd;
 } ____cacheline_aligned;
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 2e5f0e2..e9592cb 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -919,8 +919,10 @@ static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int act
 		return false;
 
 	/* If a deferred rearm is pending the remote CPU will take care of it */
-	if (cpu_base->deferred_rearm)
+	if (cpu_base->deferred_rearm) {
+		cpu_base->deferred_needs_update = true;
 		return false;
+	}
 
 	/*
 	 * Walk the affected clock bases and check whether the first expiring
@@ -1141,7 +1143,12 @@ static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *b
 	 * a local timer is removed to be immediately restarted. That's handled
 	 * at the call site.
 	 */
-	if (reprogram && timer == cpu_base->next_timer && !timer->is_lazy)
+	if (!reprogram || timer != cpu_base->next_timer || timer->is_lazy)
+		return;
+
+	if (cpu_base->deferred_rearm)
+		cpu_base->deferred_needs_update = true;
+	else
 		hrtimer_force_reprogram(cpu_base, /* skip_equal */ true);
 }
 
@@ -1328,8 +1335,10 @@ static bool __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 del
 	}
 
 	/* If a deferred rearm is pending skip reprogramming the device */
-	if (cpu_base->deferred_rearm)
+	if (cpu_base->deferred_rearm) {
+		cpu_base->deferred_needs_update = true;
 		return false;
+	}
 
 	if (!was_first || cpu_base != this_cpu_base) {
 		/*
@@ -1939,8 +1948,7 @@ static __latent_entropy void hrtimer_run_softirq(void)
  * Very similar to hrtimer_force_reprogram(), except it deals with
  * deferred_rearm and hang_detected.
  */
-static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now,
-			  ktime_t expires_next, bool deferred)
+static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next, bool deferred)
 {
 	cpu_base->expires_next = expires_next;
 	cpu_base->deferred_rearm = false;
@@ -1950,7 +1958,7 @@ static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 		 * Give the system a chance to do something else than looping
 		 * on hrtimer interrupts.
 		 */
-		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
+		expires_next = ktime_add_ns(ktime_get(), 100 * NSEC_PER_MSEC);
 		cpu_base->hang_detected = false;
 	}
 	hrtimer_rearm_event(expires_next, deferred);
@@ -1960,27 +1968,36 @@ static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 void __hrtimer_rearm_deferred(void)
 {
 	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
-	ktime_t now, expires_next;
+	ktime_t expires_next;
 
 	if (!cpu_base->deferred_rearm)
 		return;
 
 	guard(raw_spinlock)(&cpu_base->lock);
-	now = hrtimer_update_base(cpu_base);
-	expires_next = hrtimer_update_next_event(cpu_base);
-	hrtimer_rearm(cpu_base, now, expires_next, true);
+	if (cpu_base->deferred_needs_update) {
+		hrtimer_update_base(cpu_base);
+		expires_next = hrtimer_update_next_event(cpu_base);
+	} else {
+		/* No timer added/removed. Use the cached value */
+		expires_next = cpu_base->deferred_expires_next;
+	}
+	hrtimer_rearm(cpu_base, expires_next, true);
 }
 
 static __always_inline void
-hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now, ktime_t expires_next)
+hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next)
 {
+	/* hrtimer_interrupt() just re-evaluated the first expiring timer */
+	cpu_base->deferred_needs_update = false;
+	/* Cache the expiry time */
+	cpu_base->deferred_expires_next = expires_next;
 	set_thread_flag(TIF_HRTIMER_REARM);
 }
 #else  /* CONFIG_HRTIMER_REARM_DEFERRED */
 static __always_inline void
-hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t now, ktime_t expires_next)
+hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next)
 {
-	hrtimer_rearm(cpu_base, now, expires_next, false);
+	hrtimer_rearm(cpu_base, expires_next, false);
 }
 #endif  /* !CONFIG_HRTIMER_REARM_DEFERRED */
 
@@ -2041,7 +2058,7 @@ retry:
 		cpu_base->hang_detected = true;
 	}
 
-	hrtimer_interrupt_rearm(cpu_base, now, expires_next);
+	hrtimer_interrupt_rearm(cpu_base, expires_next);
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
 }