PM: QoS: Introduce a CPU system-wakeup QoS limit for s2idle

[PATCH v2 3/4] sched: idle: Respect the CPU system-wakeup QoS limit for s2idle

Posted by Ulf Hansson 3 months, 3 weeks ago

A CPU system-wakeup QoS limit may have been requested by user-space. To
avoid breaking this constraint when entering a low-power state during
s2idle, let's start to take into account the QoS limit.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
---

Changes in v2:
	- Rework the code to take into account the failure/error path, when we
	don't find an s2idle-specific state.

---
 drivers/cpuidle/cpuidle.c | 12 +++++++-----
 include/linux/cpuidle.h   |  6 ++++--
 kernel/sched/idle.c       | 12 +++++++-----
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 56132e843c99..c7876e9e024f 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -184,20 +184,22 @@ static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
  * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
  * @drv: cpuidle driver for the given CPU.
  * @dev: cpuidle device for the given CPU.
+ * @latency_limit_ns: Idle state exit latency limit
  *
  * If there are states with the ->enter_s2idle callback, find the deepest of
  * them and enter it with frozen tick.
  */
-int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
+int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
+			 u64 latency_limit_ns)
 {
 	int index;
 
 	/*
-	 * Find the deepest state with ->enter_s2idle present, which guarantees
-	 * that interrupts won't be enabled when it exits and allows the tick to
-	 * be frozen safely.
+	 * Find the deepest state with ->enter_s2idle present that meets the
+	 * specified latency limit, which guarantees that interrupts won't be
+	 * enabled when it exits and allows the tick to be frozen safely.
 	 */
-	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
+	index = find_deepest_state(drv, dev, latency_limit_ns, 0, true);
 	if (index > 0) {
 		enter_s2idle_proper(drv, dev, index);
 		local_irq_enable();
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index a9ee4fe55dcf..4073690504a7 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -248,7 +248,8 @@ extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 				      struct cpuidle_device *dev,
 				      u64 latency_limit_ns);
 extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
-				struct cpuidle_device *dev);
+				struct cpuidle_device *dev,
+				u64 latency_limit_ns);
 extern void cpuidle_use_deepest_state(u64 latency_limit_ns);
 #else
 static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
@@ -256,7 +257,8 @@ static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
 					     u64 latency_limit_ns)
 {return -ENODEV; }
 static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
-				       struct cpuidle_device *dev)
+				       struct cpuidle_device *dev,
+				       u64 latency_limit_ns)
 {return -ENODEV; }
 static inline void cpuidle_use_deepest_state(u64 latency_limit_ns)
 {
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index c39b089d4f09..c1c3d0166610 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -131,12 +131,13 @@ void __cpuidle default_idle_call(void)
 }
 
 static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
-			       struct cpuidle_device *dev)
+			       struct cpuidle_device *dev,
+			       u64 max_latency_ns)
 {
 	if (current_clr_polling_and_test())
 		return -EBUSY;
 
-	return cpuidle_enter_s2idle(drv, dev);
+	return cpuidle_enter_s2idle(drv, dev, max_latency_ns);
 }
 
 static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
@@ -205,12 +206,13 @@ static void cpuidle_idle_call(void)
 		u64 max_latency_ns;
 
 		if (idle_should_enter_s2idle()) {
+			max_latency_ns = cpu_wakeup_latency_qos_limit() *
+					 NSEC_PER_USEC;
 
-			entered_state = call_cpuidle_s2idle(drv, dev);
+			entered_state = call_cpuidle_s2idle(drv, dev,
+							    max_latency_ns);
 			if (entered_state > 0)
 				goto exit_idle;
-
-			max_latency_ns = U64_MAX;
 		} else {
 			max_latency_ns = dev->forced_idle_latency_limit_ns;
 		}
-- 
2.43.0

Re: [PATCH v2 3/4] sched: idle: Respect the CPU system-wakeup QoS limit for s2idle

Posted by Dhruva Gole 3 months, 1 week ago

On Oct 16, 2025 at 17:19:23 +0200, Ulf Hansson wrote:
> A CPU system-wakeup QoS limit may have been requested by user-space. To
> avoid breaking this constraint when entering a low-power state during
> s2idle, let's start to take into account the QoS limit.
> 
> Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> ---
> 
> Changes in v2:
> 	- Rework the code to take into account the failure/error path, when we
> 	don't find a s2idle specific state.
> 
> ---
>  drivers/cpuidle/cpuidle.c | 12 +++++++-----
>  include/linux/cpuidle.h   |  6 ++++--
>  kernel/sched/idle.c       | 12 +++++++-----
>  3 files changed, 18 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> index 56132e843c99..c7876e9e024f 100644
> --- a/drivers/cpuidle/cpuidle.c
> +++ b/drivers/cpuidle/cpuidle.c
> @@ -184,20 +184,22 @@ static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
>   * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
>   * @drv: cpuidle driver for the given CPU.
>   * @dev: cpuidle device for the given CPU.
> + * @latency_limit_ns: Idle state exit latency limit
>   *
>   * If there are states with the ->enter_s2idle callback, find the deepest of
>   * them and enter it with frozen tick.
>   */
> -int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> +int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> +			 u64 latency_limit_ns)
>  {
>  	int index;
>  
>  	/*
> -	 * Find the deepest state with ->enter_s2idle present, which guarantees
> -	 * that interrupts won't be enabled when it exits and allows the tick to
> -	 * be frozen safely.
> +	 * Find the deepest state with ->enter_s2idle present that meets the
> +	 * specified latency limit, which guarantees that interrupts won't be
> +	 * enabled when it exits and allows the tick to be frozen safely.
>  	 */
> -	index = find_deepest_state(drv, dev, U64_MAX, 0, true);
> +	index = find_deepest_state(drv, dev, latency_limit_ns, 0, true);
>  	if (index > 0) {
>  		enter_s2idle_proper(drv, dev, index);
>  		local_irq_enable();
> diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> index a9ee4fe55dcf..4073690504a7 100644
> --- a/include/linux/cpuidle.h
> +++ b/include/linux/cpuidle.h
> @@ -248,7 +248,8 @@ extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>  				      struct cpuidle_device *dev,
>  				      u64 latency_limit_ns);
>  extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
> -				struct cpuidle_device *dev);
> +				struct cpuidle_device *dev,
> +				u64 latency_limit_ns);
>  extern void cpuidle_use_deepest_state(u64 latency_limit_ns);
>  #else
>  static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
> @@ -256,7 +257,8 @@ static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
>  					     u64 latency_limit_ns)
>  {return -ENODEV; }
>  static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
> -				       struct cpuidle_device *dev)
> +				       struct cpuidle_device *dev,
> +				       u64 latency_limit_ns)
>  {return -ENODEV; }
>  static inline void cpuidle_use_deepest_state(u64 latency_limit_ns)
>  {
> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index c39b089d4f09..c1c3d0166610 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -131,12 +131,13 @@ void __cpuidle default_idle_call(void)
>  }
>  
>  static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
> -			       struct cpuidle_device *dev)
> +			       struct cpuidle_device *dev,
> +			       u64 max_latency_ns)
>  {
>  	if (current_clr_polling_and_test())
>  		return -EBUSY;
>  
> -	return cpuidle_enter_s2idle(drv, dev);
> +	return cpuidle_enter_s2idle(drv, dev, max_latency_ns);
>  }
>  
>  static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> @@ -205,12 +206,13 @@ static void cpuidle_idle_call(void)
>  		u64 max_latency_ns;
>  
>  		if (idle_should_enter_s2idle()) {
> +			max_latency_ns = cpu_wakeup_latency_qos_limit() *
> +					 NSEC_PER_USEC;

This only takes into account the new cpu_wakeup_latency_qos_limit API.
However, what if someone has set cpu_latency_qos_limit? Doesn't that need
to be honoured as well?
Just trying to understand the difference between the two QoS limits here
and why one is chosen over the other.

>  
> -			entered_state = call_cpuidle_s2idle(drv, dev);
> +			entered_state = call_cpuidle_s2idle(drv, dev,
> +							    max_latency_ns);
>  			if (entered_state > 0)
>  				goto exit_idle;
> -
> -			max_latency_ns = U64_MAX;
>  		} else {
>  			max_latency_ns = dev->forced_idle_latency_limit_ns;
>  		}
> -- 
> 2.43.0
> 

-- 
Best regards,
Dhruva Gole
Texas Instruments Incorporated

Re: [PATCH v2 3/4] sched: idle: Respect the CPU system-wakeup QoS limit for s2idle

Posted by Ulf Hansson 3 months, 1 week ago

On Fri, 31 Oct 2025 at 20:23, Dhruva Gole <d-gole@ti.com> wrote:
>
> On Oct 16, 2025 at 17:19:23 +0200, Ulf Hansson wrote:
> > A CPU system-wakeup QoS limit may have been requested by user-space. To
> > avoid breaking this constraint when entering a low-power state during
> > s2idle, let's start to take into account the QoS limit.
> >
> > Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
> > ---
> >
> > Changes in v2:
> >       - Rework the code to take into account the failure/error path, when we
> >       don't find a s2idle specific state.
> >
> > ---
> >  drivers/cpuidle/cpuidle.c | 12 +++++++-----
> >  include/linux/cpuidle.h   |  6 ++++--
> >  kernel/sched/idle.c       | 12 +++++++-----
> >  3 files changed, 18 insertions(+), 12 deletions(-)
> >
> > diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
> > index 56132e843c99..c7876e9e024f 100644
> > --- a/drivers/cpuidle/cpuidle.c
> > +++ b/drivers/cpuidle/cpuidle.c
> > @@ -184,20 +184,22 @@ static noinstr void enter_s2idle_proper(struct cpuidle_driver *drv,
> >   * cpuidle_enter_s2idle - Enter an idle state suitable for suspend-to-idle.
> >   * @drv: cpuidle driver for the given CPU.
> >   * @dev: cpuidle device for the given CPU.
> > + * @latency_limit_ns: Idle state exit latency limit
> >   *
> >   * If there are states with the ->enter_s2idle callback, find the deepest of
> >   * them and enter it with frozen tick.
> >   */
> > -int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev)
> > +int cpuidle_enter_s2idle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> > +                      u64 latency_limit_ns)
> >  {
> >       int index;
> >
> >       /*
> > -      * Find the deepest state with ->enter_s2idle present, which guarantees
> > -      * that interrupts won't be enabled when it exits and allows the tick to
> > -      * be frozen safely.
> > +      * Find the deepest state with ->enter_s2idle present that meets the
> > +      * specified latency limit, which guarantees that interrupts won't be
> > +      * enabled when it exits and allows the tick to be frozen safely.
> >        */
> > -     index = find_deepest_state(drv, dev, U64_MAX, 0, true);
> > +     index = find_deepest_state(drv, dev, latency_limit_ns, 0, true);
> >       if (index > 0) {
> >               enter_s2idle_proper(drv, dev, index);
> >               local_irq_enable();
> > diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
> > index a9ee4fe55dcf..4073690504a7 100644
> > --- a/include/linux/cpuidle.h
> > +++ b/include/linux/cpuidle.h
> > @@ -248,7 +248,8 @@ extern int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
> >                                     struct cpuidle_device *dev,
> >                                     u64 latency_limit_ns);
> >  extern int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
> > -                             struct cpuidle_device *dev);
> > +                             struct cpuidle_device *dev,
> > +                             u64 latency_limit_ns);
> >  extern void cpuidle_use_deepest_state(u64 latency_limit_ns);
> >  #else
> >  static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
> > @@ -256,7 +257,8 @@ static inline int cpuidle_find_deepest_state(struct cpuidle_driver *drv,
> >                                            u64 latency_limit_ns)
> >  {return -ENODEV; }
> >  static inline int cpuidle_enter_s2idle(struct cpuidle_driver *drv,
> > -                                    struct cpuidle_device *dev)
> > +                                    struct cpuidle_device *dev,
> > +                                    u64 latency_limit_ns)
> >  {return -ENODEV; }
> >  static inline void cpuidle_use_deepest_state(u64 latency_limit_ns)
> >  {
> > diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> > index c39b089d4f09..c1c3d0166610 100644
> > --- a/kernel/sched/idle.c
> > +++ b/kernel/sched/idle.c
> > @@ -131,12 +131,13 @@ void __cpuidle default_idle_call(void)
> >  }
> >
> >  static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
> > -                            struct cpuidle_device *dev)
> > +                            struct cpuidle_device *dev,
> > +                            u64 max_latency_ns)
> >  {
> >       if (current_clr_polling_and_test())
> >               return -EBUSY;
> >
> > -     return cpuidle_enter_s2idle(drv, dev);
> > +     return cpuidle_enter_s2idle(drv, dev, max_latency_ns);
> >  }
> >
> >  static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> > @@ -205,12 +206,13 @@ static void cpuidle_idle_call(void)
> >               u64 max_latency_ns;
> >
> >               if (idle_should_enter_s2idle()) {
> > +                     max_latency_ns = cpu_wakeup_latency_qos_limit() *
> > +                                      NSEC_PER_USEC;
>
> This is only taking into account the new API for the
> cpu_wakeup_latency_qos_limit, however what if someone has set
> cpu_latency_qos_limit, doesn't that need to be honoured?
> Just trying to understand the differences in both qos here and why one
> is chosen over the other.

cpu_latency_qos_limit is for runtime only, during regular cpuidle idle
state selection.

The new cpu_wakeup_latency_qos_limit is taken into account above for
s2idle, specifically.

That said, Rafael suggests that the new cpu_wakeup_latency_qos_limit
should be respected for runtime cpuidle state selection too, so I am
working on updating the series to take that into account.

>
> >
> > -                     entered_state = call_cpuidle_s2idle(drv, dev);
> > +                     entered_state = call_cpuidle_s2idle(drv, dev,
> > +                                                         max_latency_ns);
> >                       if (entered_state > 0)
> >                               goto exit_idle;
> > -
> > -                     max_latency_ns = U64_MAX;
> >               } else {
> >                       max_latency_ns = dev->forced_idle_latency_limit_ns;
> >               }
> > --
> > 2.43.0
> >
>

Kind regards
Uffe

Re: [PATCH v2 3/4] sched: idle: Respect the CPU system-wakeup QoS limit for s2idle

Posted by Peter Zijlstra 3 months, 3 weeks ago

On Thu, Oct 16, 2025 at 05:19:23PM +0200, Ulf Hansson wrote:

No objections to this.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>

> diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
> index c39b089d4f09..c1c3d0166610 100644
> --- a/kernel/sched/idle.c
> +++ b/kernel/sched/idle.c
> @@ -131,12 +131,13 @@ void __cpuidle default_idle_call(void)
>  }
>  
>  static int call_cpuidle_s2idle(struct cpuidle_driver *drv,
> -			       struct cpuidle_device *dev)
> +			       struct cpuidle_device *dev,
> +			       u64 max_latency_ns)
>  {
>  	if (current_clr_polling_and_test())
>  		return -EBUSY;
>  
> -	return cpuidle_enter_s2idle(drv, dev);
> +	return cpuidle_enter_s2idle(drv, dev, max_latency_ns);
>  }
>  
>  static int call_cpuidle(struct cpuidle_driver *drv, struct cpuidle_device *dev,
> @@ -205,12 +206,13 @@ static void cpuidle_idle_call(void)
>  		u64 max_latency_ns;
>  
>  		if (idle_should_enter_s2idle()) {
> +			max_latency_ns = cpu_wakeup_latency_qos_limit() *
> +					 NSEC_PER_USEC;
>  
> -			entered_state = call_cpuidle_s2idle(drv, dev);
> +			entered_state = call_cpuidle_s2idle(drv, dev,
> +							    max_latency_ns);
>  			if (entered_state > 0)
>  				goto exit_idle;
> -
> -			max_latency_ns = U64_MAX;
>  		} else {
>  			max_latency_ns = dev->forced_idle_latency_limit_ns;
>  		}
> -- 
> 2.43.0
>

[PATCH v2 1/4] PM: QoS: Introduce a CPU system-wakeup QoS limit
[PATCH v2 2/4] pmdomain: Respect the CPU system-wakeup QoS limit during s2idle
[PATCH v2 3/4] sched: idle: Respect the CPU system-wakeup QoS limit for s2idle
[PATCH v2 4/4] Documentation: power/cpuidle: Document the CPU system-wakeup latency QoS