[PATCH v2 2/2] alarmtimer: Modify alarmtimer suspend callback to check for imminent alarm using PM notifier

Pranav Prasad posted 2 patches 2 years ago
There is a newer version of this series
[PATCH v2 2/2] alarmtimer: Modify alarmtimer suspend callback to check for imminent alarm using PM notifier
Posted by Pranav Prasad 2 years ago
The alarmtimer driver currently fails suspend attempts when there is an
alarm pending within the next suspend_check_duration_ns nanoseconds, since
the system is expected to wake up soon anyway. The entire suspend process
is initiated even though the system will immediately awaken. This process
includes substantial work before the suspend fails and additional work
afterwards to undo the failed suspend that was attempted. Therefore on
battery-powered devices that initiate suspend attempts from userspace, it
may be advantageous to be able to fail the suspend earlier in the suspend
flow to avoid power consumption instead of unnecessarily doing extra work.
As one data point, an analysis of a subset of Android devices showed that
imminent alarms account for roughly 40% of all suspend failures on average
leading to unnecessary power wastage.

To facilitate this, register a PM notifier in the alarmtimer subsystem
that checks if an alarm is imminent during the prepare stage of kernel
suspend denoted by the event PM_SUSPEND_PREPARE. If an alarm is imminent,
it returns the errno code ETIME instead of EBUSY to userspace in order to
make it easily diagnosable.

Signed-off-by: Pranav Prasad <pranavpp@google.com>
---
 kernel/time/alarmtimer.c | 126 +++++++++++++++++++++++++++------------
 1 file changed, 87 insertions(+), 39 deletions(-)

diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index e4b88c8dc0e1..809cfd97328d 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -27,6 +27,7 @@
 #include <linux/compat.h>
 #include <linux/module.h>
 #include <linux/time_namespace.h>
+#include <linux/suspend.h>
 
 #include "posix-timers.h"
 
@@ -115,6 +116,87 @@ static int alarmtimer_sysfs_add(void)
 	return ret;
 }
 
+/**
+ * alarmtimer_get_soonest - Finds the soonest alarm to expire among the
+ * alarm bases.
+ * @rtc: ptr to rtc_device struct
+ * @min: ptr to relative time to the soonest alarm to expire
+ * @expires: ptr to absolute time of the soonest alarm to expire
+ * @type: ptr to alarm type
+ *
+ * Returns 1 if soonest alarm was found, returns 0 if don't care.
+ */
+static int alarmtimer_get_soonest(struct rtc_device *rtc, ktime_t *min,
+					ktime_t *expires, int *type)
+{
+	int i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&freezer_delta_lock, flags);
+	*min = freezer_delta;
+	*expires = freezer_expires;
+	*type = freezer_alarmtype;
+	freezer_delta = 0;
+	spin_unlock_irqrestore(&freezer_delta_lock, flags);
+
+	rtc = alarmtimer_get_rtcdev();
+	/* If we have no rtcdev, just return */
+	if (!rtc)
+		return 0;
+
+	/* Find the soonest timer to expire */
+	for (i = 0; i < ALARM_NUMTYPE; i++) {
+		struct alarm_base *base = &alarm_bases[i];
+		struct timerqueue_node *next;
+		ktime_t delta;
+
+		spin_lock_irqsave(&base->lock, flags);
+		next = timerqueue_getnext(&base->timerqueue);
+		spin_unlock_irqrestore(&base->lock, flags);
+		if (!next)
+			continue;
+		delta = ktime_sub(next->expires, base->get_ktime());
+		if (!*min || delta < *min) {
+			*expires = next->expires;
+			*min = delta;
+			*type = i;
+		}
+	}
+
+	if (*min == 0)
+		return 0;
+
+	return 1;
+}
+
+static int alarmtimer_pm_callback(struct notifier_block *nb,
+			    unsigned long mode, void *_unused)
+{
+	ktime_t min, expires;
+	struct rtc_device *rtc = NULL;
+	int type;
+
+	switch (mode) {
+	case PM_SUSPEND_PREPARE:
+		/* Find the soonest timer to expire */
+		if (!alarmtimer_get_soonest(rtc, &min, &expires, &type))
+			return NOTIFY_DONE;
+
+		if (ktime_to_ns(min) <
+			suspend_check_duration_ms * NSEC_PER_MSEC) {
+			pr_warn("[%s] Suspend abort due to imminent alarm\n", __func__);
+			pm_wakeup_event(&rtc->dev, suspend_check_duration_ms);
+			return notifier_from_errno(-ETIME);
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block alarmtimer_pm_notifier = {
+	.notifier_call = alarmtimer_pm_callback,
+};
+
 /**
  * alarmtimer_get_rtcdev - Return selected rtcdevice
  *
@@ -181,6 +263,7 @@ static int alarmtimer_rtc_add_device(struct device *dev)
 static inline void alarmtimer_rtc_timer_init(void)
 {
 	rtc_timer_init(&rtctimer, NULL, NULL);
+	register_pm_notifier(&alarmtimer_pm_notifier);
 }
 
 static struct class_interface alarmtimer_rtc_interface = {
@@ -296,49 +379,14 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
 static int alarmtimer_suspend(struct device *dev)
 {
 	ktime_t min, now, expires;
-	int i, ret, type;
-	struct rtc_device *rtc;
-	unsigned long flags;
+	struct rtc_device *rtc = NULL;
 	struct rtc_time tm;
+	int ret, type;
 
-	spin_lock_irqsave(&freezer_delta_lock, flags);
-	min = freezer_delta;
-	expires = freezer_expires;
-	type = freezer_alarmtype;
-	freezer_delta = 0;
-	spin_unlock_irqrestore(&freezer_delta_lock, flags);
-
-	rtc = alarmtimer_get_rtcdev();
-	/* If we have no rtcdev, just return */
-	if (!rtc)
-		return 0;
-
-	/* Find the soonest timer to expire*/
-	for (i = 0; i < ALARM_NUMTYPE; i++) {
-		struct alarm_base *base = &alarm_bases[i];
-		struct timerqueue_node *next;
-		ktime_t delta;
-
-		spin_lock_irqsave(&base->lock, flags);
-		next = timerqueue_getnext(&base->timerqueue);
-		spin_unlock_irqrestore(&base->lock, flags);
-		if (!next)
-			continue;
-		delta = ktime_sub(next->expires, base->get_ktime());
-		if (!min || (delta < min)) {
-			expires = next->expires;
-			min = delta;
-			type = i;
-		}
-	}
-	if (min == 0)
+	/* Find the soonest timer to expire */
+	if (!alarmtimer_get_soonest(rtc, &min, &expires, &type))
 		return 0;
 
-	if (ktime_to_ns(min) < suspend_check_duration_ms * NSEC_PER_MSEC) {
-		pm_wakeup_event(dev, suspend_check_duration_ms);
-		return -EBUSY;
-	}
-
 	trace_alarmtimer_suspend(expires, type);
 
 	/* Setup an rtc timer to fire that far in the future */
-- 
2.43.0.687.g38aa6559b0-goog
Re: [PATCH v2 2/2] alarmtimer: Modify alarmtimer suspend callback to check for imminent alarm using PM notifier
Posted by Thomas Gleixner 1 year, 12 months ago
On Thu, Feb 08 2024 at 19:56, Pranav Prasad wrote:

The subject line wants some trimming. It's supposed to be a concise
short summary and not a novel.

Aside of that it's blantantly wrong. It does not modify the suspend
callback to check with a PM notifier. It adds a PM notifier to check
early before the suspend callback runs.

> +/**
> + * alarmtimer_get_soonest - Finds the soonest alarm to expire among the
> + * alarm bases.
> + * @rtc: ptr to rtc_device struct
> + * @min: ptr to relative time to the soonest alarm to expire
> + * @expires: ptr to absolute time of the soonest alarm to expire
> + * @type: ptr to alarm type
> + *
> + * Returns 1 if soonest alarm was found, returns 0 if don't care.
> + */
> +static int alarmtimer_get_soonest(struct rtc_device *rtc, ktime_t *min,
> +					ktime_t *expires, int *type)

Please align the second argument with the first argument of the
function.

Also the return value wants to be bool.

> +{
> +	int i;
> +	unsigned long flags;

Please see:

https://www.kernel.org/doc/html/latest/process/maintainer-tip.html#variable-declarations

> +	spin_lock_irqsave(&freezer_delta_lock, flags);
> +	*min = freezer_delta;
> +	*expires = freezer_expires;
> +	*type = freezer_alarmtype;
> +	freezer_delta = 0;
> +	spin_unlock_irqrestore(&freezer_delta_lock, flags);

This only makes sense for the actual suspend operation because freezing
of processes happens after the notifier callback runs.

> +	rtc = alarmtimer_get_rtcdev();
> +	/* If we have no rtcdev, just return */
> +	if (!rtc)
> +		return 0;
> +
> +	/* Find the soonest timer to expire */
> +	for (i = 0; i < ALARM_NUMTYPE; i++) {
> +		struct alarm_base *base = &alarm_bases[i];
> +		struct timerqueue_node *next;
> +		ktime_t delta;
> +
> +		spin_lock_irqsave(&base->lock, flags);
> +		next = timerqueue_getnext(&base->timerqueue);
> +		spin_unlock_irqrestore(&base->lock, flags);
> +		if (!next)
> +			continue;
> +		delta = ktime_sub(next->expires, base->get_ktime());
> +		if (!*min || delta < *min) {

You can spare the !*min if you initialize min to KTIME_MAX.

> +			*expires = next->expires;
> +			*min = delta;
> +			*type = i;
> +		}
> +	}
> +
> +	if (*min == 0)

!*min above and *min == 0 here. Can we have consistency please?

> +		return 0;
> +
> +	return 1;
> +}
> +
> +static int alarmtimer_pm_callback(struct notifier_block *nb,
> +			    unsigned long mode, void *_unused)
> +{
> +	ktime_t min, expires;
> +	struct rtc_device *rtc = NULL;
> +	int type;

Same as above vs. ordering.

> +	switch (mode) {
> +	case PM_SUSPEND_PREPARE:
> +		/* Find the soonest timer to expire */
> +		if (!alarmtimer_get_soonest(rtc, &min, &expires, &type))
> +			return NOTIFY_DONE;
> +
> +		if (ktime_to_ns(min) <
> +			suspend_check_duration_ms * NSEC_PER_MSEC) {

No need for the line break. 80 character limit is gone.

> +			pr_warn("[%s] Suspend abort due to imminent alarm\n", __func__);

Why is this a warning? If at all it wants to be pr_debug() and the
__func_ is pretty useless as grep is able to find the string, no?

> +			pm_wakeup_event(&rtc->dev, suspend_check_duration_ms);

How is this supposed to work? rtc is NULL.

> +			return notifier_from_errno(-ETIME);
> +		}
> +	}
> +
> +	return NOTIFY_DONE;
> +}
> +
> +static struct notifier_block alarmtimer_pm_notifier = {
> +	.notifier_call = alarmtimer_pm_callback,
> +};
> +
>  /**
>   * alarmtimer_get_rtcdev - Return selected rtcdevice
>   *
> @@ -181,6 +263,7 @@ static int alarmtimer_rtc_add_device(struct device *dev)
>  static inline void alarmtimer_rtc_timer_init(void)
>  {
>  	rtc_timer_init(&rtctimer, NULL, NULL);
> +	register_pm_notifier(&alarmtimer_pm_notifier);
>  }
>  
>  static struct class_interface alarmtimer_rtc_interface = {
> @@ -296,49 +379,14 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
>  static int alarmtimer_suspend(struct device *dev)
>  {
>  	ktime_t min, now, expires;
> -	int i, ret, type;
> -	struct rtc_device *rtc;
> -	unsigned long flags;
> +	struct rtc_device *rtc = NULL;
>  	struct rtc_time tm;
> +	int ret, type;

See above.

SNIP

>  	/* Setup an rtc timer to fire that far in the future */

And another NULL pointer dereference follows suit.

How was this ever tested?

You need:

	rtc = alarmtimer_get_rtcdev();
	if (!rtc)
		return [0|NOTIFY_DONE];

in both functions and then hand in rtc to alarmtimer_get_soonest(), no?

Thanks,

        tglx
Re: [PATCH v2 2/2] alarmtimer: Modify alarmtimer suspend callback to check for imminent alarm using PM notifier
Posted by Pranav Prasad 1 year, 12 months ago
> Why is this a warning? If at all it wants to be pr_debug() and the
> __func_ is pretty useless as grep is able to find the string, no?

Agreed with the pr_debug(), doesn't need to be a warning. I had
made it a warning so that it is logged by default (KERN_WARN),
so that it can grepped for without enabling dynamic debug for this
module.

> How is this supposed to work? rtc is NULL.

alarmtimer_get_soonest() has the following:
	rtc = alarmtimer_get_rtcdev();
        if (!rtc)
		return 0;

rtc is set in alarmtimer_get_soonest(). If rtc is NULL, the notifier
returns NOTIFY_DONE before even reaching pm_wakeup_event(), hence there is
no NULL pointer dereference expected.

I agree that I should move it out of alarmtimer_get_soonest() and assign
it before the function call as it is unrelated to getting the soonest
alarm.

> How was this ever tested?

I tested it by forcing kernel suspend writing 'mem' to /sys/power/state and
using a large window (120s instead of the current 2s) so that a pending
alarm is likely. The debug print is logged as expected without any kernel
crash, and the suspend gets aborted.

Thanks for the other comments, Thomas and John!
I shall send out a v3 with all the fixes.

Regards,
Pranav
Re: [PATCH v2 2/2] alarmtimer: Modify alarmtimer suspend callback to check for imminent alarm using PM notifier
Posted by Thomas Gleixner 1 year, 12 months ago
Pranav!

On Tue, Feb 13 2024 at 20:30, Pranav Prasad wrote:
>> How is this supposed to work? rtc is NULL.
>
> alarmtimer_get_soonest() has the following:
> 	rtc = alarmtimer_get_rtcdev();
>         if (!rtc)
> 		return 0;
>
> rtc is set in alarmtimer_get_soonest(). If rtc is NULL, the notifier
> returns NOTIFY_DONE before even reaching pm_wakeup_event(), hence there is
> no NULL pointer dereference expected.

      struct rtc_device *rtc = NULL;

      if (!alarmtimer_get_soonest(rtc, ....)
      	  return 0 ;

      pm_wakeup_event(rtc->dev);

static bool alarmtimer_get_soonest(struct rtc_device *rtc, ....)
{
 	rtc = alarmtimer_get_rtcdev();
        if (!rtc)
 		return false;
        ...
        return true;
}

How is the assignment in alarmtimer_get_soonest() causing 'rtc' at the
callsite to become magically non NULL unless you have this shiny new AI
enhanced compiler with the long awaited 'do what I mean' optimization
enabled by default.

My old school brain based compiler is absolutely sure that both places
which I pointed out are straight forward unconditional NULL pointer
dereferences. See below.

>> How was this ever tested?
>
> I tested it by forcing kernel suspend writing 'mem' to /sys/power/state and
> using a large window (120s instead of the current 2s) so that a pending
> alarm is likely. The debug print is logged as expected without any kernel
> crash, and the suspend gets aborted.

I have no idea what you tested, but definitely not the complete
submitted code.

The only reason why this did not explode in your face in
pm_wakeup_event() is that this function has a NULL pointer check and
struct rtc_device has @dev as first member, which means that

       rtc == &rtc->dev

resulting in the NULL pointer check to be effective because &rtc->dev
evaluates to NULL.

But that does not make the code more correct. It's still am
unconditional NULL pointer dereference by definition, no?

Just flip the ordering of @dev in @owner in struct rtc_device and watch
the show.

alarmtimer_suspend() did not explode in your face because either the
notifier aborted suspend, which means the function was never reached, or
there was no timer armed. Why?

Because rtc_cancel_timer() has no NULL pointer check and unconditionally
dereferences @rtc.

Just for the record. I missed to spot this gem in your patch:

> +	/* Find the soonest timer to expire */
> +	if (!alarmtimer_get_soonest(rtc, &min, &expires, &type))
>  		return 0;
>  
> -	if (ktime_to_ns(min) < suspend_check_duration_ms * NSEC_PER_MSEC) {
> -		pm_wakeup_event(dev, suspend_check_duration_ms);
> -		return -EBUSY;
> -	}
> -

So you delete the threshold check. What makes sure that a timer which
got armed _after_ the notifier ran or one of the nanosleep timers which
are only accounted for after freezing are checked for expiring before
the threshold?

Nothing, right?

But sure, the patch did what you wanted to demonstrate and that's why it
is correct and perfect, right?

I conceed that your patch works perfectly correct under the following
condition:

    Either alarmtimer_pm_callback() aborts the suspend or
    alarmtimer_get_soonest() does not find an armed timer when called in
    alarmtimer_suspend()

Unfortunately that's not reflecting reality in production systems unless
I'm missing something important here.

Thanks,

        tglx
Re: [PATCH v2 2/2] alarmtimer: Modify alarmtimer suspend callback to check for imminent alarm using PM notifier
Posted by Pranav Prasad 1 year, 12 months ago
Thanks Thomas for the detailed review.
I understand that I got a false positive, and I shall make the suggested
changes for v3.

Regards,
Pranav
Re: [PATCH v2 2/2] alarmtimer: Modify alarmtimer suspend callback to check for imminent alarm using PM notifier
Posted by John Stultz 1 year, 12 months ago
On Thu, Feb 8, 2024 at 11:56 AM Pranav Prasad <pranavpp@google.com> wrote:
>
> The alarmtimer driver currently fails suspend attempts when there is an
> alarm pending within the next suspend_check_duration_ns nanoseconds, since
> the system is expected to wake up soon anyway. The entire suspend process
> is initiated even though the system will immediately awaken. This process
> includes substantial work before the suspend fails and additional work
> afterwards to undo the failed suspend that was attempted. Therefore on
> battery-powered devices that initiate suspend attempts from userspace, it
> may be advantageous to be able to fail the suspend earlier in the suspend
> flow to avoid power consumption instead of unnecessarily doing extra work.
> As one data point, an analysis of a subset of Android devices showed that
> imminent alarms account for roughly 40% of all suspend failures on average
> leading to unnecessary power wastage.
>
> To facilitate this, register a PM notifier in the alarmtimer subsystem
> that checks if an alarm is imminent during the prepare stage of kernel
> suspend denoted by the event PM_SUSPEND_PREPARE. If an alarm is imminent,
> it returns the errno code ETIME instead of EBUSY to userspace in order to
> make it easily diagnosable.

Thanks for continuing to iterate on this!

One concern below...

> +static int alarmtimer_pm_callback(struct notifier_block *nb,
> +                           unsigned long mode, void *_unused)
> +{
> +       ktime_t min, expires;
> +       struct rtc_device *rtc = NULL;
> +       int type;
> +
> +       switch (mode) {
> +       case PM_SUSPEND_PREPARE:
> +               /* Find the soonest timer to expire */
> +               if (!alarmtimer_get_soonest(rtc, &min, &expires, &type))
> +                       return NOTIFY_DONE;
> +
> +               if (ktime_to_ns(min) <
> +                       suspend_check_duration_ms * NSEC_PER_MSEC) {
> +                       pr_warn("[%s] Suspend abort due to imminent alarm\n", __func__);
> +                       pm_wakeup_event(&rtc->dev, suspend_check_duration_ms);
> +                       return notifier_from_errno(-ETIME);
> +               }
> +       }
> +
> +       return NOTIFY_DONE;
> +}
> +

So the alarmtimer_pm_callback provides an earlier warning that we have
an imminent alarm, looks ok to me.


> @@ -296,49 +379,14 @@ EXPORT_SYMBOL_GPL(alarm_expires_remaining);
>  static int alarmtimer_suspend(struct device *dev)
>  {
...
> +       /* Find the soonest timer to expire */
> +       if (!alarmtimer_get_soonest(rtc, &min, &expires, &type))
>                 return 0;
>
> -       if (ktime_to_ns(min) < suspend_check_duration_ms * NSEC_PER_MSEC) {
> -               pm_wakeup_event(dev, suspend_check_duration_ms);
> -               return -EBUSY;
> -       }

It seems like we'd want to preserve the check in alarmtimer_suspend()
as well, no? As the various suspend calls might take awhile and in
that time, the next timer may have slipped into the window of being
imminent.

thanks
-john