[RFC PATCH] PM: Add configurable sync timeout to prevent suspend hang

tuhaowen posted 1 patch 1 month ago
kernel/power/suspend.c | 56 +++++++++++++++++++++++++++++++++++++++++-
1 file changed, 55 insertions(+), 1 deletion(-)
[RFC PATCH] PM: Add configurable sync timeout to prevent suspend hang
Posted by tuhaowen 1 month ago
When users initiate suspend while large file copy operations are in
progress (especially to USB storage devices), the system can appear to
hang with a black screen for an extended period. This occurs because
ksys_sync_helper() in the suspend path blocks until all pending I/O
operations complete, which can take several minutes for large file
transfers.

This creates a poor user experience where the system appears
unresponsive, and users may force power off thinking the system has
crashed.

This RFC proposes adding an optional timeout mechanism for the sync
operation during suspend. The implementation includes:

- A configurable timeout (default: 60 seconds, disabled by default)
- Module parameters for runtime configuration:
  * sync_on_suspend_timeout_enable: Enable/disable the timeout feature
  * sync_on_suspend_timeout_ms: Configure timeout duration
- Uses a separate kernel thread to perform sync with timeout monitoring
- On timeout, suspend is aborted with clear error messaging
- Maintains backward compatibility (disabled by default)

Questions for the community:

1. Is this approach acceptable for addressing the user experience issue?
2. Are there better alternatives to handle long-running sync operations
   during suspend?
3. Should the default timeout value be different?
4. Any concerns with the implementation approach?

The feature is disabled by default to ensure no regression in existing
behavior. When enabled, it allows users to abort suspend if sync takes
too long, providing immediate feedback rather than an apparent system
hang.

Signed-off-by: tuhaowen <tuhaowen@uniontech.com>
---
 kernel/power/suspend.c | 56 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)

diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8eaec4ab1..feb1583c5 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -30,9 +30,25 @@
 #include <trace/events/power.h>
 #include <linux/compiler.h>
 #include <linux/moduleparam.h>
+#include <linux/completion.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>
 
 #include "power.h"
 
+/* Sync timeout parameters */
+static bool sync_on_suspend_timeout_enable;
+module_param(sync_on_suspend_timeout_enable, bool, 0644);
+MODULE_PARM_DESC(sync_on_suspend_timeout_enable, "Enable sync timeout during suspend (default: false)");
+
+static unsigned int sync_on_suspend_timeout_ms = 60000;
+module_param(sync_on_suspend_timeout_ms, uint, 0644);
+MODULE_PARM_DESC(sync_on_suspend_timeout_ms, "Sync timeout in milliseconds during suspend (default: 60000)");
+
+/* Sync timeout implementation */
+static struct completion sync_completion;
+static struct task_struct *sync_task;
+
 const char * const pm_labels[] = {
 	[PM_SUSPEND_TO_IDLE] = "freeze",
 	[PM_SUSPEND_STANDBY] = "standby",
@@ -61,6 +77,40 @@ static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head);
 enum s2idle_states __read_mostly s2idle_state;
 static DEFINE_RAW_SPINLOCK(s2idle_lock);
 
+static int sync_thread_func(void *data)
+{
+	ksys_sync_helper();
+	complete(&sync_completion);
+	return 0;
+}
+
+static int suspend_sync_with_timeout(void)
+{
+	unsigned long timeout_jiffies;
+
+	if (!sync_on_suspend_timeout_enable) {
+		ksys_sync_helper();
+		return 0;
+	}
+
+	init_completion(&sync_completion);
+	sync_task = kthread_run(sync_thread_func, NULL, "suspend_sync");
+	if (IS_ERR(sync_task)) {
+		pr_warn("PM: Failed to create sync thread, performing sync directly\n");
+		ksys_sync_helper();
+		return 0;
+	}
+
+	timeout_jiffies = msecs_to_jiffies(sync_on_suspend_timeout_ms);
+	if (!wait_for_completion_timeout(&sync_completion, timeout_jiffies)) {
+		pr_warn("PM: Sync operation timed out after %u ms, aborting suspend\n",
+				sync_on_suspend_timeout_ms);
+		kthread_stop(sync_task);
+		return -ETIMEDOUT;
+	}
+	return 0;
+}
+
 /**
  * pm_suspend_default_s2idle - Check if suspend-to-idle is the default suspend.
  *
@@ -585,8 +635,12 @@ static int enter_state(suspend_state_t state)
 
 	if (sync_on_suspend_enabled) {
 		trace_suspend_resume(TPS("sync_filesystems"), 0, true);
-		ksys_sync_helper();
+		error = suspend_sync_with_timeout();
 		trace_suspend_resume(TPS("sync_filesystems"), 0, false);
+		if (error) {
+			pr_err("PM: Sync timeout, aborting suspend\n");
+			goto Unlock;
+		}
 	}
 
 	pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
-- 
2.20.1
Re: [RFC PATCH] PM: Add configurable sync timeout to prevent suspend hang
Posted by Rafael J. Wysocki 4 weeks ago
On Thu, Aug 28, 2025 at 5:47 AM tuhaowen <tuhaowen@uniontech.com> wrote:
>
> When users initiate suspend while large file copy operations are in
> progress (especially to USB storage devices), the system can appear to
> hang with a black screen for an extended period. This occurs because
> ksys_sync_helper() in the suspend path blocks until all pending I/O
> operations complete, which can take several minutes for large file
> transfers.
>
> This creates a poor user experience where the system appears
> unresponsive, and users may force power off thinking the system has
> crashed.
>
> This RFC proposes adding an optional timeout mechanism for the sync
> operation during suspend. The implementation includes:
>
> - A configurable timeout (default: 60 seconds, disabled by default)
> - Module parameters for runtime configuration:
>   * sync_on_suspend_timeout_enable: Enable/disable the timeout feature
>   * sync_on_suspend_timeout_ms: Configure timeout duration
> - Uses a separate kernel thread to perform sync with timeout monitoring
> - On timeout, suspend is aborted with clear error messaging
> - Maintains backward compatibility (disabled by default)
>
> Questions for the community:
>
> 1. Is this approach acceptable for addressing the user experience issue?
> 2. Are there better alternatives to handle long-running sync operations
>    during suspend?

Yes, please see
https://lore.kernel.org/linux-pm/20250821004237.2712312-1-wusamuel@google.com/

> 3. Should the default timeout value be different?
> 4. Any concerns with the implementation approach?
>
> The feature is disabled by default to ensure no regression in existing
> behavior. When enabled, it allows users to abort suspend if sync takes
> too long, providing immediate feedback rather than an apparent system
> hang.
>
> Signed-off-by: tuhaowen <tuhaowen@uniontech.com>
> ---
>  kernel/power/suspend.c | 56 +++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 55 insertions(+), 1 deletion(-)
>
> diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
> index 8eaec4ab1..feb1583c5 100644
> --- a/kernel/power/suspend.c
> +++ b/kernel/power/suspend.c
> @@ -30,9 +30,25 @@
>  #include <trace/events/power.h>
>  #include <linux/compiler.h>
>  #include <linux/moduleparam.h>
> +#include <linux/completion.h>
> +#include <linux/kthread.h>
> +#include <linux/jiffies.h>
>
>  #include "power.h"
>
> +/* Sync timeout parameters */
> +static bool sync_on_suspend_timeout_enable;
> +module_param(sync_on_suspend_timeout_enable, bool, 0644);
> +MODULE_PARM_DESC(sync_on_suspend_timeout_enable, "Enable sync timeout during suspend (default: false)");
> +
> +static unsigned int sync_on_suspend_timeout_ms = 60000;
> +module_param(sync_on_suspend_timeout_ms, uint, 0644);
> +MODULE_PARM_DESC(sync_on_suspend_timeout_ms, "Sync timeout in milliseconds during suspend (default: 60000)");
> +
> +/* Sync timeout implementation */
> +static struct completion sync_completion;
> +static struct task_struct *sync_task;
> +
>  const char * const pm_labels[] = {
>         [PM_SUSPEND_TO_IDLE] = "freeze",
>         [PM_SUSPEND_STANDBY] = "standby",
> @@ -61,6 +77,40 @@ static DECLARE_SWAIT_QUEUE_HEAD(s2idle_wait_head);
>  enum s2idle_states __read_mostly s2idle_state;
>  static DEFINE_RAW_SPINLOCK(s2idle_lock);
>
> +static int sync_thread_func(void *data)
> +{
> +       ksys_sync_helper();
> +       complete(&sync_completion);
> +       return 0;
> +}
> +
> +static int suspend_sync_with_timeout(void)
> +{
> +       unsigned long timeout_jiffies;
> +
> +       if (!sync_on_suspend_timeout_enable) {
> +               ksys_sync_helper();
> +               return 0;
> +       }
> +
> +       init_completion(&sync_completion);
> +       sync_task = kthread_run(sync_thread_func, NULL, "suspend_sync");
> +       if (IS_ERR(sync_task)) {
> +               pr_warn("PM: Failed to create sync thread, performing sync directly\n");
> +               ksys_sync_helper();
> +               return 0;
> +       }
> +
> +       timeout_jiffies = msecs_to_jiffies(sync_on_suspend_timeout_ms);
> +       if (!wait_for_completion_timeout(&sync_completion, timeout_jiffies)) {
> +               pr_warn("PM: Sync operation timed out after %u ms, aborting suspend\n",
> +                               sync_on_suspend_timeout_ms);
> +               kthread_stop(sync_task);
> +               return -ETIMEDOUT;
> +       }
> +       return 0;
> +}
> +
>  /**
>   * pm_suspend_default_s2idle - Check if suspend-to-idle is the default suspend.
>   *
> @@ -585,8 +635,12 @@ static int enter_state(suspend_state_t state)
>
>         if (sync_on_suspend_enabled) {
>                 trace_suspend_resume(TPS("sync_filesystems"), 0, true);
> -               ksys_sync_helper();
> +               error = suspend_sync_with_timeout();
>                 trace_suspend_resume(TPS("sync_filesystems"), 0, false);
> +               if (error) {
> +                       pr_err("PM: Sync timeout, aborting suspend\n");
> +                       goto Unlock;
> +               }
>         }
>
>         pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
> --
> 2.20.1
>