[PATCH v3] PM: Add configurable sync timeout for suspend and hibernation

tuhaowen posted 1 patch 14 hours ago
include/linux/suspend.h  |  3 ++
kernel/power/hibernate.c |  4 +-
kernel/power/main.c      | 88 ++++++++++++++++++++++++++++++++++++++++
kernel/power/suspend.c   |  6 ++-
4 files changed, 99 insertions(+), 2 deletions(-)
[PATCH v3] PM: Add configurable sync timeout for suspend and hibernation
Posted by tuhaowen 14 hours ago
When large file operations are in progress during system suspend or
hibernation, the ksys_sync() call can hang for extended periods,
leading to unresponsive system behavior. Users copying large files
to USB drives may experience black screen hangs when attempting to
suspend, requiring forced power cycles.

A specific problematic scenario occurs when data is being copied and
the system enters S3 suspend. If the block device is removed during
this process, some filesystems may not properly handle the device
disappearance and continue to believe the block device exists. This
causes ksys_sync() to be switched out and never complete, with the
following stack trace:

[<0>] __switch_to+0xd0/0x168
[<0>] iterate_supers+0x88/0x118
[<0>] ksys_sync+0x48/0xb8
[<0>] ksys_sync_helper+0x18/0xa0
[<0>] pm_suspend+0x260/0x3e8

This patch introduces a unified sync timeout mechanism for both
suspend-to-RAM (S3) and hibernation (S4) to prevent indefinite
hangs while maintaining data integrity.

Key features:
- Configurable timeout via sysfs interface
- Default behavior unchanged (timeout disabled by default)
- Unified implementation for both suspend and hibernation paths
- Graceful fallback to direct sync on thread creation failure
- Non-blocking timeout: sync continues in background after timeout

Sysfs interface:
- /sys/power/sleep_sync_timeout: Runtime configuration (0-600000ms)

When timeout is enabled and exceeded, the suspend/hibernation operation
fails gracefully with -ETIMEDOUT to prevent system hangs, while the
sync operation continues running in the background to ensure eventual
data integrity. This approach provides responsive user experience
without compromising data safety.

Implementation creates a separate kthread for sync operations when
timeout is enabled. If the timeout expires, the main suspend path
immediately returns with an error, allowing the system to remain
responsive, while the background sync thread continues to completion
independently. The sync operation is never forcibly terminated or
interrupted.

Signed-off-by: tuhaowen <tuhaowen@uniontech.com>
---
Changes in v3:
- Added mutex protection to prevent concurrent sync timeout operations
- Removed kthread_stop() to allow sync completion after system resume
- Non-blocking timeout implementation: sync continues in background
---
 include/linux/suspend.h  |  3 ++
 kernel/power/hibernate.c |  4 +-
 kernel/power/main.c      | 88 ++++++++++++++++++++++++++++++++++++++++
 kernel/power/suspend.c   |  6 ++-
 4 files changed, 99 insertions(+), 2 deletions(-)

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index da6ebca3f..976c8f8a1 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -439,6 +439,8 @@ void restore_processor_state(void);
 extern int register_pm_notifier(struct notifier_block *nb);
 extern int unregister_pm_notifier(struct notifier_block *nb);
 extern void ksys_sync_helper(void);
+extern int ksys_sync_helper_timeout(unsigned int timeout_ms);
+extern unsigned int sync_timeout_ms;
 extern void pm_report_hw_sleep_time(u64 t);
 extern void pm_report_max_hw_sleep(u64 t);
 
@@ -486,6 +488,7 @@ static inline void pm_report_hw_sleep_time(u64 t) {};
 static inline void pm_report_max_hw_sleep(u64 t) {};
 
 static inline void ksys_sync_helper(void) {}
+static inline int ksys_sync_helper_timeout(unsigned int timeout_ms) { return 0; }
 
 #define pm_notifier(fn, pri)	do { (void)(fn); } while (0)
 
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index 23c0f4e6c..2678181a5 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -777,7 +777,9 @@ int hibernate(void)
 	if (error)
 		goto Restore;
 
-	ksys_sync_helper();
+	error = ksys_sync_helper_timeout(sync_timeout_ms);
+	if (error)
+		goto Exit;
 
 	error = freeze_processes();
 	if (error)
diff --git a/kernel/power/main.c b/kernel/power/main.c
index 6254814d4..a437fa0b2 100644
--- a/kernel/power/main.c
+++ b/kernel/power/main.c
@@ -17,10 +17,22 @@
 #include <linux/suspend.h>
 #include <linux/syscalls.h>
 #include <linux/pm_runtime.h>
+#include <linux/completion.h>
+#include <linux/kthread.h>
+#include <linux/jiffies.h>
 
 #include "power.h"
 
 #ifdef CONFIG_PM_SLEEP
+/* Sync timeout parameters */
+unsigned int sync_timeout_ms;
+EXPORT_SYMBOL_GPL(sync_timeout_ms);
+
+/* Sync timeout implementation */
+static struct completion sync_completion;
+static struct task_struct *sync_task;
+static DEFINE_MUTEX(sync_timeout_mutex);
+
 /*
  * The following functions are used by the suspend/hibernate code to temporarily
  * change gfp_allowed_mask in order to avoid using I/O during memory allocations
@@ -79,6 +91,50 @@ void ksys_sync_helper(void)
 }
 EXPORT_SYMBOL_GPL(ksys_sync_helper);
 
+static int sync_thread_func(void *data)
+{
+	ksys_sync_helper();
+	complete(&sync_completion);
+	return 0;
+}
+
+int ksys_sync_helper_timeout(unsigned int timeout_ms)
+{
+	unsigned long timeout_jiffies;
+	int ret = 0;
+
+	/* If timeout is 0, use regular sync without timeout */
+	if (timeout_ms == 0) {
+		ksys_sync_helper();
+		return 0;
+	}
+
+	mutex_lock(&sync_timeout_mutex);
+	init_completion(&sync_completion);
+	sync_task = kthread_run(sync_thread_func, NULL, "sync_timeout");
+	if (IS_ERR(sync_task)) {
+		pr_warn("%s: Failed to create sync thread, performing sync directly\n",
+			__func__);
+		ksys_sync_helper();
+		goto unlock;
+	}
+
+	timeout_jiffies = msecs_to_jiffies(timeout_ms);
+	if (!wait_for_completion_timeout(&sync_completion, timeout_jiffies)) {
+		pr_warn("%s: Sync operation timed out after %u ms, aborting suspend/hibernation\n",
+			__func__, timeout_ms);
+		pr_info("%s: Sync operation continues in background\n", __func__);
+		ret = -ETIMEDOUT;
+	}
+
+unlock:
+	mutex_unlock(&sync_timeout_mutex);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ksys_sync_helper_timeout);
+
+
+
 /* Routines for PM-transition notifications */
 
 static BLOCKING_NOTIFIER_HEAD(pm_chain_head);
@@ -240,6 +296,37 @@ static ssize_t sync_on_suspend_store(struct kobject *kobj,
 }
 
 power_attr(sync_on_suspend);
+
+/*
+ * sleep_sync_timeout: configure sync timeout during suspend/hibernation.
+ *
+ * show() returns the current sync timeout in milliseconds.
+ * store() accepts timeout value in milliseconds. 0 disables timeout.
+ */
+static ssize_t sleep_sync_timeout_show(struct kobject *kobj,
+				 struct kobj_attribute *attr, char *buf)
+{
+	return sysfs_emit(buf, "%u\n", sync_timeout_ms);
+}
+
+static ssize_t sleep_sync_timeout_store(struct kobject *kobj,
+				  struct kobj_attribute *attr,
+				  const char *buf, size_t n)
+{
+	unsigned long val;
+
+	if (kstrtoul(buf, 10, &val))
+		return -EINVAL;
+
+	/* Allow any reasonable timeout value */
+	if (val > 600000) /* Max 10 minutes */
+		return -EINVAL;
+
+	sync_timeout_ms = val;
+	return n;
+}
+
+power_attr(sleep_sync_timeout);
 #endif /* CONFIG_SUSPEND */
 
 #ifdef CONFIG_PM_SLEEP_DEBUG
@@ -974,6 +1061,7 @@ static struct attribute * g[] = {
 #ifdef CONFIG_SUSPEND
 	&mem_sleep_attr.attr,
 	&sync_on_suspend_attr.attr,
+	&sleep_sync_timeout_attr.attr,
 #endif
 #ifdef CONFIG_PM_AUTOSLEEP
 	&autosleep_attr.attr,
diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c
index 8eaec4ab1..4f8015a75 100644
--- a/kernel/power/suspend.c
+++ b/kernel/power/suspend.c
@@ -585,8 +585,12 @@ static int enter_state(suspend_state_t state)
 
 	if (sync_on_suspend_enabled) {
 		trace_suspend_resume(TPS("sync_filesystems"), 0, true);
-		ksys_sync_helper();
+		error = ksys_sync_helper_timeout(sync_timeout_ms);
 		trace_suspend_resume(TPS("sync_filesystems"), 0, false);
+		if (error) {
+			pr_err("PM: Sync timeout, aborting suspend\n");
+			goto Unlock;
+		}
 	}
 
 	pm_pr_dbg("Preparing system for sleep (%s)\n", mem_sleep_labels[state]);
-- 
2.20.1