[RFC/RFT PATCH] PM: sleep: Ignore device driver suspend() callback return values

Len Brown posted 1 patch 1 week, 1 day ago
Documentation/power/driver_api.rst | 25 +++++++++++++++++++++++++
Documentation/power/index.rst      |  1 +
drivers/base/power/main.c          | 25 ++++++++++++++++++-------
kernel/power/Kconfig               | 17 +++++++++++++++++
4 files changed, 61 insertions(+), 7 deletions(-)
create mode 100644 Documentation/power/driver_api.rst
[RFC/RFT PATCH] PM: sleep: Ignore device driver suspend() callback return values
Posted by Len Brown 1 week, 1 day ago
From: Len Brown <len.brown@intel.com>

Drivers commonly return non-zero values from their suspend
callbacks due to transient errors, not realizing that doing so
aborts system-wide suspend.

Log, but do not abort system suspend on non-zero return values
from driver's .suspend/.suspend_noirq/.suspend_late callbacks.

Both before and after this patch, the correct method for a
device driver to abort system-wide suspend is to invoke
pm_system_wakeup() during the suspend flow.

Legacy behaviour can be restored by adding this line to your .config:
CONFIG_PM_SLEEP_LEGACY_CALLBACK_ABORT=y

Signed-off-by: Len Brown <len.brown@intel.com>
---
 Documentation/power/driver_api.rst | 25 +++++++++++++++++++++++++
 Documentation/power/index.rst      |  1 +
 drivers/base/power/main.c          | 25 ++++++++++++++++++-------
 kernel/power/Kconfig               | 17 +++++++++++++++++
 4 files changed, 61 insertions(+), 7 deletions(-)
 create mode 100644 Documentation/power/driver_api.rst

diff --git a/Documentation/power/driver_api.rst b/Documentation/power/driver_api.rst
new file mode 100644
index 000000000000..b9a46a17f39b
--- /dev/null
+++ b/Documentation/power/driver_api.rst
@@ -0,0 +1,25 @@
+.. SPDX-License-Identifier: GPL-2.0+
+
+==================
+Driver Suspend API
+==================
+
+
+1. How Can A driver abort system suspend?
+-----------------------------------------
+
+Any driver can abort system-wide  by invoking pm_system_wakeup()
+during the suspend flow.
+
+ie. from the drivers suspend callbacks:
+ .suspend()
+ .suspend_noirq()
+ .suspend_late()
+
+Alternatively, if CONFIG_PM_SLEEP_LEGACY_CALLBACK_ABORT=y is present in .config,
+then any non-zero return value from any device drivers callback:
+ .suspend()
+ .suspend_noirq()
+ .suspend_late()
+will abort the system-wide suspend flow.
+Note that CONFIG_PM_SLEEP_LEGACY_CALLBACK_ABORT=n, by default.
diff --git a/Documentation/power/index.rst b/Documentation/power/index.rst
index a0f5244fb427..f655662a9c15 100644
--- a/Documentation/power/index.rst
+++ b/Documentation/power/index.rst
@@ -7,6 +7,7 @@ Power Management
 .. toctree::
     :maxdepth: 1
 
+    driver_api
     apm-acpi
     basic-pm-debugging
     charger-manager
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index 4a67e83300e1..1b4ab73112e4 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -1244,7 +1244,6 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool asy
 Run:
 	error = dpm_run_callback(callback, dev, state, info);
 	if (error) {
-		async_error = error;
 		dpm_save_failed_dev(dev_name(dev));
 		pm_dev_err(dev, state, async ? " async noirq" : " noirq", error);
 		goto Complete;
@@ -1270,7 +1269,12 @@ static int device_suspend_noirq(struct device *dev, pm_message_t state, bool asy
 Complete:
 	complete_all(&dev->power.completion);
 	TRACE_SUSPEND(error);
-	return error;
+
+	if (IS_ENABLED(CONFIG_PM_SLEEP_LEGACY_CALLBACK_ABORT)) {
+		async_error = error;
+		return error;
+	}
+	return 0;
 }
 
 static void async_suspend_noirq(void *data, async_cookie_t cookie)
@@ -1424,7 +1428,6 @@ static int device_suspend_late(struct device *dev, pm_message_t state, bool asyn
 Run:
 	error = dpm_run_callback(callback, dev, state, info);
 	if (error) {
-		async_error = error;
 		dpm_save_failed_dev(dev_name(dev));
 		pm_dev_err(dev, state, async ? " async late" : " late", error);
 		goto Complete;
@@ -1437,7 +1440,12 @@ static int device_suspend_late(struct device *dev, pm_message_t state, bool asyn
 Complete:
 	TRACE_SUSPEND(error);
 	complete_all(&dev->power.completion);
-	return error;
+
+	if (IS_ENABLED(CONFIG_PM_SLEEP_LEGACY_CALLBACK_ABORT)) {
+		async_error = error;
+		return error;
+	}
+	return 0;
 }
 
 static void async_suspend_late(void *data, async_cookie_t cookie)
@@ -1681,7 +1689,7 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async)
 	error = dpm_run_callback(callback, dev, state, info);
 
  End:
-	if (!error) {
+	if (!error || !IS_ENABLED(CONFIG_PM_SLEEP_LEGACY_CALLBACK_ABORT)) {
 		dev->power.is_suspended = true;
 		if (device_may_wakeup(dev))
 			dev->power.wakeup_path = true;
@@ -1695,14 +1703,17 @@ static int device_suspend(struct device *dev, pm_message_t state, bool async)
 
  Complete:
 	if (error) {
-		async_error = error;
 		dpm_save_failed_dev(dev_name(dev));
 		pm_dev_err(dev, state, async ? " async" : "", error);
 	}
 
 	complete_all(&dev->power.completion);
 	TRACE_SUSPEND(error);
-	return error;
+	if (IS_ENABLED(CONFIG_PM_SLEEP_LEGACY_CALLBACK_ABORT)) {
+		async_error = error;
+		return error;
+	}
+	return 0;
 }
 
 static void async_suspend(void *data, async_cookie_t cookie)
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index afce8130d8b9..db120bba0826 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -141,6 +141,23 @@ config PM_SLEEP
 	depends on SUSPEND || HIBERNATE_CALLBACKS
 	select PM
 
+config PM_SLEEP_LEGACY_CALLBACK_ABORT
+	bool "Enable legacy callback abort via return value"
+	depends on PM_SLEEP
+	help
+	This option enables the legacy API for device .suspend() callbacks.
+	That API empowered any driver to abort system-wide suspend
+	by returning any non-zero value from its suspend calbacks:
+	(.suspend/.suspend_noirq/.suspend_late)
+	In practice, these aborts are almost always spurious and unwanted.
+
+	Disabling this option (default) ignores .suspend() callback return values,
+	though they are still traced and logged.
+
+	The proper way for a device driver to abort system-wide suspend is to
+	invoke pm_system_wakeup() during the suspend flow.  This method is
+	valid, independent of this config option.
+
 config PM_SLEEP_SMP
 	def_bool y
 	depends on SMP
-- 
2.43.0