[PATCH v7 3/4] driver core: shut down devices asynchronously

Stuart Hayes posted 4 patches 1 year, 5 months ago
There is a newer version of this series
[PATCH v7 3/4] driver core: shut down devices asynchronously
Posted by Stuart Hayes 1 year, 5 months ago
Add code to allow asynchronous shutdown of devices, ensuring that each
device is shut down before its parents & suppliers.

Add async_shutdown_enable to struct device_driver, and expose it via sysfs.
This can be used to view or change driver opt-in to asynchronous shutdown.
Only devices with drivers that have async_shutdown_enable enabled will be
shut down asynchronously.

This can dramatically reduce system shutdown/reboot time on systems that
have multiple devices that take many seconds to shut down (like certain
NVMe drives). On one system tested, the shutdown time went from 11 minutes
without this patch to 55 seconds with the patch.

Signed-off-by: Stuart Hayes <stuart.w.hayes@gmail.com>
Signed-off-by: David Jeffery <djeffery@redhat.com>
---
 drivers/base/base.h           |  3 ++
 drivers/base/bus.c            | 26 +++++++++++++++++
 drivers/base/core.c           | 54 ++++++++++++++++++++++++++++++++++-
 include/linux/device/driver.h |  2 ++
 4 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/drivers/base/base.h b/drivers/base/base.h
index db4f910e8e36..6f65f159d039 100644
--- a/drivers/base/base.h
+++ b/drivers/base/base.h
@@ -10,6 +10,7 @@
  * shared outside of the drivers/base/ directory.
  *
  */
+#include <linux/async.h>
 #include <linux/notifier.h>
 
 /**
@@ -97,6 +98,7 @@ struct driver_private {
  *	the device; typically because it depends on another driver getting
  *	probed first.
  * @async_driver - pointer to device driver awaiting probe via async_probe
+ * @shutdown_after - used during device shutdown to ensure correct shutdown ordering.
  * @device - pointer back to the struct device that this structure is
  * associated with.
  * @dead - This device is currently either in the process of or has been
@@ -114,6 +116,7 @@ struct device_private {
 	struct list_head deferred_probe;
 	struct device_driver *async_driver;
 	char *deferred_probe_reason;
+	async_cookie_t shutdown_after;
 	struct device *device;
 	u8 dead:1;
 };
diff --git a/drivers/base/bus.c b/drivers/base/bus.c
index ffea0728b8b2..97fd02cff888 100644
--- a/drivers/base/bus.c
+++ b/drivers/base/bus.c
@@ -10,6 +10,7 @@
  */
 
 #include <linux/async.h>
+#include <linux/capability.h>
 #include <linux/device/bus.h>
 #include <linux/device.h>
 #include <linux/module.h>
@@ -635,6 +636,25 @@ static ssize_t uevent_store(struct device_driver *drv, const char *buf,
 }
 static DRIVER_ATTR_WO(uevent);
 
+static ssize_t async_shutdown_enable_show(struct device_driver *drv, char *buf)
+{
+	return sysfs_emit(buf, "%d\n", drv->async_shutdown_enable);
+}
+
+static ssize_t async_shutdown_enable_store(struct device_driver *drv, const char *buf,
+			      size_t count)
+{
+	if (!capable(CAP_SYS_BOOT))
+		return -EPERM;
+
+	if (kstrtobool(buf, &drv->async_shutdown_enable) < 0)
+		return -EINVAL;
+
+	return count;
+}
+
+static DRIVER_ATTR_RW(async_shutdown_enable);
+
 /**
  * bus_add_driver - Add a driver to the bus.
  * @drv: driver.
@@ -702,6 +722,12 @@ int bus_add_driver(struct device_driver *drv)
 		}
 	}
 
+	error = driver_create_file(drv, &driver_attr_async_shutdown_enable);
+	if (error) {
+		pr_err("%s: async_shutdown attr (%s) failed\n",
+			__func__, drv->name);
+	}
+
 	return 0;
 
 out_detach:
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 4be6071c2175..1eb5a7286c79 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/acpi.h>
+#include <linux/async.h>
 #include <linux/cpufreq.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -3528,6 +3529,7 @@ static int device_private_init(struct device *dev)
 	klist_init(&dev->p->klist_children, klist_children_get,
 		   klist_children_put);
 	INIT_LIST_HEAD(&dev->p->deferred_probe);
+	dev->p->shutdown_after = 0;
 	return 0;
 }
 
@@ -4778,6 +4780,8 @@ int device_change_owner(struct device *dev, kuid_t kuid, kgid_t kgid)
 }
 EXPORT_SYMBOL_GPL(device_change_owner);
 
+static ASYNC_DOMAIN(sd_domain);
+
 static void shutdown_one_device(struct device *dev)
 {
 	/* hold lock to avoid race with probe/release */
@@ -4813,12 +4817,34 @@ static void shutdown_one_device(struct device *dev)
 		put_device(dev->parent);
 }
 
+/**
+ * shutdown_one_device_async
+ * @data: the pointer to the struct device to be shutdown
+ * @cookie: not used
+ *
+ * Shuts down one device, after waiting for shutdown_after to complete.
+ * shutdown_after should be set to the cookie of the last child or consumer
+ * of this device to be shutdown (if any), or to the cookie of the previous
+ * device to be shut down for devices that don't enable asynchronous shutdown.
+ */
+static void shutdown_one_device_async(void *data, async_cookie_t cookie)
+{
+	struct device *dev = data;
+
+	async_synchronize_cookie_domain(dev->p->shutdown_after + 1, &sd_domain);
+
+	shutdown_one_device(dev);
+}
+
 /**
  * device_shutdown - call ->shutdown() on each device to shutdown.
  */
 void device_shutdown(void)
 {
 	struct device *dev, *parent;
+	async_cookie_t cookie = 0;
+	struct device_link *link;
+	int idx;
 
 	wait_for_device_probe();
 	device_block_probing();
@@ -4849,11 +4875,37 @@ void device_shutdown(void)
 		list_del_init(&dev->kobj.entry);
 		spin_unlock(&devices_kset->list_lock);
 
-		shutdown_one_device(dev);
+
+		/*
+		 * Set cookie for devices that will be shut down synchronously
+		 */
+		if (!dev->driver || !dev->driver->async_shutdown_enable)
+			dev->p->shutdown_after = cookie;
+
+		get_device(dev);
+		get_device(parent);
+
+		cookie = async_schedule_domain(shutdown_one_device_async,
+					       dev, &sd_domain);
+		/*
+		 * Ensure parent & suppliers wait for this device to shut down
+		 */
+		if (parent) {
+			parent->p->shutdown_after = cookie;
+			put_device(parent);
+		}
+
+		idx = device_links_read_lock();
+		list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+				device_links_read_lock_held())
+			link->supplier->p->shutdown_after = cookie;
+		device_links_read_unlock(idx);
+		put_device(dev);
 
 		spin_lock(&devices_kset->list_lock);
 	}
 	spin_unlock(&devices_kset->list_lock);
+	async_synchronize_full_domain(&sd_domain);
 }
 
 /*
diff --git a/include/linux/device/driver.h b/include/linux/device/driver.h
index 7738f458995f..1e78f2ab1366 100644
--- a/include/linux/device/driver.h
+++ b/include/linux/device/driver.h
@@ -56,6 +56,7 @@ enum probe_type {
  * @mod_name:	Used for built-in modules.
  * @suppress_bind_attrs: Disables bind/unbind via sysfs.
  * @probe_type:	Type of the probe (synchronous or asynchronous) to use.
+ * @async_shutdown_enable: Enables devices to be shutdown asynchronously.
  * @of_match_table: The open firmware table.
  * @acpi_match_table: The ACPI match table.
  * @probe:	Called to query the existence of a specific device,
@@ -102,6 +103,7 @@ struct device_driver {
 
 	bool suppress_bind_attrs;	/* disables bind/unbind via sysfs */
 	enum probe_type probe_type;
+	bool async_shutdown_enable;
 
 	const struct of_device_id	*of_match_table;
 	const struct acpi_device_id	*acpi_match_table;
-- 
2.39.3
Re: [PATCH v7 3/4] driver core: shut down devices asynchronously
Posted by Christoph Hellwig 1 year, 5 months ago
On Wed, Jun 26, 2024 at 02:46:49PM -0500, Stuart Hayes wrote:
> Add code to allow asynchronous shutdown of devices, ensuring that each
> device is shut down before its parents & suppliers.
> 
> Add async_shutdown_enable to struct device_driver, and expose it via sysfs.
> This can be used to view or change driver opt-in to asynchronous shutdown.
> Only devices with drivers that have async_shutdown_enable enabled will be
> shut down asynchronously.
> 
> This can dramatically reduce system shutdown/reboot time on systems that
> have multiple devices that take many seconds to shut down (like certain
> NVMe drives). On one system tested, the shutdown time went from 11 minutes
> without this patch to 55 seconds with the patch.

We discussed this before, but there is no summary of it and I of course
forgot the conclusion:

 - why don't we do this by default?
 - why is it safe to user enable it?

> + * @shutdown_after - used during device shutdown to ensure correct shutdown ordering.

Overly long line.

> +static ssize_t async_shutdown_enable_store(struct device_driver *drv, const char *buf,

.. and here.
Re: [PATCH v7 3/4] driver core: shut down devices asynchronously
Posted by stuart hayes 1 year, 5 months ago

On 6/27/2024 12:55 AM, Christoph Hellwig wrote:
> On Wed, Jun 26, 2024 at 02:46:49PM -0500, Stuart Hayes wrote:
>> Add code to allow asynchronous shutdown of devices, ensuring that each
>> device is shut down before its parents & suppliers.
>>
>> Add async_shutdown_enable to struct device_driver, and expose it via sysfs.
>> This can be used to view or change driver opt-in to asynchronous shutdown.
>> Only devices with drivers that have async_shutdown_enable enabled will be
>> shut down asynchronously.
>>
>> This can dramatically reduce system shutdown/reboot time on systems that
>> have multiple devices that take many seconds to shut down (like certain
>> NVMe drives). On one system tested, the shutdown time went from 11 minutes
>> without this patch to 55 seconds with the patch.
> 
> We discussed this before, but there is no summary of it and I of course
> forgot the conclusion:
> 
>   - why don't we do this by default?

It is done by default in this version, for devices whose drivers opt-in.

In the previous discussion, you mentioned that you thought "safe" was the
only sensible option (where "safe" was driver opt-in to async shutdown)...
that is the default (and only) option with this version.  Greg K-H also
requested opt-in as well, and suggested that "on" (driver opt-out) could
be removed.

>   - why is it safe to user enable it?

I guess it isn't necessarily safe, if there are any drivers that can't
handle their devices shutting down asynchronously. I thought it would be
nice to be able to enable driver opt-in from user space for testing, before
changing the default setting for the driver.

> 
>> + * @shutdown_after - used during device shutdown to ensure correct shutdown ordering.
> 
> Overly long line.
> 
>> +static ssize_t async_shutdown_enable_store(struct device_driver *drv, const char *buf,
> 
> .. and here.
> 
I can correct these lines. I thought that an 80 character line length limit
was no longer required, and saw another line a few lines above these that was
even longer... and the checkpatch script didn't flag it either.
Re: [PATCH v7 3/4] driver core: shut down devices asynchronously
Posted by Christoph Hellwig 1 year, 5 months ago
On Mon, Jul 01, 2024 at 12:57:40PM -0500, stuart hayes wrote:
>> We discussed this before, but there is no summary of it and I of course
>> forgot the conclusion:
>>
>>   - why don't we do this by default?
>
> It is done by default in this version, for devices whose drivers opt-in.
>
> In the previous discussion, you mentioned that you thought "safe" was the
> only sensible option (where "safe" was driver opt-in to async shutdown)...
> that is the default (and only) option with this version.  Greg K-H also
> requested opt-in as well, and suggested that "on" (driver opt-out) could
> be removed.
>
>>   - why is it safe to user enable it?
>
> I guess it isn't necessarily safe, if there are any drivers that can't
> handle their devices shutting down asynchronously. I thought it would be
> nice to be able to enable driver opt-in from user space for testing, before
> changing the default setting for the driver.

I was mostly getting into the contradiction that either we think the
async shutdown is safe everywhere, in which case we don't need a driver
opt-in, or it is not, in which case allowing user to just enabled it
also seems like a bad idea.

> I can correct these lines. I thought that an 80 character line length limit
> was no longer required, and saw another line a few lines above these that was
> even longer... and the checkpatch script didn't flag it either.

checkpatch is unfortunately completely broken, it flags totally harmless
things and doesn't catch other things.  > 80 characters are allowed for
individual lines where it improves readability.  The exact definition
of that depends on the maintainers and reviewers, but outside of
string constants I can't really find anything where it does.
Re: [PATCH v7 3/4] driver core: shut down devices asynchronously
Posted by stuart hayes 1 year, 5 months ago

On 7/2/2024 12:04 AM, Christoph Hellwig wrote:
> On Mon, Jul 01, 2024 at 12:57:40PM -0500, stuart hayes wrote:
>>> We discussed this before, but there is no summary of it and I of course
>>> forgot the conclusion:
>>>
>>>    - why don't we do this by default?
>>
>> It is done by default in this version, for devices whose drivers opt-in.
>>
>> In the previous discussion, you mentioned that you thought "safe" was the
>> only sensible option (where "safe" was driver opt-in to async shutdown)...
>> that is the default (and only) option with this version.  Greg K-H also
>> requested opt-in as well, and suggested that "on" (driver opt-out) could
>> be removed.
>>
>>>    - why is it safe to user enable it?
>>
>> I guess it isn't necessarily safe, if there are any drivers that can't
>> handle their devices shutting down asynchronously. I thought it would be
>> nice to be able to enable driver opt-in from user space for testing, before
>> changing the default setting for the driver.
> 
> I was mostly getting into the contradiction that either we think the
> async shutdown is safe everywhere, in which case we don't need a driver
> opt-in, or it is not, in which case allowing user to just enabled it
> also seems like a bad idea.
> 

I understand. My thinking was that is was very likely to be safe (the initial
version of this patch didn't have an opt-in or opt-out).

I have no issue removing the sysfs attribute if you think that's best.

>> I can correct these lines. I thought that an 80 character line length limit
>> was no longer required, and saw another line a few lines above these that was
>> even longer... and the checkpatch script didn't flag it either.
> 
> checkpatch is unfortunately completely broken, it flags totally harmless
> things and doesn't catch other things.  > 80 characters are allowed for
> individual lines where it improves readability.  The exact definition
> of that depends on the maintainers and reviewers, but outside of
> string constants I can't really find anything where it does.

Got it, thanks for the feedback.