[RFC PATCH v1] driver core: fw_devlink: Detect cycles when the supplier never gets a device

Saravana Kannan posted 1 patch 1 month ago
drivers/base/core.c | 45 +++++++++++++++++----------------------------
1 file changed, 17 insertions(+), 28 deletions(-)
[RFC PATCH v1] driver core: fw_devlink: Detect cycles when the supplier never gets a device
Posted by Saravana Kannan 1 month ago
Sometimes the supplier fwnode never gets a device created for it.  Instead,
the supplier fwnode is represented by the device that corresponds to a
parent/ancestor fwnode.

In these cases, we currently don't do any cycle detection because the cycle
detection logic is only run when a device link is created between the
devices that correspond to the actual consumer and supplier fwnodes.

To detect these cycles correctly, run cycle detection logic even when
creating SYNC_STATE_ONLY proxy device links from a device that is the
parent of the consumer.

Reported-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Closes: https://lore.kernel.org/all/1a1ab663-d068-40fb-8c94-f0715403d276@ideasonboard.com/
Signed-off-by: Saravana Kannan <saravanak@google.com>
---
Tomi,

I didn't have a chance yet to test this on my end. But I expect that
this will allow the display to probe in your single-link case without
having to add post-init-providers. You should still add it for better
probe/suspend/resume/shutdown ordering.

While you test this, can you also diff the output of the following
command with and without this change? There shouldn't be significant
differences (other than the ones with actual cycles):

ls -1 /sys/class/devlink

Greg,

This is RFC because I haven't tested it on my end. I need to do that
before I'd be okay merging this.

Thanks,
Saravana

 drivers/base/core.c | 45 +++++++++++++++++----------------------------
 1 file changed, 17 insertions(+), 28 deletions(-)

diff --git a/drivers/base/core.c b/drivers/base/core.c
index 3b13fed1c3e3..cf20101c74ac 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1990,10 +1990,10 @@ static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwn
  *
  * Return true if one or more cycles were found. Otherwise, return false.
  */
-static bool __fw_devlink_relax_cycles(struct device *con,
+static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle,
 				 struct fwnode_handle *sup_handle)
 {
-	struct device *sup_dev = NULL, *par_dev = NULL;
+	struct device *sup_dev = NULL, *par_dev = NULL, *con_dev = NULL;
 	struct fwnode_link *link;
 	struct device_link *dev_link;
 	bool ret = false;
@@ -2010,22 +2010,22 @@ static bool __fw_devlink_relax_cycles(struct device *con,
 
 	sup_handle->flags |= FWNODE_FLAG_VISITED;
 
-	sup_dev = get_dev_from_fwnode(sup_handle);
-
 	/* Termination condition. */
-	if (sup_dev == con) {
+	if (sup_handle == con_handle) {
 		pr_debug("----- cycle: start -----\n");
 		ret = true;
 		goto out;
 	}
 
+	sup_dev = get_dev_from_fwnode(sup_handle);
+	con_dev = get_dev_from_fwnode(con_handle);
 	/*
 	 * If sup_dev is bound to a driver and @con hasn't started binding to a
 	 * driver, sup_dev can't be a consumer of @con. So, no need to check
 	 * further.
 	 */
 	if (sup_dev && sup_dev->links.status ==  DL_DEV_DRIVER_BOUND &&
-	    con->links.status == DL_DEV_NO_DRIVER) {
+	    con_dev->links.status == DL_DEV_NO_DRIVER) {
 		ret = false;
 		goto out;
 	}
@@ -2034,7 +2034,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
 		if (link->flags & FWLINK_FLAG_IGNORE)
 			continue;
 
-		if (__fw_devlink_relax_cycles(con, link->supplier)) {
+		if (__fw_devlink_relax_cycles(con_handle, link->supplier)) {
 			__fwnode_link_cycle(link);
 			ret = true;
 		}
@@ -2049,7 +2049,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
 	else
 		par_dev = fwnode_get_next_parent_dev(sup_handle);
 
-	if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) {
+	if (par_dev && __fw_devlink_relax_cycles(con_handle, par_dev->fwnode)) {
 		pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle,
 			 par_dev->fwnode);
 		ret = true;
@@ -2067,7 +2067,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
 		    !(dev_link->flags & DL_FLAG_CYCLE))
 			continue;
 
-		if (__fw_devlink_relax_cycles(con,
+		if (__fw_devlink_relax_cycles(con_handle,
 					      dev_link->supplier->fwnode)) {
 			pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle,
 				 dev_link->supplier->fwnode);
@@ -2139,26 +2139,15 @@ static int fw_devlink_create_devlink(struct device *con,
 	    fwnode_is_ancestor_of(sup_handle, con->fwnode))
 		return -EINVAL;
 
-	/*
-	 * SYNC_STATE_ONLY device links don't block probing and supports cycles.
-	 * So, one might expect that cycle detection isn't necessary for them.
-	 * However, if the device link was marked as SYNC_STATE_ONLY because
-	 * it's part of a cycle, then we still need to do cycle detection. This
-	 * is because the consumer and supplier might be part of multiple cycles
-	 * and we need to detect all those cycles.
-	 */
-	if (!device_link_flag_is_sync_state_only(flags) ||
-	    flags & DL_FLAG_CYCLE) {
-		device_links_write_lock();
-		if (__fw_devlink_relax_cycles(con, sup_handle)) {
-			__fwnode_link_cycle(link);
-			flags = fw_devlink_get_flags(link->flags);
-			pr_debug("----- cycle: end -----\n");
-			dev_info(con, "Fixed dependency cycle(s) with %pfwf\n",
-				 sup_handle);
-		}
-		device_links_write_unlock();
+	device_links_write_lock();
+	if (__fw_devlink_relax_cycles(link->consumer, sup_handle)) {
+		__fwnode_link_cycle(link);
+		flags = fw_devlink_get_flags(link->flags);
+		pr_debug("----- cycle: end -----\n");
+		pr_info("%pfwf: Fixed dependency cycle(s) with %pfwf\n",
+			link->consumer, sup_handle);
 	}
+	device_links_write_unlock();
 
 	if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE)
 		sup_dev = fwnode_get_next_parent_dev(sup_handle);
-- 
2.47.0.163.g1226f6d8fa-goog
Re: [RFC PATCH v1] driver core: fw_devlink: Detect cycles when the supplier never gets a device
Posted by Saravana Kannan 1 month ago
On Fri, Oct 25, 2024 at 3:37 PM Saravana Kannan <saravanak@google.com> wrote:
>
> Sometimes the supplier fwnode never gets a device created for it.  Instead,
> the supplier fwnode is represented by the device that corresponds a
> parent/ancestor fwnode.
>
> In these cases, we currently don't do any cycle detection because the cycle
> detection logic is only run when a device link is created between the
> devices that correspond to the actual consumer and supplier fwnodes.
>
> To detect these cycles correctly, run cycle detection logic even when
> creating SYNC_STATE_ONLY proxy device links from a device that is the
> parent of the consumer.
>
> Reported-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
> Closes: https://lore.kernel.org/all/1a1ab663-d068-40fb-8c94-f0715403d276@ideasonboard.com/
> Signed-off-by: Saravana Kannan <saravanak@google.com>
> ---
> Tomi,
>
> I didn't have a chance yet to test this on my end. But I expect that
> this will allow the display to probe in your single-link case without
> having to add post-init-providers. You should still add it for better
> probe/suspend/resume/shutdown ordering.
>
> While you test this, can you also do a diff of with and without this
> change? It shouldn't have significant differences (other than the ones
> with actual cycles):
>
> ls -1 /sys/class/devlink
>
> Greg,
>
> This is RFC because I haven't tested it on my end. I need to do that
> before I'd be okay merging this.
>
> Thanks,
> Saravana
>
>  drivers/base/core.c | 45 +++++++++++++++++----------------------------
>  1 file changed, 17 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/base/core.c b/drivers/base/core.c
> index 3b13fed1c3e3..cf20101c74ac 100644
> --- a/drivers/base/core.c
> +++ b/drivers/base/core.c
> @@ -1990,10 +1990,10 @@ static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwn
>   *
>   * Return true if one or more cycles were found. Otherwise, return false.
>   */
> -static bool __fw_devlink_relax_cycles(struct device *con,
> +static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle,
>                                  struct fwnode_handle *sup_handle)
>  {
> -       struct device *sup_dev = NULL, *par_dev = NULL;
> +       struct device *sup_dev = NULL, *par_dev = NULL, *con_dev = NULL;
>         struct fwnode_link *link;
>         struct device_link *dev_link;
>         bool ret = false;
> @@ -2010,22 +2010,22 @@ static bool __fw_devlink_relax_cycles(struct device *con,
>
>         sup_handle->flags |= FWNODE_FLAG_VISITED;
>
> -       sup_dev = get_dev_from_fwnode(sup_handle);
> -
>         /* Termination condition. */
> -       if (sup_dev == con) {
> +       if (sup_handle == con_handle) {
>                 pr_debug("----- cycle: start -----\n");
>                 ret = true;
>                 goto out;
>         }
>
> +       sup_dev = get_dev_from_fwnode(sup_handle);
> +       con_dev = get_dev_from_fwnode(con_handle);
>         /*
>          * If sup_dev is bound to a driver and @con hasn't started binding to a
>          * driver, sup_dev can't be a consumer of @con. So, no need to check
>          * further.
>          */
>         if (sup_dev && sup_dev->links.status ==  DL_DEV_DRIVER_BOUND &&
> -           con->links.status == DL_DEV_NO_DRIVER) {
> +           con_dev->links.status == DL_DEV_NO_DRIVER) {

This needs to be changed to the following to avoid a NULL pointer
dereference when the consumer fwnode has no device:

con_dev && con_dev->links.status == DL_DEV_NO_DRIVER) {

My preliminary testing looks good. Once I'm happy with my tests, I'll
fix this and send out a v2.

Tomi, if this fixes your issue, please give a Tested-by:

-Saravana

>                 ret = false;
>                 goto out;
>         }
> @@ -2034,7 +2034,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
>                 if (link->flags & FWLINK_FLAG_IGNORE)
>                         continue;
>
> -               if (__fw_devlink_relax_cycles(con, link->supplier)) {
> +               if (__fw_devlink_relax_cycles(con_handle, link->supplier)) {
>                         __fwnode_link_cycle(link);
>                         ret = true;
>                 }
> @@ -2049,7 +2049,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
>         else
>                 par_dev = fwnode_get_next_parent_dev(sup_handle);
>
> -       if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) {
> +       if (par_dev && __fw_devlink_relax_cycles(con_handle, par_dev->fwnode)) {
>                 pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle,
>                          par_dev->fwnode);
>                 ret = true;
> @@ -2067,7 +2067,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
>                     !(dev_link->flags & DL_FLAG_CYCLE))
>                         continue;
>
> -               if (__fw_devlink_relax_cycles(con,
> +               if (__fw_devlink_relax_cycles(con_handle,
>                                               dev_link->supplier->fwnode)) {
>                         pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle,
>                                  dev_link->supplier->fwnode);
> @@ -2139,26 +2139,15 @@ static int fw_devlink_create_devlink(struct device *con,
>             fwnode_is_ancestor_of(sup_handle, con->fwnode))
>                 return -EINVAL;
>
> -       /*
> -        * SYNC_STATE_ONLY device links don't block probing and supports cycles.
> -        * So, one might expect that cycle detection isn't necessary for them.
> -        * However, if the device link was marked as SYNC_STATE_ONLY because
> -        * it's part of a cycle, then we still need to do cycle detection. This
> -        * is because the consumer and supplier might be part of multiple cycles
> -        * and we need to detect all those cycles.
> -        */
> -       if (!device_link_flag_is_sync_state_only(flags) ||
> -           flags & DL_FLAG_CYCLE) {
> -               device_links_write_lock();
> -               if (__fw_devlink_relax_cycles(con, sup_handle)) {
> -                       __fwnode_link_cycle(link);
> -                       flags = fw_devlink_get_flags(link->flags);
> -                       pr_debug("----- cycle: end -----\n");
> -                       dev_info(con, "Fixed dependency cycle(s) with %pfwf\n",
> -                                sup_handle);
> -               }
> -               device_links_write_unlock();
> +       device_links_write_lock();
> +       if (__fw_devlink_relax_cycles(link->consumer, sup_handle)) {
> +               __fwnode_link_cycle(link);
> +               flags = fw_devlink_get_flags(link->flags);
> +               pr_debug("----- cycle: end -----\n");
> +               pr_info("%pfwf: Fixed dependency cycle(s) with %pfwf\n",
> +                       link->consumer, sup_handle);
>         }
> +       device_links_write_unlock();
>
>         if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE)
>                 sup_dev = fwnode_get_next_parent_dev(sup_handle);
> --
> 2.47.0.163.g1226f6d8fa-goog
>