drivers/base/core.c | 55 ++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 31 deletions(-)
In attempting to optimize fw_devlink runtime, I introduced numerous cycle
detection bugs by forgoing cycle detection logic under specific
conditions. Each fix has further narrowed the conditions for optimization.
It's time to give up on these optimization attempts and just run the cycle
detection logic every time fw_devlink tries to create a device link.
The specific bug report that triggered this fix involved a supplier fwnode
that never gets a device created for it. Instead, the supplier fwnode is
represented by the device that corresponds to an ancestor fwnode.
In this case, fw_devlink didn't do any cycle detection because the cycle
detection logic is only run when a device link is created between the
devices that correspond to the actual consumer and supplier fwnodes.
With this change, fw_devlink will run cycle detection logic even when
creating SYNC_STATE_ONLY proxy device links from a device that is an
ancestor of a consumer fwnode.
Reported-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Closes: https://lore.kernel.org/all/1a1ab663-d068-40fb-8c94-f0715403d276@ideasonboard.com/
Fixes: 6442d79d880c ("driver core: fw_devlink: Improve detection of overlapping cycles")
Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com>
Signed-off-by: Saravana Kannan <saravanak@google.com>
---
drivers/base/core.c | 55 ++++++++++++++++++++-------------------------
1 file changed, 24 insertions(+), 31 deletions(-)
Tomi,
Thanks for all the testing and debugging help! And do use
post-init-providers even with this patch to improve ordering
enforcement. I probably should change the cycle log from info to warn in
a separate patch :)
Greg,
I no longer have concerns about pulling this into 6.13. But we can give
a week or so to Geert/Francesco to do some additional testing.
Geert/Francesco,
If you want to test this patch, pull it in and compare the output of
the following:
ls -1 /sys/class/devlink
With the patch, the only device links that should be missing are those
that are part of a cycle that wasn't detected before.
Also, if you notice any significant boot time increase with this change,
let me know.
Thanks,
Saravana
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 3b13fed1c3e3..9a490b1b7a6f 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -1990,10 +1990,10 @@ static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwn
*
* Return true if one or more cycles were found. Otherwise, return false.
*/
-static bool __fw_devlink_relax_cycles(struct device *con,
+static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle,
struct fwnode_handle *sup_handle)
{
- struct device *sup_dev = NULL, *par_dev = NULL;
+ struct device *sup_dev = NULL, *par_dev = NULL, *con_dev = NULL;
struct fwnode_link *link;
struct device_link *dev_link;
bool ret = false;
@@ -2010,22 +2010,22 @@ static bool __fw_devlink_relax_cycles(struct device *con,
sup_handle->flags |= FWNODE_FLAG_VISITED;
- sup_dev = get_dev_from_fwnode(sup_handle);
-
/* Termination condition. */
- if (sup_dev == con) {
+ if (sup_handle == con_handle) {
pr_debug("----- cycle: start -----\n");
ret = true;
goto out;
}
+ sup_dev = get_dev_from_fwnode(sup_handle);
+ con_dev = get_dev_from_fwnode(con_handle);
/*
* If sup_dev is bound to a driver and @con hasn't started binding to a
* driver, sup_dev can't be a consumer of @con. So, no need to check
* further.
*/
if (sup_dev && sup_dev->links.status == DL_DEV_DRIVER_BOUND &&
- con->links.status == DL_DEV_NO_DRIVER) {
+ con_dev && con_dev->links.status == DL_DEV_NO_DRIVER) {
ret = false;
goto out;
}
@@ -2034,7 +2034,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
if (link->flags & FWLINK_FLAG_IGNORE)
continue;
- if (__fw_devlink_relax_cycles(con, link->supplier)) {
+ if (__fw_devlink_relax_cycles(con_handle, link->supplier)) {
__fwnode_link_cycle(link);
ret = true;
}
@@ -2049,7 +2049,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
else
par_dev = fwnode_get_next_parent_dev(sup_handle);
- if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) {
+ if (par_dev && __fw_devlink_relax_cycles(con_handle, par_dev->fwnode)) {
pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle,
par_dev->fwnode);
ret = true;
@@ -2067,7 +2067,7 @@ static bool __fw_devlink_relax_cycles(struct device *con,
!(dev_link->flags & DL_FLAG_CYCLE))
continue;
- if (__fw_devlink_relax_cycles(con,
+ if (__fw_devlink_relax_cycles(con_handle,
dev_link->supplier->fwnode)) {
pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle,
dev_link->supplier->fwnode);
@@ -2115,11 +2115,6 @@ static int fw_devlink_create_devlink(struct device *con,
if (link->flags & FWLINK_FLAG_IGNORE)
return 0;
- if (con->fwnode == link->consumer)
- flags = fw_devlink_get_flags(link->flags);
- else
- flags = FW_DEVLINK_FLAGS_PERMISSIVE;
-
/*
* In some cases, a device P might also be a supplier to its child node
* C. However, this would defer the probe of C until the probe of P
@@ -2140,25 +2135,23 @@ static int fw_devlink_create_devlink(struct device *con,
return -EINVAL;
/*
- * SYNC_STATE_ONLY device links don't block probing and supports cycles.
- * So, one might expect that cycle detection isn't necessary for them.
- * However, if the device link was marked as SYNC_STATE_ONLY because
- * it's part of a cycle, then we still need to do cycle detection. This
- * is because the consumer and supplier might be part of multiple cycles
- * and we need to detect all those cycles.
+ * Don't try to optimize by not calling the cycle detection logic under
+ * certain conditions. There's always some corner case that won't get
+ * detected.
*/
- if (!device_link_flag_is_sync_state_only(flags) ||
- flags & DL_FLAG_CYCLE) {
- device_links_write_lock();
- if (__fw_devlink_relax_cycles(con, sup_handle)) {
- __fwnode_link_cycle(link);
- flags = fw_devlink_get_flags(link->flags);
- pr_debug("----- cycle: end -----\n");
- dev_info(con, "Fixed dependency cycle(s) with %pfwf\n",
- sup_handle);
- }
- device_links_write_unlock();
+ device_links_write_lock();
+ if (__fw_devlink_relax_cycles(link->consumer, sup_handle)) {
+ __fwnode_link_cycle(link);
+ pr_debug("----- cycle: end -----\n");
+ pr_info("%pfwf: Fixed dependency cycle(s) with %pfwf\n",
+ link->consumer, sup_handle);
}
+ device_links_write_unlock();
+
+ if (con->fwnode == link->consumer)
+ flags = fw_devlink_get_flags(link->flags);
+ else
+ flags = FW_DEVLINK_FLAGS_PERMISSIVE;
if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE)
sup_dev = fwnode_get_next_parent_dev(sup_handle);
--
2.47.0.163.g1226f6d8fa-goog
Hi Saravana, On Wed, Oct 30, 2024 at 6:10 PM Saravana Kannan <saravanak@google.com> wrote: > In attempting to optimize fw_devlink runtime, I introduced numerous cycle > detection bugs by foregoing cycle detection logic under specific > conditions. Each fix has further narrowed the conditions for optimization. > > It's time to give up on these optimization attempts and just run the cycle > detection logic every time fw_devlink tries to create a device link. > > The specific bug report that triggered this fix involved a supplier fwnode > that never gets a device created for it. Instead, the supplier fwnode is > represented by the device that corresponds to an ancestor fwnode. > > In this case, fw_devlink didn't do any cycle detection because the cycle > detection logic is only run when a device link is created between the > devices that correspond to the actual consumer and supplier fwnodes. > > With this change, fw_devlink will run cycle detection logic even when > creating SYNC_STATE_ONLY proxy device links from a device that is an > ancestor of a consumer fwnode. > > Reported-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > Closes: https://lore.kernel.org/all/1a1ab663-d068-40fb-8c94-f0715403d276@ideasonboard.com/ > Fixes: 6442d79d880c ("driver core: fw_devlink: Improve detection of overlapping cycles") > Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > Signed-off-by: Saravana Kannan <saravanak@google.com> Thanks for your patch, which is now commit bac3b10b78e54b7d ("driver core: fw_devlink: Stop trying to optimize cycle detection logic") in next-20241107 and later. > Geert/Francesco, > > If you want to test this patch, pull it in and compare the output of > the following: > > ls -1 /sys/class/devlink > > The only device links that should be missing with the patch should be > device links in a cycle that weren't detected before. I gave it a try on all my boards, and compared the output on a few of them, and everything looks fine. Thanks! 
Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
On Tue, Nov 19, 2024 at 5:40 AM Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > Hi Saravana, > > On Wed, Oct 30, 2024 at 6:10 PM Saravana Kannan <saravanak@google.com> wrote: > > In attempting to optimize fw_devlink runtime, I introduced numerous cycle > > detection bugs by foregoing cycle detection logic under specific > > conditions. Each fix has further narrowed the conditions for optimization. > > > > It's time to give up on these optimization attempts and just run the cycle > > detection logic every time fw_devlink tries to create a device link. > > > > The specific bug report that triggered this fix involved a supplier fwnode > > that never gets a device created for it. Instead, the supplier fwnode is > > represented by the device that corresponds to an ancestor fwnode. > > > > In this case, fw_devlink didn't do any cycle detection because the cycle > > detection logic is only run when a device link is created between the > > devices that correspond to the actual consumer and supplier fwnodes. > > > > With this change, fw_devlink will run cycle detection logic even when > > creating SYNC_STATE_ONLY proxy device links from a device that is an > > ancestor of a consumer fwnode. > > > > Reported-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > > Closes: https://lore.kernel.org/all/1a1ab663-d068-40fb-8c94-f0715403d276@ideasonboard.com/ > > Fixes: 6442d79d880c ("driver core: fw_devlink: Improve detection of overlapping cycles") > > Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > > Signed-off-by: Saravana Kannan <saravanak@google.com> > > Thanks for your patch, which is now commit bac3b10b78e54b7d ("driver > core: fw_devlink: Stop trying to optimize cycle detection logic") in > next-20241107 and later. 
> > > Geert/Francesco, > > > > If you want to test this patch, pull it in and compare the output of > > the following: > > > > ls -1 /sys/class/devlink > > > > The only device links that should be missing with the patch should be > > device links in a cycle that weren't detected before. > > I gave it a try on all my boards, and compared the output on a few of > them, and everything looks fine. Thanks for testing the series Geert! And no noticeable boot time increases? Thanks, Saravana
Hi Saravana, On Wed, Nov 20, 2024 at 3:04 AM Saravana Kannan <saravanak@google.com> wrote: > On Tue, Nov 19, 2024 at 5:40 AM Geert Uytterhoeven <geert@linux-m68k.org> wrote: > > On Wed, Oct 30, 2024 at 6:10 PM Saravana Kannan <saravanak@google.com> wrote: > > > In attempting to optimize fw_devlink runtime, I introduced numerous cycle > > > detection bugs by foregoing cycle detection logic under specific > > > conditions. Each fix has further narrowed the conditions for optimization. > > > > > > It's time to give up on these optimization attempts and just run the cycle > > > detection logic every time fw_devlink tries to create a device link. > > > > > > The specific bug report that triggered this fix involved a supplier fwnode > > > that never gets a device created for it. Instead, the supplier fwnode is > > > represented by the device that corresponds to an ancestor fwnode. > > > > > > In this case, fw_devlink didn't do any cycle detection because the cycle > > > detection logic is only run when a device link is created between the > > > devices that correspond to the actual consumer and supplier fwnodes. > > > > > > With this change, fw_devlink will run cycle detection logic even when > > > creating SYNC_STATE_ONLY proxy device links from a device that is an > > > ancestor of a consumer fwnode. > > > > > > Reported-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > > > Closes: https://lore.kernel.org/all/1a1ab663-d068-40fb-8c94-f0715403d276@ideasonboard.com/ > > > Fixes: 6442d79d880c ("driver core: fw_devlink: Improve detection of overlapping cycles") > > > Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > > > Signed-off-by: Saravana Kannan <saravanak@google.com> > > > > Thanks for your patch, which is now commit bac3b10b78e54b7d ("driver > > core: fw_devlink: Stop trying to optimize cycle detection logic") in > > next-20241107 and later. 
> > > > > Geert/Francesco, > > > > > > If you want to test this patch, pull it in and compare the output of > > > the following: > > > > > > ls -1 /sys/class/devlink > > > > > > The only device links that should be missing with the patch should be > > > device links in a cycle that weren't detected before. > > > > I gave it a try on all my boards, and compared the output on a few of > > them, and everything looks fine. > > Thanks for testing the series Geert! > > And no noticeable boot time increases? That's a bit hard to measure, as the serial console output easily takes ten seconds. Gr{oetje,eeting}s, Geert -- Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- geert@linux-m68k.org In personal conversations with technical people, I call myself a hacker. But when I'm talking to journalists I just say "programmer" or something like that. -- Linus Torvalds
On Wed, Oct 30, 2024 at 10:10 AM Saravana Kannan <saravanak@google.com> wrote: > > In attempting to optimize fw_devlink runtime, I introduced numerous cycle > detection bugs by foregoing cycle detection logic under specific > conditions. Each fix has further narrowed the conditions for optimization. > > It's time to give up on these optimization attempts and just run the cycle > detection logic every time fw_devlink tries to create a device link. > > The specific bug report that triggered this fix involved a supplier fwnode > that never gets a device created for it. Instead, the supplier fwnode is > represented by the device that corresponds to an ancestor fwnode. > > In this case, fw_devlink didn't do any cycle detection because the cycle > detection logic is only run when a device link is created between the > devices that correspond to the actual consumer and supplier fwnodes. > > With this change, fw_devlink will run cycle detection logic even when > creating SYNC_STATE_ONLY proxy device links from a device that is an > ancestor of a consumer fwnode. > > Reported-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > Closes: https://lore.kernel.org/all/1a1ab663-d068-40fb-8c94-f0715403d276@ideasonboard.com/ > Fixes: 6442d79d880c ("driver core: fw_devlink: Improve detection of overlapping cycles") > Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > Signed-off-by: Saravana Kannan <saravanak@google.com> > --- > drivers/base/core.c | 55 ++++++++++++++++++++------------------------- > 1 file changed, 24 insertions(+), 31 deletions(-) > > Tomi, > > Thanks for all the testing and debugging help! And do use > post-init-providers even with this patch to improve ordering > enforcement. I probably should change the cycle log from info to warn in > a separate patch :) > > Greg, > > I no longer have concerns about pulling this into 6.13. But we can give > a week or so to Geert/Francesco to do some additional testing. > > Geert/Francesco, Heads up. 
Greg has pulled this into driver-core git's driver-core-testing branch. Which means in a week or two it'll get into the actual driver-core-next branch. So, if you want to do additional testing, you might want to jump on it soon. Thanks, Saravana > > If you want to test this patch, pull it in and compare the output of > the following: > > ls -1 /sys/class/devlink > > The only device links that should be missing with the patch should be > device links in a cycle that weren't detected before. > > Also, if you notice any significant boot time increase with this change, > let me know. > > Thanks, > Saravana > > diff --git a/drivers/base/core.c b/drivers/base/core.c > index 3b13fed1c3e3..9a490b1b7a6f 100644 > --- a/drivers/base/core.c > +++ b/drivers/base/core.c > @@ -1990,10 +1990,10 @@ static struct device *fwnode_get_next_parent_dev(const struct fwnode_handle *fwn > * > * Return true if one or more cycles were found. Otherwise, return false. > */ > -static bool __fw_devlink_relax_cycles(struct device *con, > +static bool __fw_devlink_relax_cycles(struct fwnode_handle *con_handle, > struct fwnode_handle *sup_handle) > { > - struct device *sup_dev = NULL, *par_dev = NULL; > + struct device *sup_dev = NULL, *par_dev = NULL, *con_dev = NULL; > struct fwnode_link *link; > struct device_link *dev_link; > bool ret = false; > @@ -2010,22 +2010,22 @@ static bool __fw_devlink_relax_cycles(struct device *con, > > sup_handle->flags |= FWNODE_FLAG_VISITED; > > - sup_dev = get_dev_from_fwnode(sup_handle); > - > /* Termination condition. */ > - if (sup_dev == con) { > + if (sup_handle == con_handle) { > pr_debug("----- cycle: start -----\n"); > ret = true; > goto out; > } > > + sup_dev = get_dev_from_fwnode(sup_handle); > + con_dev = get_dev_from_fwnode(con_handle); > /* > * If sup_dev is bound to a driver and @con hasn't started binding to a > * driver, sup_dev can't be a consumer of @con. So, no need to check > * further. 
> */ > if (sup_dev && sup_dev->links.status == DL_DEV_DRIVER_BOUND && > - con->links.status == DL_DEV_NO_DRIVER) { > + con_dev && con_dev->links.status == DL_DEV_NO_DRIVER) { > ret = false; > goto out; > } > @@ -2034,7 +2034,7 @@ static bool __fw_devlink_relax_cycles(struct device *con, > if (link->flags & FWLINK_FLAG_IGNORE) > continue; > > - if (__fw_devlink_relax_cycles(con, link->supplier)) { > + if (__fw_devlink_relax_cycles(con_handle, link->supplier)) { > __fwnode_link_cycle(link); > ret = true; > } > @@ -2049,7 +2049,7 @@ static bool __fw_devlink_relax_cycles(struct device *con, > else > par_dev = fwnode_get_next_parent_dev(sup_handle); > > - if (par_dev && __fw_devlink_relax_cycles(con, par_dev->fwnode)) { > + if (par_dev && __fw_devlink_relax_cycles(con_handle, par_dev->fwnode)) { > pr_debug("%pfwf: cycle: child of %pfwf\n", sup_handle, > par_dev->fwnode); > ret = true; > @@ -2067,7 +2067,7 @@ static bool __fw_devlink_relax_cycles(struct device *con, > !(dev_link->flags & DL_FLAG_CYCLE)) > continue; > > - if (__fw_devlink_relax_cycles(con, > + if (__fw_devlink_relax_cycles(con_handle, > dev_link->supplier->fwnode)) { > pr_debug("%pfwf: cycle: depends on %pfwf\n", sup_handle, > dev_link->supplier->fwnode); > @@ -2115,11 +2115,6 @@ static int fw_devlink_create_devlink(struct device *con, > if (link->flags & FWLINK_FLAG_IGNORE) > return 0; > > - if (con->fwnode == link->consumer) > - flags = fw_devlink_get_flags(link->flags); > - else > - flags = FW_DEVLINK_FLAGS_PERMISSIVE; > - > /* > * In some cases, a device P might also be a supplier to its child node > * C. However, this would defer the probe of C until the probe of P > @@ -2140,25 +2135,23 @@ static int fw_devlink_create_devlink(struct device *con, > return -EINVAL; > > /* > - * SYNC_STATE_ONLY device links don't block probing and supports cycles. > - * So, one might expect that cycle detection isn't necessary for them. 
> - * However, if the device link was marked as SYNC_STATE_ONLY because > - * it's part of a cycle, then we still need to do cycle detection. This > - * is because the consumer and supplier might be part of multiple cycles > - * and we need to detect all those cycles. > + * Don't try to optimize by not calling the cycle detection logic under > + * certain conditions. There's always some corner case that won't get > + * detected. > */ > - if (!device_link_flag_is_sync_state_only(flags) || > - flags & DL_FLAG_CYCLE) { > - device_links_write_lock(); > - if (__fw_devlink_relax_cycles(con, sup_handle)) { > - __fwnode_link_cycle(link); > - flags = fw_devlink_get_flags(link->flags); > - pr_debug("----- cycle: end -----\n"); > - dev_info(con, "Fixed dependency cycle(s) with %pfwf\n", > - sup_handle); > - } > - device_links_write_unlock(); > + device_links_write_lock(); > + if (__fw_devlink_relax_cycles(link->consumer, sup_handle)) { > + __fwnode_link_cycle(link); > + pr_debug("----- cycle: end -----\n"); > + pr_info("%pfwf: Fixed dependency cycle(s) with %pfwf\n", > + link->consumer, sup_handle); > } > + device_links_write_unlock(); > + > + if (con->fwnode == link->consumer) > + flags = fw_devlink_get_flags(link->flags); > + else > + flags = FW_DEVLINK_FLAGS_PERMISSIVE; > > if (sup_handle->flags & FWNODE_FLAG_NOT_DEVICE) > sup_dev = fwnode_get_next_parent_dev(sup_handle); > -- > 2.47.0.163.g1226f6d8fa-goog >
On Tue, Nov 05, 2024 at 05:30:20PM -0800, Saravana Kannan wrote: > On Wed, Oct 30, 2024 at 10:10 AM Saravana Kannan <saravanak@google.com> wrote: > > > > In attempting to optimize fw_devlink runtime, I introduced numerous cycle > > detection bugs by foregoing cycle detection logic under specific > > conditions. Each fix has further narrowed the conditions for optimization. > > > > It's time to give up on these optimization attempts and just run the cycle > > detection logic every time fw_devlink tries to create a device link. > > > > The specific bug report that triggered this fix involved a supplier fwnode > > that never gets a device created for it. Instead, the supplier fwnode is > > represented by the device that corresponds to an ancestor fwnode. > > > > In this case, fw_devlink didn't do any cycle detection because the cycle > > detection logic is only run when a device link is created between the > > devices that correspond to the actual consumer and supplier fwnodes. > > > > With this change, fw_devlink will run cycle detection logic even when > > creating SYNC_STATE_ONLY proxy device links from a device that is an > > ancestor of a consumer fwnode. > > > > Reported-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > > Closes: https://lore.kernel.org/all/1a1ab663-d068-40fb-8c94-f0715403d276@ideasonboard.com/ > > Fixes: 6442d79d880c ("driver core: fw_devlink: Improve detection of overlapping cycles") > > Tested-by: Tomi Valkeinen <tomi.valkeinen@ideasonboard.com> > > Signed-off-by: Saravana Kannan <saravanak@google.com> > > --- > > drivers/base/core.c | 55 ++++++++++++++++++++------------------------- > > 1 file changed, 24 insertions(+), 31 deletions(-) > > > > Tomi, > > > > Thanks for all the testing and debugging help! And do use > > post-init-providers even with this patch to improve ordering > > enforcement. 
I probably should change the cycle log from info to warn in > > a separate patch :) > > > > Greg, > > > > I no longer have concerns about pulling this into 6.13. But we can give > > a week or so to Geert/Francesco to do some additional testing. > > > > Geert/Francesco, > > Heads up. Greg has pulled this into driver-core git's > driver-core-testing branch. Which means in a week or two it'll get > into the actual driver-core-next branch. So, if you want to do > additional testing, you might want to jump on it soon. It should now show up in the next linux-next release. thanks, greg k-h
© 2016 - 2024 Red Hat, Inc.